In [None]:
# Import Libraries
import numpy as np
import copy
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Load the dataset from 'table.csv' and preview its first five rows
df = pd.read_csv(filepath_or_buffer='table.csv')
print(df.head(n=5))

   time  jiggle  scroll  sales
0   232      33     402   2201
1    10      22     160      0
2  6437     343     231   7650
3   512     101      17   5599
4   441     212      55   8900


In [None]:
# Split the table into predictors and response:
# the first three columns form the feature matrix X, the fourth column is the target y
X = df.iloc[:, 0:3].to_numpy()
y = df.iloc[:, 3].to_numpy()

In [None]:
# Create a column of ones for the intercept term
column_ones = np.ones((X.shape[0], 1))

# Prepend the intercept column only when it is not already present, so that
# re-running this cell does not stack a second column of ones onto X.
# NOTE(review): assumes no genuine feature in column 0 is constantly 1 — confirm.
if not np.all(X[:, 0] == 1):
    X = np.hstack((column_ones, X))

# Normal equation: θ = (X^T X)^(-1) X^T y
# Solve the linear system (X^T X) θ = X^T y directly instead of forming the
# explicit inverse: same θ, but cheaper and numerically more accurate.
# Like the inverse, this raises numpy.linalg.LinAlgError if X^T X is singular.
theta = np.linalg.solve(X.T.dot(X), X.T.dot(y))

In [None]:
# Hypothesis function
def hypothesis(X, theta):
    """Return the linear model's prediction for every row of X.

    Parameters
    ----------
    X : object exposing ``.dot`` (e.g. a NumPy array)
        Design matrix whose columns line up with the entries of ``theta``.
    theta : array-like
        Parameter vector of the linear model.

    Returns
    -------
    The product ``X.dot(theta)``.

    Notes
    -----
    The least-squares cost is J(θ) = (Xθ - y)^T (Xθ - y), up to a constant
    factor. Setting its gradient with respect to θ to zero gives the
    closed-form result θ = (X^T X)^(-1) X^T y.
    """
    predictions = X.dot(theta)
    return predictions

In [24]:
# Report each fitted coefficient next to the term it multiplies
print("Learned parameters (theta):")
coefficient_labels = ("θ₀ (intercept)", "θ₁ (time)", "θ₂ (jiggle)", "θ₃ (scroll)")
for position, label in enumerate(coefficient_labels):
    print(f"{label}: {theta[position]:.6f}")

Learned parameters (theta):
θ₀ (intercept): 2626.268614
θ₁ (time): 0.420484
θ₂ (jiggle): 12.716237
θ₃ (scroll): -6.496562


## Problem 2: Normal Equation Derivation

### Linear Regression Model
The linear regression model can be written in matrix form as:

$$y = X\theta + \varepsilon$$

Where:
- y is the (11×1) vector of sales values
- X is the (11×4) design matrix with columns [1, time, jiggle, scroll]  
- θ is the (4×1) parameter vector [θ₀, θ₁, θ₂, θ₃]
- ε is the error term

### Mean Squared Error (MSE) Objective Function
We want to minimize the mean of the squared residuals (the extra factor of ½ is a convenience that cancels the 2 produced by differentiation):

$$J(\theta) = \frac{1}{2m} ||X\theta - y||^2$$

Expanding this:

$$J(\theta) = \frac{1}{2m} (X\theta - y)^T(X\theta - y)$$

### Deriving the Normal Equation
To find the minimum, we take the gradient with respect to θ and set it to zero (J is a convex quadratic in θ, so any stationary point is a global minimum):

$$\frac{\partial J(\theta)}{\partial \theta} = \frac{1}{m} X^T(X\theta - y) = 0$$

Solving for θ:

$$X^T(X\theta - y) = 0$$

$$X^TX\theta - X^Ty = 0$$

$$X^TX\theta = X^Ty$$

**Normal Equation:**

$$\theta = (X^TX)^{-1}X^Ty$$

This gives us the closed-form analytical solution for the optimal parameters, provided $X^TX$ is invertible (i.e. the columns of X are linearly independent).

### Design Matrix Construction
For our dongles dataset, the first column of the design matrix X is all ones (for the intercept term), followed by the time, jiggle, and scroll columns.

In [25]:
# Write out the fitted regression equation using the first four entries of theta
model_template = "Linear model: sales = {:.6f} + {:.6f}*time + {:.6f}*jiggle + {:.6f}*scroll"
print(model_template.format(theta[0], theta[1], theta[2], theta[3]))

Linear model: sales = 2626.268614 + 0.420484*time + 12.716237*jiggle + -6.496562*scroll
