# Lab - Linear Regression

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
# Suppress every warning so the lab output stays uncluttered.
# NOTE(review): this blanket filter also hides deprecation warnings from the
# libraries used below — consider narrowing it if results look surprising.
warnings.filterwarnings('ignore')

# Apply seaborn's "whitegrid" style to the matplotlib figures drawn below.
sns.set_style("whitegrid")

In [None]:
from sklearn.linear_model import LinearRegression

## Linear Regression from scratch

### Import data

In [None]:

# Load the Advertising dataset from GitHub; the first CSV column becomes the row index.
df = pd.read_csv(
    'https://raw.githubusercontent.com/justmarkham/scikit-learn-videos/master/data/Advertising.csv',
    index_col=0,
)

# Selecting with a list of column names keeps both arrays 2-D (one column each).
X = df.loc[:, ['TV']].values
y = df.loc[:, ['Sales']].values

# Scatter the single feature against the target.
plt.scatter(X, y, alpha=0.6)
plt.xlabel('TV ads')
plt.ylabel('Sales')
plt.show()

In [None]:
# Peek at the first few rows of the loaded DataFrame (rendered as the cell output).
df.head()

### Implement Linear Regression with sklearn

We will build a Linear Regression model to predict `Sales` from the other features. Let's start with Simple Linear Regression: use `TV` alone to predict `Sales`. We fit the model with `sklearn.linear_model.LinearRegression` first, so that we have a reference result to compare our own implementation against later on:

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Reference model: fit() returns the estimator itself, so construction and fitting can be chained.
lg = LinearRegression().fit(X, y)

# Report the fitted parameters and the training error for later comparison.
print(f'Coef: {lg.coef_}')
print(f'Intercept: {lg.intercept_}')
print(f'MSE: {mean_squared_error(y, lg.predict(X))}')

# Data points with the fitted regression line drawn on top in red.
plt.scatter(X, y, alpha=0.6)
plt.plot(X, lg.predict(X), color='r')

plt.show()

### Implement from scratch

Here are the 5 main steps of Linear Regression using Gradient Descent:

1. Initialize a value for $w$ and $b$. We will initialize them with zeros
2. Calculate predictions $\hat{y}$ on all training observations (forward pass)
3. Calculate the loss value $L(w, b)$
4. Calculate the gradients $\frac{\partial L}{\partial w}$ and $\frac{\partial L}{\partial b}$ (backward pass)
5. Update our parameters: $\begin{cases} 
w = w - \alpha\frac{\partial L}{\partial w} \\
b = b - \alpha\frac{\partial L}{\partial b}
\end{cases}$



Alright, let's do it!

**Step 1: Initialization**

In [None]:
# Initialization
def initialize_params(X):
    '''Create zero-valued starting parameters for the linear model.

    w has one row per column of X, i.e. shape (X.shape[1], 1), and b is a
    length-1 vector. Returns the pair (w, b).
    '''
    n_features = X.shape[1]
    weights = np.zeros(shape=(n_features, 1))
    bias = np.zeros(1)
    return weights, bias



**Step 2: Making prediction (Forward Propagation)**


$$
\hat{y} = w_1x_1 + w_2x_2 + \dots + w_nx_n + b = b + \sum_{i=1}^{n}{w_ix_i} = w^Tx + b
$$

In practice we will use the vectorized form (a matrix multiplication), which computes the predictions for all observations at once:
$$
\hat{y} = Xw + b
$$

In [None]:
# Function for forward propagation
def forward(X, w, b):
    '''Forward pass: return the predictions y_hat = X @ w + b.'''
    weighted_sum = X @ w
    # b is added through NumPy broadcasting.
    y_hat = weighted_sum + b
    return y_hat
    

**Step 3: Loss function:**

$$
L(w, b) = \frac{1}{m} \sum_{i=1}^{m}{(\hat{y}^{(i)} - y^{(i)})^2}
$$

In [None]:
# Function to calculate mean squared error
def mse(y_hat, y):
    '''Return the mean of the squared element-wise differences between y_hat and y.'''
    residual = y_hat - y
    squared_error = residual * residual
    return squared_error.mean()

**Step 4: Calculating gradients (Backward Propagation)**

Vectorized form:

$$
\frac{\partial L}{\partial w} = \frac{2}{m} X^T (\hat{y} - y)
$$
<br/>
$$
\frac{\partial L}{\partial b} = \frac{2}{m} \sum_{i=1}^{m}{(\hat{y}^{(i)} - y^{(i)})}
$$

In [None]:
# Function for backward propagation
def backward(X, y, y_hat, w, b):
    '''Backward pass: return the MSE gradients (dw, db).

    dw = (2/m) * X^T (y_hat - y) and db = (2/m) * sum(y_hat - y), where m is
    the number of rows in X. The w and b arguments are accepted but not used
    in the computation.
    '''
    n_samples = X.shape[0]
    scale = 2 / n_samples
    # The scale is applied to X.T before the matrix product, matching the
    # left-to-right evaluation of `2/m * X.T @ (...)`.
    grad_w = (scale * X.T) @ (y_hat - y)
    grad_b = scale * np.sum(y_hat - y)
    return grad_w, grad_b


**Step 5: Update parameters**


Update our parameters: $\begin{cases} 
w = w - \alpha\frac{\partial L}{\partial w} \\
b = b - \alpha\frac{\partial L}{\partial b}
\end{cases}$

In [None]:
def update_params(w, b, dw, db, learning_rate):
    '''Take one gradient-descent step and return the new (w, b).

    Each parameter moves against its gradient, scaled by learning_rate.
    The inputs are not modified in place.
    '''
    step_w = learning_rate * dw
    step_b = learning_rate * db
    return w - step_w, b - step_b

**Train model**

In [None]:
# Function for training model
def train(X, y, iterations, learning_rate, log_every=100):
    '''Fit w and b by batch gradient descent on the MSE loss and return them.

    Args:
        X: feature matrix with one row per observation.
        y: targets. Predictions have shape (m, 1), so y is expected to be
            (m, 1) as well; a 1-D y would broadcast against the predictions.
        iterations: number of gradient-descent steps to run.
        learning_rate: step size applied to both gradients.
        log_every: print the loss every `log_every` steps. The default of 100
            keeps the original output; pass 0 or None to train silently.

    Returns:
        Tuple (w, b) with the parameters after the final update.
    '''
    # Step 1: initialize the parameters
    w, b = initialize_params(X)

    for i in range(iterations):
        # Step 2: forward pass
        y_hat = forward(X, w, b)
        # Step 3: calculate loss (measured before this step's update)
        L = mse(y_hat, y)
        # Step 4: backward pass
        dw, db = backward(X, y, y_hat, w, b)
        # Step 5: update params
        w, b = update_params(w, b, dw, db, learning_rate)

        # Periodic progress report; skipped entirely when log_every is 0 or None
        if log_every and i % log_every == 0:
            print(f'Step {i}, MSE = {L}')

    return w, b

In [None]:
# Hyperparameters for the single-feature run on the unscaled TV values
learning_rate = 1e-5
iterations = 100_000

# Fit w and b with the train function created above
w, b = train(X, y, iterations, learning_rate)

**Evaluation**

In [None]:
# Prediction
def predict(X, w, b):
    '''Return the model's predictions for X given trained parameters w and b.'''
    # Prediction is exactly the forward pass.
    y_pred = forward(X, w, b)
    return y_pred

In [None]:
# Predict on the training inputs X with the trained parameters w, b,
# then print the training MSE between y_hat and y
# (to be compared with the MSE printed for the sklearn model).
y_hat = predict(X, w, b)
print(mse(y_hat, y))

In [None]:
# Plot the observations together with the line fitted by our own model
plt.scatter(X, y, alpha=0.5) # the data
plt.plot(X, y_hat, c='r') # the prediction (y_hat computed in the previous cell), in red
plt.show()

In [None]:
# Print the learned weight w and intercept b
# (compare them with the sklearn Coef / Intercept values)
print('Coef:', w)
print('Intercept:', b)

In [None]:
# Output from sklearn model. Try to match your MSE with sklearn's
# Coef: [[0.04753664]]
# Intercept: [7.03259355]
# MSE: 10.512652915656757

## Multiple Linear Regression

Now let's use all of the features (`TV`, `Radio` and `Newspaper`) to predict `Sales`.

In [None]:
# Rebind X to all three advertising channels; y stays the 2-D Sales column.
X = df.loc[:, ['TV', 'Radio', 'Newspaper']].values
y = df.loc[:, ['Sales']].values

In [None]:
# Standardization: shift each column to zero mean and scale it to unit standard deviation
x_mean = X.mean(axis=0)
x_std = X.std(axis=0)
X_scaled = (X - x_mean) / x_std

In [None]:
# Train the model on the standardized features
learning_rate = 0.01
iterations = 1_000

w, b = train(X_scaled, y, iterations, learning_rate)

In [None]:
# Print the learned weights w and intercept b
print('Coef:', w) # these weights apply to the standardized X_scaled, not to the raw X
print('Intercept:', b)

In [None]:
# Training MSE of our model on the standardized features
# (the bare expression is displayed as the cell output)
mse(predict(X_scaled, w, b), y)

In [None]:
# Compare with the model from sklearn
from sklearn.linear_model import LinearRegression
# Fitted on the unscaled X: a least-squares fit with an intercept gives the same
# predictions with or without standardization, so this MSE is still comparable.
model = LinearRegression().fit(X, y)

print(f'MSE: {mean_squared_error(y, model.predict(X))}')

# Standardization

In [None]:
# Per-column summary statistics (rendered as the cell output); use them to compare the scales of the features.
df.describe()

Visualization of the loss landscape: standardized vs. unstandardized features


![](https://i.imgur.com/4Vdlq5l.png)