# <center>`Gradient Descent Exercise`</center>

In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
import math

import warnings
warnings.filterwarnings("ignore")

In [3]:
# Load the advertising dataset; the relative path is resolved against the
# notebook's working directory.
# NOTE(review): the rendered preview shows an "Unnamed: 0" column, presumably a
# row index that was saved into the CSV -- confirm, and consider index_col=0.
df = pd.read_csv("advertising.csv")
# Preview the first rows as the cell's rich output.
df.head()

Unnamed: 0.1,Unnamed: 0,TV,radio,newspaper,sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9


In [4]:
# Feature matrix: the TV, radio and newspaper columns.
X = df[['TV', 'radio', 'newspaper']]
# Regression target: the sales column.
y = df['sales']

#### Standardization

Standardization rescales every variable to zero mean and unit standard deviation. Standardized coefficients therefore let us compare the relative magnitude of the effects of different explanatory variables in the model, even when those variables are measured in different units.


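Standardizing the data in linear regression is important because it allows for fair comparison of the coefficients, avoids skewed influence of features with different scales, and helps in interpreting the relative importance of different features. For gradient descent in particular, putting all features on a common scale lets a single learning rate work well for every coefficient.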

In [5]:
# Standardize the target and the features to zero mean and unit *sample*
# standard deviation (ddof=1).
#
# The target is wrapped in a Series before scaling so that `.std()` is always
# the pandas estimator (ddof=1). Without the wrap, re-running this cell would
# call ndarray.std() (ddof=0) on the already-converted `y` and silently rescale
# it, making the cell non-idempotent.
y = pd.Series(y).pipe(lambda s: (s - s.mean()) / s.std()).to_numpy()

# Column-wise z-score: DataFrame arithmetic aligns the per-column mean and std
# on the column labels, so no per-column lambda is needed.
X = (X - X.mean()) / X.std()

#### Method 1: Initialize the values of the parameters

In [7]:
def initialize(dimension, seed=42):
    """Return a reproducible random starting point for gradient descent.

    Parameters
    ----------
    dimension : int
        Number of coefficients to draw (one per feature).
    seed : int, default 42
        Seed applied to both Python's ``random`` module and NumPy's legacy
        global generator. The default reproduces the values this function
        returned when the seed was hard-coded.

    Returns
    -------
    b : float
        Intercept (beta 0), drawn uniformly from [0, 1).
    w : numpy.ndarray
        Array of shape ``(dimension,)`` with coefficients drawn uniformly
        from [0, 1).

    Notes
    -----
    Calling this function reseeds the *global* ``random`` and ``np.random``
    generators as a side effect, so any random draws made afterwards are
    affected by ``seed`` as well.
    """
    # Seed both generators: the intercept comes from `random`, the weights
    # from `np.random`.
    random.seed(seed)
    np.random.seed(seed)

    # Beta 0
    b = random.random()

    # Required number of coefficients
    w = np.random.rand(dimension)

    return b, w

In [11]:
# Draw the starting intercept and one weight for each of the 3 features.
b, w = initialize(3)

print("Bias(intercept) is", b)
print("Weights(coefficients) are", w)

Bias(intercept) is 0.6394267984578837
Weights(coefficients) are [0.37454012 0.95071431 0.73199394]


#### Method 2: Calculate the value of y_pred

In [14]:
def predict(b, w, X):
    """Evaluate the linear model ``b + X·w``.

    Parameters
    ----------
    b : float
        Intercept added to every prediction.
    w : array-like
        Coefficient vector.
    X : array-like or DataFrame
        Input data, multiplied by ``w`` via ``np.matmul``.
        Assumes shape (n_samples, n_features) with ``len(w) == n_features``,
        as in this notebook's calls -- TODO confirm for other callers.

    Returns
    -------
    The result of ``b + np.matmul(X, w)``.
    """
    weighted_sum = np.matmul(X, w)  # matrix multiplication
    return b + weighted_sum

In [16]:
# Re-draw the seeded starting parameters and compute the model's predictions for X.
b, w = initialize(3)
y_hat = predict(b, w, X)

# Display the predictions as the cell output.
y_hat

0      3.231496
1      1.707849
2      2.824761
3      2.753090
4      0.924486
         ...   
195   -1.651292
196   -1.521730
197   -0.935899
198    3.633005
199   -0.663044
Length: 200, dtype: float64

#### Method 3: Calculate the value of loss

In [18]:
def calculate_cost(y, y_hat):
    """Return the mean squared error (MSE) between targets and predictions.

    Parameters
    ----------
    y : array-like
        Observed target values.
    y_hat : array-like
        Predicted values; must contain as many elements as ``y``.

    Returns
    -------
    numpy.float64
        ``sum((y - y_hat) ** 2) / n`` where ``n`` is the number of elements.

    Notes
    -----
    Both inputs are flattened to 1-D before subtracting and are compared
    element by element in positional order (pandas index labels are not used
    for alignment). Flattening matters: with ``(n, 1)`` column vectors,
    ``matmul(residual, residual.T)`` is an ``n x n`` outer product whose sum
    is ``(sum of residuals) ** 2`` rather than the sum of squared residuals,
    and mixing ``(n,)`` with ``(n, 1)`` would broadcast to ``n x n``.
    """
    targets = np.asarray(y, dtype=float).ravel()
    predictions = np.asarray(y_hat, dtype=float).ravel()
    residual = targets - predictions

    # Inner product of the residual with itself == sum of squared residuals.
    return np.dot(residual, residual) / residual.size # returns the value of MSE

In [20]:
# Cost of the initial (untrained) parameters against the target.
b, w = initialize(3)
y_hat = predict(b, w, X)

calculate_cost(y, y_hat)

1.53031001985059

#### Method 4: Updating the parameters

In [22]:
def update_beta(x, y, y_hat, b0, w0, lr):
    """Perform one gradient-descent step on the MSE loss.

    Parameters
    ----------
    x : array-like or DataFrame
        Input data used to produce ``y_hat``.
    y : array-like
        Observed target values.
    y_hat : array-like
        Current predictions for ``x``.
    b0 : float
        Current intercept.
    w0 : array-like
        Current coefficient vector.
    lr : float
        Learning rate (eta) that scales the step along the negative gradient.

    Returns
    -------
    tuple
        ``(b1, w1)``: the intercept and coefficients after the step.
    """
    n_samples = len(y)
    error = y_hat - y

    # Gradients of the MSE with respect to the intercept and the weights.
    grad_b = (np.sum(error) * 2) / n_samples
    grad_w = (np.dot(error, x) * 2) / n_samples

    # Step against the gradient.
    return b0 - lr * grad_b, w0 - lr * grad_w

In [23]:
# Start from the seeded initial parameters.
b, w = initialize(3)
print("Initialized Bias(intercept) is", b)
print("Initialized Weights(coefficients) are", w)

# Predictions under the initial parameters feed the gradient computation.
y_hat = predict(b, w, X)

# One gradient-descent step with learning rate 0.01.
b, w = update_beta(X, y, y_hat, b, w, 0.01)
print("After updating, Bias(intercept) is", b)
print("After updating, Weights(coefficients) are", w)

Initialized Bias(intercept) is 0.6394267984578837
Initialized Weights(coefficients) are [0.37454012 0.95071431 0.73199394]
After updating, Bias(intercept) is 0.6266382624887261
After updating, Weights(coefficients) are [0.38079093 0.9376953  0.71484883]
