In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sn
import random
import math

In [None]:
# Read the advertising data from a CSV file in the working directory and
# print a summary of the resulting DataFrame.
df=pd.read_csv('./Advertising.csv')
df.info()

X represents the TV, radio and newspaper advertising columns, while Y represents sales. As these variables may be on different scales, we standardise both X and Y by subtracting the mean and dividing by the standard deviation.

In [None]:
# Target: centre on the mean, scale by the standard deviation, keep as a NumPy array.
Y = df['sales']
Y = np.array((Y - Y.mean()) / Y.std())
# Predictors: apply the same centring and scaling to each column independently.
X = df[['TV','radio','newspaper']].apply(
    lambda col: (col - col.mean()) / col.std(),
    axis=0,
)

To implement a gradient descent algorithm we need to follow 4 steps:  
1) Randomly initialize the bias and the weights theta  
2) Calculate the predicted value of Y given the bias and the weights  
3) Calculate the cost function from the predicted and actual values of Y  
4) Calculate the gradients and use them to update the bias and the weights  

In [None]:
def initialize(dim):
    """Draw a random starting point for gradient descent.

    Parameters
    ----------
    dim : int
        Number of weights to generate.

    Returns
    -------
    tuple
        ``(b, theta)`` where ``b`` is a float from ``random.random()`` and
        ``theta`` is a length-``dim`` array from ``np.random.rand``.
    """
    # The bias comes from the stdlib generator and the weights from NumPy's
    # global generator, so reproducing a start point requires seeding both.
    return random.random(), np.random.rand(dim)
# Draw an initial bias and three weights, then show them.
b,theta=initialize(3)
print("Bias: ",b,"Weights: ",theta)

In [None]:
def predict_Y(b,theta,X):
    """Evaluate the linear model ``b + X . theta``.

    Parameters
    ----------
    b : float
        Bias (intercept) term added to every prediction.
    theta : array-like
        Weight vector passed as the second operand of ``np.dot``.
    X : array-like
        Feature data passed as the first operand of ``np.dot``.

    Returns
    -------
    The result of ``np.dot(X, theta)`` shifted by ``b``.
    """
    weighted_sum = np.dot(X, theta)
    return b + weighted_sum
# Predict with the current b and theta and preview the first ten values.
Y_hat=predict_Y(b,theta,X)
Y_hat[0:10]

In [None]:
def get_cost(Y,Y_hat):
    """Return the mean squared error between targets and predictions.

    Parameters
    ----------
    Y : array-like
        Observed target values.
    Y_hat : array-like
        Predicted values, the same shape as ``Y``.

    Returns
    -------
    The sum of squared residuals divided by the number of residuals.
    """
    Y_resd = Y - Y_hat
    # np.dot of the residuals with themselves is the sum of squares; the outer
    # np.sum collapses the 1x1 result produced when column vectors are passed.
    # The sample count is read from the residuals directly instead of building
    # another temporary array just to measure its length.
    return np.sum(np.dot(Y_resd.T, Y_resd)) / len(Y_resd)
# Cost of the current predictions against the targets.
get_cost(Y,Y_hat)

In [None]:
def update_theta(x,y,y_hat,b_0,theta_0,learning_rate):
    """Take one gradient-descent step on the bias and the weights.

    Parameters
    ----------
    x : array-like
        Feature data; combined with the prediction error via ``np.dot``.
    y : array-like
        Observed target values.
    y_hat : array-like
        Predictions made with the current parameters.
    b_0 : float
        Current bias.
    theta_0 : array-like
        Current weights.
    learning_rate : float
        Step size multiplying each gradient.

    Returns
    -------
    tuple
        ``(b_1, theta_1)``, the bias and weights after the step.
    """
    n_samples = len(y)
    error = y_hat - y
    # Gradients of the mean squared error with respect to bias and weights.
    grad_b = (np.sum(error) * 2) / n_samples
    grad_theta = (np.dot(error, x) * 2) / n_samples
    return b_0 - learning_rate * grad_b, theta_0 - learning_rate * grad_theta
# Demonstrate a single gradient step starting from the current b and theta.
print("After initialization :- Bias: ",b,"theta: ",theta)
Y_hat=predict_Y(b,theta,X)
# One step with learning rate 0.001; b and theta are overwritten with the result.
b,theta=update_theta(X,Y,Y_hat,b,theta,0.001)
print("After first update :- Bias: ",b,"theta: ",theta)
# NOTE: Y_hat was computed before the update, so the value shown here is the
# cost of the pre-update parameters, not of the ones printed just above.
get_cost(Y,Y_hat)

In [None]:
def gradient_descent(X,Y,alpha,num_iterations,log_every=10):
    """Fit the bias and weights by batch gradient descent and log the cost.

    Parameters
    ----------
    X : DataFrame or 2-D array
        Feature data; ``X.shape[1]`` sets the number of weights.
    Y : array-like
        Target values.
    alpha : float
        Learning rate passed to ``update_theta``.
    num_iterations : int
        Number of update steps to perform.
    log_every : int, optional
        Record the cost on every ``log_every``-th iteration (default 10).

    Returns
    -------
    tuple
        ``(gd_iterations_df, b, theta)``. ``gd_iterations_df`` has columns
        ``'iteration'`` and ``'cost'`` and is indexed by iteration number;
        ``b`` and ``theta`` are the parameters after the last step.

    Raises
    ------
    ValueError
        If ``log_every`` is not positive.
    """
    if log_every <= 0:
        raise ValueError("log_every must be a positive integer")

    b, theta = initialize(X.shape[1])

    # Accumulate the log in plain lists and build the DataFrame once at the
    # end; enlarging a DataFrame one row at a time copies it on every insert.
    logged_iterations = []
    logged_costs = []
    for iter_num in range(num_iterations):
        Y_hat = predict_Y(b, theta, X)
        # The cost is only needed for iterations that are logged, and it is
        # measured on the predictions made before this iteration's update.
        if iter_num % log_every == 0:
            logged_iterations.append(iter_num)
            logged_costs.append(get_cost(Y, Y_hat))
        b, theta = update_theta(X, Y, Y_hat, b, theta, alpha)

    print("Final Estimate of b and theta : ",b,theta)

    gd_iterations_df = pd.DataFrame(
        {'iteration': logged_iterations, 'cost': logged_costs},
        index=logged_iterations,
    )
    return gd_iterations_df,b,theta
# Run 2000 iterations with learning rate 0.001, keeping the cost log and the final parameters.
gd_iterations_df,b,theta=gradient_descent(X,Y,alpha=0.001,num_iterations=2000)

In [None]:
# Preview the first ten logged rows of the cost history.
gd_iterations_df.head(10)

The cost is recorded every 10th iteration, and printing these values shows the cost function decreasing as gradient descent proceeds.

In [None]:
# Compare how quickly the cost falls for several learning rates.
alpha_values = [0.001, 0.005, 0.01, 0.05, 0.1]
num_iterations = 2000
for alpha in alpha_values:
    # initialize() draws from both the stdlib and the NumPy global generators.
    # Re-seed both before every run so each learning rate starts from the same
    # bias and weights; otherwise the curves would differ by starting point as
    # well as by alpha, and the figure would change on every execution.
    random.seed(42)
    np.random.seed(42)
    alpha_df,b,theta = gradient_descent(X, Y, alpha, num_iterations)
    plt.plot(alpha_df['iteration'], alpha_df['cost'], label=f'alpha = {alpha}')
plt.xlabel('Number of iterations')
plt.ylabel('Cost')
plt.title('Gradient Descent - Cost vs Iterations')
plt.legend()
plt.show()