In this notebook, we implement batch gradient descent from scratch, apply it to the diabetes dataset, and compare the result with scikit-learn's `LinearRegression`.

In [2]:
import numpy as np
import random

from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_diabetes 
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [3]:
# return_X_y=True makes the loader return the ``(data, target)`` pair,
# which is unpacked here into the feature matrix X and the target vector y.
X, y = load_diabetes(return_X_y=True)

In [4]:
# Feature-matrix shape first, then target shape, one per line.
print(X.shape, y.shape, sep="\n")

(442, 10)
(442,)


In [5]:
# test_size=0.2 reserves a fifth of the rows for evaluation; the fixed
# random_state keeps the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Baseline model: scikit-learn's LinearRegression fitted on the training split
# (fit returns the estimator itself, so construction and fitting can be chained).
reg = LinearRegression().fit(X_train, y_train)

# Predictions on the held-out rows, scored further down.
y_pred = reg.predict(X_test)


In [7]:
"""
We will get 10 coefficients and 1 intercept for our diabetes dataset as there are 10 input columns.
"""
# Show the parameters held by `reg` (the LinearRegression fitted earlier in the
# notebook) so they can be compared with the gradient-descent estimates later.
print("Intercept is: ",reg.intercept_)
print("Coefficients are: ",reg.coef_)

Intercept is:  151.3456553477407
Coefficients are:  [  37.90031426 -241.96624835  542.42575342  347.70830529 -931.46126093
  518.04405547  163.40353476  275.31003837  736.18909839   48.67112488]


In [8]:
# R^2 of the current predictions against the held-out targets.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(r2)

0.452606602161738


NumPy's dot product between a 1-D vector and a matrix matches the dimensions automatically: the vector is treated as a row vector or as a column vector, depending on which side of the matrix it appears on.

In [45]:
class BatchGDRegressor():
    """Linear regression fitted with full-batch gradient descent on the MSE.

    Every epoch uses all training rows to compute the gradient of
    ``mean((y - (X @ coef_ + intercept_)) ** 2)`` and moves the intercept and
    the coefficients one step of size ``learning_rate`` against it.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to every gradient update.
    epochs : int, default=100
        Number of parameter updates (full passes over the data) done by ``fit``.

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,) or None
        Learned feature weights; ``None`` until ``fit`` has been called.
    intercept_ : float or None
        Learned bias term; ``None`` until ``fit`` has been called.
    """

    def __init__(self,learning_rate = 0.01,epochs = 100):
        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs

    def fit(self,X_train,y_train):
        """Estimate ``coef_`` and ``intercept_`` from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training inputs (anything ``np.asarray`` can turn into a 2-D float array).
        y_train : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets; flattened to 1-D before use.

        Returns
        -------
        self : BatchGDRegressor
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D, has no rows, or its row count differs
            from the number of targets.
        """
        X = np.asarray(X_train, dtype=float)
        # A column-vector target of shape (n, 1) would broadcast against the
        # (n,) predictions into an (n, n) residual matrix and silently corrupt
        # both gradients, so the target is always flattened to 1-D.
        y = np.asarray(y_train, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(
                "X_train must be 2-D (n_samples, n_features); got %d-D input" % X.ndim
            )
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X_train must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                "X_train has %d samples but y_train has %d targets"
                % (n_samples, y.shape[0])
            )

        # Starting point: zero intercept, all-ones coefficients.
        self.intercept_ = 0.0
        self.coef_ = np.ones(n_features)

        for _ in range(self.epochs):
            # Both gradients are evaluated at the same (pre-update) parameters.
            y_pred = np.dot(X, self.coef_) + self.intercept_
            residual = y - y_pred

            # d(MSE)/d(intercept) = -2 * mean(residual)
            der_intercept = -2 * np.mean(residual)
            # d(MSE)/d(coef) = -2 * (residual . X) / n_samples
            der_coef = -2 * np.dot(residual, X) / n_samples

            self.intercept_ = self.intercept_ - self.lr * der_intercept
            self.coef_ = self.coef_ - self.lr * der_coef

        return self

    def predict(self,X_test):
        """Return ``X_test @ coef_ + intercept_``.

        ``fit`` must have been called first so that ``coef_`` and
        ``intercept_`` hold numeric values.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)
            Inputs to predict for.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted target values.
        """
        return np.dot(np.asarray(X_test, dtype=float), self.coef_) + self.intercept_
    
    

In [131]:
# Gradient-descent model: step size 0.8, 250 epochs.
reg = BatchGDRegressor(learning_rate=0.8, epochs=250)

In [132]:
# Train on the training split, then show the learned coefficients followed by
# the learned intercept, one per line.
reg.fit(X_train, y_train)
print(reg.coef_, reg.intercept_, sep="\n")

[  56.45497537 -105.1126865   384.61219732  262.84764018   -9.52422369
  -48.89006569 -188.36902077  149.70628344  299.47024639  148.55118809]
151.76603368318544


In [133]:
# Predict the held-out targets with the current `reg`.
# NOTE: this rebinds `y_pred`, replacing the predictions stored under the same
# name by the earlier LinearRegression cell.
y_pred = reg.predict(X_test)

In [134]:
# R^2 of the current predictions against the held-out targets.
r2s = r2_score(y_true=y_test, y_pred=y_pred)
print(r2s)

0.4512096265970631
