# Mini Batch Gradient Descent for nd data and linear Regression 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression
from sklearn.metrics import r2_score
from sklearn.datasets import load_diabetes
import random

In [2]:
X,y = load_diabetes(return_X_y=True)

In [3]:
X.shape

(442, 10)

In [4]:
Xtrain,Xtest,ytrain,ytest = train_test_split(X,y,test_size=0.2,random_state = 2)

In [5]:
lr = LinearRegression()
lr.fit(Xtrain,ytrain)
print("Coefiecent: ",lr.coef_)
print()
print("Intercept: ",lr.intercept_)

Coefiecent:  [  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]

Intercept:  151.88331005254167


In [6]:
predit = lr.predict(Xtest)

In [7]:
r2s = r2_score(ytest,predit)
r2s

0.4399338661568968

# MY Class for Mini Batch Gradient Descent

#### The Summation is incorporated by the dot product while calculation gradients

In [13]:
class GdRegressor:
    def __init__(self,batch_size = 32, lr=0.5, epochs= 200):  
        self.lr = lr
        self.epochs = epochs
        self.intercept_ = None
        self.coef_ = None
        self.batch_size = batch_size

    def fit(self, X_train, y_train):
        rows = X_train.shape[0]
        cols = Xtrain.shape[1]
        
        self.intercept_ = 0                           # The normal convention is to initiliaze the intercept to 0 and coef to 1
        self.coef_ = np.ones(cols)
        
        batches = int(rows/self.batch_size)
        
        for i in range(self.epochs):
            for j in range(batches):        # There are n updates per epoch where n is the number of rows in the data set
                idx = random.sample(range(0,rows),self.batch_size)
              
              
          
                y_hat = self.intercept_ + np.dot(X_train[idx],self.coef_)
        
                # Update parameters
                der_inter = -2 * np.mean(y_train[idx] - y_hat)
                self.intercept_ = self.intercept_ - (self.lr * der_inter)
                
                der_coef = (-2/rows) * np.dot((y_train[idx] - y_hat),X_train[idx])
                self.coef_ = self.coef_ - (self.lr * der_coef)
            
        # print(self.coef_)
        # print(self.intercept_)
        

    def predict(self, Xtest):
        return np.dot(Xtest,self.coef_) + self.intercept_


In [14]:
gd = GdRegressor()
gd.fit(Xtrain,ytrain)

In [15]:
predit = gd.predict(Xtest)

In [16]:
r2s = r2_score(ytest,predit)
r2s

0.35654986438497516

In [17]:
count = 0
all_r2Score = []
for i in np.linspace(0.01, 0.9, 60):
    gd = GdRegressor(lr=i, epochs=300)
    gd.fit(Xtrain, ytrain)
    predictions = gd.predict(Xtest)
    score = r2_score(ytest, predictions)
    all_r2Score.append((float(i), float(score)))


In [18]:
df = pd.DataFrame(all_r2Score)
cols = {0:'Learning Rate',1:'R2_Score'}
df = df.rename(columns = cols)
maxValue = df[df['R2_Score'] == df['R2_Score'].max()]
maxValue

Unnamed: 0,Learning Rate,R2_Score
57,0.869831,0.445786


In [None]:
accuracyScore = [s[1] for s in all_r2Score]
maxAccuracyScore = max(accuracyScore)
index = accuracyScore.index(maxAccuracyScore)
lr_max = all_r2Score[27][0]
print(f"Max accuracy is at learning rate: ",lr_max)
print("Accuracy Score is: ",maxAccuracyScore)
