In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [2]:
from sklearn.datasets import load_diabetes
X,y = load_diabetes(return_X_y=True)

In [3]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2, random_state=15)

In [39]:
class MiniBatchGD:
    def __init__(self):
        self.B0 = None
        self.B = None

    def fit(self, X_train, y_train,batch_size = 10, epochs = 100, lr = 0.1):
        self.epochs = epochs
        self.lr = lr
        self.batch_size = batch_size
        # making a matrix of 1  for Beta (B)
        # B = [B1, B2, ....Bn ]
        self.B0 = 0
        self.B = np.ones(X_train.shape[1])

        for i in range(self.epochs):
            for j in range(int(X_train.shape[0]/self.batch_size)):
                
                # Picking random sample of values from rows
                idx = np.random.choice(range(X_train.shape[0]),self.batch_size)
                
                # For the B0 (intercept) value
                y_cap = np.dot(X_train[idx], self.B) + self.B0
                der_B0 = -2 * np.mean(y_train[idx] - y_cap)
                self.B0 = self.B0 - self.lr * der_B0

                # For the coef_ values (B)
                der_B = -2 * np.dot((y_train[idx] - y_cap) ,X_train[idx])
                self.B = self.B - self.lr * der_B
            print(self.B0,self.B)
        print("Final B0 :", self.B0)
        print("Final B :" ,self.B)
    def pred(self,X_test):
        return np.dot(X_test, self.B) + self.B0

In [40]:
mbgd = MiniBatchGD()

In [41]:
mbgd.fit(X_train,y_train,10,40,0.05)

138.4796774115637 [  6.10925232  -2.21580026  80.4833258   53.93632972  36.51412097
  24.40333949 -32.629978    44.30903284  74.06566149  35.42448664]
155.73068441319887 [ 22.0492611   -4.29874903 133.15293306  99.97243104  36.69900053
  18.02341442 -61.98276847  62.59911263 122.28385291  66.05651475]
147.75926116095184 [ 27.59750925   1.32691794 185.38682539 134.58938776  36.66159188
  14.30206977 -92.60715943  89.6020086  162.45679094  94.56974302]
153.3359907466553 [  29.64780507   -9.81692415  231.55188733  157.52393673   39.57038217
   11.88820622 -113.11484045  108.65734151  196.86509425  110.07924391]
155.2566152590516 [  29.67359289  -11.87565452  257.6630614   175.004813     33.58434493
    2.96375181 -134.90217098  124.48867861  222.92301354  116.60939197]
159.27734555143422 [  37.09363462  -22.40086886  285.70900331  196.7901541    28.28834046
   -6.6125811  -151.72274819  131.43713947  248.94814875  125.78034174]
151.62656387341056 [  41.213439    -34.79291028  306.92296003

In [42]:
y_pred = mbgd.pred(X_test)

In [43]:
r2_score(y_test,y_pred)

0.49058395962787216

# Improved version

In [54]:
class ImprovedMiniBatchGD:
    def __init__(self):
        self.B0 = None
        self.B = None

    def fit(self, X_train, y_train,batch_size = 10, epochs = 40, lr = 0.05):
        self.epochs = epochs
        self.lr = lr
        self.batch_size = batch_size
        # making a matrix of 1  for Beta (B)
        # B = [B1, B2, ....Bn ]
        self.B0 = 0
        self.B = np.ones(X_train.shape[1])

        # Learning Schedule : varying learning rate
        t0,t1 = 5,50
        def learning_rate(t):
            return t0/(t1+t)
    
        

        for epoch in range(self.epochs):
            for j in range(int(X_train.shape[0]/self.batch_size)):
                lr = learning_rate(i * X_train.shape[0] + j)
                # Picking random sample of values from rows
                idx = np.random.choice(range(X_train.shape[0]),self.batch_size)
                
                # For the B0 (intercept) value
                y_cap = np.dot(X_train[idx], self.B) + self.B0
                der_B0 = -2 * np.mean(y_train[idx] - y_cap)
                self.B0 = self.B0 - lr * der_B0

                # For the coef_ values (B)
                der_B = -2 * np.dot((y_train[idx] - y_cap) ,X_train[idx])/batch_size # taking mean
                self.B = self.B - lr * der_B

                # Track loss
                loss = np.mean((y_train - (np.dot(X_train, self.B) + self.B0))**2)
            print("loss:", loss)
            print(" value B0:" ,self.B0,"value B:",self.B)
        print("Final B0 :", self.B0)
        print("Final B :" ,self.B)
    def pred(self,X_test):
        return np.dot(X_test, self.B) + self.B0

In [67]:
class ImprovedMiniBatchGD:
    def __init__(self):
        self.B0 = None
        self.B = None

    def fit(self, X_train, y_train,batch_size = 10, epochs = 40, lr = 0.05):
        self.epochs = epochs
        self.lr = lr
        self.batch_size = batch_size
        # making a matrix of 1  for Beta (B)
        # B = [B1, B2, ....Bn ]
        self.B0 = 0
        self.B = np.ones(X_train.shape[1])

        # Learning Schedule : varying learning rate
        t0,t1 = 5,100
        def learning_rate(t):
            return t0/(t1+t)
    
        

        for i in range(self.epochs):
            for j in range(int(X_train.shape[0]/self.batch_size)):
                lr = learning_rate(i * X_train.shape[0] + j)
                # Picking random sample of values from rows
                idx = np.random.choice(range(X_train.shape[0]),self.batch_size)
                
                # For the B0 (intercept) value
                y_cap = np.dot(X_train[idx], self.B) + self.B0
                der_B0 = -2 * np.mean(y_train[idx] - y_cap)
                self.B0 = self.B0 - lr * der_B0

                # For the coef_ values (B)
                der_B = -2 * np.dot((y_train[idx] - y_cap) ,X_train[idx])/batch_size # taking mean
                self.B = self.B - lr * der_B

                # Track loss
                loss = np.mean((y_train - (np.dot(X_train, self.B) + self.B0))**2)
            print("loss:", loss)
            print(" value B0:" ,self.B0,"value B:",self.B)
        print("Final B0 :", self.B0)
        print("Final B :" ,self.B)
    def pred(self,X_test):
        return np.dot(X_test, self.B) + self.B0

In [68]:
imbgd = ImprovedMiniBatchGD()

In [69]:
imbgd.fit(X_train,y_train)

loss: 5956.879996735325
 value B0: 153.76263766940974 value B: [ 1.87879139  2.32498301  9.59677074  7.58124758  2.55677419  1.26927942
 -3.85795061  5.56801276  9.16407688  6.46583369]
loss: 5947.3466016328775
 value B0: 156.16459315556034 value B: [ 1.91449484  2.29771242 11.34174475  8.7350779   2.84148836  1.49814858
 -4.79393019  6.46418431 10.41077754  7.20970116]
loss: 5926.198162367441
 value B0: 155.6626504365532 value B: [ 2.26526913  2.42109688 12.31965111  9.57868627  3.10707029  1.70036559
 -5.4366332   7.12893758 11.32420706  7.78829936]
loss: 5903.105523698665
 value B0: 153.5417496504138 value B: [ 2.45677654  2.63969938 12.93535256 10.0683302   3.24413578  1.77345413
 -5.89769835  7.60906886 11.98863661  8.24538073]
loss: 5893.361457539819
 value B0: 152.8028841714092 value B: [ 2.61975335  2.65276162 13.39781548 10.40093472  3.4196417   1.9016522
 -6.13238017  7.91279159 12.41593256  8.57332218]
loss: 5886.24964865675
 value B0: 152.84489381716273 value B: [ 2.7334262

In [70]:
y_pred = imbgd.pred(X_test)

In [71]:
r2_score(y_test,y_pred)

0.050476431727202065