*First, we fit scikit-learn's `LinearRegression` to obtain reference coefficient and intercept values that the gradient-descent models can be checked against.*

In [22]:
from sklearn.datasets import load_diabetes

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import random

In [23]:
# return_X_y=True yields the feature matrix and the target vector directly as a pair.
X, y = load_diabetes(return_X_y=True)

In [24]:
# Dataset dimensions: feature matrix shape on the first line, target shape on the second.
print(X.shape, y.shape, sep="\n")

(442, 10)
(442,)


In [25]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [26]:
# Least-squares baseline: its coefficients and score are the reference for the
# gradient-descent models trained later in this notebook.
reg = LinearRegression()
reg.fit(X_train, y_train)

In [27]:
# Predict the held-out rows with the baseline; the bare name on the last line displays the array.
y_pred = reg.predict(X_test)
y_pred

array([154.1213881 , 204.81835118, 124.93755353, 106.08950893,
       258.5348576 , 256.3310074 , 118.75087616, 119.52440696,
       101.50816735, 190.54048661, 141.70656811, 172.51883961,
       174.33861649, 134.80942706, 294.13994537,  94.11798038,
       211.97059795, 156.49579378, 134.21000428, 119.62664644,
       148.87842251, 165.00873409, 151.10021038, 176.04063756,
       133.27769647, 221.29555392, 197.17324941,  96.1577688 ,
        50.26012711, 230.48580317, 242.06073866, 114.11129218,
        67.07532417,  94.52943825, 201.21415375, 167.05136201,
       159.881268  , 192.78746659, 114.49551325, 233.48234551,
       140.82563045, 121.0680409 , 192.27480772, 191.12738845,
       179.16865788, 148.34935601, 163.47414622, 276.81647884,
       100.17926432, 164.10555298, 255.80762189, 136.9466204 ,
       152.37503699, 107.92237882, 194.21924678,  77.34670792,
       118.50482479,  68.38335763, 154.29258529, 162.48840259,
       168.36788326, 156.87790322,  97.14191797, 238.16

In [28]:
# R^2 of the baseline LinearRegression on the test split.
r2_score(y_test, y_pred)

0.4399338661568969

In [29]:
# Reference parameters from the baseline: coefficients first, then the intercept.
print(reg.coef_, reg.intercept_, sep="\n")

[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]
151.88331005254167


## Building our own mini-batch gradient descent class

In [87]:
class miniBatchGD:
    """Linear regression trained with mini-batch gradient descent on squared error.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to every gradient update.
    epochs : int, default=100
        Number of passes; each pass performs ceil(n_samples / batch_size) updates.
    batch_size : int, default=10
        Number of rows drawn (without replacement) for each update. Capped at
        the number of training rows inside ``fit``.
    random_state : int or None, default=None
        Seed for the batch sampling. ``None`` keeps drawing from NumPy's global
        random state, exactly as before this parameter existed.

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,) or None
        Learned feature weights; ``None`` until ``fit`` is called.
    intercept_ : float or None
        Learned bias term; ``None`` until ``fit`` is called.
    """

    def __init__(self, learning_rate = .01, epochs = 100, batch_size = 10, random_state = None):
        self.lr = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.random_state = random_state
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_trian):
        """Learn ``coef_`` and ``intercept_`` from the given training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_trian : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets. The misspelled name is kept so that existing
            keyword callers keep working.

        Raises
        ------
        ValueError
            If ``batch_size`` is smaller than 1 or the row counts of the
            features and targets differ.

        Prints the final intercept and coefficients when training finishes.
        """
        # Use the arguments that were passed in. The previous body read a global
        # named `y_train`, which silently ignored the caller's targets.
        X = np.asarray(X_train, dtype=float)
        y = np.asarray(y_trian, dtype=float).ravel()  # accept column-vector targets too

        n_samples = X.shape[0]
        if y.shape[0] != n_samples:
            raise ValueError("X_train and y_trian must have the same number of rows")
        if self.batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        # Sampling without replacement cannot draw more rows than exist.
        rows_per_batch = min(self.batch_size, n_samples)
        n_batches = int(np.ceil(n_samples / self.batch_size))

        # The numpy.random module and RandomState both expose `choice`; the
        # module-level version keeps honouring a global np.random.seed(...).
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # initialize the values
        self.intercept_ = 0.0
        self.coef_ = np.ones(X.shape[1])

        for _ in range(self.epochs):
            for _ in range(n_batches):
                # Every batch is an independent random draw, so an epoch is not
                # guaranteed to visit each row exactly once.
                ids = rng.choice(n_samples, rows_per_batch, replace=False)
                X_batch = X[ids]
                residual = y[ids] - (np.dot(X_batch, self.coef_) + self.intercept_)

                # Both gradients use the residual computed before either update.
                intercept_derivative = -2 * np.mean(residual)
                # NOTE: this gradient is summed over the batch while the intercept
                # gradient is averaged, so the effective coefficient step grows
                # with the batch size. Kept as-is to preserve training behaviour.
                coef_derivative = -2 * np.dot(residual, X_batch)

                self.intercept_ = self.intercept_ - (self.lr * intercept_derivative)
                self.coef_ = self.coef_ - (self.lr * coef_derivative)

        print(self.intercept_)
        print(self.coef_)

    def predict(self, X_test):
        """Return ``X_test @ coef_ + intercept_`` for the fitted model.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coef_ is None:
            raise RuntimeError("call fit before predict")
        return np.dot(X_test, self.coef_) + self.intercept_
            

In [109]:
# Each batch holds 1/25 of the training rows (rounded down), giving roughly 25 updates per epoch.
mbgd = miniBatchGD(
    learning_rate=0.01,
    epochs=60,
    batch_size=X_train.shape[0] // 25,
)

In [111]:
# Train on the training split; fit prints the learned intercept and then the coefficients.
mbgd.fit(X_train, y_train)

154.48573747890765
[  51.00128124  -95.022413    386.49263979  270.58017993    3.20235475
  -51.1752487  -179.9884328   126.18659415  352.12836381  130.45034458]


In [112]:
# Predict the held-out rows with the hand-written model; the bare name displays the array.
y_pred2 = mbgd.predict(X_test)
y_pred2

array([156.33696814, 195.02679929, 139.46885713, 110.10162029,
       253.0650006 , 243.91227377, 111.0828582 , 117.24101641,
        95.78031041, 188.156272  , 159.4293336 , 175.50258191,
       185.97218856, 150.07754057, 266.49717087,  92.8685539 ,
       191.91446711, 144.24260113, 140.58396249, 139.52984193,
       138.71020413, 188.01225736, 165.11178181, 179.74754293,
       129.42721544, 224.12521381, 199.51784295, 118.42823869,
        62.26403562, 243.35661759, 241.32159282, 120.08434863,
        74.33202427, 108.48467384, 204.01955253, 168.31757093,
       168.5022497 , 198.04578479, 116.98506404, 238.70999207,
       139.35374869, 127.8664604 , 187.71211515, 189.00263021,
       174.39150272, 147.29288591, 175.52662275, 287.09786703,
       117.01516835, 187.19552169, 244.46072185, 135.13193553,
       147.9288423 , 147.00843064, 193.05789937, 111.3363342 ,
       151.93619839,  85.91079604, 161.49694471, 148.55891668,
       166.25632838, 171.34370886, 111.08225706, 213.29

In [115]:
# R^2 of the hand-written mini-batch model on the test split.
r2_score(y_test, y_pred2)

0.44291327966432104

## Using scikit-learn's `SGDRegressor`

In [120]:
from sklearn.linear_model import SGDRegressor

In [122]:
# Constant step size of 0.2 for every update (no decay schedule).
sgd = SGDRegressor(eta0=0.2, learning_rate="constant")

In [146]:
# Number of rows fed to each incremental update.
batch_size = 25

# Run 100 mini-batch updates; every iteration draws a fresh set of distinct row indices.
for i in range(100):
    # random.sample picks batch_size unique indices from the training rows
    ids = random.sample(range(X_train.shape[0]), batch_size)
    sgd.partial_fit(X_train[ids], y_train[ids]) # partial_fit updates the model using only this batch
                        

In [148]:
# Feature weights learned by the incremental partial_fit updates.
sgd.coef_

array([   9.69435614, -204.28564092,  523.10816471,  344.72745348,
        -67.24381806, -130.03854137, -200.53483154,   80.54908413,
        527.22204818,   98.68491435])

In [150]:
# Bias term learned by the incremental partial_fit updates.
sgd.intercept_

array([133.04883926])

In [152]:
# Predict the held-out rows with the SGDRegressor model.
y_pred3 = sgd.predict(X_test)

In [154]:
# R^2 of the SGDRegressor model on the test split.
r2_score(y_test, y_pred3)

0.37242724424979523