In [193]:
from sklearn.datasets import load_diabetes
import numpy as np
import time
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [194]:
# Load the diabetes dataset as a (features, target) pair of arrays and
# display the shape of the feature matrix.
x, y = load_diabetes(return_X_y=True)
x.shape

(442, 10)

In [195]:
# Shape of the target array loaded alongside the feature matrix.
y.shape

(442,)

In [196]:
# Hold out 20% of the rows as a test set. random_state is fixed so the same
# split is produced on every run.
xTrain, xTest, yTrain, yTest = train_test_split(
    x, y, test_size=0.2, random_state=2)

In [197]:
# scikit-learn LinearRegression with default settings; its score and
# parameters are displayed below before the SGD models are trained.
reg = LinearRegression()

In [198]:
# Fit the LinearRegression model on the training split.
reg.fit(xTrain, yTrain)

In [199]:
# Predictions of the fitted LinearRegression model for the held-out rows.
reg.predict(xTest)

array([154.1213881 , 204.81835118, 124.93755353, 106.08950893,
       258.5348576 , 256.3310074 , 118.75087616, 119.52440696,
       101.50816735, 190.54048661, 141.70656811, 172.51883961,
       174.33861649, 134.80942706, 294.13994537,  94.11798038,
       211.97059795, 156.49579378, 134.21000428, 119.62664644,
       148.87842251, 165.00873409, 151.10021038, 176.04063756,
       133.27769647, 221.29555392, 197.17324941,  96.1577688 ,
        50.26012711, 230.48580317, 242.06073866, 114.11129218,
        67.07532417,  94.52943825, 201.21415375, 167.05136201,
       159.881268  , 192.78746659, 114.49551325, 233.48234551,
       140.82563045, 121.0680409 , 192.27480772, 191.12738845,
       179.16865788, 148.34935601, 163.47414622, 276.81647884,
       100.17926432, 164.10555298, 255.80762189, 136.9466204 ,
       152.37503699, 107.92237882, 194.21924678,  77.34670792,
       118.50482479,  68.38335763, 154.29258529, 162.48840259,
       168.36788326, 156.87790322,  97.14191797, 238.16

In [200]:
# R^2 of the LinearRegression predictions on the test split.
r2_score(yTest, reg.predict(xTest))

0.4399338661568968

In [201]:
# Intercept learned by the LinearRegression model.
reg.intercept_

151.88331005254167

In [202]:
# Coefficients learned by the LinearRegression model (one per feature column).
reg.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [203]:
class SGDRegressor:
    """Linear regression trained with plain stochastic gradient descent.

    Each update draws one training row at random (with replacement) and takes
    a gradient step on the squared error of that single row, using a constant
    learning rate.

    Parameters
    ----------
    lr : float, default 0.01
        Constant step size applied to every update.
    epochs : int, default 100
        Number of passes; each pass performs ``n_samples`` single-row updates.
    random_state : int or None, default None
        Seed for the row sampling. ``None`` keeps drawing from NumPy's global
        random state (so ``np.random.seed`` still controls it); an integer
        makes every call to ``fit`` reproducible on its own.

    Attributes
    ----------
    coef : numpy.ndarray or None
        Learned weights, one per feature; ``None`` until ``fit`` is called.
    intercept : float
        Learned bias term.
    """

    def __init__(self, lr=0.01, epochs=100, random_state=None) -> None:
        self.coef = None
        self.intercept = 0
        self.lr = lr
        self.epochs = epochs
        self.random_state = random_state

    def fit(self, xTrain, yTrain):
        """Estimate ``intercept`` and ``coef`` from the training data.

        Parameters
        ----------
        xTrain : numpy.ndarray
            2-D feature matrix; rows are samples, columns are features.
        yTrain : numpy.ndarray
            Targets, indexable by the same row indices as ``xTrain``.

        Returns
        -------
        SGDRegressor
            ``self``, so calls can be chained.

        Notes
        -----
        Prints the final intercept and coefficients when training ends.
        """
        n_samples, n_features = xTrain.shape

        # Start every fit from the same point. Resetting only the weights
        # would let a second fit() continue from the previous intercept.
        self.coef = np.ones(n_features)
        self.intercept = 0

        # Seeded private generator when requested, otherwise NumPy's global
        # state; both expose the same randint(low, high) call.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        for _ in range(self.epochs):
            for _ in range(n_samples):
                idx = rng.randint(0, n_samples)
                row = xTrain[idx]

                # Residual of the current model on the sampled row.
                residual = yTrain[idx] - (self.intercept + row.dot(self.coef))

                # Gradients of (y - y_hat)^2 w.r.t. the intercept and weights.
                grad_intercept = -2 * residual
                grad_coef = -2 * residual * row

                self.intercept = self.intercept - (self.lr * grad_intercept)
                self.coef = self.coef - (self.lr * grad_coef)

        print(self.intercept, self.coef)
        return self

    def predict(self, xTest):
        """Return ``intercept + xTest . coef`` for the given rows.

        ``fit`` must have been called first so that ``coef`` is set.
        """
        return self.intercept + (xTest.dot(self.coef))

In [204]:
# Instantiate the SGDRegressor class defined above; the positional arguments
# are lr=0.01 and epochs=100.
sgdr = SGDRegressor(0.01,100)

In [205]:
# Time the training run. perf_counter() is a monotonic, high-resolution clock
# intended for measuring intervals; time.time() can jump if the system clock
# is adjusted.
start = time.perf_counter()
sgdr.fit(xTrain,yTrain)
print("time Taken : ", time.perf_counter() - start)

162.2092864060064 [  27.97410255 -149.72053343  463.38852596  304.02240225  -23.03664784
  -91.08335787 -192.92338962  114.23415492  406.2653452   112.88995806]
time Taken :  0.941901683807373


In [206]:
# Predict the held-out rows with the hand-written SGD model and display them.
yPred = sgdr.predict(xTest)
yPred

array([163.05442112, 206.9622802 , 141.06579324, 115.51693321,
       270.09487763, 260.14997841, 120.68960365, 125.17787873,
       104.77246723, 197.68678691, 159.33685743, 182.59527502,
       191.52783662, 151.1184353 , 292.69735483,  97.66503599,
       208.05264472, 156.10926867, 144.04732492, 142.0538465 ,
       154.93628355, 187.79165364, 165.14269049, 185.22730047,
       137.31993656, 233.44475259, 210.00404736, 117.09705688,
        66.52549196, 249.67851561, 253.82631476, 124.40480833,
        79.15856163, 109.24518212, 212.97345514, 175.49988444,
       173.05955156, 203.40871379, 123.72851729, 247.99203246,
       150.34428181, 131.81467686, 197.38699474, 196.24951985,
       184.22811163, 153.67688051, 180.95855723, 304.23083177,
       118.47429049, 185.23362277, 260.18258173, 150.27517058,
       160.16872692, 141.49314134, 201.15048336, 109.94010791,
       146.74886521,  88.74973124, 168.96515248, 163.40664015,
       172.80761794, 174.56594291, 114.93877499, 232.01

In [207]:
# R^2 of the hand-written SGD model's predictions on the test split.
r2_score(yTest,yPred)

0.4378133467937594

### How to Use Stochastic GD with Scikit-Learn

In [208]:
# NOTE(review): this import rebinds the name SGDRegressor, so the class of the
# same name defined earlier in this notebook is no longer reachable by that
# name after this cell runs. Consider an alias and moving the import to the
# top import cell.
from sklearn.linear_model import SGDRegressor

In [209]:
# scikit-learn's SGDRegressor with a constant learning rate of 0.01 and at
# most 100 iterations. This rebinds `reg`, which previously held the
# LinearRegression model.
# NOTE(review): no random_state is passed, so the fitted values presumably
# vary between runs -- confirm and pin a seed if reproducibility matters.
reg = SGDRegressor(max_iter=100, learning_rate="constant", eta0=0.01)

In [210]:
# Fit the scikit-learn SGD model on the training split.
reg.fit(xTrain,yTrain)

In [211]:
# Predict the held-out rows with the scikit-learn SGD model; this overwrites
# the yPred produced by the hand-written model above.
yPred = reg.predict(xTest)
yPred

array([151.87706156, 186.19281209, 140.22276796, 109.35466898,
       234.44272717, 227.52014179, 106.02033531, 113.86571499,
        92.7226892 , 180.9473255 , 160.57238013, 169.35376325,
       181.45192004, 151.52248553, 240.45152653,  94.03734992,
       179.32338178, 135.76746242, 139.60170315, 139.06344813,
       128.23982837, 189.71480523, 166.70903498, 175.4413863 ,
       125.33628308, 214.19821894, 189.55916902, 123.84482865,
        66.7974796 , 235.67087066, 229.26291054, 119.84257885,
        77.59598766, 112.73890415, 194.50015197, 164.07265287,
       165.24068369, 192.79593562, 115.24813265, 225.9401516 ,
       133.27724877, 127.15229247, 181.48698387, 182.44807505,
       166.53785779, 144.88868478, 170.95060846, 266.20205602,
       119.19755187, 189.08167608, 226.66963074, 126.81710248,
       140.34108864, 152.85133825, 185.26422698, 116.63496367,
       158.32028146,  88.78499862, 155.97729024, 139.6575228 ,
       161.28471056, 170.24361645, 112.26234116, 196.63

In [212]:
# R^2 of the scikit-learn SGD model's predictions on the test split.
r2_score(yTest,yPred)

0.4189083353598587

# End