In [1]:
from sklearn.datasets import load_diabetes

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import random

In [2]:
X,y = load_diabetes(return_X_y=True)

In [3]:
print(X.shape)
print(y.shape)

(442, 10)
(442,)


#### Fit scikit-learn's `LinearRegression` to obtain reference coefficients and an intercept for the data

In [4]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=2)

In [5]:
reg = LinearRegression()

In [6]:
reg.fit(X_train,y_train)

In [7]:
print(reg.intercept_)
print(reg.coef_)

151.88331005254167
[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]


In [8]:
y_pred = reg.predict(X_test)
y_pred

array([154.1213881 , 204.81835118, 124.93755353, 106.08950893,
       258.5348576 , 256.3310074 , 118.75087616, 119.52440696,
       101.50816735, 190.54048661, 141.70656811, 172.51883961,
       174.33861649, 134.80942706, 294.13994537,  94.11798038,
       211.97059795, 156.49579378, 134.21000428, 119.62664644,
       148.87842251, 165.00873409, 151.10021038, 176.04063756,
       133.27769647, 221.29555392, 197.17324941,  96.1577688 ,
        50.26012711, 230.48580317, 242.06073866, 114.11129218,
        67.07532417,  94.52943825, 201.21415375, 167.05136201,
       159.881268  , 192.78746659, 114.49551325, 233.48234551,
       140.82563045, 121.0680409 , 192.27480772, 191.12738845,
       179.16865788, 148.34935601, 163.47414622, 276.81647884,
       100.17926432, 164.10555298, 255.80762189, 136.9466204 ,
       152.37503699, 107.92237882, 194.21924678,  77.34670792,
       118.50482479,  68.38335763, 154.29258529, 162.48840259,
       168.36788326, 156.87790322,  97.14191797, 238.16

In [9]:
r2_score(y_test,y_pred)

0.4399338661568968

### Creating our Stochastic Gradient Descent Class

In [10]:
class SGDRegressor:
  """Linear regression trained with plain stochastic gradient descent.

  Each update uses a single training row, drawn uniformly at random *with
  replacement*, and takes one gradient step on that row's squared error.

  Parameters
  ----------
  learning_rate : float, default=0.01
      Constant step size applied to every update.
  epoch : int, default=100
      Number of passes; each pass performs ``n_rows`` single-row updates.
  random_state : int or None, default=None
      Seed for the row sampling. ``None`` keeps drawing from NumPy's global
      random stream (so ``np.random.seed`` still controls it); an integer
      makes ``fit`` reproducible without touching the global stream.

  Attributes
  ----------
  coef_ : ndarray of shape (n_features,) or None
      Learned weights; ``None`` until ``fit`` has been called.
  intercept_ : float or None
      Learned bias term; ``None`` until ``fit`` has been called.
  """

  def __init__(self, learning_rate=0.01, epoch=100, random_state=None):
    self.lr = learning_rate
    self.epoch = epoch
    self.random_state = random_state
    self.coef_ = None
    self.intercept_ = None


  def fit(self,X_train,y_train):
    """Estimate ``coef_`` and ``intercept_`` from the training data.

    Parameters
    ----------
    X_train : array-like of shape (n_rows, n_features)
    y_train : array-like of shape (n_rows,)

    Prints the final intercept and coefficients when training finishes.
    """
    # Coerce to ndarrays so positional row indexing works for lists and
    # other array-likes, not only for NumPy arrays.
    X = np.asarray(X_train)
    y = np.asarray(y_train)
    n_rows = X.shape[0]

    # A dedicated RandomState seeded with s yields the same draws as
    # np.random.seed(s) followed by the global np.random.randint calls.
    if self.random_state is None:
      rng = np.random
    else:
      rng = np.random.RandomState(self.random_state)

    # initialize intercept with 0 and every column coefficient with 1
    self.intercept_ = 0.0
    self.coef_ = np.ones(X.shape[1])

    for _ in range(self.epoch):
      # one epoch = n_rows single-row updates of the coefs and the intercept
      for _ in range(n_rows):
        # random row index in [0, n_rows)
        idx = rng.randint(0, n_rows)
        row = X[idx]

        # prediction for the sampled row using the current parameters
        y_hat = np.dot(row, self.coef_) + self.intercept_
        residual = y[idx] - y_hat

        # Both gradients are computed from the same residual, i.e. before
        # either parameter is modified.
        intercept_slope = -2 * residual
        coef_slope = -2 * np.dot(residual, row)

        self.intercept_ = self.intercept_ - (self.lr * intercept_slope)
        self.coef_ = self.coef_ - (self.lr * coef_slope)

    print(self.intercept_, self.coef_)


  def predict(self, X_test):
    """Return ``X_test @ coef_ + intercept_`` using the fitted parameters."""
    return np.dot(X_test,self.coef_) + self.intercept_

In [11]:
sgdr = SGDRegressor(learning_rate=0.01, epoch=52)

In [12]:
sgdr.fit(X_train,y_train)

151.68647283242626 [  66.50822248  -72.39335616  360.40955181  254.30407609   18.72985156
  -31.09619949 -168.52601665  126.19444153  325.51170251  129.12815383]


In [13]:
y_pred = sgdr.predict(X_test)
y_pred

array([155.09173568, 191.01399998, 139.36166491, 108.23905744,
       245.19621289, 236.74675427, 107.17067247, 113.5897496 ,
        93.64876859, 184.37314342, 159.84929494, 172.15675361,
       183.52718695, 150.75927911, 255.74011218,  93.10489222,
       185.44208644, 138.41433984, 138.94810994, 138.74067295,
       133.20078931, 188.93741002, 165.04912263, 178.37159829,
       127.24225306, 218.801967  , 195.42889675, 120.62419923,
        62.60627678, 240.31815652, 235.86411814, 118.68792769,
        72.96390579, 109.35433292, 199.27122288, 165.46734965,
       167.39725323, 195.87798622, 115.45500868, 233.4751912 ,
       135.36003495, 127.06765668, 183.93232286, 184.92452541,
       171.18726511, 144.30390233, 173.74157496, 278.14510772,
       116.90089511, 188.92825663, 236.80145353, 129.87285075,
       142.98499577, 149.18203283, 190.18518243, 112.69401969,
       154.76917676,  85.51119647, 158.45280606, 143.03700611,
       164.02674175, 171.33164259, 111.41082563, 205.94

In [14]:
r2_score(y_test,y_pred)

0.43379059181664104

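#### Compare against scikit-learn's `SGDRegressor`

Note: the import below rebinds the name `SGDRegressor`, so after this cell runs the custom class defined above is no longer reachable under that name.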

In [15]:
from sklearn.linear_model import SGDRegressor

In [16]:
reg = SGDRegressor(max_iter=100,learning_rate='constant',eta0=0.01)

In [17]:
reg.fit(X_train,y_train)



In [18]:
y_pred = reg.predict(X_test)

In [19]:
r2_score(y_test,y_pred)

0.43261473961644925