# Implementation of Multiple Linear Regression Using Stochastic Gradient Descent

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import matplotlib.pyplot as plt

### Loading the Dataset and Splitting It into Train and Test Sets

In [2]:
# Fetch scikit-learn's bundled diabetes dataset as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)

In [3]:
X   # X holds the feature matrix; y holds the target values

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990842, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06832974, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286377, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04687948,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452837, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00421986,  0.00306441]])

In [4]:
# Inspect the dimensions of the feature matrix and the target vector.
X.shape, y.shape

((442, 10), (442,))

In [5]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [6]:
# Report the dimensions of each piece of the train/test split.
for caption, part in (
    ("Shape of X_train : ", X_train),
    ("Shape of y_train : ", y_train),
    ("Shape of X_test : ", X_test),
    ("Shape of y_test : ", y_test),
):
    print(caption, part.shape)

Shape of X_train :  (353, 10)
Shape of y_train :  (353,)
Shape of X_test :  (89, 10)
Shape of y_test :  (89,)


### Using Scikit-Learn's SGDRegressor() Class

In [60]:
# Fit scikit-learn's SGDRegressor as the reference model.
# NOTE(review): no random_state is passed, so the fitted weights presumably
# differ from run to run - confirm against the SGDRegressor documentation.
reg = SGDRegressor(alpha=9e-06, max_iter=1000)
reg.fit(X_train, y_train)



SGDRegressor(alpha=9e-06)

In [61]:
# Predict targets for the held-out test set with the scikit-learn model.
y_pred2 = reg.predict(X_test)
y_pred2

array([154.41987087, 184.33603572, 145.51848658, 116.05206391,
       225.05687365, 219.28357074, 110.77712452, 118.91940419,
        99.24888249, 180.01530222, 165.10494465, 169.44648512,
       181.67538266, 156.95916533, 227.38056626, 102.74759146,
       176.19563532, 137.05693458, 144.17392289, 143.71695508,
       128.94869785, 192.88537729, 170.8325912 , 176.41029331,
       129.53830811, 210.36340995, 186.76943466, 132.79680233,
        78.36408309, 231.55350865, 223.62889493, 126.08865377,
        87.90032801, 121.30398533, 191.86269897, 165.84839117,
       167.20038062, 192.23574244, 121.06519251, 220.15407908,
       135.5847817 , 132.49716991, 181.10492832, 181.75672922,
       166.10570152, 148.32790616, 171.81817016, 253.86697589,
       126.29017591, 192.30340745, 218.13911614, 128.53165885,
       141.7048827 , 159.80591499, 183.9133755 , 125.31828059,
       165.27454011,  98.3709852 , 157.56172798, 140.46784136,
       162.6103585 , 172.8897143 , 119.7134163 , 190.53

In [62]:
# Show the parameters learned by the scikit-learn model: one weight per feature plus the intercept.
print("Co-efficients or Slopes or Weights : ", reg.coef_)
print("Intercepts : ", reg.intercept_)

Co-efficients or Slopes or Weights :  [  61.78687085  -22.89291366  256.3536611   188.00180035   39.97660423
   11.55180823 -141.3489749   122.81735849  239.24113022  117.83769874]
Intercepts :  [151.99967414]


In [63]:
# R^2 score of the scikit-learn model's predictions on the test set.
r2_score(y_test, y_pred2)

0.3942845312845865

## Writing our Own SGDRegressor() Class

In [86]:
class MySGDRegressor:
    """Multiple linear regression trained with stochastic gradient descent.

    The model is ``y_hat = X @ coef_ + intercept_``. Each update step picks one
    training row at random (with replacement) and moves the parameters against
    the gradient of that row's squared error ``(y - y_hat) ** 2``.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    epochs : int
        Number of passes; each pass performs ``n_samples`` single-row updates.
    random_state : int or None, default None
        Seed for the row sampling. ``None`` draws from NumPy's global random
        stream (``np.random``), exactly as the class did before this parameter
        existed; an integer makes ``fit`` reproducible on its own.

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,) or None
        Learned weights; ``None`` until ``fit`` has been called.
    intercept_ : float or None
        Learned bias term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate, epochs, random_state=None):
        self.coef_ = None  # weights / slopes, set by fit()
        self.intercept_ = None  # bias term, set by fit()

        self.lr = learning_rate  # step size of each update
        self.epochs = epochs  # number of passes over the data
        self.random_state = random_state  # seed for row sampling (None -> global RNG)

    def fit(self, X_train, y_train):
        """Learn ``coef_`` and ``intercept_`` from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features. Anything ``np.asarray`` can convert is accepted
            (NumPy arrays, pandas DataFrames, nested lists).
        y_train : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets.

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D or its row count differs from the number
            of targets.

        Notes
        -----
        Prints the learned parameters when training finishes and returns None.
        """
        # Convert up front so positional row indexing below works for pandas
        # objects too (DataFrame[int] would be a column lookup, not a row).
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float).ravel()

        if X_train.ndim != 2:
            raise ValueError(
                "X_train must be 2-dimensional (n_samples, n_features); got %d dimension(s)"
                % X_train.ndim
            )
        n_samples, n_features = X_train.shape
        if y_train.shape[0] != n_samples:
            raise ValueError(
                "X_train has %d rows but y_train has %d values" % (n_samples, y_train.shape[0])
            )

        # With no seed, keep using the global stream so np.random.seed(...) set
        # by the caller is still honoured; otherwise use a private generator.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        self.intercept_ = 0  # intercept starts at 0
        self.coef_ = np.ones(n_features)  # every weight starts at 1

        for _ in range(self.epochs):
            for _ in range(n_samples):
                # Sample one row uniformly at random (with replacement).
                indx = rng.randint(0, n_samples)
                x_row = X_train[indx]

                # Prediction for the sampled row with the current parameters.
                y_hat = np.dot(x_row, self.coef_) + self.intercept_

                # Gradient of (y - y_hat)^2: dl/db = -2 * (y - y_hat) and
                # dl/dm = -2 * (y - y_hat) * x, so dl/dm is dl/db scaled by x.
                intercept_der = -2 * (y_train[indx] - y_hat)
                coef_der = intercept_der * x_row

                # b(new) = b(old) - learning_rate * dl/db
                self.intercept_ = self.intercept_ - (self.lr * intercept_der)
                # m(new) = m(old) - learning_rate * dl/dm
                self.coef_ = self.coef_ - (self.lr * coef_der)

        print("Co-efficient/Weights : ", self.coef_)
        print("Intercept : ", self.intercept_)

    def predict(self, X_test):
        """Return predictions ``X_test @ coef_ + intercept_``.

        ``fit`` must have been called first so that ``coef_`` and
        ``intercept_`` hold learned values.
        """
        return np.dot(np.asarray(X_test, dtype=float), self.coef_) + self.intercept_

In [87]:
# Building the model with our own MySGDRegressor() class..
sgd = MySGDRegressor(learning_rate = 0.001, epochs = 500)
# Alternative setting (disabled): sgd = MySGDRegressor(learning_rate = 0.8, epochs = 100)

sgd.fit(X_train, y_train)  # Learning from data..

Co-efficient/Weights :  [  56.11997851  -67.75725629  352.05785023  248.75088333   16.30853678
  -30.52292269 -173.44314991  129.80487741  321.15119201  127.62818524]
Intercept :  150.0274169347985


In [88]:
# Predicting values for the test data with our own model.
y_pred = sgd.predict(X_test)
y_pred

array([151.6832657 , 189.00222424, 137.84691184, 106.95524394,
       242.50550236, 234.73180161, 105.70069033, 112.91703788,
        91.40296375, 183.01740739, 157.95268853, 170.80330528,
       182.20707886, 148.57123193, 252.03553949,  90.50053695,
       183.76617475, 137.52112861, 137.96415426, 137.26479294,
       131.06177108, 187.28011816, 164.40200033, 175.85897008,
       124.77220511, 217.5030724 , 192.86204028, 118.55595832,
        61.75976114, 238.32232327, 233.92116916, 117.46051162,
        73.20419751, 108.49524517, 197.60751188, 164.23042682,
       165.02088107, 193.95909423, 113.54925416, 230.5288011 ,
       134.25132593, 125.3656494 , 183.19971973, 184.15987254,
       168.80501563, 144.32491939, 171.48387105, 275.83854254,
       115.99580145, 186.67131692, 234.01248606, 128.70173422,
       141.88992165, 148.11307417, 187.49732028, 111.89161705,
       153.39306974,  84.41633582, 156.39418424, 141.94157477,
       161.99826079, 169.21162054, 109.04209733, 203.15

In [89]:
# Checking the R^2 score of our own model's predictions on the test set..
r2_score(y_test, y_pred)

0.4332181805766242

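#### Observe that the R² scores of our own MySGDRegressor() class (≈ 0.43) and Scikit-Learn's SGDRegressor() class (≈ 0.39) are comparable, though not identical; neither model is seeded here, so the exact values can change from run to run.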