# Implementation of Multiple Linear Regression Using the Gradient Descent Algorithm

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import matplotlib.pyplot as plt

### Loading the Dataset and Splitting It into Train and Test Sets

In [2]:
# Load scikit-learn's built-in diabetes dataset as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)

In [3]:
# X holds the feature matrix (one row per sample); y holds the target values.
X

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990842, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06832974, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286377, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04687948,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452837, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00421986,  0.00306441]])

In [4]:
# Confirm the number of samples and features before splitting.
np.shape(X), np.shape(y)

((442, 10), (442,))

In [5]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [6]:
# Report the shape of every split so the train/test sizes can be checked at a glance.
for split_name, split_data in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"Shape of {split_name} : ", split_data.shape)

Shape of X_train :  (353, 10)
Shape of y_train :  (353,)
Shape of X_test :  (89, 10)
Shape of y_test :  (89,)


### Using scikit-learn's LinearRegression() Class

In [7]:
# Baseline model: scikit-learn's LinearRegression fitted on the training split.
reg = LinearRegression()
reg.fit(X=X_train, y=y_train)

LinearRegression()

In [8]:
# Predict targets for the held-out test set with the scikit-learn baseline;
# the trailing expression displays the predictions.
y_pred2 = reg.predict(X_test)
y_pred2

array([154.1235067 , 204.81721599, 124.92988001, 106.09339576,
       258.53035681, 256.32953702, 118.75258786, 119.52147402,
       101.50717468, 190.54137158, 141.70360267, 172.51631204,
       174.34089304, 134.81184017, 294.13950798,  94.11403289,
       211.97052873, 156.49984762, 134.20709632, 119.62534726,
       148.88045343, 165.00378118, 151.09977307, 176.03719872,
       133.27651748, 221.29531227, 197.17482787,  96.15923158,
        50.26531577, 230.48342249, 242.06266394, 114.1153262 ,
        67.0785352 ,  94.53179042, 201.21593262, 167.05306138,
       159.87838519, 192.78601513, 114.49291816, 233.4825497 ,
       140.82309666, 121.06814332, 192.27431013, 191.13157307,
       179.1698153 , 148.35140027, 163.47610288, 276.81203359,
       100.17813072, 164.11265163, 255.81074398, 136.94979051,
       152.37507828, 107.92662528, 194.21661635,  77.35015426,
       118.50951725,  68.38527563, 154.29094022, 162.48905632,
       168.36590928, 156.8764705 ,  97.13958436, 238.17

In [9]:
# Show the parameters learned by the scikit-learn baseline for later comparison.
for label, value in (
    ("Co-efficients or Slopes or Weights : ", reg.coef_),
    ("Intercepts : ", reg.intercept_),
):
    print(label, value)

Co-efficients or Slopes or Weights :  [  -9.16088483 -205.46225988  516.68462383  340.62734108 -895.54360867
  561.21453306  153.88478595  126.73431596  861.12139955   52.41982836]
Intercepts :  151.88334520854633


In [10]:
# Score the scikit-learn baseline on the test set (R^2; 1.0 is a perfect fit).
r2_score(y_true=y_test, y_pred=y_pred2)

0.4399387660024645

## Writing our Own MultipleGDRegressor() Class

In [15]:
class MultipleGDRegressor:
    """Multiple linear regression trained with batch gradient descent.

    Minimises the mean squared error between ``y`` and
    ``X @ coef_ + intercept_`` by updating every weight and the intercept
    once per epoch, using the gradient computed over the whole training set.

    Parameters
    ----------
    learning_rate : float
        Step size applied to each gradient update.
    epochs : int
        Number of gradient updates (full passes over the training data).

    Attributes
    ----------
    coef_ : numpy.ndarray of shape (n_features,) or None
        Learned weights; ``None`` until ``fit`` has been called.
    intercept_ : float or None
        Learned intercept; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate, epochs):
        self.coef_ = None  # weights / slopes, set by fit()
        self.intercept_ = None  # intercept, set by fit()

        self.lr = learning_rate  # step size of each update
        self.epochs = epochs  # number of gradient updates to run

    def fit(self, X_train, y_train):
        """Learn ``coef_`` and ``intercept_`` from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features.
        y_train : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets.

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D, is empty, or its number of rows does
            not match the number of target values.

        Notes
        -----
        Prints the learned weights and intercept when training finishes.
        """
        X_train = np.asarray(X_train, dtype=float)
        # Flatten the target: an (n, 1) column would otherwise broadcast against
        # the (n,) predictions into an (n, n) residual matrix and corrupt the gradients.
        y_train = np.asarray(y_train, dtype=float).ravel()

        if X_train.ndim != 2:
            raise ValueError(
                "X_train must be 2-D (n_samples, n_features); got %d-D input" % X_train.ndim
            )
        n_samples, n_features = X_train.shape
        if n_samples == 0:
            raise ValueError("X_train must contain at least one sample")
        if y_train.shape[0] != n_samples:
            raise ValueError(
                "X_train has %d samples but y_train has %d values"
                % (n_samples, y_train.shape[0])
            )

        self.intercept_ = 0  # intercept starts at 0
        self.coef_ = np.ones(n_features)  # weights start as an array of all 1s

        for _ in range(self.epochs):
            # Predictions with the current parameters.
            y_hat = np.dot(X_train, self.coef_) + self.intercept_
            residual = y_train - y_hat

            # Partial derivatives of the mean squared error: dL/db and dL/dm.
            intercept_der = -2 * np.mean(residual)
            coef_der = -2 * np.dot(residual, X_train) / n_samples

            # Step against the gradient: param(new) = param(old) - learning_rate * dL/dparam.
            self.intercept_ = self.intercept_ - (self.lr * intercept_der)
            self.coef_ = self.coef_ - (self.lr * coef_der)

        print("Co-efficient/Weights : ", self.coef_)
        print("Intercept : ", self.intercept_)

    def predict(self, X_test):
        """Return predictions ``X_test @ coef_ + intercept_``.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)
            Samples to predict.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Predicted target values.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coef_ is None or self.intercept_ is None:
            raise RuntimeError("MultipleGDRegressor must be fitted before calling predict()")
        return np.dot(np.asarray(X_test, dtype=float), self.coef_) + self.intercept_

In [12]:
# Build the model with our own MultipleGDRegressor class.
# NOTE(review): a learning rate this large presumably only stays stable because the
# feature values are very small (see the X output above) - confirm before reusing
# these settings on unscaled data.
gd = MultipleGDRegressor(learning_rate=0.95, epochs=500)

# Learn the weights and intercept from the training split (fit prints them).
gd.fit(X_train, y_train)

Co-efficient/Weights :  [  16.39739527 -169.60488374  486.77833156  321.67061813  -37.3178431
 -113.27936021 -193.98952168  104.804799    445.93768309  100.04030759]
Intercept :  152.02089477912753


In [13]:
# Predict targets for the held-out test set with the gradient-descent model;
# the trailing expression displays the predictions.
y_pred = gd.predict(X_test)
y_pred

array([152.31047551, 198.60713971, 128.14814454, 104.69653282,
       264.60975104, 251.78982847, 112.53303682, 115.65764375,
        96.14386619, 187.60707007, 145.5424313 , 172.26933121,
       179.17535824, 137.00735816, 290.81751134,  87.23927441,
       201.52085925, 148.76340321, 132.60127033, 129.21830096,
       147.81855769, 172.46068718, 151.51805629, 174.59385815,
       127.67583934, 221.94280112, 199.97961162, 102.15902239,
        54.96314252, 237.84032994, 244.04075778, 113.1058621 ,
        68.18448062,  96.43729876, 204.24721184, 164.08608595,
       161.25194358, 192.12497231, 113.33164409, 238.4688459 ,
       141.23142495, 120.70213832, 187.93787444, 186.52144249,
       174.90101249, 143.31210064, 169.13090131, 298.61993407,
       105.8814832 , 170.25383361, 253.91413206, 142.13613276,
       151.43544048, 123.91404669, 191.52768383,  95.05083384,
       130.07245878,  76.28739126, 158.03148844, 155.83804257,
       163.23480014, 161.35648277, 102.54766406, 226.92

In [14]:
# Score the gradient-descent model on the test set (R^2; 1.0 is a perfect fit).
r2_score(y_true=y_test, y_pred=y_pred)

0.4537070692803782

#### Observe that the r2_score of our class (≈ 0.454) is nearly the same as that of scikit-learn's LinearRegression() class (≈ 0.440), even though the learned coefficients differ.