# Implementation of Multiple_Linear_Regression Using Mini_Batch_Gradient_Descent

In [26]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import matplotlib.pyplot as plt
import random

### Importing and Splitting of Dataset into Train and Test..

In [2]:
# Loading sklearn's built-in dataset..
X, y = load_diabetes(return_X_y = True)

In [3]:
X   # X contains all features and y contains target variables

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990842, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06832974, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286377, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04687948,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452837, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00421986,  0.00306441]])

In [4]:
X.shape, y.shape

((442, 10), (442,))

In [5]:
# Splitting Data into Train and Test..
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 2)

In [6]:
print("Shape of X_train : ", X_train.shape)
print("Shape of y_train : ", y_train.shape)
print("Shape of X_test : ", X_test.shape)
print("Shape of y_test : ", y_test.shape)

Shape of X_train :  (353, 10)
Shape of y_train :  (353,)
Shape of X_test :  (89, 10)
Shape of y_test :  (89,)


### Using SciKit_Learn's SGDRegressor() Class

In [15]:
reg = SGDRegressor(alpha = 0.00001, max_iter = 1000)
reg.fit(X_train, y_train)



SGDRegressor(alpha=1e-05)

In [16]:
y_pred2 = reg.predict(X_test)
y_pred2

array([154.378347  , 184.3284537 , 145.48258911, 116.00481101,
       225.03758466, 219.25083032, 110.71171064, 118.85422789,
        99.17407359, 179.99139313, 165.1084001 , 169.39532914,
       181.64970803, 156.92625794, 227.32986716, 102.70117799,
       176.14218792, 136.99103423, 144.14956049, 143.67543241,
       128.87090507, 192.91179961, 170.80370572, 176.38681122,
       129.48356386, 210.34822852, 186.74931367, 132.77160693,
        78.27992527, 231.5764248 , 223.61463625, 126.02786247,
        87.82911527, 121.26254268, 191.84827899, 165.80408958,
       167.17600141, 192.21572276, 121.0011819 , 220.12792217,
       135.5471233 , 132.45001496, 181.08674706, 181.73147254,
       166.05307487, 148.28801369, 171.78478334, 253.8558005 ,
       126.24170094, 192.29775478, 218.13411798, 128.45688113,
       141.64084787, 159.78500248, 183.88295032, 125.27422664,
       165.25906702,  98.32320259, 157.52089929, 140.40217877,
       162.56276288, 172.86140838, 119.66070402, 190.48

In [17]:
print("Co-efficients or Slopes or Weights : ", reg.coef_)
print("Intercepts : ", reg.intercept_)

Co-efficients or Slopes or Weights :  [  61.7283821   -22.64721544  256.39570945  187.99869374   40.13377159
   11.60954672 -141.22749872  122.93007744  239.21419019  118.01666636]
Intercepts :  [151.95577937]


In [18]:
r2_score(y_test, y_pred2)

0.39423008446649144

## Writing our Own MBGDRegressor() Class

In [31]:
class MBSGDRegressor:
    def __init__(self, batch_size, learning_rate, epochs):
        self.coef_ = None  # weight or slop or coefficient
        self.intercept_ = None  # intercept
        
        self.lr = learning_rate  # Setting the Learning Rate
        self.epochs = epochs  # Setting the number of Epochs or Iterations
        self.b_size = batch_size  # Setting the Batch Size
        
        
    def fit(self, X_train, y_train):
        
        self.intercept_ = 0  # intercept, generally initialised with 0.
        self.coef_ = np.ones(X_train.shape[1])  # weight or slop or coefficient, generally initialised with 1.
        
        # Running for loop for each epochs..
        for i in range(self.epochs): 
            for j in range(int(X_train.shape[0]/self.b_size)):  # no. of Batches = total_size/batch_size..
                
                indx = random.sample(range(X_train.shape[0]), self.b_size)  
                # choosing a random set of row of size = b_size and then we will perform our further operations..
                
                # Determining the value of (y_hat)
                y_hat = np.dot(X_train[indx], self.coef_) + self.intercept_

                # Finding slopes of loss_function using partial differentiation by m and b, i.e. dl/dm & dl/db
                intercept_der = -2 * np.mean(y_train[indx] - y_hat)
                coef_der = -2 * np.dot((y_train[indx] - y_hat), X_train[indx])

                # Updating slope(m) and intercept(b) in each epochs..
                self.intercept_ = self.intercept_ - (self.lr * intercept_der)  # b(new) = b(old) - (learnig_rate * dl/db)
                self.coef_ = self.coef_ - (self.lr * coef_der)  # m(new) = m(old) - (learnig_rate * dl/dm)
          
        print("Co-efficient/Weights : ", self.coef_)
        print("Intercept : ", self.intercept_)
        
    def predict(self, X_test):
        return np.dot(X_test, self.coef_) + self.intercept_

In [32]:
# Building Model with our own MultipleGDRegressor() Class..
sgd = MBSGDRegressor(batch_size = 20, learning_rate = 0.001, epochs = 500)
# sgd = MultipleGDRegressor(learning_rate = 0.8, epochs = 100)

sgd.fit(X_train, y_train)  # Learning from data..

Co-efficient/Weights :  [  56.4510549   -63.71062934  343.89000736  247.19648534   19.68930566
  -27.75523759 -168.72047111  128.21441959  317.44394282  129.42113947]
Intercept :  151.94597363349365


In [33]:
# predicting values from test data.
y_pred = sgd.predict(X_test)
y_pred

array([154.04707377, 190.7866595 , 140.12279332, 109.56388466,
       243.44112263, 234.97498021, 108.05450587, 115.03526863,
        93.73509039, 184.46961107, 160.74079881, 172.36852327,
       183.80399677, 150.8725984 , 252.26147602,  93.50728392,
       184.87632241, 139.40840615, 140.38867948, 139.44190135,
       132.3002348 , 189.6760138 , 166.48327279, 177.99921637,
       127.06813029, 218.67221749, 194.43650536, 121.5634946 ,
        64.11032195, 240.12859034, 234.84252219, 119.82681536,
        75.65682789, 111.03158853, 199.3854656 , 165.82308742,
       167.16909814, 195.71108837, 115.81357552, 232.17922517,
       135.96404931, 127.68185096, 184.63125128, 185.93573032,
       170.14804704, 146.09153304, 173.3682044 , 276.32685751,
       118.36914285, 188.98065461, 235.42995269, 129.90601203,
       143.40811301, 150.48219298, 189.29177127, 114.34242978,
       156.10364436,  87.23951819, 158.54044135, 143.21694179,
       163.94987193, 171.09682968, 111.55323048, 204.21

In [34]:
# Checking score of the Algorithm..
r2_score(y_test, y_pred)

0.43151238679276094

#### Observe that the Accuracy for both own MBSGDRegressor() class and Scikit-Learn's SGDRegressor() class are same..