In [1]:
# Disable Jedi-based tab completion in the IPython completer.
%config IPCompleter.use_jedi = False
# Set the completer's evaluation policy to 'limited'.
%config Completer.evaluation = 'limited'
import warnings
# NOTE(review): this silences EVERY warning for the rest of the session
# (deprecations, numerical warnings, ...). Consider narrowing it with
# `category=` / `module=` so real problems stay visible.
warnings.filterwarnings('ignore') 

In [2]:
import  numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
from sklearn.datasets import load_diabetes
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [4]:
# Load the diabetes regression dataset as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

<h3>Testing with the scikit-learn Linear Regression model</h3>

In [6]:
# Reference model: scikit-learn's LinearRegression, used as the baseline
# that the hand-written regressor further down is compared against.
from sklearn.linear_model import LinearRegression
sk_lr = LinearRegression()

In [7]:
# Fit the baseline on the training split.
sk_lr.fit(X=X_train, y=y_train)

In [8]:
# Score the baseline on the held-out rows; the bare name on the last line
# makes the notebook display the R^2 value.
sk_lr_y_pred = sk_lr.predict(X_test)
sk_lr_r2 = r2_score(y_true=y_test, y_pred=sk_lr_y_pred)
sk_lr_r2

0.4399338661568968

In [9]:
# Display the fitted baseline's coefficient vector (one weight per feature).
sk_lr.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [10]:
# Display the fitted baseline's intercept (bias term).
sk_lr.intercept_

np.float64(151.88331005254167)

<h2>Let's make our own Mini-Batch Gradient Descent regressor</h2>

In [19]:
import random

class BGD_Regressor:
    """Linear regression trained with mini-batch gradient descent.

    Every update draws ``batch_size`` distinct rows at random from the
    training data, so one epoch performs ``n_samples // batch_size`` updates
    on freshly sampled batches rather than sweeping a fixed partition.

    Parameters
    ----------
    batch_size : int, default=36
        Number of rows sampled for each update. Values larger than the
        number of training rows are clamped to it (one full-batch update per
        epoch) instead of silently skipping training.
    learning_rate : float, default=0.001
        Step size applied to both the intercept and the coefficient update.
    epochs : int, default=100
        Number of passes; each pass performs ``n_samples // batch_size``
        updates.
    random_state : int or None, default=None
        Seed for a private random generator used to sample batches. With
        ``None`` the module-level ``random`` generator is used, exactly as
        before this parameter existed.

    Attributes
    ----------
    coef_ : numpy.ndarray of shape (n_features,) or None
        Learned weights; ``None`` until ``fit`` has been called.
    intercept_ : float or None
        Learned bias term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, batch_size=36, learning_rate=0.001, epochs=100, random_state=None):
        self.intercept_ = None
        self.coef_ = None
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.random_state = random_state

    def fit(self, X_train, y_train):
        """Estimate ``coef_`` and ``intercept_`` from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features. Lists and pandas objects are accepted; the
            data is converted to a float NumPy array first.
        y_train : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets; flattened to one dimension.

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D, is empty, disagrees with ``y_train``
            on the number of rows, or if ``batch_size`` is smaller than 1.
        """
        # Convert up front so positional row indexing below behaves the same
        # for lists, DataFrames and ndarrays (a DataFrame indexed with a list
        # of ints would otherwise select columns, not rows).
        X = np.asarray(X_train, dtype=float)
        y = np.asarray(y_train, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError("X_train must be 2-dimensional: (n_samples, n_features)")
        n, m = X.shape
        if n == 0:
            raise ValueError("X_train must contain at least one row")
        if y.shape[0] != n:
            raise ValueError(
                f"X_train has {n} rows but y_train has {y.shape[0]} values"
            )
        if self.batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        # Clamp so an oversized batch_size still trains (one full-batch update
        # per epoch) instead of producing zero updates.
        batch_size = min(int(self.batch_size), n)
        total_batches = n // batch_size

        # A private generator makes runs repeatable when a seed is supplied,
        # without touching the global `random` state.
        rng = random if self.random_state is None else random.Random(self.random_state)

        # Starting point: zero intercept, all-ones weights.
        self.intercept_ = 0.0
        self.coef_ = np.ones(m)

        for _ in range(self.epochs):
            for _ in range(total_batches):
                # Sample batch_size distinct row positions for this update.
                indices = rng.sample(range(n), batch_size)
                X_batch = X[indices]
                y_batch = y[indices]

                y_pred = np.dot(X_batch, self.coef_) + self.intercept_
                error = y_batch - y_pred

                # NOTE: the intercept gradient is averaged over the batch while
                # the coefficient gradient is summed, so the effective
                # coefficient step scales with batch_size. Kept as-is so that
                # existing learning_rate settings behave the same.
                intercept_der = -2 * np.mean(error)
                coef_der = -2 * np.dot(error, X_batch)

                self.intercept_ = self.intercept_ - (self.learning_rate * intercept_der)
                self.coef_ = self.coef_ - (self.learning_rate * coef_der)

        print(f"coef_ : {self.coef_}")
        print(f"intercept_ : {self.intercept_}")

    def predict(self, X_test):
        """Return ``X_test @ coef_ + intercept_`` for the given rows.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coef_ is None:
            raise RuntimeError("BGD_Regressor is not fitted yet; call fit() before predict()")
        return np.dot(np.asarray(X_test, dtype=float), self.coef_) + self.intercept_

In [24]:
# Size each mini-batch at one tenth of the training rows.
my_bgd = BGD_Regressor(
    batch_size=X_train.shape[0] // 10,
    learning_rate=0.01,
    epochs=50,
)

In [25]:
# Train the hand-written regressor; fit() prints the learned parameters.
my_bgd.fit(X_train=X_train, y_train=y_train)

coef_ : [  54.76934632  -68.54197476  349.57673349  253.10716744   24.30291617
  -30.80825743 -161.3238244   120.60927185  328.3289754   127.38591501]
intercept_ : 151.1981974626405


In [26]:
# Predict the held-out targets with the hand-written regressor.
my_bgd_y_pred = my_bgd.predict(X_test=X_test)

In [27]:
# R^2 of the hand-written regressor on the same held-out rows, for direct
# comparison with the scikit-learn baseline above.
my_bgd_r2_score = r2_score(y_true=y_test, y_pred=my_bgd_y_pred)
my_bgd_r2_score

0.431418465512985