# Batch Gradient Descent

In [1]:
import numpy as np
from sklearn.datasets import load_diabetes

In [10]:
class GDRegressor:
    """Linear regression trained with batch gradient descent on the mean squared error.

    Parameters
    ----------
    lr : float
        Learning rate (step size) applied to every gradient update.
    epochs : int
        Number of full passes over the training data.
    verbose : bool, default=True
        When True, print the coefficients and intercept after every epoch.

    Attributes
    ----------
    intercept_ : float or None
        B0. None until `fit` has been called.
    coef_ : ndarray of shape (n_features,) or None
        B1..Bm. None until `fit` has been called.
    """

    def __init__(self, lr, epochs, verbose=True):
        self.intercept_ = None
        self.coef_ = None
        self.lr = lr
        self.epochs = epochs
        self.verbose = verbose

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,)
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        n_samples, n_features = X_train.shape

        # Starting point: B0 = 0 and B1..Bm = 1.
        self.intercept_ = 0
        self.coef_ = np.ones(n_features)

        for _ in range(self.epochs):
            # y_pred = B0 + B1*X1 + B2*X2 + ... + Bm*Xm for every row at once.
            y_pred = self.intercept_ + np.dot(X_train, self.coef_)
            residual = y_train - y_pred

            # Both gradients are taken at the same (pre-update) parameters.
            # dL/dB0: the mean supplies both the summation and the 1/n factor.
            gradient_intercept = -2 * np.mean(residual)
            # dL/dB1..Bm: the dot product sums over the samples for all
            # coefficients together, because the whole X matrix is used
            # rather than one column at a time.
            gradient_coef = -2 * np.dot(residual, X_train) / n_samples

            self.intercept_ = self.intercept_ - (self.lr * gradient_intercept)
            self.coef_ = self.coef_ - (self.lr * gradient_coef)

            if self.verbose:
                print(self.coef_, self.intercept_)

    def predict(self, X_test):
        """Return predictions for X_test.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)

        Raises
        ------
        RuntimeError
            If called before `fit`.
        """
        if self.coef_ is None:
            raise RuntimeError("GDRegressor.predict called before fit")
        # X goes first: (n_samples, n_features) . (n_features,) -> (n_samples,)
        return self.intercept_ + np.dot(np.asarray(X_test), self.coef_)

In [24]:
# Manual walk-through of the first gradient-descent step, mirroring GDRegressor.fit.
# NOTE(review): X_train / y_train come from the train_test_split cell, which must
# have been executed before this one.
lr = 0.01
intercept_ = 0
coef_ = np.ones(X_train.shape[1])

# Predictions with the initial parameters: X . B + B0
y_pred = X_train @ coef_ + intercept_

print(X_train.shape, y_train.shape, y_pred.shape)

(353, 10) (353,) (353,)


In [27]:
# Gradient of the mean squared error with respect to the intercept (B0),
# evaluated on the predictions already stored in y_pred.
gradient_intercept = -2 * np.mean(y_train - y_pred)

# Store the stepped value under a new name instead of overwriting `intercept_`.
# y_pred is not recomputed in this cell, so a self-referential update would apply
# the same gradient again on every re-run and drift away from the one-step value.
intercept_updated = intercept_ - (lr * gradient_intercept)

print(gradient_intercept, intercept_updated)

-307.4617401326832 9.223852203980496


In [28]:
# Gradient of the mean squared error with respect to all coefficients at once:
# the dot product sums residual * feature over the samples, then divides by n.
gradient_coef = -2 * np.dot(y_train - y_pred, X_train) / X_train.shape[0]

# Store the stepped coefficients under a new name so that re-running this cell
# does not keep applying the same (stale) gradient to `coef_`.
coef_updated = coef_ - (lr * gradient_coef)

print(gradient_coef.shape)


(10,)


In [11]:
# Load the diabetes dataset as a (features, target) pair of arrays and show their shapes.
X, y = load_diabetes(return_X_y=True)
(X.shape, y.shape)

((442, 10), (442,))

In [19]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# Train the hand-written batch gradient descent regressor on the training split.
gd = GDRegressor(epochs=100, lr=0.01)
gd.fit(X_train, y_train)

[1.01848336 1.00101279 1.04970628 1.03704219 1.01294848 1.00884116
 0.96889956 1.03261807 1.04468183 1.03505543] 3.074617401326832
[1.03687481 1.00201217 1.09929981 1.07400687 1.02592659 1.01772759
 0.93786467 1.06520645 1.08928227 1.06998875] 6.0877363317546545
[1.05517611 1.00299838 1.14878274 1.11089548 1.03893366 1.02665831
 0.90689412 1.09776561 1.13380284 1.10480232] 9.040586778320295
[1.07338902 1.00397165 1.19815717 1.14770947 1.05196902 1.03563234
 0.8759867  1.13029602 1.17824501 1.13949842] 11.934374127982261
[1.09151525 1.0049322  1.24742517 1.18445023 1.06503203 1.04464874
 0.84514124 1.16279812 1.22261023 1.1740793 ] 14.770279659629061
[1.10955647 1.00588024 1.29658876 1.22111915 1.07812204 1.05370658
 0.81435657 1.19527237 1.26689992 1.20854715] 17.549461026247208
[1.12751433 1.00681598 1.34564991 1.25771756 1.09123842 1.06280494
 0.78363158 1.22771919 1.31111547 1.24290415] 20.273052727445748
[1.14539044 1.00773965 1.39461056 1.29424679 1.10438057 1.07194294
 0.75296516

In [8]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Baseline: fit scikit-learn's LinearRegression on the same training split,
# predict the held-out rows, and report R^2 as the cell's output.
reg = LinearRegression().fit(X_train, y_train)
y_pred = reg.predict(X_test)

r2_score(y_test, y_pred)

0.4526027629719195

In [9]:
# Display the intercept and coefficients learned by the scikit-learn model.
reg.intercept_, reg.coef_

(np.float64(151.34560453985995),
 array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
        -931.48884588,  518.06227698,  163.41998299,  275.31790158,
         736.1988589 ,   48.67065743]))