In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes

In [3]:
X, y = load_diabetes(return_X_y=True)

In [4]:
X.shape, y.shape

((442, 10), (442,))

In [5]:
X, y

(array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990749, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06833155, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286131, -0.02593034],
        ...,
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04688253,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452873, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00422151,  0.00306441]]),
 array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
        259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
   

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()

lr.fit(X_train, y_train)

y_pred_sk = lr.predict(X_test)

In [8]:
lr.coef_, lr.intercept_

(array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
        -931.48884588,  518.06227698,  163.41998299,  275.31790158,
         736.1988589 ,   48.67065743]),
 np.float64(151.34560453985995))

In [9]:
from sklearn.metrics import r2_score

r2_score_sk = r2_score(y_test, y_pred_sk)

r2_score_sk

0.4526027629719195

In [60]:
class Batch_Gradient_Descent():

    def __init__(self, learning_rate, epochs):
        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs
    
    def fit(self, X_train, y_train):
        X_train = np.insert(X_train, 0,1,axis=1)
        self.coef_ = np.random.randn(X_train.shape[1]) * 0.01
        self.intercept_ = 0

        for i in range(self.epochs):
            y_hat = np.dot(X_train, self.coef_ ) + self.intercept_
            intercept_slope  = -2 * np.mean(y_train - y_hat)
            self.intercept_ = self.intercept_ + (self.lr * intercept_slope)

            coef_slope = -2 * np.mean((y_train - y_hat)[:, np.newaxis] * X_train, axis=0)
            self.coef_ = self.coef_ + (self.lr * coef_slope)

        print(f"Coef_: {self.coef_}, intercept_: {self.intercept_}")
        print(f"Coef Shape: {self.coef_.shape}")
        print(f"X_train Shape: {X_train.shape}")
        print(f"y_hat: {y_hat}")
        print(f"Interecept_slope: {intercept_slope}, Coef Slope: {coef_slope}")

    def predict(self, X_test):
        X_test = np.insert(X_test, 0,1, axis=1)
        y_pred = self.intercept_ + np.dot(X_test, self.coef_)
        return y_pred
                                      

In [61]:
bgd = Batch_Gradient_Descent(learning_rate=0.000001, epochs=500)

In [62]:
bgd.fit(X_train, y_train)

Coef_: [-0.15294936 -0.0097822   0.00423783 -0.00937663 -0.0110347  -0.0048782
 -0.00442949  0.00567004 -0.00254194  0.0187287  -0.00800669], intercept_: -0.15388916175909145
Coef Shape: (11,)
X_train Shape: (353, 11)
y_hat: [-0.30761093 -0.30608034 -0.30746444 -0.3045759  -0.30406519 -0.3045303
 -0.30698398 -0.30639398 -0.30670304 -0.30482089 -0.30610044 -0.30684889
 -0.30554227 -0.30807562 -0.30379114 -0.30688446 -0.30379394 -0.30617516
 -0.30756157 -0.30605382 -0.30771839 -0.30607047 -0.30603542 -0.30738563
 -0.30731923 -0.30541414 -0.30498896 -0.30562058 -0.30586347 -0.30476511
 -0.30528227 -0.30629247 -0.3054206  -0.30765677 -0.30627223 -0.30732951
 -0.30548813 -0.30639389 -0.3051412  -0.3069399  -0.30389578 -0.30888099
 -0.30561399 -0.30635852 -0.30609404 -0.30924428 -0.30630889 -0.30681709
 -0.30654759 -0.30459243 -0.30861447 -0.30439904 -0.30702001 -0.30746956
 -0.30490887 -0.30748655 -0.30705995 -0.30641063 -0.30604021 -0.30587442
 -0.30826473 -0.30737033 -0.30732332 -0.306924

In [65]:
y_pred = bgd.predict(X_test)

In [64]:
def r2_score_custom(y_true, y_pred):

    mean_value = np.mean(y_true)

    SSE = np.sum((y_true - y_pred) ** 2)
    TSS = np.sum((y_true - mean_value) ** 2)

    r2_score = 1 - (SSE / TSS)
    return r2_score

In [67]:
r2_score_c = r2_score_custom(y_test, y_pred)

r2_score_c

np.float64(-4.027818624331719)

In [68]:
r2_score(y_test, y_pred)

-4.027818624331719