# Gradient Descent

In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

## Obtaining Data

In [8]:
# Load the diabetes dataset directly as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)

In [11]:
# Hold out 20% of all samples as the final test split; the remainder
# (X_, y_) is split again below into training and validation parts.
X_, X_test, y_, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Carve a validation split (20% of the non-test samples) out of X_, y_;
# the fixed random_state keeps the split reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X_,
    y_,
    test_size=0.2,
    random_state=42,
)

## Using sklearn LinearRegression

Let's first train a model using sklearn's `LinearRegression`. We can use it as a baseline to verify the results obtained by our own implementation.

In [13]:
# Reference model used to sanity-check the hand-written gradient-descent implementation.
sk_lr = LinearRegression()

In [16]:
# Fit the baseline on the training split only; validation and test rows are held back.
sk_lr.fit(X_train, y_train)

In [19]:
# Intercept of the fitted baseline (reference value for the gradient-descent model).
sk_lr.intercept_

147.8363360547508

In [20]:
# Coefficients of the fitted baseline (reference values for the gradient-descent model).
sk_lr.coef_

array([   7.0366653 , -246.78306312,  547.24888098,  324.79496319,
       -921.9697261 ,  472.19859546,  172.52537595,  292.87021204,
        764.64280056,   83.23467618])

In [53]:
# Baseline score on the training split (sklearn regressors report R^2 here).
sk_lr.score(X_train, y_train)

0.5305594711173106

In [54]:
# Baseline score on the held-out test split.
sk_lr.score(X_test, y_test)

0.45058896473344867

## Batch Gradient Descent

In [57]:
class BGD:
    """Linear regression fitted with batch (full-dataset) gradient descent.

    Each epoch computes the gradient of the mean squared error over all
    training rows and moves the parameters one step of size ``lr`` against it.

    Parameters
    ----------
    lr : float, default=0.01
        Learning rate (step size) applied to both the intercept and the
        coefficient updates.
    epochs : int, default=200
        Number of parameter updates performed by ``fit``.

    Attributes
    ----------
    coef : numpy.ndarray of shape (n_features,) or None
        Feature weights; ``None`` until ``fit`` has been called.
    intercept : float or None
        Bias term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, lr=0.01, epochs=200):
        self.coef = None
        self.intercept = None
        self.lr = lr
        self.epochs = epochs

    def fit(self, X_train, X_test):
        """Learn the intercept and coefficients from the given data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features.
        X_test : array-like of shape (n_samples,)
            Target values for ``X_train``. The parameter name is kept only
            so existing calls keep working; it holds the regression targets,
            not test features.

        Raises
        ------
        ValueError
            If ``X_train`` is not a non-empty 2-D array or the targets do not
            form a 1-D array with one entry per row of ``X_train``.
        """
        features = np.asarray(X_train, dtype=float)
        # Use the targets that were passed in. Reading a notebook-level
        # variable here would silently train on whatever happens to be
        # defined in the kernel instead of the caller's data.
        target = np.asarray(X_test, dtype=float)

        if features.ndim != 2:
            raise ValueError(
                f"X_train must be 2-D (n_samples, n_features), got shape {features.shape}"
            )
        m, n = features.shape
        if m == 0:
            raise ValueError("X_train must contain at least one sample")
        if target.shape != (m,):
            # A (m, 1) column would broadcast to an (m, m) error matrix below,
            # so reject anything that is not exactly one target per row.
            raise ValueError(
                f"targets must have shape ({m},), got {target.shape}"
            )

        # initialize coefficients and intercept
        self.coef = np.ones(n)
        self.intercept = 1.0

        for _ in range(self.epochs):

            # predictions using the parameters from the previous step
            y_hat = (features @ self.coef) + self.intercept
            errors = target - y_hat

            # gradients of mean((y - y_hat)^2) w.r.t. intercept and coefficients
            intercept_gradient = -2 * np.mean(errors)
            coef_gradient = (-2 / m) * (features.T @ errors)

            # step against the gradient
            self.intercept = self.intercept - self.lr * intercept_gradient
            self.coef = self.coef - self.lr * coef_gradient

    @property
    def intercept_(self):
        """Learned intercept (sklearn-style alias for ``intercept``)."""
        return self.intercept

    @property
    def coef_(self):
        """Learned coefficients (sklearn-style alias for ``coef``)."""
        return self.coef

    def predict(self, X_train):
        """Return ``X_train @ coef + intercept`` for the given feature rows.

        Despite its name, ``X_train`` may be any feature matrix with the same
        number of columns the model was fitted on.
        """
        return (X_train @ self.coef) + self.intercept

    def score(self, features, target):
        """Return ``r2_score(target, predictions)`` for ``features``."""
        predictions = self.predict(features)
        r2 = r2_score(target, predictions)
        return r2

In [58]:
# Candidate hyper-parameters: every learning rate is tried with every epoch budget.
lrs = [0.5, 0.2, 0.1, 0.01, 0.002, 0.0002, 0.0001]
epochs = [200, 500, 1000, 2000]

# One record per (learning_rate, epochs) pair; both lists are filled in the same order.
train_results = []
val_results = []

for lr in lrs:
    for n_epochs in epochs:
        cbgd = BGD(lr=lr, epochs=n_epochs)
        cbgd.fit(X_train, y_train)
        train_score = cbgd.score(X_train, y_train)
        # Model selection uses the validation split; the test split is not touched here.
        val_score = cbgd.score(X_val, y_val)
        train_results.append({"learning_rate": lr, "epochs": n_epochs, "score": train_score})
        val_results.append({"learning_rate": lr, "epochs": n_epochs, "score": val_score})

In [63]:
# Show the single configuration with the highest validation score.
(
    pd.DataFrame(val_results)
    .sort_values(by="score", ascending=False)
    .head(1)
)

Unnamed: 0,learning_rate,epochs,score
7,0.2,2000,0.465556


In [64]:
# Refit with the configuration that scored best on the validation split
# (learning rate 0.2, 2000 epochs).
final_model = BGD(lr=0.2, epochs=2000)
final_model.fit(X_train, y_train)

In [65]:
# r2_score of the final model on the training split.
final_model.score(X_train, y_train)

0.5211432906839515

In [66]:
# r2_score on the held-out test split, evaluated only after the
# hyper-parameters were chosen on the validation split.
final_model.score(X_test, y_test)

0.4546932266369046

In [67]:
# Learned intercept; compare with sk_lr.intercept_ from the sklearn baseline.
final_model.intercept_

147.84987372837907

In [68]:
# Learned coefficients; compare element-wise with sk_lr.coef_.
# NOTE(review): several entries differ clearly from the baseline values shown earlier,
# presumably because gradient descent has not fully converged — TODO confirm.
final_model.coef_

array([  29.59572518, -195.00789504,  495.90803809,  297.7485222 ,
        -68.54531593, -135.56124884, -214.91064168,  148.86086722,
        382.71839551,  151.16594694])