# Gradient Descent

In [179]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

## Obtaining Data

In [194]:
# Load the diabetes dataset as a (features, target) pair rather than a Bunch object.
X, y = load_diabetes(return_X_y=True)

In [195]:
# Hold out 20% of the samples as the test set (fixed seed for a repeatable split).
# X[:,[0]] keeps only the first feature column while preserving a 2-D shape.
X_, X_test, y_, y_test = train_test_split(X[:,[0]], y, test_size=0.2, random_state=42)

In [196]:
# Split the remaining (non-test) samples again: 80% for training, 20% for validation.
X_train, X_val, y_train, y_val = train_test_split(X_, y_, test_size=0.2, random_state=42)

## Using sklearn LinearRegression

Let's first train a model using sklearn's LinearRegression. We can then use its intercept and coefficient to verify the results obtained by our own implementations.

In [203]:
# Reference model: scikit-learn's LinearRegression with its default settings.
sk_lr = LinearRegression()

In [204]:
# Fit the reference model on the training split only.
sk_lr.fit(X_train, y_train)

In [205]:
# Display the intercept learned by the reference model.
sk_lr.intercept_

149.7142689293783

In [206]:
# Display the coefficient(s) learned by the reference model (one per feature).
sk_lr.coef_

array([318.95931387])

# Batch Gradient Descent

In [209]:
class BGD:
    """Linear regression fitted with batch (full-dataset) gradient descent.

    Every epoch computes the gradient of the mean squared error over all
    training rows and takes a single step of size ``lr`` against it.

    Parameters
    ----------
    lr : float, default=0.01
        Step size applied to each gradient.
    epochs : int, default=200
        Number of passes over the data; each pass is one parameter update.
    """

    def __init__(self, lr=0.01, epochs=200):
        # Both parameters stay None until fit() has been called.
        self.coef = None
        self.intercept = None
        self.lr = lr
        self.epochs = epochs

    def fit(self, X_train, y_train):
        """Learn the coefficients and intercept from the training data.

        Parameters
        ----------
        X_train : array-like of shape (m, n)
            Feature matrix, one row per sample.
        y_train : array-like of shape (m,) or (m, 1)
            Target values.

        Returns
        -------
        BGD
            The fitted estimator itself, so calls can be chained.

        Raises
        ------
        ValueError
            If the training set is empty or ``y_train`` does not contain
            exactly one value per row of ``X_train``.
        """
        # Work on plain float arrays so array-likes behave the same as
        # ndarrays inside the update loop.
        X = np.asarray(X_train, dtype=float)
        # Flatten the target: a (m, 1) column would otherwise broadcast
        # against the (m,) predictions into an (m, m) error matrix.
        y = np.asarray(y_train, dtype=float).ravel()

        m, n = X.shape
        if m == 0:
            raise ValueError("X_train must contain at least one sample")
        if y.shape[0] != m:
            raise ValueError(
                f"y_train has {y.shape[0]} values but X_train has {m} rows"
            )

        # initialize coefficients and intercept
        self.coef = np.ones(n)
        self.intercept = 0.0

        for _ in range(self.epochs):
            # predictions using the parameters from the previous step
            y_hat = (X @ self.coef) + self.intercept
            errors = y - y_hat

            # gradients of the mean squared error
            intercept_gradient = -2 * np.mean(errors)
            coef_gradient = (-2 / m) * (X.T @ errors)

            # step against the gradient
            self.intercept = self.intercept - self.lr * intercept_gradient
            self.coef = self.coef - self.lr * coef_gradient

        return self

    @property
    def intercept_(self):
        """Fitted intercept (scikit-learn style alias of ``intercept``)."""
        return self.intercept

    @property
    def coef_(self):
        """Fitted coefficients (scikit-learn style alias of ``coef``)."""
        return self.coef

    def predict(self, X_test):
        """Return ``X_test @ coef + intercept`` for the given feature rows."""
        return (X_test @ self.coef) + self.intercept

    def score(self, features, target):
        """Return the R^2 score of the predictions for ``features``.

        The value is computed with ``r2_score(target, predictions)``.
        """
        predictions = self.predict(features)
        r2 = r2_score(target, predictions)
        return r2

# Stochastic Gradient Descent

In [217]:
class SGD:
    """Linear regression fitted with stochastic gradient descent.

    Each parameter update uses the squared-error gradient of one training
    sample drawn uniformly at random (with replacement). An epoch consists
    of ``m`` such updates, where ``m`` is the number of training rows.

    Parameters
    ----------
    lr : float, default=0.01
        Step size applied to each gradient.
    epochs : int, default=200
        Number of epochs (``m`` single-sample updates each).
    random_state : int or None, default=None
        Seed for the sample selection. ``None`` draws from NumPy's global
        random state (controllable through ``np.random.seed``).
    """

    def __init__(self, lr=0.01, epochs=200, random_state=None):
        # Both parameters stay None until fit() has been called.
        self.coef = None
        self.intercept = None
        self.lr = lr
        self.epochs = epochs
        self.random_state = random_state

    def fit(self, X_train, y_train):
        """Learn the coefficients and intercept from the training data.

        Parameters
        ----------
        X_train : array-like of shape (m, n)
            Feature matrix, one row per sample.
        y_train : array-like of shape (m,) or (m, 1)
            Target values.

        Returns
        -------
        SGD
            The fitted estimator itself, so calls can be chained.

        Raises
        ------
        ValueError
            If the training set is empty or ``y_train`` does not contain
            exactly one value per row of ``X_train``.
        """
        # Plain arrays guarantee that ``X[idx]`` / ``y[idx]`` below select
        # by position; on pandas objects the same syntax is a label or
        # column lookup.
        X = np.asarray(X_train, dtype=float)
        # Flatten so that y[idx] is a scalar even for a (m, 1) target.
        y = np.asarray(y_train, dtype=float).ravel()

        m, n = X.shape
        if m == 0:
            raise ValueError("X_train must contain at least one sample")
        if y.shape[0] != m:
            raise ValueError(
                f"y_train has {y.shape[0]} values but X_train has {m} rows"
            )

        # With no seed, keep drawing from the global NumPy stream.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # initialize coefficients and intercept
        self.coef = np.ones(n)
        self.intercept = 0.0

        for _epoch in range(self.epochs):
            for _step in range(m):
                # fetch one random sample (upper bound is exclusive)
                idx = rng.randint(0, m)
                sample = X[idx]

                # prediction using the parameters from the previous step
                y_hat = (sample @ self.coef) + self.intercept
                error = y[idx] - y_hat

                # gradients of this sample's squared error
                intercept_gradient = -2 * error
                coef_gradient = -2 * sample * error

                # step against the gradient
                self.intercept = self.intercept - self.lr * intercept_gradient
                self.coef = self.coef - self.lr * coef_gradient

        return self

    @property
    def intercept_(self):
        """Fitted intercept (scikit-learn style alias of ``intercept``)."""
        return self.intercept

    @property
    def coef_(self):
        """Fitted coefficients (scikit-learn style alias of ``coef``)."""
        return self.coef

    def predict(self, X_test):
        """Return ``X_test @ coef + intercept`` for the given feature rows."""
        return (X_test @ self.coef) + self.intercept

    def score(self, features, target):
        """Return the R^2 score of the predictions for ``features``.

        The value is computed with ``r2_score(target, predictions)``.
        """
        predictions = self.predict(features)
        r2 = r2_score(target, predictions)
        return r2

# Mini-Batch Gradient Descent

In [225]:
class MBGD:
    """Linear regression fitted with mini-batch gradient descent.

    Each parameter update uses the mean-squared-error gradient of a random
    batch of rows drawn without replacement. Batches are drawn
    independently of each other, so an epoch is ``m // batch_size`` random
    batches rather than a strict partition of the data.

    Parameters
    ----------
    lr : float, default=0.01
        Step size applied to each gradient.
    epochs : int, default=200
        Number of epochs.
    batch_size : int, default=32
        Rows per batch. If the training set has fewer rows than this, the
        whole training set is used as a single batch.
    random_state : int or None, default=None
        Seed for the batch selection. ``None`` draws from NumPy's global
        random state (controllable through ``np.random.seed``).
    """

    def __init__(self, lr=0.01, epochs=200, batch_size=32, random_state=None):
        # Both parameters stay None until fit() has been called.
        self.coef = None
        self.intercept = None
        self.lr = lr
        self.epochs = epochs
        self.batch_size = batch_size
        self.random_state = random_state

    def fit(self, X_train, y_train):
        """Learn the coefficients and intercept from the training data.

        Parameters
        ----------
        X_train : array-like of shape (m, n)
            Feature matrix, one row per sample.
        y_train : array-like of shape (m,) or (m, 1)
            Target values.

        Returns
        -------
        MBGD
            The fitted estimator itself, so calls can be chained.

        Raises
        ------
        ValueError
            If ``batch_size`` is smaller than 1, the training set is empty,
            or ``y_train`` does not contain exactly one value per row of
            ``X_train``.
        """
        if self.batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        # Plain arrays guarantee that indexing with ``idx`` below selects
        # rows by position.
        X = np.asarray(X_train, dtype=float)
        # Flatten the target: a (m, 1) column would otherwise broadcast
        # against the (batch,) predictions into a square error matrix.
        y = np.asarray(y_train, dtype=float).ravel()

        m, n = X.shape
        if m == 0:
            raise ValueError("X_train must contain at least one sample")
        if y.shape[0] != m:
            raise ValueError(
                f"y_train has {y.shape[0]} values but X_train has {m} rows"
            )

        # A batch cannot hold more rows than exist. Without this clamp a
        # small training set yields zero batches per epoch and the model
        # is returned untrained.
        batch_size = min(int(self.batch_size), m)
        batches_per_epoch = m // batch_size

        # With no seed, keep drawing from the global NumPy stream.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # initialize coefficients and intercept
        self.coef = np.ones(n)
        self.intercept = 0.0

        for _epoch in range(self.epochs):
            for _batch in range(batches_per_epoch):
                # fetch a batch of distinct row indices
                idx = rng.choice(m, size=batch_size, replace=False)
                X_batch = X[idx]

                # predictions using the parameters from the previous step
                y_hat = (X_batch @ self.coef) + self.intercept
                errors = y[idx] - y_hat

                # gradients of the batch mean squared error
                intercept_gradient = -2 * np.mean(errors)
                coef_gradient = (-2 / batch_size) * (X_batch.T @ errors)

                # step against the gradient
                self.intercept = self.intercept - self.lr * intercept_gradient
                self.coef = self.coef - self.lr * coef_gradient

        return self

    @property
    def intercept_(self):
        """Fitted intercept (scikit-learn style alias of ``intercept``)."""
        return self.intercept

    @property
    def coef_(self):
        """Fitted coefficients (scikit-learn style alias of ``coef``)."""
        return self.coef

    def predict(self, X_test):
        """Return ``X_test @ coef + intercept`` for the given feature rows."""
        return (X_test @ self.coef) + self.intercept

    def score(self, features, target):
        """Return the R^2 score of the predictions for ``features``.

        The value is computed with ``r2_score(target, predictions)``.
        """
        predictions = self.predict(features)
        r2 = r2_score(target, predictions)
        return r2