# Importing Libraries

In [78]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge , SGDRegressor
from sklearn.datasets import load_diabetes
from sklearn.metrics import r2_score
import warnings
warnings.filterwarnings('ignore')

# Ridge Regression using scikit learn library

In [11]:
import numpy as np

np.random.seed(42)

# Single feature X
X = np.random.uniform(-3, 3, (200, 1))

# Nonlinear relationship: y = 2xÂ² + 3x + 4 + noise
y = 2 * (X**2) + 3 * X + 4 + np.random.normal(0, 1.5, (200, 1))

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [13]:
ridge = Ridge(alpha=0.1)
ridge.fit(X_train , y_train)
y_pred = ridge.predict(X_test)

In [14]:
r2_score(y_test , y_pred)

0.5161598519692874

In [36]:
ridge.coef_

array([2.88686792])

In [37]:
ridge.intercept_

array([10.28823055])

# Closed form solution
# Ridge Rregression from scratch (Ordinary least Square method)

In [42]:
class MyRidge:
  def __init__(self , alpha):
    self.alpha = alpha
    self.coef_ = None
    self.intercept_ = None

  def fit(self , X_train , y_train):
    num = np.sum((y_train - y_train.mean()) * (X_train - X_train.mean()))
    deno = np.sum((X_train - X_train.mean()) ** 2) + self.alpha
    self.coef_ = num / deno
    self.intercept_ = y_train.mean() - self.coef_ * X_train.mean()

  def predict(self , X_test):
    return self.coef_ * X_test + self.intercept_

In [43]:
my_ridge = MyRidge(alpha=0.1)
my_ridge.fit(X_train , y_train)
y_pred = my_ridge.predict(X_test)

In [44]:
my_ridge.coef_

np.float64(2.8868679165270144)

In [46]:
my_ridge.intercept_

np.float64(10.28823055203199)

In [47]:
r2_score(y_test , y_pred)

0.5161598519692874

# Ridge Regression for multiple rows

In [48]:
data = load_diabetes()
X = data.data
y = data.target

X_train , X_test , y_train , y_test = train_test_split(X , y , test_size=0.2 , random_state=2)

In [49]:
ridge = Ridge(alpha=0.1)
ridge.fit(X_train , y_train)
y_pred = ridge.predict(X_test)

In [50]:
ridge.coef_

array([   6.64373321, -172.23747375,  485.52958514,  314.67584612,
        -72.94020918,  -80.58868817, -174.46352722,   83.61470987,
        484.36531622,   73.58509056])

In [51]:
ridge.intercept_

np.float64(151.92546856900984)

In [52]:
r2_score(y_test , y_pred)

0.45199494197195456

In [74]:
class Ridge:
  def __init__(self , alpha):
    self.alpha = alpha
    self.coef_ = None
    self.intercept_ = None

  def fit(self , X_train , y_train):
    X_train = np.insert(X_train , 0 , 1 , axis=1)
    I = np.identity(X_train.shape[1])
    I[0][0] = 0

    result = np.dot(np.linalg.inv(np.dot(X_train.T , X_train) + self.alpha * I) , np.dot(X_train.T , y_train))
    self.intercept_ = result[0]
    self.coef_ = result[1:]

  def predict(self , X_test):
    return np.dot(X_test , self.coef_) + self.intercept_

In [75]:
r = Ridge(alpha=0.1)
r.fit(X_train , y_train)
y_pred = r.predict(X_test)

In [76]:
r.coef_

array([   6.64373321, -172.23747375,  485.52958514,  314.67584612,
        -72.94020918,  -80.58868817, -174.46352722,   83.61470987,
        484.36531622,   73.58509056])

In [68]:
r.intercept_

np.float64(151.92546856900984)

In [77]:
r2_score(y_test , y_pred)

0.45199494197195444

# Ridge Regression using Batch Gradient Descent

In [150]:
# using sgd regressor
sgdr = SGDRegressor(penalty='l2' , alpha=0.0001 , max_iter=1000 , eta0=0.1, learning_rate='constant')
sgdr.fit(X_train , y_train)
y_pred = sgdr.predict(X_test)

In [151]:
sgdr.coef_

array([  18.63495782, -153.26612083,  464.3554668 ,  313.33051195,
        -33.56452813, -104.96244352, -194.26441315,  107.60303836,
        425.31569728,  104.78708902])

In [152]:
sgdr.intercept_

array([141.26103563])

In [153]:
r2_score(y_test , y_pred)

0.4312724164802373

In [156]:
# using sparse_cg
l2 = Ridge(alpha=0.0001 , max_iter=500 , solver='sparse_cg')
l2.fit(X_train , y_train)
y_pred = l2.predict(X_test)

In [157]:
l2.coef_

array([  -9.11611525, -205.32921617,  516.88966862,  340.5494409 ,
       -883.4238665 ,  551.55958783,  148.58705882,  125.35302171,
        856.48594086,   52.46892151])

In [158]:
l2.intercept_

np.float64(151.8835292785134)

In [159]:
r2_score(y_test , y_pred)

0.44004294994064097

In [183]:
class BGD_Ridge:
  def __init__(self , alpha , learning_rate , epochs):
    self.alpha = alpha
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.coef = None
    self.intercept_ = None

  def fit(self , X_train , y_train):
    self.intercept_ = 0
    self.coef_ = np.ones(X_train.shape[1])
    X_train = np.insert(X_train , 0 , 1 , axis=1)
    w = np.insert(self.coef_ , 0 , self.intercept_)

    for i in range(self.epochs):
      w_der = np.dot(np.dot(X_train.T ,  X_train) , w) - np.dot(X_train.T , y_train) + self.alpha * w
      w = w - self.learning_rate * w_der

    self.coef_ = w[1:]
    self.intercept_ = w[0]

  def predict(self , X_test):
    return np.dot(X_test , self.coef_) + self.intercept_

In [200]:
bgd_ridge = BGD_Ridge(alpha=0.001 , learning_rate=0.005 , epochs=500)
bgd_ridge.fit(X_train , y_train)
y_pred = bgd_ridge.predict(X_test)

In [201]:
bgd_ridge.coef_

array([  19.50919039, -162.92602513,  478.95477998,  317.86376108,
        -34.07709121, -108.63608801, -193.66871805,  106.94769192,
        437.10746813,  103.57606041])

In [202]:
bgd_ridge.intercept_

np.float64(152.03121813717044)

In [203]:
r2_score(y_test , y_pred)

0.45395431712097367

# Ridge Regression using Stochastic Gradient Descent

In [276]:
class SGD_Ridge:
  def __init__(self , alpha , learning_rate , epochs):
    self.alpha = alpha
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.coef = None
    self.intercept_ = None

  def fit(self , X_train , y_train):
    self.intercept_ = 0
    self.coef_ = np.ones(X_train.shape[1])
    X_train = np.insert(X_train , 0 , 1 , axis=1)
    w = np.insert(self.coef_ , 0 , self.intercept_)

    for epochs in range(self.epochs):
      for i in range(X_train.shape[0]):
        w_der = ((np.dot(X_train[i] , w)) - y_train[i]) + (self.alpha * w)
        w = w - self.learning_rate * w_der

    self.coef_ = w[1:]
    self.intercept_ = w[0]

  def predict(self , X_test):
    return np.dot(X_test , self.coef_) + self.intercept_

In [289]:
sgd_ridge = SGD_Ridge(alpha=0.001 , learning_rate=0.001 , epochs=50)
sgd_ridge.fit(X_train , y_train)
y_pred = sgd_ridge.predict(X_test)

In [290]:
bgd_ridge.coef_

array([  19.50919039, -162.92602513,  478.95477998,  317.86376108,
        -34.07709121, -108.63608801, -193.66871805,  106.94769192,
        437.10746813,  103.57606041])

In [291]:
bgd_ridge.intercept_

np.float64(152.03121813717044)

In [292]:
r2_score(y_test , y_pred)

0.18252663400247016

# Ridge Regression using Mini Batch Gradient Descent

In [296]:
import math
import random

class MBGD_Ridge:
  def __init__(self , alpha , learning_rate , epochs , batch_size):
    self.alpha = alpha
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.batch_size = batch_size
    self.coef = None
    self.intercept_ = None

  def fit(self , X_train , y_train):
    self.intercept_ = 0
    self.coef_ = np.ones(X_train.shape[1])
    X_train = np.insert(X_train , 0 , 1 , axis=1)
    w = np.insert(self.coef_ , 0 , self.intercept_)

    for epochs in range(self.epochs):
      for i in range(math.ceil(X_train.shape[0] / self.batch_size)):
        idx = random.sample(range(X_train.shape[0]) , self.batch_size)

        w_der = np.dot(np.dot(X_train[idx].T ,  X_train[idx]) , w) - np.dot(X_train[idx].T , y_train[idx]) + self.alpha * w
        w = w - self.learning_rate * w_der

    self.coef_ = w[1:]
    self.intercept_ = w[0]

  def predict(self , X_test):
    return np.dot(X_test , self.coef_) + self.intercept_

In [297]:
mbgd_ridge = MBGD_Ridge(alpha=0.001 , learning_rate=0.005 , epochs=500 , batch_size = 10)
mbgd_ridge.fit(X_train , y_train)
y_pred = mbgd_ridge.predict(X_test)

In [298]:
mbgd_ridge.coef_

array([  22.06265168, -158.28278941,  471.68269377,  312.22816802,
        -31.87958099, -102.84053052, -191.24743924,  106.22275414,
        425.02469391,  101.84747424])

In [299]:
mbgd_ridge.intercept_

np.float64(150.96281821420143)

In [300]:
r2_score(y_test , y_pred)

0.4544331367835197