In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import Ridge

In [2]:
x,y = load_diabetes(return_X_y=True)

In [3]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = 0.2, random_state = 2)

### At first we will use SGDRegressor from the sklearn


In [4]:
reg1 = SGDRegressor(penalty = 'l2', alpha = 0.001, max_iter = 500, eta0=0.1, learning_rate='constant')

In [5]:
reg1.fit(x_train, y_train)
pred1 = reg1.predict(x_test)
print("R2 score : ", r2_score(y_test, pred1))
print(reg1.coef_)
print(reg1.intercept_)

R2 score :  0.4469406471517511
[  33.32573848 -110.59272726  386.62576311  266.27624593  -10.12925457
  -67.8967586  -167.9819519   105.80298493  354.34637264  102.64998306]
[153.193574]


### Second we will use Ridge from the sklearn by adjusting its parameter for gradient descent

In [6]:
reg2 = Ridge(solver='saga', max_iter = 500, alpha = 0.001)

Take a look on the results of Ridge which somewhat differ from the results of SGDRegressor

In [7]:
reg2.fit(x_train, y_train)
pred2 = reg2.predict(x_test)
print("R2 score : ", r2_score(y_test, pred2))
print(reg2.coef_)
print(reg2.intercept_)


R2 score :  0.4408754071205436
[  -8.75651924 -204.2917832   518.42584893  339.95926418 -785.48863324
  473.52797274  105.81340424  114.34641699  818.9180828    52.87710294]
151.88536365993718


### Finally we will create our own class for the implementation of Ridge regression using Gradient Descent

In [12]:
class my_ridge:
    def __init__(self, alpha, max_iter, learning_rate):
        self.alpha = alpha
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.intercept = None
        self.coeff = None

    def fit(self, x_train, y_train):
        # Initialize coefficients and intercept
        self.coeff = np.ones(x_train.shape[1])
        self.intercept = 0

        # Combine intercept and coefficients into a single weight vector
        w = np.insert(self.coeff, 0, self.intercept)

        # Add a bias column to x_train
        x_train = np.insert(x_train, 0, 1, axis=1)

        # Gradient descent loop
        for i in range(self.max_iter):
            predictions = np.dot(x_train, w)
            errors = predictions - y_train
            # Compute the gradient
            derivative = (2 / x_train.shape[0]) * (np.dot(x_train.T, errors) + self.alpha * np.insert(w[1:], 0, 0))
            # Update weights
            w -= self.learning_rate * derivative

        # Update intercept and coefficients
        self.intercept = w[0]
        self.coeff = w[1:]

    def predict(self, x_test):
        # Predict using the learned coefficients
        return np.dot(x_test, self.coeff) + self.intercept

In [15]:
reg3 = my_ridge(alpha = 0.001, max_iter = 500 , learning_rate = 0.005)

### Take a look at the difference between the results of prebuilt function and the results form the funciton that we created

In [16]:
reg3.fit(x_train, y_train)
pred3 = reg2.predict(x_test)
print("R2_socre : ", r2_score(y_test, pred3))
print(reg3.coeff)
print(reg3.intercept)

R2_socre :  0.4408753177579804
[ 5.04690153  1.68893578 11.41410258  9.26005258  4.70974156  3.75551296
 -5.54854704  7.87508898 11.54627214  7.36571964]
149.5404868004467
