In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

## __Using Closed Form Solution__ ##

In [2]:
import numpy as np

class RidgeRegression:
    def __init__(self, alpha=0.1):
        self.alpha = alpha
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        X_train = np.insert(X_train, 0, 1, axis=1)
        I = np.identity(X_train.shape[1])
        I[0][0] = 0
        result = np.linalg.inv(np.dot(X_train.T, X_train) + self.alpha * I).dot(X_train.T).dot(y_train)
        self.intercept_ = result[0]
        self.coef_ = result[1:]

    def predict(self, X_test):
        return np.dot(X_test, self.coef_) + self.intercept_

**Result on Dataset 1: _Boston Housing Dataset_**

In [3]:
data = pd.read_csv('BostonHousing.csv')
data = data.iloc[:, 1:]
X = data.iloc[:, :-1]  # All columns except the last one
Y = data.iloc[:, -1]   # Only the last column

# Split data into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Do feature scaling of the data
scaler = StandardScaler()
X_train_transformed = scaler.fit_transform(X_train)
X_test_transformed = scaler.transform(X_test) 

In [4]:
# Results using custom model
ridge_custom = RidgeRegression(alpha=0.01)
ridge_custom.fit(X_train_transformed, Y_train)
y_custom_ridge = ridge_custom.predict(X_test_transformed)
mse = mean_squared_error(Y_test, y_custom_ridge)
r2 = r2_score(Y_test, y_custom_ridge )
print("MSE: ", mse)
print("R2: ", r2)

MSE:  24.34875081306716
R2:  0.6679736164731103


In [5]:
# Results using sklearn model
ridge = Ridge(alpha=0.01)
ridge.fit(X_train_transformed, Y_train)
y_sklearn_ridge = ridge.predict(X_test_transformed)
mse = mean_squared_error(Y_test, y_sklearn_ridge)
r2 = r2_score(Y_test, y_sklearn_ridge)
print("MSE: ", mse)
print("R2: ", r2) 

MSE:  24.348750813067145
R2:  0.6679736164731105


**Result on Dataset 2: _Advertising Dataset_**

In [6]:
data = pd.read_csv('Advertising.csv')
data = data.iloc[:, 1:]
X = data.iloc[:, :-1]  # All columns except the last one
Y = data.iloc[:, -1]   # Only the last column

# Split data into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Do feature scaling of the data
scaler = StandardScaler()
X_train_transformed = scaler.fit_transform(X_train)
X_test_transformed = scaler.transform(X_test) 

In [7]:
# Results using custom model
ridge_custom = RidgeRegression(alpha=0.01)
ridge_custom.fit(X_train_transformed, Y_train)
y_custom_ridge = ridge_custom.predict(X_test_transformed)
mse = mean_squared_error(Y_test, y_custom_ridge)
r2 = r2_score(Y_test, y_custom_ridge )
print("MSE: ", mse)
print("R2: ", r2)

MSE:  27.50064935992029
R2:  0.11004302889151873


In [8]:
# Results using sklearn model
ridge = Ridge(alpha=0.01)
ridge.fit(X_train_transformed, Y_train)
y_sklearn_ridge = ridge.predict(X_test_transformed)
mse = mean_squared_error(Y_test, y_sklearn_ridge)
r2 = r2_score(Y_test, y_sklearn_ridge)
print("MSE: ", mse)
print("R2: ", r2) 

MSE:  27.50064935992027
R2:  0.11004302889151929


## __Implementation using Stochastic Average Gradient (SAG)__ ##

In [9]:
import numpy as np

class RidgeRegressionSAG:
    def __init__(self, alpha=0.1, lr=0.01, n_iters=1000, tol=1e-4, random_state=None, fit_intercept=True):
        self.alpha = alpha
        self.lr = lr
        self.n_iters = n_iters
        self.tol = tol
        self.random_state = random_state
        self.fit_intercept = fit_intercept
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X, y):
        X = np.array(X, dtype=float)
        y = np.array(y, dtype=float).flatten()

        n_samples, n_features = X.shape

        # Handle intercept by centering
        if self.fit_intercept:
            self.X_mean_ = X.mean(axis=0)
            self.y_mean_ = y.mean()
            X = X - self.X_mean_
            y = y - self.y_mean_
        else:
            self.X_mean_ = np.zeros(n_features)
            self.y_mean_ = 0.0

        # Initialize
        self.coef_ = np.zeros(n_features)
        gradient_memory = np.zeros((n_samples, n_features))
        gradient_sum = np.zeros(n_features)
        prev_coef = np.copy(self.coef_)

        rng = np.random.default_rng(self.random_state)

        for it in range(self.n_iters):
            i = rng.integers(0, n_samples)
            xi = X[i]
            yi = y[i]

            y_pred_i = np.dot(xi, self.coef_)
            error_i = y_pred_i - yi

            grad_i = 2 * xi * error_i + 2 * self.alpha * self.coef_

            # SAG memory update
            gradient_sum -= gradient_memory[i]
            gradient_sum += grad_i
            gradient_memory[i] = grad_i

            avg_grad = gradient_sum / n_samples

            self.coef_ -= self.lr * avg_grad

            coef_change = np.linalg.norm(self.coef_ - prev_coef)
            if coef_change < self.tol:
                print(f"Converged after {it + 1} iterations with change {coef_change:.5f}")
                break

            prev_coef = np.copy(self.coef_)

        # Set final intercept after training
        if self.fit_intercept:
            self.intercept_ = self.y_mean_ - np.dot(self.X_mean_, self.coef_)
        else:
            self.intercept_ = 0.0

    def predict(self, X):
        X = np.array(X, dtype=float)
        return np.dot(X, self.coef_) + self.intercept_


**Result on Dataset 1: _Boston Housing Dataset_**

In [10]:
data = pd.read_csv('BostonHousing.csv')
data = data.iloc[:, 1:]
X = data.iloc[:, :-1]  # All columns except the last one
Y = data.iloc[:, -1]   # Only the last column

# Split data into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Do feature scaling of the data
scaler = StandardScaler()
X_train_transformed = scaler.fit_transform(X_train)
X_test_transformed = scaler.transform(X_test) 

In [11]:
# Results using custom model
ridge_custom = RidgeRegressionSAG(alpha=0.01)
ridge_custom.fit(X_train_transformed, Y_train)
y_custom_ridge = ridge_custom.predict(X_test_transformed)
mse = mean_squared_error(Y_test, y_custom_ridge)
r2 = r2_score(Y_test, y_custom_ridge )
print("MSE: ", mse)
print("R2: ", r2)

MSE:  26.95358605724617
R2:  0.6324533537521171


In [12]:
# Results using sklearn model
ridge = Ridge(alpha=0.01, solver='sag')
ridge.fit(X_train_transformed, Y_train)
y_sklearn_ridge = ridge.predict(X_test_transformed)
mse = mean_squared_error(Y_test, y_sklearn_ridge)
r2 = r2_score(Y_test, y_sklearn_ridge)
print("MSE: ", mse)
print("R2: ", r2) 

MSE:  24.34841377722394
R2:  0.6679782123882355


**Result on Dataset 2: _Advertising Dataset_**

In [13]:
data = pd.read_csv('Advertising.csv')
data = data.iloc[:, 1:]
X = data.iloc[:, :-1]  # All columns except the last one
Y = data.iloc[:, -1]   # Only the last column

# Split data into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Do feature scaling of the data
scaler = StandardScaler()
X_train_transformed_a = scaler.fit_transform(X_train)
X_test_transformed_a = scaler.transform(X_test) 

In [14]:
# Results using custom model
ridge_custom = RidgeRegressionSAG(alpha=0.01)
ridge_custom.fit(X_train_transformed_a, Y_train)
y_custom_ridge = ridge_custom.predict(X_test_transformed_a)
mse = mean_squared_error(Y_test, y_custom_ridge)
r2 = r2_score(Y_test, y_custom_ridge )
print("MSE: ", mse)
print("R2: ", r2)

MSE:  27.55011995557022
R2:  0.10844209573218355


In [15]:
# Results using sklearn model
ridge = Ridge(alpha=0.01, solver='sag')
ridge.fit(X_train_transformed_a, Y_train)
y_sklearn_ridge = ridge.predict(X_test_transformed_a)
mse = mean_squared_error(Y_test, y_sklearn_ridge)
r2 = r2_score(Y_test, y_sklearn_ridge)
print("MSE: ", mse)
print("R2: ", r2) 

MSE:  27.500630633853376
R2:  0.1100436348915288
