In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

In [3]:
import numpy as np

class RidgeRegression:
    """Ridge (L2-penalised) linear regression fitted in closed form.

    The model is solved through the regularised normal equations
    ``(X^T X + alpha * I) w = X^T y``, where ``X`` is the training matrix with
    a leading column of ones. The entry of ``I`` that corresponds to that
    column is zeroed, so the intercept is estimated but never penalised.

    Parameters
    ----------
    alpha : float, default=0.1
        Multiplier applied to the identity-based penalty matrix.

    Attributes
    ----------
    coef_ : ndarray or None
        Fitted feature weights; ``None`` until ``fit`` has been called.
    intercept_ : float or None
        Fitted bias term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, alpha=0.1):
        self.alpha = alpha
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients from training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features (anything ``np.asarray`` can convert to floats).
        y_train : array-like with n_samples entries along its first axis
            Training targets.

        Returns
        -------
        RidgeRegression
            The fitted estimator itself, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X_train`` is not two-dimensional or its row count differs
            from the length of ``y_train``.
        numpy.linalg.LinAlgError
            If the regularised system is singular.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)
        if X_train.ndim != 2:
            raise ValueError(
                "X_train must be 2-dimensional (n_samples, n_features); "
                f"got an array with {X_train.ndim} dimension(s)"
            )
        if y_train.ndim == 0 or y_train.shape[0] != X_train.shape[0]:
            raise ValueError(
                "X_train and y_train must contain the same number of samples"
            )

        # Prepend a column of ones so the intercept is solved for jointly.
        design = np.insert(X_train, 0, 1, axis=1)

        # Identity-based penalty with the intercept entry zeroed out.
        penalty = self.alpha * np.identity(design.shape[1])
        penalty[0, 0] = 0

        # Solve the linear system directly instead of forming an explicit
        # inverse: cheaper and numerically better behaved.
        result = np.linalg.solve(design.T @ design + penalty, design.T @ y_train)

        self.intercept_ = result[0]
        self.coef_ = result[1:]
        return self

    def predict(self, X_test):
        """Return predictions ``X_test @ coef_ + intercept_``.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)
            Samples to predict for.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.coef_ is None or self.intercept_ is None:
            raise RuntimeError(
                "This RidgeRegression instance is not fitted yet; call fit() first"
            )
        return np.dot(np.asarray(X_test, dtype=float), self.coef_) + self.intercept_

**Results on Dataset 1: _Boston Housing Dataset_**

In [4]:
# Load the Boston housing table and discard its first column
# (presumably a row-index column -- verify against the CSV).
data = pd.read_csv('Boston.csv').iloc[:, 1:]

# Features are every remaining column except the last; the last is the target.
X, Y = data.iloc[:, :-1], data.iloc[:, -1]

# Hold out 20% of the rows for evaluation, with a fixed seed for the split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, random_state=42, test_size=0.2
)

# Standardise features: the scaler is fitted on the training split only and
# then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_transformed = scaler.transform(X_train)
X_test_transformed = scaler.transform(X_test)

In [5]:
# Fit the hand-written ridge model on the scaled training split and score
# its predictions on the held-out split.
ridge_custom = RidgeRegression(alpha=0.01)
ridge_custom.fit(X_train_transformed, Y_train)
y_custom_ridge = ridge_custom.predict(X_test_transformed)

mse, r2 = mean_squared_error(Y_test, y_custom_ridge), r2_score(Y_test, y_custom_ridge)
print("MSE: ", mse)
print("R2: ", r2)

MSE:  24.29133689017155
R2:  0.6687565288011172


In [6]:
# Reference run: scikit-learn's Ridge with the same alpha on the same scaled
# splits, scored with the same metrics.
ridge = Ridge(alpha=0.01).fit(X_train_transformed, Y_train)
y_sklearn_ridge = ridge.predict(X_test_transformed)

mse, r2 = mean_squared_error(Y_test, y_sklearn_ridge), r2_score(Y_test, y_sklearn_ridge)
print("MSE: ", mse)
print("R2: ", r2)

MSE:  24.291336890171532
R2:  0.6687565288011175


**Results on Dataset 2: _Advertising Dataset_**

In [7]:
# Load the Advertising table and discard its first column.
# NOTE(review): the scores recorded for this dataset in this notebook
# (R2 of about 0.11) are low; confirm that the first column of
# Advertising.csv really is a row index and that the last column is the
# intended target before trusting them.
data = pd.read_csv('Advertising.csv').iloc[:, 1:]

# Features are every remaining column except the last; the last is the target.
X, Y = data.iloc[:, :-1], data.iloc[:, -1]

# Hold out 20% of the rows for evaluation, with a fixed seed for the split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, random_state=42, test_size=0.2
)

# Standardise features: the scaler is fitted on the training split only and
# then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_transformed = scaler.transform(X_train)
X_test_transformed = scaler.transform(X_test)

In [8]:
# Fit the hand-written ridge model on the scaled training split and score
# its predictions on the held-out split.
ridge_custom = RidgeRegression(alpha=0.01)
ridge_custom.fit(X_train_transformed, Y_train)
y_custom_ridge = ridge_custom.predict(X_test_transformed)

mse, r2 = mean_squared_error(Y_test, y_custom_ridge), r2_score(Y_test, y_custom_ridge)
print("MSE: ", mse)
print("R2: ", r2)

MSE:  27.50064935992029
R2:  0.11004302889151873


In [9]:
# Reference run: scikit-learn's Ridge with the same alpha on the same scaled
# splits, scored with the same metrics.
ridge = Ridge(alpha=0.01).fit(X_train_transformed, Y_train)
y_sklearn_ridge = ridge.predict(X_test_transformed)

mse, r2 = mean_squared_error(Y_test, y_sklearn_ridge), r2_score(Y_test, y_sklearn_ridge)
print("MSE: ", mse)
print("R2: ", r2)

MSE:  27.50064935992027
R2:  0.11004302889151929
