In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.datasets import fetch_california_housing

# Pull the California housing dataset and unpack the pieces used below:
# the feature matrix, the regression target and the feature names.
cali = fetch_california_housing()
data = cali.data
labels = cali.target
colNames = cali.feature_names

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across kernel restarts.
xtrain, xtest, ytrain, ytest = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
)

In [2]:
def predict(X, ls, b):
    """Evaluate the linear model ``np.dot(X, ls) + b``.

    Parameters
    ----------
    X : inputs passed as the left operand of ``np.dot``.
    ls : coefficients passed as the right operand of ``np.dot``.
    b : intercept added to the result of the dot product.

    Returns
    -------
    The dot product of ``X`` and ``ls`` shifted by ``b``.
    """
    linearTerm = np.dot(X, ls)
    return linearTerm + b

def ridgeRegression(X, y, penalty, learningRate, iterations):
    """Fit a ridge (L2-penalised) linear regression by batch gradient descent.

    Each step follows the gradient of
    ``(sum((y - X @ w - b) ** 2) + penalty * sum(w ** 2)) / n_samples``
    with respect to the weights ``w`` and the intercept ``b``; the intercept
    is not penalised. Weights and intercept both start at zero. The training
    MSE (without the penalty term) is printed on every 1000th iteration,
    starting with iteration 0.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training inputs.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Training targets; flattened to one dimension before use.
    penalty : float
        Strength of the L2 penalty on the weights.
    learningRate : float
        Step size of each gradient update.
    iterations : int
        Number of gradient steps to take.

    Returns
    -------
    weights : numpy.ndarray of shape (n_features,)
        Fitted coefficients.
    b : float
        Fitted intercept.

    Raises
    ------
    ValueError
        If ``X`` is not a non-empty 2-D array or ``y`` does not hold exactly
        one target per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    # Flatten y: a column vector of shape (n, 1) would otherwise broadcast
    # against the (n,) predictions into an (n, n) residual matrix.
    y = np.asarray(y, dtype=float).ravel()

    if X.ndim != 2 or X.shape[0] == 0:
        raise ValueError("X must be a non-empty 2-D array of shape (n_samples, n_features)")
    if y.shape[0] != X.shape[0]:
        raise ValueError(
            f"X and y disagree on the number of samples: {X.shape[0]} != {y.shape[0]}"
        )

    nSamples, nFeatures = X.shape
    b = 0.0
    weights = np.zeros(nFeatures)

    for step in range(iterations):
        pred = predict(X, weights, b)
        residual = y - pred

        if step % 1000 == 0:
            print(f"iteration {step} mse: {np.sum(residual ** 2) / nSamples}")

        # Derivative of the squared error with respect to the predictions.
        error = -2 * residual
        # The penalty only enters the weight gradient, never the intercept's.
        grad = (np.dot(X.T, error) + (2 * penalty * weights)) / nSamples

        weights -= learningRate * grad
        b -= learningRate * (np.sum(error) / nSamples)

    return weights, b

# Fit the hand-written ridge model on the training split.
# NOTE(review): the features are fed in unscaled, which is presumably why the
# learning rate has to be this small; the logged training MSE is still falling
# at iteration 9000, so the fit has likely not converged - consider
# standardising the inputs. The penalty (1e-5) also differs from the alpha=0.1
# passed to sklearn's Ridge in the next cell - confirm whether the two test
# MSEs are meant to be a like-for-like comparison.
weights, b = ridgeRegression(xtrain, ytrain, penalty=1e-5, learningRate=1e-7, iterations=10000)

# Score on the held-out split; sklearn's signature is (y_true, y_pred).
pred = predict(xtest, weights, b)
mse = mean_squared_error(ytest, pred)
print("test mse:", mse)

interation 0 mse: 5.629742323103131
interation 1000 mse: 1.4520539019883156
interation 2000 mse: 1.3331786379288588
interation 3000 mse: 1.323227201815024
interation 4000 mse: 1.3212815997506338
interation 5000 mse: 1.3199348022814423
interation 6000 mse: 1.3186427122142361
interation 7000 mse: 1.3173649376752556
interation 8000 mse: 1.3160980778898792
interation 9000 mse: 1.3148414691403578
test mse: 1.2872872651952245


In [3]:
# Reference fit with scikit-learn's Ridge on the same train/test split.
libModel = Ridge(alpha=0.1)
libModel.fit(xtrain, ytrain)
pred = libModel.predict(xtest)
# sklearn's signature is mean_squared_error(y_true, y_pred).
mse = mean_squared_error(ytest, pred)
print("test mse:", mse)

test mse: 0.5558827543113786
