In [1]:
#importing all required libraries
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

In [2]:
cali_housing = fetch_california_housing(as_frame=True)
sdf = cali_housing.frame
sdf.columns = sdf.columns.str.lower()
scaler = StandardScaler()
df = scaler.fit_transform(sdf)
X = df[:, :-1]
y = df[:, -1:]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

From scratch implementation of Linear Regression

In [41]:
def linReg(X, y):
    alpha = 0.1
    epoch = 100
    m, n = X.shape
    w = np.random.randn(n).reshape(n, 1)
    b = 0

    for i in range(epoch):
        yhat = np.dot(X, w) + b
        yhat = yhat.reshape(m, 1)
        residual = y - yhat
        dw = -(2*np.dot(X.T,residual))/m
        db = -(2*np.sum(residual))/m
        w = w - alpha*dw
        b = b - alpha*db

    return w, b

Comparision with the method implemented by sklearn

In [42]:
w, b = linReg(X_train, y_train)
ypred = np.dot(X_test, w) + b

In [43]:
sklr = LinearRegression()
sklr.fit(X_train, y_train)
skpred = sklr.predict(X_test)

In [44]:
print(mean_squared_error(y_test, ypred))
print(mean_squared_error(y_test, skpred))

0.40854031097865096
0.3977538926680307


From scratch implementation of Ridge (L2) Regression

In [38]:
def RidgeReg(X, y, lam = 1):
    alpha = 0.1
    epoch = 100
    m, n = X.shape
    w = np.random.randn(n).reshape(n, 1)
    b = 0
    
    for i in range(epoch):
        yhat = np.dot(X, w) + b
        yhat = yhat.reshape(m, 1)
        residual = y - yhat
        dw = -((2*np.dot(X.T,residual)) + (2*lam*w))/m
        db = -(2*np.sum(residual))/m
        w = w - alpha*dw
        b = b - alpha*db

    return w, b

Testing with various lambda values

In [53]:
rw, rb = RidgeReg(X_train, y_train, 2)
rypred = np.dot(X_test, rw) + rb
print(mean_squared_error(y_test, rypred))

0.39981169723468496


In [54]:
rw, rb = RidgeReg(X_train, y_train, 4)
rypred = np.dot(X_test, rw) + rb
print(mean_squared_error(y_test, rypred))

0.4009616286459957


From scratch implementation of Lasso (L1) Regression

In [94]:
def LassoReg(X, y , lam = 1):
    alpha = 0.1
    epoch = 100
    m, n = X.shape
    w = np.random.randn(n).reshape(n, 1)
    b = 0
    
    for i in range(epoch):
        yhat = np.dot(X, w) + b
        yhat = yhat.reshape(m, 1)
        residual = y - yhat
        dw = -((2*np.dot(X.T,residual)))/m + lam*np.sign(w)
        db = -(2*np.sum(residual))/m
        w = w - alpha*dw
        b = b - alpha*db

    return w, b

In [120]:
lw, lb = LassoReg(X_train, y_train, 0.1)
lypred = np.dot(X_test, lw) + lb
print(mean_squared_error(y_test, lypred))

0.47011477905019955
