In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.datasets import fetch_california_housing

cali = fetch_california_housing()
data, labels, colNames = cali.data, cali.target, cali.feature_names

xtrain, xtest, ytrain, ytest = train_test_split(data, labels, test_size=0.2, random_state=42)

In [2]:
def predict(data, weights, bias):
    return np.dot(data, weights) + bias

def lassoRegression(data, labels, learningRate, penalty, iterations):
    n = data.shape[1]
    weights = np.zeros(n)
    bias = 0
    
    for _ in range(iterations):
        pred = predict(data, weights, bias)
        loss = mean_squared_error(pred, labels)

        if _%1000 == 0:
            print("mse:", loss)

        error = (-2 * (labels - pred)) / data.shape[0]
        grad = np.dot(data.T, error) + penalty * np.sign(weights)
        weights -= learningRate * grad
        bias -= learningRate * (np.sum(error) / data.shape[0])

    return weights, bias

weights, bias = lassoRegression(xtrain, ytrain, 1e-7, 0.1, 5000)

weights_str = ", ".join([f"{weight:.4f}" for weight in weights])
print(f"coefficients: {weights_str}")

pred = predict(xtest, weights, bias)
print("test mse:", mean_squared_error(pred, ytest))

mse: 5.629742323103131
mse: 1.4525910254790166
mse: 1.3334169106816125
mse: 1.3233783364358205
mse: 1.3214369202608762
coefficients: 0.0020, 0.0049, 0.0011, 0.0001, -0.0000, -0.0001, 0.0040, -0.0149
test mse: 1.293682819465437


In [3]:
model = Lasso(alpha=0.1)
model.fit(xtrain, ytrain)
pred = model.predict(xtest)

weights_str = ", ".join([f"{weight:.4f}" for weight in model.coef_])
print(f"coefficients: {weights_str}")

mse = mean_squared_error(pred, ytest)
print("mse:", mse)

coefficients: 0.3927, 0.0151, -0.0000, 0.0000, 0.0000, -0.0031, -0.1143, -0.0993
mse: 0.6135115198058131
