# Problem 3

Explore different regularization strengths (λ) for Ridge and Lasso regression
and observe the effect on the coefficients and model performance. Compare the
results of both methods to a baseline multiple linear regression model without
regularization in terms of coefficient values and performance metrics.

# Solution

In [5]:

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
import numpy as np

# Fetch the California housing bundle, then pull the feature matrix and the
# target vector out of it under the names the rest of the notebook uses.
housing = fetch_california_housing()
X = housing.data
y = housing.target

In [6]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Regularization strengths to sweep for Ridge and Lasso
lambdas = [0.001, 0.01, 0.1, 1, 10]

# Baseline: ordinary least squares with no regularization
linear_model = LinearRegression().fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)

# Results store: the baseline metrics are filled in right away, while the
# Ridge/Lasso entries start empty and are keyed by lambda later on.
results = {
    'Linear': {
        'r2_score': r2_score(y_test, y_pred_linear),
        'mse': mean_squared_error(y_test, y_pred_linear),
    },
    'Ridge': {},
    'Lasso': {},
}

# Function to train and evaluate models with regularization
def train_eval_model(Model, lambdas, X_train, y_train, X_test, y_test, model_name):
    """Fit ``Model`` once per regularization strength and record test metrics.

    For every value in ``lambdas`` a fresh ``Model(alpha=lam)`` is fitted on
    the training data and scored on the test data. The outcome is written to
    the module-level ``results`` dictionary under ``results[model_name][lam]``.

    Args:
        Model: Estimator class that is constructed as ``Model(alpha=...)`` and
            provides ``fit``, ``predict`` and a ``coef_`` attribute.
        lambdas: Iterable of regularization strengths to try.
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.
        model_name: Key in ``results`` under which the entries are stored.

    Returns:
        The mapping ``results[model_name]``, i.e. ``{lam: {'r2_score': ...,
        'mse': ..., 'coefficients': ...}}``.
    """
    # Create the per-model bucket on demand so that a name which was not
    # pre-registered in ``results`` does not fail with a KeyError.
    model_results = results.setdefault(model_name, {})
    for lam in lambdas:
        model = Model(alpha=lam)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        model_results[lam] = {
            'r2_score': r2_score(y_test, y_pred),
            'mse': mean_squared_error(y_test, y_pred),
            'coefficients': model.coef_
        }
    return model_results

# Sweep every lambda for Ridge first, then for Lasso; each call stores its
# metrics and coefficients in `results` under the given model name.
train_eval_model(
    Ridge, lambdas, X_train, y_train, X_test, y_test, model_name='Ridge'
)
train_eval_model(
    Lasso, lambdas, X_train, y_train, X_test, y_test, model_name='Lasso'
)

# Display results
for model in ['Linear', 'Ridge', 'Lasso']:
    print(f"Results for {model} Regression:")
    if model == 'Linear':
        print(f"  R2 Score: {results['Linear']['r2_score']}")
        print(f"  MSE: {results['Linear']['mse']}")
        # The exercise asks for a comparison of coefficient values against
        # the unregularized baseline, so show the baseline coefficients too.
        print(f"  Coefficients: {linear_model.coef_}\n")
    else:
        # Regularized models: one entry per lambda, in the order swept
        for lam in lambdas:
            print(f"  Lambda: {lam}")
            print(f"    R2 Score: {results[model][lam]['r2_score']}")
            print(f"    MSE: {results[model][lam]['mse']}")
            print(f"    Coefficients: {results[model][lam]['coefficients']}\n")

     

Results for Linear Regression:
  R2 Score: 0.5757877060324524
  MSE: 0.5558915986952422

Results for Ridge Regression:
  Lambda: 0.001
    R2 Score: 0.5757877735520457
    MSE: 0.5558915102169609
    Coefficients: [ 4.48674745e-01  9.72425923e-03 -1.23323033e-01  7.83143175e-01
 -2.02961678e-06 -3.52631803e-03 -4.19792481e-01 -4.33708038e-01]

  Lambda: 0.01
    R2 Score: 0.575788381204484
    MSE: 0.5558907139437502
    Coefficients: [ 4.48673266e-01  9.72427460e-03 -1.23320244e-01  7.83127587e-01
 -2.02958254e-06 -3.52631388e-03 -4.19792433e-01 -4.33707794e-01]

  Lambda: 0.1
    R2 Score: 0.5757944553633948
    MSE: 0.5558827543113781
    Coefficients: [ 4.48658477e-01  9.72442833e-03 -1.23292361e-01  7.82971747e-01
 -2.02924019e-06 -3.52627239e-03 -4.19791946e-01 -4.33705352e-01]

  Lambda: 1
    R2 Score: 0.575854961144014
    MSE: 0.5558034669932193
    Coefficients: [ 4.48510924e-01  9.72596535e-03 -1.23014157e-01  7.81416761e-01
 -2.02581346e-06 -3.52585878e-03 -4.19786908e-01 