In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_california_housing


In [None]:
# Fetch the California Housing dataset and unpack it into the
# feature matrix (X) and the regression target (y).
data = fetch_california_housing()
X, y = data.data, data.target


In [None]:
# Sturges' rule: use k = ceil(log2(n) + 1) bins for n samples.
n = len(y)
k = int(np.ceil(np.log2(n) + 1))

# Discretize the continuous target into k equal-width bins so it can be used
# as a stratification label.
# - k bins require k + 1 edges (k edges would only delimit k - 1 intervals).
# - Only the interior edges are passed to np.digitize, so labels run 0..k-1 and
#   samples with y == max(y) land in the top bin instead of an extra overflow
#   bin of their own (a one-member stratum makes the stratified split raise).
bins = np.linspace(np.min(y), np.max(y), k + 1)
y_binned = np.digitize(y, bins[1:-1])

# Stratified 80/20 split: each target bin is represented proportionally in
# both the training and the test set. The fixed seed keeps it reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y_binned, test_size=0.2, random_state=42
)


In [None]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [None]:
# Fit a Ridge regression (alpha=1.0) on the standardized training data.
ridge_model = Ridge(alpha=1.0).fit(X_train, y_train)

# Fit a Lasso regression (alpha=0.1) on the same training data.
lasso_model = Lasso(alpha=0.1).fit(X_train, y_train)


In [None]:
def _error_metrics(y_true, y_pred):
    """Return the (MAE, MSE, RMSE) triple for the given true and predicted targets."""
    mse = mean_squared_error(y_true, y_pred)
    return mean_absolute_error(y_true, y_pred), mse, np.sqrt(mse)


# Test-set predictions from both fitted models
ridge_pred = ridge_model.predict(X_test)
lasso_pred = lasso_model.predict(X_test)

# Error metrics for each model (RMSE is the square root of the MSE)
ridge_mae, ridge_mse, ridge_rmse = _error_metrics(y_test, ridge_pred)
lasso_mae, lasso_mse, lasso_rmse = _error_metrics(y_test, lasso_pred)

# Report the results, one metric per line, with a blank line between models
print(
    "Ridge Regression Results:",
    f"MAE: {ridge_mae:.2f}",
    f"MSE: {ridge_mse:.2f}",
    f"RMSE: {ridge_rmse:.2f}\n",
    sep="\n",
)

print(
    "Lasso Regression Results:",
    f"MAE: {lasso_mae:.2f}",
    f"MSE: {lasso_mse:.2f}",
    f"RMSE: {lasso_rmse:.2f}",
    sep="\n",
)


Ridge Regression Results:
MAE: 0.53
MSE: 0.54
RMSE: 0.73

Lasso Regression Results:
MAE: 0.62
MSE: 0.69
RMSE: 0.83


Ridge Regression Results:
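MAE: 0.53 → The average absolute error is relatively low: on average, predictions deviate from the true value by about 0.53 target units. Because the target is the median house value expressed in units of $100,000, this corresponds to roughly $53,000.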
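MSE: 0.54 → The mean squared error is expressed in squared target units, so it is not directly comparable to the MAE. Because squaring weights large errors more heavily, this value indicates the presence of some larger errors, but they are not drastically significant.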
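RMSE: 0.73 → The root mean squared error brings the error back to the target's original units and suggests that predictions typically deviate by about 0.73 units. It is larger than the MAE because squaring the errors before averaging gives more weight to the larger ones.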
Lasso Regression Results:
MAE: 0.62 → Lasso’s mean absolute error is higher than Ridge’s, suggesting slightly less accurate predictions.
MSE: 0.69 → Lasso exhibits a higher MSE, indicating that its errors tend to be larger on average compared to Ridge.
RMSE: 0.83 → The RMSE for Lasso is also greater, reinforcing that its errors are more pronounced.
Key Observations:
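Ridge Regression appears to outperform Lasso Regression across all three metrics (MAE, MSE, RMSE). This suggests that Ridge is capturing the underlying trends in the data more effectively while maintaining generalizability. Note that this comparison uses a single train-test split and fixed, untuned regularization strengths (alpha=1.0 for Ridge, alpha=0.1 for Lasso), so it reflects these particular settings rather than the best each model can achieve.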
Lasso Regression, although useful for feature selection by driving some coefficients to zero, may not be as effective here, possibly because the dataset lacks excessive irrelevant features that would benefit from Lasso’s regularization.
Potential Next Steps:
Hyperparameter Optimization: Experimenting with different values of the alpha parameter for both Ridge and Lasso may further enhance their performance.
Alternative Models: Since Ridge is performing better, exploring ElasticNet (a combination of Ridge and Lasso) might be beneficial. Additionally, trying out tree-based models like Random Forest or XGBoost could provide further improvements.
Cross-Validation: Implementing k-fold cross-validation can help ensure that the model’s performance is robust and not dependent on a specific train-test split.