In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Fetch the California Housing dataset; keep the features in a DataFrame so
# every column carries its feature name, and the target as returned.
data = fetch_california_housing()
y = data.target
X = pd.DataFrame(data=data.data, columns=data.feature_names)

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

def report_metrics(title, y_true, y_hat, trailing_blank=True):
    """Print R², RMSE and MAE for one set of predictions.

    Parameters
    ----------
    title : str
        Heading printed above the metrics (a colon is appended).
    y_true : array-like
        Ground-truth target values.
    y_hat : array-like
        Predictions to score against ``y_true``.
    trailing_blank : bool, default True
        When True, an empty line is printed after the MAE row so that
        consecutive reports are visually separated.

    Returns
    -------
    None
        Nothing is returned on purpose: a call on the last line of a cell
        therefore does not add an ``Out[...]`` entry to the notebook.
    """
    print(f"{title}:")
    print(f"R²: {r2_score(y_true, y_hat):.4f}")
    # RMSE is the square root of the mean squared error.
    print(f"RMSE: {np.sqrt(mean_squared_error(y_true, y_hat)):.4f}")
    print(
        f"MAE: {mean_absolute_error(y_true, y_hat):.4f}",
        end="\n\n" if trailing_blank else "\n",
    )


# ---------- BASELINE MODEL ----------
# Ordinary least squares on the raw (unscaled) features.
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

report_metrics("Linear Regression", y_test, y_pred)

# ---------- FEATURE SCALING ----------
# The scaler is fitted on the training split only and then applied to the
# test split, so no test-set statistics are used during fitting.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

lr_scaled = LinearRegression()
lr_scaled.fit(X_train_scaled, y_train)
y_pred_scaled = lr_scaled.predict(X_test_scaled)

report_metrics("Linear Regression with Scaling", y_test, y_pred_scaled)

# ---------- POLYNOMIAL REGRESSION ----------
# Degree-2 expansion of the standardized features, followed by a plain
# linear fit on the expanded matrix.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train_scaled)
X_test_poly = poly.transform(X_test_scaled)

lr_poly = LinearRegression()
lr_poly.fit(X_train_poly, y_train)
y_pred_poly = lr_poly.predict(X_test_poly)

report_metrics("Polynomial Regression (degree=2)", y_test, y_pred_poly)

# ---------- RIDGE REGRESSION ----------
# Ridge with alpha=1.0 on the standardized features.
ridge = Ridge(alpha=1.0)
ridge.fit(X_train_scaled, y_train)
y_pred_ridge = ridge.predict(X_test_scaled)

report_metrics("Ridge Regression", y_test, y_pred_ridge)

# ---------- LASSO REGRESSION ----------
# Lasso with alpha=0.1 on the standardized features.
lasso = Lasso(alpha=0.1)
lasso.fit(X_train_scaled, y_train)
y_pred_lasso = lasso.predict(X_test_scaled)

# Last report of the cell: no trailing blank line.
report_metrics("Lasso Regression", y_test, y_pred_lasso, trailing_blank=False)

Linear Regression:
R²: 0.5758
RMSE: 0.7456
MAE: 0.5332

Linear Regression with Scaling:
R²: 0.5758
RMSE: 0.7456
MAE: 0.5332

Polynomial Regression (degree=2):
R²: 0.6457
RMSE: 0.6814
MAE: 0.4670

Ridge Regression:
R²: 0.5758
RMSE: 0.7456
MAE: 0.5332

Lasso Regression:
R²: 0.4814
RMSE: 0.8244
MAE: 0.6222
