In [1]:
# Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, BayesianRidge
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import (
    mean_squared_error, r2_score, mean_absolute_error,
    explained_variance_score
)
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Load the California housing data into a DataFrame, one column per feature,
# and append the regression target as an extra 'target' column
california = fetch_california_housing()
data = pd.DataFrame(california.data, columns=california.feature_names).assign(
    target=california.target
)

# Separate the value being predicted from the predictor columns
y = data['target']
X = data.drop(columns='target')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Initialize Performance Metrics Dictionary
# Maps model name -> {metric name: value}; filled in by the evaluation loop.
performance_metrics = {}

# Define Models
# Every estimator that accepts a seed is pinned to random_state=42 so that
# repeated runs of the comparison produce the same scores.
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(alpha=1.0),
    # NOTE(review): alpha=1.0 is applied to unscaled features here; the
    # regularisation strength has not been tuned.
    'Lasso Regression': Lasso(alpha=1.0),
    # Degree-2 feature expansion followed by ordinary least squares.
    'Polynomial Regression': make_pipeline(PolynomialFeatures(degree=2), LinearRegression()),
    # SVR is the only model wrapped with a StandardScaler in this comparison.
    'Support Vector Regression (SVR)': make_pipeline(StandardScaler(), SVR(kernel='linear')),
    # Seeded like the other tree-based models: without random_state the fitted
    # tree (and therefore its metrics) can differ from run to run.
    'Decision Tree Regression': DecisionTreeRegressor(random_state=42),
    'Random Forest Regression': RandomForestRegressor(n_estimators=100, random_state=42),
    # max_depth=1 means each boosting stage is a single-split tree (a stump).
    'Gradient Boosting Regression': GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=1, random_state=42),
    'Bayesian Ridge Regression': BayesianRidge()
}

# Metric label -> scoring function; each is called as fn(y_true, y_pred).
# The insertion order here is the order in which metrics are stored.
metric_functions = {
    'MSE': mean_squared_error,
    'R2': r2_score,
    'MAE': mean_absolute_error,
    'Explained Variance': explained_variance_score,
}

# Fit each model on the training split, predict on the held-out split,
# and record every metric under the model's name
for model_name, model in models.items():
    predictions = model.fit(X_train, y_train).predict(X_test)
    performance_metrics[model_name] = {
        label: score(y_test, predictions)
        for label, score in metric_functions.items()
    }

# Print one report per model: a heading, one "name: value" line per metric
# (four decimal places), then blank lines as a separator
for model_name, metrics in performance_metrics.items():
    report_lines = [f"{model_name} Performance Metrics:"]
    for metric_name, metric_value in metrics.items():
        report_lines.append(f"{metric_name}: {metric_value:.4f}")
    print(*report_lines, sep="\n")
    print("\n")


Linear Regression Performance Metrics:
MSE: 0.5559
R2: 0.5758
MAE: 0.5332
Explained Variance: 0.5758


Ridge Regression Performance Metrics:
MSE: 0.5558
R2: 0.5759
MAE: 0.5332
Explained Variance: 0.5759


Lasso Regression Performance Metrics:
MSE: 0.9380
R2: 0.2842
MAE: 0.7616
Explained Variance: 0.2842


Polynomial Regression Performance Metrics:
MSE: 0.4643
R2: 0.6457
MAE: 0.4670
Explained Variance: 0.6457


Support Vector Regression (SVR) Performance Metrics:
MSE: 0.5793
R2: 0.5579
MAE: 0.5120
Explained Variance: 0.5698


Decision Tree Regression Performance Metrics:
MSE: 0.4979
R2: 0.6201
MAE: 0.4552
Explained Variance: 0.6203


Random Forest Regression Performance Metrics:
MSE: 0.2554
R2: 0.8051
MAE: 0.3275
Explained Variance: 0.8052


Gradient Boosting Regression Performance Metrics:
MSE: 0.4893
R2: 0.6266
MAE: 0.5149
Explained Variance: 0.6266


Bayesian Ridge Regression Performance Metrics:
MSE: 0.5556
R2: 0.5760
MAE: 0.5332
Explained Variance: 0.5760


