In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
import matplotlib.pyplot as plt

In [4]:
import seaborn as sns

In [6]:
from sklearn.datasets import fetch_california_housing

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [8]:
data = fetch_california_housing()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['Target'] = data.target

In [9]:
print("Missing values:")
print(df.isnull().sum())

Missing values:
MedInc        0
HouseAge      0
AveRooms      0
AveBedrms     0
Population    0
AveOccup      0
Latitude      0
Longitude     0
Target        0
dtype: int64


In [10]:
scaler = StandardScaler()
X = scaler.fit_transform(df.drop(columns=['Target']))
y = df['Target']


In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
results = {}

In [13]:
def evaluate_model(model, model_name):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    results[model_name] = {'MSE': mse, 'MAE': mae, 'R²': r2}
    print(f"{model_name} Results:")
    print(f"MSE: {mse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}\n")

In [14]:
evaluate_model(LinearRegression(), "Linear Regression")
evaluate_model(DecisionTreeRegressor(), "Decision Tree Regressor")
evaluate_model(RandomForestRegressor(), "Random Forest Regressor")
evaluate_model(GradientBoostingRegressor(), "Gradient Boosting Regressor")
evaluate_model(SVR(), "Support Vector Regressor")


Linear Regression Results:
MSE: 0.5559, MAE: 0.5332, R²: 0.5758

Decision Tree Regressor Results:
MSE: 0.5033, MAE: 0.4542, R²: 0.6159

Random Forest Regressor Results:
MSE: 0.2513, MAE: 0.3259, R²: 0.8082

Gradient Boosting Regressor Results:
MSE: 0.2940, MAE: 0.3717, R²: 0.7756

Support Vector Regressor Results:
MSE: 0.3552, MAE: 0.3978, R²: 0.7289



In [15]:
results_df = pd.DataFrame(results).T

In [16]:
print("\nModel Performance Comparison:")
print(results_df)


Model Performance Comparison:
                                  MSE       MAE        R²
Linear Regression            0.555892  0.533200  0.575788
Decision Tree Regressor      0.503320  0.454221  0.615906
Random Forest Regressor      0.251316  0.325926  0.808216
Gradient Boosting Regressor  0.293999  0.371650  0.775643
Support Vector Regressor     0.355198  0.397763  0.728941


In [17]:
best_model = results_df['R²'].idxmax()
worst_model = results_df['R²'].idxmin()
print(f"Best Performing Model: {best_model}")
print(f"Worst Performing Model: {worst_model}")

Best Performing Model: Random Forest Regressor
Worst Performing Model: Linear Regression
