In [44]:
from sklearn.datasets import fetch_california_housing
import pandas as pd
import numpy as np
import seaborn as sns

data = fetch_california_housing()
X = data.data
y = data.target

X_df = pd.DataFrame(X, columns=data.feature_names)
y_df = pd.DataFrame(y, columns=['MedHouseVal'])

X_df.head(), y_df.head()

(   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
 0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
 1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
 2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
 3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
 4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   
 
    Longitude  
 0    -122.23  
 1    -122.22  
 2    -122.24  
 3    -122.25  
 4    -122.25  ,
    MedHouseVal
 0        4.526
 1        3.585
 2        3.521
 3        3.413
 4        3.422)

In [40]:
print(X_df.isnull().sum())
print(y_df.isnull().sum())

MedInc        0
HouseAge      0
AveRooms      0
AveBedrms     0
Population    0
AveOccup      0
Latitude      0
Longitude     0
dtype: int64
MedHouseVal    0
dtype: int64


In [46]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder 

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [48]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

In [52]:
from sklearn.tree import DecisionTreeRegressor

model = DecisionTreeRegressor()
model.fit(X_train, y_train)

In [55]:
from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor()
model.fit(X_train, y_train)

In [59]:
from sklearn.ensemble import GradientBoostingRegressor

model = GradientBoostingRegressor()
model.fit(X_train, y_train)

In [61]:
from sklearn.svm import SVR

model = SVR()
model.fit(X_train, y_train)

In [67]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    return mse, mae, r2

print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'R2: {r2}')

MAE: 0.397763096343787
MSE: 0.3551984619989429
R2: 0.7289407597956454


In [71]:
models = [lin_reg, dt_reg, rf_reg, gb_reg, svr_reg]
metrics = {}

for model in models:
    mse, mae, r2 = evaluate_model(model, X_test, y_test)
    metrics[model.__class__.__name__] = {'MSE': mse, 'MAE': mae, 'R²': r2}
    
metrics

{'LinearRegression': {'MSE': 0.5558915986952442,
  'MAE': 0.5332001304956566,
  'R²': 0.575787706032451},
 'DecisionTreeRegressor': {'MSE': 0.4942716777366763,
  'MAE': 0.4537843265503876,
  'R²': 0.6228111330554302},
 'RandomForestRegressor': {'MSE': 0.25549776668540763,
  'MAE': 0.32761306601259704,
  'R²': 0.805024407701793},
 'GradientBoostingRegressor': {'MSE': 0.29399901242474274,
  'MAE': 0.37165044848436773,
  'R²': 0.7756433164710084},
 'SVR': {'MSE': 0.3551984619989429,
  'MAE': 0.397763096343787,
  'R²': 0.7289407597956454}}