# Loading and Preprocessing

In [3]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load the dataset
california = fetch_california_housing()
data = pd.DataFrame(california.data, columns=california.feature_names)
data['MedHouseVal'] = california.target

# Check for missing values
print("Missing values:\n", data.isnull().sum())

# Feature scaling
scaler = StandardScaler()
features = california.feature_names
data[features] = scaler.fit_transform(data[features])

# Split into train and test sets
X = data.drop('MedHouseVal', axis=1)
y = data['MedHouseVal']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Missing values:
 MedInc         0
HouseAge       0
AveRooms       0
AveBedrms      0
Population     0
AveOccup       0
Latitude       0
Longitude      0
MedHouseVal    0
dtype: int64


# Regression Algorithm Implementation

# 1. Linear Regression

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

lr_mse = mean_squared_error(y_test, y_pred)
lr_mae = mean_absolute_error(y_test, y_pred)
lr_r2 = r2_score(y_test, y_pred)

# 2. Decision Tree Regression

In [14]:
from sklearn.tree import DecisionTreeRegressor

dt = DecisionTreeRegressor(random_state=42)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)

dt_mse = mean_squared_error(y_test, y_pred)
dt_mae = mean_absolute_error(y_test, y_pred)
dt_r2 = r2_score(y_test, y_pred)

# 3. Random Forest Regressor

In [16]:
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

rf_mse = mean_squared_error(y_test, y_pred)
rf_mae = mean_absolute_error(y_test, y_pred)
rf_r2 = r2_score(y_test, y_pred)

# 4. Gradient Boosting Regressor

In [19]:
from sklearn.ensemble import GradientBoostingRegressor

gb = GradientBoostingRegressor(random_state=42)
gb.fit(X_train, y_train)
y_pred = gb.predict(X_test)

gb_mse = mean_squared_error(y_test, y_pred)
gb_mae = mean_absolute_error(y_test, y_pred)
gb_r2 = r2_score(y_test, y_pred)

# 5. Support Vector Regressor (SVR)

In [33]:
from sklearn.svm import SVR

svr = SVR()
svr.fit(X_train, y_train)
y_pred = svr.predict(X_test)

svr_mse = mean_squared_error(y_test, y_pred)
svr_mae = mean_absolute_error(y_test, y_pred)
svr_r2 = r2_score(y_test, y_pred)

# Model Evaluation and Comparison

In [36]:
results = pd.DataFrame({
    'Model': ['Linear Regression', 'Decision Tree', 'Random Forest', 'Gradient Boosting', 'SVR'],
    'MSE': [lr_mse, dt_mse, rf_mse, gb_mse, svr_mse],
    'MAE': [lr_mae, dt_mae, rf_mae, gb_mae, svr_mae],
    'R2': [lr_r2, dt_r2, rf_r2, gb_r2, svr_r2]
})

print(results.sort_values(by='R2', ascending=False))

               Model       MSE       MAE        R2
2      Random Forest  0.255498  0.327613  0.805024
3  Gradient Boosting  0.293999  0.371650  0.775643
4                SVR  0.355198  0.397763  0.728941
1      Decision Tree  0.494272  0.453784  0.622811
0  Linear Regression  0.555892  0.533200  0.575788
