In [10]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score,
    root_mean_squared_error,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

# Load CSV

In [11]:
# Read the housing table, then separate the regression target
# ("MedHouseValue") from the remaining columns, which serve as predictors.
df = pd.read_csv("data/california_housing.csv")
y = df["MedHouseValue"]
X = df.drop(columns="MedHouseValue")

# Train-Test Split

In [12]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Scale features

In [14]:
# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits so the test rows never influence the statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Linear Regression

In [15]:
# Ordinary least squares baseline, trained and evaluated on the
# standardized features.
lin_reg = LinearRegression().fit(X_train_scaled, y_train)
y_pred_lin = lin_reg.predict(X_test_scaled)

# Decision Tree

In [22]:
# Single depth-limited regression tree, fitted on the raw (unscaled)
# features; the seed makes the fitted tree reproducible.
tree = DecisionTreeRegressor(
    max_depth=8,
    random_state=42,
).fit(X_train, y_train)
y_pred_tree = tree.predict(X_test)


# Random Forest

In [16]:
# Ensemble of 200 depth-limited trees, fitted on the raw (unscaled)
# features with a fixed seed for reproducibility.
rf = RandomForestRegressor(
    n_estimators=200,
    max_depth=8,
    random_state=42,
).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

In [23]:
# Report RMSE, MAE and R2 on the held-out test split for every fitted model.
#
# The three reports share one loop so each model is scored with exactly the
# same metrics and formatting; adding a model only needs a new dict entry.
# The metric functions are imported in the notebook's first (imports) cell.
test_predictions = {
    "Linear Regression": y_pred_lin,
    "Decision Tree": y_pred_tree,
    "Random Forest": y_pred_rf,
}

# Dicts preserve insertion order, so the reports print in the order above.
for model_name, y_pred in test_predictions.items():
    print(f"=== {model_name} ===")
    print("RMSE:", root_mean_squared_error(y_test, y_pred))
    print("MAE:", mean_absolute_error(y_test, y_pred))
    print("R2:", r2_score(y_test, y_pred))
    print("-" * 40)

=== Linear Regression ===
RMSE: 0.7455813830127763
MAE: 0.5332001304956565
R2: 0.575787706032451
----------------------------------------
=== Decision Tree ===
RMSE: 0.6496502038503689
MAE: 0.44816862399364304
R2: 0.6779284983766045
----------------------------------------
=== Random Forest ===
RMSE: 0.584439948975711
MAE: 0.40185926143060774
R2: 0.7393408778248319
----------------------------------------
