In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeRegressor

from utils import load_data


In [2]:
# Load the full table, then separate the regression target ("MEDV") from the
# remaining columns, which serve as predictors.
df = load_data()
y = df["MEDV"]
X = df.drop(columns="MEDV")

# Hold out 20% of the rows for final evaluation; the fixed random_state keeps
# the split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [3]:
# Hyper-parameter grid for Ridge regression: regularisation strength (alpha),
# the solver used to fit the coefficients, and whether an intercept is fitted.
# NOTE(review): the `_lr` suffix is kept because the results cell reads
# `y_pred_lr`, although the estimator here is Ridge, not LinearRegression.
param_grid_lr = {
    'alpha': [0.01, 0.1, 1.0, 10],
    'solver': ['auto', 'svd', 'cholesky'],
    'fit_intercept': [True, False]
}

# Exhaustive 3-fold cross-validated search over the grid, ranked by R^2.
# NOTE(review): Ridge's L2 penalty depends on feature scale and nothing visible
# in this notebook standardises X. Confirm whether load_data() already does so;
# if not, consider searching over a StandardScaler + Ridge pipeline instead.
grid_lr = GridSearchCV(Ridge(), param_grid_lr, cv=3, scoring='r2')
grid_lr.fit(X_train, y_train)

# Predictions on the held-out split; with GridSearchCV's default refit=True,
# predict() uses the best configuration refit on the full training split.
y_pred_lr = grid_lr.predict(X_test)


In [4]:
# Candidate settings for the decision tree: maximum depth, and the minimum
# number of samples needed to split a node or to form a leaf.
param_grid_dt = {
    'max_depth': [3, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# 3-fold cross-validated grid search ranked by R^2; the tree's random_state is
# pinned so repeated runs build the same trees.
grid_dt = GridSearchCV(
    estimator=DecisionTreeRegressor(random_state=42),
    param_grid=param_grid_dt,
    scoring='r2',
    cv=3,
)
grid_dt.fit(X_train, y_train)

# Score the held-out rows with the searcher's predict().
y_pred_dt = grid_dt.predict(X_test)


In [5]:
# Candidate settings for the random forest: number of trees, per-tree depth cap
# (None leaves depth unrestricted) and the minimum samples needed to split.
param_grid_rf = {
    'n_estimators': [50, 100],
    'max_depth': [5, 10, None],
    'min_samples_split': [2, 5],
}

# 3-fold cross-validated grid search ranked by R^2; the forest's random_state is
# pinned so repeated runs build the same ensembles.
grid_rf = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=param_grid_rf,
    scoring='r2',
    cv=3,
)
grid_rf.fit(X_train, y_train)

# Score the held-out rows with the searcher's predict().
y_pred_rf = grid_rf.predict(X_test)


In [6]:
# One (label, MSE, R^2) row per tuned model, all evaluated against the same
# held-out targets. The labels are the strings shown in the results table.
results_tuned = [
    (label, mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred))
    for label, y_pred in (
        ("Ridge Regression", y_pred_lr),
        ("Decision Tree (Tuned)", y_pred_dt),
        ("Random Forest (Tuned)", y_pred_rf),
    )
]

# Tabulate the metrics; the bare last expression renders the frame as the
# cell's output.
df_tuned = pd.DataFrame(results_tuned, columns=["Model", "MSE", "R2"])
df_tuned


Unnamed: 0,Model,MSE,R2
0,Ridge Regression,24.291746,0.668751
1,Decision Tree (Tuned),20.459822,0.721004
2,Random Forest (Tuned),7.422556,0.898784
