In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the California housing dataset through scikit-learn and unpack the
# feature matrix (X) and the regression target (y).
data = fetch_california_housing()
X, y = data.data, data.target

# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Baseline: a plain linear regression fitted on the training split.
# `fit` returns the estimator itself, so construction and fitting can be chained.
baseline_model = LinearRegression().fit(X_train, y_train)
y_pred_baseline = baseline_model.predict(X_test)

# Score the baseline on the held-out test split.
# RMSE is obtained as the square root of the mean squared error.
baseline_mse = mean_squared_error(y_test, y_pred_baseline)
baseline_rmse = np.sqrt(baseline_mse)
baseline_r2 = r2_score(y_test, y_pred_baseline)

# Untuned decision tree (library defaults apart from the seed), fitted on the
# training split. The fixed seed makes the fitted tree reproducible.
tree_model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred_tree = tree_model.predict(X_test)

# Same two test-set metrics as for the baseline, so the models are comparable.
tree_mse = mean_squared_error(y_test, y_pred_tree)
tree_rmse = np.sqrt(tree_mse)
tree_r2 = r2_score(y_test, y_pred_tree)

# 5-fold cross-validated RMSE of the untuned decision tree.
#
# Cross-validate on the training split only:
#   * the test split stays completely unseen, so the hold-out metrics remain an
#     independent check;
#   * it is the same data the grid search below cross-validates on, so the two
#     CV scores are directly comparable;
#   * train_test_split has already shuffled the rows, so the contiguous folds
#     produced by an integer `cv` are not tied to the dataset's original row
#     order.
# cross_val_score fits clones of `tree_model`; the already-fitted instance is
# left untouched.
cv_scores = cross_val_score(
    tree_model,
    X_train,
    y_train,
    cv=5,
    scoring='neg_root_mean_squared_error',
)

# The scorer reports negated RMSE (higher is better); flip the sign back so the
# value reads as an ordinary RMSE.
cv_rmse = -cv_scores.mean()

# Candidate hyper-parameters for the tree: 5 * 4 * 4 = 80 combinations.
param_grid = dict(
    max_depth=[None, 5, 10, 15, 20],
    min_samples_split=[2, 5, 10, 20],
    min_samples_leaf=[1, 2, 5, 10],
)

# Exhaustive search over the grid, scoring each combination by 5-fold
# cross-validated (negated) RMSE. n_jobs=-1 uses every available CPU core.
grid_search = GridSearchCV(
    estimator=DecisionTreeRegressor(random_state=42),
    param_grid=param_grid,
    scoring='neg_root_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# With the default refit behaviour, best_estimator_ is the winning
# configuration refitted on the whole training split.
best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(X_test)

# Evaluate the tuned tree on the held-out test split.
best_mse = mean_squared_error(y_test, y_pred_best)
best_rmse = np.sqrt(best_mse)
best_r2 = r2_score(y_test, y_pred_best)

# Report all results in a fixed order: baseline, untuned tree, its
# cross-validated score, then the grid-search winner.
summary = (
    ("Baseline Linear Regression RMSE:", baseline_rmse),
    ("Baseline Linear Regression R2:", baseline_r2),
    ("Decision Tree RMSE:", tree_rmse),
    ("Decision Tree R2:", tree_r2),
    ("Cross Validation RMSE (Decision Tree):", cv_rmse),
    ("Best Parameters:", grid_search.best_params_),
    ("Tuned Decision Tree RMSE:", best_rmse),
    ("Tuned Decision Tree R2:", best_r2),
)

# print() joins the label and the value with a single space, one line per entry.
for label, value in summary:
    print(label, value)

Baseline Linear Regression RMSE: 0.7455813830127747
Baseline Linear Regression R2: 0.5757877060324528
Decision Tree RMSE: 0.7037294974840077
Decision Tree R2: 0.622075845135081
Cross Validation RMSE (Decision Tree): 0.8957794382630011
Best Parameters: {'max_depth': 15, 'min_samples_leaf': 10, 'min_samples_split': 2}
Tuned Decision Tree RMSE: 0.602772148511738
Tuned Decision Tree R2: 0.7227321629700685
