In [12]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor


In [13]:
housing = fetch_california_housing()
df = pd.DataFrame(housing.data, columns=housing.feature_names)
df['target'] = housing.target


In [14]:
scaler = StandardScaler()
numerical_cols = housing.feature_names
df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

X = df.drop('target', axis=1)
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

untuned_model = XGBRegressor(random_state=42)
untuned_model.fit(X_train, y_train)
untuned_preds = untuned_model.predict(X_test)
untuned_mse = mean_squared_error(y_test, untuned_preds)

In [15]:
param_grid = {
    'max_depth': [3, 5],
    'learning_rate': [0.01, 0.1]
}

tuned_model = XGBRegressor(random_state=42)
grid_search = GridSearchCV(tuned_model, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
tuned_preds = grid_search.predict(X_test)
tuned_mse = mean_squared_error(y_test, tuned_preds)

print("Optimal Parameters:", best_params)
print(f"Untuned Model MSE: {untuned_mse}")
print(f"Tuned Model MSE: {tuned_mse}")

if tuned_mse < untuned_mse:
    print("Tuning improved the model's performance.")
elif tuned_mse > untuned_mse:
  print("Tuning did not improve the model's performance.")
else:
    print("Tuning resulted in no change in model performance.")


Optimal Parameters: {'learning_rate': 0.1, 'max_depth': 5}
Untuned Model MSE: 0.2225899267544737
Tuned Model MSE: 0.2435998341534843
Tuning did not improve the model's performance.
