In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Load the dataset
data = fetch_california_housing()
X = data.data
y = data.target


In [3]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
                                                    random_state=0)

In [4]:
# Define the hyperparameters to tune
params = {
    'max_depth': [2, 4, 6, 8],
    'min_samples_split': [2, 4, 6, 8],
    'min_samples_leaf': [1, 2, 4, 6],
    'max_features': ['auto', 'sqrt', 'log2']
}

In [5]:
# Define the decision tree regressor model
dt = DecisionTreeRegressor(random_state=42)


In [6]:
# Use grid search cross-validation to find the best hyperparameters
grid_search = GridSearchCV(dt, param_grid=params, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)






















In [7]:
# Print the best hyperparameters
print("Best hyperparameters: ", grid_search.best_params_)


Best hyperparameters:  {'max_depth': 8, 'max_features': 'auto', 'min_samples_leaf': 6, 'min_samples_split': 2}


In [8]:
# Use the best hyperparameters to fit the model and make predictions
best_dt = grid_search.best_estimator_
y_pred = best_dt.predict(X_test)

In [9]:
#  R-squared score
print("R-squared Score: ", r2_score(y_test, y_pred))

R-squared Score:  0.6653572020041751
