In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score, KFold, GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

In [None]:
# Load the California housing data as a Bunch of NumPy arrays
# (as_frame=False is the default, spelled out here for clarity).
california_housing = fetch_california_housing(as_frame=False)

In [None]:
# Show the dataset's human-readable description instead of the raw Bunch
# repr, which dumps the full arrays and an escaped, hard-to-read DESCR string.
print(california_housing.DESCR)

In [None]:
# Pull the feature matrix, the target vector and the feature names out of the Bunch.
X, y = california_housing.data, california_housing.target
columns = california_housing.feature_names

In [None]:
# Build a labelled frame of the features and append the target as 'MedHouseVal'.
df = pd.DataFrame(X, columns=columns).assign(
    MedHouseVal=california_housing.target
)
df

In [None]:
# Count missing values per column (isna is the canonical name; isnull is its alias).
df.isna().sum()

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=41)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Baseline tree capped at depth 11.
# random_state is fixed because scikit-learn permutes the candidate features at
# every split even with the default splitter, so ties between equally good
# splits can otherwise be broken differently on each run.
model = DecisionTreeRegressor(max_depth=11, random_state=41)
model.fit(X_train, y_train)

In [None]:
# Score the fitted tree on the held-out rows using R^2.
pred = model.predict(X_test)
test_r2 = r2_score(y_test, pred)
print(test_r2)

In [None]:
# Passing an integer as `cv` gives a regressor unshuffled K-fold splits, i.e.
# contiguous blocks of rows in their stored order. If the rows are not randomly
# ordered (NOTE(review): this dataset appears to be grouped geographically --
# confirm), each fold is tested on a region the model never saw and the scores
# are pessimistic. Shuffle explicitly, with a fixed seed for repeatability.
cv_splitter = KFold(n_splits=8, shuffle=True, random_state=41)
cross_validation_scores = cross_val_score(model, X, y, cv=cv_splitter)
print(cross_validation_scores)

In [None]:
# Average the per-fold scores into a single summary number.
avg_score = np.mean(cross_validation_scores)
print(avg_score)

#### HyperParameter Tuning
##### The cross-validation scores above show that the model is not performing well, so let's tune some hyperparameters to improve its performance and accuracy.

In [None]:
# Search space for the decision-tree hyperparameters.
param_grid = {
    'max_depth': [2, 6, 10, 14, 20, 24, 30, None],
    # NOTE(review): 'absolute_error' is typically much slower to fit than the
    # MSE-based criteria; drop it if the search takes too long.
    'criterion': ['squared_error', 'friedman_mse', 'absolute_error'],
    # Fraction of the features considered at each split.
    'max_features': [0.25, 0.5, 1.0],
    # Absolute sample counts. A float here is read as a fraction of the
    # training set, so the previous values (0.25, 0.5, 0.75) only allowed a
    # node to split while it still held at least 25% of all samples. That
    # capped every tree at a handful of leaves and made most of the max_depth
    # values above indistinguishable.
    'min_samples_split': [2, 10, 50]
}

In [None]:
# Exhaustive grid search over `param_grid` with the default cross-validation.
# - The estimator is seeded: with max_features below 1.0 each split considers a
#   random subset of features, so an unseeded search is not reproducible.
# - n_jobs=-1 spreads the candidate fits across all available cores.
# - verbose=1 keeps a progress summary without printing one line per fit.
regression = GridSearchCV(
    DecisionTreeRegressor(random_state=41),
    param_grid=param_grid,
    n_jobs=-1,
    verbose=1,
)
regression.fit(X_train, y_train)

In [None]:
# Mean cross-validated score of the best parameter combination found by the search.
best_cv_score = regression.best_score_
print(best_cv_score)