In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [38]:
from sklearn.datasets import fetch_california_housing
dataset = fetch_california_housing()

In [39]:
df = pd.DataFrame(dataset.data, columns = dataset.feature_names)
df['Target'] = dataset.target
df.shape

(20640, 9)

In [40]:
df = df.sample(frac = 0.25) # here we are taking only 25% of actual data
df.shape

(5160, 9)

In [41]:
X = df.drop('Target', axis = 1)
y = df.Target

In [42]:
from sklearn.model_selection import train_test_split as tts

X_train, X_test, y_train, y_test = tts(X, y, test_size = 0.33, random_state = 42)

In [43]:
from sklearn.tree import DecisionTreeRegressor

dtr = DecisionTreeRegressor()
dtr.fit(X_train, y_train)
y_test_prediction = dtr.predict(X_test)

In [44]:
from sklearn.metrics import r2_score
score = r2_score(y_test_prediction, y_test)
score

0.4826379157044849

## Hyperparameter Tunning

In [45]:
import warnings
warnings.filterwarnings('ignore')

In [49]:
params = {
    'criterion' : ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
    'splitter' : ['best', 'random'],
    'max_depth' : [1, 2, 3, 4, 5],
    'max_features' : ['auto', 'sqrt', 'log2']
}

dtr = DecisionTreeRegressor()

In [50]:
from sklearn.model_selection import GridSearchCV

clf = GridSearchCV(dtr, param_grid = params, cv = 2, scoring = 'neg_mean_squared_error')
clf.fit(X_train, y_train)
y_test_prediction = clf.predict(X_test)

print("Best Parameters:", clf.best_params_)

Best Parameters: {'criterion': 'poisson', 'max_depth': 5, 'max_features': 'auto', 'splitter': 'best'}


In [51]:
score = r2_score(y_test_prediction, y_test)
score

0.410605663871231