# Decision Tree Regressor

### Notes

<img src="../00-images/Decision-Tree-Regressor.jpeg" width = "50%" height= "10%">

## Importing Libraries

In [5]:
import pandas as pd
 

## Importing dataset from sklearn

In [6]:
from sklearn.datasets import fetch_california_housing
df = fetch_california_housing()

In [7]:
df.head()

AttributeError: head

## Creating DataFrame and splitting it into independent and independent variable

In [None]:
X =  pd.DataFrame(df.data, columns=df.feature_names)
y = df.target

In [None]:
X.shape

(20640, 8)

In [None]:
y.shape

(20640,)

## Test Train split

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.33, random_state=42)

## Implementing Decision Tree Regression

In [None]:
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor()
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

#### Checking Accuracy

In [None]:
from sklearn.metrics import r2_score
print(r2_score(y_pred, y_test))

0.6041466304841902


- Only 60.4% accurate

## Hyperparameter Tuning

In [None]:
parameter = {
    "criterion" : ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
    'splitter' : ['best', 'random'],
    'max_depth' : [1,2,3,4,5,6,7,8,9,10,11,12],
    'max_features': ['sqrt', 'log2', None]
    
}

In [None]:
# This section will take time to implement hence run it with caution.
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import GridSearchCV
clf = GridSearchCV(regressor, param_grid=parameter, cv = 5, scoring='neg_mean_squared_error')
clf.fit(X_train, y_train)

In [None]:
clf.best_params_
# 
# After executing above grid search cv you will get 
# {'criterion': 'poisson',
#  'max_depth': 9,
#  'max_features': None,
#  'splitter': 'best'}

# 


{'criterion': 'poisson',
 'max_depth': 9,
 'max_features': None,
 'splitter': 'best'}

## Retraining the model 

In [None]:
regressor = DecisionTreeRegressor(criterion='poisson', max_depth=9, max_features=None, splitter='best')
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

### Checking the accuracy

In [None]:
print(r2_score(y_pred,y_test))

0.6321935662059437


# END