In [17]:
import pandas as pd
import numpy as np
from pandas_datareader import data
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV

In [18]:
df = pd.read_csv('/content/boston_house_prices.csv',header = None)
df = df.drop(index = 0)
df.columns = df.iloc[0]
df = df.drop(index = df.index[0])
df = df.reset_index(drop = True)
print(df.shape)
print(df.head())

(506, 14)
1     CRIM  ZN INDUS CHAS    NOX     RM   AGE     DIS RAD  TAX PTRATIO  \
0  0.00632  18  2.31    0  0.538  6.575  65.2    4.09   1  296    15.3   
1  0.02731   0  7.07    0  0.469  6.421  78.9  4.9671   2  242    17.8   
2  0.02729   0  7.07    0  0.469  7.185  61.1  4.9671   2  242    17.8   
3  0.03237   0  2.18    0  0.458  6.998  45.8  6.0622   3  222    18.7   
4  0.06905   0  2.18    0  0.458  7.147  54.2  6.0622   3  222    18.7   

1       B LSTAT  MEDV  
0   396.9  4.98    24  
1   396.9  9.14  21.6  
2  392.83  4.03  34.7  
3  394.63  2.94  33.4  
4   396.9  5.33  36.2  


In [19]:
X = df.iloc[:,0:13]
y = df.iloc[:,13]

In [20]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,random_state = 42)

In [21]:
rt = DecisionTreeRegressor(criterion='squared_error',max_depth = 5)

In [22]:
rt.fit(X_train,y_train)

In [23]:
y_pred = rt.predict(X_test)

In [24]:
r2_score(y_test,y_pred)

0.885137272531848

# Hyper Parameter Tuning...

In [25]:
param_grid = {
    'max_depth':[2,4,8,10,None],
    'criterion':['squared_error','absolute_error'],
    'max_features':[0.25,0.5,1.0],
    'min_samples_split':[0.25,0.5,1.0]
}

In [26]:
reg = GridSearchCV(DecisionTreeRegressor(),param_grid = param_grid)

In [27]:
reg.fit(X_train,y_train)

In [28]:
reg.best_score_

np.float64(0.625520163408475)

In [29]:
reg.best_params_

{'criterion': 'squared_error',
 'max_depth': 8,
 'max_features': 0.5,
 'min_samples_split': 0.25}

# Feature Importance


In [30]:
for importance,name in sorted(zip(rt.feature_importances_,X_train.columns),reverse = True):
  print(name,importance)

RM 0.6511964103521288
LSTAT 0.19543823011082626
CRIM 0.06862175180122342
DIS 0.0665958125859827
AGE 0.0070254591654110654
PTRATIO 0.004391097507129506
NOX 0.003561040385702485
INDUS 0.002627468726682666
B 0.0005427293649131172
ZN 0.0
TAX 0.0
RAD 0.0
CHAS 0.0
