## Decision Tree

### Classifier

In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,roc_auc_score

In [4]:
cancer_data=load_breast_cancer()

In [73]:
X_train,X_test,y_train,y_test=train_test_split(cancer_data.data,cancer_data.target,test_size=0.3,random_state=30)
print(X_train.shape,X_test.shape)

(398, 30) (171, 30)


In [75]:
classifier=DecisionTreeClassifier(criterion='gini',max_depth=5,min_samples_split=10,random_state=40)
classifier.fit(X_train,y_train)

In [77]:
y_pred=classifier.predict(X_test)

In [81]:
a_score=accuracy_score(y_test,y_pred)
r_score=recall_score(y_test,y_pred)
p_score=precision_score(y_test,y_pred)
yp=classifier.predict_proba(X_test)[:,1]
rucauc_score=roc_auc_score(y_test,yp)

print(a_score,p_score,r_score,rucauc_score,sep="\n")

0.9415204678362573
0.9459459459459459
0.963302752293578
0.9640426161586267


### Regressor

In [10]:
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score

In [11]:
housing_data=fetch_california_housing()

In [64]:
X_train,X_test,y_train,y_test=train_test_split(housing_data.data,housing_data.target,test_size=0.3)
regressor_model=DecisionTreeRegressor(max_depth=4,ccp_alpha=0.1099)

In [66]:
regressor_model.fit(X_train,y_train)

In [68]:
ypred=regressor_model.predict(X_test)
print(mean_squared_error(y_test,ypred),mean_absolute_error(y_test,ypred),r2_score(y_test,ypred),sep="\n")

0.9155133790188528
0.7430147377898646
0.30672806002916553


In [83]:
"Pruning of decision tree"

'Pruning of decision tree'

In [16]:
path=regressor_model.cost_complexity_pruning_path(X_train,y_train)
alpha=path.ccp_alphas
alpha=alpha[:-1]

In [17]:
print([x for x in alpha])

[0.0, 0.00247980788581827, 0.004637981821264469, 0.006203085817834977, 0.006698997875666024, 0.00742701479976407, 0.01111495286865842, 0.012690454463103629, 0.01497317458966771, 0.015411042805172845, 0.01805811255328421, 0.023926368624329758, 0.05959573618225816, 0.07765428134752264, 0.10993146606916893]


In [18]:
models=[]
for value in alpha:
    model=DecisionTreeRegressor(ccp_alpha=value)
    model.fit(X_train,y_train)
    models.append(model)

In [19]:
for model in models:
    ypred=model.predict(X_test)
    print(model,round(mean_squared_error(y_test,ypred),2),round(mean_absolute_error(y_test,ypred),2),sep="  ")

DecisionTreeRegressor()  0.54  0.47
DecisionTreeRegressor(ccp_alpha=0.00247980788581827)  0.47  0.49
DecisionTreeRegressor(ccp_alpha=0.004637981821264469)  0.51  0.52
DecisionTreeRegressor(ccp_alpha=0.006203085817834977)  0.54  0.54
DecisionTreeRegressor(ccp_alpha=0.006698997875666024)  0.55  0.54
DecisionTreeRegressor(ccp_alpha=0.00742701479976407)  0.56  0.55
DecisionTreeRegressor(ccp_alpha=0.01111495286865842)  0.59  0.57
DecisionTreeRegressor(ccp_alpha=0.012690454463103629)  0.61  0.59
DecisionTreeRegressor(ccp_alpha=0.01497317458966771)  0.64  0.6
DecisionTreeRegressor(ccp_alpha=0.015411042805172845)  0.65  0.62
DecisionTreeRegressor(ccp_alpha=0.01805811255328421)  0.65  0.62
DecisionTreeRegressor(ccp_alpha=0.023926368624329758)  0.67  0.62
DecisionTreeRegressor(ccp_alpha=0.05959573618225816)  0.76  0.66
DecisionTreeRegressor(ccp_alpha=0.07765428134752264)  0.76  0.66
DecisionTreeRegressor(ccp_alpha=0.10993146606916893)  0.93  0.75


In [20]:
model.score(X_test,y_test)
model.score(X_train,y_train)

0.3078079415064968