# Decision Tree Examples

Example 1 - Using Entropy/Infogain or Gini index

In [89]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn import datasets
# from sklearn.datasets import load_iris
# data=load_iris()
# df = pd.DataFrame(data=data.data, columns=data.feature_names)
# df.head()

In [95]:
iris_data = datasets.load_iris()


In [96]:
features = iris_data.data
targets = iris_data.target

In [97]:
feature_train, feature_test, target_train, target_test = train_test_split(features, targets, test_size=0.2)

In [98]:
model = DecisionTreeClassifier(criterion='entropy')

In [99]:
predicted = cross_validate(model, features, targets, cv=10)
np.mean(predicted['test_score'])

0.9533333333333334

Example 2 - Use the optimum depth using Gridsearch cross validation - Parameter Tuning

In [100]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import datasets
from sklearn.model_selection import GridSearchCV

In [102]:
iris_data = datasets.load_iris()

features = iris_data.data
targets = iris_data.target
# iris_data.data.shape
# iris_data.target

In [103]:
# with grid search you can find an optimal parameter "parameter tuning"
param_grid = {'max_depth': np.arange(1, 10)}

In [108]:
feature_train, feature_test, target_train, target_test = train_test_split(features, targets, test_size=.2)

In [109]:
# in every iteration we split the data randomly in cross validation + DecisionTreeClassifier
# initializes the tree randomly: that's why you get different results !!!
tree = GridSearchCV(DecisionTreeClassifier(), param_grid)

In [110]:
tree.fit(feature_train, target_train)

GridSearchCV(estimator=DecisionTreeClassifier(),
             param_grid={'max_depth': array([1, 2, 3, 4, 5, 6, 7, 8, 9])})

In [111]:
"Best parameter with Grid Search: ", tree.best_params_


('Best parameter with Grid Search: ', {'max_depth': 3})

In [113]:
grid_predictions = tree.predict(feature_test)
confusion_matrix(target_test, grid_predictions)
accuracy_score(target_test, grid_predictions)

0.9333333333333333

Example 3 - Cancer Detection with the help of Decision Tree

In [36]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.model_selection import cross_validate

In [37]:
cancer_data = datasets.load_breast_cancer()

In [38]:
features = cancer_data.data
labels = cancer_data.target

In [39]:
feature_train, feature_test, target_train, target_test = train_test_split(features, labels, test_size=0.3)

In [40]:
model = DecisionTreeClassifier(criterion='entropy', max_depth=5)

In [44]:
predicted = cross_validate(model, features, labels, cv=10)
np.mean(predicted['test_score'])