### Example 01: Decision tree using 'entropy'

In [6]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

dataset = pd.read_csv("diabetes.csv")

X = dataset.iloc[:,0:-1]
y = dataset.iloc[:,-1]

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size =0.3,random_state =1)

clf = DecisionTreeClassifier(criterion="entropy",max_depth=3)
clf.fit(X_train,y_train)

#Predict 
y_pred = clf.predict(X_test)
print("Accuracy:",metrics.accuracy_score(y_test,y_pred))

Accuracy: 0.7705627705627706


### Example 01: Decision tree using 'gini'

In [7]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size =0.3,random_state =1)

clf = DecisionTreeClassifier(criterion="gini",max_depth=3)

clf.fit(X_train,y_train)

#Predict 
y_pred = clf.predict(X_test)

print("Accuracy:",metrics.accuracy_score(y_test,y_pred))

Accuracy: 0.7575757575757576


### Decision Tree with GridSearchCV

In [8]:
import numpy as np
import multiprocessing
from sklearn.datasets import load_digits
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

digits = load_digits()

# Define Param_grids
param_grid = [{
               'criterion': ['gini','entropy'],
               'max_features' : ['auto','log2',None],
               'min_samples_split' : [2,10,25,100,200],
                'max_depth' : [5,10,15,None]
             }]

# Create and train a Grid Search
gs = GridSearchCV(estimator=DecisionTreeClassifier(),param_grid=param_grid,scoring='accuracy',cv=10,n_jobs= \
                  multiprocessing.cpu_count())

gs.fit(digits.data, digits.target)

print(gs.best_estimator_)
print('Decision tree score: %.3f' % gs.best_score_)

DecisionTreeClassifier(max_depth=15)
Decision tree score: 0.831
