In [19]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import GridSearchCV


In [3]:
data = load_iris()

In [4]:
X = data.data
y = data.target

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state = 42)

In [8]:
model = DecisionTreeClassifier()

print(model.get_params())


grid = GridSearchCV(estimator=model, 
                   param_grid={'criterion': ['gini', 'entropy'],
                               'splitter': ['best', 'random'],
                               'max_depth': [1,2]},
                    cv=4)
grid.fit(X_train,y_train)


{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'random_state': None, 'splitter': 'best'}


GridSearchCV(cv=4, estimator=DecisionTreeClassifier(),
             param_grid={'criterion': ['gini', 'entropy'], 'max_depth': [1, 2],
                         'splitter': ['best', 'random']})

In [9]:
df_grid = pd.DataFrame(grid.cv_results_)
df_grid

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
0,0.003988,0.000853,0.001436,0.00087,gini,1,best,"{'criterion': 'gini', 'max_depth': 1, 'splitte...",0.7,0.666667,0.666667,0.666667,0.675,0.014434,5
1,0.005373,0.006333,0.001909,0.00157,gini,1,random,"{'criterion': 'gini', 'max_depth': 1, 'splitte...",0.633333,0.666667,0.666667,0.666667,0.658333,0.014434,7
2,0.003399,0.002321,0.005065,0.003915,gini,2,best,"{'criterion': 'gini', 'max_depth': 2, 'splitte...",0.966667,0.866667,0.933333,0.933333,0.925,0.036324,1
3,0.003549,0.002732,0.005596,0.006769,gini,2,random,"{'criterion': 'gini', 'max_depth': 2, 'splitte...",0.933333,0.9,0.7,0.733333,0.816667,0.101379,3
4,0.0055,0.003431,0.001605,0.000624,entropy,1,best,"{'criterion': 'entropy', 'max_depth': 1, 'spli...",0.7,0.666667,0.666667,0.666667,0.675,0.014434,5
5,0.002977,0.001722,0.001703,0.000896,entropy,1,random,"{'criterion': 'entropy', 'max_depth': 1, 'spli...",0.533333,0.666667,0.666667,0.666667,0.633333,0.057735,8
6,0.004659,0.003306,0.005992,0.005296,entropy,2,best,"{'criterion': 'entropy', 'max_depth': 2, 'spli...",0.966667,0.866667,0.933333,0.933333,0.925,0.036324,1
7,0.003518,0.002855,0.002189,0.001644,entropy,2,random,"{'criterion': 'entropy', 'max_depth': 2, 'spli...",1.0,0.8,0.7,0.733333,0.808333,0.116369,4


In [10]:
y_pred = grid.predict(X_test)

In [11]:
print(cross_val_score(grid, X_train, y_train, cv=3))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[0.9  0.9  0.95]
[[10  0  0]
 [ 0  8  1]
 [ 0  0 11]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.89      0.94         9
           2       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30



# Zoo Data

In [12]:
zoo_data = pd.read_csv('zoo.data', header = None)


In [13]:
X = zoo_data.iloc[:,1:16]
y = zoo_data[17]

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state = 42)

In [23]:
model = DecisionTreeClassifier()

In [24]:
model.fit(X_train,y_train)

DecisionTreeClassifier()

In [25]:
y_pred = model.predict(X_test)

In [26]:
print(cross_val_score(model, X_train, y_train, cv=3))

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[0.96296296 1.         0.96153846]
[[12  0  0  0  0  0  0]
 [ 0  2  0  0  0  0  0]
 [ 0  0  0  0  1  0  0]
 [ 0  0  0  2  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  3  0]
 [ 0  0  0  0  0  0  1]]
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         1
           4       1.00      1.00      1.00         2
           5       0.00      0.00      0.00         0
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         1

    accuracy                           0.95        21
   macro avg       0.71      0.71      0.71        21
weighted avg       0.95      0.95      0.95        21



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
y_test

84    1
55    1
66    1
67    1
45    1
39    6
22    1
44    1
10    1
0     1
18    4
30    6
97    6
33    2
77    7
4     1
93    1
78    2
12    4
31    1
76    3
Name: 17, dtype: int64