In [112]:
import pandas as pd
from sklearn import tree
import pydotplus
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import matplotlib.image as pltimg
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix,f1_score
import collections
from IPython.display import Image
import os

In [113]:
training_df = pd.read_csv(os.path.join(os.path.abspath(""), '../datasets/merged/training_dataset.csv'))
test_df = pd.read_csv(os.path.join(os.path.abspath(""), '../datasets/merged/test_dataset.csv')) 
training_df.drop('Unnamed: 0',axis=1,inplace=True)
num_rows_with_nan = training_df.isna().any(axis=1).sum()

print(f"Number of rows with at least one NaN: {num_rows_with_nan}")
training_df.head()


Number of rows with at least one NaN: 0


Unnamed: 0,Date_ID,Month,NumberOfDaysInMonth,Quarter,Year,LeapYear,Minimum Temperature,Maximum Temperature,Barley,Canola,...,Quebec,Saskatchewan,Territories,Yukon,Atlantic_Region,British Columbia_Region,Canada_Region,Prairies_Region,Territories_Region,Total_Value
0,88,5,31,2,2008,1,3,29,False,True,...,True,False,False,False,False,False,True,False,False,1
1,32,9,30,3,2003,0,5,30,False,False,...,False,False,False,False,False,False,False,True,False,1
2,35,12,31,4,2003,0,5,34,False,False,...,False,False,False,False,False,True,False,False,False,1
3,94,11,30,4,2008,1,3,29,False,True,...,False,True,False,False,False,False,False,True,False,2
4,62,3,31,1,2006,0,5,34,False,False,...,False,True,False,False,False,False,False,True,False,1


In [114]:
X_train = training_df.drop('Total_Value',axis=1)
X_train.head()

Unnamed: 0,Date_ID,Month,NumberOfDaysInMonth,Quarter,Year,LeapYear,Minimum Temperature,Maximum Temperature,Barley,Canola,...,Prince Edward Island,Quebec,Saskatchewan,Territories,Yukon,Atlantic_Region,British Columbia_Region,Canada_Region,Prairies_Region,Territories_Region
0,88,5,31,2,2008,1,3,29,False,True,...,False,True,False,False,False,False,False,True,False,False
1,32,9,30,3,2003,0,5,30,False,False,...,False,False,False,False,False,False,False,False,True,False
2,35,12,31,4,2003,0,5,34,False,False,...,False,False,False,False,False,False,True,False,False,False
3,94,11,30,4,2008,1,3,29,False,True,...,False,False,True,False,False,False,False,False,True,False
4,62,3,31,1,2006,0,5,34,False,False,...,False,False,True,False,False,False,False,False,True,False


In [115]:
y_train = training_df['Total_Value']
y_train.head()

0    1
1    1
2    1
3    2
4    1
Name: Total_Value, dtype: int64

In [116]:
X_test = test_df.drop(['Unnamed: 0','Total_Value'],axis=1)
y_test = test_df['Total_Value']

In [117]:
X_test.head()

Unnamed: 0,Date_ID,Month,NumberOfDaysInMonth,Quarter,Year,LeapYear,Minimum Temperature,Maximum Temperature,Barley,Canola,...,Prince Edward Island,Quebec,Saskatchewan,Territories,Yukon,Atlantic_Region,British Columbia_Region,Canada_Region,Prairies_Region,Territories_Region
0,209,6,30,2,2018,0,5,28,True,False,...,False,False,False,False,False,False,False,True,False,False
1,17,6,30,2,2002,0,3,29,False,True,...,False,False,True,False,False,False,False,False,True,False
2,246,7,31,3,2021,0,5,29,False,False,...,False,False,False,False,False,False,False,False,True,False
3,107,12,31,4,2009,0,3,29,False,True,...,False,False,False,False,False,False,False,False,True,False
4,19,8,31,3,2002,0,5,34,False,False,...,False,False,False,False,False,False,False,False,True,False


In [118]:
y_test.head()

0    1
1    1
2    2
3    2
4    1
Name: Total_Value, dtype: int64

In [119]:
criteria = ["gini","entropy"]
splitters = ["best","random"]
classifiers_scores = []
#print("SPLIT")
for i in range(5,50,5):
    for criterion in criteria:
        for splitter in splitters:
            classifier = DecisionTreeClassifier(criterion=criterion,splitter=splitter,max_depth=i)
            classifier.fit(X_train, y_train)

            
            y_pred = classifier.predict(X_test)
            score = f1_score(y_test, y_pred, average='macro')
            classifiers_scores.append((criterion, splitter, i, "max_depth", score))

            print("Max Depth",str(i),criterion,splitter)
            print(confusion_matrix(y_test, y_pred))
            print(classification_report(y_test, y_pred))

for i in range(5,50,5):
    for criterion in criteria:
        for splitter in splitters:
            classifier = DecisionTreeClassifier(criterion=criterion,splitter=splitter,min_samples_split=i)
            classifier.fit(X_train, y_train)

            
            y_pred = classifier.predict(X_test)
            score = f1_score(y_test, y_pred, average='macro')
            classifiers_scores.append((criterion, splitter, i, "min_samples_split", score))
            

            print("Min Samples",str(i),criterion,splitter)
            print(confusion_matrix(y_test, y_pred))
            print(classification_report(y_test, y_pred))

Max Depth 5 gini best
[[   0    1    0    0    0    0    0    0    0]
 [   0 1046    8    1    0    0    0    0    0]
 [   0   26   35   18    0    0    0    0    0]
 [   0    3    5   23    0    0    0    0    0]
 [   0    0    0   16    0    0    0    0    0]
 [   0    0    0    7    0    0    0    0    0]
 [   0    0    0    3    0    0    0    0    0]
 [   0    0    0    3    0    0    0    0    0]
 [   0    0    0    1    0    0    0    0    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.97      0.99      0.98      1055
           2       0.73      0.44      0.55        79
           3       0.32      0.74      0.45        31
           4       0.00      0.00      0.00        16
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1

   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.97      0.99      0.98      1055
           2       0.72      0.62      0.67        79
           3       0.46      0.42      0.44        31
           4       0.38      0.62      0.48        16
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1

    accuracy                           0.94      1196
   macro avg       0.28      0.30      0.29      1196
weighted avg       0.92      0.94      0.93      1196

Max Depth 10 gini best
[[   0    1    0    0    0    0    0    0    0]
 [   0 1043   11    1    0    0    0    0    0]
 [   0   13   54   11    0    0    1    0    0]
 [   0    3   12   12    4    0    0    0    0]
 [   0    0    1    4   10    1    0    0    0]
 [   0    0    0    0    6    0 

Max Depth 10 entropy best
[[   0    1    0    0    0    0    0    0    0]
 [   2 1041   12    0    0    0    0    0    0]
 [   0   12   59    8    0    0    0    0    0]
 [   0    1    8   19    3    0    0    0    0]
 [   0    0    1    7    8    0    0    0    0]
 [   0    0    0    4    3    0    0    0    0]
 [   0    0    0    2    1    0    0    0    0]
 [   0    0    0    0    3    0    0    0    0]
 [   0    0    0    0    1    0    0    0    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.99      0.99      0.99      1055
           2       0.74      0.75      0.74        79
           3       0.47      0.61      0.54        31
           4       0.42      0.50      0.46        16
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.99      0.98      1055
           2       0.62      0.63      0.63        79
           3       0.39      0.29      0.33        31
           4       0.40      0.38      0.39        16
           5       0.14      0.14      0.14         7
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1

    accuracy                           0.93      1196
   macro avg       0.28      0.27      0.28      1196
weighted avg       0.92      0.93      0.92      1196

Max Depth 15 gini random
[[   0    1    0    0    0    0    0    0    0]
 [   0 1040   15    0    0    0    0    0    0]
 [   0   16   45   17    0    0    1    0    0]
 [   0    4    9   15    2    0    1    0    0]
 [   0    0    4    3    5    1    2    1    0]
 [   0    0    0    2    3    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Max Depth 20 entropy best
[[   0    1    0    0    0    0    0    0    0]
 [   0 1044   11    0    0    0    0    0    0]
 [   0   14   55    8    1    0    1    0    0]
 [   0    2    9   14    6    0    0    0    0]
 [   0    0    2    3    8    1    2    0    0]
 [   0    0    0    1    4    0    2    0    0]
 [   0    0    0    1    1    0    1    0    0]
 [   0    0    0    0    1    2    0    0    0]
 [   0    0    0    0    0    1    0    0    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.99      0.99      1055
           2       0.71      0.70      0.71        79
           3       0.52      0.45      0.48        31
           4       0.38      0.50      0.43        16
           5       0.00      0.00      0.00         7
           6       0.17      0.33      0.22         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Max Depth 25 gini best
[[   0    1    0    0    0    0    0    0    0]
 [   0 1043   12    0    0    0    0    0    0]
 [   0   18   48    9    2    1    1    0    0]
 [   0    3   13    9    6    0    0    0    0]
 [   0    0    2    3    7    2    2    0    0]
 [   0    0    0    0    5    0    1    0    1]
 [   0    0    0    0    1    1    0    1    0]
 [   0    0    0    0    2    1    0    0    0]
 [   0    0    0    0    0    0    1    0    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.99      0.98      1055
           2       0.64      0.61      0.62        79
           3       0.43      0.29      0.35        31
           4       0.30      0.44      0.36        16
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1

  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Max Depth 25 entropy best
[[   0    1    0    0    0    0    0    0    0]
 [   2 1043   10    0    0    0    0    0    0]
 [   0   13   58    7    0    0    1    0    0]
 [   0    3    8   12    8    0    0    0    0]
 [   0    0    2    4    5    2    2    1    0]
 [   0    0    0    1    4    0    2    0    0]
 [   0    0    0    2    0    1    0    0    0]
 [   0    0    0    0    1    2    0    0    0]
 [   0    0    0    0    0    0    1    0    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.99      0.99      1055
           2       0.74      0.73      0.74        79
           3       0.46      0.39      0.42        31
           4       0.28      0.31      0.29        16
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Max Depth 30 gini random
[[   0    1    0    0    0    0    0    0    0    0]
 [   1 1036   17    1    0    0    0    0    0    0]
 [   0   11   49   19    0    0    0    0    0    0]
 [   0    5    9    9    7    1    0    0    0    0]
 [   0    1    2    4    4    3    0    2    0    0]
 [   0    0    1    3    2    0    1    0    0    0]
 [   0    0    0    0    2    0    1    0    0    0]
 [   0    0    0    0    1    1    0    0    0    1]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    1    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.98      0.98      1055
           2       0.63      0.62      0.62        79
           3       0.25      0.29      0.27        31
           4       0.25      0.25      0.25        16
           5       0.00      0.00      0.00         7
           6       0.50      0.33      0.40         3
           

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.98      0.98      1055
           2       0.60      0.61      0.60        79
           3       0.26      0.19      0.22        31
           4       0.30      0.44      0.36        16
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00         3
           7       1.00      0.33      0.50         3
           9       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         1

    accuracy                           0.92      1196
   macro avg       0.31      0.26      0.27      1196
weighted avg       0.92      0.92      0.92      1196

Max Depth 35 gini best
[[   0    1    0    0    0    0    0    0    0    0]
 [   0 1044   11    0    0    0    0    0    0    0]
 [   0   17   50    9    2    0    1    0    0    0]
 [   0    3   14    8    5    0    1    0    0    0]
 [   0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Max Depth 35 entropy best
[[   0    1    0    0    0    0    0    0    0]
 [   2 1042   11    0    0    0    0    0    0]
 [   0   16   57    6    0    0    0    0    0]
 [   0    1   10   13    7    0    0    0    0]
 [   0    0    2    3    7    2    2    0    0]
 [   0    0    0    1    4    0    2    0    0]
 [   0    0    1    1    0    1    0    0    0]
 [   0    0    0    0    2    1    0    0    0]
 [   0    0    0    0    0    1    0    0    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.99      0.99      1055
           2       0.70      0.72      0.71        79
           3       0.54      0.42      0.47        31
           4       0.35      0.44      0.39        16
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.99      0.98      1055
           2       0.60      0.66      0.63        79
           3       0.28      0.16      0.20        31
           4       0.37      0.44      0.40        16
           5       0.25      0.29      0.27         7
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
           8       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         1

    accuracy                           0.93      1196
   macro avg       0.25      0.25      0.25      1196
weighted avg       0.92      0.93      0.92      1196

Max Depth 40 gini random
[[   0    1    0    0    0    0    0    0    0    0]
 [   0 1035   19    1    0    0    0    0    0    0]
 [   0   10   51   17    1    0    0    0    0    0]
 [   0    3    9   12    6    0    1    0    0    0]
 [  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Max Depth 45 entropy best
[[   0    1    0    0    0    0    0    0    0]
 [   1 1043   11    0    0    0    0    0    0]
 [   0   16   55    8    0    0    0    0    0]
 [   0    1    9   14    7    0    0    0    0]
 [   0    0    3    3    7    0    2    1    0]
 [   0    0    0    2    3    1    1    0    0]
 [   0    0    0    2    0    1    0    0    0]
 [   0    0    0    0    2    1    0    0    0]
 [   0    0    0    0    0    0    1    0    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.99      0.99      1055
           2       0.71      0.70      0.70        79
           3       0.48      0.45      0.47        31
           4       0.37      0.44      0.40        16
           5       0.33      0.14      0.20         7
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Min Samples 5 gini random
[[   0    1    0    0    0    0    0    0    0    0]
 [   3 1037   15    0    0    0    0    0    0    0]
 [   0   15   54   10    0    0    0    0    0    0]
 [   0    4   16    8    3    0    0    0    0    0]
 [   0    1    1    4    5    4    1    0    0    0]
 [   0    0    0    1    4    1    1    0    0    0]
 [   0    0    0    1    1    0    0    1    0    0]
 [   0    0    0    0    1    1    1    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    1    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.98      0.98      1055
           2       0.63      0.68      0.65        79
           3       0.33      0.26      0.29        31
           4       0.36      0.31      0.33        16
           5       0.17      0.14      0.15         7
           6       0.00      0.00      0.00         3
          

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Min Samples 10 gini best
[[   0    1    0    0    0    0    0    0    0]
 [   0 1044   10    1    0    0    0    0    0]
 [   0   17   51   10    0    0    1    0    0]
 [   0    3   17    6    4    0    1    0    0]
 [   0    0    1    3    6    5    1    0    0]
 [   0    0    0    1    5    0    0    1    0]
 [   0    0    0    0    1    1    0    1    0]
 [   0    0    0    0    0    3    0    0    0]
 [   0    0    0    0    0    1    0    0    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.99      0.98      1055
           2       0.65      0.65      0.65        79
           3       0.29      0.19      0.23        31
           4       0.38      0.38      0.38        16
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Min Samples 15 gini best
[[   0    1    0    0    0    0    0    0    0]
 [   0 1042   13    0    0    0    0    0    0]
 [   0   13   57    9    0    0    0    0    0]
 [   0    4   14    9    3    0    1    0    0]
 [   0    0    2    4    5    4    1    0    0]
 [   0    0    0    2    4    0    1    0    0]
 [   0    0    0    0    2    0    0    1    0]
 [   0    0    0    0    0    3    0    0    0]
 [   0    0    0    0    0    1    0    0    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.99      0.99      1055
           2       0.66      0.72      0.69        79
           3       0.38      0.29      0.33        31
           4       0.36      0.31      0.33        16
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Min Samples 20 gini best
[[   0    1    0    0    0    0    0    0    0]
 [   0 1043   11    1    0    0    0    0    0]
 [   0   12   57   10    0    0    0    0    0]
 [   0    4   14   10    3    0    0    0    0]
 [   0    0    2    4    5    4    1    0    0]
 [   0    0    0    1    5    0    1    0    0]
 [   0    0    0    0    2    0    1    0    0]
 [   0    0    0    0    2    1    0    0    0]
 [   0    0    0    0    1    0    0    0    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.99      0.99      1055
           2       0.68      0.72      0.70        79
           3       0.38      0.32      0.35        31
           4       0.28      0.31      0.29        16
           5       0.00      0.00      0.00         7
           6       0.33      0.33      0.33         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Min Samples 25 gini best
[[   0    1    0    0    0    0    0    0    0]
 [   0 1042   12    1    0    0    0    0    0]
 [   0   12   57   10    0    0    0    0    0]
 [   0    4   14    8    5    0    0    0    0]
 [   0    0    2    2   11    1    0    0    0]
 [   0    0    0    1    5    0    1    0    0]
 [   0    0    0    0    2    0    1    0    0]
 [   0    0    0    0    3    0    0    0    0]
 [   0    0    0    0    1    0    0    0    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.99      0.99      1055
           2       0.67      0.72      0.70        79
           3       0.36      0.26      0.30        31
           4       0.41      0.69      0.51        16
           5       0.00      0.00      0.00         7
           6       0.50      0.33      0.40         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Min Samples 30 gini best
[[   0    1    0    0    0    0    0    0    0]
 [   0 1042   13    0    0    0    0    0    0]
 [   0   12   58    9    0    0    0    0    0]
 [   0    4   14    9    4    0    0    0    0]
 [   0    0    2    3   10    1    0    0    0]
 [   0    0    0    1    5    0    1    0    0]
 [   0    0    0    0    2    0    1    0    0]
 [   0    0    0    0    3    0    0    0    0]
 [   0    0    0    0    1    0    0    0    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.99      0.99      1055
           2       0.67      0.73      0.70        79
           3       0.41      0.29      0.34        31
           4       0.40      0.62      0.49        16
           5       0.00      0.00      0.00         7
           6       0.50      0.33      0.40         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.98      0.98      0.98      1055
           2       0.67      0.76      0.71        79
           3       0.50      0.32      0.39        31
           4       0.32      0.44      0.37        16
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1

    accuracy                           0.93      1196
   macro avg       0.27      0.28      0.27      1196
weighted avg       0.93      0.93      0.93      1196

Min Samples 35 gini best
[[   0    1    0    0    0    0    0    0    0]
 [   0 1042   13    0    0    0    0    0    0]
 [   0   11   59    9    0    0    0    0    0]
 [   0    4   14    9    4    0    0    0    0]
 [   0    0    2    3    9    2    0    0    0]
 [   0    0    0    1    5    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Min Samples 40 entropy best
[[   0    1    0    0    0    0    0    0    0]
 [   0 1039   16    0    0    0    0    0    0]
 [   0    8   63    8    0    0    0    0    0]
 [   0    0   10   15    6    0    0    0    0]
 [   0    0    0    4    9    0    3    0    0]
 [   0    0    0    1    4    0    2    0    0]
 [   0    0    0    0    2    0    1    0    0]
 [   0    0    0    0    3    0    0    0    0]
 [   0    0    0    0    0    1    0    0    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.99      0.98      0.99      1055
           2       0.71      0.80      0.75        79
           3       0.54      0.48      0.51        31
           4       0.38      0.56      0.45        16
           5       0.00      0.00      0.00         7
           6       0.17      0.33      0.22         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Min Samples 45 entropy best
[[   0    1    0    0    0    0    0    0    0]
 [   0 1039   16    0    0    0    0    0    0]
 [   0    8   66    5    0    0    0    0    0]
 [   0    0   12   13    6    0    0    0    0]
 [   0    0    0    4    9    0    3    0    0]
 [   0    0    0    1    4    0    2    0    0]
 [   0    0    0    0    2    0    1    0    0]
 [   0    0    0    0    3    0    0    0    0]
 [   0    0    0    0    0    1    0    0    0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.99      0.98      0.99      1055
           2       0.70      0.84      0.76        79
           3       0.57      0.42      0.48        31
           4       0.38      0.56      0.45        16
           5       0.00      0.00      0.00         7
           6       0.17      0.33      0.22         3
           7       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [120]:
classifiers_scores.sort(key=lambda x: x[-1], reverse=True)
print(classifiers_scores[0])

# Print the best classifier
print("Best classifier configuration:")
print("Criterion: {}, Splitter: {}, Value: {}, Parameter: {}, F1 Score: {}".format(*classifiers_scores[0]))

('gini', 'random', 20, 'min_samples_split', 0.3522281817879302)
Best classifier configuration:
Criterion: gini, Splitter: random, Value: 20, Parameter: min_samples_split, F1 Score: 0.3522281817879302


In [121]:
features = X_train.columns.tolist()
type = 'max_depth'
if classifiers_scores[0][3] == type:
    dtree = DecisionTreeClassifier(criterion=classifiers_scores[0][0],splitter=classifiers_scores[0][1],max_depth=classifiers_scores[0][2])
else:
    dtree = DecisionTreeClassifier(criterion=classifiers_scores[0][0],splitter=classifiers_scores[0][1],min_samples_split=classifiers_scores[0][2])
dtree.fit(X_train, y_train)
data = tree.export_graphviz(dtree, out_file=None, feature_names=features, node_ids=True)
graph = pydotplus.graph_from_dot_data(data)
graph.set_size('"100,100!"')
