## single tree 

An improved version of `tree.ipynb`.

In [None]:
from sklearn import tree
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import LeaveOneOut
from sklearn import metrics    # contains all the effectivness idexes 
import pandas as pd    # for the dataset
from matplotlib import pyplot as plt
import numpy as np

#### importing and preparing the dataset

In [None]:
# Column names for the header-less CSV: the first two columns identify the sample
# ('class', 'spec_num'), the remaining fourteen are the numeric leaf descriptors.
labels = [
    'class', 'spec_num',
    'eccentr', 'asp_ratio', 'elong', 'solidity', 'stoch_conv', 'iso_factor',
    'max_ind_depth', 'lobedness', 'av_intensity', 'av_contr', 'smooth',
    'third_mom', 'unif', 'entropy',
]

df = pd.read_csv(r'./leaf/leaf.csv', header = None, names = labels)

# Shuffle the rows.
# - random_state pins the permutation, so the cross-validation cells further down see the rows
#   in the same order on every Restart & Run All.
# - reset_index(drop=True) discards the old index directly instead of inserting it as an extra
#   column that then has to be sliced away by position.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
# Split the frame by position: the target is column 0 ('class'), the predictors are
# columns 2..15 (everything after 'class' and 'spec_num').
target_position = 0
feature_positions = slice(2, 16)

y = df.iloc[:, target_position]
X = df.iloc[:, feature_positions]

#### finding best hyper-parameters

In [None]:
# List the names of all effectiveness indexes (scorers) available in sklearn.
#
# Notes on the ROC-AUC scorers:
# - plain 'roc_auc' does not work in CV here because this is a multiclass classification
#   (the 'ovo' or 'ovr' variant has to be specified);
# - none of the roc_auc variants work in LOOCV, because each testing set contains only one
#   observation.

scorer_names = metrics.get_scorer_names()
print(scorer_names)

In [None]:
# Grid search with k-fold CV to find the best hyper-parameters and fit the tree.
#
# Note on reproducibility: best_score_ was observed to differ from run to run. One cause is the
# tree itself: scikit-learn randomly permutes the features at every split, so ties between
# equally good splits are broken differently unless random_state is fixed. Fixing it here
# removes the estimator's own source of run-to-run variation.

k = 5

# Plain Python ints (rather than a numpy array) keep best_params_ free of numpy scalar types.
grid_param = {'criterion': ['gini', 'entropy'], 'min_samples_split': list(range(2, 8))}
t_cv = GridSearchCV(
    tree.DecisionTreeClassifier(random_state=42),
    grid_param,
    cv=k,
    scoring='roc_auc_ovo',   # multiclass problem: the one-vs-one ROC-AUC variant is required
)
t_cv.fit(X, y)
print(t_cv.best_params_)
print(t_cv.best_score_)

In [None]:
# Same grid search, now with leave-one-out CV.
# Accuracy is used as the score because every test fold holds a single observation, which rules
# out the ROC-AUC scorers.

# Plain Python ints (rather than a numpy array) keep best_params_ free of numpy scalar types.
grid_param = {'criterion': ['gini', 'entropy'], 'min_samples_split': list(range(2, 8))}
t_loocv = GridSearchCV(
    # random_state fixes the tree's random tie-breaking between equally good splits, so the
    # estimator itself no longer changes the result from one run to the next
    tree.DecisionTreeClassifier(random_state=42),
    grid_param,
    cv=LeaveOneOut(),
    scoring='accuracy',
)
t_loocv.fit(X, y)
print(t_loocv.best_params_)
print(t_loocv.best_score_)

#### retrieving and plotting the fitted trees

In [None]:
# Pull the winning estimator out of each finished search:
# first the k-fold one, then the leave-one-out one.
t_cv_instance, t_loocv_instance = (
    search.best_estimator_ for search in (t_cv, t_loocv)
)

In [None]:
# Plot the tree chosen by the k-fold grid search.
# An explicit axes plus a title lets the figure stand on its own, and feature_names labels each
# split with the column name the estimator was fitted on instead of an anonymous x[i].
fig, ax = plt.subplots(figsize=(25, 20))
ax.set_title('Decision tree selected by k-fold CV grid search', fontsize=20)
_ = tree.plot_tree(
    t_cv_instance,
    feature_names=list(t_cv_instance.feature_names_in_),
    filled=True,
    ax=ax,
)

In [None]:
# Plot the tree chosen by the leave-one-out grid search.
# An explicit axes plus a title lets the figure stand on its own, and feature_names labels each
# split with the column name the estimator was fitted on instead of an anonymous x[i].
fig, ax = plt.subplots(figsize=(25, 20))
ax.set_title('Decision tree selected by leave-one-out CV grid search', fontsize=20)
_ = tree.plot_tree(
    t_loocv_instance,
    feature_names=list(t_loocv_instance.feature_names_in_),
    filled=True,
    ax=ax,
)

In [None]:
# Show the difference between the extracted estimator and the search object that produced it.
for fitted_object in (t_cv_instance, t_cv):
    print(type(fitted_object))