### Import libraries and module

In [1]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.model_selection import cross_val_score, StratifiedKFold, LeaveOneOut

# import custom module 
import data_preprocessing2 as dpp

<Figure size 2500x2500 with 1 Axes>

70.00% in training set
30.00% in test set


### Model training with decision tree classifier

In [2]:
#creating model object
dtc_model = DecisionTreeClassifier()

# fit model 
dtc_model.fit(dpp.F_train, dpp.P_train.ravel())

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

### Prediction results 

In [3]:
dtc_model_prediction_from_trained_data = dtc_model.predict(dpp.F_train)
dtc_model_prediction_from_test_data = dtc_model.predict(dpp.F_test)

# get current accuracy of the model
dtc_model_accuracy_from_trained_data = metrics.accuracy_score(dpp.P_train, dtc_model_prediction_from_trained_data)
dtc_model_accuracy_from_test_data = metrics.accuracy_score(dpp.P_test, dtc_model_prediction_from_test_data)

In [4]:
#output 
print ("Accuracy from trained data of dtc model is : {0:.4f}%".format(dtc_model_accuracy_from_trained_data*100))
print ("Accuracy from test data of dtc model is : {0:.4f}%".format(dtc_model_accuracy_from_test_data*100))

Accuracy from trained data of dtc model is : 100.0000%
Accuracy from test data of dtc model is : 100.0000%


### Cross Validation

#### k-Fold Cross Validation

In [5]:
kF_cv_score = cross_val_score(dtc_model,dpp.F,dpp.P.ravel(),cv=10)

In [6]:
kF_cv_score

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [7]:
kF_cv_score.mean()*100

100.0

#### Stratified cross validation 

In [8]:
SFKF = StratifiedKFold(n_splits=5)
SFKF_cv_score = cross_val_score(dtc_model,dpp.F,dpp.P.ravel(),cv=SFKF)

In [9]:
SFKF_cv_score

array([1., 1., 1., 1., 1.])

In [10]:
SFKF_cv_score.mean()*100 

100.0

#### LeaveOneOut cross validation

In [11]:
loo_validation = LeaveOneOut()
LOO_cv_score = cross_val_score(dtc_model,dpp.F,dpp.P.ravel(),cv=loo_validation)

In [12]:
LOO_cv_score

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1.

In [13]:
LOO_cv_score.mean()*100

100.0

### Performance metrics

#### Confusion matrix

In [14]:
print("confusion matrix for trained data :")
print(metrics.confusion_matrix(dpp.P_train, dtc_model_prediction_from_trained_data))

print("confusion matrix for test data :")
print(metrics.confusion_matrix(dpp.P_test, dtc_model_prediction_from_test_data))

confusion matrix for trained data :
[[214   0   0]
 [  0 236   0]
 [  0   0 250]]
confusion matrix for test data :
[[ 89   0   0]
 [  0  96   0]
 [  0   0 115]]


#### Classification report

In [15]:
print("classification report for trained data :")
print(metrics.classification_report(dpp.P_train, dtc_model_prediction_from_trained_data))

print("classification report for test data :")
print(metrics.classification_report(dpp.P_test, dtc_model_prediction_from_test_data))

classification report for trained data :
              precision    recall  f1-score   support

           1       1.00      1.00      1.00       214
           2       1.00      1.00      1.00       236
           3       1.00      1.00      1.00       250

    accuracy                           1.00       700
   macro avg       1.00      1.00      1.00       700
weighted avg       1.00      1.00      1.00       700

classification report for test data :
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        89
           2       1.00      1.00      1.00        96
           3       1.00      1.00      1.00       115

    accuracy                           1.00       300
   macro avg       1.00      1.00      1.00       300
weighted avg       1.00      1.00      1.00       300

