### Import libraries and module

In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.model_selection import cross_val_score, StratifiedKFold, LeaveOneOut

# import custom module 
import data_preprocessing2 as dpp

<Figure size 2500x2500 with 1 Axes>

70.00% in training set
30.00% in test set


### Model training with  k-nearest neighbors classifier

In [2]:
#creating model object
knn_model = KNeighborsClassifier()

# fit model
knn_model.fit(dpp.F_train, dpp.P_train.ravel())

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

### Prediction results 

In [3]:
knn_model_prediction_from_trained_data = knn_model.predict(dpp.F_train)
knn_model_prediction_from_test_data = knn_model.predict(dpp.F_test)

# get current accuracy of the model
knn_model_accuracy_from_trained_data = metrics.accuracy_score(dpp.P_train, knn_model_prediction_from_trained_data)
knn_model_accuracy_from_test_data = metrics.accuracy_score(dpp.P_test, knn_model_prediction_from_test_data)

In [4]:
#output 
print ("Accuracy from trained data of knn model is : {0:.4f}%".format(knn_model_accuracy_from_trained_data*100))
print ("Accuracy from test data of knn model is : {0:.4f}%".format(knn_model_accuracy_from_test_data*100))

Accuracy from trained data of knn model is : 99.8571%
Accuracy from test data of knn model is : 99.6667%


### Cross validation 

#### k-Fold Cross Validation

In [5]:
kF_cv_score = cross_val_score(knn_model,dpp.F,dpp.P.ravel(),cv=10)

In [6]:
kF_cv_score

array([0.99019608, 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 0.98989899])

In [7]:
kF_cv_score.mean()*100

99.80095068330363

#### Stratified cross validation 

In [8]:
SFKF = StratifiedKFold(n_splits=5)
SFKF_cv_score = cross_val_score(knn_model,dpp.F,dpp.P.ravel(),cv=SFKF)

In [9]:
SFKF_cv_score

array([0.99502488, 1.        , 1.        , 1.        , 0.99497487])

In [10]:
SFKF_cv_score.mean()*100

99.799994999875

#### LeaveOneOut cross validation

In [11]:
loo_validation = LeaveOneOut()
LOO_cv_score = cross_val_score(knn_model,dpp.F,dpp.P.ravel(),cv=loo_validation)

In [12]:
LOO_cv_score

array([1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1.

In [13]:
LOO_cv_score.mean()*100

99.8

### Performance metrics

#### Confusion matrix

In [14]:
print("confusion matrix for trained data :")
print(metrics.confusion_matrix(dpp.P_train, knn_model_prediction_from_trained_data))

print("confusion matrix for test data :")
print(metrics.confusion_matrix(dpp.P_test, knn_model_prediction_from_test_data))

confusion matrix for trained data :
[[213   1   0]
 [  0 236   0]
 [  0   0 250]]
confusion matrix for test data :
[[ 88   1   0]
 [  0  96   0]
 [  0   0 115]]


#### Classification report

In [15]:
print("classification report for trained data :")
print(metrics.classification_report(dpp.P_train, knn_model_prediction_from_trained_data))

print("classification report for test data :")
print(metrics.classification_report(dpp.P_test, knn_model_prediction_from_test_data))

classification report for trained data :
              precision    recall  f1-score   support

           1       1.00      1.00      1.00       214
           2       1.00      1.00      1.00       236
           3       1.00      1.00      1.00       250

    accuracy                           1.00       700
   macro avg       1.00      1.00      1.00       700
weighted avg       1.00      1.00      1.00       700

classification report for test data :
              precision    recall  f1-score   support

           1       1.00      0.99      0.99        89
           2       0.99      1.00      0.99        96
           3       1.00      1.00      1.00       115

    accuracy                           1.00       300
   macro avg       1.00      1.00      1.00       300
weighted avg       1.00      1.00      1.00       300

