### Import libraries and module

In [1]:
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
from sklearn.model_selection import cross_val_score, StratifiedKFold, LeaveOneOut

# import custom module 
import data_preprocessing2 as dpp

<Figure size 2500x2500 with 1 Axes>

70.00% in training set
30.00% in test set


### Model training with a Gaussian Naive Bayes classifier

In [2]:
# Instantiate a Gaussian Naive Bayes classifier with its default settings.
gnb_model = GaussianNB()

# Train on the training split exposed by the preprocessing module.
# ravel() flattens the label array to 1-D before it is handed to fit().
# The fit() call is deliberately the cell's last expression so that the
# fitted estimator's repr is shown as the cell output.
train_features = dpp.F_train
train_labels = dpp.P_train.ravel()
gnb_model.fit(train_features, train_labels)

GaussianNB(priors=None, var_smoothing=1e-09)

### Prediction results 

In [3]:
# --- Training split: predict with the fitted model, then score the
# --- predictions against the known training labels.
gnb_model_prediction_from_trained_data = gnb_model.predict(dpp.F_train)
gnb_model_accuracy_from_trained_data = metrics.accuracy_score(
    y_true=dpp.P_train,
    y_pred=gnb_model_prediction_from_trained_data,
)

# --- Test split: same two steps on the data kept aside from training.
gnb_model_prediction_from_test_data = gnb_model.predict(dpp.F_test)
gnb_model_accuracy_from_test_data = metrics.accuracy_score(
    y_true=dpp.P_test,
    y_pred=gnb_model_prediction_from_test_data,
)

In [4]:
# Report both accuracies as percentages with four decimal places.
# accuracy_score returns a fraction, hence the multiplication by 100.
train_accuracy_pct = gnb_model_accuracy_from_trained_data * 100
test_accuracy_pct = gnb_model_accuracy_from_test_data * 100

print("Accuracy from trained data of naive bayes model is : {0:.4f}%".format(train_accuracy_pct))
print("Accuracy from test data of naive bayes model is : {0:.4f}%".format(test_accuracy_pct))

Accuracy from trained data of naive bayes model is : 89.0000%
Accuracy from test data of naive bayes model is : 92.3333%


### Cross Validation

#### k-Fold Cross Validation

In [5]:
# 10-fold cross validation of the classifier on dpp.F / dpp.P
# (presumably the full, unsplit data -- confirm in data_preprocessing2).
# NOTE(review): scikit-learn documents that an integer `cv` combined with a
# classifier uses stratified folds, so this may differ from the
# "Stratified" section below only in the number of folds -- confirm against
# the installed scikit-learn version.
kF_cv_score = cross_val_score(
    estimator=gnb_model,
    X=dpp.F,
    y=dpp.P.ravel(),
    cv=10,
)

In [6]:
# Display the score obtained on each of the 10 folds.
kF_cv_score

array([0.89215686, 0.8627451 , 0.85148515, 0.89      , 0.93      ,
       0.88888889, 0.87878788, 0.8989899 , 0.90909091, 0.8989899 ])

In [7]:
# Mean score across the folds, expressed as a percentage.
100 * kF_cv_score.mean()

89.01134584046639

#### Stratified cross validation 

In [8]:
# Explicit stratified splitter with 5 folds, passed to cross_val_score as
# the cross-validation strategy.
SFKF = StratifiedKFold(n_splits=5)
SFKF_cv_score = cross_val_score(
    estimator=gnb_model,
    X=dpp.F,
    y=dpp.P.ravel(),
    cv=SFKF,
)

In [9]:
# Display the score obtained on each of the 5 stratified folds.
SFKF_cv_score

array([0.87562189, 0.87064677, 0.915     , 0.89447236, 0.89949749])

In [10]:
# Mean score across the stratified folds, expressed as a percentage.
100 * SFKF_cv_score.mean()

89.10477011925299

#### Leave-One-Out cross validation

In [11]:
# Leave-one-out splitter: each sample is held out once as a single-element
# test set, so the model is refit once per sample.
loo_validation = LeaveOneOut()
LOO_cv_score = cross_val_score(
    estimator=gnb_model,
    X=dpp.F,
    y=dpp.P.ravel(),
    cv=loo_validation,
)

In [12]:
# Display the per-sample leave-one-out scores (1.0 = held-out sample
# classified correctly, 0.0 = misclassified).
# NOTE(review): this prints one entry per sample; consider showing a slice
# or only the mean to keep the notebook output short.
LOO_cv_score

array([1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 0., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 1.,
       1., 1., 1., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       0., 1., 1., 1., 0., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 1., 1.,
       1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 0., 0., 1.

In [13]:
# Fraction of held-out samples classified correctly, as a percentage.
100 * LOO_cv_score.mean()

89.0

### Performance metrics

#### Confusion matrix

In [14]:
# Print the confusion matrix for each split: a header line followed by the
# matrix of true labels versus the model's predictions.
confusion_inputs = (
    ("confusion matrix for trained data :", dpp.P_train, gnb_model_prediction_from_trained_data),
    ("confusion matrix for test data :", dpp.P_test, gnb_model_prediction_from_test_data),
)
for header, true_labels, predicted_labels in confusion_inputs:
    print(header)
    print(metrics.confusion_matrix(true_labels, predicted_labels))

confusion matrix for trained data :
[[191  16   7]
 [  0 191  45]
 [  0   9 241]]
confusion matrix for test data :
[[ 82   4   3]
 [  0  81  15]
 [  0   1 114]]


#### Classification report

In [15]:
# Print the per-class precision / recall / f1 report for each split:
# a header line followed by scikit-learn's text report.
report_inputs = (
    ("classification report for trained data :", dpp.P_train, gnb_model_prediction_from_trained_data),
    ("classification report for test data :", dpp.P_test, gnb_model_prediction_from_test_data),
)
for header, true_labels, predicted_labels in report_inputs:
    print(header)
    print(metrics.classification_report(true_labels, predicted_labels))

classification report for trained data :
              precision    recall  f1-score   support

           1       1.00      0.89      0.94       214
           2       0.88      0.81      0.85       236
           3       0.82      0.96      0.89       250

    accuracy                           0.89       700
   macro avg       0.90      0.89      0.89       700
weighted avg       0.90      0.89      0.89       700

classification report for test data :
              precision    recall  f1-score   support

           1       1.00      0.92      0.96        89
           2       0.94      0.84      0.89        96
           3       0.86      0.99      0.92       115

    accuracy                           0.92       300
   macro avg       0.94      0.92      0.92       300
weighted avg       0.93      0.92      0.92       300

