### Import libraries and module

In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import cross_val_score, StratifiedKFold, LeaveOneOut

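# Project-local preprocessing module. Judging by the output printed below, importing it
# runs the preprocessing (a figure plus a 70/30 train/test split); the cells that follow
# read the data from it as dpp.F, dpp.P, dpp.F_train, dpp.P_train, dpp.F_test, dpp.P_test.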
import data_preprocessing2 as dpp

<Figure size 2500x2500 with 1 Axes>

70.00% in training set
30.00% in test set


### Model training with logistic regression classifier

In [2]:
# Build the logistic-regression classifier; random_state is pinned so reruns match.
# NOTE(review): newer scikit-learn releases deprecate the `multi_class` argument --
# confirm against the installed version before upgrading.
lrc_model = LogisticRegression(
    solver='liblinear',
    multi_class='auto',
    random_state=0,
)

# Train on the training split. ravel() passes the labels to fit() as a flat vector
# (assumes dpp.P_train is a NumPy array -- TODO confirm in data_preprocessing2).
lrc_model.fit(dpp.F_train, dpp.P_train.ravel())

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=0, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

### Prediction results 

In [3]:
# Training split: predict on the data the model was fitted on, then score it.
lrc_model_prediction_from_trained_data = lrc_model.predict(dpp.F_train)
lrc_model_accuracy_from_trained_data = metrics.accuracy_score(
    dpp.P_train,
    lrc_model_prediction_from_trained_data,
)

# Test split: same two steps on the held-out data.
lrc_model_prediction_from_test_data = lrc_model.predict(dpp.F_test)
lrc_model_accuracy_from_test_data = metrics.accuracy_score(
    dpp.P_test,
    lrc_model_prediction_from_test_data,
)

In [4]:
# Report both accuracies as percentages with four decimal places.
train_accuracy_pct = lrc_model_accuracy_from_trained_data * 100
test_accuracy_pct = lrc_model_accuracy_from_test_data * 100

print("Accuracy from trained data of lrc model is : {0:.4f}%".format(train_accuracy_pct))
print("Accuracy from test data of lrc model is : {0:.4f}%".format(test_accuracy_pct))

Accuracy from trained data of lrc model is : 98.0000%
Accuracy from test data of lrc model is : 97.3333%


### Cross Validation

#### k-Fold cross validation (k = 10)

In [5]:
# Score the classifier on the full data set with 10 folds.
# NOTE: per the scikit-learn docs, an integer `cv` combined with a classifier and
# binary/multiclass labels is split with StratifiedKFold rather than plain KFold.
kF_cv_score = cross_val_score(
    lrc_model,
    dpp.F,
    dpp.P.ravel(),
    cv=10,
)

In [6]:
# Display the individual fold scores (one entry per fold, 10 in total).
kF_cv_score

array([0.97058824, 0.98039216, 0.99009901, 0.98      , 0.98      ,
       0.98989899, 0.98989899, 1.        , 0.97979798, 0.98989899])

In [7]:
# Average of the per-fold scores, scaled to a percentage.
kF_cv_score.mean()*100

98.50574351552801

#### Stratified cross validation 

In [8]:
# Explicit stratified splitter with 5 folds; only n_splits is overridden, every other
# StratifiedKFold setting stays at its default.
SFKF = StratifiedKFold(n_splits=5)

# Cross-validate the same estimator on the full data set using that splitter.
SFKF_cv_score = cross_val_score(
    lrc_model,
    dpp.F,
    dpp.P.ravel(),
    cv=SFKF,
)

In [9]:
# Display the individual fold scores (one entry per fold, 5 in total).
SFKF_cv_score

array([0.97512438, 0.98507463, 0.985     , 0.99497487, 0.98492462])

In [10]:
# Average of the per-fold scores, scaled to a percentage.
SFKF_cv_score.mean()*100

98.50197004925123

#### Leave-one-out cross validation

In [11]:
# Leave-one-out splitter: every sample is held out exactly once, so the model is
# refitted once per sample.
loo_validation = LeaveOneOut()

# Each resulting score is computed on a single held-out sample.
LOO_cv_score = cross_val_score(
    lrc_model,
    dpp.F,
    dpp.P.ravel(),
    cv=loo_validation,
)

In [12]:
# Display the per-sample scores: each entry is 1.0 (held-out sample predicted correctly)
# or 0.0 (misclassified). NOTE(review): this prints one value per sample; a summary such
# as the mean in the next cell is easier to read.
LOO_cv_score

array([1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 0., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1.

In [13]:
# Share of held-out samples predicted correctly, as a percentage.
LOO_cv_score.mean()*100

98.4

### Performance metrics

#### Confusion matrix

In [14]:
# Print one confusion matrix per split. In scikit-learn's layout, rows are the true
# classes and columns are the predicted classes.
for matrix_title, true_labels, predicted_labels in (
    ("confusion matrix for trained data :", dpp.P_train, lrc_model_prediction_from_trained_data),
    ("confusion matrix for test data :", dpp.P_test, lrc_model_prediction_from_test_data),
):
    print(matrix_title)
    print(metrics.confusion_matrix(true_labels, predicted_labels))

confusion matrix for trained data :
[[213   1   0]
 [  9 223   4]
 [  0   0 250]]
confusion matrix for test data :
[[ 89   0   0]
 [  5  88   3]
 [  0   0 115]]


#### Classification report

In [15]:
# Print the per-class precision / recall / f1 / support table for each split.
for report_title, true_labels, predicted_labels in (
    ("classification report for trained data :", dpp.P_train, lrc_model_prediction_from_trained_data),
    ("classification report for test data :", dpp.P_test, lrc_model_prediction_from_test_data),
):
    print(report_title)
    print(metrics.classification_report(true_labels, predicted_labels))

classification report for trained data :
              precision    recall  f1-score   support

           1       0.96      1.00      0.98       214
           2       1.00      0.94      0.97       236
           3       0.98      1.00      0.99       250

    accuracy                           0.98       700
   macro avg       0.98      0.98      0.98       700
weighted avg       0.98      0.98      0.98       700

classification report for test data :
              precision    recall  f1-score   support

           1       0.95      1.00      0.97        89
           2       1.00      0.92      0.96        96
           3       0.97      1.00      0.99       115

    accuracy                           0.97       300
   macro avg       0.97      0.97      0.97       300
weighted avg       0.97      0.97      0.97       300

