In [1]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Restore the three persisted kernel models (linear, RBF, poly) from disk.
# NOTE: joblib.load unpickles arbitrary objects - only load files you trust.
# NOTE(review): the RBF file name ends in lowercase "ht", unlike the other two;
# confirm it matches the file on disk for case-sensitive file systems.
model_looks_linear_HT, model_looks_rbf_HT, model_looks_poly_HT = (
    joblib.load(pkl_path)
    for pkl_path in (
        'params_pkl/model_looks_linear_HT.pkl',
        'params_pkl/model_looks_rbf_ht.pkl',
        'params_pkl/model_looks_poly_HT.pkl',
    )
)

In [3]:
# Load the persisted feature selections: the variance-threshold subset first,
# then one PSO-selected subset per kernel (linear, RBF, poly).
VT_selected_feature_Zoo = joblib.load('VT_selected_feature_Zoo.pkl')
pso_linear, pso_rbf, pso_poly = (
    joblib.load(pkl_path)
    for pkl_path in (
        'PSO-selected-feature/pso-linear.pkl',
        'PSO-selected-feature/pso-rbf.pkl',
        'PSO-selected-feature/pso-poly.pkl',
    )
)

In [4]:
# Load the prepared train / test / validation splits.
# Forward slashes are used as the path separator: the previous backslash
# separators only resolved on Windows and relied on invalid string escape
# sequences (backslash + "X" / "y"), which newer Python versions warn about.
DATA_DIR = 'dataset-after-preparation'
X_train = pd.read_csv(f'{DATA_DIR}/X_train.csv')
X_test = pd.read_csv(f'{DATA_DIR}/X_test.csv')
y_train = pd.read_csv(f'{DATA_DIR}/y_train.csv')
y_test = pd.read_csv(f'{DATA_DIR}/y_test.csv')
X_valid = pd.read_csv(f'{DATA_DIR}/X_valid.csv')
y_valid = pd.read_csv(f'{DATA_DIR}/y_valid.csv')

In [5]:
# Restrict the train and test splits to the variance-threshold selection.
X_train, X_test = (
    split[VT_selected_feature_Zoo] for split in (X_train, X_test)
)

In [6]:
# Shapes of the train and test splits after variance-threshold slicing
(X_train.shape, X_test.shape)

((130, 1090), (57, 1090))

In [7]:
# Per-kernel feature slicing: index each split with the PSO selection that
# belongs to the kernel, using the same selection for train and test.
X_train_linear, X_test_linear = X_train[pso_linear], X_test[pso_linear]
X_train_rbf, X_test_rbf = X_train[pso_rbf], X_test[pso_rbf]
X_train_poly, X_test_poly = X_train[pso_poly], X_test[pso_poly]

In [8]:
# Shapes of the per-kernel training frames (linear, RBF, poly)
tuple(frame.shape for frame in (X_train_linear, X_train_rbf, X_train_poly))

((130, 546), (130, 520), (130, 559))

In [9]:
# Shapes of the per-kernel test frames (linear, RBF, poly)
tuple(frame.shape for frame in (X_test_linear, X_test_rbf, X_test_poly))

((57, 546), (57, 520), (57, 559))

### Making predictions for each kernel with its best estimator

In [10]:
# Predict with the linear-kernel model on both splits, then report accuracy.
y_pred_train_linear, y_pred_test_linear = (
    model_looks_linear_HT.predict(features)
    for features in (X_train_linear, X_test_linear)
)
linear_train_acc = accuracy_score(y_train, y_pred_train_linear)
linear_test_acc = accuracy_score(y_test, y_pred_test_linear)
print(f'Linear model train score: {linear_train_acc}')
print(f'Linear model test score: {linear_test_acc}')

Linear model train score: 0.8615384615384616
Linear model test score: 0.8070175438596491


In [11]:
# Classification report for the linear kernel (test set).
# target_names only supplies the display names used for the report rows.
target_names = ['LNR1', 'LNR2']
print(classification_report(y_test, y_pred_test_linear, target_names=target_names))

              precision    recall  f1-score   support

        LNR1       0.72      0.82      0.77        22
        LNR2       0.88      0.80      0.84        35

    accuracy                           0.81        57
   macro avg       0.80      0.81      0.80        57
weighted avg       0.82      0.81      0.81        57



In [12]:
# Confusion matrix for the linear kernel (test set).
# labels=[1, 0] puts class 1 in row/column 0 and class 0 in row/column 1, so
# the TN/FP/FN/TP names below treat class 0 as the positive class.
# NOTE(review): confirm that class 0 is the intended positive class.
CM_linear_test = confusion_matrix(y_test, y_pred_test_linear, labels=[1, 0])
# Row-major unpacking of the 2x2 matrix: [0][0], [0][1], [1][0], [1][1]
TN_linear_test, FP_linear_test, FN_linear_test, TP_linear_test = CM_linear_test.ravel()
print("TN_linear_test: ", TN_linear_test)
print("FN_linear_test: ", FN_linear_test)
print("TP_linear_test: ", TP_linear_test)
print("FP_linear_test: ", FP_linear_test)

TN_linear_test:  28
FN_linear_test:  4
TP_linear_test:  18
FP_linear_test:  7


In [13]:
# Predict with the RBF-kernel model on both splits.
y_pred_train_rbf = model_looks_rbf_HT.predict(X_train_rbf)
y_pred_test_rbf = model_looks_rbf_HT.predict(X_test_rbf)
# Accuracy per split; the printed labels name the RBF model (they previously
# said "Linear model", which mislabeled these scores).
print(f'RBF model train score: {accuracy_score(y_train, y_pred_train_rbf)}')
print(f'RBF model test score: {accuracy_score(y_test, y_pred_test_rbf)}')

Linear model train score: 0.7923076923076923
Linear model test score: 0.7368421052631579


In [14]:
# Confusion matrix for the RBF kernel (test set).
# labels=[1, 0] puts class 1 in row/column 0 and class 0 in row/column 1, so
# the TN/FP/FN/TP names below treat class 0 as the positive class.
# NOTE(review): confirm that class 0 is the intended positive class.
CM_rbf_test = confusion_matrix(y_test, y_pred_test_rbf, labels=[1, 0])
# Row-major unpacking of the 2x2 matrix: [0][0], [0][1], [1][0], [1][1]
TN_rbf_test, FP_rbf_test, FN_rbf_test, TP_rbf_test = CM_rbf_test.ravel()
print("TN_rbf_test: ", TN_rbf_test)
print("FN_rbf_test: ", FN_rbf_test)
print("TP_rbf_test: ", TP_rbf_test)
print("FP_rbf_test: ", FP_rbf_test)

TN_rbf_test:  23
FN_rbf_test:  3
TP_rbf_test:  19
FP_rbf_test:  12


In [15]:
# Predict with the polynomial-kernel model on both splits, then report accuracy.
y_pred_train_poly, y_pred_test_poly = (
    model_looks_poly_HT.predict(features)
    for features in (X_train_poly, X_test_poly)
)
poly_train_acc = accuracy_score(y_train, y_pred_train_poly)
poly_test_acc = accuracy_score(y_test, y_pred_test_poly)
print(f'Poly model train score: {poly_train_acc}')
print(f'Poly model test score: {poly_test_acc}')

Poly model train score: 0.9076923076923077
Poly model test score: 0.7017543859649122


In [16]:
# Classification report for the poly kernel (test set).
# target_names only supplies the display names used for the report rows.
target_names = ['POLY1', 'POLY2']
print(classification_report(y_test, y_pred_test_poly, target_names=target_names))

              precision    recall  f1-score   support

       POLY1       0.58      0.86      0.69        22
       POLY2       0.88      0.60      0.71        35

    accuracy                           0.70        57
   macro avg       0.73      0.73      0.70        57
weighted avg       0.76      0.70      0.70        57



In [17]:
# Confusion matrix for the poly kernel (test set).
# labels=[1, 0] puts class 1 in row/column 0 and class 0 in row/column 1, so
# the TN/FP/FN/TP names below treat class 0 as the positive class.
# NOTE(review): confirm that class 0 is the intended positive class.
CM_poly_test = confusion_matrix(y_test, y_pred_test_poly, labels=[1, 0])
# Row-major unpacking of the 2x2 matrix: [0][0], [0][1], [1][0], [1][1]
TN_poly_test, FP_poly_test, FN_poly_test, TP_poly_test = CM_poly_test.ravel()
# Printed labels name the poly kernel (they previously said "rbf").
print("TN_poly_test: ", TN_poly_test)
print("FN_poly_test: ", FN_poly_test)
print("TP_poly_test: ", TP_poly_test)
print("FP_poly_test: ", FP_poly_test)

TN_rbf_test:  21
FN_rbf_test:  3
TP_rbf_test:  19
FP_rbf_test:  14


### Classification Report and Confusion Matrix

In [18]:
# Classification report for the linear kernel (train set).
# target_names only supplies the display names used for the report rows.
target_names = ['LNR1', 'LNR2']
print(classification_report(y_train, y_pred_train_linear, target_names=target_names))

              precision    recall  f1-score   support

        LNR1       0.93      0.79      0.86        68
        LNR2       0.81      0.94      0.87        62

    accuracy                           0.86       130
   macro avg       0.87      0.86      0.86       130
weighted avg       0.87      0.86      0.86       130



In [19]:
# Confusion matrix for the linear kernel (train set).
# labels=[1, 0] puts class 1 in row/column 0 and class 0 in row/column 1, so
# the TN/FP/FN/TP names below treat class 0 as the positive class.
# NOTE(review): confirm that class 0 is the intended positive class.
CM_linear_train = confusion_matrix(y_train, y_pred_train_linear, labels=[1, 0])
# Row-major unpacking of the 2x2 matrix: [0][0], [0][1], [1][0], [1][1]
TN_linear_train, FP_linear_train, FN_linear_train, TP_linear_train = CM_linear_train.ravel()
# Printed labels name the train split (they previously said "test").
print("TN_linear_train: ", TN_linear_train)
print("FN_linear_train: ", FN_linear_train)
print("TP_linear_train: ", TP_linear_train)
print("FP_linear_train: ", FP_linear_train)

TN_linear_test:  58
FN_linear_test:  14
TP_linear_test:  54
FP_linear_test:  4


In [20]:
# Classification report for the RBF kernel (train set).
# target_names only supplies the display names used for the report rows.
target_names = ['RBF1', 'RBF2']
print(classification_report(y_train, y_pred_train_rbf, target_names=target_names))

              precision    recall  f1-score   support

        RBF1       0.85      0.74      0.79        68
        RBF2       0.75      0.85      0.80        62

    accuracy                           0.79       130
   macro avg       0.80      0.80      0.79       130
weighted avg       0.80      0.79      0.79       130



In [22]:
# Confusion matrix for the RBF kernel (train set).
# labels=[1, 0] puts class 1 in row/column 0 and class 0 in row/column 1, so
# the TN/FP/FN/TP names below treat class 0 as the positive class.
# NOTE(review): confirm that class 0 is the intended positive class.
CM_rbf_train = confusion_matrix(y_train, y_pred_train_rbf, labels=[1, 0])
# Row-major unpacking of the 2x2 matrix: [0][0], [0][1], [1][0], [1][1]
TN_rbf_train, FP_rbf_train, FN_rbf_train, TP_rbf_train = CM_rbf_train.ravel()
print("TN_rbf_train: ", TN_rbf_train)
print("FN_rbf_train: ", FN_rbf_train)
print("TP_rbf_train: ", TP_rbf_train)
print("FP_rbf_train: ", FP_rbf_train)

TN_rbf_train:  53
FN_rbf_train:  18
TP_rbf_train:  50
FP_rbf_train:  9


In [23]:
# Classification report for the poly kernel (train set).
# Display names match the poly test-set report ('POLY1', 'POLY2'); the
# previous second name 'RPLY2' was a typo.
target_names = ['POLY1', 'POLY2']
print(classification_report(y_train, y_pred_train_poly, target_names=target_names))

              precision    recall  f1-score   support

        PLY1       0.91      0.91      0.91        68
       RPLY2       0.90      0.90      0.90        62

    accuracy                           0.91       130
   macro avg       0.91      0.91      0.91       130
weighted avg       0.91      0.91      0.91       130



In [25]:
# Confusion matrix for the poly kernel (train set).
# labels=[1, 0] puts class 1 in row/column 0 and class 0 in row/column 1, so
# the TN/FP/FN/TP names below treat class 0 as the positive class.
# NOTE(review): confirm that class 0 is the intended positive class.
CM_poly_train = confusion_matrix(y_train, y_pred_train_poly, labels=[1, 0])
# Row-major unpacking of the 2x2 matrix: [0][0], [0][1], [1][0], [1][1]
TN_poly_train, FP_poly_train, FN_poly_train, TP_poly_train = CM_poly_train.ravel()
print("TN_poly_train: ", TN_poly_train)
print("FN_poly_train: ", FN_poly_train)
print("TP_poly_train: ", TP_poly_train)
print("FP_poly_train: ", FP_poly_train)

TN_poly_train:  56
FN_poly_train:  6
TP_poly_train:  62
FP_poly_train:  6
