In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
from sklearn.metrics import accuracy_score 

In [2]:
model_looks_linear_HT = joblib.load('params_pkl/model_looks_linear_HT.pkl')
model_looks_rbf_HT = joblib.load('params_pkl/model_looks_rbf_ht.pkl')
model_looks_poly_HT = joblib.load('params_pkl/model_looks_poly_HT.pkl')

In [3]:
#input the selected feature by using variance threshold
VT_selected_feature_Zoo = joblib.load('VT_selected_feature_Zoo.pkl')
pso_linear = joblib.load('PSO-selected-feature/pso-linear.pkl')
pso_rbf = joblib.load('PSO-selected-feature/pso-rbf.pkl')
pso_poly = joblib.load('PSO-selected-feature/pso-poly.pkl')

In [4]:
#Input the dataset
X_train = pd.read_csv('dataset-after-preparation\X_train.csv')
X_test = pd.read_csv('dataset-after-preparation\X_test.csv')
y_train = pd.read_csv('dataset-after-preparation\y_train.csv')
y_test = pd.read_csv('dataset-after-preparation\y_test.csv')
X_valid = pd.read_csv('dataset-after-preparation\X_valid.csv')
y_valid = pd.read_csv('dataset-after-preparation\y_valid.csv')

In [5]:
# fill the dataset with the selected features
X_train = X_train[VT_selected_feature_Zoo]
X_test = X_test[VT_selected_feature_Zoo]

In [6]:
X_train.shape, X_test.shape,

((130, 1090), (57, 1090))

In [7]:
# Feature Slicing with PSO-SVM
X_train_linear = X_train[pso_linear]
X_train_rbf = X_train[pso_rbf]
X_train_poly = X_train[pso_poly]

X_test_linear = X_test[pso_linear]
X_test_rbf = X_test[pso_rbf]
X_test_poly = X_test[pso_poly]

In [8]:
# See the shape of Train data
X_train_linear.shape, X_train_rbf.shape, X_train_poly.shape

((130, 516), (130, 539), (130, 518))

In [9]:
X_test_linear.shape, X_test_rbf.shape, X_test_poly.shape

((57, 516), (57, 539), (57, 518))

### Making the prediction from the each kernel with them best estimator

In [10]:
y_pred_train_linear = model_looks_linear_HT.predict(X_train_linear)
y_pred_test_linear = model_looks_linear_HT.predict(X_test_linear)
### score
print(f'Linear model train score: {accuracy_score(y_train, y_pred_train_linear)}')
print(f'Linear model test score: {accuracy_score(y_test, y_pred_test_linear)}')

Linear model train score: 0.9846153846153847
Linear model test score: 0.7543859649122807


In [29]:
# Classification Report for Linear kernel (Ttest set) 
from sklearn.metrics import classification_report
target_names = ['LNR1', 'LNR2']
print(classification_report(y_test, y_pred_test_linear, target_names=target_names))

              precision    recall  f1-score   support

        LNR1       0.64      0.82      0.72        22
        LNR2       0.86      0.71      0.78        35

    accuracy                           0.75        57
   macro avg       0.75      0.77      0.75        57
weighted avg       0.78      0.75      0.76        57



In [12]:
# Confusion Matrix
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred_test_linear, labels=[1, 0])

array([[25, 10],
       [ 4, 18]], dtype=int64)

In [13]:
TP_linear = 37 
FN_linear = 13
FP_linear = 15
TN_linear = 29

In [14]:
y_pred_train_rbf = model_looks_rbf_HT.predict(X_train_rbf)
y_pred_test_rbf = model_looks_rbf_HT.predict(X_test_rbf)
### score
print(f'Linear model train score: {accuracy_score(y_train, y_pred_train_rbf)}')
print(f'Linear model test score: {accuracy_score(y_test, y_pred_test_rbf)}')

Linear model train score: 0.9769230769230769
Linear model test score: 0.6666666666666666


In [15]:
# Confution Matrix for RBF kernel (Test)
from sklearn.metrics import classification_report
target_names = ['RBF1', 'RBF2']
print(classification_report(y_test, y_pred_test_rbf, target_names=target_names))

              precision    recall  f1-score   support

        RBF1       0.54      0.86      0.67        22
        RBF2       0.86      0.54      0.67        35

    accuracy                           0.67        57
   macro avg       0.70      0.70      0.67        57
weighted avg       0.74      0.67      0.67        57



In [16]:
# Confusion Matrix
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred_test_rbf, labels=[1, 0])

array([[19, 16],
       [ 3, 19]], dtype=int64)

In [17]:
TP_rbf = 36
FN_rbf = 15
FP_rbf = 25 
TN_rbf = 75

In [18]:
y_pred_train_poly = model_looks_poly_HT.predict(X_train_poly)
y_pred_test_poly = model_looks_poly_HT.predict(X_test_poly)
### score
print(f'Poly model train score: {accuracy_score(y_train, y_pred_train_poly)}')
print(f'Poly model test score: {accuracy_score(y_test, y_pred_test_poly)}')

Poly model train score: 0.9923076923076923
Poly model test score: 0.7192982456140351


In [19]:
# Confution Matrix for Poly kernel 
from sklearn.metrics import classification_report
target_names = ['POLY1', 'POLY2']
print(classification_report(y_test, y_pred_test_rbf, target_names=target_names))

              precision    recall  f1-score   support

       POLY1       0.54      0.86      0.67        22
       POLY2       0.86      0.54      0.67        35

    accuracy                           0.67        57
   macro avg       0.70      0.70      0.67        57
weighted avg       0.74      0.67      0.67        57



In [20]:
# Confusion Matrix
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred_test_poly, labels=[1, 0])

array([[22, 13],
       [ 3, 19]], dtype=int64)

In [21]:
TP_poly = 32
FN_poly = 18 
FP_poly = 16
TN_poly = 28

### Classification Report and Confustion Matrix

In [22]:
# Classification Report for Linear kernel (Train)
from sklearn.metrics import classification_report
target_names = ['LNR1', 'LNR2']
print(classification_report(y_train, y_pred_train_linear, target_names=target_names))

              precision    recall  f1-score   support

        LNR1       0.99      0.99      0.99        68
        LNR2       0.98      0.98      0.98        62

    accuracy                           0.98       130
   macro avg       0.98      0.98      0.98       130
weighted avg       0.98      0.98      0.98       130



In [23]:
# Confusion Matrix linear (Train)
from sklearn.metrics import confusion_matrix
confusion_matrix(y_train, y_pred_train_linear, labels=[1, 0])

array([[61,  1],
       [ 1, 67]], dtype=int64)

In [24]:
# Classification Report for RBF kernel (Train)
from sklearn.metrics import classification_report
target_names = ['RBF1', 'RBF2']
print(classification_report(y_train, y_pred_train_rbf, target_names=target_names))

              precision    recall  f1-score   support

        RBF1       0.97      0.99      0.98        68
        RBF2       0.98      0.97      0.98        62

    accuracy                           0.98       130
   macro avg       0.98      0.98      0.98       130
weighted avg       0.98      0.98      0.98       130



In [25]:
# Confusion Matrix RBF (Train)
from sklearn.metrics import confusion_matrix
confusion_matrix(y_train, y_pred_train_rbf, labels=[1, 0])

array([[60,  2],
       [ 1, 67]], dtype=int64)

In [26]:
# Classification Report for Poly kernel (Train)
from sklearn.metrics import classification_report
target_names = ['PLY1', 'RPLY2']
print(classification_report(y_train, y_pred_train_poly, target_names=target_names))

              precision    recall  f1-score   support

        PLY1       0.99      1.00      0.99        68
       RPLY2       1.00      0.98      0.99        62

    accuracy                           0.99       130
   macro avg       0.99      0.99      0.99       130
weighted avg       0.99      0.99      0.99       130



In [27]:
# Confusion Matrix Poly (Train)
from sklearn.metrics import confusion_matrix
confusion_matrix(y_train, y_pred_train_poly, labels=[1, 0])

array([[61,  1],
       [ 0, 68]], dtype=int64)