In [1]:
from joblib import dump
from joblib import load
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_recall_fscore_support

In [2]:
y_test_indices=np.load('y_test_indices.npy')

In [3]:
model_dt = load('model_dt.joblib')
model_knn = load('model_knn.joblib')
model_lr = load('model_lr.joblib')
model_nb = load('model_nb.joblib')
model_rf = load('model_rf.joblib')
model_svc = load('model_svc.joblib')
model_xgb = load('model_xgb.joblib')
models = [
    model_dt, model_knn, model_lr, 
    model_nb, model_rf, model_svc, model_xgb
]

## BIM

In [4]:
x_BIM_001= np.load('x_test_adv_BIM_eps_0.01.npy')
x_BIM_001 = np.nan_to_num(x_BIM_001)

In [5]:
model_accuracies = {}

# Evaluate each model
for model in models:
    
    # Make predictions with the loaded model
    predictions = model.predict(x_BIM_001)
    
     # Calculate micro and macro precision, recall, and F1 score
    precision_micro, recall_micro, fscore_micro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='micro')
    precision_macro, recall_macro, fscore_macro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='macro')
    acc = np.sum(predictions == y_test_indices) / y_test_indices.shape[0]
    # Calculate confusion matrix and then TPR, TNR, FPR, FNR for each class
    conf_matrix = confusion_matrix(y_test_indices, predictions)
    FP = conf_matrix.sum(axis=0) - np.diag(conf_matrix)  
    FN = conf_matrix.sum(axis=1) - np.diag(conf_matrix)
    TP = np.diag(conf_matrix)
    TN = conf_matrix.sum() - (FP + FN + TP)

    TPR = TP / (TP + FN)
    TNR = TN / (TN + FP)
    FPR = FP / (FP + TN)
    FNR = FN / (TP + FN)

    # Averaging TPR, TNR, FPR, FNR
    TPR_avg = np.mean(TPR)
    TNR_avg = np.mean(TNR)
    FPR_avg = np.mean(FPR)
    FNR_avg = np.mean(FNR)
    TP_micro = np.sum(TP)
    FP_micro = np.sum(FP)
    FN_micro = np.sum(FN)
    TN_micro = np.sum(TN)

    # Micro averages for applicable metrics
    TPR_micro = TP_micro / (TP_micro + FN_micro)
    FPR_micro = FP_micro / (FP_micro + TN_micro)

    
    # Print the results for each epsilon
    print(f"Model: {model}")
    print(f"Accuracy: {acc}")
    print(f"Micro Precision: {precision_micro:.2f}")
    print(f"Macro Precision: {precision_macro:.2f}")
    print(f"Micro Recall: {recall_micro:.2f}")
    print(f"Macro Recall: {recall_macro:.2f}")
    print(f"Micro F1 Score: {fscore_micro:.2f}")
    print(f"Macro F1 Score: {fscore_macro:.2f}")
    print(f"Average TNR: {TNR_avg:.2f}, Average FPR: {FPR_avg:.2f}, Average FNR: {FNR_avg:.2f}\n")
    print(f"Macro TNR: {TNR_avg:.2f}, Macro FNR: {FNR_avg:.2f},Macro FPR: {FPR_avg:.2f}")
    print()



Model: DecisionTreeClassifier(max_depth=10, min_samples_split=12)
Accuracy: 0.4213680605623648
Micro Precision: 0.42
Macro Precision: 0.66
Micro Recall: 0.42
Macro Recall: 0.60
Micro F1 Score: 0.42
Macro F1 Score: 0.41
Average TNR: 0.60, Average FPR: 0.40, Average FNR: 0.40

Macro TNR: 0.60, Macro FNR: 0.40,Macro FPR: 0.40

Model: KNeighborsClassifier(n_neighbors=1)
Accuracy: 0.5260454217736121
Micro Precision: 0.53
Macro Precision: 0.68
Micro Recall: 0.53
Macro Recall: 0.67
Micro F1 Score: 0.53
Macro F1 Score: 0.53
Average TNR: 0.67, Average FPR: 0.33, Average FNR: 0.33

Macro TNR: 0.67, Macro FNR: 0.33,Macro FPR: 0.33

Model: LogisticRegression(max_iter=5000, verbose=1)
Accuracy: 0.4781407714491709
Micro Precision: 0.48
Macro Precision: 0.65
Micro Recall: 0.48
Macro Recall: 0.63
Micro F1 Score: 0.48
Macro F1 Score: 0.48
Average TNR: 0.63, Average FPR: 0.37, Average FNR: 0.37

Macro TNR: 0.63, Macro FNR: 0.37,Macro FPR: 0.37





Model: GaussianNB()
Accuracy: 0.8054253785147801
Micro Precision: 0.81
Macro Precision: 0.89
Micro Recall: 0.81
Macro Recall: 0.64
Micro F1 Score: 0.81
Macro F1 Score: 0.66
Average TNR: 0.64, Average FPR: 0.36, Average FNR: 0.36

Macro TNR: 0.64, Macro FNR: 0.36,Macro FPR: 0.36





Model: RandomForestClassifier()
Accuracy: 0.4158480533525595
Micro Precision: 0.42
Macro Precision: 0.66
Micro Recall: 0.42
Macro Recall: 0.60
Micro F1 Score: 0.42
Macro F1 Score: 0.41
Average TNR: 0.60, Average FPR: 0.40, Average FNR: 0.40

Macro TNR: 0.60, Macro FNR: 0.40,Macro FPR: 0.40





Model: SVC(shrinking=False, verbose=True)
Accuracy: 0.2753469718817592
Micro Precision: 0.28
Macro Precision: 0.64
Micro Recall: 0.28
Macro Recall: 0.50
Micro F1 Score: 0.28
Macro F1 Score: 0.22
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50

Model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='mlogloss',
              feature_types=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.2, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=5,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=100,
       

In [6]:
x_BIM_01= np.load('x_test_adv_BIM_eps_0.1.npy')
x_BIM_01 = np.nan_to_num(x_BIM_01)

In [7]:
model_accuracies = {}

# Evaluate each model
for model in models:
    
    # Make predictions with the loaded model
    predictions = model.predict(x_BIM_01)
    
     # Calculate micro and macro precision, recall, and F1 score
    precision_micro, recall_micro, fscore_micro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='micro')
    precision_macro, recall_macro, fscore_macro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='macro')
    acc = np.sum(predictions == y_test_indices) / y_test_indices.shape[0]
    # Calculate confusion matrix and then TPR, TNR, FPR, FNR for each class
    conf_matrix = confusion_matrix(y_test_indices, predictions)
    FP = conf_matrix.sum(axis=0) - np.diag(conf_matrix)  
    FN = conf_matrix.sum(axis=1) - np.diag(conf_matrix)
    TP = np.diag(conf_matrix)
    TN = conf_matrix.sum() - (FP + FN + TP)

    TPR = TP / (TP + FN)
    TNR = TN / (TN + FP)
    FPR = FP / (FP + TN)
    FNR = FN / (TP + FN)

    # Averaging TPR, TNR, FPR, FNR
    TPR_avg = np.mean(TPR)
    TNR_avg = np.mean(TNR)
    FPR_avg = np.mean(FPR)
    FNR_avg = np.mean(FNR)
    TP_micro = np.sum(TP)
    FP_micro = np.sum(FP)
    FN_micro = np.sum(FN)
    TN_micro = np.sum(TN)

    # Micro averages for applicable metrics
    TPR_micro = TP_micro / (TP_micro + FN_micro)
    FPR_micro = FP_micro / (FP_micro + TN_micro)

    
    # Print the results for each epsilon
    print(f"Model: {model}")
    print(f"Accuracy: {acc}")
    print(f"Micro Precision: {precision_micro:.2f}")
    print(f"Macro Precision: {precision_macro:.2f}")
    print(f"Micro Recall: {recall_micro:.2f}")
    print(f"Macro Recall: {recall_macro:.2f}")
    print(f"Micro F1 Score: {fscore_micro:.2f}")
    print(f"Macro F1 Score: {fscore_macro:.2f}")
    print(f"Average TNR: {TNR_avg:.2f}, Average FPR: {FPR_avg:.2f}, Average FNR: {FNR_avg:.2f}\n")
    print(f"Macro TNR: {TNR_avg:.2f}, Macro FNR: {FNR_avg:.2f},Macro FPR: {FPR_avg:.2f}")
    print()



Model: DecisionTreeClassifier(max_depth=10, min_samples_split=12)
Accuracy: 0.4213680605623648
Micro Precision: 0.42
Macro Precision: 0.66
Micro Recall: 0.42
Macro Recall: 0.60
Micro F1 Score: 0.42
Macro F1 Score: 0.41
Average TNR: 0.60, Average FPR: 0.40, Average FNR: 0.40

Macro TNR: 0.60, Macro FNR: 0.40,Macro FPR: 0.40

Model: KNeighborsClassifier(n_neighbors=1)
Accuracy: 0.5093096611391492
Micro Precision: 0.51
Macro Precision: 0.68
Micro Recall: 0.51
Macro Recall: 0.66
Micro F1 Score: 0.51
Macro F1 Score: 0.51
Average TNR: 0.66, Average FPR: 0.34, Average FNR: 0.34

Macro TNR: 0.66, Macro FNR: 0.34,Macro FPR: 0.34

Model: LogisticRegression(max_iter=5000, verbose=1)
Accuracy: 0.47891131939437637
Micro Precision: 0.48
Macro Precision: 0.65
Micro Recall: 0.48
Macro Recall: 0.63
Micro F1 Score: 0.48
Macro F1 Score: 0.48
Average TNR: 0.63, Average FPR: 0.37, Average FNR: 0.37

Macro TNR: 0.63, Macro FNR: 0.37,Macro FPR: 0.37





Model: GaussianNB()
Accuracy: 0.8061643835616439
Micro Precision: 0.81
Macro Precision: 0.90
Micro Recall: 0.81
Macro Recall: 0.64
Micro F1 Score: 0.81
Macro F1 Score: 0.66
Average TNR: 0.64, Average FPR: 0.36, Average FNR: 0.36

Macro TNR: 0.64, Macro FNR: 0.36,Macro FPR: 0.36





Model: RandomForestClassifier()
Accuracy: 0.4158480533525595
Micro Precision: 0.42
Macro Precision: 0.66
Micro Recall: 0.42
Macro Recall: 0.60
Micro F1 Score: 0.42
Macro F1 Score: 0.41
Average TNR: 0.60, Average FPR: 0.40, Average FNR: 0.40

Macro TNR: 0.60, Macro FNR: 0.40,Macro FPR: 0.40





Model: SVC(shrinking=False, verbose=True)
Accuracy: 0.27796953857245854
Micro Precision: 0.28
Macro Precision: 0.64
Micro Recall: 0.28
Macro Recall: 0.51
Micro F1 Score: 0.28
Macro F1 Score: 0.22
Average TNR: 0.51, Average FPR: 0.49, Average FNR: 0.49

Macro TNR: 0.51, Macro FNR: 0.49,Macro FPR: 0.49

Model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='mlogloss',
              feature_types=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.2, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=5,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=100,
      

## DF

In [8]:
x_DF_001= np.load('x_test_adv_DF_eps_0.01.npy')
x_DF_001 = np.nan_to_num(x_DF_001)

In [9]:
model_accuracies = {}

# Evaluate each model
for model in models:
    
    # Make predictions with the loaded model
    predictions = model.predict(x_DF_001)
    
     # Calculate micro and macro precision, recall, and F1 score
    precision_micro, recall_micro, fscore_micro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='micro')
    precision_macro, recall_macro, fscore_macro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='macro')
    acc = np.sum(predictions == y_test_indices) / y_test_indices.shape[0]
    # Calculate confusion matrix and then TPR, TNR, FPR, FNR for each class
    conf_matrix = confusion_matrix(y_test_indices, predictions)
    FP = conf_matrix.sum(axis=0) - np.diag(conf_matrix)  
    FN = conf_matrix.sum(axis=1) - np.diag(conf_matrix)
    TP = np.diag(conf_matrix)
    TN = conf_matrix.sum() - (FP + FN + TP)

    TPR = TP / (TP + FN)
    TNR = TN / (TN + FP)
    FPR = FP / (FP + TN)
    FNR = FN / (TP + FN)

    # Averaging TPR, TNR, FPR, FNR
    TPR_avg = np.mean(TPR)
    TNR_avg = np.mean(TNR)
    FPR_avg = np.mean(FPR)
    FNR_avg = np.mean(FNR)
    TP_micro = np.sum(TP)
    FP_micro = np.sum(FP)
    FN_micro = np.sum(FN)
    TN_micro = np.sum(TN)

    # Micro averages for applicable metrics
    TPR_micro = TP_micro / (TP_micro + FN_micro)
    FPR_micro = FP_micro / (FP_micro + TN_micro)

    
    # Print the results for each epsilon
    print(f"Model: {model}")
    print(f"Accuracy: {acc}")
    print(f"Micro Precision: {precision_micro:.2f}")
    print(f"Macro Precision: {precision_macro:.2f}")
    print(f"Micro Recall: {recall_micro:.2f}")
    print(f"Macro Recall: {recall_macro:.2f}")
    print(f"Micro F1 Score: {fscore_micro:.2f}")
    print(f"Macro F1 Score: {fscore_macro:.2f}")
    print(f"Average TNR: {TNR_avg:.2f}, Average FPR: {FPR_avg:.2f}, Average FNR: {FNR_avg:.2f}\n")
    print(f"Macro TNR: {TNR_avg:.2f}, Macro FNR: {FNR_avg:.2f},Macro FPR: {FPR_avg:.2f}")
    print()

  _warn_prf(average, modifier, msg_start, len(result))


Model: DecisionTreeClassifier(max_depth=10, min_samples_split=12)
Accuracy: 0.2706290555155011
Micro Precision: 0.27
Macro Precision: 0.14
Micro Recall: 0.27
Macro Recall: 0.50
Micro F1 Score: 0.27
Macro F1 Score: 0.21
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model: KNeighborsClassifier(n_neighbors=1)
Accuracy: 0.2706290555155011
Micro Precision: 0.27
Macro Precision: 0.14
Micro Recall: 0.27
Macro Recall: 0.50
Micro F1 Score: 0.27
Macro F1 Score: 0.21
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50

Model: LogisticRegression(max_iter=5000, verbose=1)
Accuracy: 0.2706290555155011
Micro Precision: 0.27
Macro Precision: 0.14
Micro Recall: 0.27
Macro Recall: 0.50
Micro F1 Score: 0.27
Macro F1 Score: 0.21
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50



  _warn_prf(average, modifier, msg_start, len(result))


Model: GaussianNB()
Accuracy: 0.2706290555155011
Micro Precision: 0.27
Macro Precision: 0.14
Micro Recall: 0.27
Macro Recall: 0.50
Micro F1 Score: 0.27
Macro F1 Score: 0.21
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50



  _warn_prf(average, modifier, msg_start, len(result))


Model: RandomForestClassifier()
Accuracy: 0.2706290555155011
Micro Precision: 0.27
Macro Precision: 0.14
Micro Recall: 0.27
Macro Recall: 0.50
Micro F1 Score: 0.27
Macro F1 Score: 0.21
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50

Model: SVC(shrinking=False, verbose=True)
Accuracy: 0.2706290555155011
Micro Precision: 0.27
Macro Precision: 0.14
Micro Recall: 0.27
Macro Recall: 0.50
Micro F1 Score: 0.27
Macro F1 Score: 0.21
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50

Model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='mlogloss',
              feature_types=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
x_DF_01= np.load('x_test_adv_DF_eps_0.1.npy')
x_DF_01 = np.nan_to_num(x_DF_01)

In [11]:
model_accuracies = {}

# Evaluate each model
for model in models:
    
    # Make predictions with the loaded model
    predictions = model.predict(x_DF_01)
    
     # Calculate micro and macro precision, recall, and F1 score
    precision_micro, recall_micro, fscore_micro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='micro')
    precision_macro, recall_macro, fscore_macro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='macro')
    acc = np.sum(predictions == y_test_indices) / y_test_indices.shape[0]
    # Calculate confusion matrix and then TPR, TNR, FPR, FNR for each class
    conf_matrix = confusion_matrix(y_test_indices, predictions)
    FP = conf_matrix.sum(axis=0) - np.diag(conf_matrix)  
    FN = conf_matrix.sum(axis=1) - np.diag(conf_matrix)
    TP = np.diag(conf_matrix)
    TN = conf_matrix.sum() - (FP + FN + TP)

    TPR = TP / (TP + FN)
    TNR = TN / (TN + FP)
    FPR = FP / (FP + TN)
    FNR = FN / (TP + FN)

    # Averaging TPR, TNR, FPR, FNR
    TPR_avg = np.mean(TPR)
    TNR_avg = np.mean(TNR)
    FPR_avg = np.mean(FPR)
    FNR_avg = np.mean(FNR)
    TP_micro = np.sum(TP)
    FP_micro = np.sum(FP)
    FN_micro = np.sum(FN)
    TN_micro = np.sum(TN)

    # Micro averages for applicable metrics
    TPR_micro = TP_micro / (TP_micro + FN_micro)
    FPR_micro = FP_micro / (FP_micro + TN_micro)

    
    # Print the results for each epsilon
    print(f"Model: {model}")
    print(f"Accuracy: {acc}")
    print(f"Micro Precision: {precision_micro:.2f}")
    print(f"Macro Precision: {precision_macro:.2f}")
    print(f"Micro Recall: {recall_micro:.2f}")
    print(f"Macro Recall: {recall_macro:.2f}")
    print(f"Micro F1 Score: {fscore_micro:.2f}")
    print(f"Macro F1 Score: {fscore_macro:.2f}")
    print(f"Average TNR: {TNR_avg:.2f}, Average FPR: {FPR_avg:.2f}, Average FNR: {FNR_avg:.2f}\n")
    print(f"Macro TNR: {TNR_avg:.2f}, Macro FNR: {FNR_avg:.2f},Macro FPR: {FPR_avg:.2f}")
    print()

  _warn_prf(average, modifier, msg_start, len(result))


Model: DecisionTreeClassifier(max_depth=10, min_samples_split=12)
Accuracy: 0.2706290555155011
Micro Precision: 0.27
Macro Precision: 0.14
Micro Recall: 0.27
Macro Recall: 0.50
Micro F1 Score: 0.27
Macro F1 Score: 0.21
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model: KNeighborsClassifier(n_neighbors=1)
Accuracy: 0.2706290555155011
Micro Precision: 0.27
Macro Precision: 0.14
Micro Recall: 0.27
Macro Recall: 0.50
Micro F1 Score: 0.27
Macro F1 Score: 0.21
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50

Model: LogisticRegression(max_iter=5000, verbose=1)
Accuracy: 0.2706290555155011
Micro Precision: 0.27
Macro Precision: 0.14
Micro Recall: 0.27
Macro Recall: 0.50
Micro F1 Score: 0.27
Macro F1 Score: 0.21
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50



  _warn_prf(average, modifier, msg_start, len(result))


Model: GaussianNB()
Accuracy: 0.2706290555155011
Micro Precision: 0.27
Macro Precision: 0.14
Micro Recall: 0.27
Macro Recall: 0.50
Micro F1 Score: 0.27
Macro F1 Score: 0.21
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50



  _warn_prf(average, modifier, msg_start, len(result))


Model: RandomForestClassifier()
Accuracy: 0.2706290555155011
Micro Precision: 0.27
Macro Precision: 0.14
Micro Recall: 0.27
Macro Recall: 0.50
Micro F1 Score: 0.27
Macro F1 Score: 0.21
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50

Model: SVC(shrinking=False, verbose=True)
Accuracy: 0.2706290555155011
Micro Precision: 0.27
Macro Precision: 0.14
Micro Recall: 0.27
Macro Recall: 0.50
Micro F1 Score: 0.27
Macro F1 Score: 0.21
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50

Model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='mlogloss',
              feature_types=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## FGSM

In [12]:
x_FGSM_001= np.load('x_test_FGSM_eps_0.01.npy')
x_FGSM_001 = np.nan_to_num(x_FGSM_001)

In [13]:
model_accuracies = {}

# Evaluate each model
for model in models:
    
    # Make predictions with the loaded model
    predictions = model.predict(x_FGSM_001)
    
     # Calculate micro and macro precision, recall, and F1 score
    precision_micro, recall_micro, fscore_micro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='micro')
    precision_macro, recall_macro, fscore_macro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='macro')
    acc = np.sum(predictions == y_test_indices) / y_test_indices.shape[0]
    # Calculate confusion matrix and then TPR, TNR, FPR, FNR for each class
    conf_matrix = confusion_matrix(y_test_indices, predictions)
    FP = conf_matrix.sum(axis=0) - np.diag(conf_matrix)  
    FN = conf_matrix.sum(axis=1) - np.diag(conf_matrix)
    TP = np.diag(conf_matrix)
    TN = conf_matrix.sum() - (FP + FN + TP)

    TPR = TP / (TP + FN)
    TNR = TN / (TN + FP)
    FPR = FP / (FP + TN)
    FNR = FN / (TP + FN)

    # Averaging TPR, TNR, FPR, FNR
    TPR_avg = np.mean(TPR)
    TNR_avg = np.mean(TNR)
    FPR_avg = np.mean(FPR)
    FNR_avg = np.mean(FNR)
    TP_micro = np.sum(TP)
    FP_micro = np.sum(FP)
    FN_micro = np.sum(FN)
    TN_micro = np.sum(TN)

    # Micro averages for applicable metrics
    TPR_micro = TP_micro / (TP_micro + FN_micro)
    FPR_micro = FP_micro / (FP_micro + TN_micro)

    
    # Print the results for each epsilon
    print(f"Model: {model}")
    print(f"Accuracy: {acc}")
    print(f"Micro Precision: {precision_micro:.2f}")
    print(f"Macro Precision: {precision_macro:.2f}")
    print(f"Micro Recall: {recall_micro:.2f}")
    print(f"Macro Recall: {recall_macro:.2f}")
    print(f"Micro F1 Score: {fscore_micro:.2f}")
    print(f"Macro F1 Score: {fscore_macro:.2f}")
    print(f"Average TNR: {TNR_avg:.2f}, Average FPR: {FPR_avg:.2f}, Average FNR: {FNR_avg:.2f}\n")
    print(f"Macro TNR: {TNR_avg:.2f}, Macro FNR: {FNR_avg:.2f},Macro FPR: {FPR_avg:.2f}")
    print()



Model: DecisionTreeClassifier(max_depth=10, min_samples_split=12)
Accuracy: 0.4213680605623648
Micro Precision: 0.42
Macro Precision: 0.66
Micro Recall: 0.42
Macro Recall: 0.60
Micro F1 Score: 0.42
Macro F1 Score: 0.41
Average TNR: 0.60, Average FPR: 0.40, Average FNR: 0.40

Macro TNR: 0.60, Macro FNR: 0.40,Macro FPR: 0.40

Model: KNeighborsClassifier(n_neighbors=1)
Accuracy: 0.5260454217736121
Micro Precision: 0.53
Macro Precision: 0.68
Micro Recall: 0.53
Macro Recall: 0.67
Micro F1 Score: 0.53
Macro F1 Score: 0.53
Average TNR: 0.67, Average FPR: 0.33, Average FNR: 0.33

Macro TNR: 0.67, Macro FNR: 0.33,Macro FPR: 0.33

Model: LogisticRegression(max_iter=5000, verbose=1)
Accuracy: 0.4781407714491709
Micro Precision: 0.48
Macro Precision: 0.65
Micro Recall: 0.48
Macro Recall: 0.63
Micro F1 Score: 0.48
Macro F1 Score: 0.48
Average TNR: 0.63, Average FPR: 0.37, Average FNR: 0.37

Macro TNR: 0.63, Macro FNR: 0.37,Macro FPR: 0.37





Model: GaussianNB()
Accuracy: 0.8054253785147801
Micro Precision: 0.81
Macro Precision: 0.89
Micro Recall: 0.81
Macro Recall: 0.64
Micro F1 Score: 0.81
Macro F1 Score: 0.66
Average TNR: 0.64, Average FPR: 0.36, Average FNR: 0.36

Macro TNR: 0.64, Macro FNR: 0.36,Macro FPR: 0.36





Model: RandomForestClassifier()
Accuracy: 0.4158480533525595
Micro Precision: 0.42
Macro Precision: 0.66
Micro Recall: 0.42
Macro Recall: 0.60
Micro F1 Score: 0.42
Macro F1 Score: 0.41
Average TNR: 0.60, Average FPR: 0.40, Average FNR: 0.40

Macro TNR: 0.60, Macro FNR: 0.40,Macro FPR: 0.40





Model: SVC(shrinking=False, verbose=True)
Accuracy: 0.2753469718817592
Micro Precision: 0.28
Macro Precision: 0.64
Micro Recall: 0.28
Macro Recall: 0.50
Micro F1 Score: 0.28
Macro F1 Score: 0.22
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50

Model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='mlogloss',
              feature_types=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.2, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=5,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=100,
       

In [14]:
x_FGSM_01= np.load('x_test_FGSM_eps_0.1.npy')
x_FGSM_01 = np.nan_to_num(x_FGSM_01)

In [15]:
model_accuracies = {}

# Evaluate each model
for model in models:
    
    # Make predictions with the loaded model
    predictions = model.predict(x_FGSM_001)
    
     # Calculate micro and macro precision, recall, and F1 score
    precision_micro, recall_micro, fscore_micro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='micro')
    precision_macro, recall_macro, fscore_macro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='macro')
    acc = np.sum(predictions == y_test_indices) / y_test_indices.shape[0]
    # Calculate confusion matrix and then TPR, TNR, FPR, FNR for each class
    conf_matrix = confusion_matrix(y_test_indices, predictions)
    FP = conf_matrix.sum(axis=0) - np.diag(conf_matrix)  
    FN = conf_matrix.sum(axis=1) - np.diag(conf_matrix)
    TP = np.diag(conf_matrix)
    TN = conf_matrix.sum() - (FP + FN + TP)

    TPR = TP / (TP + FN)
    TNR = TN / (TN + FP)
    FPR = FP / (FP + TN)
    FNR = FN / (TP + FN)

    # Averaging TPR, TNR, FPR, FNR
    TPR_avg = np.mean(TPR)
    TNR_avg = np.mean(TNR)
    FPR_avg = np.mean(FPR)
    FNR_avg = np.mean(FNR)
    TP_micro = np.sum(TP)
    FP_micro = np.sum(FP)
    FN_micro = np.sum(FN)
    TN_micro = np.sum(TN)

    # Micro averages for applicable metrics
    TPR_micro = TP_micro / (TP_micro + FN_micro)
    FPR_micro = FP_micro / (FP_micro + TN_micro)

    
    # Print the results for each epsilon
    print(f"Model: {model}")
    print(f"Accuracy: {acc}")
    print(f"Micro Precision: {precision_micro:.2f}")
    print(f"Macro Precision: {precision_macro:.2f}")
    print(f"Micro Recall: {recall_micro:.2f}")
    print(f"Macro Recall: {recall_macro:.2f}")
    print(f"Micro F1 Score: {fscore_micro:.2f}")
    print(f"Macro F1 Score: {fscore_macro:.2f}")
    print(f"Average TNR: {TNR_avg:.2f}, Average FPR: {FPR_avg:.2f}, Average FNR: {FNR_avg:.2f}\n")
    print(f"Macro TNR: {TNR_avg:.2f}, Macro FNR: {FNR_avg:.2f},Macro FPR: {FPR_avg:.2f}")
    print()



Model: DecisionTreeClassifier(max_depth=10, min_samples_split=12)
Accuracy: 0.4213680605623648
Micro Precision: 0.42
Macro Precision: 0.66
Micro Recall: 0.42
Macro Recall: 0.60
Micro F1 Score: 0.42
Macro F1 Score: 0.41
Average TNR: 0.60, Average FPR: 0.40, Average FNR: 0.40

Macro TNR: 0.60, Macro FNR: 0.40,Macro FPR: 0.40

Model: KNeighborsClassifier(n_neighbors=1)
Accuracy: 0.5260454217736121
Micro Precision: 0.53
Macro Precision: 0.68
Micro Recall: 0.53
Macro Recall: 0.67
Micro F1 Score: 0.53
Macro F1 Score: 0.53
Average TNR: 0.67, Average FPR: 0.33, Average FNR: 0.33

Macro TNR: 0.67, Macro FNR: 0.33,Macro FPR: 0.33

Model: LogisticRegression(max_iter=5000, verbose=1)
Accuracy: 0.4781407714491709
Micro Precision: 0.48
Macro Precision: 0.65
Micro Recall: 0.48
Macro Recall: 0.63
Micro F1 Score: 0.48
Macro F1 Score: 0.48
Average TNR: 0.63, Average FPR: 0.37, Average FNR: 0.37

Macro TNR: 0.63, Macro FNR: 0.37,Macro FPR: 0.37





Model: GaussianNB()
Accuracy: 0.8054253785147801
Micro Precision: 0.81
Macro Precision: 0.89
Micro Recall: 0.81
Macro Recall: 0.64
Micro F1 Score: 0.81
Macro F1 Score: 0.66
Average TNR: 0.64, Average FPR: 0.36, Average FNR: 0.36

Macro TNR: 0.64, Macro FNR: 0.36,Macro FPR: 0.36





Model: RandomForestClassifier()
Accuracy: 0.4158480533525595
Micro Precision: 0.42
Macro Precision: 0.66
Micro Recall: 0.42
Macro Recall: 0.60
Micro F1 Score: 0.42
Macro F1 Score: 0.41
Average TNR: 0.60, Average FPR: 0.40, Average FNR: 0.40

Macro TNR: 0.60, Macro FNR: 0.40,Macro FPR: 0.40





Model: SVC(shrinking=False, verbose=True)
Accuracy: 0.2753469718817592
Micro Precision: 0.28
Macro Precision: 0.64
Micro Recall: 0.28
Macro Recall: 0.50
Micro F1 Score: 0.28
Macro F1 Score: 0.22
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50

Model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='mlogloss',
              feature_types=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.2, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=5,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=100,
       

## PGD

In [16]:
x_PGD_001= np.load('x_test_adv_PGD_eps_0.01.npy')
x_PGD_001 = np.nan_to_num(x_PGD_001)

In [17]:
model_accuracies = {}

# Evaluate each model
for model in models:
    
    # Make predictions with the loaded model
    predictions = model.predict(x_PGD_001)
    
     # Calculate micro and macro precision, recall, and F1 score
    precision_micro, recall_micro, fscore_micro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='micro')
    precision_macro, recall_macro, fscore_macro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='macro')
    acc = np.sum(predictions == y_test_indices) / y_test_indices.shape[0]
    # Calculate confusion matrix and then TPR, TNR, FPR, FNR for each class
    conf_matrix = confusion_matrix(y_test_indices, predictions)
    FP = conf_matrix.sum(axis=0) - np.diag(conf_matrix)  
    FN = conf_matrix.sum(axis=1) - np.diag(conf_matrix)
    TP = np.diag(conf_matrix)
    TN = conf_matrix.sum() - (FP + FN + TP)

    TPR = TP / (TP + FN)
    TNR = TN / (TN + FP)
    FPR = FP / (FP + TN)
    FNR = FN / (TP + FN)

    # Averaging TPR, TNR, FPR, FNR
    TPR_avg = np.mean(TPR)
    TNR_avg = np.mean(TNR)
    FPR_avg = np.mean(FPR)
    FNR_avg = np.mean(FNR)
    TP_micro = np.sum(TP)
    FP_micro = np.sum(FP)
    FN_micro = np.sum(FN)
    TN_micro = np.sum(TN)

    # Micro averages for applicable metrics
    TPR_micro = TP_micro / (TP_micro + FN_micro)
    FPR_micro = FP_micro / (FP_micro + TN_micro)

    
    # Print the results for each epsilon
    print(f"Model: {model}")
    print(f"Accuracy: {acc}")
    print(f"Micro Precision: {precision_micro:.2f}")
    print(f"Macro Precision: {precision_macro:.2f}")
    print(f"Micro Recall: {recall_micro:.2f}")
    print(f"Macro Recall: {recall_macro:.2f}")
    print(f"Micro F1 Score: {fscore_micro:.2f}")
    print(f"Macro F1 Score: {fscore_macro:.2f}")
    print(f"Average TNR: {TNR_avg:.2f}, Average FPR: {FPR_avg:.2f}, Average FNR: {FNR_avg:.2f}\n")
    print(f"Macro TNR: {TNR_avg:.2f}, Macro FNR: {FNR_avg:.2f},Macro FPR: {FPR_avg:.2f}")
    print()



Model: DecisionTreeClassifier(max_depth=10, min_samples_split=12)
Accuracy: 0.4213680605623648
Micro Precision: 0.42
Macro Precision: 0.66
Micro Recall: 0.42
Macro Recall: 0.60
Micro F1 Score: 0.42
Macro F1 Score: 0.41
Average TNR: 0.60, Average FPR: 0.40, Average FNR: 0.40

Macro TNR: 0.60, Macro FNR: 0.40,Macro FPR: 0.40

Model: KNeighborsClassifier(n_neighbors=1)
Accuracy: 0.5260454217736121
Micro Precision: 0.53
Macro Precision: 0.68
Micro Recall: 0.53
Macro Recall: 0.67
Micro F1 Score: 0.53
Macro F1 Score: 0.53
Average TNR: 0.67, Average FPR: 0.33, Average FNR: 0.33

Macro TNR: 0.67, Macro FNR: 0.33,Macro FPR: 0.33

Model: LogisticRegression(max_iter=5000, verbose=1)
Accuracy: 0.4781407714491709
Micro Precision: 0.48
Macro Precision: 0.65
Micro Recall: 0.48
Macro Recall: 0.63
Micro F1 Score: 0.48
Macro F1 Score: 0.48
Average TNR: 0.63, Average FPR: 0.37, Average FNR: 0.37

Macro TNR: 0.63, Macro FNR: 0.37,Macro FPR: 0.37





Model: GaussianNB()
Accuracy: 0.8054253785147801
Micro Precision: 0.81
Macro Precision: 0.89
Micro Recall: 0.81
Macro Recall: 0.64
Micro F1 Score: 0.81
Macro F1 Score: 0.66
Average TNR: 0.64, Average FPR: 0.36, Average FNR: 0.36

Macro TNR: 0.64, Macro FNR: 0.36,Macro FPR: 0.36





Model: RandomForestClassifier()
Accuracy: 0.4158480533525595
Micro Precision: 0.42
Macro Precision: 0.66
Micro Recall: 0.42
Macro Recall: 0.60
Micro F1 Score: 0.42
Macro F1 Score: 0.41
Average TNR: 0.60, Average FPR: 0.40, Average FNR: 0.40

Macro TNR: 0.60, Macro FNR: 0.40,Macro FPR: 0.40





Model: SVC(shrinking=False, verbose=True)
Accuracy: 0.2753469718817592
Micro Precision: 0.28
Macro Precision: 0.64
Micro Recall: 0.28
Macro Recall: 0.50
Micro F1 Score: 0.28
Macro F1 Score: 0.22
Average TNR: 0.50, Average FPR: 0.50, Average FNR: 0.50

Macro TNR: 0.50, Macro FNR: 0.50,Macro FPR: 0.50

Model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='mlogloss',
              feature_types=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.2, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=5,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=100,
       

In [18]:
x_PGD_01= np.load('x_test_adv_PGD_eps_0.1.npy')
x_PGD_01 = np.nan_to_num(x_PGD_01)

In [None]:
model_accuracies = {}

# Evaluate each model
for model in models:
    
    # Make predictions with the loaded model
    predictions = model.predict(x_PGD_01)
    
     # Calculate micro and macro precision, recall, and F1 score
    precision_micro, recall_micro, fscore_micro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='micro')
    precision_macro, recall_macro, fscore_macro, _ = precision_recall_fscore_support(
        y_test_indices, predictions, average='macro')
    acc = np.sum(predictions == y_test_indices) / y_test_indices.shape[0]
    # Calculate confusion matrix and then TPR, TNR, FPR, FNR for each class
    conf_matrix = confusion_matrix(y_test_indices, predictions)
    FP = conf_matrix.sum(axis=0) - np.diag(conf_matrix)  
    FN = conf_matrix.sum(axis=1) - np.diag(conf_matrix)
    TP = np.diag(conf_matrix)
    TN = conf_matrix.sum() - (FP + FN + TP)

    TPR = TP / (TP + FN)
    TNR = TN / (TN + FP)
    FPR = FP / (FP + TN)
    FNR = FN / (TP + FN)

    # Averaging TPR, TNR, FPR, FNR
    TPR_avg = np.mean(TPR)
    TNR_avg = np.mean(TNR)
    FPR_avg = np.mean(FPR)
    FNR_avg = np.mean(FNR)
    TP_micro = np.sum(TP)
    FP_micro = np.sum(FP)
    FN_micro = np.sum(FN)
    TN_micro = np.sum(TN)

    # Micro averages for applicable metrics
    TPR_micro = TP_micro / (TP_micro + FN_micro)
    FPR_micro = FP_micro / (FP_micro + TN_micro)

    
    # Print the results for each epsilon
    print(f"Model: {model}")
    print(f"Accuracy: {acc}")
    print(f"Micro Precision: {precision_micro:.2f}")
    print(f"Macro Precision: {precision_macro:.2f}")
    print(f"Micro Recall: {recall_micro:.2f}")
    print(f"Macro Recall: {recall_macro:.2f}")
    print(f"Micro F1 Score: {fscore_micro:.2f}")
    print(f"Macro F1 Score: {fscore_macro:.2f}")
    print(f"Average TNR: {TNR_avg:.2f}, Average FPR: {FPR_avg:.2f}, Average FNR: {FNR_avg:.2f}\n")
    print(f"Macro TNR: {TNR_avg:.2f}, Macro FNR: {FNR_avg:.2f},Macro FPR: {FPR_avg:.2f}")
    print()



Model: DecisionTreeClassifier(max_depth=10, min_samples_split=12)
Accuracy: 0.4213680605623648
Micro Precision: 0.42
Macro Precision: 0.66
Micro Recall: 0.42
Macro Recall: 0.60
Micro F1 Score: 0.42
Macro F1 Score: 0.41
Average TNR: 0.60, Average FPR: 0.40, Average FNR: 0.40

Macro TNR: 0.60, Macro FNR: 0.40,Macro FPR: 0.40

Model: KNeighborsClassifier(n_neighbors=1)
Accuracy: 0.5093096611391492
Micro Precision: 0.51
Macro Precision: 0.68
Micro Recall: 0.51
Macro Recall: 0.66
Micro F1 Score: 0.51
Macro F1 Score: 0.51
Average TNR: 0.66, Average FPR: 0.34, Average FNR: 0.34

Macro TNR: 0.66, Macro FNR: 0.34,Macro FPR: 0.34

Model: LogisticRegression(max_iter=5000, verbose=1)
Accuracy: 0.47891131939437637
Micro Precision: 0.48
Macro Precision: 0.65
Micro Recall: 0.48
Macro Recall: 0.63
Micro F1 Score: 0.48
Macro F1 Score: 0.48
Average TNR: 0.63, Average FPR: 0.37, Average FNR: 0.37

Macro TNR: 0.63, Macro FNR: 0.37,Macro FPR: 0.37





Model: GaussianNB()
Accuracy: 0.8061643835616439
Micro Precision: 0.81
Macro Precision: 0.90
Micro Recall: 0.81
Macro Recall: 0.64
Micro F1 Score: 0.81
Macro F1 Score: 0.66
Average TNR: 0.64, Average FPR: 0.36, Average FNR: 0.36

Macro TNR: 0.64, Macro FNR: 0.36,Macro FPR: 0.36





Model: RandomForestClassifier()
Accuracy: 0.4158480533525595
Micro Precision: 0.42
Macro Precision: 0.66
Micro Recall: 0.42
Macro Recall: 0.60
Micro F1 Score: 0.42
Macro F1 Score: 0.41
Average TNR: 0.60, Average FPR: 0.40, Average FNR: 0.40

Macro TNR: 0.60, Macro FNR: 0.40,Macro FPR: 0.40



