In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

y_test_path = "/home/jovyan/Sample_Based_Extension/WUSTL/y_test.npy"
y_test = np.load(y_test_path)

# Attack models and epsilons
attack_models = [
    ("baseline", [0]),
    ("BIM", [0.01, 0.1, 0.2, 0.3]),
    ("FGSM", [0.01, 0.1, 0.2, 0.3]),
    ("PGD", [0.01, 0.1, 0.2, 0.3]),
    ("DF", [0.01, 0.1, 0.2, 0.3]),
    ("AutoPGD", [0.01, 0.1, 0.2, 0.3]),
    ("ZOO", [0.01, 0.1, 0.2, 0.3]),
    ("CaFA", [0.01, 0.1, 0.2, 0.3]),
    ("SINIFGSM", [0.01, 0.1, 0.2, 0.3]),
    ("VNIFGSM", [0.01, 0.1, 0.2, 0.3]),
]

results = []

for attack_name, epsilons in attack_models:
    for eps in epsilons:
        for defense_id in range(0, 12):

            y_pred_path = f"/home/jovyan/Sample_Based_Extension/WUSTL/WUSTL_Defense_Label/WUSTL_Def{defense_id}/y_pred_{attack_name}{eps}_Def{defense_id}.npy"
            y_pred = np.load(y_pred_path)
            
            accuracy = accuracy_score(y_test, y_pred)
            precision, recall, f1_macro, _ = precision_recall_fscore_support(y_test, y_pred, average='macro', zero_division=0)
            results.append((attack_name, eps, f"Def{defense_id}", accuracy, f1_macro))


df_results = pd.DataFrame(results, columns=["Attack", "Epsilon", "Defense", "Accuracy", "Macro F1"])

attack_order = ["baseline", "BIM", "FGSM", "PGD", "DF", "AutoPGD", "ZOO","CaFA", "SINIFGSM", "VNIFGSM"]

df_results["Epsilon"] = df_results["Epsilon"].astype(str)

df_pivot = df_results.pivot_table(index=["Attack", "Epsilon"], columns="Defense", values="Macro F1")
df_pivot = df_pivot.sort_index(level=["Attack", "Epsilon"], key=lambda x: x.map({v: i for i, v in enumerate(attack_order)}) if x.name == "Attack" else x.astype(float))

defense_order = [f"Def{i}" for i in range(0, 12)]
df_pivot = df_pivot[defense_order]
df_pivot

df_pivot.to_csv("/home/jovyan/Sample_Based_Extension/WUSTL/Macrof1_Performance.csv")
df_pivot


Unnamed: 0_level_0,Defense,Def0,Def1,Def2,Def3,Def4,Def5,Def6,Def7,Def8,Def9,Def10,Def11
Attack,Epsilon,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
baseline,0.0,0.942426,0.946514,0.951779,0.948088,0.942426,0.934648,0.950423,0.946876,0.916498,0.594327,0.59571,0.935948
BIM,0.01,0.857312,0.946514,0.969086,0.948088,0.934718,0.680634,0.926043,0.856737,0.853592,0.594072,0.594196,0.916792
BIM,0.1,0.005861,0.900597,0.492502,0.895169,0.174686,0.633889,0.585446,0.005868,0.308723,0.521197,0.524566,0.464955
BIM,0.2,0.004631,0.602747,0.256076,0.685744,0.004933,0.650213,0.394607,0.004631,0.052023,0.411447,0.412327,0.209992
BIM,0.3,0.004631,0.256177,0.19206,0.381231,0.004763,0.483222,0.196067,0.004631,0.275702,0.3773,0.396046,0.193058
FGSM,0.01,0.855496,0.946514,0.969086,0.948088,0.928468,0.684541,0.936732,0.856091,0.878195,0.594072,0.594331,0.917387
FGSM,0.1,0.005481,0.900223,0.602641,0.897626,0.487057,0.587246,0.638265,0.082927,0.258634,0.541695,0.552518,0.434455
FGSM,0.2,0.004792,0.593049,0.23632,0.667647,0.222199,0.443053,0.223005,0.00479,0.230333,0.360031,0.382593,0.199319
FGSM,0.3,0.004731,0.333245,0.306882,0.444769,0.028775,0.406062,0.193057,0.004731,0.25343,0.293191,0.31334,0.197206
PGD,0.01,0.857312,0.946514,0.969086,0.948088,0.934718,0.680634,0.926043,0.856725,0.853592,0.594072,0.594196,0.916792


In [11]:
# # Binary Oracle
# import numpy as np
# import pandas as pd
# from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# y_test_path = "/home/jovyan/Sample_Based_Extension/WUSTL/y_test.npy"
# y_test = np.load(y_test_path)

# # Attack models and epsilons
# attack_models = [
#     ("baseline", [0]),
#     ("BIM", [0.01, 0.1, 0.2, 0.3]),
#     ("FGSM", [0.01, 0.1, 0.2, 0.3]),
#     ("PGD", [0.01, 0.1, 0.2, 0.3]),
#     ("DF", [0.01, 0.1, 0.2, 0.3]),
#     ("AutoPGD", [0.01, 0.1, 0.2, 0.3]),
#     ("ZOO", [0.01, 0.1, 0.2, 0.3]),
#     ("CaFA", [0.01, 0.1, 0.2, 0.3]),
#     ("SINIFGSM", [0.01, 0.1, 0.2, 0.3]),
#     ("VNIFGSM", [0.01, 0.1, 0.2, 0.3]),
# ]

# results = []

# for attack_name, epsilons in attack_models:
#     for eps in epsilons:
#         y_preds_list = []

#         for defense_id in range(1, 12):
#             y_pred_path = f"/home/jovyan/Sample_Based_Extension/WUSTL/WUSTL_Defense_Label/WUSTL_Def{defense_id}/y_pred_{attack_name}{eps}_Def{defense_id}.npy"
#             y_pred = np.load(y_pred_path)
#             if y_pred.shape != y_test.shape:
#                 print(f"Warning: Shape mismatch in {y_pred_path}, expected {y_test.shape}, got {y_pred.shape}")
#                 continue

#             y_preds_list.append(y_pred)
#         if len(y_preds_list) > 0:
#             y_preds = np.array(y_preds_list)
#             final_pred = np.any(y_preds == y_test, axis=0)  # Shape: (num_samples,)
#             accuracy = np.mean(final_pred)
#             final_pred_labels = np.where(final_pred, y_test, 1 - y_test)
#             precision, recall, f1_macro, _ = precision_recall_fscore_support(y_test, final_pred_labels, average='macro', zero_division=0)
#             results.append((attack_name, eps, "Def1-11 Combined", accuracy, f1_macro))

#             # print(f"{attack_name} (ε={eps}) | Def1-10 Combined: Accuracy = {accuracy:.4f}, Macro F1 = {f1_macro:.4f}")

# df_results = pd.DataFrame(results, columns=["Attack", "Epsilon", "Defense", "Accuracy", "Macro F1"])


# attack_order = ["baseline", "BIM", "FGSM", "PGD", "DF", "AutoPGD", "ZOO","CaFA", "SINIFGSM", "VNIFGSM"]

# df_results["Epsilon"] = df_results["Epsilon"].astype(str)

# df_pivot = df_results.pivot_table(index=["Attack", "Epsilon"], columns="Defense", values="Macro F1")
# df_pivot = df_pivot.sort_index(level=["Attack", "Epsilon"], key=lambda x: x.map({v: i for i, v in enumerate(attack_order)}) if x.name == "Attack" else x.astype(float))

# df_pivot.to_csv("/home/jovyan/Sample_Based_Extension/WUSTL/Oracle_Performance.csv")

# df_pivot




In [12]:
# Multiple Label Oracle

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from scipy.stats import mode

y_test_path = "/home/jovyan/Sample_Based_Extension/WUSTL/y_test.npy"
y_test = np.load(y_test_path)

num_classes = len(np.unique(y_test))
print(f"Number of classes: {num_classes}")

y_test = y_test.astype(int)

attack_models = [
    ("baseline", [0]),
    ("BIM", [0.01, 0.1, 0.2, 0.3]),
    ("FGSM", [0.01, 0.1, 0.2, 0.3]),
    ("PGD", [0.01, 0.1, 0.2, 0.3]),
    ("DF", [0.01, 0.1, 0.2, 0.3]),
    ("AutoPGD", [0.01, 0.1, 0.2, 0.3]),
    ("ZOO", [0.01, 0.1, 0.2, 0.3]),
    ("CaFA", [0.01, 0.1, 0.2, 0.3]),
    ("SINIFGSM", [0.01, 0.1, 0.2, 0.3]),
    ("VNIFGSM", [0.01, 0.1, 0.2, 0.3]),
]

results = []

for attack_name, epsilons in attack_models:
    for eps in epsilons:
        y_preds_list = []
        for defense_id in range(1, 12):
            y_pred_path = f"/home/jovyan/Sample_Based_Extension/WUSTL/WUSTL_Defense_Label/WUSTL_Def{defense_id}/y_pred_{attack_name}{eps}_Def{defense_id}.npy"
            try:
                y_pred = np.load(y_pred_path)
                if y_pred.shape != y_test.shape:
                    print(f"Warning: Shape mismatch in {y_pred_path}, expected {y_test.shape}, got {y_pred.shape}")
                    continue
                y_pred = y_pred.astype(int)
                y_preds_list.append(y_pred)
            except FileNotFoundError:
                print(f"Warning: File not found: {y_pred_path}")
                continue

        y_preds = np.array(y_preds_list)

        final_pred = np.any(y_preds == y_test, axis=0)  # (num_samples,)

        final_pred_labels = y_test.copy()
        incorrect_indices = np.where(~final_pred)[0]

        if len(incorrect_indices) > 0:
            incorrect_preds = y_preds[:, incorrect_indices]
            majority_labels, _ = mode(incorrect_preds, axis=0, keepdims=True)
            final_pred_labels[incorrect_indices] = majority_labels[0]

        accuracy = accuracy_score(y_test, final_pred_labels)
        precision, recall, f1_macro, _ = precision_recall_fscore_support(
            y_test, final_pred_labels, average='macro', zero_division=0
        )
        results.append((attack_name, eps, "Def1-11 Combined", accuracy, f1_macro))
        # print(f"{attack_name} (ε={eps}) | Loaded {len(y_preds_list)}/11 models | Accuracy = {accuracy:.4f}, Macro F1 = {f1_macro:.4f}")


df_results = pd.DataFrame(results, columns=["Attack", "Epsilon", "Defense", "Accuracy", "Macro F1"])
attack_order = ["baseline", "BIM", "FGSM", "PGD", "DF", "AutoPGD", "ZOO", "CaFA", "SINIFGSM", "VNIFGSM"]
df_results["Epsilon"] = df_results["Epsilon"].astype(str)
df_pivot = df_results.pivot_table(index=["Attack", "Epsilon"], columns="Defense", values="Macro F1")
df_pivot = df_pivot.sort_index(level=["Attack", "Epsilon"], key=lambda x: x.map({v: i for i, v in enumerate(attack_order)}) if x.name == "Attack" else x.astype(float))


df_pivot.to_csv("/home/jovyan/Sample_Based_Extension/WUSTL/Oracle_Performance.csv")

df_pivot


Number of classes: 5


Unnamed: 0_level_0,Defense,Def1-11 Combined
Attack,Epsilon,Unnamed: 2_level_1
baseline,0.0,0.995109
BIM,0.01,0.995109
BIM,0.1,0.960065
BIM,0.2,0.930555
BIM,0.3,0.751838
FGSM,0.01,0.995109
FGSM,0.1,0.939003
FGSM,0.2,0.889464
FGSM,0.3,0.667343
PGD,0.01,0.995109


In [13]:
attack_models = [
    ("baseline", [0]),
    ("BIM", [0.01, 0.2, 0.3]),
    ("FGSM", [0.01, 0.2, 0.3]),
    ("PGD", [0.01, 0.2, 0.3]),
    ("DF", [0.01, 0.2, 0.3]),
    ("AutoPGD", [0.01, 0.2, 0.3]),
    ("ZOO", [0.01, 0.2, 0.3]),
    ("CaFA", [0.01, 0.2, 0.3]),
    ("SINIFGSM", [0.01, 0.2, 0.3]),
    ("VNIFGSM", [0.01, 0.2, 0.3]),
]

results = []
y_test = np.load(y_test_path)



for attack_name, epsilons in attack_models:
    f = []
    y_test_all_eps = []

    for eps in epsilons:
        y_preds_list = []
        y_test_all_eps.append(y_test)

        for defense_id in range(1, 12):
            y_pred_path = f"/home/jovyan/Sample_Based_Extension/WUSTL/WUSTL_Defense_Label/WUSTL_Def{defense_id}/y_pred_{attack_name}{eps}_Def{defense_id}.npy"
            y_pred = np.load(y_pred_path)
            if y_pred.shape != y_test.shape:
                print(f"Warning: Shape mismatch in {y_pred_path}, expected {y_test.shape}, got {y_pred.shape}")
                continue

            y_preds_list.append(y_pred)
        if len(y_preds_list) > 0:
            y_preds = np.array(y_preds_list)
            final_pred = np.any(y_preds == y_test, axis=0)  # Shape: (num_samples,)
            accuracy = np.mean(final_pred)
            final_pred_labels = np.where(final_pred, y_test, 1 - y_test)
            f.append(final_pred_labels)
            
    y_test_all_eps = np.concatenate(y_test_all_eps, axis=0)
    # print(y_test_all_eps.shape)

    f=np.concatenate(f, axis=0)
    # print(f.shape)
    precision, recall, f1_macro, _ = precision_recall_fscore_support(y_test_all_eps, f, average='macro', zero_division=0)
    results.append((attack_name, eps, "Def1-11 Combined", accuracy, f1_macro))


            

In [14]:
results

[('baseline', 0, 'Def1-11 Combined', 0.9999916231419081, 0.9913483663138518),
 ('BIM', 0.3, 'Def1-11 Combined', 0.9984796002563319, 0.7189707551442801),
 ('FGSM', 0.3, 'Def1-11 Combined', 0.9890221274706495, 0.642607802220218),
 ('PGD', 0.3, 'Def1-11 Combined', 0.9984796002563319, 0.7189707551442801),
 ('DF', 0.3, 'Def1-11 Combined', 0.9621324129958576, 0.5133604138401928),
 ('AutoPGD', 0.3, 'Def1-11 Combined', 0.9969047509350668, 0.6569247253101851),
 ('ZOO', 0.3, 'Def1-11 Combined', 0.9999916231419081, 0.9913483663138518),
 ('CaFA', 0.3, 'Def1-11 Combined', 0.8726256842845954, 0.57779857033252),
 ('SINIFGSM', 0.3, 'Def1-11 Combined', 0.9984879771144237, 0.7293516857692497),
 ('VNIFGSM', 0.3, 'Def1-11 Combined', 0.9995853455244541, 0.6593247506590866)]

In [16]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support


y_test_path = "/home/jovyan/Sample_Based_Extension/WUSTL/y_test.npy"
y_test = np.load(y_test_path)

attack_models = [
    ("baseline", [0]),
    ("BIM", [0.01, 0.1, 0.2, 0.3]),
    ("FGSM", [0.01, 0.1, 0.2, 0.3]),
    ("PGD", [0.01, 0.1, 0.2, 0.3]),
    ("DF", [0.01, 0.1, 0.2, 0.3]),
    ("AutoPGD", [0.01, 0.1, 0.2, 0.3]),
    ("ZOO", [0.01, 0.1, 0.2, 0.3]),
    ("CaFA", [0.01, 0.1, 0.2, 0.3]),
    ("SINIFGSM", [0.01, 0.1, 0.2, 0.3]),
    ("VNIFGSM", [0.01, 0.1, 0.2, 0.3]),
]

df_results = []

for attack_name, epsilons in attack_models:
    for eps in epsilons:
        for defense_id in range(1, 12):
            y_pred_path = f"/home/jovyan/Sample_Based_Extension/WUSTL/WUSTL_Defense_Label/WUSTL_Def{defense_id}/y_pred_{attack_name}{eps}_Def{defense_id}.npy"
            y_pred = np.load(y_pred_path)

            accuracy = accuracy_score(y_test, y_pred)
            precision, recall, f1_macro, _ = precision_recall_fscore_support(y_test, y_pred, average='macro', zero_division=0)
            df_results.append((attack_name, eps, f"Def{defense_id}", accuracy, f1_macro))

df_results = pd.DataFrame(df_results, columns=["Attack", "Epsilon", "Defense", "Accuracy", "Macro F1"])

df_pivot = df_results.pivot_table(index=["Attack", "Epsilon"], columns="Defense", values="Macro F1")

attack_order = ["BIM", "FGSM", "PGD", "DF", "AutoPGD", "ZOO","CaFA", "SINIFGSM", "VNIFGSM"]
df_pivot = df_pivot.sort_index(level=["Attack", "Epsilon"], key=lambda x: x.map({v: i for i, v in enumerate(attack_order)}) if x.name == "Attack" else x.astype(float))

defense_order = [f"Def{i}" for i in range(1, 12)]
df_pivot = df_pivot[defense_order]

final_results = []

for attack_name, epsilons in attack_models:
    for eps in epsilons:
        y_preds_list = []
        
        for defense_id in range(1, 12):
            y_pred_path = f"/home/jovyan/Sample_Based_Extension/WUSTL/WUSTL_Defense_Label/WUSTL_Def{defense_id}/y_pred_{attack_name}{eps}_Def{defense_id}.npy"
            y_pred = np.load(y_pred_path)
            
            y_preds_list.append(y_pred)
        
        if len(y_preds_list) > 0:
            y_preds = np.array(y_preds_list)
            final_pred = np.any(y_preds == y_test, axis=0)  # (num_samples,)
    
            final_pred_labels = y_test.copy()
            incorrect_indices = np.where(~final_pred)[0]
    
            if len(incorrect_indices) > 0:
                incorrect_preds = y_preds[:, incorrect_indices]
                majority_labels, _ = mode(incorrect_preds, axis=0, keepdims=True)
                final_pred_labels[incorrect_indices] = majority_labels[0]

            
            accuracy = np.mean(final_pred)
            precision, recall, f1_macro, _ = precision_recall_fscore_support(y_test, final_pred_labels, average='macro', zero_division=0)

            best_defense_indices = np.zeros_like(y_test, dtype=int)

            for i in range(len(y_test)):
                if final_pred[i]:
                    correct_defenses = np.where(y_preds[:, i] == y_test[i])[0]+1
                    correct_f1_scores = df_pivot.loc[(attack_name, eps), [f"Def{d}" for d in correct_defenses]].values
                    best_defense_indices[i] = correct_defenses[np.argmax(correct_f1_scores)]

                else:
                    best_defense_indices[i] = np.argmax(df_pivot.loc[(attack_name, eps)].values)+1

            final_results.append((attack_name, eps, "Def1-11 Combined", accuracy, f1_macro, best_defense_indices))

df_final_results = pd.DataFrame(final_results, columns=["Attack", "Epsilon", "Defense", "Accuracy", "Macro F1", "Best Defense Index"])





In [17]:
np.save("/home/jovyan/Sample_Based_Extension/WUSTL/best_defense_indices.npy", np.array(final_results, dtype=object))


In [18]:
df_final_results

Unnamed: 0,Attack,Epsilon,Defense,Accuracy,Macro F1,Best Defense Index
0,baseline,0.0,Def1-11 Combined,0.999992,0.995109,"[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ..."
1,BIM,0.01,Def1-11 Combined,0.999992,0.995109,"[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ..."
2,BIM,0.1,Def1-11 Combined,0.999883,0.960065,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
3,BIM,0.2,Def1-11 Combined,0.999698,0.930555,"[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ..."
4,BIM,0.3,Def1-11 Combined,0.99848,0.751838,"[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ..."
5,FGSM,0.01,Def1-11 Combined,0.999992,0.995109,"[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ..."
6,FGSM,0.1,Def1-11 Combined,0.99987,0.939003,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
7,FGSM,0.2,Def1-11 Combined,0.997152,0.889464,"[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ..."
8,FGSM,0.3,Def1-11 Combined,0.989022,0.667343,"[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ..."
9,PGD,0.01,Def1-11 Combined,0.999992,0.995109,"[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ..."


In [34]:
np.unique(df_final_results["Best Defense Index"][12], return_counts = True)

(array([ 3,  5, 10]), array([   816, 236723,   1214]))