In [None]:
# Load the jupyter_spaces IPython extension; the analysis cells further down
# run under its `%%space <name>` cell magic.
%load_ext jupyter_spaces

In [None]:
from deeprobust.graph.data import Dataset
from hrdataset import CustomDataset
import pandas
import numpy as np
import scipy.sparse
import networkx as nx
import seaborn as sns
import matplotlib.pyplot as plt
from collections import defaultdict
import signac
import pickle
import itertools
import copy

from jupyter_spaces import get_spaces
import jupyter_spaces
from scipy.special import softmax
import warnings
import itertools
import plotly
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [None]:
# Handle to the signac project found at "../../" (relative to the current
# working directory). The analysis cells use it to open perturbation, attack
# and clean-training jobs by their job id.
project = signac.get_project("../../") 

In [None]:
# Show the current working directory: the project root ("../../") and the CSV
# read in the next cell are both resolved relative to it.
%pwd

In [None]:
# Load the experiment registry and reshape it to one row per
# (experiment, attack job): the two job-id columns "evasionJobID" and
# "poisonJobID" are melted into an `attackIDType` / `attackID` pair.
#
# keep_default_na=False together with na_values=[""] means only *empty* cells
# are parsed as NaN; the literal string "N/A" is kept as text so that
# "not applicable" can be told apart from "not filled in yet" below.
df_expRun = pandas.read_csv("metattack-adj-only.csv", index_col=0, keep_default_na=False, na_values=[""])
job_id_columns = {"evasionJobID", "poisonJobID"}
df_expRun = df_expRun.melt(
    id_vars=[col for col in df_expRun.columns if col not in job_id_columns],
    var_name='attackIDType',
    value_name='attackID',
)
# Use the keyword form: passing `axis` positionally (`drop('Attack Phase', 1)`)
# was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
df_expRun = df_expRun.drop(columns='Attack Phase')

# Rows whose attack id is the literal "N/A" do not apply to that attack type.
na_mask = (df_expRun['attackID'] == 'N/A')
df_expRun = df_expRun[~na_mask]

# Rows that are still missing either the attack job id or the clean job id
# cannot be evaluated; warn about them and exclude them from the analysis.
incomplete_mask = (df_expRun.attackID.isnull() | df_expRun.cleanJobID.isnull())
if incomplete_mask.any():
    warnings.warn(f"{incomplete_mask.sum()} experiments are incomplete!")
# Snapshot taken after removing "N/A" rows but *before* removing incomplete
# rows, so the incomplete experiments remain inspectable.
df_expRun_Original = df_expRun.copy(deep=True)
df_expRun = df_expRun.loc[~incomplete_mask]

# Split by attack type; each subset feeds one of the analysis sections below.
df_expRun_evasion = df_expRun[df_expRun['attackIDType'] == 'evasionJobID']
df_expRun_poison = df_expRun[df_expRun['attackIDType'] == 'poisonJobID']

In [None]:
def check_correctness(prediction: np.ndarray, mask=None, data=None):
    """Return, element-wise, whether the predicted class equals ``data.labels``.

    Parameters
    ----------
    prediction : np.ndarray
        Per-class scores with classes along axis 1; the predicted class of
        each row is its ``argmax`` along that axis.
    mask : optional
        Index array or boolean mask applied to both the predicted classes and
        ``data.labels``. When ``None`` every row is compared.
    data : object with a ``labels`` attribute
        Source of the ground-truth labels. It defaults to ``None`` only for
        signature compatibility; a value is always required.

    Returns
    -------
    Boolean array, ``True`` where the prediction is correct. Taking its
    ``.mean()`` gives the accuracy over the selected rows.

    Raises
    ------
    ValueError
        If ``data`` is ``None`` (previously this surfaced as an opaque
        ``AttributeError`` on ``None.labels``).
    """
    if data is None:
        raise ValueError("check_correctness requires `data` (an object exposing `.labels`)")
    pred_class = prediction.argmax(1)
    labels = data.labels
    if mask is None:
        return pred_class == labels
    return pred_class[mask] == labels[mask]

# Datasets

In [None]:
# Dataset names that the loading cells read from the shared
# "../../datasets/data/<name>.pkl" location instead of the job's own data.pkl.
HETERO_DATASETS = [
    "fb100",
    "twitch-tw",
    "snap-patent-downsampled",
]
# Remaining dataset names. The identifier's spelling ("DATASETES") is kept
# as-is because other cells may refer to it.
HOMO_DATASETES = [
    "citeseer",
    "cora",
]

# Evasion (Post-training Attack)

In [None]:
%%space `metattack-evasion`
# Build `lst_dict_result`: one record per row of `df_expRun_evasion`, merging
# the experiment metadata, the first row of the attack job's resultTable.csv
# and the clean model's train/val/test accuracy.
# NOTE(review): names assigned here live in the `metattack-evasion` space
# (jupyter_spaces); the following cell in the same space reads
# `lst_dict_result`.
df_subtask = df_expRun_evasion
perturbDataDict = dict()  # cache: perturbJobID -> {"dict_pertubation", "dataset"}
defenseTableDict = dict()  # created but not populated in this cell
lst_dict_result = list()  # accumulates one result dict per experiment row

for tid, tdata in df_subtask.iterrows():
    
    # Load the perturbation dictionary and dataset once per perturbation job;
    # later rows sharing the same perturbJobID reuse the cached objects.
    if tdata.perturbJobID not in perturbDataDict:
        perturbJob = project.open_job(id=tdata.perturbJobID)
        # NOTE(review): entering the job presumably switches the working
        # directory to the job's workspace (signac job context manager), so
        # the relative paths below resolve from there — confirm.
        with perturbJob:
            # NOTE(review): pickle.load can execute arbitrary code; only use
            # on trusted, project-generated files.
            with open("perturbDict.pkl", "rb") as dataFile:
                dict_pertubation = pickle.load(dataFile)
            datasetName_ = perturbJob.sp['datasetName']
            # Datasets listed in HETERO_DATASETS are read from the shared
            # datasets directory; all others from the job's own data.pkl.
            if datasetName_ in HETERO_DATASETS:
                with open(f"../../datasets/data/{datasetName_}.pkl", "rb") as dataFile:
                    dataset = pickle.load(dataFile)
                    print(dataset)
            else:
                with open("data.pkl", "rb") as dataFile:
                    dataset = pickle.load(dataFile)
                    print(dataset)
        perturbDataDict[tdata.perturbJobID] = dict(
            dict_pertubation=dict_pertubation,
            dataset=dataset
        )
    else:
        dict_pertubation = perturbDataDict[tdata.perturbJobID]["dict_pertubation"]
        dataset = perturbDataDict[tdata.perturbJobID]["dataset"]

    # Attack job: its state point must have `use_runner` set, and its
    # resultTable.csv holds the metrics measured under attack.
    job = project.open_job(id=tdata.attackID)
    assert job.sp.use_runner
    with job:
        with open(f"resultTable.csv", "r") as f:
            resultTableAttack = pandas.read_csv(f, index_col=0)
    perturb_name = tdata.perturb_prefix  # not used further in this cell
    # Key fragment used to look up this model's clean predictions below.
    DEFENSE_MODEL = f"{tdata.model}_p"
    defenseModelType = tdata.model  # not used further in this cell
    
    # Clean (unattacked) run: read every stored prediction array from the
    # job's data store, then pick the entry for this model on clean data.
    clean_job = project.open_job(id=tdata.cleanJobID)
    with clean_job.data.open(mode="r"):
        dict_prediction_clean = {key: np.array(val) for key, val in clean_job.data.predictionDict.items()}
    # Row-wise softmax; check_correctness only takes the argmax along axis 1,
    # which softmax does not change.
    prediction_result_clean = softmax(np.array(dict_prediction_clean[f"f:{DEFENSE_MODEL}@clean"]), axis=1)
    
    # Only the first row of the attack result table is used. On key clashes
    # the attack-result values override the experiment metadata.
    dict_cur_result = resultTableAttack.iloc[0].to_dict()
    dict_cur_result = {**tdata.to_dict(), **dict_cur_result}
    # Clean accuracy on each split = mean of the per-node correctness flags.
    dict_cur_result['train_acc_clean'] = check_correctness(prediction_result_clean, dataset.idx_train, dataset).mean()
    dict_cur_result['val_acc_clean'] = check_correctness(prediction_result_clean, dataset.idx_val, dataset).mean()
    dict_cur_result['test_acc_clean'] = check_correctness(prediction_result_clean, dataset.idx_test, dataset).mean()
    lst_dict_result += [dict_cur_result]

In [None]:
%%space `metattack-evasion`
pd_result_full = pandas.DataFrame.from_dict(lst_dict_result).drop(['AttackSession', 
                                                                   'perturb_prefix', 
                                                                   'cleanJobID',
                                                                   'attackIDType',
                                                                   'attackID',
                                                                   'attack_type'], axis=1)
pd_result_full['model_with_arg'] = pd_result_full["model"] + ":" + pd_result_full["model_arg"].fillna("")
pivot_index = ["model_with_arg", "perturbJobID"]
defensePivotDict = dict()
for cur_dataset in np.unique(pd_result_full['DATASET'].values):
    for cur_ptb_ratio in np.unique(pd_result_full['ptb_ratio'].values):
        h = f"{cur_dataset}@{cur_ptb_ratio}"
        cur_mask = (pd_result_full['DATASET'] == cur_dataset) & (pd_result_full['ptb_ratio'] == cur_ptb_ratio)
        pd_cur_masked = pd_result_full.loc[cur_mask, ~pd_result_full.columns.isin(['model', 'model_arg', 'gcnsvd_k'])]
        pd_cur_masked.rename(columns={"train_acc": "train_acc_attack",
                              "val_acc": "val_acc_attack",
                              "test_acc": "test_acc_attack"},inplace=True)
        
        pd_cur_masked['test_acc_abs_delta'] = pd_cur_masked['test_acc_attack'] - pd_cur_masked['test_acc_clean']
        pd_cur_masked['test_acc_rel_delta'] = (pd_cur_masked['test_acc_attack'] - pd_cur_masked['test_acc_clean']) / pd_cur_masked['test_acc_clean'] 
        defensePivot = pd_cur_masked.pivot_table(values=['test_acc_clean', 'test_acc_attack', 
                                                         'test_acc_abs_delta', 'test_acc_rel_delta'],
                                                 index=pivot_index, 
                                                 aggfunc={'test_acc_clean': [np.mean], 
                                                          'test_acc_attack': [np.mean],
                                                          'test_acc_abs_delta': [np.mean],
                                                          'test_acc_rel_delta': [np.mean]})
        for key2 in defensePivot.index.levels[0]:

            defensePivot.loc[(key2, 'subtotal_mean'), :] = defensePivot.loc[key2].mean(axis=0)
            defensePivot.loc[(key2, 'subtotal_std'), :] = defensePivot.loc[key2].std(axis=0)

        defensePivot.sort_index(inplace=True)
        defensePivotDict[h] = defensePivot.loc[pandas.IndexSlice[:, ["subtotal_mean", "subtotal_std"]], 
                                               ["test_acc_clean", "test_acc_attack", 
                                                "test_acc_abs_delta", "test_acc_rel_delta"]]
        defensePivotDict[h].columns = defensePivotDict[h].columns.droplevel(1)
        print(h)
        defensePivotDict[h].style.format(dict(test_acc_clean="{:.2%}", 
                                              test_acc_attack="{:.2%}",
                                              test_acc_abs_delta="{:.2%}",
                                              test_acc_rel_delta="{:.2%}"))

# Poison (Pre-training Attack) 

In [None]:
%%space `metattack-poison`
# Build `lst_dict_result`: one record per row of `df_expRun_poison`, merging
# the experiment metadata, the first row of the attack job's resultTable.csv
# and the clean model's train/val/test accuracy.
# NOTE(review): names assigned here live in the `metattack-poison` space
# (jupyter_spaces); the following cell in the same space reads
# `lst_dict_result`.
df_subtask = df_expRun_poison
perturbDataDict = dict()  # cache: perturbJobID -> {"dict_pertubation", "dataset"}
defenseTableDict = dict()  # created but not populated in this cell
lst_dict_result = list()  # accumulates one result dict per experiment row

for tid, tdata in df_subtask.iterrows():
    
    # Load the perturbation dictionary and dataset once per perturbation job;
    # later rows sharing the same perturbJobID reuse the cached objects.
    if tdata.perturbJobID not in perturbDataDict:
        perturbJob = project.open_job(id=tdata.perturbJobID)
        # NOTE(review): entering the job presumably switches the working
        # directory to the job's workspace (signac job context manager), so
        # the relative paths below resolve from there — confirm.
        with perturbJob:
            # NOTE(review): pickle.load can execute arbitrary code; only use
            # on trusted, project-generated files.
            with open("perturbDict.pkl", "rb") as dataFile:
                dict_pertubation = pickle.load(dataFile)
            datasetName_ = perturbJob.sp['datasetName']
            # Datasets listed in HETERO_DATASETS are read from the shared
            # datasets directory; all others from the job's own data.pkl.
            if datasetName_ in HETERO_DATASETS:
                with open(f"../../datasets/data/{datasetName_}.pkl", "rb") as dataFile:
                    dataset = pickle.load(dataFile)
                    print(dataset)
            else:
                with open("data.pkl", "rb") as dataFile:
                    dataset = pickle.load(dataFile)
                    print(dataset)
        perturbDataDict[tdata.perturbJobID] = dict(
            dict_pertubation=dict_pertubation,
            dataset=dataset
        )
    else:
        dict_pertubation = perturbDataDict[tdata.perturbJobID]["dict_pertubation"]
        dataset = perturbDataDict[tdata.perturbJobID]["dataset"]

    # Attack job: its state point must have `use_runner` set, and its
    # resultTable.csv holds the metrics measured under attack.
    job = project.open_job(id=tdata.attackID)
    assert job.sp.use_runner
    with job:
        with open(f"resultTable.csv", "r") as f:
            resultTableAttack = pandas.read_csv(f, index_col=0)
    perturb_name = tdata.perturb_prefix  # not used further in this cell
    # Key fragment used to look up this model's clean predictions below.
    DEFENSE_MODEL = f"{tdata.model}_p"
    defenseModelType = tdata.model  # not used further in this cell
    
    # Clean (unattacked) run: read every stored prediction array from the
    # job's data store, then pick the entry for this model on clean data.
    clean_job = project.open_job(id=tdata.cleanJobID)
    with clean_job.data.open(mode="r"):
        dict_prediction_clean = {key: np.array(val) for key, val in clean_job.data.predictionDict.items()}
    # Row-wise softmax; check_correctness only takes the argmax along axis 1,
    # which softmax does not change.
    prediction_result_clean = softmax(np.array(dict_prediction_clean[f"f:{DEFENSE_MODEL}@clean"]), axis=1)
    
    # Only the first row of the attack result table is used. On key clashes
    # the attack-result values override the experiment metadata.
    dict_cur_result = resultTableAttack.iloc[0].to_dict()
    dict_cur_result = {**tdata.to_dict(), **dict_cur_result}
    # Clean accuracy on each split = mean of the per-node correctness flags.
    dict_cur_result['train_acc_clean'] = check_correctness(prediction_result_clean, dataset.idx_train, dataset).mean()
    dict_cur_result['val_acc_clean'] = check_correctness(prediction_result_clean, dataset.idx_val, dataset).mean()
    dict_cur_result['test_acc_clean'] = check_correctness(prediction_result_clean, dataset.idx_test, dataset).mean()
    lst_dict_result += [dict_cur_result]

In [None]:
%%space `metattack-poison`
pd_result_full = pandas.DataFrame.from_dict(lst_dict_result).drop(['AttackSession', 
                                                                   'perturb_prefix', 
                                                                   'cleanJobID',
                                                                   'attackIDType',
                                                                   'attackID',
                                                                   'attack_type'], axis=1)
pd_result_full['model_with_arg'] = pd_result_full["model"] + ":" + pd_result_full["model_arg"].fillna("")
pivot_index = ["model_with_arg", "perturbJobID"]
defensePivotDict = dict()
for cur_dataset in np.unique(pd_result_full['DATASET'].values):
    for cur_ptb_ratio in np.unique(pd_result_full['ptb_ratio'].values):
        h = f"{cur_dataset}@{cur_ptb_ratio}"
        cur_mask = (pd_result_full['DATASET'] == cur_dataset) & (pd_result_full['ptb_ratio'] == cur_ptb_ratio)
        pd_cur_masked = pd_result_full.loc[cur_mask, ~pd_result_full.columns.isin(['model', 'model_arg', 'gcnsvd_k'])]
        pd_cur_masked.rename(columns={"train_acc": "train_acc_attack",
                              "val_acc": "val_acc_attack",
                              "test_acc": "test_acc_attack"},inplace=True)
        
        pd_cur_masked['test_acc_abs_delta'] = pd_cur_masked['test_acc_attack'] - pd_cur_masked['test_acc_clean']
        pd_cur_masked['test_acc_rel_delta'] = (pd_cur_masked['test_acc_attack'] - pd_cur_masked['test_acc_clean']) / pd_cur_masked['test_acc_clean'] 
        defensePivot = pd_cur_masked.pivot_table(values=['test_acc_clean', 'test_acc_attack', 
                                                         'test_acc_abs_delta', 'test_acc_rel_delta'],
                                                 index=pivot_index, 
                                                 aggfunc={'test_acc_clean': [np.mean], 
                                                          'test_acc_attack': [np.mean],
                                                          'test_acc_abs_delta': [np.mean],
                                                          'test_acc_rel_delta': [np.mean]})
        for key2 in defensePivot.index.levels[0]:

            defensePivot.loc[(key2, 'subtotal_mean'), :] = defensePivot.loc[key2].mean(axis=0)
            defensePivot.loc[(key2, 'subtotal_std'), :] = defensePivot.loc[key2].std(axis=0)

        defensePivot.sort_index(inplace=True)
        defensePivotDict[h] = defensePivot.loc[pandas.IndexSlice[:, ["subtotal_mean", "subtotal_std"]], 
                                               ["test_acc_clean", "test_acc_attack", 
                                                "test_acc_abs_delta", "test_acc_rel_delta"]]
        defensePivotDict[h].columns = defensePivotDict[h].columns.droplevel(1)
        print(h)
        defensePivotDict[h].style.format(dict(test_acc_clean="{:.2%}", 
                                              test_acc_attack="{:.2%}",
                                              test_acc_abs_delta="{:.2%}",
                                              test_acc_rel_delta="{:.2%}"))