In [None]:
# Load the jupyter_spaces IPython extension; it provides the `%%space` cell magic
# that the evasion and poison cells of this notebook run under.
%load_ext jupyter_spaces

In [None]:
from deeprobust.graph.data import Dataset
from hrdataset import CustomDataset
import pandas
import numpy as np
import scipy.sparse
import networkx as nx
import seaborn as sns
import matplotlib.pyplot as plt
from collections import defaultdict
import signac
import pickle
import itertools
import tqdm

from jupyter_spaces import get_spaces
import jupyter_spaces
from scipy.special import softmax
import warnings
import itertools
import plotly
import copy
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [None]:
# Handle to the signac project located two directories above the working
# directory; the cells below look up every job by id through this handle.
project = signac.get_project("../../") 

In [None]:
def get_acc_margin(predictions, label):
    """Score predictions against ground-truth labels.

    Parameters
    ----------
    predictions : numpy.ndarray
        Either a 1-D array of per-class scores for one sample, or a 2-D array
        with one row of per-class scores per sample. At least two classes are
        needed, because the second-best score is looked up.
    label : int or array of int
        Ground-truth class index for 1-D input, or one index per row for 2-D
        input.

    Returns
    -------
    acc : bool or numpy.ndarray of bool
        Whether the top-scoring class equals the label.
    pred : scalar or numpy.ndarray
        Score given to the ground-truth class.
    margin : scalar or numpy.ndarray
        Ground-truth score minus the best competing score: the runner-up when
        the prediction is correct (margin >= 0), the winning class when it is
        wrong (margin <= 0).

    Raises
    ------
    ValueError
        If ``predictions`` is neither 1-D nor 2-D.
    """
    if predictions.ndim == 1:
        # argpartition with kth=(-1, -2) puts the indices of the largest and
        # second-largest scores in the last two slots.
        pred_part = np.argpartition(predictions, (-1, -2))
        max_pred_ind, second_max_pred_ind = pred_part[-1], pred_part[-2]
        if label == max_pred_ind:
            return True, predictions[label], predictions[label] - predictions[second_max_pred_ind]
        else:
            return False, predictions[label], predictions[label] - predictions[max_pred_ind]
    elif predictions.ndim == 2:
        rows = np.arange(len(predictions))
        pred_part = np.argpartition(predictions, (-1, -2), axis=-1)
        max_pred_ind, second_max_pred_ind = pred_part[:, -1], pred_part[:, -2]
        acc = (max_pred_ind == label)
        pred = predictions[rows, label]
        # Pick the competitor index per row instead of blending both candidate
        # margins with `acc * a + (1 - acc) * b`: the blend turns a non-finite
        # value in the unselected term into NaN (0 * inf), which made this
        # branch disagree with the 1-D branch above.
        rival_ind = np.where(acc, second_max_pred_ind, max_pred_ind)
        margin = pred - predictions[rows, rival_ind]
        return acc, pred, margin
    else:
        raise ValueError(f"Unsupported dim for predictions: {predictions.ndim}")

In [None]:
# Show the working directory: the relative paths used in this notebook
# ("../../" for the signac project, "./nettack-adj-only.csv") resolve against it.
%pwd

# Load CSV

In [None]:
# Load the experiment-run table. Only empty cells are parsed as NaN
# (keep_default_na=False, na_values=[""]), so the literal string "N/A" survives
# parsing and can be told apart from a genuinely missing value below.
df_expRun = pandas.read_csv("./nettack-adj-only.csv", index_col=0, keep_default_na=False, na_values=[""])

# Reshape wide -> long: the evasionJobID / poisonJobID columns become
# (attackIDType, attackID) pairs; every other column is carried along as an id.
attack_id_columns = {"evasionJobID", "poisonJobID"}
df_expRun = df_expRun.melt(id_vars=[col for col in df_expRun.columns if col not in attack_id_columns],
                           var_name='attackIDType',
                           value_name='attackID')
# `columns=` is passed by keyword: the positional axis argument (`drop(label, 1)`)
# was deprecated in pandas 1.3 and raises TypeError in pandas 2.0.
df_expRun = df_expRun.drop(columns='Attack Phase')

# Rows whose attack id is the literal "N/A" are dropped outright.
na_mask = (df_expRun['attackID'] == 'N/A')
df_expRun = df_expRun[~na_mask]

# Rows still missing an attack id or a clean-job id count as incomplete.
incomplete_mask = (df_expRun.attackID.isnull() | df_expRun.cleanJobID.isnull())
n_incomplete = incomplete_mask.sum()
if n_incomplete > 0:
    warnings.warn(f"{n_incomplete} experiments are incomplete!")
# Keep the unfiltered frame (row-aligned with `incomplete_mask`) for inspection.
df_expRun_Original = df_expRun.copy()
df_expRun = df_expRun.loc[~incomplete_mask]

df_expRun_evasion = df_expRun[df_expRun['attackIDType'] == 'evasionJobID']
df_expRun_poison = df_expRun[df_expRun['attackIDType'] == 'poisonJobID']

In [None]:
# Rows flagged as incomplete; the mask is applied positionally (as a plain
# array), so it selects rows by order rather than by index label.
df_expRun_Original.loc[incomplete_mask.values]

# Datasets

In [None]:
# Dataset names grouped by how the loader cells read them: names listed in
# HETERO_DATASETS are unpickled from "../../datasets/data/<name>.pkl", every
# other dataset from the perturbation job's own "data.pkl".
# NOTE(review): presumably heterophilous vs. homophilous graphs — confirm.
HETERO_DATASETS = ['fb100', 'twitch-tw', 'snap-patent-downsampled']
HOMO_DATASETS = ['citeseer', 'cora']
# Backward-compatible alias for the original, misspelled name.
HOMO_DATASETES = HOMO_DATASETS

# Evasion (Post-training Attack)

In [None]:
%%space `evasion`
# Build one per-target-node result table for every evasion attack job listed in
# `df_expRun_evasion` and stack them into `defenseTableFull`, whose outer index
# level is the attack job id ("attackID").
df_subtask = df_expRun_evasion
# Cache of perturbation dict + dataset keyed by perturbJobID, so the files of a
# perturbation job are read only the first time that job is seen.
perturbDataDict = dict()
# Per-attack-job result tables keyed by the attack job's id.
defenseTableDict = dict()

for tid, tdata in df_subtask.iterrows():
    if tdata.perturbJobID not in perturbDataDict:
        perturbJob = project.open_job(id=tdata.perturbJobID)
        # NOTE(review): the relative paths below presumably resolve inside the job's
        # workspace because `with perturbJob:` switches the working directory — confirm.
        with perturbJob:

            # NOTE(review): pickle.load can execute arbitrary code; only load
            # files that were produced by trusted runs of this project.
            with open("perturbDict.pkl", "rb") as dataFile:
                dict_pertubation = pickle.load(dataFile)
            datasetName_ = perturbJob.sp['datasetName']
            print(datasetName_)
            # Datasets named in HETERO_DATASETS come from the shared
            # "../../datasets/data/" directory; all others from "data.pkl".
            if datasetName_ in HETERO_DATASETS:
                with open(f"../../datasets/data/{datasetName_}.pkl", "rb") as dataFile:
                    dataset = pickle.load(dataFile)
                    print(dataset)
            else:
                with open("data.pkl", "rb") as dataFile:
                    dataset = pickle.load(dataFile)
                    print(dataset)

        perturbDataDict[tdata.perturbJobID] = dict(
            dict_pertubation=dict_pertubation,
            dataset=dataset
        )
    else:
        # Already loaded for an earlier row: reuse the cached objects.
        dict_pertubation = perturbDataDict[tdata.perturbJobID]["dict_pertubation"]
        dataset = perturbDataDict[tdata.perturbJobID]["dataset"]

    # Load attacked prediction
    job = project.open_job(id=tdata.attackID)
    # print(f"a:{job.id}")
    # Only attack jobs whose state point has a truthy `use_runner` are handled here.
    assert job.sp.use_runner
    with job:
        # Materialise every entry of the stored predictionDict as a NumPy array
        # while the job's data store is open.
        with job.data.open(mode="r"):
            dict_prediction = {key: np.array(val) for key, val in job.data.predictionDict.items()}
        with open(f"resultTable.csv", "r") as f:
            resultTable = pandas.read_csv(f, index_col=0)
    perturb_name = tdata.perturb_prefix
    # Model tag used in the prediction keys below ("<model>_p").
    DEFENSE_MODEL = f"{tdata.model}_p"
    defenseModelType = tdata.model

    # Predictions of the matching clean job, stored under "f:<model>_p@clean".
    clean_job = project.open_job(id=tdata.cleanJobID)
    # print(f"c:{clean_job.id}")
    with clean_job.data.open(mode="r"):
        dict_prediction_clean = {key: np.array(val) for key, val in clean_job.data.predictionDict.items()}
    # Row-wise softmax (axis=1) over the stored clean scores.
    prediction_result_clean = softmax(np.array(dict_prediction_clean[f"f:{DEFENSE_MODEL}@clean"]), axis=1)


    # Start from the attack job's own table: one row per target node, with its
    # `acc` column renamed to `acc_attack`.
    defenseResultTable = resultTable[["target_node", "acc"]].set_index(
                "target_node").rename(columns=dict(acc="acc_attack"))
    # Clean accuracy / confidence / margin are computed for all rows of the clean
    # prediction at once, then restricted to the target nodes.
    acc_clean, ground_truth_confidence_clean, margin_clean = get_acc_margin(prediction_result_clean, dataset.labels)
    defenseResultTable["label"] = dataset.labels[defenseResultTable.index]
    defenseResultTable["acc_clean"] = acc_clean[defenseResultTable.index]
    defenseResultTable["pred_clean"] = ground_truth_confidence_clean[defenseResultTable.index]
    defenseResultTable["margin_clean"] = margin_clean[defenseResultTable.index]

    for cur_node in defenseResultTable.index:
        # Each target node has its own prediction entry, keyed
        # "e:<model>_p@<perturb_prefix>_<node>"; only that node's row is scored.
        prediction_result_attack = softmax(dict_prediction[f"e:{DEFENSE_MODEL}@{perturb_name}_{cur_node}"][cur_node, :])
        acc_s, confidence_attack_s, margin_attack_s = get_acc_margin(prediction_result_attack, dataset.labels[cur_node])
        # Cross-check: the recomputed correctness must match the job's resultTable.csv.
        assert defenseResultTable.at[cur_node, "acc_attack"] == acc_s
        defenseResultTable.at[cur_node, "pred_attack"] = confidence_attack_s
        defenseResultTable.at[cur_node, "margin_attack"] = margin_attack_s

    # Change caused by the attack (attacked minus clean).
    defenseResultTable["pred_delta"] = defenseResultTable["pred_attack"] - defenseResultTable["pred_clean"]
    defenseResultTable["margin_delta"] = defenseResultTable["margin_attack"] - defenseResultTable["margin_clean"]

    # One boolean "<key>_group" column per entry of the job's `targetNodes`
    # state-point mapping, marking the target nodes listed under that key.
    for key, nodeList in job.sp.targetNodes.items():
        defenseResultTable[f"{key}_group"] = defenseResultTable.index.isin(nodeList)
    defenseTableDict[job.id] = defenseResultTable


# print("!")
# Stack all per-job tables; the job id becomes the outer index level "attackID".
defenseTableFull = pandas.concat(defenseTableDict.values(), keys=defenseTableDict.keys(), names=["attackID"])
display(defenseTableFull)

In [None]:
%%space `evasion`
# Flatten the (attackID, target_node) index into ordinary columns so the
# per-node results can be joined to the experiment metadata on attackID.
defenseTableFullRH = defenseTableFull.sort_index().reset_index()
defenseTableFullExp = pandas.merge(df_subtask, defenseTableFullRH, on='attackID', how='outer')

# Display label "<model>:<model_arg>"; a missing model_arg becomes an empty suffix.
model_arg_text = defenseTableFullExp["model_arg"].fillna("")
defenseTableFullExp["model_with_arg"] = defenseTableFullExp["model"] + ":" + model_arg_text

In [None]:
%%space `evasion`
# Summarise accuracy and margin per model configuration, with one table per
# distinct value of `group_key`. Each table keeps only the per-model
# subtotal rows (mean and std across perturbation jobs).
group_key = "DATASET"
pivot_index = ["model_with_arg", "perturbJobID"]
defensePivotDict = dict()

for h in sorted(defenseTableFullExp[group_key].unique()):
    defenseTableFullR = defenseTableFullExp.loc[defenseTableFullExp[group_key] == h]

    # Mean clean / attacked accuracy per (model_with_arg, perturbJobID). The
    # aggregation is named by string ("mean"): passing the np.mean callable
    # triggers a FutureWarning in recent pandas, and the resulting column
    # level label ("mean") is the same.
    defensePivot = defenseTableFullR.pivot_table(values=["acc_clean", "acc_attack"], index=pivot_index,
                              aggfunc={"acc_clean": ["mean"], "acc_attack": ["mean"]})
    defensePivot["acc_delta"] = defensePivot.acc_attack - defensePivot.acc_clean
    defensePivot["acc_delta_rel"] = (defensePivot.acc_attack - defensePivot.acc_clean) / defensePivot.acc_clean
    # display(defensePivot)
    for key2 in defensePivot.index.levels[0]:
        # Compute both statistics from the same snapshot, *before* any subtotal
        # row is inserted. Previously the std was taken after the mean row had
        # been added to the group, so that row was counted as an extra sample.
        per_job = defensePivot.loc[key2]
        subtotal_mean = per_job.mean(axis=0)
        subtotal_std = per_job.std(axis=0)
        defensePivot.loc[(key2, 'subtotal_mean'), :] = subtotal_mean
        defensePivot.loc[(key2, 'subtotal_std'), :] = subtotal_std

    # Same summary for the margin columns.
    defensePivot2 = defenseTableFullR.pivot_table(values=["margin_clean", "margin_attack", "margin_delta"], index=pivot_index,
                              aggfunc={"margin_clean": ["mean"], "margin_attack": ["mean"], "margin_delta": ["mean"]})
    for key2 in defensePivot2.index.levels[0]:
        per_job = defensePivot2.loc[key2]
        subtotal_mean = per_job.mean(axis=0)
        subtotal_std = per_job.std(axis=0)
        defensePivot2.loc[(key2, 'subtotal_mean'), :] = subtotal_mean
        defensePivot2.loc[(key2, 'subtotal_std'), :] = subtotal_std

    defensePivot = pandas.concat([defensePivot, defensePivot2], axis=1)
    defensePivot.sort_index(inplace=True)

    # Keep only the subtotal rows; copy so that flattening the column index
    # does not act on a slice of `defensePivot`.
    defensePivotDict[h] = defensePivot.loc[pandas.IndexSlice[:, ["subtotal_mean", "subtotal_std"]], :].copy()
    defensePivotDict[h].columns = defensePivotDict[h].columns.droplevel(1)

    display(h)
    # A bare expression inside a loop is never rendered, so the Styler has to
    # be passed to display() explicitly.
    display(defensePivotDict[h].style.format(dict(acc_attack="{:.2%}", acc_clean="{:.2%}", acc_delta="{:.2%}", acc_delta_rel="{:.2%}")))

# Poison (Pre-training Attack)

In [None]:
%%space `poison`
# Build one per-target-node result table for every poisoning attack job listed
# in `df_expRun_poison` and stack them into `defenseTableFull`, whose outer
# index level is the attack job id ("attackID").
df_subtask = df_expRun_poison
# Cache of perturbation dict + dataset keyed by perturbJobID, so the files of a
# perturbation job are read only the first time that job is seen.
perturbDataDict = dict()
# Per-attack-job result tables keyed by the attack job's id.
defenseTableDict = dict()

for tid, tdata in df_subtask.iterrows():
    if tdata.perturbJobID not in perturbDataDict:
        perturbJob = project.open_job(id=tdata.perturbJobID)
        # print("p")
        # NOTE(review): the relative paths below presumably resolve inside the job's
        # workspace because `with perturbJob:` switches the working directory — confirm.
        with perturbJob:
            # NOTE(review): pickle.load can execute arbitrary code; only load
            # files that were produced by trusted runs of this project.
            with open("perturbDict.pkl", "rb") as dataFile:
                dict_pertubation = pickle.load(dataFile)
            datasetName_ = perturbJob.sp['datasetName']
            # Datasets named in HETERO_DATASETS come from the shared
            # "../../datasets/data/" directory; all others from "data.pkl".
            if datasetName_ in HETERO_DATASETS:
                with open(f"../../datasets/data/{datasetName_}.pkl", "rb") as dataFile:
                    dataset = pickle.load(dataFile)
                    print(dataset)
            else:
                with open("data.pkl", "rb") as dataFile:
                    dataset = pickle.load(dataFile)
                    print(dataset)
        perturbDataDict[tdata.perturbJobID] = dict(
            dict_pertubation=dict_pertubation,
            dataset=dataset
        )
    else:
        # Already loaded for an earlier row: reuse the cached objects.
        dict_pertubation = perturbDataDict[tdata.perturbJobID]["dict_pertubation"]
        dataset = perturbDataDict[tdata.perturbJobID]["dataset"]

    job = project.open_job(id=tdata.attackID)
    # Only attack jobs whose state point has a truthy `use_runner` are handled here.
    assert job.sp.use_runner
    with job:
        # Materialise every entry of the stored predictionDict as a NumPy array
        # while the job's data store is open.
        with job.data.open(mode="r"):
            dict_prediction = {key: np.array(val) for key, val in job.data.predictionDict.items()}
        with open(f"resultTable.csv", "r") as f:
            resultTable = pandas.read_csv(f, index_col=0)
    perturb_name = tdata.perturb_prefix
    # Model tag used in the clean-prediction key below ("<model>_p").
    DEFENSE_MODEL = f"{tdata.model}_p"
    defenseModelType = tdata.model

    # Predictions of the matching clean job, stored under "f:<model>_p@clean".
    clean_job = project.open_job(id=tdata.cleanJobID)
    with clean_job.data.open(mode="r"):
        dict_prediction_clean = {key: np.array(val) for key, val in clean_job.data.predictionDict.items()}
    # Row-wise softmax (axis=1) over the stored clean scores.
    prediction_result_clean = softmax(np.array(dict_prediction_clean[f"f:{DEFENSE_MODEL}@clean"]), axis=1)


    # Start from the attack job's own table: one row per target node, with its
    # `acc` column renamed to `acc_attack`.
    defenseResultTable = resultTable[["target_node", "acc"]].set_index(
                "target_node").rename(columns=dict(acc="acc_attack"))
    # Clean accuracy / confidence / margin are computed for all rows of the clean
    # prediction at once, then restricted to the target nodes.
    acc_clean, ground_truth_confidence_clean, margin_clean = get_acc_margin(prediction_result_clean, dataset.labels)
    defenseResultTable["label"] = dataset.labels[defenseResultTable.index]
    defenseResultTable["acc_clean"] = acc_clean[defenseResultTable.index]
    defenseResultTable["pred_clean"] = ground_truth_confidence_clean[defenseResultTable.index]
    defenseResultTable["margin_clean"] = margin_clean[defenseResultTable.index]

    for cur_node in defenseResultTable.index:
        # Each target node has its own prediction entry, keyed
        # "p:<model>@0@<perturb_prefix>_<node>"; only that node's row is scored.
        # NOTE(review): the meaning of the fixed "@0" segment is not visible here — confirm.
        prediction_result_attack = softmax(dict_prediction[f"p:{defenseModelType}@0@{perturb_name}_{cur_node}"][cur_node, :])
        acc_s, confidence_attack_s, margin_attack_s = get_acc_margin(prediction_result_attack, dataset.labels[cur_node])
        # Cross-check: the recomputed correctness must match the job's resultTable.csv.
        assert defenseResultTable.at[cur_node, "acc_attack"] == acc_s
        defenseResultTable.at[cur_node, "pred_attack"] = confidence_attack_s
        defenseResultTable.at[cur_node, "margin_attack"] = margin_attack_s

    # Change caused by the attack (attacked minus clean).
    defenseResultTable["pred_delta"] = defenseResultTable["pred_attack"] - defenseResultTable["pred_clean"]
    defenseResultTable["margin_delta"] = defenseResultTable["margin_attack"] - defenseResultTable["margin_clean"]

    # One boolean "<key>_group" column per entry of the job's `targetNodes`
    # state-point mapping, marking the target nodes listed under that key.
    for key, nodeList in job.sp.targetNodes.items():
        defenseResultTable[f"{key}_group"] = defenseResultTable.index.isin(nodeList)
    defenseTableDict[job.id] = defenseResultTable


# print("!")
# Stack all per-job tables; the job id becomes the outer index level "attackID".
defenseTableFull = pandas.concat(defenseTableDict.values(), keys=defenseTableDict.keys(), names=["attackID"])
display(defenseTableFull)

In [None]:
%%space `poison`
# Flatten the (attackID, target_node) index into ordinary columns so the
# per-node results can be joined to the experiment metadata on attackID.
defenseTableFullRH = defenseTableFull.sort_index().reset_index()
defenseTableFullExp = pandas.merge(df_subtask, defenseTableFullRH, on='attackID', how='outer')

# Display label "<model>:<model_arg>"; a missing model_arg becomes an empty suffix.
model_arg_text = defenseTableFullExp["model_arg"].fillna("")
defenseTableFullExp["model_with_arg"] = defenseTableFullExp["model"] + ":" + model_arg_text

In [None]:
%%space `poison`
# Summarise accuracy and margin per model configuration, with one table per
# distinct value of `group_key`. Each table keeps only the per-model
# subtotal rows (mean and std across perturbation jobs).
group_key = "DATASET"
pivot_index = ["model_with_arg", "perturbJobID"]
defensePivotDict = dict()

for h in sorted(defenseTableFullExp[group_key].unique()):
    defenseTableFullR = defenseTableFullExp.loc[defenseTableFullExp[group_key] == h]

    # Mean clean / attacked accuracy per (model_with_arg, perturbJobID). The
    # aggregation is named by string ("mean"): passing the np.mean callable
    # triggers a FutureWarning in recent pandas, and the resulting column
    # level label ("mean") is the same.
    defensePivot = defenseTableFullR.pivot_table(values=["acc_clean", "acc_attack"], index=pivot_index,
                              aggfunc={"acc_clean": ["mean"], "acc_attack": ["mean"]})
    defensePivot["acc_delta"] = defensePivot.acc_attack - defensePivot.acc_clean
    defensePivot["acc_delta_rel"] = (defensePivot.acc_attack - defensePivot.acc_clean) / defensePivot.acc_clean
    # display(defensePivot)
    for key2 in defensePivot.index.levels[0]:
        # Compute both statistics from the same snapshot, *before* any subtotal
        # row is inserted. Previously the std was taken after the mean row had
        # been added to the group, so that row was counted as an extra sample.
        per_job = defensePivot.loc[key2]
        subtotal_mean = per_job.mean(axis=0)
        subtotal_std = per_job.std(axis=0)
        defensePivot.loc[(key2, 'subtotal_mean'), :] = subtotal_mean
        defensePivot.loc[(key2, 'subtotal_std'), :] = subtotal_std

    # Same summary for the margin columns.
    defensePivot2 = defenseTableFullR.pivot_table(values=["margin_clean", "margin_attack", "margin_delta"], index=pivot_index,
                              aggfunc={"margin_clean": ["mean"], "margin_attack": ["mean"], "margin_delta": ["mean"]})
    for key2 in defensePivot2.index.levels[0]:
        per_job = defensePivot2.loc[key2]
        subtotal_mean = per_job.mean(axis=0)
        subtotal_std = per_job.std(axis=0)
        defensePivot2.loc[(key2, 'subtotal_mean'), :] = subtotal_mean
        defensePivot2.loc[(key2, 'subtotal_std'), :] = subtotal_std

    defensePivot = pandas.concat([defensePivot, defensePivot2], axis=1)
    defensePivot.sort_index(inplace=True)

    # Keep only the subtotal rows; copy so that flattening the column index
    # does not act on a slice of `defensePivot`.
    defensePivotDict[h] = defensePivot.loc[pandas.IndexSlice[:, ["subtotal_mean", "subtotal_std"]], :].copy()
    defensePivotDict[h].columns = defensePivotDict[h].columns.droplevel(1)

    display(h)
    # A bare expression inside a loop is never rendered, so the Styler has to
    # be passed to display() explicitly.
    display(defensePivotDict[h].style.format(dict(acc_attack="{:.2%}", acc_clean="{:.2%}", acc_delta="{:.2%}", acc_delta_rel="{:.2%}")))