In [1]:
import seml
import pandas as pd
import numpy as np
from run_seml import run
from matplotlib import pyplot as plt

# Raise pandas' display limits (up to 50 columns / 150 rows) for the frames
# shown in this notebook.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 150)

In [2]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [8]:
# Fetch the runs of this seml collection as a DataFrame, requesting only the
# listed fields. Later cells read nested entries through dotted column names
# such as 'config.dataset' and 'result.results'.
df_experiments = seml.get_results('kdd21_rgnn_at_scale_attack_evasion_transfer_3',
                                  to_data_frame=True,
                                  fields=['batch_id', 'slurm', 'config', 'result'])

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




In [9]:
# Turn every experiment's `result.results` payload into its own frame and tag
# each of its rows with the configuration of the experiment it came from.
tagged_frames = []
for raw_result, (_, experiment) in zip(df_experiments['result.results'], df_experiments.iterrows()):
    loss_type = experiment['config.attack_params.loss_type']
    tagged_frames.append(
        pd.DataFrame(raw_result).assign(
            dataset=experiment['config.dataset'],
            attack=experiment['config.attack'],
            seed=experiment['config.seed'],
            batch_id=experiment['batch_id'],
            loss_type=loss_type,
            # Flag set for the 'MCE' and 'tanhCW' loss types (a former extra
            # criterion on `stop_optimizing_if_label_flipped` stays disabled).
            novel_loss=loss_type in ('MCE', 'tanhCW'),
        )
    )

# Stack all runs and order them by batch, so that `keep='last'` below retains
# the row of the highest batch_id among otherwise identical rows.
df_results = (
    pd.concat(tagged_frames, ignore_index=True)
    .sort_values('batch_id')
)

# Rows count as duplicates when they agree on everything except the batch
# they ran in and the accuracy they measured.
dedup_columns = [c for c in df_results.columns if c not in ('batch_id', 'accuracy')]
df_results = df_results.drop_duplicates(dedup_columns, keep='last')

df_results

Unnamed: 0,label,epsilon,accuracy,dataset,attack,seed,batch_id,loss_type,novel_loss
0,Soft Medoid RPPRGo (T=1.0),0.00,0.813043,cora_ml,PRBCD,0,1,CE,False
593,Jaccard GCN,0.10,0.699210,cora_ml,PRBCD,42,1,CE,False
594,Jaccard GCN,0.25,0.591700,cora_ml,PRBCD,42,1,CE,False
595,RGCN,0.00,0.788538,cora_ml,PRBCD,42,1,CE,False
596,RGCN,0.01,0.777075,cora_ml,PRBCD,42,1,CE,False
...,...,...,...,...,...,...,...,...,...
304,Vanilla GDC,0.25,0.525296,cora_ml,PRBCD,1,1,tanhCW,True
305,Soft Medoid GDC (T=1.0),0.00,0.821344,cora_ml,PRBCD,1,1,tanhCW,True
306,Soft Medoid GDC (T=1.0),0.01,0.804348,cora_ml,PRBCD,1,1,tanhCW,True
336,Soft Median RPPRGo (T=0.5),0.01,0.810277,cora_ml,PRBCD,1,1,tanhCW,True


In [5]:
# Display names of the datasets, keyed by the identifiers used in the configs.
dataset_map = dict(
    cora_ml=r'Cora ML',
    citeseer=r'Citeseer',
)
# Display names in the insertion order of `dataset_map`.
dataset_order = list(dataset_map.values())

In [6]:
# Display names of the attacks, keyed by the identifiers used in the configs.
attack_map = dict(PRBCD=r'PR-BCD')
# Display names in the insertion order of `attack_map`.
attack_order = list(attack_map.values())

In [7]:
def transform_label(label: str) -> str:
    """Return the display name for an architecture label.

    Currently the identity: the label is returned unchanged. The function is
    the single place to hook in a renaming of labels for the summary tables.
    """
    return label

In [8]:
def calc_mean_and_error(values: pd.Series, seeds: pd.Series, with_error=True, decimal_places: int = 3) -> str:
    """Format the mean (and optionally its standard error) over one value per seed.

    NaN measurements are dropped together with their seed. If a seed still
    occurs more than once, only its first remaining measurement is used, so
    repeated runs of one seed do not get extra weight.

    Parameters
    ----------
    values : pd.Series
        Numeric measurements, aligned position-wise with ``seeds``.
    seeds : pd.Series
        Seed that produced each measurement.
    with_error : bool
        If True, append a LaTeX plus-minus sign and the standard error of the
        mean, ``std / sqrt(n)``, using the population standard deviation
        (NumPy's default ``ddof=0``).
    decimal_places : int
        Number of decimals used for every formatted number.

    Returns
    -------
    str
        ``'<mean>'`` or ``'<mean> $\\pm$ <standard error>'``. When no
        measurement is left, the numbers are rendered as ``nan``.
    """
    values, seeds = values.values, seeds.values
    is_valid = ~np.isnan(values)
    values, seeds = values[is_valid], seeds[is_valid]

    # `return_index=True` yields the position of the first occurrence of every
    # distinct seed; indexing with it keeps exactly one value per seed.
    first_per_seed = np.unique(seeds, return_index=True)[1]
    values = values[first_per_seed]

    n_values = len(values)
    # Guard the empty case explicitly instead of relying on NumPy's
    # warning-emitting NaN results for empty arrays.
    mean = np.mean(values) if n_values else float('nan')
    if not with_error:
        return rf'{mean:.{decimal_places}f}'

    # Standard error of the mean is std / sqrt(n); dividing by n itself (as was
    # done before) under-reports the error for every n > 1.
    std_error = np.std(values) / np.sqrt(n_values) if n_values else float('nan')
    return rf'{mean:.{decimal_places}f} $\pm$ {std_error:.{decimal_places}f}'

In [9]:
# Column names used as keys of the summary-table rows.
architecture_c = r'Architecture'
# Whitespace-only name: the dataset column gets a blank-looking header.
dataset_c = r'   '
attack_c = r'Attack'
epsilons_c = r'Frac. edges (epsilon)'

# Epsilon values and LaTeX markup commands.
# NOTE(review): neither list is used in the cells visible here — presumably
# each mark highlights the epsilon at the same position in a LaTeX table;
# confirm against the cells that consume them.
epsilons = [0.05, 0.1, 0.25]
epsilon_marks = [r'\textit', r'\underline', r'\textbf']

In [10]:

# Aggregate the accuracies into one summary row per
# (dataset, attack, architecture label, epsilon) and collect the rows per dataset.

# Number of distinct seeds every group is expected to contain; groups that
# deviate are reported but still aggregated.
n_expected_seeds = 3

# Only these datasets get a summary table; groups of any other dataset are skipped.
records_by_dataset = {'cora_ml': [], 'citeseer': []}

for (dataset, attack, label, epsilon), df_group in df_results.groupby(['dataset', 'attack', 'label', 'epsilon']):
    if len(df_group.seed.unique()) != n_expected_seeds:
        # The label is part of the group key; without it the messages of
        # different architectures cannot be told apart.
        print(f'For {dataset}-{attack}-{label}-{epsilon} collected runs for seed {df_group.seed.tolist()}')

    if dataset not in records_by_dataset:
        continue

    accuracy = calc_mean_and_error(df_group.accuracy, df_group.seed, with_error=False)
    records_by_dataset[dataset].append({
        dataset_c: dataset_map[dataset],
        architecture_c: transform_label(label),
        attack_c: attack_map[attack],
        epsilons_c: epsilon,
        'accuracy': accuracy
    })

df_cora = pd.DataFrame(records_by_dataset['cora_ml'])
df_citeseer = pd.DataFrame(records_by_dataset['citeseer'])

For cora_ml-PRBCD-0.0 collected runs for seed [42, 5, 100, 42, 100, 0, 0, 5, 1, 1]
For cora_ml-PRBCD-0.01 collected runs for seed [42, 5, 100, 42, 100, 0, 0, 5, 1, 1]
For cora_ml-PRBCD-0.05 collected runs for seed [42, 5, 100, 42, 100, 0, 0, 5, 1, 1]
For cora_ml-PRBCD-0.1 collected runs for seed [42, 5, 100, 42, 100, 0, 0, 5, 1, 1]
For cora_ml-PRBCD-0.25 collected runs for seed [42, 5, 100, 42, 100, 0, 0, 5, 1, 1]
For cora_ml-PRBCD-0.0 collected runs for seed [42, 5, 100, 42, 100, 0, 0, 5, 1, 1]
For cora_ml-PRBCD-0.01 collected runs for seed [42, 5, 100, 42, 100, 0, 0, 5, 1, 1]
For cora_ml-PRBCD-0.05 collected runs for seed [42, 5, 100, 42, 100, 0, 0, 5, 1, 1]
For cora_ml-PRBCD-0.1 collected runs for seed [42, 5, 100, 42, 100, 0, 0, 5, 1, 1]
For cora_ml-PRBCD-0.25 collected runs for seed [42, 5, 100, 42, 100, 0, 0, 5, 1, 1]
For cora_ml-PRBCD-0.0 collected runs for seed [42, 5, 100, 100, 42, 0, 0, 1, 5, 1]
For cora_ml-PRBCD-0.01 collected runs for seed [42, 5, 42, 100, 100, 0, 0, 5, 1, 

In [11]:

# Print the Cora ML summary as a plain-text Markdown table.
print(df_cora.to_markdown())

|    |         | Architecture               | Attack   |   Frac. edges (epsilon) |   accuracy |
|---:|:--------|:---------------------------|:---------|------------------------:|-----------:|
|  0 | Cora ML | Jaccard GCN                | PR-BCD   |                    0    |      0.809 |
|  1 | Cora ML | Jaccard GCN                | PR-BCD   |                    0.01 |      0.785 |
|  2 | Cora ML | Jaccard GCN                | PR-BCD   |                    0.05 |      0.723 |
|  3 | Cora ML | Jaccard GCN                | PR-BCD   |                    0.1  |      0.659 |
|  4 | Cora ML | Jaccard GCN                | PR-BCD   |                    0.25 |      0.532 |
|  5 | Cora ML | RGCN                       | PR-BCD   |                    0    |      0.787 |
|  6 | Cora ML | RGCN                       | PR-BCD   |                    0.01 |      0.769 |
|  7 | Cora ML | RGCN                       | PR-BCD   |                    0.05 |      0.717 |
|  8 | Cora ML | RGCN                   

In [30]:
# Show the Cora ML summary frame as the cell's output.
df_cora

Unnamed: 0,Unnamed: 1,Architecture,Attack,Frac. edges (epsilon),accuracy
0,Cora ML,Jaccard GCN,PR-BCD,0.0,0.809
1,Cora ML,Jaccard GCN,PR-BCD,0.01,0.785
2,Cora ML,Jaccard GCN,PR-BCD,0.05,0.723
3,Cora ML,Jaccard GCN,PR-BCD,0.1,0.659
4,Cora ML,Jaccard GCN,PR-BCD,0.25,0.532
5,Cora ML,RGCN,PR-BCD,0.0,0.787
6,Cora ML,RGCN,PR-BCD,0.01,0.769
7,Cora ML,RGCN,PR-BCD,0.05,0.717
8,Cora ML,RGCN,PR-BCD,0.1,0.667
9,Cora ML,RGCN,PR-BCD,0.25,0.554
