In [1]:
import os
# Move the working directory one level up so that the later
# `from notebooks import mpl_latex` import and the relative
# 'latex/assets/...' output paths resolve.
# NOTE(review): this is relative and not idempotent -- re-running this cell
# without restarting the kernel moves up one more directory each time.
os.chdir('../')

In [2]:
# Render matplotlib figures as static images inside the notebook output.
%matplotlib inline
#%matplotlib notebook

# Automatically re-import modules that changed on disk before each cell
# is executed (mode 2 = all modules).
%load_ext autoreload
%autoreload 2

In [3]:
from copy import deepcopy
from typing import List, Tuple

from cycler import cycler
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
import numpy as np
import pandas as pd
import scipy.sparse as sp
from sklearn.decomposition import PCA
import scipy.stats as stats
import torch
from torch import nn
import torch.nn.functional as F
import seml

import tqdm
# Register tqdm's progress-bar integration with pandas
# (adds the `progress_apply` family of methods).
tqdm.tqdm.pandas()
#plt.style.use('ggplot')

In [4]:
from notebooks import mpl_latex

  self[key] = other[key]


In [5]:
# Switch the project's matplotlib/LaTeX helper into "production mode".
# NOTE(review): presumably configures matplotlib for paper-quality output --
# confirm in notebooks/mpl_latex.py.
mpl_latex.enable_production_mode()

In [6]:
# Show every column of wide frames (the experiment table has dozens of
# config/slurm columns) and up to 200 rows before pandas truncates the display.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)

In [7]:
# Model labels that the plotting loop draws; every other label in the results
# is skipped. The names must match the `label` column *after* the ' (T=0.2)'
# suffix has been stripped (e.g. 'Soft Median GDC').
labels_to_plot = [
    'Soft Median GDC',
    'Vanilla GCN',
    'Vanilla GDC',
    #'SVD GCN',
    #'Jaccard GCN',
    #'RGCN'
]

In [8]:
# Pull the finished runs of the block-size transfer-attack collection from
# seml as a flat DataFrame, restricted to the fields used further down.
df_experiments = seml.get_results(
    'neurips21_global_attack_transfer_block_size',
    fields=['batch_id', 'slurm', 'config', 'result'],
    to_data_frame=True,
)

# Preview the first rows only.
df_experiments.head()

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  parsed = pd.io.json.json_normalize(parsed, sep='.')


Unnamed: 0,_id,batch_id,slurm.experiments_per_job,slurm.sbatch_options.time,slurm.sbatch_options.nodes,slurm.sbatch_options.cpus-per-task,slurm.sbatch_options.mem,slurm.sbatch_options.gres,config.overwrite,config.db_collection,config.dataset,config.data_dir,config.binary_attr,config.make_undirected,config.seed,config.attack,config.attack_params.epochs,config.attack_params.loss_type,config.epsilons,config.artifact_dir,config.pert_adj_storage_type,config.pert_attr_storage_type,config.model_storage_type,config.model_label,config.surrogate_model_storage_type,config.surrogate_model_label,config.device,config.data_device,config.debug_level,result.results,config.attack_params.fine_tune_epochs,config.attack_params.keep_heuristic,config.attack_params.search_space_size,slurm.sbatch_options.job-name,slurm.sbatch_options.array,slurm.sbatch_options.output,slurm.array_id,slurm.task_id
0,1,1,1,0-01:00,1,4,4G,gpu:1,1,neurips21_global_attack_transfer_block_size,cora_ml,datasets/,False,True,0,PGD,300,tanhCW,"[0, 0.05, 0.1, 0.25]",cache,evasion_global_transfer_adj,evasion_global_transfer_attr,pretrained,,pretrained,Vanilla Dense GCN,0,0,info,"[{'label': 'Vanilla GCN', 'epsilon': 0, 'accur...",,,,,,,,
1,2,1,1,0-01:00,1,4,4G,gpu:1,2,neurips21_global_attack_transfer_block_size,cora_ml,datasets/,False,True,1,PGD,300,tanhCW,"[0, 0.05, 0.1, 0.25]",cache,evasion_global_transfer_adj,evasion_global_transfer_attr,pretrained,,pretrained,Vanilla Dense GCN,0,0,info,"[{'label': 'Vanilla GCN', 'epsilon': 0, 'accur...",,,,,,,,
2,3,1,1,0-01:00,1,4,4G,gpu:1,3,neurips21_global_attack_transfer_block_size,cora_ml,datasets/,False,True,5,PGD,300,tanhCW,"[0, 0.05, 0.1, 0.25]",cache,evasion_global_transfer_adj,evasion_global_transfer_attr,pretrained,,pretrained,Vanilla Dense GCN,0,0,info,"[{'label': 'Vanilla GCN', 'epsilon': 0, 'accur...",,,,,,,,
3,4,1,1,0-01:00,1,4,4G,gpu:1,4,neurips21_global_attack_transfer_block_size,cora_ml,datasets/,False,True,42,PGD,300,tanhCW,"[0, 0.05, 0.1, 0.25]",cache,evasion_global_transfer_adj,evasion_global_transfer_attr,pretrained,,pretrained,Vanilla Dense GCN,0,0,info,"[{'label': 'Vanilla GCN', 'epsilon': 0, 'accur...",,,,,,,,
4,5,1,1,0-01:00,1,4,4G,gpu:1,5,neurips21_global_attack_transfer_block_size,cora_ml,datasets/,False,True,100,PGD,300,tanhCW,"[0, 0.05, 0.1, 0.25]",cache,evasion_global_transfer_adj,evasion_global_transfer_attr,pretrained,,pretrained,Vanilla Dense GCN,0,0,info,"[{'label': 'Vanilla GCN', 'epsilon': 0, 'accur...",,,,,,,,


In [9]:
# List the flattened column names ('config.*', 'slurm.*', 'result.*') that are
# available for the processing below.
df_experiments.columns

Index(['_id', 'batch_id', 'slurm.experiments_per_job',
       'slurm.sbatch_options.time', 'slurm.sbatch_options.nodes',
       'slurm.sbatch_options.cpus-per-task', 'slurm.sbatch_options.mem',
       'slurm.sbatch_options.gres', 'config.overwrite', 'config.db_collection',
       'config.dataset', 'config.data_dir', 'config.binary_attr',
       'config.make_undirected', 'config.seed', 'config.attack',
       'config.attack_params.epochs', 'config.attack_params.loss_type',
       'config.epsilons', 'config.artifact_dir',
       'config.pert_adj_storage_type', 'config.pert_attr_storage_type',
       'config.model_storage_type', 'config.model_label',
       'config.surrogate_model_storage_type', 'config.surrogate_model_label',
       'config.device', 'config.data_device', 'config.debug_level',
       'result.results', 'config.attack_params.fine_tune_epochs',
       'config.attack_params.keep_heuristic',
       'config.attack_params.search_space_size',
       'slurm.sbatch_options.job-name

In [10]:
# Expand each experiment's list of per-model result records into its own frame
# and tag every row with the configuration of the experiment it came from.
frames_per_experiment = []
for raw_results, (_, experiment) in zip(df_experiments['result.results'],
                                        df_experiments.iterrows()):
    loss_type = experiment['config.attack_params.loss_type']
    frames_per_experiment.append(
        pd.DataFrame(raw_results).assign(
            dataset=experiment['config.dataset'],
            attack=experiment['config.attack'],
            block_size=experiment['config.attack_params.search_space_size'],
            seed=experiment['config.seed'],
            epochs=experiment['config.attack_params.epochs'],
            # 'MCE' and 'tanhMargin' are grouped as the "novel" losses;
            # any other loss type is flagged False.
            novel_loss=loss_type in ('MCE', 'tanhMargin'),
        )
    )

# One long table: a row per (experiment, model label, epsilon).
df_results = pd.concat(frames_per_experiment, ignore_index=True)
df_results

Unnamed: 0,label,epsilon,accuracy,dataset,attack,block_size,seed,epochs,novel_loss
0,Vanilla GCN,0.00,0.815020,cora_ml,PGD,,0,300,False
1,Vanilla Dense GCN,0.00,0.811067,cora_ml,PGD,,0,300,False
2,Vanilla GDC,0.00,0.853360,cora_ml,PGD,,0,300,False
3,Soft Medoid GDC (T=0.5),0.00,0.819368,cora_ml,PGD,,0,300,False
4,Soft Median GDC (T=1.0),0.00,0.831225,cora_ml,PGD,,0,300,False
...,...,...,...,...,...,...,...,...,...
12155,Soft Median GDC (T=0.2),0.25,0.636898,citeseer,GreedyRBCD,1000000.0,100,50,True
12156,SVD GCN (rank=50),0.25,0.495187,citeseer,GreedyRBCD,1000000.0,100,50,True
12157,SVD GCN (rank=100),0.25,0.540642,citeseer,GreedyRBCD,1000000.0,100,50,True
12158,Jaccard GCN,0.25,0.542781,citeseer,GreedyRBCD,1000000.0,100,50,True


In [11]:
# Inspect the distinct model labels present in the results.
df_results['label'].unique()

array(['Vanilla GCN', 'Vanilla Dense GCN', 'Vanilla GDC',
       'Soft Medoid GDC (T=0.5)', 'Soft Median GDC (T=1.0)',
       'Soft Median GDC (T=0.5)', 'Soft Median GDC (T=0.2)',
       'SVD GCN (rank=50)', 'SVD GCN (rank=100)', 'Jaccard GCN', 'RGCN',
       'Vanilla PPRGo', 'Soft Medoid RPPRGo (T=1.0)',
       'Soft Medoid RPPRGo (T=0.5)', 'Soft Medoid RPPRGo (T=0.2)',
       'Soft Median RPPRGo (T=1.0)', 'Soft Median RPPRGo (T=0.5)',
       'Soft Median RPPRGo (T=0.2)'], dtype=object)

In [12]:
# Remove the literal ' (T=0.2)' suffix so that those models appear under
# their plain name (e.g. 'Soft Median GDC'); other temperatures keep theirs.
df_results['label'] = [
    name.replace(r' (T=0.2)', '') for name in df_results['label']
]
df_results['label'].unique()

array(['Vanilla GCN', 'Vanilla Dense GCN', 'Vanilla GDC',
       'Soft Medoid GDC (T=0.5)', 'Soft Median GDC (T=1.0)',
       'Soft Median GDC (T=0.5)', 'Soft Median GDC', 'SVD GCN (rank=50)',
       'SVD GCN (rank=100)', 'Jaccard GCN', 'RGCN', 'Vanilla PPRGo',
       'Soft Medoid RPPRGo (T=1.0)', 'Soft Medoid RPPRGo (T=0.5)',
       'Soft Medoid RPPRGo', 'Soft Median RPPRGo (T=1.0)',
       'Soft Median RPPRGo (T=0.5)', 'Soft Median RPPRGo'], dtype=object)

In [13]:
# Values of the `epsilon` result column for which a figure is produced
# (0.25 is currently disabled).
epsilons = [0.1]#, 0.25]

In [14]:
# Maps the attack whose block size is varied (key) to the attack whose results
# are drawn as the block-size-independent reference line (value).
attack_names = {
    #'GreedyRBCD': 'Greedy',
    'PRBCD': 'PGD'
}

In [15]:
def _select_runs(results, **conditions):
    """Return the rows of ``results`` whose columns equal the given values.

    Args:
        results: DataFrame to filter.
        **conditions: ``column=value`` pairs; a row is kept only if every
            named column compares equal (``==``) to its value.

    Returns:
        The matching subset of ``results`` (possibly empty).
    """
    mask = np.ones(len(results), dtype=bool)
    for column, value in conditions.items():
        mask &= (results[column] == value).values
    return results[mask]


# For every (loss family, dataset, attack, epsilon) combination draw one figure:
#   * dash-dotted horizontal lines: mean accuracy under the reference attack
#     (``attack_names[attack]``), which does not depend on the block size;
#   * solid lines: mean accuracy under ``attack`` as a function of block size.
# Each figure is saved without legend, with legend, and as a dedicated legend.

# Labels that are actually drawn, in the order they appear in the results.
plotted_labels = [
    label for label in df_results.label.unique() if label in labels_to_plot
]

# Give every label a fixed colour so that its reference line and its block-size
# curve always match, even when one of the two has no data and is skipped
# (relying on the colour cycle alone would shift all subsequent colours).
cycle_colors = plt.rcParams['axes.prop_cycle'].by_key().get('color', [])
label_style = {
    label: {'color': cycle_colors[i % len(cycle_colors)]} if cycle_colors else {}
    for i, label in enumerate(plotted_labels)
}

for novel_loss in [True, False]:
    loss = 'novel_loss' if novel_loss else 'CE'
    print(loss)
    for dataset in df_results.dataset.unique():
        print(dataset)
        for attack, baseline_attack in attack_names.items():
            print(attack)

            # x-positions over which the reference lines are drawn; this only
            # depends on dataset and attack, so compute it once per pair.
            block_sizes = _select_runs(
                df_results, dataset=dataset, attack=attack
            ).block_size.unique()

            for epsilon in epsilons:
                print(epsilon)

                fig, ax = mpl_latex.newfig(width=0.30, ratio_yx=1)

                # Reference attack: one constant value per label.
                for label in plotted_labels:
                    group = _select_runs(
                        df_results,
                        dataset=dataset,
                        attack=baseline_attack,
                        label=label,
                        epsilon=epsilon,
                        novel_loss=novel_loss,
                    )

                    if group.empty:
                        print(f'No experiment with label {label}, dataset {dataset} and attack {baseline_attack} found')
                        continue

                    accs = group.accuracy.mean()
                    # Standard error of the mean: std / sqrt(n), not std / n.
                    # NOTE(review): the block-size curves below use three
                    # standard errors while this line uses one -- confirm
                    # which is intended and align them.
                    acc_error = group.accuracy.sem()

                    plt.errorbar(
                        block_sizes,
                        [accs] * block_sizes.size,
                        [acc_error] * block_sizes.size,
                        linestyle='-.',
                        **label_style[label]
                    )
                plt.gca().set_prop_cycle(None)

                # Attack under study: accuracy as a function of block size.
                for label in plotted_labels:
                    group = _select_runs(
                        df_results,
                        dataset=dataset,
                        attack=attack,
                        label=label,
                        epsilon=epsilon,
                        novel_loss=novel_loss,
                    )
                    group = group[group.epochs.notna()]

                    if group.empty:
                        print(f'No experiment with label {label}, dataset {dataset} and attack {attack} found')
                        continue

                    accuracy_by_block_size = group.groupby('block_size').accuracy
                    accs = accuracy_by_block_size.mean()
                    # Three standard errors of the mean per block size.
                    acc_error = 3 * accuracy_by_block_size.sem()

                    plt.errorbar(
                        accs.index,
                        accs,
                        acc_error,
                        label=label,
                        linestyle='-',
                        **label_style[label]
                    )
                plt.gca().set_prop_cycle(None)

                #plt.title(f'Dataset={dataset}, Attack={attack_names[attack]}'.replace('_', ' '))
                plt.ylabel('Accuracy')
                plt.xlabel('Block size $b$')
                plt.xscale('log')
                plt.xticks([1e4, 1e5, 1e6, 1e7])
                # Save once without and once with the legend, then the legend alone.
                mpl_latex.savefig(f'latex/assets/global_{attack}_{loss}_{dataset}_{epsilon}_block_size_no_legend', fig, close_fig=False)
                plt.legend()
                mpl_latex.savefig(f'latex/assets/global_{attack}_{loss}_{dataset}_{epsilon}_block_size', fig, close_fig=False)
                mpl_latex.dedicated_legend_plot(
                    f'latex/assets/global_{attack}_{loss}_{dataset}_{epsilon}_block_size_legend',
                    mod_label=lambda l: l,
                    ncol=3
                )

novel_loss
cora_ml
PRBCD
0.1


  self[key] = other[key]


citeseer
PRBCD
0.1


  self[key] = other[key]


CE
cora_ml
PRBCD
0.1


  self[key] = other[key]


citeseer
PRBCD
0.1


  self[key] = other[key]
