### Global analysis (no marginalization)
This notebook analyses and plots the global results of mode1 and mode4 with no marginalization on graph properties.


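The results are based on the processed result shards read from the directories configured in the constants section below (currently `results/mode1/processed`).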

-----------
**Note:** Uncomment the mode4 lines once the mode4 results are ready to be analysed. Feel free to add any further analyses you want.

In [21]:
# Scratch cell: the result is not assigned to anything and can be removed.
1+1

2

In [22]:
# Scratch DataFrame. `df` appears to be unused by the rest of the notebook
# (TODO confirm and remove this cell).
# pandas is imported here because this cell is executed before the notebook's
# main import cell; on a fresh kernel `pd` would otherwise be undefined.
import pandas as pd

df = pd.DataFrame({
    'a': [1, 2, 3],
    'b': [9, 0, 10],
})

In [23]:
import pandas as pd
import json
from evaluation_utils import read_processed_shards, get_best_configuration_per_model
import seaborn as sns
import matplotlib.pyplot as plt
from collections import defaultdict
import numpy as np
from matplotlib.ticker import PercentFormatter
from scipy.stats import f_oneway
from tqdm import tqdm
import pylab as pl
import matplotlib as mpl
from collections import OrderedDict

##### CONSTANTS
These constants define the different views of our results (data directories, model groups, encoders, training schemes and the test metric) that might be interesting to analyse.

In [29]:
# --------- DATA ---------
# Directories the processed result shards are read from, one per mode.
MODE_1_PROCESSED_DIR = 'results/mode1/processed' # mode1
# NOTE(review): this commented-out path previously pointed at the mode1
# directory; 'results/mode4/processed' is assumed by analogy -- TODO confirm.
#MODE_4_PROCESSED_DIR = 'results/mode4/processed' # mode4

# --------- MODELS ---------
# Model names grouped by category; each *_ALL list is the concatenation of its
# sub-groups and ALL_MODELS is the concatenation of all four categories.
GENERATION_FEATURES = ['AttributeMask', 'CorruptedEmbeddingsReconstruction', 'CorruptedFeaturesReconstruction']
GENERATION_STRUCTURE = ['EdgeMask', 'GAE']
GENERATION_ALL = GENERATION_FEATURES + GENERATION_STRUCTURE
AUXILIARY_ALL = ['NodeClusteringWithAlignment', 'S2GRL', 'PairwiseAttrSim', 'GraphPartitioning']
CONTRAST_SAME_SCALE = ['BGRL', 'GBT', 'GCA', 'SelfGNNPPR', 'SelfGNNSplit', 'MERIT']
CONTRAST_CROSS_SCALE = ['DeepGraphInfomax', 'GraphInfoClust', 'SUBGCON']
CONTRAST_ALL = CONTRAST_SAME_SCALE + CONTRAST_CROSS_SCALE
HYBRID_ALL = ['G_Zoom', 'MEtAl', 'MVMI_FT']
ALL_MODELS = GENERATION_ALL + AUXILIARY_ALL + CONTRAST_ALL + HYBRID_ALL
N_MODELS = len(ALL_MODELS)

# --------- ENCODERS ---------
# First component of a result column name.
ENCODERS = ['GCN', 'GAT', 'GIN']

# --------- TRAINING SCHEMES ---------
# Third component of a result column name.
TRAINING_SCHEMES = ['JL', 'PF', 'URL']

# --------- TEST METRIC ---------
# Suffix of the result columns that are summarised in this notebook.
TEST_METRIC = 'test_rocauc_ovr'

##### INDEXING
General scheme of a column: `Encoder_model_scheme_metric`

Example of a column: `GCN_AttributeMask_JL_test_rocauc_ovr`

In [25]:
def get_test_metric(df, test_metric=None):
    """Return the columns of ``df`` that hold the test metric.

    A column is kept when its name is exactly the metric name or ends with
    ``_<metric>``, which matches the ``Encoder_model_scheme_metric`` column
    naming used in this notebook.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are filtered.
    test_metric : str, optional
        Metric name to select. Defaults to the notebook-level ``TEST_METRIC``.

    Returns
    -------
    pandas.DataFrame
        The selected columns in their original order (possibly zero columns).
    """
    if test_metric is None:
        test_metric = TEST_METRIC
    suffix = f'_{test_metric}'
    # The previous `df.columns in [TEST_METRIC]` compared the whole Index with
    # a string, which raises ValueError for frames with more than one column.
    selected = [
        col for col in df.columns
        if str(col) == test_metric or str(col).endswith(suffix)
    ]
    return df[selected]

##### READING DATA
We read the data and remove the graphs (rows) for which any model failed or crashed, i.e. every row that contains a missing value.

In [26]:
# --------- READING DATA ---------
# Load the mode1 shards, discard the 'marginal_param' and 'fixed_params'
# columns, then drop every row that contains a missing value. The steps are
# chained so the frame returned by the reader is not mutated in place.
df1 = (
    read_processed_shards(MODE_1_PROCESSED_DIR)  # mode1
    .drop(['marginal_param', 'fixed_params'], axis=1)
    .dropna(axis=0)
)

# Same pipeline for mode4; uncomment together with MODE_4_PROCESSED_DIR.
#df4 = (
#    read_processed_shards(MODE_4_PROCESSED_DIR)  # mode4
#    .drop(['marginal_param', 'fixed_params'], axis=1)
#    .dropna(axis=0)
#)

1.ndjson


KeyboardInterrupt: 

##### Mean and std global results for all models
Here we report the mean and std test metric of all models per main category:
- Generation-based
- Auxiliary-based
- Contrast-based
- Hybrid


In [39]:
def get_performance_summary_for_models(df, MODELS, ENCODERS = ENCODERS, TRAINING_SCHEMES = TRAINING_SCHEMES):
    """Build a table of ``mean±std`` test-metric strings, one row per model.

    For every model the column ``{encoder}_{model}_{scheme}_{TEST_METRIC}`` is
    looked up in ``df`` for each encoder/training-scheme pair, and its mean
    and standard deviation (both rounded to two decimals) are formatted as
    ``"mean±std"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Results frame containing the metric columns.
    MODELS : iterable of str
        Model names to summarise; one output row each.
    ENCODERS : iterable of str
        Encoder names; defaults to the notebook-level ``ENCODERS``.
    TRAINING_SCHEMES : iterable of str
        Training scheme names; defaults to the notebook-level
        ``TRAINING_SCHEMES``.

    Returns
    -------
    pandas.DataFrame
        Columns are ``'model'`` followed by ``'{encoder}-{scheme}'`` for every
        pair. A cell is ``None`` when the corresponding column is absent from
        ``df``.
    """
    results = []
    for model in MODELS:
        data = [model]
        for e in ENCODERS:
            for ts in TRAINING_SCHEMES:
                model_string = f'{e}_{model}_{ts}_{TEST_METRIC}'
                if model_string not in df.columns:
                    # Keep a placeholder: skipping the entry would shift every
                    # later value of this row under the wrong header.
                    data.append(None)
                    continue
                metric = df[model_string]
                avg = metric.mean()
                std = metric.std()
                data.append(f'{round(avg, 2)}\u00B1{round(std,2)}')
        results.append(data)

    result_cols = ['model'] + [f'{encoder}-{ts}' for encoder in ENCODERS for ts in TRAINING_SCHEMES]
    return pd.DataFrame(results, columns=result_cols)

In [40]:
# Summary table for the generation-based models.
get_performance_summary_for_models(df=df1, MODELS=GENERATION_ALL)

Unnamed: 0,model,GCN-JL,GCN-PF,GCN-URL,GAT-JL,GAT-PF,GAT-URL,GIN-JL,GIN-PF,GIN-URL
0,AttributeMask,0.78±0.18,0.85±0.14,0.78±0.15,0.77±0.17,0.82±0.14,0.79±0.14,0.71±0.17,0.76±0.14,0.71±0.14
1,CorruptedEmbeddingsReconstruction,0.71±0.18,0.84±0.14,0.69±0.15,0.7±0.18,0.82±0.14,0.72±0.15,0.64±0.15,0.75±0.14,0.61±0.1
2,CorruptedFeaturesReconstruction,0.79±0.18,0.85±0.14,0.8±0.14,0.77±0.18,0.83±0.14,0.81±0.14,0.7±0.17,0.76±0.14,0.74±0.15
3,EdgeMask,0.78±0.17,0.85±0.14,0.77±0.14,0.76±0.17,0.82±0.14,0.79±0.13,0.69±0.15,0.75±0.14,0.67±0.12
4,GAE,0.76±0.18,0.85±0.14,0.79±0.15,0.74±0.18,0.82±0.15,0.79±0.14,0.69±0.16,0.76±0.14,0.7±0.14


In [41]:
# Summary table for the auxiliary-based models.
get_performance_summary_for_models(df=df1, MODELS=AUXILIARY_ALL)

Unnamed: 0,model,GCN-JL,GCN-PF,GCN-URL,GAT-JL,GAT-PF,GAT-URL,GIN-JL,GIN-PF,GIN-URL
0,NodeClusteringWithAlignment,0.87±0.14,0.86±0.13,0.84±0.13,0.85±0.15,0.83±0.14,0.85±0.13,0.83±0.16,0.77±0.15,0.79±0.15
1,S2GRL,0.82±0.15,0.85±0.14,0.79±0.14,0.8±0.15,0.82±0.14,0.8±0.13,0.71±0.15,0.75±0.14,0.67±0.13
2,PairwiseAttrSim,0.75±0.17,0.85±0.14,0.77±0.14,0.75±0.17,0.82±0.14,0.79±0.14,0.69±0.15,0.76±0.14,0.68±0.13
3,GraphPartitioning,0.81±0.17,0.85±0.13,0.82±0.13,0.78±0.17,0.82±0.14,0.82±0.13,0.73±0.16,0.76±0.14,0.71±0.14


In [42]:
# Summary table for the contrast-based models.
get_performance_summary_for_models(df=df1, MODELS=CONTRAST_ALL)

Unnamed: 0,model,GCN-JL,GCN-PF,GCN-URL,GAT-JL,GAT-PF,GAT-URL,GIN-JL,GIN-PF,GIN-URL
0,BGRL,0.79±0.16,0.85±0.14,0.77±0.14,0.77±0.16,0.82±0.15,0.8±0.14,0.69±0.15,0.75±0.14,0.67±0.13
1,GBT,0.84±0.13,0.85±0.13,0.81±0.13,0.81±0.14,0.83±0.14,0.81±0.13,0.73±0.14,0.76±0.14,0.7±0.13
2,GCA,0.84±0.14,0.85±0.14,0.81±0.14,0.82±0.15,0.83±0.14,0.82±0.13,0.72±0.16,0.75±0.14,0.69±0.14
3,SelfGNNPPR,0.83±0.16,0.88±0.13,0.83±0.13,,,,,,
4,SelfGNNSplit,0.8±0.16,0.85±0.14,0.81±0.14,0.79±0.16,0.83±0.14,0.84±0.12,0.69±0.15,0.76±0.15,0.72±0.13
5,MERIT,0.83±0.14,0.84±0.15,0.79±0.15,,,,,,
6,DeepGraphInfomax,0.76±0.17,0.85±0.14,0.76±0.14,0.75±0.16,0.82±0.14,0.78±0.14,0.67±0.14,0.75±0.14,0.67±0.12
7,GraphInfoClust,0.75±0.17,0.85±0.14,0.76±0.14,0.74±0.17,0.82±0.14,0.78±0.14,0.66±0.15,0.75±0.14,0.67±0.12
8,SUBGCON,0.84±0.11,0.83±0.12,0.79±0.12,0.8±0.13,0.81±0.14,0.77±0.13,0.76±0.13,0.75±0.13,0.71±0.13


In [43]:
# Summary table for the hybrid models.
get_performance_summary_for_models(df=df1, MODELS=HYBRID_ALL)

Unnamed: 0,model,GCN-JL,GCN-PF,GCN-URL,GAT-JL,GAT-PF,GAT-URL,GIN-JL,GIN-PF,GIN-URL
0,G_Zoom,0.84±0.16,0.89±0.13,0.87±0.12,,,,,,
1,MEtAl,0.73±0.19,0.85±0.14,0.78±0.15,0.72±0.19,0.83±0.14,0.79±0.15,0.69±0.18,0.76±0.14,0.7±0.14
2,MVMI_FT,0.82±0.16,0.91±0.11,0.78±0.15,0.83±0.15,0.9±0.12,0.8±0.15,0.77±0.16,0.83±0.13,0.74±0.14
