# Analysis of global results

In [1]:
from evaluation_utils import read_global_results, unpivot_baseline_model, unpivot_ssl_model, read_multiple_global_results
from constants import MODE_1_PROCESSED_DIR, MODE_2_3_PROCESSED_DIR, TEST_METRIC, ENCODERS, TRAINING_SCHEMES, SSL_MODELS, BASELINES
import pandas as pd

In [13]:
import itertools
def unpivot_ssl_model2(df : pd.DataFrame, suffix : str, ssl_models, encoders, training_schemes):
    '''
    Unpivot the results to a long format for all SSL methods.

    For every (ssl_model, encoder, scheme) combination the column named
    ``{encoder}_{ssl_model}_{scheme}_{suffix}`` is looked up in ``df``. Combinations
    without such a column are skipped. Each row of the result corresponds to one
    row of ``df`` for one such combination.

    Parameters
    ----------
    df : wide results frame, one column per (encoder, SSL model, scheme, metric).
    suffix : metric suffix of the columns to extract; also the name of the value
        column in the result.
    ssl_models, encoders, training_schemes : iterables of name fragments used to
        build the column names.

    Returns
    -------
    pd.DataFrame with columns ``suffix``, ``Experiment`` (only when ``df`` has it),
    ``pretext_weight`` (taken from ``{encoder}_{ssl_model}_{scheme}_train_pretext_weight``
    when present, otherwise None), ``SSL_model``, ``Encoder``, ``Training_scheme``
    and ``Graph_ID`` (the index labels of ``df``). The index is reset. When no
    combination matches, an empty frame with these columns is returned.
    '''
    has_experiment = 'Experiment' in df.columns
    passthrough_columns = ['Experiment'] if has_experiment else []
    graph_ids = df.index.values.tolist()

    frames = []
    for ssl_model, encoder, scheme in itertools.product(ssl_models, encoders, training_schemes):
        column = f'{encoder}_{ssl_model}_{scheme}_{suffix}'
        if column not in df.columns:
            continue
        pretext_weight_col = f'{encoder}_{ssl_model}_{scheme}_train_pretext_weight'

        # Select with a flat list of labels and copy, so that the columns added
        # below never write into (or warn about) a slice of the caller's frame.
        df_model = df[[column] + passthrough_columns].rename(columns={column: suffix}).copy()
        df_model['pretext_weight'] = df[pretext_weight_col] if pretext_weight_col in df.columns else None
        df_model['SSL_model'] = ssl_model
        df_model['Encoder'] = encoder
        df_model['Training_scheme'] = scheme
        df_model['Graph_ID'] = graph_ids
        frames.append(df_model)

    if not frames:
        # pd.concat raises on an empty list; return an empty frame with the same layout instead.
        return pd.DataFrame(columns=[suffix] + passthrough_columns + [
            'pretext_weight', 'SSL_model', 'Encoder', 'Training_scheme', 'Graph_ID'
        ])
    return pd.concat(frames, ignore_index=True)


In [14]:
# Long-format SSL results, built with the unpivot_ssl_model2 helper defined in this notebook.
df_ssl = unpivot_ssl_model2(
    df=data,
    suffix=TEST_METRIC,
    ssl_models=SSL_MODELS,
    encoders=ENCODERS,
    training_schemes=TRAINING_SCHEMES,
)


In [12]:
# Inspect the 'Experiment' column of the loaded results.
data.loc[:, 'Experiment']

0           Mode 1
1           Mode 1
2           Mode 1
3           Mode 1
4           Mode 1
            ...   
119424    Mode 2/3
119425    Mode 2/3
119426    Mode 2/3
119427    Mode 2/3
119428    Mode 2/3
Name: Experiment, Length: 119429, dtype: object

In [2]:
# Read the raw results of Mode 1 and Mode 2/3 into a single frame
data = read_multiple_global_results(
    experiments=[
        ('Mode 1', MODE_1_PROCESSED_DIR),
        ('Mode 2/3', MODE_2_3_PROCESSED_DIR),
    ]
)

# Unpivot the test metric: one frame for the SSL models, one for the baselines
df_ssl = unpivot_ssl_model(
    df=data,
    suffix=TEST_METRIC,
    ssl_models=SSL_MODELS,
    encoders=ENCODERS,
    training_schemes=TRAINING_SCHEMES,
)
df_baseline = unpivot_baseline_model(
    df=data,
    suffix=TEST_METRIC,
    baseline_models=BASELINES,
    training_schemes=TRAINING_SCHEMES,
)

1.ndjson
2.ndjson
3.ndjson
4.ndjson
5.ndjson
6.ndjson
7.ndjson
8.ndjson
9.ndjson
10.ndjson
11.ndjson
concatenating
1.ndjson
2.ndjson
3.ndjson
4.ndjson
5.ndjson
6.ndjson
7.ndjson
8.ndjson
9.ndjson
10.ndjson
11.ndjson
12.ndjson
13.ndjson
14.ndjson
15.ndjson
16.ndjson
concatenating


# All results

In [3]:
df_ssl

Unnamed: 0,test_rocauc_ovr,pretext_weight,SSL_model,Encoder,Training_scheme,Graph_ID
0,0.949540,,AttributeMask,GCN,PF,0
1,0.984346,,AttributeMask,GCN,PF,1
2,0.775972,,AttributeMask,GCN,PF,2
3,0.763285,,AttributeMask,GCN,PF,3
4,0.978847,,AttributeMask,GCN,PF,4
...,...,...,...,...,...,...
20422354,0.996673,1,MVMI_FT,GIN,JL,119424
20422355,0.963580,1,MVMI_FT,GIN,JL,119425
20422356,0.988033,1,MVMI_FT,GIN,JL,119426
20422357,0.909963,1,MVMI_FT,GIN,JL,119427


# Increase in performance

Unnamed: 0,test_rocauc_ovr,pretext_weight,SSL_model,Encoder,Training_scheme,Graph_ID
0,0.949540,,AttributeMask,GCN,PF,0
1,0.984346,,AttributeMask,GCN,PF,1
2,0.775972,,AttributeMask,GCN,PF,4
3,0.763285,,AttributeMask,GCN,PF,5
4,0.978847,,AttributeMask,GCN,PF,6
...,...,...,...,...,...,...
16748248,0.953252,1,MVMI_FT,GIN,JL,99995
16748249,0.727067,50,MVMI_FT,GIN,JL,99996
16748250,0.657303,50,MVMI_FT,GIN,JL,99997
16748251,0.863095,10,MVMI_FT,GIN,JL,99998
