In [54]:
import pandas as pd
import json
from evaluation_utils import read_processed_shards
import seaborn as sns
import matplotlib.pyplot as plt
from collections import defaultdict
import numpy as np
from matplotlib.ticker import PercentFormatter
from scipy.stats import f_oneway


In [45]:
PROCESSED_DIR = f'results/preliminary/processed'
df = read_processed_shards(PROCESSED_DIR)

In [32]:
df.columns[-40:]

Index(['GIN_MEtAl_URL_val_f1_micro', 'GIN_MEtAl_URL_val_f1_macro',
       'GIN_MEtAl_URL_val_rocauc_ovr', 'GIN_MEtAl_URL_val_rocauc_ovo',
       'GIN_MEtAl_URL_val_logloss', 'GIN_MEtAl_URL_test_accuracy',
       'GIN_MEtAl_URL_test_f1_micro', 'GIN_MEtAl_URL_test_f1_macro',
       'GIN_MEtAl_URL_test_rocauc_ovr', 'GIN_MEtAl_URL_test_rocauc_ovo',
       'GIN_MEtAl_URL_test_logloss', 'GIN_MEtAl_URL_train_pretext_epochs',
       'GIN_MEtAl_URL_train_pretext_lr',
       'GIN_MEtAl_URL_train_downstream_epochs',
       'GIN_MEtAl_URL_train_downstream_lr', 'GIN_MEtAl_URL_train_patience',
       'GIN_MEtAl_URL_encoder_in_channels',
       'GIN_MEtAl_URL_encoder_hidden_channels',
       'GIN_MEtAl_URL_encoder_num_layers', 'GIN_MEtAl_URL_encoder_dropout',
       'GIN_MEtAl_URL_encoder_out_channels',
       'GIN_MEtAl_URL_pretext_partial_reconstruction',
       'GIN_MEtAl_URL_pretext_feature_mask_ratio',
       'GIN_MEtAl_URL_pretext_embedding_mask_ratio',
       'GIN_MEtAl_URL_pretext_ae_loss_wei

In [51]:
# Compute average test metric for each graph
TEST_METRIC = 'test_rocauc_ovr'
cols = [col for col in df.columns if TEST_METRIC in col]
df[f'avg_{TEST_METRIC}'] = df[cols].mean(axis=1)

In [63]:
# Bin each graph property into a fixed number of quantile bines
GRAPH_PROPERTIES = ['nvertex', 'avg_degree', 'feature_center_distance', 'num_clusters',
                    'cluster_size_slope', 'power_exponent', 'p_to_q_ratio', 'min_deg']
BINS = 100

for gp in GRAPH_PROPERTIES:
    bins = pd.qcut(df[gp], q=BINS, duplicates='drop')
    df[f'bin_{gp}'] = bins

# create an empty dictionary to store the results
results = {}

# loop over each parameter
for gp in GRAPH_PROPERTIES:

    bin_f_stats = {}

    # loop over each quantile bin
    for b in df[f'bin_{gp}'].unique():
        # filter the dataframe to only include rows where the current parameter is in bin i
        df_i = df[df[f'bin_{gp}'] == b]
        test_metric = df_i[f'avg_{TEST_METRIC}']

        # compute the average F statistic between the other parameters and the test metric
        f_stats = []
        for gp_other in GRAPH_PROPERTIES:
            if gp_other != gp:
                gp_other = df_i[gp_other]
                f_stat, p_val = f_oneway(gp_other, test_metric)
                f_stats.append(f_stat)
        bin_f_stats[b] = sum(f_stats) / len(f_stats)


    # find the quantized value that produced the highest average F statistic
    max_f_stat = max(bin_f_stats, key=bin_f_stats.get)
    results[gp] = max_f_stat    
for k,v in results.items():
    print(f'Optimal {k} bin: {v}')



Optimal nvertex bin: (465.0, 468.0]
Optimal avg_degree bin: (10.48, 10.514]
Optimal feature_center_distance bin: (2.063, 2.1]
Optimal num_clusters bin: nan
Optimal cluster_size_slope bin: (0.0793, 0.0873]
Optimal power_exponent bin: nan
Optimal p_to_q_ratio bin: (29.937, 30.862]
Optimal min_deg bin: (1.999, 3.0]


0      162.0
1      248.0
2      480.0
3      263.0
4      470.0
       ...  
995    244.0
996    353.0
997    502.0
998    172.0
999    296.0
Name: num_nodes, Length: 1000, dtype: float64

In [None]:
# Compute average GNN test metric
def get_average_for_graphs(metric):
    