In [1]:
import os
import yaml
import numpy as np
import pandas as pd

In [2]:
# Metric used for each dataset. The metric name is interpolated into the keys
# read from every experiment's metrics.yaml (e.g. 'val AP mean', 'test R2 std').
dataset_metrics = dict([
    ('tolokers-tab', 'AP'),
    ('questions-tab', 'AP'),
    ('city-reviews', 'AP'),
    ('browser-games', 'accuracy'),
    ('hm-categories', 'accuracy'),
    ('web-fraud', 'AP'),
    ('city-roads-M', 'R2'),
    ('city-roads-L', 'R2'),
    ('avazu-devices', 'R2'),
    ('hm-prices', 'R2'),
    ('web-traffic', 'R2'),
])

# Baseline model identifiers as they appear in experiment directory names.
models = ['ResNet', 'GCN', 'GraphSAGE', 'GAT', 'GT']
# PLR variants carry the '_plr' suffix in their directory names.
models_plr = [f'{base_name}_plr' for base_name in models]

# Directory-name identifier -> row label shown in the result tables.
# Baselines keep their name; PLR variants are displayed as '<name>-PLR'.
model_names_dict = {
    **{base_name: base_name for base_name in models},
    **{plr_name: plr_name.replace('_plr', '-PLR') for plr_name in models_plr},
}

# Results for classification datasets

In [3]:
# Classification datasets; this order is also the column order of the tables.
datasets = [
    'tolokers-tab',
    'questions-tab',
    'city-reviews',
    'browser-games',
    'hm-categories',
    'web-fraud',
]

In [4]:
# Two independent empty tables with one column per dataset: one for formatted
# test results, one for the hyperparameter string of the selected run.
# Rows (one per model) are appended by the cells that follow.
df_results, df_hparams_strings = (pd.DataFrame(columns=datasets) for _table in range(2))

In [5]:
# For each baseline model and classification dataset, walk the grid of
# feature preprocessing x dropout x learning rate, keep the runs whose
# metrics.yaml exists and reports 5 runs, and pick the configuration with the
# highest validation mean. Its test result and hyperparameters become one cell
# in df_results / df_hparams_strings.
for model in models:
    best_tests = []
    best_hparams = []
    for dataset in datasets:
        # One (val mean, formatted test result, hparams string) tuple per usable run.
        candidates = []
        for num_features_preproc in ('quantile', 'standard'):
            for dropout_p in ('0', '0.2'):
                for lr in ('3e-5', '3e-4', '3e-3'):
                    file_path = f'../experiments/{dataset}/{model}_{num_features_preproc}_dropout{dropout_p}_lr{lr}_01/metrics.yaml'

                    # Missing experiments are skipped silently.
                    if not os.path.isfile(file_path):
                        continue

                    with open(file_path) as file:
                        metrics = yaml.safe_load(file)

                    # Only experiments that report exactly 5 runs are considered.
                    if metrics['num runs'] == 5:
                        metric = dataset_metrics[dataset]

                        val_mean = metrics[f'val {metric} mean']
                        test_mean = metrics[f'test {metric} mean']
                        test_std = metrics[f'test {metric} std']

                        # Test metric is shown scaled by 100 as "mean ± std".
                        test_string = f'{test_mean * 100:.2f} \u00B1 {test_std * 100:.2f}'
                        # Strip the directory prefix and suffix so only the hyperparameters remain.
                        hparams_string = file_path.replace(f'../experiments/{dataset}/{model}_', '').replace('_01/metrics.yaml', '')

                        candidates.append((val_mean, test_string, hparams_string))

        if len(candidates) == 0:
            raise RuntimeError(f'No successful experiments were found for dataset {dataset} '
                               f'and model {model_names_dict[model]}.')

        # max() returns the first maximal item, so ties go to the earliest grid point.
        _, test_string, hparams_string = max(candidates, key=lambda candidate: candidate[0])
        best_tests.append(test_string)
        best_hparams.append(hparams_string)

    # Append one row per model, labelled with its display name.
    df_results.loc[model_names_dict[model]] = best_tests
    df_hparams_strings.loc[model_names_dict[model]] = best_hparams


In [6]:
# For each PLR model and classification dataset, reuse the dropout and learning
# rate selected for the corresponding baseline model (read back from
# df_hparams_strings), then sweep feature preprocessing x PLR frequency and
# keep the configuration with the highest validation mean.
for model_plr in models_plr:
    model_orig = model_plr.replace('_plr', '')
    best_tests = []
    best_hparams = []
    for dataset in datasets:
        if dataset == 'web-fraud':
            # No PLR experiments are read for this dataset; a placeholder is stored
            # instead ('MLE' presumably means memory limit exceeded - TODO confirm).
            best_tests.append('MLE')
            best_hparams.append('MLE')
            continue

        # Baseline hparams string has the form '<preproc>_dropout<p>_lr<lr>';
        # the preprocessing part is ignored because it is swept again below.
        orig_hparams_string = df_hparams_strings.loc[model_names_dict[model_orig], dataset]
        _, dropout_part, lr_part = orig_hparams_string.split('_')
        dropout_p = dropout_part.replace('dropout', '')
        lr = lr_part.replace('lr', '')

        # One (val mean, formatted test result, hparams string) tuple per usable run.
        candidates = []
        for num_features_preproc in ('quantile', 'standard'):
            for freq in [0.01, 0.03, 0.1, 0.3, 1, 3, 10]:
                file_path = f'../experiments/{dataset}/{model_plr}_{num_features_preproc}_dropout{dropout_p}_lr{lr}_freq{freq}_01/metrics.yaml'

                # Missing experiments are skipped silently.
                if not os.path.isfile(file_path):
                    continue

                with open(file_path) as file:
                    metrics = yaml.safe_load(file)

                # Only experiments that report exactly 5 runs are considered.
                if metrics['num runs'] == 5:
                    metric = dataset_metrics[dataset]

                    val_mean = metrics[f'val {metric} mean']
                    test_mean = metrics[f'test {metric} mean']
                    test_std = metrics[f'test {metric} std']

                    # Test metric is shown scaled by 100 as "mean ± std".
                    test_string = f'{test_mean * 100:.2f} \u00B1 {test_std * 100:.2f}'
                    # Strip the directory prefix and suffix so only the hyperparameters remain.
                    hparams_string = file_path.replace(f'../experiments/{dataset}/{model_plr}_', '').replace('_01/metrics.yaml', '')

                    candidates.append((val_mean, test_string, hparams_string))

        if len(candidates) == 0:
            raise RuntimeError(f'No successful experiments were found for dataset {dataset} '
                               f'and model {model_names_dict[model_plr]}.')

        # max() returns the first maximal item, so ties go to the earliest grid point.
        _, test_string, hparams_string = max(candidates, key=lambda candidate: candidate[0])
        best_tests.append(test_string)
        best_hparams.append(hparams_string)

    # Append one row per PLR model, labelled with its display name.
    df_results.loc[model_names_dict[model_plr]] = best_tests
    df_hparams_strings.loc[model_names_dict[model_plr]] = best_hparams


In [7]:
# Display the classification results table: one row per model, one column per
# dataset, each cell the test metric formatted as "mean ± std" (scaled by 100)
# of the configuration with the best validation mean.
df_results

Unnamed: 0,tolokers-tab,questions-tab,city-reviews,browser-games,hm-categories,web-fraud
ResNet,45.17 ± 0.61,84.01 ± 0.26,64.33 ± 0.32,78.82 ± 0.32,70.45 ± 0.24,14.21 ± 0.24
GCN,61.09 ± 0.38,84.92 ± 0.95,71.08 ± 0.32,79.17 ± 0.41,86.42 ± 0.31,14.65 ± 0.24
GraphSAGE,57.08 ± 0.24,85.70 ± 0.30,71.15 ± 0.27,82.56 ± 0.11,86.35 ± 0.18,20.28 ± 0.48
GAT,58.77 ± 1.00,84.44 ± 0.68,71.38 ± 0.53,82.60 ± 0.26,87.84 ± 0.23,19.95 ± 0.51
GT,58.92 ± 0.57,83.59 ± 1.17,71.72 ± 0.23,83.29 ± 0.33,89.00 ± 0.23,20.19 ± 0.44
ResNet-PLR,47.86 ± 0.85,86.14 ± 0.88,65.49 ± 0.34,80.38 ± 0.51,70.24 ± 0.15,MLE
GCN-PLR,60.81 ± 0.56,88.80 ± 0.25,70.40 ± 0.58,80.50 ± 0.58,83.85 ± 0.28,MLE
GraphSAGE-PLR,60.28 ± 0.97,88.55 ± 0.48,72.26 ± 0.40,83.19 ± 0.34,86.77 ± 0.12,MLE
GAT-PLR,60.99 ± 0.82,88.69 ± 0.63,71.66 ± 0.66,83.59 ± 0.33,87.94 ± 0.20,MLE
GT-PLR,61.95 ± 0.73,82.41 ± 1.60,71.80 ± 0.21,83.26 ± 0.36,89.01 ± 0.15,MLE


In [8]:
# Display the hyperparameter string of the configuration selected for each
# model/dataset pair in the classification results table.
df_hparams_strings

Unnamed: 0,tolokers-tab,questions-tab,city-reviews,browser-games,hm-categories,web-fraud
ResNet,quantile_dropout0.2_lr3e-4,quantile_dropout0_lr3e-3,quantile_dropout0.2_lr3e-5,standard_dropout0_lr3e-4,standard_dropout0_lr3e-3,quantile_dropout0.2_lr3e-4
GCN,quantile_dropout0.2_lr3e-4,quantile_dropout0_lr3e-4,quantile_dropout0_lr3e-4,standard_dropout0.2_lr3e-3,standard_dropout0.2_lr3e-3,standard_dropout0.2_lr3e-4
GraphSAGE,quantile_dropout0.2_lr3e-5,quantile_dropout0_lr3e-4,standard_dropout0.2_lr3e-5,standard_dropout0_lr3e-5,standard_dropout0.2_lr3e-4,quantile_dropout0.2_lr3e-4
GAT,quantile_dropout0.2_lr3e-4,quantile_dropout0_lr3e-4,quantile_dropout0.2_lr3e-4,standard_dropout0_lr3e-5,standard_dropout0.2_lr3e-4,quantile_dropout0.2_lr3e-3
GT,standard_dropout0_lr3e-5,quantile_dropout0.2_lr3e-3,standard_dropout0_lr3e-5,standard_dropout0_lr3e-5,quantile_dropout0.2_lr3e-4,quantile_dropout0.2_lr3e-3
ResNet-PLR,quantile_dropout0.2_lr3e-4_freq0.1,quantile_dropout0_lr3e-3_freq0.01,quantile_dropout0.2_lr3e-5_freq0.3,quantile_dropout0_lr3e-4_freq0.03,standard_dropout0_lr3e-3_freq1,MLE
GCN-PLR,quantile_dropout0.2_lr3e-4_freq1,quantile_dropout0_lr3e-4_freq0.01,quantile_dropout0_lr3e-4_freq0.1,standard_dropout0.2_lr3e-3_freq0.01,quantile_dropout0.2_lr3e-3_freq0.3,MLE
GraphSAGE-PLR,quantile_dropout0.2_lr3e-5_freq0.3,quantile_dropout0_lr3e-4_freq0.1,quantile_dropout0.2_lr3e-5_freq0.1,standard_dropout0_lr3e-5_freq0.1,quantile_dropout0.2_lr3e-4_freq0.3,MLE
GAT-PLR,quantile_dropout0.2_lr3e-4_freq0.3,quantile_dropout0_lr3e-4_freq0.1,quantile_dropout0.2_lr3e-4_freq0.1,standard_dropout0_lr3e-5_freq0.1,quantile_dropout0.2_lr3e-4_freq0.03,MLE
GT-PLR,quantile_dropout0_lr3e-5_freq0.1,quantile_dropout0.2_lr3e-3_freq10,quantile_dropout0_lr3e-5_freq0.1,standard_dropout0_lr3e-5_freq0.1,quantile_dropout0.2_lr3e-4_freq0.3,MLE


# Results for regression datasets

In [9]:
# Regression datasets; this order is also the column order of the tables.
datasets = [
    'city-roads-M',
    'city-roads-L',
    'avazu-devices',
    'hm-prices',
    'web-traffic',
]

In [10]:
# Start fresh, empty tables for the regression datasets (one column per
# dataset); these rebind the names previously used for classification.
df_results, df_hparams_strings = (pd.DataFrame(columns=datasets) for _table in range(2))

In [11]:
# For each baseline model and regression dataset, walk the grid of
# target preprocessing x feature preprocessing x dropout x learning rate, keep
# the runs whose metrics.yaml exists and reports 5 runs, and pick the
# configuration with the highest validation mean. Its test result and
# hyperparameters become one cell in df_results / df_hparams_strings.
for model in models:
    results = []
    results_hparams_strings = []
    for dataset in datasets:
        # Parallel lists: entry i of each list describes the same experiment.
        val_means = []
        test_strings = []
        hparams_strings = []
        for reg_target_preproc in ('none', 'standard'):
            for num_features_preproc in ('quantile', 'standard'):
                for dropout_p in ('0', '0.2'):
                    for lr in ('3e-5', '3e-4', '3e-3'):
                        file_path = f'../experiments/{dataset}/{model}_{reg_target_preproc}_{num_features_preproc}_dropout{dropout_p}_lr{lr}_01/metrics.yaml'

                        # Missing experiments are skipped silently.
                        if not os.path.isfile(file_path):
                            continue

                        with open(file_path) as file:
                            metrics = yaml.safe_load(file)

                        # Only experiments that report exactly 5 runs are considered.
                        if metrics['num runs'] != 5:
                            continue

                        metric = dataset_metrics[dataset]

                        val_mean = metrics[f'val {metric} mean']
                        val_means.append(val_mean)

                        test_mean = metrics[f'test {metric} mean']
                        test_std = metrics[f'test {metric} std']

                        # Test metric is shown scaled by 100 as "mean ± std".
                        test_string = f'{test_mean * 100:.2f} \u00B1 {test_std * 100:.2f}'
                        test_strings.append(test_string)

                        # Strip the directory prefix and suffix so only the hyperparameters remain.
                        hparams_string = file_path.replace(f'../experiments/{dataset}/{model}_', '').replace('_01/metrics.yaml', '')
                        hparams_strings.append(hparams_string)

        if not val_means:
            # Name the model handled by this loop. The previous code used `model_plr`,
            # a stale variable from an earlier cell, and so reported the wrong model.
            raise RuntimeError(f'No successful experiments were found for dataset {dataset} '
                               f'and model {model_names_dict[model]}.')

        # Select by validation mean; index() returns the first maximum, so ties
        # go to the earliest grid point.
        best_val_mean = max(val_means)
        best_val_id = val_means.index(best_val_mean)
        test_string = test_strings[best_val_id]
        results.append(test_string)
        hparams_string = hparams_strings[best_val_id]
        results_hparams_strings.append(hparams_string)

    # Append one row per model, labelled with its display name.
    df_results.loc[model_names_dict[model]] = results
    df_hparams_strings.loc[model_names_dict[model]] = results_hparams_strings


In [12]:
# For each PLR model and regression dataset, reuse the target preprocessing,
# dropout and learning rate selected for the corresponding baseline model
# (read back from df_hparams_strings), then sweep feature preprocessing x PLR
# frequency and keep the configuration with the highest validation mean.
for model_plr in models_plr:
    model_orig = model_plr.replace('_plr', '')
    results = []
    results_hparams_strings = []
    for dataset in datasets:
        # No PLR experiments are read for these two datasets; placeholders are
        # stored instead. NOTE(review): 'MLE' presumably means memory limit
        # exceeded and 'N/A' that PLR does not apply - confirm.
        if dataset == 'hm-prices':
            results.append('N/A')
            results_hparams_strings.append('N/A')
            continue
        elif dataset == 'web-traffic':
            results.append('MLE')
            results_hparams_strings.append('MLE')
            continue

        # Baseline hparams string has the form
        # '<target preproc>_<feature preproc>_dropout<p>_lr<lr>'; the feature
        # preprocessing part is ignored because it is swept again below.
        orig_hparams_string = df_hparams_strings[dataset][model_names_dict[model_orig]]
        reg_target_preproc, _, dropout_p, lr = orig_hparams_string.split('_')
        dropout_p = dropout_p.replace('dropout', '')
        lr = lr.replace('lr', '')

        # Parallel lists: entry i of each list describes the same experiment.
        val_means = []
        test_strings = []
        hparams_strings = []
        for num_features_preproc in ('quantile', 'standard'):
            for freq in [0.01, 0.03, 0.1, 0.3, 1, 3, 10]:
                file_path = f'../experiments/{dataset}/{model_plr}_{reg_target_preproc}_{num_features_preproc}_dropout{dropout_p}_lr{lr}_freq{freq}_01/metrics.yaml'

                # Missing experiments are skipped silently.
                if not os.path.isfile(file_path):
                    continue

                with open(file_path) as file:
                    metrics = yaml.safe_load(file)

                # Only experiments that report exactly 5 runs are considered.
                if metrics['num runs'] != 5:
                    continue

                metric = dataset_metrics[dataset]

                val_mean = metrics[f'val {metric} mean']
                val_means.append(val_mean)

                test_mean = metrics[f'test {metric} mean']
                test_std = metrics[f'test {metric} std']

                # Test metric is shown scaled by 100 as "mean ± std".
                test_string = f'{test_mean * 100:.2f} \u00B1 {test_std * 100:.2f}'
                test_strings.append(test_string)

                # Strip the directory prefix and suffix so only the hyperparameters remain.
                hparams_string = file_path.replace(f'../experiments/{dataset}/{model_plr}_', '').replace('_01/metrics.yaml', '')
                hparams_strings.append(hparams_string)

        if not val_means:
            # Name the PLR model handled by this loop. The previous code used `model`,
            # a stale variable from an earlier cell, and so reported the wrong model.
            raise RuntimeError(f'No successful experiments were found for dataset {dataset} '
                               f'and model {model_names_dict[model_plr]}.')

        # Select by validation mean; index() returns the first maximum, so ties
        # go to the earliest grid point.
        best_val_mean = max(val_means)
        best_val_id = val_means.index(best_val_mean)
        test_string = test_strings[best_val_id]
        results.append(test_string)
        hparams_string = hparams_strings[best_val_id]
        results_hparams_strings.append(hparams_string)

    # Append one row per PLR model, labelled with its display name.
    df_results.loc[model_names_dict[model_plr]] = results
    df_hparams_strings.loc[model_names_dict[model_plr]] = results_hparams_strings


In [13]:
# Display the regression results table: one row per model, one column per
# dataset, each cell the test metric formatted as "mean ± std" (scaled by 100)
# of the configuration with the best validation mean, or a placeholder string
# for the datasets where no PLR experiments are read.
df_results

Unnamed: 0,city-roads-M,city-roads-L,avazu-devices,hm-prices,web-traffic
ResNet,70.58 ± 0.35,67.49 ± 0.09,21.60 ± 0.08,67.31 ± 0.21,73.19 ± 0.04
GCN,72.87 ± 0.21,70.92 ± 0.23,27.31 ± 0.17,77.05 ± 0.25,81.95 ± 0.08
GraphSAGE,73.35 ± 0.58,71.03 ± 0.90,27.99 ± 0.32,76.01 ± 0.47,84.04 ± 0.19
GAT,73.64 ± 0.30,71.74 ± 0.23,28.28 ± 0.54,78.02 ± 0.32,84.85 ± 0.17
GT,72.95 ± 0.47,69.98 ± 0.57,30.27 ± 0.26,78.44 ± 0.58,85.17 ± 0.17
ResNet-PLR,70.93 ± 0.23,67.58 ± 0.41,20.89 ± 0.73,,MLE
GCN-PLR,73.08 ± 0.33,70.95 ± 0.18,24.68 ± 0.12,,MLE
GraphSAGE-PLR,73.51 ± 0.37,71.97 ± 0.31,27.64 ± 0.23,,MLE
GAT-PLR,73.25 ± 0.33,71.78 ± 0.20,28.29 ± 0.36,,MLE
GT-PLR,73.09 ± 0.35,71.12 ± 0.56,29.88 ± 0.20,,MLE


In [14]:
# Display the hyperparameter string of the configuration selected for each
# model/dataset pair in the regression results table.
df_hparams_strings

Unnamed: 0,city-roads-M,city-roads-L,avazu-devices,hm-prices,web-traffic
ResNet,standard_standard_dropout0_lr3e-4,standard_standard_dropout0_lr3e-4,none_standard_dropout0.2_lr3e-5,standard_quantile_dropout0_lr3e-4,none_quantile_dropout0_lr3e-4
GCN,standard_standard_dropout0_lr3e-4,standard_standard_dropout0_lr3e-4,standard_standard_dropout0_lr3e-5,standard_quantile_dropout0_lr3e-5,none_quantile_dropout0_lr3e-4
GraphSAGE,none_quantile_dropout0.2_lr3e-3,none_standard_dropout0.2_lr3e-3,none_standard_dropout0.2_lr3e-5,standard_quantile_dropout0_lr3e-5,none_quantile_dropout0_lr3e-4
GAT,standard_standard_dropout0_lr3e-4,standard_standard_dropout0_lr3e-4,standard_standard_dropout0_lr3e-5,standard_quantile_dropout0_lr3e-5,none_quantile_dropout0_lr3e-3
GT,standard_standard_dropout0_lr3e-5,none_quantile_dropout0.2_lr3e-3,standard_quantile_dropout0_lr3e-5,standard_quantile_dropout0_lr3e-5,none_quantile_dropout0_lr3e-4
ResNet-PLR,standard_standard_dropout0_lr3e-4_freq0.03,standard_quantile_dropout0_lr3e-4_freq0.03,none_quantile_dropout0.2_lr3e-5_freq0.3,,MLE
GCN-PLR,standard_quantile_dropout0_lr3e-4_freq0.1,standard_standard_dropout0_lr3e-4_freq0.01,standard_quantile_dropout0_lr3e-5_freq0.3,,MLE
GraphSAGE-PLR,none_quantile_dropout0.2_lr3e-3_freq0.3,none_quantile_dropout0.2_lr3e-3_freq1,none_standard_dropout0.2_lr3e-5_freq0.3,,MLE
GAT-PLR,standard_standard_dropout0_lr3e-4_freq1,standard_standard_dropout0_lr3e-4_freq0.01,standard_standard_dropout0_lr3e-5_freq0.3,,MLE
GT-PLR,standard_standard_dropout0_lr3e-5_freq1,none_quantile_dropout0.2_lr3e-3_freq3,standard_standard_dropout0_lr3e-5_freq0.1,,MLE
