In [1]:
import pandas as pd
import yaml
import os
import numpy as np

def aggr(data_name):
    """Aggregate per-configuration results found under the ``data_name`` tree.

    Every directory below ``data_name`` that contains a ``config.yml``
    contributes one row to the result: the key/value pairs of the config plus
    the mean and standard deviation of each metric column of the sibling
    ``metrics.csv``. When ``metrics.csv`` is absent the statistics are NaN and
    a command line that re-runs the configuration is queued instead.

    Side effect: the queued command lines are written to the relative path
    ``job_unfinished_{data_name}.txt`` (the file is written even when empty).

    Parameters
    ----------
    data_name : str
        Directory to walk; also embedded in the job file name.

    Returns
    -------
    pandas.DataFrame
        One row per ``config.yml``. The columns declared below come first, in
        that order; config keys not in that list follow in order of first
        appearance. Values missing for a row are NaN.

    Raises
    ------
    KeyError
        If a ``metrics.csv`` lacks one of the expected numeric metric columns.
    """
    # Define dataframe columns
    columns = ['config_id', 'batch_norm', 'data', 'hidden_units', 'learning_rate', 
            'num_lin_layers_after', 'num_lin_layers_between', 'num_pde_layers', 
            'p_dropout', 'pde_type', 'root_path', 'skip_conn', 'time_points',
            'time_range', 'time_range_start', 'weight_decay', 
            'mean_train_loss', 'std_train_loss', 'mean_train_accuracy', 'std_train_accuracy',
            'mean_validation_loss', 'std_validation_loss', 'mean_validation_accuracy', 'std_validation_accuracy',
            'mean_training_time', 'std_training_time']

    # Metric columns read from metrics.csv; each yields a mean_* and std_* field.
    metric_names = ('train_loss', 'train_accuracy', 'validation_loss',
                    'validation_accuracy', 'training_time')

    # Rows are collected as plain dicts and turned into a frame once at the
    # end: DataFrame.append was removed in pandas 2.0 and was quadratic anyway.
    records = []
    job_lines = []

    # Walk through the folders
    for root, _dirs, files in os.walk(data_name):
        if 'config.yml' not in files:
            continue

        # Load the config file; an empty file parses to None, treat it as {}.
        config_path = os.path.join(root, 'config.yml')
        with open(config_path, 'r') as f:
            config = yaml.safe_load(f) or {}

        row = dict(config)

        if 'metrics.csv' in files:
            # Load the metrics file and summarise its numeric columns only, so
            # a stray text column cannot make mean()/std() raise.
            metrics = pd.read_csv(os.path.join(root, 'metrics.csv'))
            mean_metrics = metrics.mean(numeric_only=True)
            std_metrics = metrics.std(numeric_only=True)
            for name in metric_names:
                row[f'mean_{name}'] = mean_metrics[name]
                row[f'std_{name}'] = std_metrics[name]
        else:
            # No metrics file, fill with NaN
            for name in metric_names:
                row[f'mean_{name}'] = np.nan
                row[f'std_{name}'] = np.nan

            # Queue a command that re-runs this unfinished configuration.
            job_lines.append(f'cd /gpfs/gibbs/pi/***/***/Graph_expressivity/; /gpfs/gibbs/pi/***/***/.conda_envs/pyg/bin/python src/cross_validate.py --config_file classification_experiments/{config_path}\n')

        records.append(row)

    # Config keys outside the declared schema are kept, after the known columns.
    known = set(columns)
    extra_columns = []
    for row in records:
        for key in row:
            if key not in known:
                known.add(key)
                extra_columns.append(key)

    df = pd.DataFrame(records, columns=columns + extra_columns)

    # Write the new job file
    with open(f'job_unfinished_{data_name}.txt', 'w') as f:
        f.write(''.join(job_lines))
    
    return df

In [2]:
# Aggregate all ENZYMES runs and rank configurations by mean validation accuracy.
df = aggr("ENZYMES")
df_sorted = (
    df.set_index('config_id')
    .sort_values(by='mean_validation_accuracy', ascending=False)
    .drop(columns='root_path')
)
# Display only the two accuracy columns for a compact comparison.
df_sorted[['mean_train_accuracy', 'mean_validation_accuracy']]

Unnamed: 0_level_0,mean_train_accuracy,mean_validation_accuracy
config_id,Unnamed: 1_level_1,Unnamed: 2_level_1
39,0.995556,0.743333
30,0.996667,0.73
36,0.996852,0.726667
27,0.966111,0.718333
24,0.959815,0.701667
0,0.764074,0.638333
18,0.77,0.636667
3,0.772963,0.63
21,0.766111,0.615
9,0.598519,0.541667


In [3]:
# Aggregate all MUTAG runs and rank configurations by mean validation accuracy.
df = aggr("MUTAG")
df_sorted = (
    df.set_index('config_id')
    .sort_values(by='mean_validation_accuracy', ascending=False)
    .drop(columns='root_path')
)
# Display only the two accuracy columns for a compact comparison.
df_sorted[['mean_train_accuracy', 'mean_validation_accuracy']]

Unnamed: 0_level_0,mean_train_accuracy,mean_validation_accuracy
config_id,Unnamed: 1_level_1,Unnamed: 2_level_1
35,0.979307,0.792982
38,0.904831,0.787427
23,0.78961,0.77193
5,0.78489,0.766082
32,0.965736,0.75614
2,0.784876,0.755556
41,0.927887,0.750877
20,0.789607,0.749708
26,0.834501,0.744737
11,0.763603,0.744444


In [4]:
# Aggregate all PROTEINS runs and rank configurations by mean validation accuracy.
df = aggr("PROTEINS")
df_sorted = (
    df.set_index('config_id')
    .sort_values(by='mean_validation_accuracy', ascending=False)
    .drop(columns='root_path')
)
# Display only the two accuracy columns for a compact comparison.
df_sorted[['mean_train_accuracy', 'mean_validation_accuracy']]

Unnamed: 0_level_0,mean_train_accuracy,mean_validation_accuracy
config_id,Unnamed: 1_level_1,Unnamed: 2_level_1
25,0.764602,0.725169
28,0.774785,0.721493
7,0.703204,0.713401
13,0.701608,0.712516
10,0.726867,0.710746
1,0.746033,0.705317
16,0.719477,0.704456
19,0.745035,0.69992
4,0.74164,0.698118
22,0.744136,0.694514


In [5]:
# Save the current ranking to disk. `df_sorted` is whatever the most recently
# executed ranking cell produced (PROTEINS when the notebook is run top to
# bottom); `config_id` is the index and is written as the first CSV column.
df_sorted.to_csv(path_or_buf='results.csv')

In [6]:
# Display the full ranked table (all configuration and metric columns) held in
# `df_sorted`, as last assigned by the preceding aggr("PROTEINS") cell.
df_sorted

Unnamed: 0_level_0,batch_norm,data,hidden_units,learning_rate,num_lin_layers_after,num_lin_layers_between,num_pde_layers,p_dropout,pde_type,skip_conn,...,std_train_accuracy,mean_validation_loss,std_validation_loss,mean_validation_accuracy,std_validation_accuracy,mean_training_time,std_training_time,batch_size,data_path,num_epochs
config_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
25,True,PROTEINS,256,0.001,1,1,2,0.5,heat,False,...,0.009769,0.55624,0.071511,0.725169,0.054025,530.279498,6.964475,2048.0,data/,100.0
28,True,PROTEINS,256,0.001,1,1,2,0.5,wave,False,...,0.009537,0.564907,0.068307,0.721493,0.046729,1032.238837,13.263349,2048.0,data/,100.0
7,True,PROTEINS,64,0.0001,1,1,2,0.5,heat,False,...,0.015051,0.589981,0.056797,0.713401,0.056793,137.226514,1.706651,2048.0,data/,100.0
13,True,PROTEINS,64,0.0001,1,1,2,0.5,wave,False,...,0.009928,0.588458,0.056289,0.712516,0.055083,270.801891,3.477869,2048.0,data/,100.0
10,True,PROTEINS,128,0.0001,1,1,2,0.5,heat,False,...,0.009551,0.582533,0.060418,0.710746,0.058551,264.539569,3.340693,2048.0,data/,100.0
1,True,PROTEINS,256,0.0001,1,1,2,0.5,heat,False,...,0.010171,0.57431,0.060801,0.705317,0.053057,531.634075,6.746366,,data/,
16,True,PROTEINS,128,0.0001,1,1,2,0.5,wave,False,...,0.012428,0.583832,0.059916,0.704456,0.065903,498.639368,6.427528,2048.0,data/,100.0
19,True,PROTEINS,256,0.0001,1,1,2,0.5,heat,False,...,0.010733,0.576729,0.061278,0.69992,0.043559,520.104147,6.728732,2048.0,data/,100.0
4,True,PROTEINS,256,0.0001,1,1,2,0.5,wave,False,...,0.01061,0.581335,0.06292,0.698118,0.048896,1043.071087,14.358832,,data/,
22,True,PROTEINS,256,0.0001,1,1,2,0.5,wave,False,...,0.006747,0.586983,0.062609,0.694514,0.056991,1034.61874,13.590537,2048.0,data/,100.0
