In [7]:
import pandas as pd
import yaml
import os
import numpy as np

# Column order of the results table: the hyperparameters read from each run's
# config.yml come first, followed by the aggregated metrics of that run.
columns = ['config_id', 'batch_norm', 'data', 'hidden_units', 'learning_rate', 
           'num_lin_layers_after', 'num_lin_layers_between', 'num_pde_layers', 
           'p_dropout', 'pde_type', 'root_path', 'skip_conn', 'time_points',
           'time_range', 'time_range_start', 'weight_decay', 
           'mean_train_loss', 'std_train_loss', 'mean_train_accuracy', 'std_train_accuracy',
           'mean_validation_loss', 'std_validation_loss', 'mean_validation_accuracy', 'std_validation_accuracy',
           'mean_training_time', 'std_training_time']

# Columns read from every metrics.csv; each one contributes a `mean_<name>`
# and a `std_<name>` column to the results table.
METRIC_NAMES = ['train_loss', 'train_accuracy', 'validation_loss',
                'validation_accuracy', 'training_time']


def summarize_metrics(metrics=None):
    """Build the mean/std summary columns for a single run.

    Parameters
    ----------
    metrics : pandas.DataFrame or None
        Contents of the run's metrics.csv, or None when the run has no
        metrics file.

    Returns
    -------
    dict
        Maps `mean_<name>` and `std_<name>` (for every name in METRIC_NAMES)
        to the column mean / sample standard deviation, or to NaN for every
        key when `metrics` is None.

    Raises
    ------
    KeyError
        If a name in METRIC_NAMES is not a numeric column of `metrics`.
    """
    if metrics is None:
        return {f'{stat}_{name}': np.nan
                for name in METRIC_NAMES for stat in ('mean', 'std')}

    # numeric_only=True keeps the aggregation working when metrics.csv carries
    # non-numeric columns: pandas >= 2.0 raises on them instead of silently
    # dropping them as older versions did.
    mean_metrics = metrics.mean(numeric_only=True)
    std_metrics = metrics.std(numeric_only=True)

    summary = {}
    for name in METRIC_NAMES:
        summary[f'mean_{name}'] = mean_metrics[name]
        summary[f'std_{name}'] = std_metrics[name]
    return summary


# Rows are collected as plain dicts and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and growing a frame row by
# row copies it on every iteration.
records = []
job_lines = []

# Walk through the folders
for root, dirs, files in os.walk('ENZYMES'):
    # Sorting in place makes os.walk descend in a fixed order, so the row order
    # and the job file do not depend on the filesystem's listing order.
    dirs.sort()

    if 'config.yml' not in files:
        continue

    # Load the config file
    config_path = os.path.join(root, 'config.yml')
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)

    if 'metrics.csv' in files:
        # Run has results: aggregate its metrics file.
        metrics = pd.read_csv(os.path.join(root, 'metrics.csv'))
        row = {**config, **summarize_metrics(metrics)}
    else:
        # No metrics file: fill the metric columns with NaN and add a command
        # for this config to the new job file.
        row = {**config, **summarize_metrics(None)}
        job_lines.append(f'cd /***/***/pi/***/***/Graph_expressivity/; /***/***/pi/***/***/.conda_envs/pyg/bin/python src/cross_validate.py --config_file tuning/{config_path}\n')

    records.append(row)

# Keep the declared column order; config keys that are not listed in `columns`
# are kept as trailing columns, as appending the row dicts would have done.
df = pd.DataFrame(records)
extra_columns = [name for name in df.columns if name not in columns]
df = df.reindex(columns=columns + extra_columns)

# Write the new job file (one command per config that still lacks metrics)
new_job_content = ''.join(job_lines)
with open('job_new.txt', 'w') as f:
    f.write(new_job_content)


In [8]:
# Index the results by config id, rank the configurations from highest to
# lowest mean validation accuracy, and leave out the root_path column.
df_sorted = (
    df
    .set_index('config_id')
    .sort_values(by='mean_validation_accuracy', ascending=False)
    .drop(columns='root_path')
)

In [9]:
# Save the ranked table; the config_id index is written as the first CSV column.
df_sorted.to_csv('results.csv', index=True)

In [10]:
# Show the table ranked by mean validation accuracy (best first); runs that
# have no metrics.csv carry NaN metrics and therefore appear at the bottom.
df_sorted

Unnamed: 0_level_0,batch_norm,data,hidden_units,learning_rate,num_lin_layers_after,num_lin_layers_between,num_pde_layers,p_dropout,pde_type,skip_conn,...,mean_train_loss,std_train_loss,mean_train_accuracy,std_train_accuracy,mean_validation_loss,std_validation_loss,mean_validation_accuracy,std_validation_accuracy,mean_training_time,std_training_time
config_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
50,True,ENZYMES,256,0.0010,1,1,2,0.5,heat,False,...,0.007481,0.003053,0.998333,0.001051,2.082193,0.503430,0.748333,0.048080,965.188008,4.872584
338,True,ENZYMES,256,0.0010,1,1,2,0.5,wave,False,...,0.004969,0.002978,0.999259,0.001295,2.064162,0.614309,0.748333,0.055249,1905.400235,8.239399
132,True,ENZYMES,256,0.0001,1,1,2,0.5,heat,True,...,0.027238,0.004334,0.996667,0.002102,1.332706,0.269219,0.748333,0.042637,2473.401100,12.011554
206,True,ENZYMES,256,0.0001,1,1,2,0.5,heat,False,...,0.019094,0.003273,0.997593,0.001525,1.342130,0.325872,0.748333,0.060067,5367.975739,28.460538
360,True,ENZYMES,128,0.0010,1,1,2,0.5,wave,True,...,0.009290,0.006217,0.998333,0.001842,1.978540,0.526748,0.746667,0.035832,309.520461,2.844150
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
545,False,ENZYMES,64,0.0001,1,1,2,0.0,wave,True,...,,,,,,,,,,
541,False,ENZYMES,64,0.0001,1,1,2,0.5,wave,True,...,,,,,,,,,,
308,True,ENZYMES,128,0.0001,1,1,2,0.2,wave,True,...,,,,,,,,,,
571,False,ENZYMES,256,0.0001,1,1,2,0.0,wave,False,...,,,,,,,,,,


In [11]:
# Same table re-ranked by mean validation loss, lowest first. The result is
# only displayed; df_sorted itself is not modified.
df_sorted.sort_values(by='mean_validation_loss', ascending=True)

Unnamed: 0_level_0,batch_norm,data,hidden_units,learning_rate,num_lin_layers_after,num_lin_layers_between,num_pde_layers,p_dropout,pde_type,skip_conn,...,mean_train_loss,std_train_loss,mean_train_accuracy,std_train_accuracy,mean_validation_loss,std_validation_loss,mean_validation_accuracy,std_validation_accuracy,mean_training_time,std_training_time
config_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
542,True,ENZYMES,64,0.0001,1,1,2,0.5,wave,False,...,0.200849,0.019300,0.958704,0.007613,0.935575,0.224917,0.731667,0.060578,1294.142009,7.147725
334,True,ENZYMES,64,0.0001,1,1,2,0.2,wave,False,...,0.097211,0.009423,0.994074,0.002869,0.962310,0.155368,0.736667,0.043603,558.568337,2.977270
190,True,ENZYMES,64,0.0001,1,1,2,0.2,heat,False,...,0.069669,0.005109,0.997407,0.002174,0.963811,0.177358,0.728333,0.039323,663.622343,3.370397
182,True,ENZYMES,64,0.0001,1,1,2,0.5,heat,False,...,0.312108,0.026393,0.921296,0.010338,0.963833,0.162569,0.705000,0.058294,191.545076,3.362157
254,True,ENZYMES,64,0.0001,1,1,2,0.5,heat,False,...,0.257537,0.026725,0.942963,0.013348,0.972886,0.242953,0.710000,0.071233,661.055887,3.584504
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
545,False,ENZYMES,64,0.0001,1,1,2,0.0,wave,True,...,,,,,,,,,,
541,False,ENZYMES,64,0.0001,1,1,2,0.5,wave,True,...,,,,,,,,,,
308,True,ENZYMES,128,0.0001,1,1,2,0.2,wave,True,...,,,,,,,,,,
571,False,ENZYMES,256,0.0001,1,1,2,0.0,wave,False,...,,,,,,,,,,
