In [1]:
import pandas as pd
import os

# Read every CSV file in the 'csv_results' folder and stack them into a single DataFrame.
# Each row is tagged with a `run` label cut out of its file name: the text before the
# first '.', minus its first 17 characters (presumably a fixed-length file-name prefix
# in front of the run id, e.g. '..._run_2.csv' -> 'run_2' -- TODO confirm the prefix).
folder = 'csv_results'
# os.listdir returns entries in arbitrary order; sorting keeps the row order of `df`
# (and of everything derived from it) reproducible across machines and re-runs.
csv_files = sorted(f for f in os.listdir(folder) if f.endswith('.csv'))
if not csv_files:
    # pd.concat([]) would otherwise fail with the opaque "No objects to concatenate".
    raise ValueError(f"No CSV result files found in '{folder}'")
df = pd.concat(
    [pd.read_csv(os.path.join(folder, f)).assign(run=f.split('.')[0][17:]) for f in csv_files],
    ignore_index=True,
)


# Selector used to pick the winning run of a task, keyed by task type:
# `max` keeps the largest main_metric, `min` keeps the smallest.
func = dict(
    anomaly_detection=max,
    imputation=min,
    long_term_forecast=min,
    short_term_forecast=min,
    classification=max,
)

# Construct best dataframe: for every task keep the rows of the run whose
# main_metric wins under that task type's selector (max or min, see `func`).
# The per-task frames are collected in a list and concatenated once, instead of
# growing `best_df` with pd.concat inside the loop: that is quadratic, and seeding
# it with an empty object-dtype frame relies on pandas ignoring empty entries when
# resolving dtypes (deprecated since pandas 2.1).
best_frames = []
for task in df['task_name'].unique():
    df_task = df[df['task_name'] == task]
    # The task type is read from the first row of the task's slice.
    fun = func[df_task['task_type'].values[0]]
    # Comparing (metric, run) tuples means ties on the metric are broken by run name.
    # NOTE(review): a NaN main_metric would make this comparison order-dependent --
    # confirm that main_metric is always populated.
    best_val, best_run = fun(zip(df_task['main_metric'], df_task['run']))
    best_frames.append(df_task[df_task['run'] == best_run])

# With no tasks at all, fall back to an empty frame that still has df's columns.
best_df = (
    pd.concat(best_frames, ignore_index=True)
    if best_frames
    else pd.DataFrame(columns=df.columns)
)

best_df

Unnamed: 0,task_type,task_name,dataset,main_metric,accuracy,f_score,full_task_name,horizon,mae,mape_average,...,owa_quarterly,owa_yearly,precision,recall,smape_average,smape_monthly,smape_others,smape_quarterly,smape_yearly,run
0,anomaly_detection,anomaly_detection_SMD,SMD,0.791500,0.9812,0.7915,,,,,...,,,0.7343,0.8583,,,,,,run_2
1,anomaly_detection,anomaly_detection_PSM,PSM,0.929000,0.9629,0.9290,,,,,...,,,0.9917,0.8737,,,,,,run_8
2,anomaly_detection,anomaly_detection_MSL,MSL,0.822400,0.9657,0.8224,,,,,...,,,0.9072,0.7520,,,,,,run_5
3,anomaly_detection,anomaly_detection_SMAP,SMAP,0.829400,0.9602,0.8294,,,,,...,,,0.9182,0.7563,,,,,,run_8
4,anomaly_detection,anomaly_detection_SWAT,SWAT,0.888900,0.9755,0.8889,,,,,...,,,0.9896,0.8068,,,,,,run_8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,long_term_forecast,long_term_forecast_weather_336,weather,0.284781,,,long_term_forecast_weather_96_336_EST_custom_f...,336.0,0.307564,,...,,,,,,,,,,run_6
63,long_term_forecast,long_term_forecast_weather_720,weather,0.410082,,,long_term_forecast_weather_96_720_EST_custom_f...,720.0,0.439835,,...,,,,,,,,,,run_5
64,long_term_forecast,long_term_forecast_ETTm2_720,ETTm2,0.438327,,,long_term_forecast_ETTm2_96_720_EST_ETTm2_ftM_...,720.0,0.419636,,...,,,,,,,,,,run_9
65,long_term_forecast,long_term_forecast_Exchange_720,Exchange,0.904805,,,long_term_forecast_Exchange_96_720_EST_custom_...,720.0,0.725382,,...,,,,,,,,,,run_6


In [2]:
# Number of rows in best_df per run, i.e. how often each run was selected as best.
best_df['run'].value_counts()

run_6    14
run_9    13
run_5     8
run_1     8
run_2     7
run_7     7
run_4     6
run_8     3
run_3     1
Name: run, dtype: int64

In [3]:
# Mean main_metric of the selected (best) rows, grouped by task type.
best_df['main_metric'].groupby(best_df['task_type']).mean()

task_type
anomaly_detection      0.852240
classification         0.733393
imputation             0.134114
long_term_forecast     0.774613
short_term_forecast    0.944167
Name: main_metric, dtype: float64

In [5]:
# Within each task type, count how many best_df rows belong to each run.
best_df['run'].groupby(best_df['task_type']).value_counts()

task_type            run  
anomaly_detection    run_8     3
                     run_2     1
                     run_5     1
classification       run_5     3
                     run_9     2
                     run_1     1
                     run_3     1
                     run_4     1
                     run_6     1
                     run_7     1
imputation           run_1     6
                     run_5     3
                     run_4     2
                     run_9     1
long_term_forecast   run_6    13
                     run_7     6
                     run_9     6
                     run_2     4
                     run_4     3
                     run_1     1
                     run_5     1
short_term_forecast  run_9     4
                     run_2     2
Name: run, dtype: int64

In [6]:
# Overall tally of best_df rows per run (same computation as cell In [2]).
best_df.loc[:, 'run'].value_counts()

run_6    14
run_9    13
run_5     8
run_1     8
run_2     7
run_7     7
run_4     6
run_8     3
run_3     1
Name: run, dtype: int64