In [1]:
import pandas as pd
import os

# Read every CSV file in the results folder and stack them into one DataFrame,
# tagging each row with a `run` label derived from the name of its source file.
folder = 'csv_results'

# Number of leading characters stripped from the file stem (the text before the
# first '.') to obtain the run label.
# NOTE(review): presumably the length of a fixed filename prefix -- confirm
# against the actual file names in the folder.
RUN_PREFIX_LEN = 17

# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# `df` (and everything derived from it) reproducible across machines and re-runs.
csv_files = sorted(f for f in os.listdir(folder) if f.endswith('.csv'))
if not csv_files:
    # pd.concat would raise an opaque "No objects to concatenate" ValueError here.
    raise ValueError(f"No CSV files found in {folder!r}")

df = pd.concat(
    [
        pd.read_csv(os.path.join(folder, f)).assign(run=f.split('.')[0][RUN_PREFIX_LEN:])
        for f in csv_files
    ],
    ignore_index=True,
)


# Extract best result/run for each task.
# `func` maps each task type to the direction of its main metric:
# `max` means the highest main_metric wins, `min` means the lowest wins.
func = {
    'anomaly_detection': max,
    'imputation': min,
    'long_term_forecast': min,
    'short_term_forecast': min,
    'classification': max,
}

# Construct best dataframe: for every task, find the run with the best
# main_metric and keep all rows of that run for that task.
best_parts = []
# sort=False keeps tasks in order of first appearance in df.
for task, df_task in df.groupby('task_name', sort=False):
    # Raises KeyError if the task type has no entry in `func`.
    fun = func[df_task['task_type'].iloc[0]]
    metric = df_task['main_metric']
    if metric.isna().all():
        # No run produced a usable metric for this task, so there is no
        # meaningful "best" run to report; leave the task out.
        continue
    # idxmax/idxmin skip NaN values, so a missing metric can never be selected
    # as the best one. Ties resolve to the first occurrence in df order.
    best_idx = metric.idxmax() if fun is max else metric.idxmin()
    best_run = df_task.loc[best_idx, 'run']
    best_parts.append(df_task[df_task['run'] == best_run])

# Concatenate once at the end instead of growing a DataFrame inside the loop;
# this preserves column dtypes and avoids concatenating with an empty seed frame.
best_df = pd.concat(best_parts, ignore_index=True) if best_parts else df.iloc[0:0].copy()

best_df

Unnamed: 0,task_type,task_name,dataset,main_metric,accuracy,f_score,full_task_name,horizon,mae,mape_average,...,owa_quarterly,owa_yearly,precision,recall,smape_average,smape_monthly,smape_others,smape_quarterly,smape_yearly,run
0,anomaly_detection,anomaly_detection_SMD,SMD,0.791500,0.9812,0.7915,,,,,...,,,0.7343,0.8583,,,,,,run_2
1,anomaly_detection,anomaly_detection_PSM,PSM,0.928300,0.9626,0.9283,,,,,...,,,0.9925,0.8719,,,,,,run_4
2,anomaly_detection,anomaly_detection_MSL,MSL,0.822400,0.9657,0.8224,,,,,...,,,0.9072,0.7520,,,,,,run_5
3,anomaly_detection,anomaly_detection_SMAP,SMAP,0.779900,0.9508,0.7799,,,,,...,,,0.9125,0.6809,,,,,,run_4
4,anomaly_detection,anomaly_detection_SWAT,SWAT,0.854300,0.9686,0.8543,,,,,...,,,0.9792,0.7576,,,,,,run_7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,long_term_forecast,long_term_forecast_weather_336,weather,0.284781,,,long_term_forecast_weather_96_336_EST_custom_f...,336.0,0.307564,,...,,,,,,,,,,run_6
63,long_term_forecast,long_term_forecast_weather_720,weather,0.410082,,,long_term_forecast_weather_96_720_EST_custom_f...,720.0,0.439835,,...,,,,,,,,,,run_5
64,long_term_forecast,long_term_forecast_ETTm2_720,ETTm2,3.022007,,,long_term_forecast_ETTm2_96_720_EST_ETTm2_ftM_...,720.0,1.396858,,...,,,,,,,,,,run_5
65,long_term_forecast,long_term_forecast_Exchange_720,Exchange,0.904805,,,long_term_forecast_Exchange_96_720_EST_custom_...,720.0,0.725382,,...,,,,,,,,,,run_6


In [2]:
# Number of rows in best_df contributed by each run, most frequent first.
best_df['run'].value_counts()

run_6    18
run_4    11
run_5    11
run_1    10
run_7     9
run_2     7
run_3     1
Name: run, dtype: int64

In [None]:
# Average main_metric of the selected best rows, per task type.
metric_by_type = best_df.groupby('task_type')['main_metric']
metric_by_type.agg('mean')

task_type
anomaly_detection      0.835280
classification         0.731833
imputation             0.135620
long_term_forecast     0.860197
short_term_forecast    0.954333
Name: main_metric, dtype: float64

In [4]:
# Within each task type, count how many best_df rows come from each run.
runs_by_type = best_df.groupby('task_type')['run']
runs_by_type.value_counts()

task_type            run  
anomaly_detection    run_4     2
                     run_2     1
                     run_5     1
                     run_7     1
classification       run_5     5
                     run_1     1
                     run_3     1
                     run_4     1
                     run_6     1
                     run_7     1
imputation           run_1     7
                     run_5     3
                     run_4     2
long_term_forecast   run_6    17
                     run_7     7
                     run_2     4
                     run_4     3
                     run_5     2
                     run_1     1
short_term_forecast  run_4     3
                     run_2     2
                     run_1     1
Name: run, dtype: int64