In [1]:
import os 
import pandas as pd 
import sys 
import re 

# Prepend the directory two levels above the working directory to sys.path
# (presumably the repository root, so that the project imports that follow
# resolve). The number of `os.pardir` hops must match how deep this notebook
# sits in the tree.
current_path = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_path, os.pardir, os.pardir))
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)
    
from jupyter_ipynb.NetMob_training_analysis.plotting import plot_boxplot_on_metric
from experiences.convert_df_to_latex import update_df_metrics
from experiences.convert_df_to_latex import tackle_trial_j as tackle_trial_for_distrib
from experiences.convert_df_to_latex import dataframe_to_latex,parse_index_exp1_2

from experiences.benchmarking.RESULTS_benchmark_from_init_config import results
folder_path = '../../save/K_fold_validation/training_wo_HP_tuning'  # '../../save'


import re
import pandas as pd

def results_to_dataframe(results_string: str) -> pd.DataFrame:
    """
    Parse a multi-line results string into a DataFrame with one row per line.

    Each non-blank line is expected to look like::

        <Model>_<Target>[_<Context>]__<params>: <metrics>

    * ``<Target>`` must be one of ``subway_in``, ``subway_out``, ``bike_in``,
      ``bike_out``; it marks the boundary between model name and context.
    * ``<params>`` may contain ``e<int>`` (Epoch), ``h<int>`` (Horizon) and
      ``bis<int>`` (Bis).
    * ``<metrics>`` may contain ``RMSE = <number>``, ``MAE = <number>`` and
      ``MASE = <number>``.

    Only the first ``:`` separates the name from the metrics, so the metrics
    text may itself contain colons.

    Fallback values (kept for backward compatibility):

    * Target is ``"unknown"`` when no known target appears in the name.
    * Context is the *string* ``"None"`` when nothing follows the target.
    * Epoch / Horizon / Bis are ``0`` when absent, including when the name
      has no ``__<params>`` section at all.
    * A metric is ``-1.0`` when absent. NOTE: this sentinel is a plain number,
      so filter it out before computing means or standard deviations.

    Args:
        results_string: Raw text, one result per line. Blank lines are skipped.

    Returns:
        DataFrame with columns Model, Target, Context, Epoch, Horizon, Bis,
        RMSE, MAE, MASE (empty if there are no non-blank lines).

    Raises:
        ValueError: If a non-blank line contains no ``:`` separator, or if a
            captured metric value is not a valid float.
    """
    columns = ['Model', 'Target', 'Context', 'Epoch', 'Horizon', 'Bis', 'RMSE', 'MAE', 'MASE']

    def first_int(pattern: str, text: str) -> int:
        # Integer captured by group 1 of `pattern`, or 0 when it does not match.
        found = re.search(pattern, text)
        return int(found.group(1)) if found else 0

    def get_metric(m_name: str, text: str) -> float:
        # Value following "<m_name> =", or the -1.0 sentinel when missing.
        found = re.search(fr'{m_name}\s*=\s*([\d.]+)', text)
        return float(found.group(1)) if found else -1.0

    data = []
    for line in results_string.strip().split('\n'):
        if not line.strip():
            continue

        # Split on the FIRST colon only: later colons belong to the metrics text.
        name_part, separator, metrics_part = line.partition(':')
        if not separator:
            raise ValueError(
                f"Expected '<name>: <metrics>' but found no ':' in line: {line!r}"
            )
        name_part = name_part.strip()
        metrics_part = metrics_part.strip()

        # --- Configuration (before '__') and run parameters (after '__') ---
        segments = name_part.split('__')
        config_str = segments[0]
        params_str = segments[1] if len(segments) > 1 else ''

        # The target name is the anchor that separates model from context.
        target_match = re.search(r'(subway_in|subway_out|bike_in|bike_out)', config_str)
        target = target_match.group(1) if target_match else "unknown"

        model = config_str.split(f'_{target}')[0]
        context = config_str.split(f'{target}_')[1] if f'{target}_' in config_str else "None"

        data.append([
            model, target, context,
            first_int(r'e(\d+)', params_str),
            first_int(r'h(\d+)', params_str),
            first_int(r'bis(\d+)', params_str),
            get_metric('RMSE', metrics_part),
            get_metric('MAE', metrics_part),
            get_metric('MASE', metrics_part),
        ])

    return pd.DataFrame(data, columns=columns)

# Parse the raw benchmark text imported above (`results`) into one row per line.
df_results = results_to_dataframe(results)
# Rich notebook rendering of the parsed frame.
display(df_results)

Unnamed: 0,Model,Target,Context,Epoch,Horizon,Bis,RMSE,MAE,MASE
0,DCRNN,subway_in,calendar_embedding,500,4,1,49.062,27.983,0.838
1,DCRNN,subway_in,calendar_embedding,500,4,2,48.361,27.470,0.822
2,DCRNN,subway_in,calendar_embedding,500,4,3,49.613,27.908,0.836
3,DCRNN,subway_in,calendar_embedding,500,4,4,49.550,27.899,0.835
4,DCRNN,subway_in,calendar_embedding,500,4,5,49.364,27.909,0.836
...,...,...,...,...,...,...,...,...,...
115,STAEformer,subway_in,,500,1,1,36.249,20.943,0.627
116,STAEformer,subway_in,,500,1,2,36.474,20.984,0.628
117,STAEformer,subway_in,,500,1,3,36.230,20.924,0.626
118,STAEformer,subway_in,,500,1,4,36.697,21.158,0.633


In [2]:
def convert_context(context):
    """Return the display label for a context name.

    Both calendar spellings ('calendar_embedding' and 'calendar') map to
    'Calendar'; every other value is handed back unchanged.
    """
    if context == 'calendar_embedding' or context == 'calendar':
        return 'Calendar'
    return context


# Colours handed to the boxplot helper. `legend_groups` and `targets` are not used
# further down in this cell; they are kept in case later cells rely on them.
palette = ['#4e79a7','#f28e2b','#e15759', '#59a14f', '#c7e9c0','#1a4314' ]
legend_groups = ['Baseline','Independant Embedding','Shared Embedding','Early Fusion Other Methods ','Late Fusion Other Methods ']
targets = df_results['Target'].unique()

# One label per configuration: "<Model> - <context label>", or the bare model name
# when the parser stored the literal string 'None' as context.
df_results['id'] = df_results.apply(lambda row: f"{row['Model']} - {convert_context(row['Context'])}" if row['Context'] != 'None' else row['Model'],axis=1)

# The plot title turns horizon index h into the window [15*(h-1), 15*h] minutes.
MINUTES_PER_HORIZON_STEP = 15
# Decimals kept in the aggregated table. Earlier renders printed raw float noise in
# the Mean columns (e.g. 62.69539999999999); 4 matches the precision shown for Std.
LATEX_DECIMALS = 4

# Placeholders to fill in before pasting the table into the paper.
caption = '<fill caption>'
label = '<fill label>'


def index_parser(x):
    """Delegate to `parse_index_exp1_2` with `contextual='calendar'`."""
    return parse_index_exp1_2(x, contextual='calendar')


horizons = df_results.Horizon.unique()
for horizon in horizons:
    # Work on a copy so the added `legend_group` column never touches `df_results`.
    df_horizon = df_results[df_results['Horizon'] == horizon].copy()
    df_horizon['legend_group'] = df_horizon['Model']

    window_start = MINUTES_PER_HORIZON_STEP * (int(horizon) - 1)
    window_end = MINUTES_PER_HORIZON_STEP * int(horizon)

    # ---- One boxplot per metric, grouped by model ----
    for metric_i in ['RMSE','MAE','MASE']:
        title = f'{metric_i} Distribution per Config and Fusion Strategy of Subway-In Prediction at Horizon [{window_start} - {window_end}]min'
        plot_boxplot_on_metric(df_horizon, metric_i=metric_i, xaxis_label="Config", legend_group='legend_group', width=800, height=600,
                               save_path=None, palette=palette,
                               legend_groups=df_horizon['Model'].unique().tolist(),
                               title=title)

    # ---- Display performance in a LaTeX table ----
    # Mean / std of every numeric column per configuration, rounded so the generated
    # LaTeX does not carry floating-point artefacts.
    df_agg = (
        df_horizon[['id','Epoch','Horizon','RMSE', 'MAE', 'MASE']]
        .groupby(['id','Horizon'])
        .agg(['mean','std'])
        .round(LATEX_DECIMALS)
    )

    print(dataframe_to_latex(df_agg,
                             caption=caption,
                             label=label,
                             index_parser=index_parser,
                             # Horizon level of the first (id, Horizon) group key.
                             horizon=df_agg.index[0][1]))
    # -----------------------------------------------










\begin{table}[!htb]
    \centering
    \caption{<fill caption>}
    \label{tab:<fill label>}
    \resizebox{\textwidth}{!}{
    \begin{tabular}{{llcccccccc}}
        \toprule
        Contextual Data & Integration Strategy & \multicolumn{2}{c}{ EPOCH (h4) } & \multicolumn{2}{c}{ RMSE (h4) } & \multicolumn{2}{c}{ MAE (h4) } & \multicolumn{2}{c}{ MASE (h4) } \\
        \cmidrule(lr){1-2} \cmidrule(lr){3-4} \cmidrule(lr){5-6} \cmidrule(lr){7-8} \cmidrule(lr){9-10}
         &  & Mean & Std & Mean & Std & Mean & Std & Mean & Std \\
        \midrule
        \midrule
        No & Baseline (No Context) & 500.0 & 0.0000 & 62.69539999999999 & 1.4361 & 35.1426 & 0.7728 & 1.0522 & 0.0232 \\
        \midrule
        No & Baseline (No Context) & 500.0 & 0.0000 & 49.19 & 0.5105 & 27.833800000000004 & 0.2062 & 0.8333999999999999 & 0.0065 \\
        \midrule
        No & Baseline (No Context) & 500.0 & 0.0000 & 66.3492 & 0.6883 & 37.294200000000004 & 0.2487 & 1.1164 & 0.0072 \\
        \midrule
        







\begin{table}[!htb]
    \centering
    \caption{<fill caption>}
    \label{tab:<fill label>}
    \resizebox{\textwidth}{!}{
    \begin{tabular}{{llcccccccc}}
        \toprule
        Contextual Data & Integration Strategy & \multicolumn{2}{c}{ EPOCH (h1) } & \multicolumn{2}{c}{ RMSE (h1) } & \multicolumn{2}{c}{ MAE (h1) } & \multicolumn{2}{c}{ MASE (h1) } \\
        \cmidrule(lr){1-2} \cmidrule(lr){3-4} \cmidrule(lr){5-6} \cmidrule(lr){7-8} \cmidrule(lr){9-10}
         &  & Mean & Std & Mean & Std & Mean & Std & Mean & Std \\
        \midrule
        \midrule
        No & Baseline (No Context) & 500.0 & 0.0000 & 44.1104 & 0.1719 & 25.8322 & 0.1641 & 0.7736000000000001 & 0.0048 \\
        \midrule
        No & Baseline (No Context) & 500.0 & 0.0000 & 39.48780000000001 & 0.1535 & 23.115199999999998 & 0.0669 & 0.6921999999999999 & 0.0022 \\
        \midrule
        No & Baseline (No Context) & 500.0 & 0.0000 & 45.4842 & 0.1642 & 26.6094 & 0.0642 & 0.7966 & 0.0019 \\
        \midrule
     