In [1]:

import pandas as pd
import matplotlib.pyplot as plt
import re
import numpy as np
import os 
import sys 

current_path = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_path, '..','..','..'))
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)
    
from pipeline.plotting.TS_analysis import plot_TS
from jupyter_ipynb.NetMob_training_analysis.plotting import plot_boxplot_on_metric
from experiences.convert_df_to_latex import dataframe_to_latex,parse_index_exp1_2,parse_index_exp4



folder_path = 'save/K_fold_validation/training_wo_HP_tuning'

In [2]:
def format_results_to_dataframe(results_string):
    """
    Cette fonction transforme une chaîne de caractères de résultats en un DataFrame pandas,
    en extrayant la configuration, le RMSE, le MAE et le MASE pour chaque ligne.
    """
    
    data = []
    
    for line in results_string.strip().split('\n'):
        if not line.strip():
            continue

        parts = line.split(':')
        name = parts[0].strip()
        metrics_str = parts[1].strip()

        # Extraire la configuration
        config_match = re.search(r'STAEformer_bike_out_(.*?)__e200', name)
        if config_match:
            config = config_match.group(1).strip('_')
            if not config:
                config = 'bike_out'
        else:
            config = 'baseline'

        # Extraire les métriques
        rmse_match = re.search(r'RMSE = ([\d.]+)', metrics_str)
        mae_match = re.search(r'MAE = ([\d.]+)', metrics_str)
        mase_match = re.search(r'MASE = ([\d.]+)', metrics_str)
        
        rmse = float(rmse_match.group(1)) if rmse_match else None
        mae = float(mae_match.group(1)) if mae_match else None
        mase = float(mase_match.group(1)) if mase_match else None
        
        data.append({
            'config': config,
            'RMSE': rmse,
            'MAE': mae,
            'MASE': mase
        })
        
    df = pd.DataFrame(data)
    return df

from experiences.contextual_data_integration.exp2_rainy_results import results
df = format_results_to_dataframe(results) 
df = df.groupby('config').agg(['mean','std'])
display(df)

baseline_index  = 'baseline'
df = df.sort_values(by=[('RMSE','mean')])
for c in df.columns:
    if 'mean' == c[1]:
        x = df[c].iloc[0]
        df[c] = df[c].apply(lambda x: f"{round(x,2)} ({round(100*(-1+x/df.loc[baseline_index,c]),2)}\%)")
df.index = [f"{c}__e200" for c in df.index]
df.columns = pd.MultiIndex.from_tuples([(x[0].lower() + '_h4',x[1]) for x in df.columns])
display(df)
caption = {r"Prediction of Bike-out (15min agg) at horizon [+45,+60min] with STAEformer using contextual data 'Weather' according to different Integration Strategies. Metrics are averaged over 5 runs."}
print("%%% LaTeX code for Experiment 1 Table %%%\n")
latex_table_1 = dataframe_to_latex(df, caption, "exp2_rainy", parse_index_exp1_2)
print(latex_table_1)

Unnamed: 0_level_0,RMSE,RMSE,MAE,MAE,MASE,MASE
Unnamed: 0_level_1,mean,std,mean,std,mean,std
config,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
baseline,5.1582,0.064348,3.0982,0.035801,0.6512,0.007328
weather_calendar_early_fusion_repeat_t_proj,4.8584,0.108154,2.9678,0.054614,0.6238,0.011498
weather_calendar_early_fusion_s_proj_t_proj,4.9598,0.079074,3.02,0.036688,0.6348,0.007662
weather_calendar_late_fusion_repeat_t_proj,4.9214,0.051578,3.0122,0.030963,0.6332,0.00638
weather_calendar_late_fusion_s_proj_t_proj,4.9604,0.06309,3.0188,0.023059,0.6346,0.00493


Unnamed: 0_level_0,rmse_h4,rmse_h4,mae_h4,mae_h4,mase_h4,mase_h4
Unnamed: 0_level_1,mean,std,mean,std,mean,std
weather_calendar_early_fusion_repeat_t_proj__e200,4.86 (-5.81\%),0.108154,2.97 (-4.21\%),0.054614,0.62 (-4.21\%),0.011498
weather_calendar_late_fusion_repeat_t_proj__e200,4.92 (-4.59\%),0.051578,3.01 (-2.78\%),0.030963,0.63 (-2.76\%),0.00638
weather_calendar_early_fusion_s_proj_t_proj__e200,4.96 (-3.85\%),0.079074,3.02 (-2.52\%),0.036688,0.63 (-2.52\%),0.007662
weather_calendar_late_fusion_s_proj_t_proj__e200,4.96 (-3.83\%),0.06309,3.02 (-2.56\%),0.023059,0.63 (-2.55\%),0.00493
baseline__e200,5.16 (0.0\%),0.064348,3.1 (0.0\%),0.035801,0.65 (0.0\%),0.007328


%%% LaTeX code for Experiment 1 Table %%%

\begin{table}[!htb]
    \centering
    \caption{{"Prediction of Bike-out (15min agg) at horizon [+45,+60min] with STAEformer using contextual data 'Weather' according to different Integration Strategies. Metrics are averaged over 5 runs."}}
    \label{tab:exp2_rainy}
    \resizebox{\textwidth}{!}{
    \begin{tabular}{{llcccccc}}
        \toprule
        Contextual Data & Integration Strategy & \multicolumn{2}{c}{ MAE (h4) } & \multicolumn{2}{c}{ MASE (h4) } & \multicolumn{2}{c}{ RMSE (h4) } \\
        \cmidrule(lr){1-2} \cmidrule(lr){3-4} \cmidrule(lr){5-6} \cmidrule(lr){7-8}
         &  & Mean & Std & Mean & Std & Mean & Std \\
        \midrule
        Yes & Early Fusion Repeat-T-Proj & 2.97 (-4.21\%) & 0.0546 & 0.62 (-4.21\%) & 0.0115 & 4.86 (-5.81\%) & 0.1082 \\
        Yes & Late Fusion Repeat-T-Proj & 3.01 (-2.78\%) & 0.0310 & 0.63 (-2.76\%) & 0.0064 & 4.92 (-4.59\%) & 0.0516 \\
        Yes & Early Fusion-S-Proj-T-Proj & 3.02 (-2.52\%) & 