In [1]:

import pandas as pd
import matplotlib.pyplot as plt
import re
import numpy as np
import os 
import sys 

# Put the directory three levels above the current working directory at the
# front of sys.path (once), presumably so the project-local packages imported
# right after this can be resolved when the notebook runs from its own folder.
current_path = os.getcwd()
parent_dir = os.path.abspath(
    os.path.join(current_path, os.pardir, os.pardir, os.pardir)
)
if sys.path.count(parent_dir) == 0:
    sys.path.insert(0, parent_dir)
    
from pipeline.plotting.TS_analysis import plot_TS
from jupyter_ipynb.NetMob_training_analysis.plotting import plot_boxplot_on_metric
from experiences.convert_df_to_latex import dataframe_to_latex,parse_index_exp1_2,parse_index_exp4



# Relative path (resolved against the working directory when used).
# NOTE(review): presumably where the K-fold training outputs are saved; it is
# not referenced by any cell visible in this part of the notebook — confirm it
# is still needed.
folder_path = 'save/K_fold_validation/training_wo_HP_tuning'

In [7]:
def format_results_to_dataframe(results_string):
    """
    Parse a multi-line results string into a pandas DataFrame.

    Every non-empty line is expected to have the form
    ``<run name>: <metrics text>``. The text before the first ``:`` is the run
    name; everything after it is searched for ``RMSE = <number>``,
    ``MAE = <number>`` and ``MASE = <number>``.

    Parameters
    ----------
    results_string : str
        Raw results, one run per line.

    Returns
    -------
    pandas.DataFrame
        One row per parsed line with the columns ``config``, ``RMSE``, ``MAE``
        and ``MASE`` (always present, even when no line could be parsed).
        ``config`` is the text captured between
        ``STAEformer_bike_out_weather_calendar_`` and ``__e200`` in the run
        name (surrounding underscores removed, ``'bike_out'`` when that text is
        empty) or ``'baseline'`` when the run name does not match the pattern.
        A metric that is absent from a line is stored as a missing value.

    Notes
    -----
    Lines without a ``:`` separator are skipped instead of raising
    ``IndexError``.
    """
    columns = ['config', 'RMSE', 'MAE', 'MASE']

    # Unsigned decimal number with an optional exponent. Unlike ``[\d.]+`` this
    # does not swallow trailing punctuation ("3.07." -> 3.07) and does not
    # truncate scientific notation ("1e-3" -> 0.001 rather than 1.0).
    number = r'((?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)'

    def _extract_metric(label, text):
        """Return the float following ``'<label> = '`` in ``text``, or None."""
        found = re.search(rf'{label} = {number}', text)
        return float(found.group(1)) if found else None

    data = []

    for line in results_string.strip().splitlines():
        if not line.strip():
            continue

        # Split on the FIRST colon only, so that any later colon inside the
        # metrics text does not cut the metrics off.
        name, separator, metrics_str = line.partition(':')
        if not separator:
            # Not a "<name>: <metrics>" line (header, free text, ...).
            continue
        name = name.strip()
        metrics_str = metrics_str.strip()

        # Extract the configuration from the run name.
        config_match = re.search(r'STAEformer_bike_out_weather_calendar_(.*?)__e200', name)
        if config_match:
            config = config_match.group(1).strip('_')
            if not config:
                config = 'bike_out'
        else:
            config = 'baseline'

        # Extract the metrics.
        data.append({
            'config': config,
            'RMSE': _extract_metric('RMSE', metrics_str),
            'MAE': _extract_metric('MAE', metrics_str),
            'MASE': _extract_metric('MASE', metrics_str),
        })

    # Passing the column list keeps the schema stable for empty input, so
    # callers can still select or group on these columns.
    df = pd.DataFrame(data, columns=columns)
    return df

from experiences.contextual_data_integration.exp2_results import results
# from experiences.contextual_data_integration.exp2_rainy_results import results

# One row per line of `results`: config, RMSE, MAE, MASE.
df_init = format_results_to_dataframe(results)

# Mean / std of every metric per configuration. The metric columns are selected
# explicitly so the aggregation is unaffected by any extra (string) column that
# may have been added to df_init.
df = df_init.groupby('config')[['RMSE', 'MAE', 'MASE']].agg(['mean', 'std'])
baseline_index = 'baseline'
if baseline_index not in df.index:
    raise KeyError(
        f"No '{baseline_index}' configuration found in the results; "
        "relative differences cannot be computed."
    )

# Lowest mean RMSE first.
df = df.sort_values(by=[('RMSE', 'mean')])
for c in df.columns:
    if 'mean' == c[1]:
        # Read the reference value once, while the column is still numeric.
        baseline_mean = df.loc[baseline_index, c]
        # "<mean> (<relative difference to baseline in %>\%)". The percent sign
        # is backslash-escaped because the text ends up in a LaTeX table; the
        # backslash itself is written as "\\" so the literal stays a valid
        # Python escape sequence (the emitted text is unchanged).
        df[c] = df[c].apply(
            lambda x: f"{round(x,3)} ({round(100*(-1+x/baseline_mean),2)}\\%)"
        )
df.index = [f"{c}__e200" for c in df.index]
df.columns = pd.MultiIndex.from_tuples([(x[0].lower() + '_h4', x[1]) for x in df.columns])
display(df)

# The caption must be a plain string. It used to be wrapped in {...}, i.e. a
# one-element set, whose repr was what appeared inside \caption{...}.
caption = r"Prediction of Bike-out (15min agg) at horizon [+45,+60min] with STAEformer using contextual data 'Weather' according to different Integration Strategies. Metrics are averaged over 5 runs."

# NOTE(review): the header below says "Experiment 1" and the label is
# "exp2_rainy" although the non-rainy exp2_results module is the one imported
# above — confirm which header/label is intended.
# NOTE(review): in the output saved with this notebook every table row is
# rendered as "No & Baseline (No Context)"; this suggests parse_index_exp1_2
# does not recognise index labels of the form "<config>__e200" — verify the
# label format it expects.
print("%%% LaTeX code for Experiment 1 Table %%%\n")
latex_table_1 = dataframe_to_latex(df, caption, "exp2_rainy", parse_index_exp1_2)
print(latex_table_1)

Unnamed: 0_level_0,rmse_h4,rmse_h4,mae_h4,mae_h4,mase_h4,mase_h4
Unnamed: 0_level_1,mean,std,mean,std,mean,std
late_fusion_repeat_t_proj__e200,4.831 (-1.67\%),0.012276,3.063 (-0.41\%),0.019191,0.795 (-0.38\%),0.004817
late_fusion_s_proj_t_proj__e200,4.844 (-1.41\%),0.027249,3.053 (-0.73\%),0.01055,0.793 (-0.7\%),0.002408
early_fusion_repeat_t_proj__e200,4.885 (-0.58\%),0.033201,3.07 (-0.16\%),0.02272,0.797 (-0.15\%),0.005958
baseline__e200,4.913 (0.0\%),0.02241,3.075 (0.0\%),0.012911,0.798 (0.0\%),0.003347
early_fusion_s_proj_t_proj__e200,4.916 (0.05\%),0.031681,3.092 (0.54\%),0.018102,0.803 (0.58\%),0.004494


%%% LaTeX code for Experiment 1 Table %%%

\begin{table}[!htb]
    \centering
    \caption{{"Prediction of Bike-out (15min agg) at horizon [+45,+60min] with STAEformer using contextual data 'Weather' according to different Integration Strategies. Metrics are averaged over 5 runs."}}
    \label{tab:exp2_rainy}
    \resizebox{\textwidth}{!}{
    \begin{tabular}{{llcccccc}}
        \toprule
        Contextual Data & Integration Strategy & \multicolumn{2}{c}{ MAE (h4) } & \multicolumn{2}{c}{ MASE (h4) } & \multicolumn{2}{c}{ RMSE (h4) } \\
        \cmidrule(lr){1-2} \cmidrule(lr){3-4} \cmidrule(lr){5-6} \cmidrule(lr){7-8}
         &  & Mean & Std & Mean & Std & Mean & Std \\
        \midrule
        \midrule
        No & Baseline (No Context) & 3.063 (-0.41\%) & 0.0192 & 0.795 (-0.38\%) & 0.0048 & 4.831 (-1.67\%) & 0.0123 \\
        \midrule
        No & Baseline (No Context) & 3.053 (-0.73\%) & 0.0105 & 0.793 (-0.7\%) & 0.0024 & 4.844 (-1.41\%) & 0.0272 \\
        \midrule
        No & Ba

In [3]:
# Alternative input: swap the active import below for this one.
# from experiences.contextual_data_integration.exp2_rainy_results import results
# Rebuild df_init from the raw results string at the start of this cell: the
# statements further down overwrite its 'config' column and add an 'id' column,
# so starting from a fresh frame keeps the cell re-runnable.
from experiences.contextual_data_integration.exp2_results import results 
df_init = format_results_to_dataframe(results) 

def f_replace(x):
    """Return ``x`` with the snake_case strategy tokens replaced by CamelCase names.

    The substitutions are applied one after another, in the order listed.
    """
    renames = (
        ('early_fusion', 'EarlyFusion'),
        ('late_fusion', 'LateFusion'),
        ('s_proj_t_proj', 'SProjTproj'),
        ('repeat_t_proj', 'RepeatTproj'),
    )
    for old_token, new_token in renames:
        x = x.replace(old_token, new_token)
    return x

# Use the display-friendly configuration names both as the grouping column and
# as the 'id' column handed to the plotting helper.
df_init['config'] = df_init['config'].apply(f_replace)
df_init['id'] = df_init['config']

# Settings shared by both box plots; only the metric differs between the calls.
boxplot_kwargs = dict(
    xaxis_label="Config",
    legend_group='config',
    width=1000,
    height=400,
    save_path=None,
)
plot_boxplot_on_metric(df_init, metric_i='MAE', **boxplot_kwargs)
plot_boxplot_on_metric(df_init, metric_i='RMSE', **boxplot_kwargs)



