In [1]:
import os 
import pandas as pd 
import sys 
import re 

# Make the project packages importable from this notebook: the directory two
# levels above the current working directory is put at the front of sys.path.
# NOTE(review): presumably that directory is the repository root holding the
# `jupyter_ipynb` and `experiences` packages imported below — confirm, since it
# depends on where the kernel was started.
current_path = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_path,'..','..'))  # alternative depth kept for reference: '..','..','..'
# Guard against inserting the same entry again when the cell is re-run.
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)
    
from jupyter_ipynb.NetMob_training_analysis.plotting import plot_boxplot_on_metric
from experiences.convert_df_to_latex import update_df_metrics
from experiences.convert_df_to_latex import tackle_trial_j as tackle_trial_for_distrib
from experiences.convert_df_to_latex import dataframe_to_latex,parse_index_exp1_2

from experiences.benchmarking.RESULTS_benchmark_from_init_config import results as results_from_init
from experiences.benchmarking.RESULTS_benchmark import results as results
# Relative path to the saved K-fold runs; it is not referenced by the cells
# visible in this notebook section.
folder_path = '../../save/K_fold_validation/training_wo_HP_tuning'  # alternative kept for reference: '../../save'


import re
import pandas as pd

def results_to_dataframe(results_string: str) -> pd.DataFrame:
    """
    Parse a multi-line benchmark dump into a DataFrame, one row per trial.

    Each non-empty line is expected to look like::

        <Model>_<Target>[_<Context>]__<params>: RMSE = .., MAE = .., MASE = ..

    where ``<Target>`` is one of ``subway_in``, ``subway_out``, ``bike_in``,
    ``bike_out`` and ``<params>`` may contain ``e<int>`` (epochs), ``h<int>``
    (horizon) and ``bis<int>`` (repetition index) tokens.

    Parsing is best-effort:
      * blank lines and lines without a ``:`` separator are skipped;
      * only the first ``:`` separates the name from the metrics, so the
        metrics part may itself contain colons;
      * an unrecognised target yields ``"unknown"``, a missing context yields
        the string ``"None"``;
      * a missing parameter token yields ``0`` and a missing metric ``-1.0``.

    Args:
        results_string: Raw text with one trial per line.

    Returns:
        DataFrame with columns ``Model, Target, Context, Epoch, Horizon, Bis,
        RMSE, MAE, MASE`` (empty, but with these columns, when nothing parses).
    """
    columns = ['Model', 'Target', 'Context', 'Epoch', 'Horizon', 'Bis', 'RMSE', 'MAE', 'MASE']
    # Signed decimal with optional exponent; never captures a dangling '.'.
    number = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'

    def _int_param(pattern: str, text: str) -> int:
        """Return the first integer captured by `pattern` in `text`, else 0."""
        found = re.search(pattern, text)
        return int(found.group(1)) if found else 0

    def _metric(m_name: str, text: str) -> float:
        """Return the value following ``<m_name> =`` in `text`, else -1.0."""
        found = re.search(fr'{m_name}\s*=\s*({number})', text)
        return float(found.group(1)) if found else -1.0

    data = []
    for raw_line in results_string.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        # Split on the FIRST colon only; skip lines that carry no metrics part.
        name_part, separator, metrics_part = line.partition(':')
        if not separator:
            continue
        name_part, metrics_part = name_part.strip(), metrics_part.strip()

        # --- Configuration (before '__') and parameters (after '__') ---
        name_chunks = name_part.split('__')
        config_str = name_chunks[0]
        params_str = name_chunks[1] if len(name_chunks) > 1 else ''

        # Assumed layout: Model_Target_Context; the target marks the cut point.
        target_match = re.search(r'(subway_in|subway_out|bike_in|bike_out)', config_str)
        target = target_match.group(1) if target_match else "unknown"

        model = config_str.split(f'_{target}')[0]
        context = config_str.split(f'{target}_')[1] if f'{target}_' in config_str else "None"

        data.append([
            model, target, context,
            _int_param(r'e(\d+)', params_str),
            _int_param(r'h(\d+)', params_str),
            _int_param(r'bis(\d+)', params_str),
            _metric('RMSE', metrics_part),
            _metric('MAE', metrics_part),
            _metric('MASE', metrics_part),
        ])

    return pd.DataFrame(data, columns=columns)

# Parse the two benchmark dumps imported above into DataFrames
# (`results_from_init` and `results`), then preview the second one.
df_results_init = results_to_dataframe(results_from_init)
df_results = results_to_dataframe(results)
display(df_results.head())

Unnamed: 0,Model,Target,Context,Epoch,Horizon,Bis,RMSE,MAE,MASE
0,DCRNN,subway_in,,500,4,1,62.628,34.844,1.043
1,DCRNN,subway_in,,500,4,2,61.943,34.722,1.04
2,DCRNN,subway_in,,500,4,3,65.112,36.436,1.091
3,DCRNN,subway_in,,500,4,4,62.428,35.233,1.055
4,DCRNN,subway_in,,500,4,5,61.366,34.478,1.032


In [2]:
import pandas as pd
import numpy as np 
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.palettes import Category10
from bokeh.transform import factor_cmap
from bokeh.plotting import figure 
from bokeh.models import ColumnDataSource, Toggle, CustomJS,HoverTool, Legend,Model
from bokeh.layouts import layout,row,column
from bokeh.resources import CDN
from bokeh.io import reset_output,show, output_file, save,output_notebook
from bokeh.transform import dodge

def _add_overlay_box(p, df_sub, legend_group, group_name, metric_i, color, box_width,
                     alpha_box, alpha_lines, with_points):
    """Draw box, whiskers, median and mean of one legend group on figure ``p``.

    Args:
        p: Bokeh figure with a categorical x-range of ``id`` values.
        df_sub: Frame holding at least ``id``, ``legend_group`` and ``metric_i``.
            It is only read, never modified.
        legend_group: Name of the column used to select the group.
        group_name: Group value (compared as a string).
        metric_i: Name of the metric column summarised per ``id``.
        color: Fill colour of the box (and of the points).
        box_width: Width of the box in categorical units.
        alpha_box, alpha_lines: Fill / line opacity of the glyphs.
        with_points: When True, also scatter the individual values and register
            them in the legend under ``group_name``.

    Returns:
        ``(box_renderers, point_renderer)``. ``box_renderers`` is empty and
        ``point_renderer`` is None when the group has no rows; ``point_renderer``
        is also None when ``with_points`` is False.
    """
    # Select without adding a helper column to the caller's frame.
    subset = df_sub[df_sub[legend_group].astype(str) == group_name]
    if subset.empty:
        return [], None

    grp = subset.groupby("id")[metric_i]
    stats = pd.DataFrame({
        "min_v": grp.min(), "q1": grp.quantile(0.25),
        "median_v": grp.quantile(0.50), "q3": grp.quantile(0.75),
        "max_v": grp.max(), "mean_v": grp.mean()
    }).reset_index()
    b_src = ColumnDataSource(stats)

    left = dodge("id", -box_width / 2, range=p.x_range)
    right = dodge("id", box_width / 2, range=p.x_range)

    boxes = [
        # Inter-quartile box
        p.vbar(x="id", top="q3", bottom="q1", width=box_width, source=b_src,
               fill_color=color, fill_alpha=alpha_box, line_color="black", line_alpha=alpha_lines),
        # Whiskers (max -> q3, min -> q1)
        p.segment(x0="id", y0="max_v", x1="id", y1="q3", source=b_src,
                  line_color="black", line_alpha=alpha_lines),
        p.segment(x0="id", y0="min_v", x1="id", y1="q1", source=b_src,
                  line_color="black", line_alpha=alpha_lines),
        # Median (solid) and mean (dashed) across the box width
        p.segment(x0=left, y0="median_v", x1=right, y1="median_v", source=b_src,
                  line_width=2, line_color="black", line_alpha=alpha_lines),
        p.segment(x0=left, y0="mean_v", x1=right, y1="mean_v", source=b_src,
                  line_width=2, line_color="black", line_dash="dashed", line_alpha=alpha_lines),
    ]

    points = None
    if with_points:
        # NOTE(review): recent Bokeh releases deprecate `circle(size=...)` in
        # favour of `scatter` — confirm the installed version before switching.
        points = p.circle(x="id", y=metric_i, source=ColumnDataSource(subset), size=7,
                          line_color="black", fill_color=color, legend_label=group_name)
    return boxes, points


def plot_overlay_boxplot(df_init, df_results, metric_i='mse', xaxis_label="App", legend_group='fold', width=1200, height=400,
                         save_path=None, palette=None, legend_groups=None, title=None, bool_show=True):
    """Overlay two box-plot series per ``id``: initial (faded) vs current (opaque).

    For every legend group, a faded box is drawn from ``df_init`` and an opaque
    box plus the individual points from ``df_results``. The x categories are the
    ``id`` values of ``df_results`` sorted by their mean ``metric_i``. Clicking
    a legend entry hides the points and boxes of that group and drops its ids
    from the x-axis.

    Args:
        df_init: Frame of the initial runs (``id``, ``legend_group``, ``metric_i``
            columns); may be None to draw only ``df_results``.
        df_results: Frame of the current runs, same columns.
        metric_i: Metric column to plot.
        xaxis_label: Label of the x axis.
        legend_group: Column whose (stringified) values define the legend groups.
        width, height: Figure size in pixels.
        save_path: When given, the figure is also written to this HTML file.
        palette: Sequence of colours; cycled if there are more groups than
            colours. Defaults to a Category10 palette.
        legend_groups: Groups to draw, in order. Defaults to the sorted unique
            values of ``legend_group`` in ``df_results``.
        title: Figure title; a default is built from ``metric_i``.
        bool_show: When True, display the figure in the notebook.

    Returns:
        The Bokeh figure. Neither input frame is modified.
    """
    if title is None:
        title = f"{metric_i} Comparison: Initial vs Current"

    # x categories: ids of the current results, sorted by mean metric.
    sorted_ids = df_results.groupby("id")[metric_i].mean().sort_values().index.tolist()

    p = figure(x_range=FactorRange(factors=sorted_ids), width=width, height=height, title=title)
    box_width = 0.4  # wide boxes: the two series are overlaid, not side by side

    # Groups and colours are resolved independently, so passing only one works.
    if legend_groups is None:
        legend_groups = sorted(df_results[legend_group].astype(str).unique())
    else:
        legend_groups = [str(g) for g in legend_groups]
    if palette is None or len(palette) == 0:
        # Category10 is only defined for 3..10 colours.
        palette = Category10[min(10, max(3, len(legend_groups)))]

    circles = []      # one point renderer per drawn group (the legend entry)
    companions = []   # box renderers that must follow the matching circle

    for i, group_name in enumerate(legend_groups):
        color = palette[i % len(palette)]  # cycle instead of overflowing

        init_boxes = []
        if df_init is not None:
            # 1. Faded box plot (initial runs), no points
            init_boxes, _ = _add_overlay_box(p, df_init, legend_group, group_name, metric_i,
                                             color, box_width, 0.1, 0.2, with_points=False)
        # 2. Opaque box plot (current runs) + individual points
        res_boxes, points = _add_overlay_box(p, df_results, legend_group, group_name, metric_i,
                                             color, box_width, 0.6, 1.0, with_points=True)

        if points is None:
            # No current result for this group: nothing to toggle from the legend.
            continue
        circles.append(points)
        companions.append(init_boxes + res_boxes)

    # Formatting
    p.xaxis.axis_label = xaxis_label
    p.yaxis.axis_label = metric_i
    p.xaxis.major_label_orientation = np.pi/3

    if circles:  # a legend only exists once at least one group was drawn
        p.legend.click_policy = "hide"
        p.add_layout(p.legend[0], 'right')

        # When a legend entry toggles its circle, mirror the visibility on the
        # boxes of the same group and keep only the ids of visible groups on
        # the x-axis. setTimeout lets all `visible` flags settle first.
        callback = CustomJS(args=dict(circles=circles, companions=companions, x_range=p.x_range,
                                      original_factors=list(sorted_ids)), code="""
        setTimeout(function() {
            const active_factors = new Set();
            for (let k = 0; k < circles.length; k++) {
                const circle_renderer = circles[k];
                for (const r of companions[k]) {
                    r.visible = circle_renderer.visible;
                }
                if (circle_renderer.visible) {
                    const ids = circle_renderer.data_source.data.id;
                    if (ids) {
                        ids.forEach(id => active_factors.add(id));
                    }
                }
            }
            x_range.factors = original_factors.filter(f => active_factors.has(f));
        }, 0);
    """)
        for points in circles:
            points.js_on_change('visible', callback)

    if save_path is not None:
        # Stand-alone HTML export; explicit arguments leave the notebook output state untouched.
        save(p, filename=save_path, resources=CDN, title=title)

    if bool_show:
        output_notebook()
        show(p)
    return p


def convert_context(context):
    """Return the display label of a context identifier.

    Both calendar variants ('calendar_embedding' and 'calendar') are shown as
    'Calendar'; any other value is returned unchanged.
    """
    calendar_aliases = ('calendar_embedding', 'calendar')
    return 'Calendar' if context in calendar_aliases else context

# Build the x-axis identifier of every run in both frames:
# "<Model> - <Context label>" when a context is set, otherwise just "<Model>".
# A comprehension is used instead of a row-wise `apply`, which is slow and does
# not yield a Series on an empty frame (the column assignment would then fail).
for df in [df_results_init, df_results]:
    df['id'] = [
        model if context == 'None' else f"{model} - {convert_context(context)}"
        for model, context in zip(df['Model'], df['Context'])
    ]

palette = ['#4e79a7','#f28e2b','#e15759', '#59a14f', '#c7e9c0','#1a4314','#edc949','#af7aa1','#ff9da7','#9c755f','#bab0ac']
horizons = df_results.Horizon.unique()

for horizon in horizons:
    # Restrict both data sets to the current horizon (copies: columns are added below)
    df_h_init = df_results_init[df_results_init['Horizon'] == horizon].copy()
    df_h_res = df_results[df_results['Horizon'] == horizon].copy()

    # One legend entry (and colour) per model
    df_h_init['legend_group'] = df_h_init['Model']
    df_h_res['legend_group'] = df_h_res['Model']

    for metric_i in ['RMSE', 'MAE', 'MASE']:
        title = f'{metric_i} Comparison: Initial (transparent) vs Results (opaque) - Horizon {horizon}'

        # Overlay the initial (faded) and current (opaque) distributions
        plot_overlay_boxplot(
            df_init=df_h_init,
            df_results=df_h_res,
            metric_i=metric_i,
            xaxis_label="Config",
            legend_group='legend_group',
            width=800,
            height=600,
            save_path=None,
            palette=palette,
            legend_groups=df_h_res['Model'].unique().tolist(),
            title=title
        )













In [2]:
def convert_context(context):
    """Return the display label of a context identifier.

    Both calendar variants ('calendar_embedding' and 'calendar') are shown as
    'Calendar'; any other value is returned unchanged.
    """
    calendar_aliases = ('calendar_embedding', 'calendar')
    return 'Calendar' if context in calendar_aliases else context



# Loop-invariant settings, defined once instead of on every iteration.
palette = ['#4e79a7','#f28e2b','#e15759', '#59a14f', '#c7e9c0','#1a4314','#edc949','#af7aa1','#ff9da7','#9c755f','#bab0ac']
caption = '<fill caption>'
label = '<fill label>'


def index_parser(x):
    """Parse a table index entry with 'calendar' as the contextual data."""
    return parse_index_exp1_2(x, contextual='calendar')


# For each result set (initial, then current): one box plot per horizon and
# metric, followed by a LaTeX table of the mean/std per configuration.
for df_results_i in [df_results_init, df_results]:
    # "<Model> - <Context label>" when a context is set, otherwise "<Model>".
    # A comprehension also works on an empty frame, unlike a row-wise `apply`.
    df_results_i['id'] = [
        model if context == 'None' else f"{model} - {convert_context(context)}"
        for model, context in zip(df_results_i['Model'], df_results_i['Context'])
    ]

    horizons = df_results_i.Horizon.unique()
    for horizon in horizons:
        df_horizon = df_results_i[df_results_i['Horizon'] == horizon].copy()
        df_horizon['legend_group'] = df_horizon['Model']

        # Name the target(s) actually present instead of hard-coding one:
        # 'subway_in' -> 'Subway-In'.
        targets = df_horizon['Target'].unique()
        target_label = ', '.join(str(t).replace('_', '-').title() for t in targets)

        for metric_i in ['RMSE','MAE','MASE']:
            # The horizon index is turned into a [start - end] window of 15 units, labelled "min".
            title = f'{metric_i} Distribution per Config and Fusion Strategy of {target_label} Prediction at Horizon [{15*(int(horizon)-1)} - {15*(int(horizon))}]min'
            plot_boxplot_on_metric(df_horizon, metric_i=metric_i, xaxis_label="Config", legend_group='legend_group', width=800, height=600,
                                    save_path=None,palette = palette ,
                                    legend_groups =df_horizon['Model'].unique().tolist(),
                                    title = title)

        # ---- Display performance in a LaTeX table ----
        # Rounded so the printed means do not carry float noise
        # (e.g. 31.565999999999995 instead of 31.566).
        df_agg = (
            df_horizon[['id','Epoch','Horizon','RMSE', 'MAE', 'MASE']]
            .groupby(['id','Horizon'])
            .agg(['mean','std'])
            .round(4)
        )

        print(dataframe_to_latex(df_agg,
                        caption = caption,
                        label =  label,
                        index_parser = index_parser,
                        horizon = horizon  # same value as the 'Horizon' index level of df_agg
                        ) )
        # -----------------------------------------------
        # -----------------------------------------------










\begin{table}[!htb]
    \centering
    \caption{<fill caption>}
    \label{tab:<fill label>}
    \resizebox{\textwidth}{!}{
    \begin{tabular}{{llcccccccc}}
        \toprule
        Contextual Data & Integration Strategy & \multicolumn{2}{c}{ EPOCH (h4) } & \multicolumn{2}{c}{ RMSE (h4) } & \multicolumn{2}{c}{ MAE (h4) } & \multicolumn{2}{c}{ MASE (h4) } \\
        \cmidrule(lr){1-2} \cmidrule(lr){3-4} \cmidrule(lr){5-6} \cmidrule(lr){7-8} \cmidrule(lr){9-10}
         &  & Mean & Std & Mean & Std & Mean & Std & Mean & Std \\
        \midrule
        \midrule
        No & Baseline (No Context) & 500.0 & 0.0000 & 56.023 & 0.2725 & 31.565999999999995 & 0.2607 & 0.9452 & 0.0079 \\
        \midrule
        No & Baseline (No Context) & 500.0 & 0.0000 & 50.946 & 0.3745 & 28.587400000000002 & 0.1675 & 0.8558 & 0.0051 \\
        \midrule
        No & Baseline (No Context) & 100.0 & 0.0000 & 47.7864 & 0.6632 & 26.358600000000003 & 0.1977 & 0.789 & 0.0062 \\
        \midrule
        No & Baselin







\begin{table}[!htb]
    \centering
    \caption{<fill caption>}
    \label{tab:<fill label>}
    \resizebox{\textwidth}{!}{
    \begin{tabular}{{llcccccccc}}
        \toprule
        Contextual Data & Integration Strategy & \multicolumn{2}{c}{ EPOCH (h1) } & \multicolumn{2}{c}{ RMSE (h1) } & \multicolumn{2}{c}{ MAE (h1) } & \multicolumn{2}{c}{ MASE (h1) } \\
        \cmidrule(lr){1-2} \cmidrule(lr){3-4} \cmidrule(lr){5-6} \cmidrule(lr){7-8} \cmidrule(lr){9-10}
         &  & Mean & Std & Mean & Std & Mean & Std & Mean & Std \\
        \midrule
        \midrule
        No & Baseline (No Context) & 500.0 & 0.0000 & 41.5216 & 0.2833 & 24.188200000000002 & 0.1031 & 0.7243999999999999 & 0.0032 \\
        \midrule
        No & Baseline (No Context) & 500.0 & 0.0000 & 40.536 & 0.1851 & 23.4574 & 0.0964 & 0.7023999999999999 & 0.0029 \\
        \midrule
        No & Baseline (No Context) & 100.0 & 0.0000 & 39.2324 & 0.3304 & 23.162799999999997 & 0.2682 & 0.6936 & 0.0081 \\
        \midrule
     







\begin{table}[!htb]
    \centering
    \caption{<fill caption>}
    \label{tab:<fill label>}
    \resizebox{\textwidth}{!}{
    \begin{tabular}{{llcccccccc}}
        \toprule
        Contextual Data & Integration Strategy & \multicolumn{2}{c}{ EPOCH (h4) } & \multicolumn{2}{c}{ RMSE (h4) } & \multicolumn{2}{c}{ MAE (h4) } & \multicolumn{2}{c}{ MASE (h4) } \\
        \cmidrule(lr){1-2} \cmidrule(lr){3-4} \cmidrule(lr){5-6} \cmidrule(lr){7-8} \cmidrule(lr){9-10}
         &  & Mean & Std & Mean & Std & Mean & Std & Mean & Std \\
        \midrule
        \midrule
        No & Baseline (No Context) & 500.0 & 0.0000 & 62.69539999999999 & 1.4361 & 35.1426 & 0.7728 & 1.0522 & 0.0232 \\
        \midrule
        No & Baseline (No Context) & 500.0 & 0.0000 & 49.19 & 0.5105 & 27.833800000000004 & 0.2062 & 0.8333999999999999 & 0.0065 \\
        \midrule
        No & Baseline (No Context) & 100.0 & 0.0000 & 47.1546 & 0.8310 & 24.929399999999998 & 0.3444 & 0.7464000000000001 & 0.0105 \\
        \midr







\begin{table}[!htb]
    \centering
    \caption{<fill caption>}
    \label{tab:<fill label>}
    \resizebox{\textwidth}{!}{
    \begin{tabular}{{llcccccccc}}
        \toprule
        Contextual Data & Integration Strategy & \multicolumn{2}{c}{ EPOCH (h1) } & \multicolumn{2}{c}{ RMSE (h1) } & \multicolumn{2}{c}{ MAE (h1) } & \multicolumn{2}{c}{ MASE (h1) } \\
        \cmidrule(lr){1-2} \cmidrule(lr){3-4} \cmidrule(lr){5-6} \cmidrule(lr){7-8} \cmidrule(lr){9-10}
         &  & Mean & Std & Mean & Std & Mean & Std & Mean & Std \\
        \midrule
        \midrule
        No & Baseline (No Context) & 500.0 & 0.0000 & 44.1104 & 0.1719 & 25.8322 & 0.1641 & 0.7736000000000001 & 0.0048 \\
        \midrule
        No & Baseline (No Context) & 500.0 & 0.0000 & 39.48780000000001 & 0.1535 & 23.115199999999998 & 0.0669 & 0.6921999999999999 & 0.0022 \\
        \midrule
        No & Baseline (No Context) & 100.0 & 0.0000 & 38.1164 & 0.4130 & 21.6618 & 0.1558 & 0.6484 & 0.0048 \\
        \midrule
     