In [4]:
# GET PARAMETERS
import os 
import sys
import pickle
import importlib 

# Get Parent folder : 
current_path = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_path, '..'))
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

# Personnal imports: 
from constants.paths import SAVE_DIRECTORY, FOLDER_PATH
from jupyter_ipynb.utils import parse_results_to_dataframe
from jupyter_ipynb.NetMob_training_analysis.plotting import plot_boxplot_on_metric 
# from save.fast_training_str_results.subway_in_subway_out_bike_in_bike_out_1_trial_1year import results_string as results_string_1year


target_data = 'subway_in'


# --- Load results string
module = importlib.import_module(f'save.fast_training_str_results.{target_data}_subway_out_bike_in_bike_out_1_trial_1year')
results_string = module.results_string
# from save.fast_training_str_results.subway_out_subway_in_bike_in_bike_out_1_trial_1year import results_string as results_string_1year
# from save.fast_training_str_results.subway_in_subway_out_bike_in_bike_out_1_trial_77days import results_string as results_string_77days

# --- Convert into df: 
results_df,methods = parse_results_to_dataframe(results_string,bis=True)
results_df['legend_group'] = results_df.apply(lambda row : f"{row['model_name']} h{row['horizon']}",axis=1)
plot_boxplot_on_metric(results_df,metric_i='MAE',xaxis_label = 'config', legend_group = 'legend_group', save_path=None)

  df = df.fillna(False)


## LaTeX Results: 

In [None]:
agg_df = results_df[['id','RMSE','MAE','MAPE','MASE']].groupby(['id']).agg(['mean', 'std'])
agg_df[[('RMSE','mean'),('MAE','mean'),('MAPE','mean'),('RMSE','std'),('MAE','std'),('MAPE','std')]] = agg_df[[('RMSE','mean'),('MAE','mean'),('MAPE','mean'),('RMSE','std'),('MAE','std'),('MAPE','std')]].round(2)
agg_df[[('MASE','mean'),('MASE','std')]] = agg_df[[('MASE','mean'),('MASE','std')]].round(4)
agg_df['horizon'] = agg_df.index.str.split('_h').str[1].astype(int)
agg_df['id_name'] = agg_df.index.str.split('_h').str[0]
agg_df =agg_df.groupby(['horizon','id_name']).mean()


import itertools

# LaTex Table:
for horizon in [15,30,45,60]:
    df_h_i = agg_df.loc[horizon,:]
    for metric in ['RMSE','MAE','MAPE','MASE']:
        ref = df_h_i.loc['',(metric,'mean')].copy()
        s_mean = df_h_i[(metric,'mean')].copy()
        df_h_i[(metric,'gain (%)')] = ((s_mean/ref -1 )*100).round(2)

    columns= [[(metric,'mean'),(metric,'gain (%)'),(metric,'std')] for metric in ['RMSE','MAE','MAPE','MASE']]
    columns = list(itertools.chain(*columns))
    df_h_i = df_h_i[columns]

df_h_i

## Loading the exact metrics without rounded performance

In [None]:
import itertools 
model_benchmarks =  ['STGCN','STAEformer']
freq = 15 # 15min 
horizons = [1,2,3,4]
contextual_dataset_names = ['subway_in','subway_out','bike_in','bike_out','weather']
contextual_dataset_names = list(itertools.chain.from_iterable(itertools.combinations(contextual_dataset_names, r) for r in range(len(contextual_dataset_names)+1)))
contextual_dataset_names = [list(context_ds) for context_ds in contextual_dataset_names]
n_bis = list(range(1,6))
for model_name in model_benchmarks:
    # Load saved args: 
    subfolder = f'K_fold_validation/training_wo_HP_tuning/optim/subway_in_{model_name}'
    path_model_args = f"{SAVE_DIRECTORY}/{subfolder}/best_models"
    globals()[f"{model_name}_model_args"] = pickle.load(open(f"{path_model_args}/model_args.pkl", 'rb'))


for trial_id in results_df['id'].unique():
    sub_df = results_df[results_df['id'] == trial_id]
    assert len(sub_df) == 5, f"Number of trial_id recorded for {trial_id}: {len(sub_df)}. Has to be = 5.\n{sub_df}"

    try: 
        for bis in sub_df['bis'].unique():
            sub_df_bis = sub_df[sub_df['bis'] == bis]

            calendar = 'calendar' if sub_df_bis['STAEformer'].values[0] else 'calendar_embedding'
            methods_pkl = '_'.join([m for m in methods if sub_df_bis[m].values[0]])
            horizon = sub_df_bis['horizon'].item()//freq
            if len(methods_pkl) == 0:
                if sub_df_bis['contextuals'].item() == '':
                    unique_id = f"{sub_df_bis['target_data'].item()}_{calendar}_h{horizon}_bis{bis}"
                else:
                    unique_id = f"{sub_df_bis['target_data'].item()}_{sub_df_bis['contextuals'].item()}_{calendar}_h{horizon}_bis{bis}"
            else:
                if sub_df_bis['contextuals'].item() == '':
                    unique_id = f"{sub_df_bis['target_data'].item()}_{calendar}_{methods_pkl}_h{horizon}_bis{bis}"
                else:
                    unique_id = f"{sub_df_bis['target_data'].item()}_{sub_df_bis['contextuals'].item()}_{calendar}_{methods_pkl}_h{horizon}_bis{bis}"

            idx = sub_df_bis.index[0]

            results_df.loc[idx, 'MASE'] = globals()[f"{model_name}_model_args"]['model'][unique_id]['performance']['test_metrics'][f'mase_h{horizon}']
            results_df.loc[idx, 'RMSE'] = globals()[f"{model_name}_model_args"]['model'][unique_id]['performance']['test_metrics'][f'rmse_h{horizon}']
            results_df.loc[idx, 'MSE'] = globals()[f"{model_name}_model_args"]['model'][unique_id]['performance']['test_metrics'][f'mse_h{horizon}']
            results_df.loc[idx, 'MAE'] = globals()[f"{model_name}_model_args"]['model'][unique_id]['performance']['test_metrics'][f'mae_h{horizon}']
    except:
        print(f"Unique_id {unique_id} not found in {model_name}_model_args")
        continue

plot_boxplot_on_metric(results_df,metric_i='MAE',xaxis_label = 'config', legend_group = 'horizon', save_path=None)


Unique_id subway_in_subway_out_bike_in_bike_out_calendar_embedding_h1_bis1 not found in STAEformer_model_args
Unique_id subway_in_subway_out_bike_out_calendar_embedding_h1_bis1 not found in STAEformer_model_args
Unique_id subway_in_subway_out_bike_in_calendar_embedding_h1_bis1 not found in STAEformer_model_args
Unique_id subway_in_subway_out_calendar_embedding_h1_bis1 not found in STAEformer_model_args
Unique_id subway_in_bike_in_calendar_embedding_h1_bis1 not found in STAEformer_model_args
Unique_id subway_in_bike_out_calendar_embedding_h1_bis1 not found in STAEformer_model_args
Unique_id subway_in_calendar_embedding_h1_bis1 not found in STAEformer_model_args
Unique_id subway_in_bike_in_bike_out_calendar_embedding_h1_bis1 not found in STAEformer_model_args
Unique_id subway_in_subway_out_calendar_embedding_stack_h1_bis1 not found in STAEformer_model_args
Unique_id subway_in_subway_out_calendar_embedding_FFConcatLate_h1_bis1 not found in STAEformer_model_args
Unique_id subway_in_subway_



In [None]:
import pandas as pd


'subway_in'

In [None]:

                for stack in [True,False]:
                    for ff_concat_late in [True,False]:
                        for attn_late in [True,False]:
                            for epochs in ['','e50','e500']: # ['',200]: # [100,200]:
                                for weather in [True,False]: # ['',200]: # [100,200]:
                                    if model_name == 'STGCN':
                                        calendar_name = 'calendar_embedding'
                                    elif model_name == 'STAEformer':
                                        calendar_name = 'calendar'
                                    else:
                                        raise NotImplementedError
                                    dataset_names =  [target_data] +contextual_dataset_names+ [calendar_name]
                                    

                                    sub_df = results_df[(results_df['contextuals'] == '_'.join(contextual_dataset_names)) & 
                                                    (results_df['horizon'] == horizon*freq) &
                                                    (results_df['bis'] == n_bis)&
                                                    (results_df['stack'] == stack)&
                                                    (results_df['ff_concat_late'] == ff_concat_late) & 
                                                    (results_df['STAEformer'] == (model_name=='STAEformer'))&
                                                    (results_df['attn_late'] == attn_late)&
                                                    (results_df['epochs'] == epochs)&
                                                    ]
                                    if len(sub_df) == 0:
                                        continue
                                    idx = sub_df.index
                                    assert len(idx) == 1, f"Number of configuration recorded for {'_'.join(contextual_dataset_names)}: {len(idx)}. Has to be = 1.\n{sub_df}"

                                    if stack:
                                        name_i = f"{'_'.join(dataset_names)}_stack_h{horizon}_bis{n_bis}"
                                    elif ff_concat_late:
                                        name_i = f"{'_'.join(dataset_names)}_ff_concat_late_h{horizon}_bis{n_bis}"
                                    else:
                                        name_i = f"{'_'.join(dataset_names)}_h{horizon}_bis{n_bis}"
                                    if model_name == 'STAEformer':
                                        name_i = f"{name_i}"
                                    elif model_name == 'STGCN':
                                        name_i = f"_{name_i}"
                                    else:
                                        raise NotImplementedError
                                    results_df.loc[idx[0], 'MASE'] = globals()[f"{model_name}_model_args"]['model'][f"{name_i}_f5"]['performance']['test_metrics'][f'mase_h{horizon}']
                                    results_df.loc[idx[0], 'RMSE'] = globals()[f"{model_name}_model_args"]['model'][f"{name_i}_f5"]['performance']['test_metrics'][f'rmse_h{horizon}']
                                    results_df.loc[idx[0], 'MSE'] = globals()[f"{model_name}_model_args"]['model'][f"{name_i}_f5"]['performance']['test_metrics'][f'mse_h{horizon}']
                                    results_df.loc[idx[0], 'MAE'] = globals()[f"{model_name}_model_args"]['model'][f"{name_i}_f5"]['performance']['test_metrics'][f'mae_h{horizon}']
        # ---


    # --- Plot Metrics : 

from jupyter_ipynb.NetMob_training_analysis.plotting import plot_boxplot_on_metric 


plot_boxplot_on_metric(results_df,metric_i='MAE',xaxis_label = 'config', legend_group = 'horizon', save_path=None)
# plot_boxplot_on_metric(results_df,metric_i='RMSE',xaxis_label = 'config', legend_group = 'horizon', save_path=None)
# plot_boxplot_on_metric(results_df,metric_i='MASE',xaxis_label = 'config', legend_group = 'horizon', save_path=None)