In [None]:
import sys
import os

# Make the directory two levels above the notebook's working directory importable.
notebook_dir = os.getcwd()
current_path = notebook_dir
working_dir = os.path.abspath(os.path.join(current_path, os.pardir, os.pardir))
if sys.path.count(working_dir) == 0:
    sys.path.insert(0, working_dir)

import pandas as pd
import torch
import pickle
from argparse import Namespace
import re 
# App names used to look up the saved models (one set of models per app).
L_Apps = [
    'Apple_Video', 'Google_Play_Store', 'Google_Maps', 'Web_Clothes',
    'Uber', 'Twitter', 'Microsoft_Mail', 'Microsoft_Store',
    'Apple_Music', 'Microsoft_Office', 'Pokemon_GO', 'Clash_of_Clans',
    'Yahoo_Mail', 'PlayStation', 'Wikipedia', 'Apple_Web_Services',
    'Pinterest', 'Web_Ads', 'Google_Mail', 'Google_Meet',
    'Apple_Siri', 'Web_Adult', 'Spotify', 'Deezer',
    'Waze', 'Web_Games', 'Apple_App_Store', 'Microsoft_Skydrive',
    'Google_Docs', 'Microsoft_Web_Services', 'Molotov', 'YouTube',
    'Apple_iTunes', 'Apple_iMessage', 'DailyMotion', 'Netflix',
    'Web_Transportation', 'Web_Downloads', 'SoundCloud', 'TeamViewer',
    'Google_Web_Services', 'Facebook', 'EA_Games', 'Tor',
    'Amazon_Web_Services', 'Web_e-Commerce', 'Telegram', 'Apple_Mail',
    'Dropbox', 'Web_Food', 'Apple_iCloud', 'Skype',
    'Facebook_Messenger', 'Twitch', 'Microsoft_Azure', 'Instagram',
    'Facebook_Live', 'Web_Streaming', 'Orange_TV', 'Periscope',
    'Snapchat', 'Web_Finance', 'WhatsApp', 'Web_Weather',
    'Google_Drive', 'LinkedIn', 'Yahoo', 'Fortnite',
]


def get_df_results(trial_id,model_args,L_Apps,split_key = 'eps100_'):
    """Collect the test metrics of the saved models of every app into one DataFrame.

    Args:
        trial_id: Prefix shared by the model names of the trial.
        model_args: Nested dict; ``model_args['model'][name]['performance']['test_metrics']``
            must provide the keys ``'mse'``, ``'mae'`` and ``'mape'``.
        L_Apps: Iterable of app names to look up among the model names.
        split_key: Separator located right before the app name inside a model name.

    Returns:
        DataFrame with the columns ``mse, mae, mape, fold, id, trial_num`` holding one
        row per retained model (at most 5 per app). ``fold`` is the position of the
        model in the retained list, ``id`` the app name parsed from the model name and
        ``trial_num`` the trailing ``'1'``/``'2'`` of that name (``'1'`` when absent).
    """
    columns = ['mse','mae','mape','fold','id','trial_num']
    max_models_per_app = 5
    records = []
    for app in L_Apps:
        patterns = (f"{trial_id}_{app}_f", f"{trial_id}_{app}_1_f", f"{trial_id}_{app}_2_f")
        best_model_names = [name for name in model_args['model'] if any(pattern in name for pattern in patterns)]

        if len(best_model_names) > max_models_per_app:
            print(best_model_names)
            best_model_names = best_model_names[:max_models_per_app]

        for k,selected_model_name in enumerate(best_model_names):
            model_metrics = model_args['model'][selected_model_name]['performance']['test_metrics']
            # Parse the name of the model being processed (not the first one of the list),
            # so rows coming from different trials keep their own trial number.
            app_num = selected_model_name.split(split_key)[-1].split('_f')[0]
            # Only a trailing '_1'/'_2' is a trial suffix; default to trial 1 when it is missing.
            if not app_num.endswith(('_1','_2')):
                app_num = app_num + '_1'
            name_id,trial_num = app_num.rsplit('_',1)

            records.append((model_metrics['mse'],model_metrics['mae'],model_metrics['mape'],k,name_id,trial_num))
    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(records,columns = columns)



In [None]:
# Build one metrics table per results folder, then stack them.
# Both folders are queried with the same trial id.
trial_id = 'subway_in_subway_out_STGCN_MSELoss_2025_02_19_00_05_19271NETMOB_eps100'

folder_name = 're_validation_epsilon100'
save_path = f'save/K_fold_validation/training_with_HP_tuning/{folder_name}/best_models'
# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
# The context manager closes the file handle once the pickle has been read.
with open(f'{current_path}/{save_path}/model_args.pkl','rb') as model_args_file:
    model_args = pickle.load(model_args_file)

df1 = get_df_results(trial_id,model_args,L_Apps)

folder_name = 're_validation'
save_path = f'save/K_fold_validation/training_with_HP_tuning/{folder_name}/best_models'
with open(f'{current_path}/{save_path}/model_args.pkl','rb') as model_args_file:
    model_args = pickle.load(model_args_file)

df2 = get_df_results(trial_id,model_args,L_Apps)
df = pd.concat([df1,df2])

In [None]:
# Preview the first rows of the concatenated results table.
df.head()

In [None]:
import pandas as pd
import numpy as np 
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.palettes import Category10
from bokeh.transform import factor_cmap
from bokeh.plotting import figure, show, output_file, save,output_notebook
from bokeh.models import ColumnDataSource, Toggle, CustomJS,HoverTool, Legend
from bokeh.layouts import layout,row,column


def plot_boxplot_on_metric(df,metric_i='mse',save_path="MSE_distribution_per_app_and_per_fold.html"):
    """Draw, for every app id, a box plot of ``metric_i`` with the per-fold values on top.

    The figure is displayed in the notebook and, when ``save_path`` is given, also
    written as a standalone HTML file.

    Args:
        df: DataFrame with the columns ``'id'``, ``'fold'`` and ``metric_i``. It is not modified.
        metric_i: Name of the column to plot.
        save_path: Destination of the HTML export, or ``None`` to skip the export.

    Returns:
        None.
    """
    # Local import: only needed for the standalone HTML export.
    from bokeh.resources import CDN

    # Work on a copy so the caller's DataFrame does not gain a 'fold_str' column.
    points = df.copy()
    points["fold_str"] = points["fold"].astype(str)

    grp = points.groupby("id")[metric_i]
    # Apps are laid out on the x axis by increasing mean value of the metric.
    sdf_ids = grp.mean().sort_values().index.tolist()

    q1 = grp.quantile(0.25)
    stats = pd.DataFrame({
        "id": q1.index,
        "min_v": grp.min().values,
        "q1": q1.values,
        "median_v": grp.quantile(0.50).values,
        "q3": grp.quantile(0.75).values,
        "max_v": grp.max().values
    })
    source_box = ColumnDataSource(stats)
    source_points = ColumnDataSource(points)

    plot_title = f"{metric_i} distribution per app and per folds"
    p = figure(
        x_range=sdf_ids,
        width=1200, height=400,
        title=plot_title
    )
    box_width = 0.2

    # Whiskers (min -> q1 and q3 -> max), then the two halves of the box around the median.
    p.segment("id","max_v","id","q3", source=source_box, line_width=1,line_color = 'black')
    p.segment("id","min_v","id","q1", source=source_box, line_width=1,line_color = 'black')
    p.vbar("id", box_width, "median_v", "q3", source=source_box, line_width=2,fill_color = 'grey',fill_alpha = 0.3,line_color = 'black')
    p.vbar("id", box_width, "q1", "median_v", source=source_box, line_width=2,fill_color = 'grey',fill_alpha = 0.3,line_color = 'black')

    fold_factors = points["fold_str"].unique().tolist()
    n_folds = len(fold_factors)
    # Category10 only provides palettes for 3 to 10 colours: clamp the lookup and
    # cycle through the colours so that any number of folds can be drawn.
    base_palette = Category10[min(max(n_folds, 3), 10)]
    palette = [base_palette[i % len(base_palette)] for i in range(n_folds)]
    # scatter() draws circles by default; circle(size=...) is deprecated in recent Bokeh releases.
    p.scatter(
        x="id", y=metric_i,
        source=source_points,
        size=7,
        line_color="black",
        fill_color=factor_cmap("fold_str", palette=palette, factors=fold_factors),
        legend_group="fold_str"
    )

    p.xaxis.axis_label = "App"
    p.yaxis.axis_label = metric_i
    p.xaxis.major_label_orientation = np.pi/2
    p.legend.title = "Fold"
    output_notebook()
    show(p)

    if save_path is not None:
        # Give the file name to save() directly instead of calling output_file():
        # output_file() changes Bokeh's global output state, so a later show() would
        # also write its figure into this file.
        save(p, filename=save_path, resources=CDN, title=plot_title)

# One box plot (and one HTML export) per metric.
# NOTE(review): `csv_with_mase` is only assigned in a later cell of this notebook,
# so that cell has to be executed before this one.
for save_path, metric_i in (
    ("MASE_distribution_per_app_and_per_fold.html", 'mase'),
    ("MAE_distribution_per_app_and_per_fold.html", 'mae'),
    ("MSE_distribution_per_app_and_per_fold.html", 'mse'),
):
    plot_boxplot_on_metric(csv_with_mase,metric_i,save_path)

In [None]:
import numpy as np 
import pickle 

import sys
import os

# Make the directory two levels above the notebook's working directory importable.
notebook_dir = os.getcwd()
current_path = notebook_dir
working_dir = os.path.abspath(os.path.join(current_path, os.pardir, os.pardir))
if sys.path.count(working_dir) == 0:
    sys.path.insert(0, working_dir)
    
from high_level_DL_method import load_model,load_optimizer_and_scheduler
from trainer import Trainer
from examples.train_and_visu_non_recurrent import get_multi_ds
from utils.metrics import evaluate_metrics

# Init
# Set to True to read the models saved under 're_validation_epsilon100'
# instead of 're_validation'.
use_epsilon100_folder = False
folder_name = 're_validation_epsilon100' if use_epsilon100_folder else 're_validation'

save_path = f'save/K_fold_validation/training_with_HP_tuning/{folder_name}/best_models'
# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
# The context manager closes the file handle once the pickle has been read.
with open(f'{current_path}/{save_path}/model_args.pkl','rb') as model_args_file:
    model_args = pickle.load(model_args_file)
trial_id = 'subway_in_subway_out_STGCN_MSELoss_2025_02_19_00_05_19271NETMOB_eps100'


def load_trained_model(selected_model_path,ds,model_fold_i):
    """Rebuild the model of one fold, restore its saved weights and wrap it in a ``Trainer``.

    Args:
        selected_model_path: Path of the file read with ``torch.load``; the loaded
            object must expose a ``'state_dict'`` entry.
        ds: Dataset object handed to ``load_model`` and ``Trainer``.
        model_fold_i: Key of the module-level ``model_args['model']`` whose ``'args'``
            dict configures the model, the optimizer and the scheduler.

    Returns:
        The ``Trainer`` built from ``ds``, the restored model and its optimizer,
        loss function and scheduler.
    """
    checkpoint = torch.load(selected_model_path)
    fold_args = Namespace(**model_args['model'][model_fold_i]['args'])
    net = load_model(ds, fold_args)
    net.load_state_dict(checkpoint['state_dict'])
    optimizer,scheduler,loss_function = load_optimizer_and_scheduler(net,fold_args)
    return Trainer(ds,net,fold_args,optimizer,loss_function,scheduler = scheduler)

def get_metrics_from_test(trainer,ds,metric_list = ['mse','mae','mape','mase']):
    """Run ``trainer.testing`` with the dataset normalizer and score its predictions.

    Args:
        trainer: Object exposing ``testing(normalizer)``, which returns three values;
            the first two are passed to ``evaluate_metrics``.
        ds: Dataset object providing the ``normalizer`` attribute.
        metric_list: Names of the metrics forwarded to ``evaluate_metrics``.

    Returns:
        Whatever ``evaluate_metrics`` returns for the predictions and targets.
    """
    # The third value returned by testing() is not needed here.
    predictions,targets,_ = trainer.testing(ds.normalizer)
    return evaluate_metrics(predictions,targets,metric_list)

# Re-evaluate the saved models of every app on their test split and gather the
# metrics (one row per app and per fold) in `pandas_total_results`.
# The trial number only depends on the folder being read, so it is computed once.
trial_num = 2 if 'eps' in folder_name else 1
fold_results = []
for app_ind,app in enumerate(L_Apps):
    print('avancement: ',(app_ind+1)/len(L_Apps))
    best_model_names = [name for name in model_args['model'].keys() if (f"{trial_id}_{app}_f" in name) or (f"{trial_id}_{app}_1_f" in name) or (f"{trial_id}_{app}_2_f" in name)]
    if len(best_model_names)>5:
        best_model_names=best_model_names[:5]
    if not best_model_names:
        # Without this guard the lookup of the first fold below raises an IndexError.
        print(f'No saved model found for app {app}: skipped')
        continue

    ## Load datasets:
    # Use the args of the first fold to rebuild the inputs (which app, which train/valid/test prop...):
    args_0 = Namespace(**model_args['model'][best_model_names[0]]['args'])
    args_with_contextual,K_subway_ds = get_multi_ds(args_0.model_name, args_0.dataset_names,args_0.dataset_for_coverage,args_init = args_0,fold_to_evaluate = np.arange(args_0.K_fold))

    # With the datasets loaded, restore each trained model and evaluate it on its own fold:
    for k_fold,model_fold_i in enumerate(best_model_names):
        ds = K_subway_ds[k_fold]
        selected_model_path = f"{current_path}/{save_path}/{model_fold_i}.pkl"

        # Model:
        trainer = load_trained_model(selected_model_path,ds,model_fold_i)

        # Metrics:
        dic_pred_metrics = get_metrics_from_test(trainer,ds,metric_list = ['mse','mae','mape','mase'])

        pd_local_results = pd.DataFrame(dict(mse = [dic_pred_metrics['mse']],mase = [dic_pred_metrics['mase']],mae = [dic_pred_metrics['mae']], mape = [dic_pred_metrics['mape']],
                                            fold = [k_fold], id = [app], trial_num = [trial_num]))
        fold_results.append(pd_local_results)

# Concatenate once at the end instead of re-copying the whole table at every fold.
pandas_total_results = pd.concat(fold_results) if fold_results else pd.DataFrame()
       


In [None]:
# Stack the metric tables of two evaluation runs into one table and display it.
# NOTE(review): `pandas_total_results_2` is not assigned in any visible cell;
# presumably it is the result of re-running the evaluation cell with the other
# `folder_name`. Confirm it exists before running this cell.
csv_with_mase = pd.concat([pandas_total_results,pandas_total_results_2])
csv_with_mase

In [None]:
#csv_with_mase.to_csv('training_netmob_metric_tabs_concat.csv')
#pandas_total_results.to_csv('training_netmob_metric_tabs_1.csv')
#pandas_total_results.to_csv('training_netmob_metric_tabs_2.csv')

## Accéder à la config originelle d'un hp tuning :

In [33]:
import pickle 
import sys
import os
import pandas as pd 

# Make the directory two levels above the notebook's working directory importable.
notebook_dir = os.getcwd()
current_path = notebook_dir
working_dir = os.path.abspath(os.path.join(current_path, os.pardir, os.pardir))
if sys.path.count(working_dir) == 0:
    sys.path.insert(0, working_dir)

save_path = 'save/K_fold_validation/training_with_HP_tuning/best_models'
# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
# The context manager closes the file handle once the pickle has been read.
with open(f'{working_dir}/{save_path}/model_args.pkl','rb') as model_args_file:
    model_args = pickle.load(model_args_file)

# Trials whose saved hyper-parameters are printed below.
trial_ids = [
    'subway_in_subway_out_STGCN_MSELoss_2025_02_19_00_05_19271',
    'subway_in_subway_out_STGCN_MSELoss_2025_03_29_00_17_68381',
]

for trial_id in trial_ids:
    print('\nTrial id: ',trial_id)
    # The configuration is read from the fold-0 entry of the trial.
    args = model_args['model'][f"{trial_id}_f0"]['args']
    print('epochs: ',args['epochs'],' lr: ',args['lr'],' wd: ',args['weight_decay'],' dropout: ',args['dropout'])
    # A missing 'scheduler' key and a None value are reported the same way.
    if args.get('scheduler') is None:
        print('Scheduler: ',None)
    else:
        print('Scheduler: milestone: ',args['torch_scheduler_milestone'],
              ' torch_scheduler_gamma: ',args['torch_scheduler_gamma'],
              ' torch_scheduler_lr_start_factor: ',args['torch_scheduler_lr_start_factor'])




Trial id:  subway_in_subway_out_STGCN_MSELoss_2025_02_19_00_05_19271
epochs:  100  lr:  0.00105  wd:  0.0188896655584368  dropout:  0.271795372610271
Scheduler: milestone:  28.0  torch_scheduler_gamma:  0.9958348861339396  torch_scheduler_lr_start_factor:  0.8809942312067847

Trial id:  subway_in_subway_out_STGCN_MSELoss_2025_03_29_00_17_68381
epochs:  500  lr:  0.00058  wd:  0.0413499097011521  dropout:  0.2171486868627564
Scheduler:  None


## Accéder aux train/valid loss d'une K-fold validation :

In [2]:
import sys
import os
import pandas as pd 

# Make the directory two levels above the notebook's working directory importable.
notebook_dir = os.getcwd()
current_path = notebook_dir
working_dir = os.path.abspath(os.path.join(current_path, os.pardir, os.pardir))
if sys.path.count(working_dir) == 0:
    sys.path.insert(0, working_dir)
    
from plotting.TS_analysis import plot_TS


# Read the loss tables saved for two trials and plot each of them.
save_path = 'save/K_fold_validation/training_with_HP_tuning'
trial_id_500 = 'subway_in_subway_out_STGCN_MSELoss_2025_03_29_00_17_68381'
trial_id_100 = 'subway_in_subway_out_STGCN_MSELoss_2025_02_19_00_05_19271'

losses_dir = f"{working_dir}/{save_path}"
df_losses500 = pd.read_csv(f"{losses_dir}/Losses_{trial_id_500}.csv",index_col = 0)
df_losses100 = pd.read_csv(f"{losses_dir}/Losses_{trial_id_100}.csv",index_col = 0)

# Both figures share the same display options.
plot_options = dict(width=800,height=400,bool_show=True,title="Training",scatter = False,x_datetime = False)
plot_TS(df_losses500,**plot_options)
plot_TS(df_losses100,**plot_options)

In [None]:
import pandas as pd
# Plot the [:, 0, 0] slice of the predictions against the same slice of the ground truth.
# NOTE(review): `Preds` and `Y_true` are not assigned at notebook level in the visible
# cells (they only appear as locals of `get_metrics_from_test`). Confirm where they
# come from before running this cell.
pd.DataFrame(dict(pred= Preds[:,0,0],true = Y_true[:,0,0])).plot()