In [1]:
import sys
import os

working_dir = os.path.expanduser('~/prediction-validation')
if working_dir not in sys.path:
    sys.path.insert(0, working_dir)
import pandas as pd
import numpy as np
import torch
import pickle
from argparse import Namespace

from utils.utilities_DL import get_loss,load_model_and_optimizer
from build_inputs.load_datasets_to_predict import load_datasets_to_predict
from examples.load_best_config import get_trainer_and_ds_from_saved_trial,load_trainer_ds_from_saved_trial

def get_TS_prediction(trainer,ds,stations_to_plot=('CHA',),training_mode='test',name= ''):
    """Run the trainer on one split and collect per-station predictions and targets.

    Parameters
    ----------
    trainer : object
        Must expose ``testing(normalizer, training_mode=...)`` returning a 3-tuple
        whose first two items are the predictions and the ground truth. Both are
        indexed as ``[:, station_index, 0]`` and must support
        ``.detach().cpu().numpy()``.
    ds : object
        Must expose ``spatial_unit`` (iterable of station labels), ``normalizer``
        and ``tensor_limits_keeper``; the latter must hold a DataFrame attribute
        named ``df_verif_<training_mode>``.
    stations_to_plot : iterable or None
        Labels of the spatial units to extract. ``None`` selects every spatial
        unit of ``ds``. The default is a tuple rather than a list so that no
        mutable object is shared between calls.
    training_mode : str
        Split name forwarded to ``trainer.testing`` and used to pick the
        ``df_verif_<training_mode>`` frame.
    name : str
        Prefix of the prediction column names.

    Returns
    -------
    tuple
        ``(df_pred, df_true, index_df)`` where ``df_pred`` has one
        ``{name}_{station}_Pred`` column per station, ``df_true`` has one
        ``{station}_Y_true`` column per station, and ``index_df`` holds the
        values of the last column of ``df_verif_<training_mode>``.

    Raises
    ------
    ValueError
        If a requested station is not one of the spatial units of ``ds``.
    """
    spatial_units = list(ds.spatial_unit)
    if stations_to_plot is None:
        stations_to_plot = spatial_units
    else:
        # Materialise once so generators can be validated and then iterated again.
        stations_to_plot = list(stations_to_plot)

    # Fail before running inference, with a message that names the offending
    # stations instead of the bare "x is not in list" raised by list.index.
    missing = [station for station in stations_to_plot if station not in spatial_units]
    if missing:
        preview = spatial_units[:10]
        suffix = ', ...' if len(spatial_units) > len(preview) else ''
        raise ValueError(
            f"Unknown spatial unit(s) {missing!r}: not found among the "
            f"{len(spatial_units)} spatial units of this dataset ({preview!r}{suffix})."
        )
    station_indices = [spatial_units.index(station) for station in stations_to_plot]

    predict,Y_true,_ = trainer.testing(ds.normalizer, training_mode =training_mode)

    # Only index 0 of the last axis is extracted for each selected station.
    df_pred = pd.DataFrame({
        f'{name}_{spatial_units[station_i]}_Pred': predict[:,station_i,0].detach().cpu().numpy()
        for station_i in station_indices
    })
    df_true = pd.DataFrame({
        f'{spatial_units[station_i]}_Y_true': Y_true[:,station_i,0].detach().cpu().numpy()
        for station_i in station_indices
    })

    # The last column of the verification frame is used as the index of the series.
    index_df = getattr(ds.tensor_limits_keeper,f"df_verif_{training_mode}").iloc[:,-1].values
    return df_pred,df_true,index_df


# Single switch choosing which saved trial this notebook inspects.
# True  -> trial stored under '~/prediction-validation/save'
# False -> trial stored under '.../K_fold_validation/training_wo_HP_tuning/optim'
USE_SAVE_ROOT_TRIAL = False

if USE_SAVE_ROOT_TRIAL:
    working_dir = os.path.expanduser('~/prediction-validation/save')
    subfolder = 'best_models'
    trial_id ='PeMS08_flow_PeMS08_speed_PeMS08_occupancy_calendar_STAEformer_HuberLossLoss_2025_05_22_16_06_17612'
    # Other trial ids that were used here:
    # 'subway_in_subway_out_ASTGCN_MSELoss_2025_04_21_20_06_76371'
    # 'subway_in_STGCN_MSELoss_2024_08_25_18_05_25229'
    # 'subway_in_calendar_STGCN_MSELoss_2024_08_25_22_56_92429'
    # 'netmob_subway_in_STGCN_ImageAvgPooling_MSELoss_2024_08_24_01_42_17375'
    # 'netmob_subway_in_STGCN_FeatureExtractor_ResNetInspired_MSELoss_2024_08_23_06_53_46982'
    # 'netmob_subway_in_calendar_STGCN_ImageAvgPooling_MSELoss_2024_08_27_00_16_90667'
    # 'netmob_subway_in_calendar_STGCN_FeatureExtractor_ResNetInspired_MSELoss_2024_08_28_06_04_41108'
    # 'subway_in_STGCN_MSELoss_2024_08_21_14_50_2810'
    model_id = f"{trial_id}_F6_f0"     # One trial_id can be associated to several folds
else:
    working_dir = os.path.expanduser('~/prediction-validation/save/K_fold_validation/training_wo_HP_tuning/optim')
    # Other sub-folders that were used here:
    # 'STGCN_architecture'
    # 'CRITER_3_4_5_lanes_flow_STAEformer'
    subfolder = 'best_models'
    # Other trial ids that were used here:
    # '_calendar'
    # '_calendar_Google_Maps_Deezer_IRIS_clustering015'
    trial_id = '_Init'
    model_id = f"{trial_id}_f5"     # One trial_id can be associated to several folds


# Pickle file of the selected model; later cells reuse working_dir, subfolder and model_id.
model_save_path = f"{working_dir}/{subfolder}/{model_id}.pkl"

print('Model path: ',model_save_path)

  from .autonotebook import tqdm as notebook_tqdm


Training and Hyper-parameter tuning with Ray is not possible
Model path:  /home/rrochas/prediction-validation/save/K_fold_validation/training_wo_HP_tuning/optim/best_models/_Init_f5.pkl


In [2]:
# Load the metadata saved next to the models of this sub-folder.
# NOTE: pickle.load can execute arbitrary code while loading; only open files
# that were produced by this project.
# The context manager closes the file handle, which the previous
# pickle.load(open(...)) form never did.
with open(f"{working_dir}/{subfolder}/model_args.pkl",'rb') as model_args_file:
    model_args = pickle.load(model_args_file)

# Entry of the selected model: its saved arguments and its recorded performance.
saved_trial = model_args['model'][model_id]
args = Namespace(**saved_trial['args'])
test_metrics = saved_trial['performance']['test_metrics']

# Report the aggregated test metrics, then one line per prediction step.
print(f"\n--------- Test ---------\nAll Steps RMSE = {test_metrics['rmse_all']}, MAE = {test_metrics['mae_all']}, MAPE = {test_metrics['mape_all']}")
for h in np.arange(1,args.step_ahead+1):
    print(f"Step {h} RMSE = {test_metrics[f'rmse_h{h}']}, MAE = {test_metrics[f'mae_h{h}']}, MAPE = {test_metrics[f'mape_h{h}']}")


--------- Test ---------
All Steps RMSE = 52.836530685424805, MAE = 37.24277591705322, MAPE = 7.8500648736953735
Step 1 RMSE = 40.00498580932617, MAE = 28.864337921142578, MAPE = 6.60634708404541
Step 2 RMSE = 49.3797721862793, MAE = 35.085243225097656, MAPE = 7.536723613739014
Step 3 RMSE = 57.39794921875, MAE = 40.200626373291016, MAPE = 8.26611614227295
Step 4 RMSE = 64.56341552734375, MAE = 44.82089614868164, MAPE = 8.991072654724121


In [None]:

# --- Prétraitement ---
try:
    df[DATE_COL] = pd.to_datetime(df[DATE_COL])
    df_pivoted = df.pivot_table(index=DATE_COL, columns=LOCATION_COL, values=VALUE_COL, aggfunc='sum',fill_value = 0)

    # Reindex 
    df_pivoted.reindex(pd.date_range(start=START, end=END, freq=target_freq), fill_value=0, inplace=True)
    df_filtered = df_pivoted[df_pivoted.index.isin(coverage_period)].copy()

    local_df_dates = pd.DataFrame(df_filtered.index, columns=['date'])

    if df_filtered.empty:
        print(f"ERROR : No data available for {file_name}.csv in the specified coverage period.")
        return None

    # Convert into Tennsor
    data_T = torch.tensor(df_filtered.values).float()

except :
    print(f"ERROR during preprocessing of {file_name}.csv: DataFrame might be empty or invalid.")
    return None


dims = [0] # if [0] then Normalisation on temporal dim
processed_input = load_input_and_preprocess(dims = dims,normalize=normalize,invalid_dates=invalid_dates,args=args,data_T=data_T,coverage_period=coverage_period,name=name,
                                            minmaxnorm=minmaxnorm,standardize=standardize)

# --- Finalisation Métadonnées ---
processed_input.spatial_unit = df_filtered.columns.tolist()
processed_input.C = C
processed_input.periods = None # Pas de périodicité spécifique définie ici

print(f"Chargement et prétraitement de {file_base_name} terminés.")
return processed_input

# --- Point d'entrée pour exécution directe (optionnel, pour tests) ---
# ... (Similaire à subway_indiv.py, adapter les mocks) ...

In [3]:
# Spatial units of interest for this run (earlier selection: [893,67,999,65,291,1176]).
stations_to_plot = [292, 64, 66, 1000]

# Overrides forwarded to the loader; presumably they switch off shuffling and
# data augmentation when the trial is rebuilt - TODO confirm against the loader.
modification = dict(shuffle=False, data_augmentation=False)

# Alternative loader kept for reference:
# trainer,ds,args = get_trainer_and_ds_from_saved_trial(args,model_save_path,modification=modification)
trainer, ds, args_init = load_trainer_ds_from_saved_trial(
    args,
    model_save_path,
    modification=modification,
)

----------------------------------------
Loading the Complete Dataset for K-fold splitting
Coverage Period: 18480 elts between 2019-03-16 00:00:00 and 2019-05-31 23:54:00
Invalid dates within this fold: 791

>>>Tackle Target dataset: CRITER_3_4_5_lanes_flow
Number of invalid time-slots (i.e data when every single sensors does not have data): 82
number of nan values before filling :  539557
nb sparse_columns :  96
Number of sensors after filter sparse sensor :  51

Total anomalies detected: 4561
 Data loaded with shape: (18480, 51)
   Init Dataset: 'torch.Size([18480, 51]). 0 Nan values
   TRAIN contextual_ds: torch.Size([10558, 51, 10])
   VALID contextual_ds: torch.Size([3520, 51, 10])
   TEST contextual_ds: torch.Size([3519, 51, 10])
Init U/Utarget size: torch.Size([17598, 51, 10])/torch.Size([17598, 51, 4]) Train/Valid/Test 10558 3520 3519

----------------------------------------
Loading the dataset for fold n°5
Model size: 0.003GB


In [8]:
from plotting.TS_analysis import plot_TS


# Spatial units to display (other selections tried: [292,64,66,1000], [893,67,999,65,291,1176]).
stations_to_plot = [893, 67, 999, 1176]

# Predictions and targets of the loaded trainer on the test split.
df_pred1, df_true, index_df = get_TS_prediction(
    trainer,
    ds,
    stations_to_plot=stations_to_plot,
    training_mode='test',
    name='CRITER',
)

# Predictions and ground truth side by side, indexed by the values returned as index_df.
df_to_plot = pd.concat((df_pred1, df_true), axis=1)
df_to_plot.index = index_df

plot_TS(
    df_to_plot,
    width=1200,
    height=400,
    bool_show=True,
    title='Comparison of Prediction Quality',
)

In [None]:
from calendar_class import get_time_slots_labels
from constants.paths import FILE_NAME
# Read the saved checkpoint; its 'state_dict' entry is applied to the model below.
# NOTE(review): torch.load unpickles the file, so only load checkpoints produced by this project.
model_param = torch.load(model_save_path)

# Rebuild the dataset, the loss and the model from the saved arguments.
# NOTE(review): match_period_coverage_with_netmob is not imported in any visible cell;
# on a fresh kernel this line would raise a NameError - TODO add the missing import.
coverage_period = match_period_coverage_with_netmob(FILE_NAME,dataset_names=['subway_in','netmob'])
dataset,_,_ = load_datasets_to_predict(args,coverage_period)
_,dic_class2rpz,_,_ = get_time_slots_labels(dataset,nb_class = [0,1,2,3])
loss_function = get_loss(args)
model,optimizer,scheduler = load_model_and_optimizer(args,dic_class2rpz)

# Copy the saved weights into the freshly built model.
model.load_state_dict(model_param['state_dict'])

Time-step per hour: 4.0
coverage period: 2019-03-16 00:00:00 - 2019-05-31 23:45:00

Init Subway-In Dataset:  torch.Size([7392, 40])
Number of Nan Value:  tensor(0)
Total Number of Elements:  295680 


 Tackling Training Set

 Tackling Validation Set

 Tackling Training Set

 Tackling Validation Set

U size:  torch.Size([6719, 40, 7]) Utarget size:  torch.Size([6719, 40, 1])
U_train size:  torch.Size([5145, 40, 7]) Utarget_train size:  torch.Size([5145, 40, 1])
U_valid size:  torch.Size([1477, 40, 7]) Utarget_valid size:  torch.Size([1477, 40, 1])
U_train min:  tensor(0.) U_train max:  tensor(1.)
U_valid min:  tensor(0.) U_valid max:  tensor(2.3191)
model size: 0.001GB
number of total parameters: 249889
number of trainable parameters: 249889


<All keys matched successfully>

In [None]:
# Spatial units of interest for this run (earlier selection: [893,67,999,65,291,1176]).
stations_to_plot = [292, 64, 66, 1000]

# Overrides forwarded to the loader; presumably they switch off shuffling and
# data augmentation when the trial is rebuilt - TODO confirm against the loader.
modification = dict(shuffle=False, data_augmentation=False)

# Location and identifiers of the saved trial to reload.
save_folder = 'K_fold_validation/training_with_HP_tuning/re_validation'
trial_id = 'subway_in_STGCN_MSELoss_2025_01_20_14_27_20569'
add_name_id = 'RE_CRITER_3lanes_rich_interpolation'

# Note: this rebinds `args`, which an earlier cell built from model_args.pkl.
trainer, ds, args = get_trainer_and_ds_from_saved_trial(
    trial_id,
    add_name_id,
    save_folder,
    modification,
)

Training and Hyper-parameter tuning with Ray is not possible
>>>>Model: STGCN; K_fold = 6; Loss function: MSE 
Invalid dates within this fold: 387

total remaining idptm : 67
after filtering df: (3685, 67)
after removing NaN: (3685, 36)


NameError: name 'blabla' is not defined

In [2]:
# Second saved trial, loaded for comparison with the first one.
# Relies on `save_folder` and `modification` defined in another cell, so that cell must run first.
trial_id = 'subway_in_subway_out_STGCN_VariableSelectionNetwork_MSELoss_2025_01_20_05_38_87836'
add_name_id = 'RE_CRITER_3lanes_netmob_POIS_rich_interpolation_Waze_DL'
# Results go to trainer2 / ds2 / args2 so the first trial's objects are not overwritten.
trainer2,ds2,args2 = get_trainer_and_ds_from_saved_trial(trial_id,add_name_id,save_folder,modification)

>>>>Model: STGCN; K_fold = 6; Loss function: MSE 
Invalid dates within this fold: 387



Init Dataset: 'torch.Size([3685, 20]) with 73700 Total nb of elements and 0 Nan values
nb CRITER_3lanes invalid dates:  387
vision_input_type POIs
vision_model_name VariableSelectionNetwork
Init U/Utarget size: torch.Size([2344, 20, 7])/torch.Size([2344, 20, 1]) Train/Valid/Test 1406 469 468

 ===== ERROR WITH prefetch_factor====  
ValueError: prefetch_factor option could only be specified in multiprocessing.let num_workers > 0 to enable multiprocessing
Considered Spatial-Unit:  None
Invalid dates within this fold: 241

Init Dataset: 'torch.Size([2294, 20]) with 45880 Total nb of elements and 0 Nan values
nb CRITER_3lanes invalid dates:  241
vision_input_type POIs
vision_model_name VariableSelectionNetwork
Init U/Utarget size: torch.Size([1467, 20, 7])/torch.Size([1467, 20, 1]) Train/Valid/Test 530 469 467

 ===== ERROR WITH prefetch_factor====  
ValueError: prefetch_factor option could only be specified in multiprocessing.let num_workers > 0 to enable multiprocessing
number of Parame

In [3]:
from plotting.TS_analysis import plot_TS


# Spatial units to display (other selections tried: [292,64,66,1000], [893,67,999,65,291,1176]).
stations_to_plot = [893, 67, 999, 1176]

# Test-split predictions of the first model.
df_pred1, df_true, index_df = get_TS_prediction(
    trainer,
    ds,
    stations_to_plot=stations_to_plot,
    training_mode='test',
    name='CRITER',
)

# Test-split predictions of the second model. df_true and index_df are rebound
# here, so the values used below come from this second call.
df_pred2, df_true, index_df = get_TS_prediction(
    trainer2,
    ds2,
    stations_to_plot=stations_to_plot,
    training_mode='test',
    name='CRITER_netmob_waze',
)

# Both models' predictions next to the ground truth, indexed by the values returned as index_df.
df_pred_all_models = pd.concat((df_pred1, df_pred2, df_true), axis=1)
df_pred_all_models.index = index_df

plot_TS(
    df_pred_all_models,
    width=1200,
    height=400,
    bool_show=True,
    title='Comparison of Prediction Quality',
)

ValueError: 893 is not in list