In [1]:
# GET PARAMETERS
from examples.benchmark import local_get_args,get_inputs,train_on_ds
from utils.save_results import get_trial_id
import sys
import os
import pandas as pd

from constants.config import modification_contextual_args
from plotting.TS_analysis import drag_selection_box,plot_single_point_prediction,plot_prediction_error,plot_loss_from_trainer,plot_TS
from build_inputs.preprocess_subway_15 import get_trigram_correspondance
from bokeh.plotting import show,output_notebook
from bokeh.layouts import column,row
from utils.specific_event import rugby_matches
import geopandas as gpd
# Make the parent of the current working directory importable.
# The membership test keeps the insertion idempotent when the cell is re-run.
current_path = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_path, os.pardir))
if sys.path.count(parent_dir) == 0:
    sys.path.insert(0, parent_dir)


#### Visualisation parameters (module-level globals read by the analysis helpers)
# NOTE(review): `range` shadows the Python builtin for the rest of the notebook;
# the name is kept because other cells and functions read it.
range = 3 * 60  # +/- window (minutes) assumed to be affected around the event
width, height = 1000, 300
min_flow = 20

# Spatial unit (station trigram) to evaluate; alternatives: 'BON', 'GER'
station = 'BON'

# Station-name <-> trigram correspondence table, then the trigram values of
# the listed Metro B stations.
df_correspondance = get_trigram_correspondance()
Metro_B_stations = [
    'Charpennes', 'Brotteaux', 'Part-Dieu', 'Place Guichard', 'Saxe - Gambetta',
    'Jean Macé', 'Place Jean Jaurès', 'Debourg', 'Stade de Gerland', "Gare d'Oullins",
]
Metro_B_TRG = list(
    df_correspondance
    .set_index('Station')
    .loc[Metro_B_stations]
    .values
    .reshape(-1)
)


def netmob_volume_on_POI(gdf_POI_2_tile_ids,app = 'Instagram',transfer_mode = 'DL',type_POI = 'stadium', spatial_unit = 'Lou_rugby',POI_or_station='POI',expanded='', folder_path= '../../../../data/rrochas/prediction_validation'):
    '''
    Load the NetMob CSV attached to one POI and return its per-row total.

    inputs:
    -------
    gdf_POI_2_tile_ids : table with (at least) the columns 'tag', 'name', 'type' and 'id'
    app, transfer_mode, type_POI, expanded : used to build the CSV path
    spatial_unit, POI_or_station : used (with type_POI / expanded) to select the row
    folder_path : root folder containing the 'POIs' directory

    outputs:
    --------
    pd.Series : sum over all columns of the CSV, indexed by the parsed datetime index

    Raises AssertionError when the selection does not match exactly one row.
    '''
    wanted_type = f"{POI_or_station}{expanded}"
    row_selector = (
        (gdf_POI_2_tile_ids['tag'] == type_POI)
        & (gdf_POI_2_tile_ids['name'] == spatial_unit)
        & (gdf_POI_2_tile_ids['type'] == wanted_type)
    )
    gdf_obj = gdf_POI_2_tile_ids[row_selector]
    assert len(gdf_obj) == 1, f"Length of gdf = {len(gdf_obj)} while it should be = 1"

    # The identifier of the selected row is part of the CSV file name
    osmid = gdf_obj['id'].values[0]
    csv_folder = f"{folder_path}/POIs/netmob_POI_Lyon{expanded}/{type_POI}/{app}"
    csv_file = f"{csv_folder}/df_{osmid}_{transfer_mode}.csv"

    table = pd.read_csv(csv_file, index_col=0)
    totals = table.sum(axis=1)
    totals.index = pd.to_datetime(totals.index)
    return totals

def evaluate_config(model_name,dataset_names,dataset_for_coverage,vision_model_name,transfer_modes= None,
                    type_POIs = ['stadium','nightclub'],
                    spatial_units = ['Lou_rugby','Ninkasi_Kao'],
                    apps = ['Instagram'],
                    POI_or_stations = ['POI'],
                    expanded ='',
                    modification = None
                    ):
    '''
    Train a model on the first fold, then run the special-event analysis on
    the 'test', 'valid' and 'train' splits using non-shuffled dataloaders.

    inputs:
    -------
    model_name, dataset_names, dataset_for_coverage : forwarded to get_ds
    vision_model_name : accepted for call compatibility; not used in this body
    transfer_modes, type_POIs, spatial_units, apps, POI_or_stations, expanded :
        forwarded unchanged to analysis_on_specific_training_mode
    modification : optional dict of argument overrides forwarded to get_ds.
        It is copied on entry, so neither the caller's dict nor any default
        is altered by the 'shuffle' override applied below.

    outputs:
    --------
    (trainer, ds, ds_no_shuffle, args)
    '''
    # Private copy: the original code updated the received dict in place, which
    # leaked 'shuffle': False into the caller's object (and into the shared
    # default dict across calls).
    modification = dict(modification) if modification is not None else {}

    ds,args,trial_id,save_folder,df_loss = get_ds(model_name,dataset_names,dataset_for_coverage,modification=modification)
    trainer,df_loss = train_on_ds(ds,args,trial_id,save_folder,df_loss)

    # Rebuild the dataset with shuffling disabled and hand its dataloaders to
    # the trained trainer, so that the 'train' dataloader is not shuffled either.
    modification['shuffle'] = False
    ds_no_shuffle, *_ = get_ds(model_name,dataset_names,dataset_for_coverage,modification=modification)
    trainer.dataloader = ds_no_shuffle.dataloader

    # Load gdf for POIs:
    folder_path = '../../../../data/rrochas/prediction_validation'
    gdf_POI_2_tile_ids = gpd.read_file(f"{folder_path}/POIs/gdf_POI_2_tile_ids.geojson")

    # Same analysis on each split, in the original order
    for training_mode in ('test', 'valid', 'train'):
        analysis_on_specific_training_mode(trainer,ds_no_shuffle,gdf_POI_2_tile_ids,
                                           training_mode=training_mode,
                                           transfer_modes=transfer_modes,
                                           type_POIs=type_POIs,
                                           spatial_units=spatial_units,
                                           apps=apps,
                                           POI_or_stations=POI_or_stations,
                                           expanded=expanded)
    return(trainer,ds,ds_no_shuffle,args)

def analysis_on_specific_training_mode(trainer,ds,gdf_POI_2_tile_ids,training_mode,transfer_modes= None,
                                       type_POIs = ['stadium','nightclub'],
                                       spatial_units = ['Lou_rugby','Ninkasi_Kao'],
                                       apps = ['Instagram'],
                                       POI_or_stations = ['POI'],
                                       expanded = ''
                                       ):
    '''
    Run the trainer on one split and display predictions, errors and
    (optionally) min-max normalised NetMob series around the detected events.

    inputs:
    -------
    trainer : object exposing `testing(normalizer, training_mode=...)`
    ds : dataset exposing `normalizer` (also passed to get_df_for_visualisation)
    gdf_POI_2_tile_ids : POI table forwarded to netmob_volume_on_POI
    training_mode : split name forwarded to trainer.testing
    transfer_modes : iterable of transfer modes; must be iterable when `apps`
        is not None (it is looped over for every app / POI)
    type_POIs, spatial_units, POI_or_stations : zipped together, so only the
        first min(len) triples are used
    apps : iterable of app names, or None to skip the NetMob panel
    expanded : suffix forwarded to netmob_volume_on_POI

    Reads the module-level globals `range`, `station`, `width`, `height`
    and `min_flow`.
    '''
    Preds,Y_true,_T_labels = trainer.testing(ds.normalizer, training_mode =training_mode)
    df_true,df_prediction = get_df_for_visualisation(ds,Preds,Y_true,training_mode)
    kick_off_time,_match_times = rugby_matches(df_true.index,range)

    # Must exist even when no app is requested: it was previously unbound in
    # that case, although visualisation_special_event accepts None.
    netmob_consumption = None
    if apps is not None :
        netmob_consumption = pd.DataFrame(index = df_true.index)
        for app in apps:
            for type_POI,spatial_unit,POI_or_station in zip(type_POIs,spatial_units,POI_or_stations):
                for transfer_mode in transfer_modes:
                    serie_netmob = netmob_volume_on_POI(gdf_POI_2_tile_ids,app,transfer_mode,type_POI,spatial_unit,POI_or_station,expanded)
                    # Keep only the timestamps of the evaluated split
                    serie_netmob = serie_netmob.loc[df_true.index]

                    # Min-max normalisation of the series (yields NaN when the series is constant)
                    serie_netmob = (serie_netmob-serie_netmob.min())/(serie_netmob.max()-serie_netmob.min())

                    name_netmob_serie = f"{app}_{transfer_mode} at {spatial_unit}"

                    netmob_consumption[name_netmob_serie] = serie_netmob
        # Row-wise sum divided by the number of series, i.e. their average
        netmob_consumption['Sum_of_apps'] = netmob_consumption.sum(axis=1)/len(netmob_consumption.columns)

    visualisation_special_event(trainer,df_true,df_prediction,station,kick_off_time,range,width,height,min_flow,training_mode = training_mode,netmob_consumption = netmob_consumption)

# Build the "true" and "predicted" DataFrames used by the plots:
def get_df_for_visualisation(ds,Preds,Y_true,training_mode):
    '''
    inputs:
    -------
    ds : object exposing `spatial_unit` (column labels) and
         `tensor_limits_keeper.df_verif_<training_mode>` (its last column is used as index)
    Preds, Y_true : 3-D arrays; only the slice [:, :, 0] is kept
    training_mode : suffix selecting the `df_verif_*` table

    outputs:
    --------
    tuple (df_true, df_prediction) of pd.DataFrame sharing the same index and columns
    '''
    verif_table = getattr(ds.tensor_limits_keeper, "df_verif_" + f"{training_mode}")
    timestamps = verif_table.iloc[:, -1]

    def _as_frame(values):
        # First element of the last axis, one column per spatial unit
        return pd.DataFrame(values[:, :, 0], columns=ds.spatial_unit, index=timestamps)

    return (_as_frame(Y_true), _as_frame(Preds))


def visualisation_special_event(trainer,df_true,df_prediction,station,kick_off_time,range,width,height,min_flow,training_mode,netmob_consumption):
    '''
    Interactive visualisation of prediction vs. true value, NetMob series
    (optional), prediction error and the trainer's loss curves.

    inputs:
    -------
    trainer : forwarded to plot_loss_from_trainer
    df_true, df_prediction : DataFrames forwarded to the plotting helpers
    station : spatial unit to plot
    kick_off_time, range : event times and +/- window forwarded to plot_single_point_prediction
    width, height : size of the main panels (the selection box uses height//3,
                    the loss panel uses width//3)
    min_flow : forwarded to plot_prediction_error
    training_mode : used in the title of the first panel
    netmob_consumption : DataFrame plotted as the second panel, or None to omit it
    '''
    p1 = plot_single_point_prediction(df_true,df_prediction,station,title= f'{training_mode} Trafic Volume Prediction around at "Stade du Lou Gerland" subway station ',kick_off_time=kick_off_time, range=range,width=width,height = height,bool_show = False)
    p2 = plot_TS(netmob_consumption,width=width,height=height,bool_show=False) if netmob_consumption is not None else None
    p3 = plot_prediction_error(df_true,df_prediction,station,metrics =['mae','mse','mape'],title = 'Prediction Error',width=width,height=height,bool_show=False,min_flow = min_flow)

    # NOTE(review): p2 may be None here; confirm drag_selection_box tolerates it.
    select = drag_selection_box(df_true,p1,p2,p3,width=width,height=height//3)
    output_notebook()

    # bokeh layouts only accept layout objects as children, so the NetMob
    # panel is left out when it was not built.
    panels = [panel for panel in (p1,p2,p3,select) if panel is not None]
    col1 = column(*panels)

    col2 = plot_loss_from_trainer(trainer,width=width//3,height=height,bool_show=False)
    grid = row(col1,col2)

    show(grid)

def get_ds(model_name,dataset_names,dataset_for_coverage,
           modification = None,
            args_init = None
            ):
    '''
    Build the arguments and return the dataset of the first fold.

    inputs:
    -------
    model_name, dataset_names, dataset_for_coverage, args_init : forwarded to local_get_args
    modification : optional dict of argument overrides, forwarded to
        local_get_args and modification_contextual_args (a fresh empty dict
        is used when omitted, so no default is shared between calls)

    outputs:
    --------
    (ds, args_with_contextual, trial_id, save_folder, df_loss) where
    `save_folder` is always None and `df_loss` is an empty DataFrame.
    '''
    if modification is None:
        modification = {}

    save_folder = None
    df_loss = pd.DataFrame()

    # Original author's note: 'netmob' has to be set here so that the same
    # period is used for every combination (presumably through
    # dataset_for_coverage - TODO confirm).
    args,folds,_ = local_get_args(model_name,
                                  args_init,
                                  dataset_names=dataset_names,
                                  dataset_for_coverage=dataset_for_coverage,
                                  modification = modification)
    _,K_subway_ds,args_with_contextual = get_inputs(args,folds)
    args_with_contextual = modification_contextual_args(args_with_contextual,modification)
    trial_id = get_trial_id(args_with_contextual)

    # Only the first dataset returned by get_inputs is used
    ds = K_subway_ds[0]
    return(ds,args_with_contextual,trial_id,save_folder,df_loss)

Training and Hyper-parameter tuning with Ray is not possible


## Evaluate on non recurrent event: 
### Visualisation: 
#### Cas `subway_in` stade du Lou Gerland avec un match de Rugby : 

## HP tuning: 

In [5]:
from examples.Total_evaluation_of_model import HP_and_valid_one_config,hyperparameter_tuning
from examples.benchmark import local_get_args
import pickle

# Model / data selection for the hyper-parameter search
model_name = 'STGCN'  # alternative: 'CNN'
dataset_for_coverage = ['subway_in', 'netmob_POIs']
dataset_names = ['calendar']
vision_model_name = None

# Overrides applied on top of the default arguments for this search
hp_tuning_modification = {
    'ray': True,
    'grace_period': 2,
    'HP_max_epochs': 10,
    'evaluate_complete_ds': True,
    'set_spatial_units': ['BON', 'SOI', 'GER', 'CHA'],
    'vision_model_name': None,
}
args, _, _ = local_get_args(
    model_name,
    args_init=None,
    dataset_names=dataset_names,
    dataset_for_coverage=dataset_for_coverage,
    modification=hp_tuning_modification,
)

# Search budget
epochs_validation = 30
num_samples = 10

# Hyper-parameter tuning only; the variant that also validates the best
# config on K folds is kept below for reference:
# HP_and_valid_one_config(args,epochs_validation,num_samples)
analysis, trial_id = hyperparameter_tuning(args, num_samples)
print('trial_id: ', trial_id)



>>>>Model: STGCN; K_fold = 6; Loss function: MSE 
Considered Spatial-Unit:  ['BON', 'SOI', 'GER', 'CHA']

Init Dataset:  torch.Size([7392, 4])
Number of Nan Value:  tensor(0)
Total Number of Elements:  29568 



U/Utarget size: torch.Size([6238, 4, 7])/torch.Size([6238, 4, 1]) Train/Valid/Test 3742 1248 1247

 ===== ERROR ==== 
Try with torch >= 2.0.0 (works with 2.0.1) to allow 'prefetch_factor' 
ValueError: prefetch_factor option could only be specified in multiprocessing.let num_workers > 0 to enable multiprocessing

 ===== ERROR ==== 
Try with torch >= 2.0.0 (works with 2.0.1) to allow 'prefetch_factor' 
ValueError: prefetch_factor option could only be specified in multiprocessing.let num_workers > 0 to enable multiprocessing

 ===== ERROR ==== 
Try with torch >= 2.0.0 (works with 2.0.1) to allow 'prefetch_factor' 
ValueError: prefetch_factor option could only be specified in multiprocessing.let num_workers > 0 to enable multiprocessing
----------------------------------------
Fold n°0
Considered Spatial-Unit:  ['BON', 'SOI', 'GER', 'CHA']

Init Dataset:  torch.Size([2174, 4])
Number of Nan Value:  tensor(0)
Total Number of Elements:  8696 

U/Utarget size: torch.Size([2078, 4, 7])/torch.S

2024-12-17 15:51:16,991	INFO worker.py:1612 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
2024-12-17 15:51:17,315	INFO packaging.py:518 -- Creating a file package for local directory '/home/rrochas/prediction-validation'.
2024-12-17 15:51:17,672	INFO packaging.py:346 -- Pushing file package 'gcs://_ray_pkg_0b2e490d65dd4ae4.zip' (148.66MiB) to Ray cluster...
2024-12-17 15:51:18,224	INFO packaging.py:359 -- Successfully pushed file package 'gcs://_ray_pkg_0b2e490d65dd4ae4.zip'.
2024-12-17 15:51:33,415	INFO tune.py:657 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-12-17 15:52:39
Running for:,00:01:06.13
Memory:,43.1/754.3 GiB

Trial name,status,loc,TE_concatenation_ord er,TE_embedding_dim,TE_fc1,TE_fc1/activation_fc 1,TE_fc1/fc1,TE_fc1/fc2,TE_out_h_dim,dropout,lr,output_h_dim,scheduler,scheduler/torch_sche duler_gamma,...duler/torch_sched uler_lr_start_factor,scheduler/torch_sche duler_milestone,spatial_h_dim,temporal_h_dim,weight_decay,iter,total time (s)
lambda_685b9_00000,TERMINATED,137.121.170.69:226024,{'concatenation_f980,16,"{'fc1': 8, 'fc2_41c0",True,8,16.0,16,0.502416,0.00145,32,{'scheduler': T_ed00,0.998338,0.463143,27.0,8,16,0.0952457,10,13.6364
lambda_685b9_00001,TERMINATED,137.121.170.69:226043,{'concatenation_e9c0,4,"{'fc1': 32, 'fc_2180",,32,,32,0.677933,0.00065,32,{'scheduler': None},,,,16,16,0.0859346,2,5.03526
lambda_685b9_00002,TERMINATED,137.121.170.69:226579,{'concatenation_2c80,32,"{'fc1': 64, 'fc_8b00",,64,,8,0.111353,0.0298,64,{'scheduler': None},,,,64,64,0.0330402,2,4.58914
lambda_685b9_00003,TERMINATED,137.121.170.69:226629,{'concatenation_00c0,32,"{'fc1': 8, 'fc2_17c0",,8,,16,0.518838,0.03885,256,{'scheduler': T_bd40,0.996652,0.144211,18.0,256,32,0.0686053,2,4.65292
lambda_685b9_00004,TERMINATED,137.121.170.69:226652,{'concatenation_ebc0,32,"{'fc1': 32, 'fc_3880",,32,,8,0.0332631,0.01305,128,{'scheduler': T_b4c0,0.994442,0.834919,5.0,16,128,0.00888624,4,2.84558
lambda_685b9_00005,TERMINATED,137.121.170.69:226726,{'concatenation_c780,3,"{'fc1': 4, 'fc2_b740",,4,,16,0.871392,0.00045,128,{'scheduler': T_2280,0.988342,0.121469,28.0,64,16,0.0933141,2,4.75086
lambda_685b9_00006,TERMINATED,137.121.170.69:226727,{'concatenation_ec80,16,"{'fc1': 64, 'fc_1b80",,64,,16,0.865926,0.04485,8,{'scheduler': T_cf00,0.98541,0.289997,26.0,64,128,0.0588472,2,2.5091
lambda_685b9_00007,TERMINATED,137.121.170.69:226819,{'concatenation_2940,3,"{'fc1': 16, 'fc_e680",True,16,4.0,8,0.548165,0.00085,16,{'scheduler': T_eac0,0.998771,0.41237,17.0,128,16,0.0076435,2,4.51125
lambda_685b9_00008,TERMINATED,137.121.170.69:226842,{'concatenation_0340,8,"{'fc1': 8, 'fc2_9200",,8,,4,0.723946,0.007,16,{'scheduler': None},,,,64,16,0.0159168,8,10.4371
lambda_685b9_00009,TERMINATED,137.121.170.69:226914,{'concatenation_dd00,32,"{'fc1': 16, 'fc_4700",True,16,16.0,8,0.524122,0.0004,64,{'scheduler': T_2240,0.997224,0.190869,11.0,16,64,0.00384602,10,10.0315


[2m[36m(pid=226024)[0m 'pynvml' is not available on this environment.
[2m[36m(<lambda> pid=226024)[0m 
[2m[36m(<lambda> pid=226024)[0m PREDICTION WILL BE BASED SOLELY ON CONTEXTUAL DATA !
[2m[36m(<lambda> pid=226024)[0m 
[2m[36m(<lambda> pid=226024)[0m Model size: 0.003GB
[2m[36m(<lambda> pid=226024)[0m number of total parameters: 748486
[2m[36m(<lambda> pid=226024)[0m number of trainable parameters: 748486
[2m[36m(<lambda> pid=226024)[0m start training


Trial name,_metric
lambda_685b9_00000,{'Loss_model': 0.033904821064631836}
lambda_685b9_00001,{'Loss_model': 0.03486671445887672}
lambda_685b9_00002,{'Loss_model': 0.03838700875783472}
lambda_685b9_00003,{'Loss_model': 0.03621544134812089}
lambda_685b9_00004,{'Loss_model': 0.0380147113796747}
lambda_685b9_00005,{'Loss_model': 0.048882973296047405}
lambda_685b9_00006,{'Loss_model': 0.035656646364198026}
lambda_685b9_00007,{'Loss_model': 0.039317958329684016}
lambda_685b9_00008,{'Loss_model': 0.033620090390733215}
lambda_685b9_00009,{'Loss_model': 0.03326783181923605}


2024-12-17 15:51:47,029	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'TE_fc1/fc1': <ray.tune.search.sample.Categorical object at 0x7f96d0551ed0>}


[2m[36m(pid=226579)[0m 'pynvml' is not available on this environment.[32m [repeated 2x across cluster][0m
[2m[36m(<lambda> pid=226043)[0m [32m [repeated 4x across cluster][0m
[2m[36m(<lambda> pid=226043)[0m PREDICTION WILL BE BASED SOLELY ON CONTEXTUAL DATA !
[2m[36m(<lambda> pid=226043)[0m Model size: 0.001GB
[2m[36m(<lambda> pid=226043)[0m number of total parameters: 352758
[2m[36m(<lambda> pid=226043)[0m number of trainable parameters: 352758
[2m[36m(<lambda> pid=226043)[0m start training
[2m[36m(<lambda> pid=226579)[0m PREDICTION WILL BE BASED SOLELY ON CONTEXTUAL DATA !
[2m[36m(<lambda> pid=226579)[0m Model size: 0.005GB
[2m[36m(<lambda> pid=226579)[0m number of total parameters: 1274806
[2m[36m(<lambda> pid=226579)[0m number of trainable parameters: 1274806
[2m[36m(<lambda> pid=226579)[0m start training


2024-12-17 15:51:55,595	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'scheduler/torch_scheduler_milestone': <ray.tune.search.sample.Integer object at 0x7f96d06dd190>, 'scheduler/torch_scheduler_gamma': <ray.tune.search.sample.Float object at 0x7f96d06de990>, 'scheduler/torch_scheduler_lr_start_factor': <ray.tune.search.sample.Float object at 0x7f96d06df850>, 'TE_fc1/fc1': <ray.tune.search.sample.Categorical object at 0x7f96d05b5d90>, 'TE_fc1/fc2': <ray.tune.search.sample.Categorical object at 0x7f96d05b4310>, 'TE_fc1/activation_fc1': <ray.tune.search.sample.Categorical object at 0x7f96d0552310>}
2024-12-17 15:51:57,540	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'TE_fc1/fc1': <ray.tune.search.sample.Categorical object at 0x7f96d0012850>}


[2m[36m(pid=226629)[0m 'pynvml' is not available on this environment.
[2m[36m(<lambda> pid=226579)[0m [32m [repeated 3x across cluster][0m
[2m[36m(<lambda> pid=226629)[0m PREDICTION WILL BE BASED SOLELY ON CONTEXTUAL DATA !
[2m[36m(<lambda> pid=226629)[0m Model size: 0.005GB
[2m[36m(<lambda> pid=226629)[0m number of total parameters: 1274806
[2m[36m(<lambda> pid=226629)[0m number of trainable parameters: 1274806
[2m[36m(<lambda> pid=226629)[0m start training
[2m[36m(pid=226652)[0m 'pynvml' is not available on this environment.


2024-12-17 15:52:05,935	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'scheduler/torch_scheduler_milestone': <ray.tune.search.sample.Integer object at 0x7f96d06308d0>, 'scheduler/torch_scheduler_gamma': <ray.tune.search.sample.Float object at 0x7f96d0011b90>, 'scheduler/torch_scheduler_lr_start_factor': <ray.tune.search.sample.Float object at 0x7f96d0013a90>, 'TE_fc1/fc1': <ray.tune.search.sample.Categorical object at 0x7f96d05f0dd0>}
2024-12-17 15:52:06,164	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'scheduler/torch_scheduler_milestone': <ray.tune.search.sample.Integer object at 0x7f96d0011890>, 'scheduler/torch_scheduler_gamma': <ray.tune.search.sample.Float object at 0x7f96d0013f50>, 'scheduler/torch_scheduler_lr_start_factor': <ray.tune.search.sample.Float object at 0x7f96d0012810>, 'TE_fc1/fc1': <ray.tune.search.sample.Categorical object at 0x7f96d05cc990>}


[2m[36m(<lambda> pid=226652)[0m [32m [repeated 6x across cluster][0m
[2m[36m(<lambda> pid=226652)[0m PREDICTION WILL BE BASED SOLELY ON CONTEXTUAL DATA !
[2m[36m(<lambda> pid=226652)[0m Model size: 0.001GB
[2m[36m(<lambda> pid=226652)[0m number of total parameters: 192054
[2m[36m(<lambda> pid=226652)[0m number of trainable parameters: 192054
[2m[36m(<lambda> pid=226652)[0m start training
[2m[36m(pid=226726)[0m 'pynvml' is not available on this environment.
[2m[36m(pid=226727)[0m 'pynvml' is not available on this environment.
[2m[36m(<lambda> pid=226726)[0m PREDICTION WILL BE BASED SOLELY ON CONTEXTUAL DATA !
[2m[36m(<lambda> pid=226726)[0m Model size: 0.001GB
[2m[36m(<lambda> pid=226726)[0m number of total parameters: 319974
[2m[36m(<lambda> pid=226726)[0m number of trainable parameters: 319974
[2m[36m(<lambda> pid=226726)[0m start training


2024-12-17 15:52:14,168	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'scheduler/torch_scheduler_milestone': <ray.tune.search.sample.Integer object at 0x7f96d05cfd50>, 'scheduler/torch_scheduler_gamma': <ray.tune.search.sample.Float object at 0x7f96d05ce790>, 'scheduler/torch_scheduler_lr_start_factor': <ray.tune.search.sample.Float object at 0x7f96d05ce0d0>, 'TE_fc1/fc1': <ray.tune.search.sample.Categorical object at 0x7f96d05cfe50>}
2024-12-17 15:52:16,319	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'scheduler/torch_scheduler_milestone': <ray.tune.search.sample.Integer object at 0x7f96d06e25d0>, 'scheduler/torch_scheduler_gamma': <ray.tune.search.sample.Float object at 0x7f96d00dc9d0>, 'scheduler/torch_scheduler_lr_start_factor': <ray.tune.search.sample.Float object at 0x7f96da8eb090>, 'TE_fc1/fc1': <ray.tune.search.sample.Categorical object at 0x7f96d05cd150>}


[2m[36m(<lambda> pid=226727)[0m [32m [repeated 6x across cluster][0m
[2m[36m(pid=226819)[0m 'pynvml' is not available on this environment.
[2m[36m(<lambda> pid=226727)[0m PREDICTION WILL BE BASED SOLELY ON CONTEXTUAL DATA !
[2m[36m(<lambda> pid=226727)[0m Model size: 0.001GB
[2m[36m(<lambda> pid=226727)[0m number of total parameters: 189750
[2m[36m(<lambda> pid=226727)[0m number of trainable parameters: 189750
[2m[36m(<lambda> pid=226727)[0m start training
[2m[36m(<lambda> pid=226819)[0m PREDICTION WILL BE BASED SOLELY ON CONTEXTUAL DATA !
[2m[36m(<lambda> pid=226819)[0m Model size: 0.001GB
[2m[36m(<lambda> pid=226819)[0m number of total parameters: 320465
[2m[36m(<lambda> pid=226819)[0m number of trainable parameters: 320465
[2m[36m(<lambda> pid=226819)[0m start training
[2m[36m(pid=226842)[0m 'pynvml' is not available on this environment.


2024-12-17 15:52:24,094	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'scheduler/torch_scheduler_milestone': <ray.tune.search.sample.Integer object at 0x7f96d06e3310>, 'scheduler/torch_scheduler_gamma': <ray.tune.search.sample.Float object at 0x7f96dab2ffd0>, 'scheduler/torch_scheduler_lr_start_factor': <ray.tune.search.sample.Float object at 0x7f96d05cef10>, 'TE_fc1/fc1': <ray.tune.search.sample.Categorical object at 0x7f96d05cc850>, 'TE_fc1/fc2': <ray.tune.search.sample.Categorical object at 0x7f96d05cea50>, 'TE_fc1/activation_fc1': <ray.tune.search.sample.Categorical object at 0x7f96d05cf1d0>}


[2m[36m(<lambda> pid=226842)[0m [32m [repeated 6x across cluster][0m
[2m[36m(<lambda> pid=226842)[0m PREDICTION WILL BE BASED SOLELY ON CONTEXTUAL DATA !
[2m[36m(<lambda> pid=226842)[0m Model size: 0.002GB
[2m[36m(<lambda> pid=226842)[0m number of total parameters: 483894
[2m[36m(<lambda> pid=226842)[0m number of trainable parameters: 483894
[2m[36m(<lambda> pid=226842)[0m start training
[2m[36m(pid=226914)[0m 'pynvml' is not available on this environment.
[2m[36m(<lambda> pid=226914)[0m PREDICTION WILL BE BASED SOLELY ON CONTEXTUAL DATA !
[2m[36m(<lambda> pid=226914)[0m Model size: 0.005GB
[2m[36m(<lambda> pid=226914)[0m number of total parameters: 1270614
[2m[36m(<lambda> pid=226914)[0m number of trainable parameters: 1270614
[2m[36m(<lambda> pid=226914)[0m start training


2024-12-17 15:52:32,276	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'TE_fc1/fc1': <ray.tune.search.sample.Categorical object at 0x7f96d042ad10>}
2024-12-17 15:52:39,552	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'scheduler/torch_scheduler_milestone': <ray.tune.search.sample.Integer object at 0x7f96d05ce390>, 'scheduler/torch_scheduler_gamma': <ray.tune.search.sample.Float object at 0x7f96d05cf510>, 'scheduler/torch_scheduler_lr_start_factor': <ray.tune.search.sample.Float object at 0x7f96d05cc8d0>, 'TE_fc1/fc1': <ray.tune.search.sample.Categorical object at 0x7f96d05cce50>, 'TE_fc1/fc2': <ray.tune.search.sample.Categorical object at 0x7f96d05f0cd0>, 'TE_fc1/activation_fc1': <ray.tune.search.sample.Categorical object at 0x7f96d042a150>}
2024-12-17 15:52:39,573	INFO tune.py:1148 -- Total run time: 66.16 seconds (66.13 seconds for the tuning loop).


trial_id

>>>> Load best CONFIG


In [6]:
# Display the identifier of the tuning run (value shown as the cell output).
trial_id

'calendar_STGCN_MSELoss_2024_12_17_15_52_99364'

In [4]:
from examples.train_model_on_k_fold_validation import load_configuration

trial_id = 'calendar_STGCN_MSELoss_2024_12_17_15_52_99364'
best_args,folds = load_configuration(trial_id,load_config=True,epochs=None)

# The loaded configuration holds some entries under slash-separated keys;
# re-expose each of them under a flat attribute name.
dict_keys_embedding = {'TE_concatenation_order/concatenation_late':'TE_concatenation_late',
            'TE_concatenation_order/concatenation_early':'TE_concatenation_early',
             'TE_fc1/fc1':'TE_fc1', 
             'TE_fc1/fc2':'TE_fc2', 
             'TE_fc1/activation_fc1':'TE_activation_fc1',
             }

for key, new_key in dict_keys_embedding.items():
    setattr(best_args, new_key, vars(best_args)[key])

# Snapshot of the arguments: vars() returns the object's live __dict__, so
# passing it directly would let any in-place update made downstream
# silently rewrite `best_args` itself.
modification = dict(vars(best_args))


# Alternatives kept from earlier experiments are listed after each value.
dataset_names = ["calendar"] # ["subway_in","calendar"] # ["subway_in"] # ['data_bidon'] # ['METR_LA'] # ['PEMS_BAY']
dataset_for_coverage = ['subway_in','netmob_image_per_station'] #  ['data_bidon','netmob'] #  ['subway_in','netmob']  # ['METR_LA'] # ['PEMS_BAY']
model_name = 'STGCN'
vision_model_name =  None

station = 'GER'  # 'BON'  #'GER'
apps = ['Google_Maps']# ['Instagram','Twitter','Google_Maps'] # 'Instagram'  # 'Twitter' # 'Google_Maps' # 'Facebook'


transfer_modes = ['DL'] # ['DL'] # ['UL'] # ['DL','UL']
type_POIs = ['stadium'] #['stadium','station'] #['stadium','nightclub'] #['stadium']
# NOTE(review): two spatial units but a single type_POI / POI_or_station; the
# analysis zips the three lists, so only the first spatial unit is used.
spatial_units = ['Matmut Stadium Gerland','GER']  #spatial_units = ['Lou_rugby']  # ['Astroballe'] #['Lou_rugby','Ninkasi_Kao'] #['Lou_rugby'] #['Ninkasi_Kao'] 
expanded = '_expanded' # '' # '_expanded' # ''
POI_or_stations = ['POI']# ['POI','station'] # 'station'

(trainer,ds,ds_no_shuffle,args) = evaluate_config(model_name,dataset_names,dataset_for_coverage,vision_model_name,transfer_modes= transfer_modes,
                                                   type_POIs = type_POIs,spatial_units = spatial_units,apps = apps,POI_or_stations = POI_or_stations,
                                                   expanded=expanded,modification=modification)


>>>> Load best CONFIG
>>>>Model: STGCN; K_fold = 6; Loss function: MSE 
Considered Spatial-Unit:  ['BON', 'SOI', 'GER', 'CHA']

Init Dataset:  torch.Size([7392, 4])
Number of Nan Value:  tensor(0)
Total Number of Elements:  29568 

U/Utarget size: torch.Size([6238, 4, 7])/torch.Size([6238, 4, 1]) Train/Valid/Test 3742 1248 1247

 ===== ERROR ==== 
Try with torch >= 2.0.0 (works with 2.0.1) to allow 'prefetch_factor' 
ValueError: prefetch_factor option could only be specified in multiprocessing.let num_workers > 0 to enable multiprocessing

 ===== ERROR ==== 
Try with torch >= 2.0.0 (works with 2.0.1) to allow 'prefetch_factor' 
ValueError: prefetch_factor option could only be specified in multiprocessing.let num_workers > 0 to enable multiprocessing

 ===== ERROR ==== 
Try with torch >= 2.0.0 (works with 2.0.1) to allow 'prefetch_factor' 
ValueError: prefetch_factor option could only be specified in multiprocessing.let num_workers > 0 to enable multiprocessing
------------------------




Training Throughput:1733.73 sequences per seconds
>>> Training complete in: 0:01:46.199921
>>> Training performance time: min 0.015354156494140625 avg 0.017724990844726562 seconds (+/- 0.001908711764526658)
>>> Loading performance time: min 0.0004906654357910156 avg 0.010633290717278521 seconds (+/- 0.01996059909954239)
>>> Forward performance time: 0.0050526173851105066 seconds (+/- 0.0003301841689708197)
>>> Backward performance time: 0.006343807339984608 seconds (+/- 0.0012335300118616849)
>>> Plotting performance time: 3.2656120531486743e-06 seconds (+/- 9.063609834007626e-06)
>>> Saving performance time: 0.29553026812417166 seconds (+/- 0.07690586599499047)
>>> PI-tracking performance time: 4.050707576250789e-06 seconds (+/- 2.4154080418792537e-06)
>>> Scheduler-update performance time: 3.696932937159683e-05 seconds (+/- 8.41775458925887e-05)
>>> Peak Power during training: 90.8 W)
>>> Validation time: 0:00:00.087934
Proportion of time consumed for Loading: 45.9%
Proportion of ti

ERROR 1: PROJ: proj_create_from_database: Open of /root/anaconda3/envs/pytorch-2.0.1/share/proj failed
  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


In [4]:
# Data / model selection (other values tried: "subway_in", 'data_bidon',
# 'METR_LA', 'PEMS_BAY', 'netmob')
dataset_names = ["calendar"]
dataset_for_coverage = ['subway_in', 'netmob_image_per_station']
model_name = 'STGCN'
vision_model_name = None

# Station shown in the plots (other value tried: 'BON')
station = 'GER'

# NetMob series to overlay
apps = ['Google_Maps']          # other apps tried: 'Instagram', 'Twitter', 'Facebook'
transfer_modes = ['DL']         # other modes tried: 'UL'
type_POIs = ['stadium']         # other types tried: 'station', 'nightclub'
spatial_units = ['Matmut Stadium Gerland', 'GER']   # others tried: 'Lou_rugby', 'Astroballe', 'Ninkasi_Kao'
expanded = '_expanded'          # other value tried: ''
POI_or_stations = ['POI']       # other value tried: 'station'

# Hand-picked overrides for this run
modification = {
    'epochs': 20,  # 100
    'lr': 1e-4,
    'temporal_h_dim': 32,
    'spatial_h_dim': 16,
    'output_h_dim': 32,
    'TE_embedding_dim': 16,
    'set_spatial_units': ['CHA', 'GER', 'BON', 'SOI'],
    'TE_out_h_dim': 16,
    'TE_concatenation_late': False,
    'TE_concatenation_early': True,
}

(trainer, ds, ds_no_shuffle, args) = evaluate_config(
    model_name,
    dataset_names,
    dataset_for_coverage,
    vision_model_name,
    transfer_modes=transfer_modes,
    type_POIs=type_POIs,
    spatial_units=spatial_units,
    apps=apps,
    POI_or_stations=POI_or_stations,
    expanded=expanded,
    modification=modification,
)


# lr 5e-3
# 32 / 32 / 32 - 3.5e-2   , 380 000
# 32 / 16 / 32 - 3.5e-2   , 380 000

>>>>Model: STGCN; K_fold = 6; Loss function: MSE 
Considered Spatial-Unit:  ['CHA', 'GER', 'BON', 'SOI']

Init Dataset:  torch.Size([7392, 4])
Number of Nan Value:  tensor(0)
Total Number of Elements:  29568 

U/Utarget size: torch.Size([6238, 4, 7])/torch.Size([6238, 4, 1]) Train/Valid/Test 3742 1248 1247
----------------------------------------
Fold n°0
Considered Spatial-Unit:  ['CHA', 'GER', 'BON', 'SOI']

Init Dataset:  torch.Size([2174, 4])
Number of Nan Value:  tensor(0)
Total Number of Elements:  8696 

U/Utarget size: torch.Size([2078, 4, 7])/torch.Size([2078, 4, 1]) Train/Valid 1558 519

PREDICTION WILL BE BASED SOLELY ON CONTEXTUAL DATA !

Model size: 0.000GB
number of total parameters: 53126
number of trainable parameters: 53126

start training
epoch: 0 
 min\epoch : 0.03
Estimated time for training: 0.4min 

Training Throughput:1779.84 sequences per seconds
>>> Training complete in: 0:00:21.589236
>>> Training performance time: min 0.01401209831237793 avg 0.017776012420654

  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


#### Cas `subway_out` stade du Lou Gerland avec un match de Rugby : 

#### Cas `subway_in` Laurent Bonnevay Astroballe avec un match de Basket : 

#### Cas `subway_out` Laurent Bonnevay Astroballe avec un match de Basket : 

## Maintenant on va intégrer les données NetMob et voir si elles permettent de réduire l'erreur de prédiction :

In [1]:
import os 
import pickle 
import pandas as pd 

trial_id = 'subway_in_netmob_POIs_STGCN_VariableSelectionNetwork_MSELoss_2024_12_07_02_55_5679'
# Other tuning runs that can be inspected instead:
#'subway_in_calendar_STGCN_MSELoss_2024_12_12_15_51_46099'
#'subway_in_calendar_STGCN_MSELoss_2024_12_12_14_16_71587'

path_csv = f'save/HyperparameterTuning/{trial_id}.csv'
path_pickle = 'save/HyperparameterTuning/model_args.pkl'
df_hp_tuning = pd.read_csv(path_csv)

# Context manager so the file handle is closed once the pickle is read.
# NOTE: unpickling executes arbitrary code; only load files written by this project.
with open(path_pickle,'rb') as pickle_file:
    model_args = pickle.load(pickle_file)

# Row of the tuning table with the smallest value of `metric`
metric = '_metric/Loss_model'
best_model = df_hp_tuning.sort_values(metric).iloc[0]
# Names of that row's entries containing 'config/', with the marker removed
HP_args = [indx.replace('config/', '') for indx in best_model.index if 'config/' in indx]
args = model_args['model'][trial_id]['args']

In [3]:
print(df_hp_tuning.columns)

# First 30 rows of the tuning table, ordered by increasing `metric`,
# restricted to the loss and a few configuration columns.
columns_to_show = [
    '_metric/Loss_model',
    'config/lr',
    'config/weight_decay',
    'config/dropout',
    'config/vision_vision_grn_out_dim',
]
df_hp_tuning.sort_values(metric)[columns_to_show].head(30)

Index(['trial_id', 'time_this_iter_s', 'done', 'training_iteration', 'date',
       'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip',
       'time_since_restore', 'iterations_since_restore', 'experiment_tag',
       '_metric/Loss_model', 'config/lr', 'config/weight_decay',
       'config/dropout', 'config/vision_vision_grn_out_dim',
       'config/scheduler/scheduler',
       'config/scheduler/torch_scheduler_milestone',
       'config/scheduler/torch_scheduler_gamma',
       'config/scheduler/torch_scheduler_lr_start_factor'],
      dtype='object')

In [12]:
# Same evaluation, now with the NetMob images as an additional input dataset
# (other values tried: "calendar", 'data_bidon', 'METR_LA', 'PEMS_BAY', 'netmob')
dataset_names = ["subway_in", "netmob_image_per_station"]
dataset_for_coverage = ['subway_in', 'netmob_image_per_station']
model_name = 'STGCN'
vision_model_name = 'ImageAvgPooling'

(trainer, ds, ds_no_shuffle, args) = evaluate_config(
    model_name=model_name,
    dataset_names=dataset_names,
    dataset_for_coverage=dataset_for_coverage,
    vision_model_name=vision_model_name,
)

40
>>>>Model: STGCN; K_fold = 6; Loss function: MSE 

Init Dataset:  torch.Size([7392, 40])
Number of Nan Value:  tensor(0)
Total Number of Elements:  295680 



>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([6238, 40, 7]) Utarget size:  torch.Size([6238, 40, 1])
U_train size:  torch.Size([3742, 40, 7]) Utarget_train size:  torch.Size([3742, 40, 1])
U_valid size:  torch.Size([1248, 40, 7]) Utarget_valid size:  torch.Size([1248, 40, 1])
U_test size:  torch.Size([1247, 40, 7]) Utarget_test size:  torch.Size([1247, 40, 1])
U_train min:  tensor(0.) U_train max:  tensor(10798.)
U_valid min:  tensor(0.) U_valid max:  tensor(1405.)
U_test min:  tensor(0.) U_test max:  tensor(1352.)
Transfer Modes: DL

Init NetMob Dataset:  torch.Size([7392, 40, 1, 6, 6])
Number of Nan Value:  tensor(0)
Total Number of Elements:  10644480 

>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([6238, 40, 1, 6, 6, 7]) Utarget size:  torch.Size([6238, 40, 1, 6, 6, 1])
U_train size:  torch.Size([3742, 40, 1, 6, 6, 7]) Utarget_train size:  torch.Size([3742, 40, 1, 6, 6, 1])
U_valid size:  torch.Size([1248, 40, 1, 

  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


#### Prédiction avec uniquement NetMob, sans subway-in: 

#### Prédiction avec uniquement Calendar, sans subway-in: 