In [1]:
# GET PARAMETERS
import os
import sys

# Put the parent of the current working directory on sys.path BEFORE the
# project-local imports below. The original cell did this after the imports,
# where it could no longer help them resolve.
current_path = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_path, '..'))
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

# Third-party
import pandas as pd
from bokeh.plotting import show,output_notebook
from bokeh.layouts import column
from bokeh.layouts import row

# Project-local (presumably located under `parent_dir` -- TODO confirm)
from examples.benchmark import local_get_args,get_inputs,train_on_ds,keep_track_on_model_metrics,get_trial_id
from plotting.TS_analysis import drag_selection_box,plot_single_point_prediction,plot_prediction_error,plot_loss_from_trainer
from build_inputs.preprocess_subway_15 import get_trigram_correspondance
from utils.specific_event import rugby_matches


#### Specify the spatial unit we want to evaluate
# Parameters of the visualisation (read as globals by the cells and helpers below):
# NOTE(review): `range` shadows the built-in `range` for the rest of the notebook;
# the name is kept because later cells and function signatures reference it.
range = 3*60  # +/- window (in minutes) around the event that is assumed to be affected
width = 1000   # forwarded as `width` to the plotting helpers (presumably pixels -- TODO confirm)
height = 300   # forwarded as `height` to the plotting helpers (presumably pixels -- TODO confirm)
min_flow = 20  # forwarded as `min_flow` to plot_prediction_error; exact meaning is defined there

# Spatial unit (station code) we would like to evaluate:
station = 'GER'

# Correspondence table; it has a 'Station' column (used as the index below).
# The remaining column(s) presumably hold the station trigram codes -- TODO confirm.
df_correspondance = get_trigram_correspondance()
# Station names selected for the analysis (metro line B, going by the variable name).
Metro_B_stations = ['Charpennes','Brotteaux','Part-Dieu' ,'Place Guichard', 'Saxe - Gambetta',
       'Jean Macé','Place Jean Jaurès','Debourg','Stade de Gerland',"Gare d'Oullins"]
# Flattened list of the table values for the stations above (same order as Metro_B_stations).
Metro_B_TRG = list(df_correspondance.set_index('Station').loc[Metro_B_stations].values.reshape(-1))

def analysis_on_specific_training_mode(trainer,ds,training_mode):
    """Evaluate `trainer` on one split and display the special-event dashboard.

    Parameters
    ----------
    trainer : object exposing ``testing(normalizer, training_mode=...)``.
    ds : dataset object; ``ds.normalizer`` is forwarded to ``trainer.testing``.
    training_mode : forwarded to ``trainer.testing`` and to the plot title.

    Notes
    -----
    Reads the notebook-level globals ``Metro_B_TRG``, ``station``, ``range``,
    ``width``, ``height`` and ``min_flow``. Returns nothing; the figure is
    shown as a side effect.
    """
    # Third value returned by `testing` (labels) is not needed here.
    predictions, targets, _ = trainer.testing(ds.normalizer, training_mode=training_mode)
    true_df, predicted_df = get_df_for_visualisation(ds, predictions, targets, Metro_B_TRG)

    # Only the kick-off times are used for the plot; the second output is ignored.
    kick_offs, _ = rugby_matches(true_df.index, range)

    visualisation_special_event(
        trainer, true_df, predicted_df, station, kick_offs,
        range, width, height, min_flow, training_mode,
    )

# Get df_True Volume: 
def get_df_for_visualisation(ds,Preds,Y_true,Metro_B_TRG):
    """Build two aligned DataFrames (true values, predictions) for the selected units.

    Parameters
    ----------
    ds : object exposing ``spatial_unit`` (a pandas Series of unit codes) and
        ``tensor_limits_keeper.df_verif_test`` (a DataFrame whose last column
        is used as the row index of the outputs).
    Preds, Y_true : arrays indexable as ``[:, unit_indices, 0]``.
    Metro_B_TRG : collection of unit codes to keep.

    Returns
    -------
    (df_true, df_prediction) : tuple of pd.DataFrame
        One column per kept unit, one row per entry of the last column of
        ``df_verif_test``. Values are taken from the inputs as given (the
        original note describes them as un-normalized).
    """
    kept_units = ds.spatial_unit[ds.spatial_unit.isin(Metro_B_TRG)]
    # The Series index labels are used as positions along axis 1 of the arrays.
    unit_positions = list(kept_units.index)
    time_index = ds.tensor_limits_keeper.df_verif_test.iloc[:, -1]

    def _to_frame(values):
        # Select the kept units on axis 1 and the first channel on axis 2.
        return pd.DataFrame(
            values[:, unit_positions, 0],
            columns=kept_units.values,
            index=time_index,
        )

    return (_to_frame(Y_true), _to_frame(Preds))


def visualisation_special_event(trainer,df_true,df_prediction,station,kick_off_time,range,width,height,min_flow,training_mode='test'):
    '''Interactive Bokeh view of prediction, true value, error and training loss.

    Parameters
    ----------
    trainer : passed to ``plot_loss_from_trainer`` to draw the loss panel.
    df_true, df_prediction : DataFrames of true and predicted values.
    station : column/unit to plot.
    kick_off_time : event times forwarded to ``plot_single_point_prediction``.
    range : +/- window around the event, forwarded to ``plot_single_point_prediction``.
        NOTE: the name shadows the built-in ``range`` inside this function; it is
        kept so that keyword callers are not broken.
    width, height : size of the main panels; the selection box uses ``height//3``
        and the loss panel uses ``width//3``.
    min_flow : forwarded to ``plot_prediction_error``.
    training_mode : label prepended to the main plot title. Defaults to ``'test'``
        so that callers written before this parameter existed (which all plot
        test-split predictions) keep working.

    Returns
    -------
    None. The layout is rendered in the notebook as a side effect.
    '''
    p1 = plot_single_point_prediction(
        df_true, df_prediction, station,
        title=f'{training_mode} Trafic Volume Prediction around at "Stade du Lou Gerland" subway station ',
        kick_off_time=kick_off_time, range=range,
        width=width, height=height, bool_show=False,
    )
    p2 = plot_prediction_error(
        df_true, df_prediction, station,
        metrics=['mae','mse','mape'], title='Prediction Error',
        width=width, height=height, bool_show=False, min_flow=min_flow,
    )
    select = drag_selection_box(df_true, p1, p2, width=width, height=height//3)

    # Route Bokeh output to the notebook before showing anything.
    output_notebook()

    # Left column: prediction, error and selection box; right: loss curves.
    col1 = column(p1, p2, select)
    col2 = plot_loss_from_trainer(trainer, width=width//3, height=height, bool_show=False)
    grid = row(col1, col2)

    show(grid)

def get_ds(model_name,dataset_names,dataset_for_coverage,vision_model_name = None):
    """Build the arguments and inputs for one configuration and return its first dataset.

    Parameters
    ----------
    model_name : forwarded to ``local_get_args``.
    dataset_names : datasets used as model inputs, forwarded to ``local_get_args``.
    dataset_for_coverage : forwarded to ``local_get_args``. Per the original note,
        it is set separately from ``dataset_names`` so that the same period is
        used for every dataset combination.
    vision_model_name : optional, forwarded to ``get_trial_id`` and ``get_inputs``.

    Returns
    -------
    (ds, args, trial_id, save_folder, dic_class2rpz, df_loss)
        ``ds`` is element 0 of the datasets returned by ``get_inputs``;
        ``save_folder`` is always ``None`` and ``df_loss`` is an empty DataFrame.
    """
    # Configuration override: 30 epochs (the original comment mentions 100 as the alternative).
    overrides = {'epochs' : 30}

    # Third output of local_get_args is not used here.
    args, folds, _ = local_get_args(
        model_name,
        dataset_names=dataset_names,
        dataset_for_coverage=dataset_for_coverage,
        modification=overrides,
    )
    trial_id = get_trial_id(args, vision_model_name=vision_model_name)

    # Only the datasets and the class-to-representation dict are needed from get_inputs.
    _, datasets, dic_class2rpz = get_inputs(args, vision_model_name, folds)
    first_ds = datasets[0]

    save_folder = None
    df_loss = pd.DataFrame()
    return (first_ds, args, trial_id, save_folder, dic_class2rpz, df_loss)

Training and Hyper-parameter tuning with Ray is not possible


In [2]:
# Baseline configuration: the model input is `subway_in` only.
dataset_names = ["subway_in"] # alternatives: ["subway_in","calendar"], ['data_bidon'], ['METR_LA'], ['PEMS_BAY']
# Coverage datasets are passed separately from the inputs (see the note in get_ds about using the same period).
dataset_for_coverage = ['subway_in','netmob'] # alternatives: ['data_bidon','netmob'], ['METR_LA'], ['PEMS_BAY']
model_name = 'STGCN'

# Build the dataset and the bookkeeping objects needed by train_on_ds.
ds,args,trial_id,save_folder,dic_class2rpz,df_loss = get_ds(model_name,dataset_names,dataset_for_coverage)

40
>>>>Model: STGCN; K_fold = 6; Loss function: MSE 



Init Dataset:  torch.Size([7392, 40])
Number of Nan Value:  tensor(0)
Total Number of Elements:  295680 

>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([6238, 40, 7]) Utarget size:  torch.Size([6238, 40, 1])
U_train size:  torch.Size([3742, 40, 7]) Utarget_train size:  torch.Size([3742, 40, 1])
U_valid size:  torch.Size([1248, 40, 7]) Utarget_valid size:  torch.Size([1248, 40, 1])
U_test size:  torch.Size([1247, 40, 7]) Utarget_test size:  torch.Size([1247, 40, 1])
U_train min:  tensor(0.) U_train max:  tensor(10798.)
U_valid min:  tensor(0.) U_valid max:  tensor(1405.)
U_test min:  tensor(0.) U_test max:  tensor(1352.)

Fold n°0

Init Dataset:  torch.Size([2174, 40])
Number of Nan Value:  tensor(0)
Total Number of Elements:  86960 

>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([2078, 40, 7]) Utarget size:  torch.Size([2078, 40, 1])
U_train size:  torch.Size([1558, 40, 7]) Utarget_train size:  torch.Size([1558, 40, 1

### Train :

In [3]:
# Train on `ds`; `trainer` is reused by the evaluation cells and `df_loss` is rebound to the returned value.
trainer,df_loss = train_on_ds(model_name,ds,args,trial_id,save_folder,dic_class2rpz,df_loss)

Model size: 0.001GB
number of total parameters: 207233
number of trainable parameters: 207233

start training
epoch: 0 
 min\epoch : 0.10
Estimated time for training: 0.6min 

Training Throughput:462.05 sequences per seconds
>>> Training complete in: 0:00:38.807602
>>> Training performance time: min 0.05730485916137695 avg 0.0642087459564209 seconds (+/- 0.010236158620365265)
>>> Loading performance time: min 0.0001480579376220703 avg 0.03719739778804393 seconds (+/- 0.05805735518184448)
>>> Forward performance time: 0.04649755462303239 seconds (+/- 0.009929583337176027)
>>> Backward performance time: 0.012959547081851765 seconds (+/- 0.00213162896342509)
>>> Plotting performance time: 2.1139780680338543e-05 seconds (+/- 3.743230747693841e-05)
>>> Saving performance time: 0.33913910388946533 seconds (+/- 0.12700577001368352)
>>> PI-tracking performance time: 5.245208740234375e-06 seconds (+/- 3.557686692174179e-06)
>>> Scheduler-update performance time: 4.2120615641276044e-06 seconds (

### Test model :

In [4]:
# Run the trained model on the 'test' split. `ds.normalizer` is passed along,
# presumably so that outputs come back on the original scale -- TODO confirm.
Preds,Y_true,T_labels = trainer.testing(ds.normalizer, training_mode = 'test')

## Evaluate on non-recurrent events:
### Visualisation: 
#### Cas `subway_in` stade du Lou Gerland avec un match de Rugby : 

In [5]:
df_true,df_prediction = get_df_for_visualisation(ds,Preds,Y_true,Metro_B_TRG)
kick_off_time,match_times = rugby_matches(df_true.index,range)
# Pass the split explicitly: it is used in the plot title and matches the
# 'test' predictions computed in the previous cell.
visualisation_special_event(trainer,df_true,df_prediction,station,kick_off_time,range,width,height,min_flow,training_mode='test')

  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


#### Cas `subway_out` stade du Lou Gerland avec un match de Rugby : 

#### Cas `subway_in` Laurent Bonnevay Astroballe avec un match de Basket : 

#### Cas `subway_out` Laurent Bonnevay Astroballe avec un match de Basket : 

## Maintenant on va intégrer les données NetMob et voir si elles permettent de réduire l'erreur de prédiction :

In [6]:
# Configuration: `subway_in` combined with the per-station NetMob images.
dataset_names = ["subway_in","netmob_image_per_station"] # alternatives: ["subway_in","calendar"], ["subway_in"], ['data_bidon'], ['METR_LA'], ['PEMS_BAY']
dataset_for_coverage = ['subway_in','netmob_image_per_station'] # alternatives: ['data_bidon','netmob'], ['subway_in','netmob'], ['METR_LA'], ['PEMS_BAY']
model_name = 'STGCN'
vision_model_name =  'ImageAvgPooling'

# Build inputs, train, then evaluate on the 'test' split.
ds,args,trial_id,save_folder,dic_class2rpz,df_loss = get_ds(model_name,dataset_names,dataset_for_coverage,vision_model_name=vision_model_name)
trainer,df_loss = train_on_ds(model_name,ds,args,trial_id,save_folder,dic_class2rpz,df_loss)
Preds,Y_true,T_labels = trainer.testing(ds.normalizer, training_mode = 'test')

df_true,df_prediction = get_df_for_visualisation(ds,Preds,Y_true,Metro_B_TRG)
kick_off_time,match_times = rugby_matches(df_true.index,range)
# Pass the split explicitly: it is used in the plot title and matches the
# 'test' predictions computed above.
visualisation_special_event(trainer,df_true,df_prediction,station,kick_off_time,range,width,height,min_flow,training_mode='test')

40
>>>>Model: STGCN; K_fold = 6; Loss function: MSE 

Init Dataset:  torch.Size([7392, 40])
Number of Nan Value:  tensor(0)
Total Number of Elements:  295680 



>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([6238, 40, 7]) Utarget size:  torch.Size([6238, 40, 1])
U_train size:  torch.Size([3742, 40, 7]) Utarget_train size:  torch.Size([3742, 40, 1])
U_valid size:  torch.Size([1248, 40, 7]) Utarget_valid size:  torch.Size([1248, 40, 1])
U_test size:  torch.Size([1247, 40, 7]) Utarget_test size:  torch.Size([1247, 40, 1])
U_train min:  tensor(0.) U_train max:  tensor(10798.)
U_valid min:  tensor(0.) U_valid max:  tensor(1405.)
U_test min:  tensor(0.) U_test max:  tensor(1352.)
Transfer Modes: DL

Init NetMob Dataset:  torch.Size([7392, 40, 1, 6, 6])
Number of Nan Value:  tensor(0)
Total Number of Elements:  10644480 

>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([6238, 40, 1, 6, 6, 7]) Utarget size:  torch.Size([6238, 40, 1, 6, 6, 1])
U_train size:  torch.Size([3742, 40, 1, 6, 6, 7]) Utarget_train size:  torch.Size([3742, 40, 1, 6, 6, 1])
U_valid size:  torch.Size([1248, 40, 1, 

  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


#### Prédiction avec uniquement NetMob, sans subway-in :

In [2]:
# Configuration: NetMob images only, without `subway_in` as an input.
dataset_names = ["netmob_image_per_station"] # alternatives: ["subway_in","calendar"], ["subway_in"], ['data_bidon'], ['METR_LA'], ['PEMS_BAY']
dataset_for_coverage = ['netmob_image_per_station'] # alternatives: ['data_bidon','netmob'], ['subway_in','netmob'], ['METR_LA'], ['PEMS_BAY']
model_name = 'STGCN'
vision_model_name =  'ImageAvgPooling'

# Build inputs and train.
ds,args,trial_id,save_folder,dic_class2rpz,df_loss = get_ds(model_name,dataset_names,dataset_for_coverage,vision_model_name=vision_model_name)
trainer,df_loss = train_on_ds(model_name,ds,args,trial_id,save_folder,dic_class2rpz,df_loss)

# The helper's signature is (trainer, ds, training_mode); the trainer must be
# passed first, otherwise the call raises a TypeError.
analysis_on_specific_training_mode(trainer,ds,training_mode = 'test')

40
>>>>Model: STGCN; K_fold = 6; Loss function: MSE 

Init Dataset:  torch.Size([7392, 40])
Number of Nan Value:  tensor(0)
Total Number of Elements:  295680 

>>>> Train/Valid/Test split method : similar_length_method



U size:  torch.Size([6815, 40, 7]) Utarget size:  torch.Size([6815, 40, 1])
U_train size:  torch.Size([4089, 40, 7]) Utarget_train size:  torch.Size([4089, 40, 1])
U_valid size:  torch.Size([1363, 40, 7]) Utarget_valid size:  torch.Size([1363, 40, 1])
U_test size:  torch.Size([1362, 40, 7]) Utarget_test size:  torch.Size([1362, 40, 1])
U_train min:  tensor(0.) U_train max:  tensor(10798.)
U_valid min:  tensor(0.) U_valid max:  tensor(1774.)
U_test min:  tensor(0.) U_test max:  tensor(1352.)
Transfer Modes: DL

Init NetMob Dataset:  torch.Size([7392, 40, 1, 6, 6])
Number of Nan Value:  tensor(0)
Total Number of Elements:  10644480 

>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([6815, 40, 1, 6, 6, 7]) Utarget size:  torch.Size([6815, 40, 1, 6, 6, 1])
U_train size:  torch.Size([4089, 40, 1, 6, 6, 7]) Utarget_train size:  torch.Size([4089, 40, 1, 6, 6, 1])
U_valid size:  torch.Size([1363, 40, 1, 6, 6, 7]) Utarget_valid size:  torch.Size([1363, 40, 1, 6, 

  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


#### Prédiction avec uniquement Calendar, sans subway-in :

In [3]:
# Configuration: calendar features only, without `subway_in` as an input.
dataset_names = ["calendar"] # alternatives: ["subway_in","calendar"], ["subway_in"], ['data_bidon'], ['METR_LA'], ['PEMS_BAY']
# NOTE(review): coverage stays on the NetMob dataset although it is not an input here --
# presumably to keep the same period as the NetMob runs; confirm this is intended.
dataset_for_coverage = ['netmob_image_per_station'] # alternatives: ['data_bidon','netmob'], ['subway_in','netmob'], ['METR_LA'], ['PEMS_BAY']
model_name = 'STGCN'
vision_model_name =  None

# Build inputs, train, then evaluate on the 'test' split.
ds,args,trial_id,save_folder,dic_class2rpz,df_loss = get_ds(model_name,dataset_names,dataset_for_coverage,vision_model_name=vision_model_name)
trainer,df_loss = train_on_ds(model_name,ds,args,trial_id,save_folder,dic_class2rpz,df_loss)
Preds,Y_true,T_labels = trainer.testing(ds.normalizer, training_mode = 'test')

df_true,df_prediction = get_df_for_visualisation(ds,Preds,Y_true,Metro_B_TRG)
kick_off_time,match_times = rugby_matches(df_true.index,range)
# Pass the split explicitly: it is used in the plot title and matches the
# 'test' predictions computed above.
visualisation_special_event(trainer,df_true,df_prediction,station,kick_off_time,range,width,height,min_flow,training_mode='test')

40
>>>>Model: STGCN; K_fold = 6; Loss function: MSE 

Init Dataset:  torch.Size([7392, 40])
Number of Nan Value:  tensor(0)
Total Number of Elements:  295680 

>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([6815, 40, 7]) Utarget size:  torch.Size([6815, 40, 1])
U_train size:  torch.Size([4089, 40, 7]) Utarget_train size:  torch.Size([4089, 40, 1])
U_valid size:  torch.Size([1363, 40, 7]) Utarget_valid size:  torch.Size([1363, 40, 1])
U_test size:  torch.Size([1362, 40, 7]) Utarget_test size:  torch.Size([1362, 40, 1])
U_train min:  tensor(0.) U_train max:  tensor(10798.)
U_valid min:  tensor(0.) U_valid max:  tensor(1774.)
U_test min:  tensor(0.) U_test max:  tensor(1352.)

Fold n°0

Init Dataset:  torch.Size([2383, 40])
Number of Nan Value:  tensor(0)
Total Number of Elements:  95320 

>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([2287, 40, 7]) Utarget size:  torch.Size([2287, 40, 1])
U_train size:  torch.Size([1715,

  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)
