In [3]:
# GET PARAMETERS
import os
import sys

# Put the parent folder on sys.path BEFORE importing the project modules below.
# The original cell did this after the imports, so on a fresh kernel the project
# imports could only succeed if the packages were already importable some other way.
# NOTE(review): presumably the project packages (examples, plotting, build_inputs,
# utils) live in that parent folder -- confirm.
current_path = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_path, '..'))
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

# Third-party libraries
import geopandas as gpd
import pandas as pd
from bokeh.layouts import column,row
from bokeh.plotting import show,output_notebook

# Project modules
from build_inputs.preprocess_subway_15 import get_trigram_correspondance
from examples.benchmark import local_get_args,get_inputs,train_on_ds,keep_track_on_model_metrics,get_trial_id
from plotting.TS_analysis import drag_selection_box,plot_single_point_prediction,plot_prediction_error,plot_loss_from_trainer,plot_TS
from utils.specific_event import rugby_matches


#### Specify the spatial unit we want to evaluate
# Parameters of the visualisation.
# These are module-level names: analysis_on_specific_training_mode reads
# `range`, `width`, `height`, `min_flow` and `station` as globals.
# NOTE(review): `range` shadows the Python builtin `range` for the whole notebook;
# renaming it requires updating every function that reads this global.
range = 3*60  # +/- window (in minutes) assumed to be affected around the event
width = 1000
height = 600
min_flow = 20

# Spatial unit to evaluate (re-assigned to 'GER' in a later cell of this notebook).
station = 'BON'  # 'BON'  #'GER'

# Correspondence table returned by get_trigram_correspondance(); it is indexed below
# by its 'Station' column.
# NOTE(review): the remaining column presumably holds the station trigram code -- confirm.
df_correspondance = get_trigram_correspondance()
Metro_B_stations = ['Charpennes','Brotteaux','Part-Dieu' ,'Place Guichard', 'Saxe - Gambetta',
       'Jean Macé','Place Jean Jaurès','Debourg','Stade de Gerland',"Gare d'Oullins"]
# Flattened list of the values found for the stations listed above.
Metro_B_TRG = list(df_correspondance.set_index('Station').loc[Metro_B_stations].values.reshape(-1))


def netmob_volume_on_POI(gdf_POI_2_tile_ids,app = 'Instagram',transfer_mode = 'DL',type_POI = 'stadium', spatial_unit = 'Lou_rugby',POI_or_station='POI',expanded='', folder_path= '../../../../data/rrochas/prediction_validation'):
    """Load the per-timestamp total of one app's traffic CSV for a single POI/station.

    Parameters
    ----------
    gdf_POI_2_tile_ids : table with at least the columns 'tag', 'name', 'type' and 'id'.
    app, transfer_mode, type_POI : used to build the path of the CSV file to read.
    spatial_unit : value looked up in the 'name' column.
    POI_or_station, expanded : concatenated and looked up in the 'type' column;
        `expanded` is also appended to the 'netmob_POI_Lyon' folder name.
    folder_path : root folder that contains the 'POIs' directory.

    Returns
    -------
    pandas Series indexed by datetime: the row-wise sum over all columns of the CSV.

    Raises
    ------
    AssertionError if the lookup does not match exactly one row.
    """
    tag_matches = gdf_POI_2_tile_ids['tag'] == type_POI
    name_matches = gdf_POI_2_tile_ids['name'] == spatial_unit
    type_matches = gdf_POI_2_tile_ids['type'] == f"{POI_or_station}{expanded}"
    selected = gdf_POI_2_tile_ids[tag_matches & name_matches & type_matches]
    assert len(selected) == 1, f"Length of gdf = {len(selected)} while it should be = 1"

    # The id of the matched row is part of the CSV file name.
    osmid = selected['id'].values[0]
    csv_path = f"{folder_path}/POIs/netmob_POI_Lyon{expanded}/{type_POI}/{app}/df_{osmid}_{transfer_mode}.csv"

    per_column_volume = pd.read_csv(csv_path,index_col = 0)
    serie = per_column_volume.sum(axis=1)
    serie.index = pd.to_datetime(serie.index)
    return serie

def evaluate_config(model_name,dataset_names,dataset_for_coverage,vision_model_name,transfer_modes= None,
                    type_POIs = ['stadium','nightclub'],
                    spatial_units = ['Lou_rugby','Ninkasi_Kao'],
                    apps = ['Instagram'],
                    POI_or_stations = ['POI'],
                    expanded =''
                    ):
    """Train a model on one configuration, then plot its predictions on each split.

    Steps, in order:
      1. build the dataset with get_ds and train on it with train_on_ds;
      2. build the dataset a second time with the modification {'shuffle': False} and
         give its dataloader to the trainer;
      3. read the POI GeoJSON file and run analysis_on_specific_training_mode for the
         'test', 'valid' and 'train' splits.

    The POI-related arguments (transfer_modes, type_POIs, spatial_units, apps,
    POI_or_stations, expanded) are forwarded unchanged to the analysis function.

    Returns
    -------
    (trainer, ds, ds_no_shuffle, args), where ds and args come from the first get_ds call.
    """
    ds,args,trial_id,save_folder,dic_class2rpz,df_loss = get_ds(model_name,dataset_names,dataset_for_coverage,vision_model_name=vision_model_name)
    trainer,df_loss = train_on_ds(model_name,ds,args,trial_id,save_folder,dic_class2rpz,df_loss)

    # Swap in a dataloader built with shuffle=False (this includes the 'train' split).
    # Only the dataset of this second build is used; the other returned values are discarded.
    ds_no_shuffle,_args,_trial_id,_save_folder,_dic_class2rpz,_df_loss = get_ds(
        model_name,dataset_names,dataset_for_coverage,
        vision_model_name = vision_model_name,
        modification = {'shuffle':False },
    )
    trainer.dataloader = ds_no_shuffle.dataloader

    # Load gdf for POIs
    folder_path= '../../../../data/rrochas/prediction_validation'
    gdf_POI_2_tile_ids = gpd.read_file(f"{folder_path}/POIs/gdf_POI_2_tile_ids.geojson")

    overlay_settings = dict(transfer_modes= transfer_modes,type_POIs = type_POIs,spatial_units=spatial_units,
                            apps=apps,POI_or_stations = POI_or_stations,expanded=expanded)
    for split in ('test','valid','train'):
        analysis_on_specific_training_mode(trainer,ds_no_shuffle,gdf_POI_2_tile_ids,training_mode=split,**overlay_settings)

    return trainer,ds,ds_no_shuffle,args

def analysis_on_specific_training_mode(trainer,ds,gdf_POI_2_tile_ids,training_mode,transfer_modes= None,
                                       type_POIs = ['stadium','nightclub'],
                                       spatial_units = ['Lou_rugby','Ninkasi_Kao'],
                                       apps = ['Instagram'],
                                       POI_or_stations = ['POI'],
                                       expanded = ''
                                       ):
    """Run the trainer on one split and display predictions next to NetMob series.

    Parameters
    ----------
    trainer : object exposing `testing(normalizer, training_mode=...)`.
    ds : dataset object; `ds.normalizer` is passed to `trainer.testing`.
    gdf_POI_2_tile_ids : lookup table passed on to netmob_volume_on_POI.
    training_mode : name of the split to evaluate (called with 'test', 'valid', 'train').
    transfer_modes, apps : lists combined to select the NetMob series to load. When either
        is None or empty, no NetMob series is loaded and None is passed to the plot.
    type_POIs, spatial_units, POI_or_stations : lists consumed in parallel with zip, so
        iteration stops at the shortest of the three.
    expanded : suffix forwarded to netmob_volume_on_POI.

    Notes
    -----
    Reads the notebook-level globals `range`, `station`, `width`, `height` and `min_flow`.
    Each loaded series is restricted to the prediction timestamps and min-max scaled.
    A constant series has max == min, so that scaling divides by zero.
    """
    Preds,Y_true,_ = trainer.testing(ds.normalizer, training_mode =training_mode)
    df_true,df_prediction = get_df_for_visualisation(ds,Preds,Y_true,training_mode)
    kick_off_time,_ = rugby_matches(df_true.index,range)

    # Always bind the name: the original left it undefined when `apps` was None
    # (NameError on the final call) and iterated over `transfer_modes=None` (TypeError).
    netmob_consumption = None
    if apps and transfer_modes:
        overlay = pd.DataFrame(index = df_true.index)
        for app in apps:
            for type_POI,spatial_unit,POI_or_station in zip(type_POIs,spatial_units,POI_or_stations):
                for transfer_mode in transfer_modes:
                    serie_netmob = netmob_volume_on_POI(gdf_POI_2_tile_ids,app,transfer_mode,type_POI,spatial_unit,POI_or_station,expanded)
                    serie_netmob = serie_netmob.loc[df_true.index]

                    # Min-max scaling so that series of different magnitude share one axis.
                    serie_netmob = (serie_netmob-serie_netmob.min())/(serie_netmob.max()-serie_netmob.min())

                    name_netmob_serie = f"{app}_{transfer_mode} at {spatial_unit}"
                    overlay[name_netmob_serie] = serie_netmob

        if len(overlay.columns) > 0:
            # The column count is taken before 'Sum_of_apps' is added, so despite its
            # name this column holds the sum divided by the number of loaded series.
            overlay['Sum_of_apps'] = overlay.sum(axis=1)/len(overlay.columns)
            netmob_consumption = overlay

    visualisation_special_event(trainer,df_true,df_prediction,station,kick_off_time,range,width,height,min_flow,training_mode = training_mode,netmob_consumption = netmob_consumption)

# Get df_True Volume: 
def get_df_for_visualisation(ds,Preds,Y_true,training_mode, #Metro_B_TRG,
                             ):
    '''
    Build two DataFrames, (df_true, df_prediction), from the target and prediction arrays.

    For both arrays, the entries `[:, unit, 0]` are kept for every position listed in
    `ds.spatial_unit.index`. Columns are named after `ds.spatial_unit.values`, and rows
    are indexed by the last column of `ds.tensor_limits_keeper.df_verif_<training_mode>`.

    No scaling is applied here: the values are used exactly as received.
    '''
    df_verif = getattr(ds.tensor_limits_keeper,f"df_verif_{training_mode}")
    row_labels = df_verif.iloc[:,-1]
    unit_positions = list(ds.spatial_unit.index)
    unit_names = ds.spatial_unit.values

    def _as_frame(values):
        # Keep only the first entry of the last axis for the selected spatial units.
        return pd.DataFrame(values[:,unit_positions,0],columns = unit_names,index = row_labels)

    df_true = _as_frame(Y_true)
    df_prediction = _as_frame(Preds)
    return df_true,df_prediction


def visualisation_special_event(trainer,df_true,df_prediction,station,kick_off_time,range,width,height,min_flow,training_mode,netmob_consumption):
    '''Interactive display of prediction, true value, prediction error and training loss.

    Left column, top to bottom: prediction vs. truth for `station`, the NetMob series
    (only when `netmob_consumption` is not None), the prediction error, and the
    drag-selection box. Right column: the loss curves of `trainer`.

    Parameters
    ----------
    trainer : passed to plot_loss_from_trainer.
    df_true, df_prediction : DataFrames passed to the prediction and error plots.
    station : column/station to display.
    kick_off_time, range : forwarded to plot_single_point_prediction.
        Note that the `range` parameter shadows the builtin inside this function.
    width, height : size of the main panels; the selection box uses height//3 and the
        loss plot uses width//3.
    min_flow : forwarded to plot_prediction_error.
    training_mode : split name, shown in the title of the first plot.
    netmob_consumption : DataFrame to plot with plot_TS, or None to skip that panel.
    '''
    # NOTE(review): the title always names "Stade du Lou Gerland", whatever `station` is.
    p1 = plot_single_point_prediction(df_true,df_prediction,station,title= f'{training_mode} Trafic Volume Prediction around at "Stade du Lou Gerland" subway station ',kick_off_time=kick_off_time, range=range,width=width,height = height,bool_show = False)
    p2 = plot_TS(netmob_consumption,width=width,height=height,bool_show=False) if netmob_consumption is not None else None
    p3 = plot_prediction_error(df_true,df_prediction,station,metrics =['mae','mse','mape'],title = 'Prediction Error',width=width,height=height,bool_show=False,min_flow = min_flow)

    # NOTE(review): p2 may be None here; confirm that drag_selection_box accepts it.
    select = drag_selection_box(df_true,p1,p2,p3,width=width,height=height//3)
    output_notebook()

    # bokeh's column() only accepts layout objects, so a skipped panel (None) is dropped
    # instead of being passed through.
    panels = [panel for panel in (p1,p2,p3,select) if panel is not None]
    col1 = column(*panels)

    col2 = plot_loss_from_trainer(trainer,width=width//3,height=height,bool_show=False)
    grid = row(col1,col2)

    show(grid)

def get_ds(model_name,dataset_names,dataset_for_coverage,vision_model_name = None, 
           modification = {'epochs' : 3 #100
                           }
            ):
    """Build the arguments and the dataset used for one configuration.

    Parameters
    ----------
    model_name, dataset_names, dataset_for_coverage : forwarded to local_get_args.
    vision_model_name : forwarded to get_trial_id and get_inputs.
    modification : dict of overrides forwarded to local_get_args. The default dict is
        shared between calls and is passed through as-is.

    Returns
    -------
    (ds, args, trial_id, save_folder, dic_class2rpz, df_loss) where `ds` is the first
    element of the datasets returned by get_inputs, `save_folder` is None and
    `df_loss` is an empty DataFrame.
    """
    # `dataset_for_coverage` is given separately from `dataset_names`.
    # NOTE(review): presumably so that every combination is evaluated on the same
    # period -- confirm against local_get_args.
    args,folds,_hp_tuning_on_first_fold = local_get_args(
        model_name,
        args_init=None,
        dataset_names=dataset_names,
        dataset_for_coverage=dataset_for_coverage,
        modification = modification,
    )
    trial_id = get_trial_id(args,vision_model_name=vision_model_name)

    # Only the first of the returned datasets is used; the splitter is discarded.
    _K_fold_splitter,K_subway_ds,dic_class2rpz = get_inputs(args,vision_model_name,folds)
    first_ds = K_subway_ds[0]

    save_folder = None
    df_loss = pd.DataFrame()
    return first_ds,args,trial_id,save_folder,dic_class2rpz,df_loss

## Evaluate on non-recurrent events: 
### Visualisation: 
#### Cas `subway_in` stade du Lou Gerland avec un match de Rugby : 

In [4]:
# Model inputs.
# Other values tried for dataset_names: ["subway_in","calendar"], ['data_bidon'], ['METR_LA'], ['PEMS_BAY']
dataset_names = ["subway_in"]
# Other values tried for dataset_for_coverage: ['data_bidon','netmob'], ['subway_in','netmob'], ['METR_LA'], ['PEMS_BAY']
dataset_for_coverage = ['subway_in','netmob_image_per_station']
model_name = 'STGCN'
vision_model_name = None

# Station displayed in the plots ('BON' or 'GER'); overrides the value set in the first cell.
station = 'GER'

# NetMob series shown next to the prediction.
apps = ['Instagram','Twitter','Google_Maps']  # 'Facebook' is another option
transfer_modes = ['DL']  # ['UL'] or ['DL','UL']
# The three lists below are read in parallel: one (type, name, POI/station) triple per entry.
type_POIs = ['stadium','station']  # ['stadium','nightclub'], ['stadium']
spatial_units = ['Matmut Stadium Gerland','GER']  # ['Lou_rugby'], ['Astroballe'], ['Lou_rugby','Ninkasi_Kao'], ['Ninkasi_Kao']
POI_or_stations = ['POI','station']
expanded = '_expanded'  # or ''

trainer,ds,ds_no_shuffle,args = evaluate_config(
    model_name,dataset_names,dataset_for_coverage,vision_model_name,
    transfer_modes = transfer_modes,
    type_POIs = type_POIs,
    spatial_units = spatial_units,
    apps = apps,
    POI_or_stations = POI_or_stations,
    expanded = expanded,
)

40
>>>>Model: STGCN; K_fold = 6; Loss function: MSE 

Init Dataset:  torch.Size([7392, 40])
Number of Nan Value:  tensor(0)
Total Number of Elements:  295680 

>>>> Train/Valid/Test split method : similar_length_method



U size:  torch.Size([6238, 40, 7]) Utarget size:  torch.Size([6238, 40, 1])
U_train size:  torch.Size([3742, 40, 7]) Utarget_train size:  torch.Size([3742, 40, 1])
U_valid size:  torch.Size([1248, 40, 7]) Utarget_valid size:  torch.Size([1248, 40, 1])
U_test size:  torch.Size([1247, 40, 7]) Utarget_test size:  torch.Size([1247, 40, 1])
U_train min:  tensor(0.) U_train max:  tensor(10798.)
U_valid min:  tensor(0.) U_valid max:  tensor(1405.)
U_test min:  tensor(0.) U_test max:  tensor(1352.)

Fold n°0

Init Dataset:  torch.Size([2174, 40])
Number of Nan Value:  tensor(0)
Total Number of Elements:  86960 

>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([2078, 40, 7]) Utarget size:  torch.Size([2078, 40, 1])
U_train size:  torch.Size([1558, 40, 7]) Utarget_train size:  torch.Size([1558, 40, 1])
U_valid size:  torch.Size([519, 40, 7]) Utarget_valid size:  torch.Size([519, 40, 1])
U_train min:  tensor(0.) U_train max:  tensor(1.)
U_valid min:  tensor(0.) U_

  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


#### Cas `subway_out` stade du Lou Gerland avec un match de Rugby : 

#### Cas `subway_in` Laurent Bonnevay Astroballe avec un match de Basket : 

#### Cas `subway_out` Laurent Bonnevay Astroballe avec un match de Basket : 

## Maintenant on va intégrer les données NetMob et voir si elles permettent de réduire l'erreur de prédiction :

In [12]:
import os 
import pickle 
import pandas as pd 

# Results of one hyper-parameter tuning run: a CSV of trials and the pickled model arguments.
trial_id = 'subway_in_STGCN_MSELoss_2024_12_03_15_38_74185'
path_csv = f'save/HyperparameterTuning/{trial_id}.csv'
path_pickle = 'save/HyperparameterTuning/model_args.pkl'
df_hp_tuning = pd.read_csv(path_csv)
# The file is opened in a `with` block so the handle is closed after loading
# (the original `pickle.load(open(...))` never closed it).
# NOTE: unpickling can execute arbitrary code -- only load files you produced yourself.
with open(path_pickle,'rb') as pickle_file:
    model_args = pickle.load(pickle_file)

In [13]:
# Pick the trial with the smallest value of the loss metric.
metric = '_metric/Loss_model'
best_model = df_hp_tuning.sort_values(by=metric).iloc[0]

# Names of the entries whose label contains 'config/', with that marker removed.
HP_args = [
    column_label.replace('config/', '')
    for column_label in best_model.index
    if 'config/' in column_label
]

# Arguments stored for this trial in the pickled dictionary.
args = model_args['model'][trial_id]['args']

In [12]:
dataset_names = ["subway_in","netmob_image_per_station"] # ["subway_in","calendar"] # ["subway_in"] # ['data_bidon'] # ['METR_LA'] # ['PEMS_BAY']
dataset_for_coverage = ['subway_in','netmob_image_per_station'] #  ['data_bidon','netmob'] #  ['subway_in','netmob']  # ['METR_LA'] # ['PEMS_BAY']
model_name = 'STGCN'
vision_model_name =  'ImageAvgPooling'

# Pass the NetMob overlay settings explicitly. Without them `transfer_modes` falls back to
# None while `apps` defaults to ['Instagram'], and the analysis step then iterates over None.
# The values are the ones defined in the first evaluation cell of this notebook
# (transfer_modes, type_POIs, spatial_units, apps, POI_or_stations, expanded).
(trainer,ds,ds_no_shuffle,args) = evaluate_config(model_name,dataset_names,dataset_for_coverage,vision_model_name,transfer_modes= transfer_modes,
                                                   type_POIs = type_POIs,spatial_units = spatial_units,apps = apps,POI_or_stations = POI_or_stations,expanded=expanded)

40
>>>>Model: STGCN; K_fold = 6; Loss function: MSE 

Init Dataset:  torch.Size([7392, 40])
Number of Nan Value:  tensor(0)
Total Number of Elements:  295680 



>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([6238, 40, 7]) Utarget size:  torch.Size([6238, 40, 1])
U_train size:  torch.Size([3742, 40, 7]) Utarget_train size:  torch.Size([3742, 40, 1])
U_valid size:  torch.Size([1248, 40, 7]) Utarget_valid size:  torch.Size([1248, 40, 1])
U_test size:  torch.Size([1247, 40, 7]) Utarget_test size:  torch.Size([1247, 40, 1])
U_train min:  tensor(0.) U_train max:  tensor(10798.)
U_valid min:  tensor(0.) U_valid max:  tensor(1405.)
U_test min:  tensor(0.) U_test max:  tensor(1352.)
Transfer Modes: DL

Init NetMob Dataset:  torch.Size([7392, 40, 1, 6, 6])
Number of Nan Value:  tensor(0)
Total Number of Elements:  10644480 

>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([6238, 40, 1, 6, 6, 7]) Utarget size:  torch.Size([6238, 40, 1, 6, 6, 1])
U_train size:  torch.Size([3742, 40, 1, 6, 6, 7]) Utarget_train size:  torch.Size([3742, 40, 1, 6, 6, 1])
U_valid size:  torch.Size([1248, 40, 1, 

  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


#### Prédiction avec uniquement NetMob, sans subway-in: 

In [10]:
dataset_names = ["netmob_image_per_station"] # ["subway_in","calendar"] # ["subway_in"] # ['data_bidon'] # ['METR_LA'] # ['PEMS_BAY']
dataset_for_coverage = ['netmob_image_per_station'] #  ['data_bidon','netmob'] #  ['subway_in','netmob']  # ['METR_LA'] # ['PEMS_BAY']
model_name = 'STGCN'
vision_model_name =  'ImageAvgPooling'

# Pass the NetMob overlay settings explicitly. Without them `transfer_modes` falls back to
# None while `apps` defaults to ['Instagram'], and the analysis step then iterates over None.
# The values are the ones defined in the first evaluation cell of this notebook
# (transfer_modes, type_POIs, spatial_units, apps, POI_or_stations, expanded).
(trainer,ds,ds_no_shuffle,args) = evaluate_config(model_name,dataset_names,dataset_for_coverage,vision_model_name,transfer_modes= transfer_modes,
                                                   type_POIs = type_POIs,spatial_units = spatial_units,apps = apps,POI_or_stations = POI_or_stations,expanded=expanded)

40
>>>>Model: STGCN; K_fold = 6; Loss function: MSE 

Init Dataset:  torch.Size([7392, 40])
Number of Nan Value:  tensor(0)
Total Number of Elements:  295680 



>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([6815, 40, 7]) Utarget size:  torch.Size([6815, 40, 1])
U_train size:  torch.Size([4089, 40, 7]) Utarget_train size:  torch.Size([4089, 40, 1])
U_valid size:  torch.Size([1363, 40, 7]) Utarget_valid size:  torch.Size([1363, 40, 1])
U_test size:  torch.Size([1362, 40, 7]) Utarget_test size:  torch.Size([1362, 40, 1])
U_train min:  tensor(0.) U_train max:  tensor(10798.)
U_valid min:  tensor(0.) U_valid max:  tensor(1774.)
U_test min:  tensor(0.) U_test max:  tensor(1352.)
Transfer Modes: DL

Init NetMob Dataset:  torch.Size([7392, 40, 1, 6, 6])
Number of Nan Value:  tensor(0)
Total Number of Elements:  10644480 

>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([6815, 40, 1, 6, 6, 7]) Utarget size:  torch.Size([6815, 40, 1, 6, 6, 1])
U_train size:  torch.Size([4089, 40, 1, 6, 6, 7]) Utarget_train size:  torch.Size([4089, 40, 1, 6, 6, 1])
U_valid size:  torch.Size([1363, 40, 1, 

  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


#### Prédiction avec uniquement Calendar, sans subway-in: 

In [11]:
dataset_names = ["calendar"] # ["subway_in","calendar"] # ["subway_in"] # ['data_bidon'] # ['METR_LA'] # ['PEMS_BAY']
dataset_for_coverage = ['netmob_image_per_station'] #  ['data_bidon','netmob'] #  ['subway_in','netmob']  # ['METR_LA'] # ['PEMS_BAY']
model_name = 'STGCN'
vision_model_name =  None

# Pass the NetMob overlay settings explicitly. Without them `transfer_modes` falls back to
# None while `apps` defaults to ['Instagram'], and the analysis step then iterates over None.
# The values are the ones defined in the first evaluation cell of this notebook
# (transfer_modes, type_POIs, spatial_units, apps, POI_or_stations, expanded).
(trainer,ds,ds_no_shuffle,args) = evaluate_config(model_name,dataset_names,dataset_for_coverage,vision_model_name,transfer_modes= transfer_modes,
                                                   type_POIs = type_POIs,spatial_units = spatial_units,apps = apps,POI_or_stations = POI_or_stations,expanded=expanded)

40
>>>>Model: STGCN; K_fold = 6; Loss function: MSE 

Init Dataset:  torch.Size([7392, 40])
Number of Nan Value:  tensor(0)
Total Number of Elements:  295680 



>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([6815, 40, 7]) Utarget size:  torch.Size([6815, 40, 1])
U_train size:  torch.Size([4089, 40, 7]) Utarget_train size:  torch.Size([4089, 40, 1])
U_valid size:  torch.Size([1363, 40, 7]) Utarget_valid size:  torch.Size([1363, 40, 1])
U_test size:  torch.Size([1362, 40, 7]) Utarget_test size:  torch.Size([1362, 40, 1])
U_train min:  tensor(0.) U_train max:  tensor(10798.)
U_valid min:  tensor(0.) U_valid max:  tensor(1774.)
U_test min:  tensor(0.) U_test max:  tensor(1352.)

Fold n°0

Init Dataset:  torch.Size([2383, 40])
Number of Nan Value:  tensor(0)
Total Number of Elements:  95320 

>>>> Train/Valid/Test split method : similar_length_method

U size:  torch.Size([2287, 40, 7]) Utarget size:  torch.Size([2287, 40, 1])
U_train size:  torch.Size([1715, 40, 7]) Utarget_train size:  torch.Size([1715, 40, 1])
U_valid size:  torch.Size([571, 40, 7]) Utarget_valid size:  torch.Size([571, 40, 1])
U_train min:  tens

  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)


  real = torch.tensor(real).reshape(-1)
  predict = torch.tensor(predict).reshape(-1)
