In [1]:
import pandas as pd
from utilities_DL import get_DataSet_and_invalid_dates,get_MultiModel_loss_args_emb_opts,load_init_trainer
from DL_class import MultiModelTrainer, Trainer
from config import get_args
from save_results import build_results_df
from paths import folder_path,file_name,get_save_directory
import time 
import torch
import argparse

from ray_config import get_ray_config
import ray 
from ray import tune 

# ==== GET PARAMETERS ====
# Start from the default configuration of the selected architecture.
model_name = 'STGCN'  # alternative: 'CNN'
args = get_args(model_name)
#args = get_args(model_name = model_name,learn_graph_structure = True)  # MTGNN

# ---- Overrides for this tuning session ----
args.epochs = 2
# 6 folds: fold 0 is reserved for the Ray Tune search, the 5 others give the metrics.
args.K_fold = 6

# Device and batch size go together: the large batch is only used on GPU.
args.device, args.batch_size = ('cuda:0', 256) if torch.cuda.is_available() else ('cpu', 32)

args.single_station = True
args.ray = True

args.loss_function_type = 'quantile'  # alternative: 'MSE'

# Both loss set-ups use the same calendar encoding.
args.type_calendar = 'tuple'

if args.loss_function_type != 'MSE':
    # Quantile loss: two outputs per prediction and prediction-interval tracking in Ray.
    # NOTE(review): alpha is presumably the target miscoverage level - confirm.
    args.embedding_dim = 3
    args.calendar_class = 3
    args.position = 'input'
    args.specific_lr = False
    args.out_dim = 2
    args.alpha = 0.1
    args.ray_track_pi = True
else:
    # MSE loss: a single output, no alpha, no prediction-interval tracking.
    args.out_dim = 1
    args.alpha = None
    args.ray_track_pi = False

## Search Space

In [2]:
# Optimiser / regularisation hyper-parameters, sampled for every model.
config = dict(
    lr=tune.qloguniform(1e-4, 1e-1, 5e-5),   # log-uniform, quantised by steps of 5e-5
    weight_decay=tune.uniform(0.0005, 0.1),
    momentum=tune.uniform(0.85, 0.99),
    dropout=tune.uniform(0, 0.9),
)

# Time-embedding options; only merged below when args.time_embedding is truthy.
config_embedding = dict(
    # calendar_class=tune.choice([1, 2, 3]),
    embedding_dim=tune.choice([2, 3, 4, 5, 6]),
    multi_embedding=tune.choice([True, False]),
    # TE_transfer=tune.choice([True, False]),
)

# STGCN architecture options; only merged below when the model is 'STGCN'.
config_stgcn = dict(
    Kt=tune.choice([2, 3, 4]),
    stblock_num=tune.choice([1, 2, 3, 4]),
    act_fun=tune.choice(['glu', 'gtu']),
    Ks=tune.choice([2, 3]),
    graph_conv_type=tune.choice(['cheb_graph_conv', 'graph_conv']),
    gso_type=tune.choice(['sym_norm_lap', 'rw_norm_lap', 'sym_renorm_adj', 'rw_renorm_adj']),
    adj_type='dist',   # fixed value, not searched
)

# Extend the base search space with the optional groups.
if args.time_embedding:
    config.update(config_embedding)

if args.model_name == 'STGCN':
    config.update(config_stgcn)

## Hyper Parameter Tuning sur le Fold 0

In [6]:
def load_trainer(config,folder_path,file_name,args):
    """Build a ``Trainer`` on the first fold for one sampled hyper-parameter set.

    Each entry of ``config`` whose key is already an attribute of ``args`` is
    written onto ``args`` (``args`` is modified in place); keys that ``args``
    does not have are ignored. Data and models are then created from the
    updated ``args`` and only the element at index 0 of each list is used.

    Args:
        config: mapping from hyper-parameter name to its sampled value.
        folder_path: forwarded unchanged to ``load_init_trainer``.
        file_name: forwarded unchanged to ``load_init_trainer``.
        args: configuration object, updated in place.

    Returns:
        A ``Trainer`` built without scheduler and without save directory.
    """
    # Overwrite only the attributes that args already exposes.
    for name in config:
        if hasattr(args, name):
            setattr(args, name, config[name])

    (folds, loaders, dic_class2rpz, nb_words_embedding,
     _time_slots_labels, _dic_rpz2class) = load_init_trainer(folder_path, file_name, args)

    # The number of graph vertices is the number of columns of the first fold.
    loss_function, models, optimizers, args_embedding = get_MultiModel_loss_args_emb_opts(
        args, nb_words_embedding, dic_class2rpz, n_vertex=len(folds[0].columns)
    )

    return Trainer(folds[0], models[0], loaders[0],
                   args, optimizers[0], loss_function, scheduler=None,
                   args_embedding=args_embedding,
                   save_dir=None, dic_class2rpz=dic_class2rpz)

def Train_with_tune(config):
    """Trainable passed to ``tune.run``: train and validate one sampled configuration.

    Uses the notebook-level ``folder_path``, ``file_name`` and ``args``. The value
    returned by ``train_and_valid`` is discarded and nothing is returned.
    NOTE(review): metrics are presumably reported to Ray from inside the trainer
    (``args.ray`` is set) - confirm.
    """
    load_trainer(config, folder_path, file_name, args).train_and_valid()



# Scheduler, search algorithm and resource limits for this run, derived from args.
ray_scheduler,ray_search_alg,resources_per_trial,num_gpus,max_concurrent_trials = get_ray_config(args)

# Start from a clean Ray runtime so that num_gpus is taken into account.
# `ray.is_initialized` must be *called*: the bare function object is always
# truthy, which made the previous test meaningless. `ray.init` is also run
# unconditionally now instead of depending on that test.
if ray.is_initialized():
    ray.shutdown()
ray.init(num_gpus=num_gpus)

# Hyper-parameter search: each sample runs Train_with_tune with one configuration.
analysis = tune.run(
        Train_with_tune,
        config=config,
        num_samples=6,  # Increase num_samples for more random combinations
        resources_per_trial = resources_per_trial,
        max_concurrent_trials = max_concurrent_trials,
        scheduler = ray_scheduler,
        search_alg = ray_search_alg,
    )

2024-05-16 17:09:52,287	ERROR services.py:1207 -- Failed to start the dashboard , return code 0
2024-05-16 17:09:52,290	ERROR services.py:1232 -- Error should be written to 'dashboard.log' or 'dashboard.err'. We are printing the last 20 lines for you. See 'https://docs.ray.io/en/master/ray-observability/ray-logging.html#logging-directory-structure' to find where the log file is.
2024-05-16 17:09:52,308	ERROR services.py:1276 -- 
The last 20 lines of /tmp/ray/session_2024-05-16_17-09-50_068202_5397/logs/dashboard.log (it contains the error message from the dashboard): 
  File "/Users/romainrochas/opt/anaconda3/envs/tuning/lib/python3.11/site-packages/ray/dashboard/head.py", line 327, in run
    self.http_server = await self._configure_http_server(modules)
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/romainrochas/opt/anaconda3/envs/tuning/lib/python3.11/site-packages/ray/dashboard/head.py", line 158, in _configure_http_server
    http_server = HttpServ

0,1
Current time:,2024-05-16 17:10:32
Running for:,00:00:38.64
Memory:,8.3/16.0 GiB

Trial name,status,loc,Ks,Kt,act_fun,dropout,embedding_dim,graph_conv_type,gso_type,lr,momentum,multi_embedding,stblock_num,weight_decay,iter,total time (s)
Train_with_tune_5956c_00000,TERMINATED,127.0.0.1:5459,2,4,glu,0.168384,4,graph_conv,sym_renorm_adj,0.001,0.985297,False,4,0.0372033,1,28.5103
Train_with_tune_5956c_00001,TERMINATED,127.0.0.1:5460,3,4,glu,0.388887,5,cheb_graph_conv,sym_renorm_adj,0.0279,0.956231,False,3,0.00562806,1,28.2258
Train_with_tune_5956c_00002,TERMINATED,127.0.0.1:5461,3,3,glu,0.448697,3,graph_conv,sym_norm_lap,0.0003,0.890922,False,1,0.0401834,2,26.0946
Train_with_tune_5956c_00003,TERMINATED,127.0.0.1:5462,2,4,glu,0.419861,5,cheb_graph_conv,sym_norm_lap,0.00055,0.917278,True,2,0.0327489,2,29.635
Train_with_tune_5956c_00004,TERMINATED,127.0.0.1:5463,3,3,gtu,0.278855,2,graph_conv,sym_norm_lap,0.0587,0.954508,False,2,0.0400115,1,26.9145
Train_with_tune_5956c_00005,TERMINATED,127.0.0.1:5464,3,2,glu,0.663362,5,cheb_graph_conv,sym_norm_lap,0.00015,0.986806,True,4,0.00916009,1,28.2391


[2m[36m(Train_with_tune pid=5462)[0m coverage period: 2019-01-01 00:00:00 - 2020-01-01 00:00:00
[2m[36m(Train_with_tune pid=5462)[0m Time-step per hour: 4.0


[2m[36m(Train_with_tune pid=5462)[0m [0.00171721]
[2m[36m(Train_with_tune pid=5462)[0m not reaching the requested tolerance 5.960464477539062e-07.
[2m[36m(Train_with_tune pid=5462)[0m Use iteration 21 instead with accuracy 
[2m[36m(Train_with_tune pid=5462)[0m 0.0017172072956624291.
[2m[36m(Train_with_tune pid=5462)[0m 
[2m[36m(Train_with_tune pid=5462)[0m   _, eigvec = lobpcg(XH_X, X, tol=tol ** 2, maxiter=maxiter,
[2m[36m(Train_with_tune pid=5462)[0m [0.00171721]
[2m[36m(Train_with_tune pid=5462)[0m not reaching the requested tolerance 5.960464477539062e-07.
[2m[36m(Train_with_tune pid=5462)[0m   _, eigvec = lobpcg(XH_X, X, tol=tol ** 2, maxiter=maxiter,
[2m[36m(Train_with_tune pid=5462)[0m [0.00321617]
[2m[36m(Train_with_tune pid=5462)[0m not reaching the requested tolerance 5.960464477539062e-07.
[2m[36m(Train_with_tune pid=5462)[0m Use iteration 20 instead with accuracy 
[2m[36m(Train_with_tune pid=5462)[0m 0.0026350128453127516.
[2m[36m(Tr

[2m[36m(Train_with_tune pid=5459)[0m start training
[2m[36m(Train_with_tune pid=5464)[0m coverage period: 2019-01-01 00:00:00 - 2020-01-01 00:00:00[32m [repeated 5x across cluster][0m
[2m[36m(Train_with_tune pid=5464)[0m Time-step per hour: 4.0[32m [repeated 5x across cluster][0m
[2m[36m(Train_with_tune pid=5461)[0m Proportion of label with quantile order set to 1: 26.8%


Trial name,_metric
Train_with_tune_5956c_00000,"{'Loss_model': 0.03162562436172887, 'MPIW': -10.806896209716797, 'PICP': 0.4439484126984127}"
Train_with_tune_5956c_00001,"{'Loss_model': 0.03088225501160773, 'MPIW': -20.088762283325195, 'PICP': 0.14484126984126985}"
Train_with_tune_5956c_00002,"{'Loss_model': 0.015297703563220917, 'MPIW': -26.507434844970703, 'PICP': 0.08928571428571429}"
Train_with_tune_5956c_00003,"{'Loss_model': 0.014393046734825013, 'MPIW': -13.358352661132812, 'PICP': 0.2619047619047619}"
Train_with_tune_5956c_00004,"{'Loss_model': 0.029725275312860806, 'MPIW': -6.498607635498047, 'PICP': 0.25}"
Train_with_tune_5956c_00005,"{'Loss_model': 0.05281252756951347, 'MPIW': -161.0552520751953, 'PICP': 0.037698412698412696}"


[2m[36m(Train_with_tune pid=5464)[0m start training[32m [repeated 5x across cluster][0m
[2m[36m(Train_with_tune pid=5462)[0m Proportion of label with quantile order set to 1: 28.0%[32m [repeated 7x across cluster][0m


2024-05-16 17:10:32,322	INFO tune.py:1148 -- Total run time: 38.66 seconds (38.64 seconds for the tuning loop).


In [7]:
# Rank the trials by their reported model loss (ascending: lowest loss first).
analysis.dataframe().sort_values(by='_metric/Loss_model')

Unnamed: 0,time_this_iter_s,done,training_iteration,trial_id,date,timestamp,time_total_s,pid,hostname,node_ip,...,config/dropout,config/embedding_dim,config/graph_conv_type,config/gso_type,config/lr,config/momentum,config/multi_embedding,config/stblock_num,config/weight_decay,logdir
3,3.632186,True,2,5956c_00003,2024-05-16_17-10-32,1715872232,29.635034,5462,mbro-21-005,127.0.0.1,...,0.419861,5,cheb_graph_conv,sym_norm_lap,0.00055,0.917278,True,2,0.032749,/Users/romainrochas/ray_results/Train_with_tun...
2,2.410849,True,2,5956c_00002,2024-05-16_17-10-28,1715872228,26.094634,5461,mbro-21-005,127.0.0.1,...,0.448697,3,graph_conv,sym_norm_lap,0.0003,0.890922,False,1,0.040183,/Users/romainrochas/ray_results/Train_with_tun...
4,26.914531,True,1,5956c_00004,2024-05-16_17-10-29,1715872229,26.914531,5463,mbro-21-005,127.0.0.1,...,0.278855,2,graph_conv,sym_norm_lap,0.0587,0.954508,False,2,0.040012,/Users/romainrochas/ray_results/Train_with_tun...
1,28.225816,True,1,5956c_00001,2024-05-16_17-10-30,1715872230,28.225816,5460,mbro-21-005,127.0.0.1,...,0.388887,5,cheb_graph_conv,sym_renorm_adj,0.0279,0.956231,False,3,0.005628,/Users/romainrochas/ray_results/Train_with_tun...
0,28.510311,True,1,5956c_00000,2024-05-16_17-10-31,1715872231,28.510311,5459,mbro-21-005,127.0.0.1,...,0.168384,4,graph_conv,sym_renorm_adj,0.001,0.985297,False,4,0.037203,/Users/romainrochas/ray_results/Train_with_tun...
5,28.23911,True,1,5956c_00005,2024-05-16_17-10-31,1715872231,28.23911,5464,mbro-21-005,127.0.0.1,...,0.663362,5,cheb_graph_conv,sym_norm_lap,0.00015,0.986806,True,4,0.00916,/Users/romainrochas/ray_results/Train_with_tun...


## Choix des hyperparamètres en fonction du Tuning, puis Cross Validation sur les 5 Folds restants

In [None]:
# TODO: redo the initialisation with the args adapted to the tuning results.
# NOTE(review): `args` below is still a placeholder (Ellipsis). Datasets, Model_list,
# DataLoader_list, Optimizer_list, loss_function, args_embedding and dic_class2rpz are
# not defined at top level by the earlier cells (they only exist as locals of
# load_trainer), so this cell cannot run until that initialisation is written.
args =  ... 
results_df = pd.DataFrame()
save_dir = get_save_directory(args)

# One trainer handling all the folds, without Ray.
multimodeltrainer = MultiModelTrainer(Datasets,Model_list,DataLoader_list,args,Optimizer_list,loss_function,scheduler = None,args_embedding=args_embedding,ray= False,save_dir = save_dir,dic_class2rpz=dic_class2rpz)

# Cross-validation over the folds; per-fold results are written under save_dir.
(results_by_fold,mean_picp,mean_mpiw,dict_last_from_mean_of_folds,dict_best_from_mean_of_folds) = multimodeltrainer.K_fold_validation(mod_plot = 10)
results_by_fold.to_csv(f"{save_dir}results_by_fold.csv")

# Save results: summary table written to the current working directory.
results_df = build_results_df(results_df,args, mean_picp,mean_mpiw,dict_last_from_mean_of_folds,dict_best_from_mean_of_folds)
results_df.to_csv(f"{args.model_name}_{args.loss_function_type}_H{args.H}_D{args.D}_W{args.W}_E{args.epochs}_K_fold{args.K_fold}_Emb_dim{args.embedding_dim}FC1_17_8_FC2_8_4_save_results.csv")

# Get Parameters : 

In [None]:
# Default configuration of the selected architecture.
model_name = 'STGCN'  # alternative: 'CNN'
args = get_args(model_name)
#args = get_args(model_name = model_name,learn_graph_structure = True)  # MTGNN

# ---- Overrides ----
# MSE set-up: out_dim of 1 and alpha left to None.
args.loss_function_type = 'MSE'
args.out_dim = 1
args.alpha = None

# Minimal run: one epoch, K_fold set to 1.
args.epochs = 1
args.K_fold = 1

# Save directory derived from the final args.
main_dir = get_save_directory(args)

## Define fixed Dataset K_fold split for each trial: 

In [None]:
# Build the dataset together with its list of invalid dates.
dataset, invalid_dates = get_DataSet_and_invalid_dates(
    folder_path, file_name,
    args.W, args.D, args.H, args.step_ahead,
    single_station=False,
)

# K-fold train / valid / test split with normalisation, driven by args.
(Datasets, DataLoader_list, time_slots_labels_list,
 dic_class2rpz, dic_rpz2class, nb_words_embedding) = dataset.split_K_fold(args, invalid_dates)

# Plot information about the split and the folds.
# NOTE(review): plot_k_fold_split is not imported in the notebook's import cell -
# confirm where it is defined before running this cell on a fresh kernel.
plot_k_fold_split(Datasets, invalid_dates)

# Test complet sur les 3 Top STGCN, avec sauvegarde du modèle :

In [None]:
results_df = pd.DataFrame()

# One tuple per evaluated variant: (calendar_class, position, specific_lr, type_calendar).
# Full comparison previously used:
#   (3,'input',True,'tuple'), (1,'input',True,'tuple'), (3,'input',False,'tuple')
for i, (calendar_class, position, specific_lr, type_calendar) in enumerate([(3, 'input', True, 'tuple')]):

    # Apply the variant to the shared configuration.
    args.calendar_class = calendar_class
    args.position = position
    args.specific_lr = specific_lr
    args.type_calendar = type_calendar

    save_dir = get_save_directory(args)

    # Dataset, invalid dates and K-fold split for this variant.
    dataset, invalid_dates = get_DataSet_and_invalid_dates(
        folder_path, file_name,
        args.W, args.D, args.H, args.step_ahead,
        single_station=False,
    )
    (Datasets, DataLoader_list, time_slots_labels,
     dic_class2rpz, dic_rpz2class, nb_words_embedding) = dataset.split_K_fold(args, invalid_dates)

    # One model / optimizer per fold, then cross-validation without Ray.
    loss_function, Model_list, Optimizer_list, args_embedding = get_MultiModel_loss_args_emb_opts(
        args, nb_words_embedding, dic_class2rpz
    )
    multimodeltrainer = MultiModelTrainer(
        Datasets, Model_list, DataLoader_list, args, Optimizer_list, loss_function,
        scheduler=None, args_embedding=args_embedding, ray=False,
        save_dir=save_dir, dic_class2rpz=dic_class2rpz,
    )
    results_by_fold, mean_picp, mean_mpiw, dict_last, dict_scores = multimodeltrainer.K_fold_validation(mod_plot=1)
    results_by_fold.to_csv(f"{save_dir}results_by_fold.csv")

    # Append this variant to the summary table.
    results_df = build_results_df(results_df, args, mean_picp, mean_mpiw, dict_last, dict_scores)

# Persist the summary once every variant has been evaluated, then display it.
results_df.to_csv('save_results.csv')
results_df

In [None]:
# ==== GET PARAMETERS ====
# Default configuration of the selected architecture.
model_name = 'STGCN'  # alternative: 'CNN'
args = get_args(model_name)
#args = get_args(model_name = model_name,learn_graph_structure = True)  # MTGNN

# ---- Overrides ----
args.epochs = 300

# Save directory (computed before the H / W / D overrides below).
main_dir = get_save_directory(args)

# The three history lengths are set to 0, so args.L (their sum) is 0 as well.
args.H = 0
args.W = 0
args.D = 0
args.L = args.H + args.W + args.D
args.single_station = True

# ==== TEST  ====
for K_fold in (5,):
    args.K_fold = K_fold
    results_df = pd.DataFrame()

    # One tuple per variant:
    #   (embedding_dim, calendar_class, position, specific_lr, type_calendar, time_embedding)
    # Alternative noted by the author: (None, 3, None, None, None, False)
    # Earlier variants: calendar_class in [3,1,3], position 'input', specific_lr in [True,True,False], 'tuple'
    for i, (embedding_dim, calendar_class, position, specific_lr, type_calendar, time_embedding) in enumerate(
        [(3, 3, 'input', False, 'tuple', True)]
    ):
        # Apply the variant to the shared configuration.
        args.embedding_dim = embedding_dim
        args.calendar_class = calendar_class
        args.position = position
        args.specific_lr = specific_lr
        args.type_calendar = type_calendar
        args.time_embedding = time_embedding

        save_dir = get_save_directory(args)

        # Dataset, invalid dates and K-fold split for this variant.
        dataset, invalid_dates = get_DataSet_and_invalid_dates(
            folder_path, file_name,
            args.W, args.D, args.H, args.step_ahead,
            single_station=args.single_station,
        )
        (Datasets, DataLoader_list, time_slots_labels,
         dic_class2rpz, dic_rpz2class, nb_words_embedding) = dataset.split_K_fold(args, invalid_dates)

        # One model / optimizer per fold, then cross-validation without Ray.
        loss_function, Model_list, Optimizer_list, args_embedding = get_MultiModel_loss_args_emb_opts(
            args, nb_words_embedding, dic_class2rpz
        )
        multimodeltrainer = MultiModelTrainer(
            Datasets, Model_list, DataLoader_list, args, Optimizer_list, loss_function,
            scheduler=None, args_embedding=args_embedding, ray=False,
            save_dir=save_dir, dic_class2rpz=dic_class2rpz,
        )

        (results_by_fold, mean_picp, mean_mpiw,
         dict_last_from_mean_of_folds, dict_best_from_mean_of_folds) = multimodeltrainer.K_fold_validation(mod_plot=10)
        results_by_fold.to_csv(f"{save_dir}results_by_fold.csv")

        # Save results: the summary table is rewritten after every variant.
        results_df = build_results_df(
            results_df, args, mean_picp, mean_mpiw,
            dict_last_from_mean_of_folds, dict_best_from_mean_of_folds,
        )
        results_df.to_csv(f"{args.model_name}_H{args.H}_D{args.D}_W{args.W}_E{args.epochs}_K_fold{args.K_fold}_Emb_dim{args.embedding_dim}FC1_17_8_FC2_8_4_save_results.csv")