In [1]:
import pandas as pd
from utilities_DL import get_DataSet_and_invalid_dates,get_MultiModel_loss_args_emb_opts,load_init_trainer
from DL_class import MultiModelTrainer, Trainer
from config import get_args
from save_results import build_results_df
from paths import folder_path,file_name,get_save_directory
import time 
import torch
import argparse

import ray 
from ray import tune 
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.hyperopt import HyperOptSearch
# ==== GET PARAMETERS ====
# Base configuration of the selected architecture ('CNN' is the other value that was tried).
model_name = 'STGCN'
args = get_args(model_name)
# MTGNN variant, kept for reference:
#args = get_args(model_name = model_name,learn_graph_structure = True)

# ---- Overrides for the tuning run ----
args.epochs = 5
args.K_fold = 6   # Means we will use the first fold for the Ray Tuning and the 5 other ones to get the metrics

# Larger batches on GPU, smaller ones on CPU.
args.device, args.batch_size = ('cuda:0', 256) if torch.cuda.is_available() else ('cpu', 32)

args.single_station = True
args.ray = True

args.loss_function_type = 'quantile'   # alternative: 'MSE'

if args.loss_function_type != 'MSE':
    # Quantile loss: two outputs per prediction, calendar embedding fed at the input.
    args.embedding_dim = 3
    args.calendar_class = 3
    args.position = 'input'
    args.specific_lr = False
    args.type_calendar = 'tuple'
    args.out_dim = 2
    args.alpha = 0.1
    args.ray_track_pi = True
else:
    # MSE loss: single output, no alpha, prediction-interval tracking disabled.
    args.out_dim = 1
    args.alpha = None
    args.type_calendar = 'tuple'
    args.ray_track_pi = False


# Hyper-parameters whose presence in a sampled config triggers a call to
# `load_init_trainer` in the trainer builders below.
critical_keys = [
    'train_prop', 'valid_prop', 'test_prop', 'calib_prop',
    'batch_size', 'calendar_class', 'validation', 'K_fold',
    'W', 'D', 'H', 'step_ahead', 'single_station',
]

## Search Space

In [2]:
# Optimiser / regularisation hyper-parameters, always part of the search.
config = dict(
    lr=tune.qloguniform(1e-4, 1e-1, 5e-5),
    weight_decay=tune.uniform(0.0005, 0.1),
    momentum=tune.uniform(0.85, 0.99),
    dropout=tune.uniform(0, 0.9),
)

# Time-embedding hyper-parameters (only searched when args.time_embedding is set).
config_embedding = dict(
    # calendar_class=tune.choice([1,2,3]),   # currently excluded from the search
    embedding_dim=tune.choice([2, 3, 4, 5, 6]),
    multi_embedding=tune.choice([True, False]),
    TE_transfer=tune.choice([True, False]),
)

# STGCN-specific hyper-parameters; `adj_type` is a fixed value, not a sampled one.
config_stgcn = dict(
    Kt=tune.choice([2, 3, 4]),
    stblock_num=tune.choice([1, 2, 3, 4]),
    act_fun=tune.choice(['glu', 'gtu']),
    Ks=tune.choice([2, 3]),
    graph_conv_type=tune.choice(['cheb_graph_conv', 'graph_conv']),
    gso_type=tune.choice(['sym_norm_lap', 'rw_norm_lap', 'sym_renorm_adj', 'rw_renorm_adj']),
    adj_type='dist',
)

# Merge the optional groups into the search space actually handed to Tune.
if args.time_embedding:
    config.update(config_embedding)

if args.model_name == 'STGCN':
    config.update(config_stgcn)

In [3]:
def load_trainer(config,folder_path,file_name,args,dic_class2rpz,Datasets,DataLoader_list,nb_words_embedding,time_slots_labels,dic_rpz2class):
    """Build the fold-0 `Trainer` for one hyper-parameter configuration.

    Every entry of `config` whose key is already an attribute of `args` is
    written onto `args` (which is mutated in place). The pre-loaded data
    objects passed as arguments are reused unless `config` contains one of the
    module-level `critical_keys`, in which case they are rebuilt with
    `load_init_trainer`.

    Returns the `Trainer` built from the first dataset, dataloader, model and
    optimizer.
    """
    for name in config:
        if hasattr(args, name):
            setattr(args, name, config[name])

    # Rebuilding the data is slow (about 5 s), so only do it when a sampled key requires it.
    if any(name in critical_keys for name in config.keys()):
        print("Need to run 'load_init_trainer' at each iteragtion, which may take some time")
        (Datasets, DataLoader_list, dic_class2rpz,
         nb_words_embedding, time_slots_labels, dic_rpz2class) = load_init_trainer(folder_path, file_name, args)

    n_vertex = len(Datasets[0].columns)
    loss_function, Model_list, Optimizer_list, args_embedding = get_MultiModel_loss_args_emb_opts(
        args, nb_words_embedding, dic_class2rpz, n_vertex=n_vertex)

    # Only the first fold is used for tuning.
    return Trainer(
        Datasets[0], Model_list[0], DataLoader_list[0],
        args, Optimizer_list[0], loss_function,
        scheduler=None,
        args_embedding=args_embedding,
        save_dir=None,
        dic_class2rpz=dic_class2rpz,
    )

In [3]:
def load_trainer(config,folder_path,file_name,args,dic_class2rpz,Datasets,DataLoader_list,nb_words_embedding,time_slots_labels,dic_rpz2class):
    """Build the fold-0 `Trainer`, applying only the sampled learning rate.

    In this variant the generic "copy every config entry onto args" step is
    switched off: only `config['lr']` is written to `args.lr` (mutating `args`
    in place). The pre-loaded data objects are reused unless `config` contains
    one of the module-level `critical_keys`, in which case they are rebuilt
    with `load_init_trainer`.

    Returns the `Trainer` built from the first dataset, dataloader, model and
    optimizer.
    """
    #args = config['args']
    args.lr = config['lr']

    # Rebuilding the data is slow (about 5 s), so only do it when a sampled key requires it.
    if any(name in critical_keys for name in config.keys()):
        print("Need to run 'load_init_trainer' at each iteragtion, which may take some time")
        (Datasets, DataLoader_list, dic_class2rpz,
         nb_words_embedding, time_slots_labels, dic_rpz2class) = load_init_trainer(folder_path, file_name, args)

    n_vertex = len(Datasets[0].columns)
    loss_function, Model_list, Optimizer_list, args_embedding = get_MultiModel_loss_args_emb_opts(
        args, nb_words_embedding, dic_class2rpz, n_vertex=n_vertex)

    # Only the first fold is used for tuning.
    return Trainer(
        Datasets[0], Model_list[0], DataLoader_list[0],
        args, Optimizer_list[0], loss_function,
        scheduler=None,
        args_embedding=args_embedding,
        save_dir=None,
        dic_class2rpz=dic_class2rpz,
    )

def Train_with_tune(config):
    """Ray Tune trainable: build a trainer for `config` and run train/validation.

    The data objects and `args` are read from module-level names. Nothing is
    returned.
    """
    load_trainer(
        config, folder_path, file_name, args,
        dic_class2rpz, Datasets, DataLoader_list,
        nb_words_embedding, time_slots_labels, dic_rpz2class,
    ).train_and_valid()

# Reduced search space for this experiment: only the learning rate is sampled.
config = dict(lr=tune.qloguniform(1e-4, 1e-1, 5e-5))
#config.update({'args':args})

In [5]:
# Build the data objects once in the driver process; Train_with_tune reads them
# as module-level names when it calls load_trainer.
# NOTE(review): Ray has to ship whatever the trainable references to its workers;
# confirm these objects are small enough to be serialized with it.
Datasets,DataLoader_list,dic_class2rpz,nb_words_embedding,time_slots_labels,dic_rpz2class = load_init_trainer(folder_path,file_name,args)

# Stop any Ray instance left over from a previous cell, then start a new one.
ray.shutdown()
ray.init()

# Run the search: `num_samples` configurations are drawn from `config`.
analysis = tune.run(
        Train_with_tune,
        config=config,
        num_samples=100,  # Increase num_samples for more random combinations
    )


coverage period: 2019-01-01 00:00:00 - 2020-01-01 00:00:00
Time-step per hour: 4.0


2024-05-14 19:36:38,540	ERROR services.py:1207 -- Failed to start the dashboard , return code 0
2024-05-14 19:36:38,543	ERROR services.py:1232 -- Error should be written to 'dashboard.log' or 'dashboard.err'. We are printing the last 20 lines for you. See 'https://docs.ray.io/en/master/ray-observability/ray-logging.html#logging-directory-structure' to find where the log file is.
2024-05-14 19:36:38,571	ERROR services.py:1276 -- 
The last 20 lines of /tmp/ray/session_2024-05-14_19-36-36_021225_14037/logs/dashboard.log (it contains the error message from the dashboard): 
  File "/Users/romainrochas/opt/anaconda3/envs/tuning/lib/python3.11/site-packages/ray/dashboard/head.py", line 327, in run
    self.http_server = await self._configure_http_server(modules)
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/romainrochas/opt/anaconda3/envs/tuning/lib/python3.11/site-packages/ray/dashboard/head.py", line 158, in _configure_http_server
    http_server = HttpSer

In [3]:
def load_trainer(config,folder_path,file_name,args):
    """Build the fold-0 `Trainer` for one configuration, reloading the data every time.

    Every entry of `config` whose key is already an attribute of `args` is
    written onto `args` (mutated in place); the data objects are then rebuilt
    with `load_init_trainer` on each call.

    Returns the `Trainer` built from the first dataset, dataloader, model and
    optimizer.
    """
    for name in config:
        if hasattr(args, name):
            setattr(args, name, config[name])

    (Datasets, DataLoader_list, dic_class2rpz,
     nb_words_embedding, time_slots_labels, dic_rpz2class) = load_init_trainer(folder_path, file_name, args)

    n_vertex = len(Datasets[0].columns)
    loss_function, Model_list, Optimizer_list, args_embedding = get_MultiModel_loss_args_emb_opts(
        args, nb_words_embedding, dic_class2rpz, n_vertex=n_vertex)

    # Only the first fold is used for tuning.
    return Trainer(
        Datasets[0], Model_list[0], DataLoader_list[0],
        args, Optimizer_list[0], loss_function,
        scheduler=None,
        args_embedding=args_embedding,
        save_dir=None,
        dic_class2rpz=dic_class2rpz,
    )

def Train_with_tune(config):
    """Ray Tune trainable: build a trainer for `config` and run train/validation.

    `folder_path`, `file_name` and `args` are read from module-level names.
    Nothing is returned.
    """
    load_trainer(config, folder_path, file_name, args).train_and_valid()

# Build the data objects once in the driver process.
# NOTE(review): the load_trainer defined in this cell reloads its own copy on
# every call, so these driver-side objects look unused by the search — confirm.
Datasets,DataLoader_list,dic_class2rpz,nb_words_embedding,time_slots_labels,dic_rpz2class = load_init_trainer(folder_path,file_name,args)

# Stop any Ray instance left over from a previous cell, then start a new one.
ray.shutdown()
ray.init()

# Run the search: `num_samples` configurations are drawn from `config`.
analysis = tune.run(
        Train_with_tune,
        config=config,
        num_samples=3,  # Increase num_samples for more random combinations
    )

coverage period: 2019-01-01 00:00:00 - 2020-01-01 00:00:00
Time-step per hour: 4.0


2024-05-14 19:54:16,696	ERROR services.py:1207 -- Failed to start the dashboard , return code 0
2024-05-14 19:54:16,698	ERROR services.py:1232 -- Error should be written to 'dashboard.log' or 'dashboard.err'. We are printing the last 20 lines for you. See 'https://docs.ray.io/en/master/ray-observability/ray-logging.html#logging-directory-structure' to find where the log file is.
2024-05-14 19:54:16,716	ERROR services.py:1276 -- 
The last 20 lines of /tmp/ray/session_2024-05-14_19-54-14_201290_14440/logs/dashboard.log (it contains the error message from the dashboard): 
  File "/Users/romainrochas/opt/anaconda3/envs/tuning/lib/python3.11/site-packages/ray/dashboard/head.py", line 327, in run
    self.http_server = await self._configure_http_server(modules)
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/romainrochas/opt/anaconda3/envs/tuning/lib/python3.11/site-packages/ray/dashboard/head.py", line 158, in _configure_http_server
    http_server = HttpSer

0,1
Current time:,2024-05-14 19:55:03
Running for:,00:00:45.38
Memory:,8.4/16.0 GiB

Trial name,status,loc,Ks,Kt,TE_transfer,act_fun,dropout,embedding_dim,graph_conv_type,gso_type,lr,momentum,multi_embedding,stblock_num,weight_decay,iter,total time (s)
Train_with_tune_fc27c_00000,TERMINATED,127.0.0.1:14466,2,3,False,glu,0.299226,2,graph_conv,sym_norm_lap,0.00125,0.89419,True,2,0.0285128,5,29.0045
Train_with_tune_fc27c_00001,TERMINATED,127.0.0.1:14467,2,3,True,glu,0.144057,6,cheb_graph_conv,rw_norm_lap,0.0003,0.98927,False,3,0.0567686,5,38.0215
Train_with_tune_fc27c_00002,TERMINATED,127.0.0.1:14468,3,2,True,gtu,0.186454,5,graph_conv,sym_norm_lap,0.00385,0.894476,True,1,0.057037,5,25.0208


[2m[36m(Train_with_tune pid=14468)[0m coverage period: 2019-01-01 00:00:00 - 2020-01-01 00:00:00
[2m[36m(Train_with_tune pid=14468)[0m Time-step per hour: 4.0


[2m[36m(Train_with_tune pid=14467)[0m [0.00561264]
[2m[36m(Train_with_tune pid=14467)[0m not reaching the requested tolerance 5.960464477539062e-07.
[2m[36m(Train_with_tune pid=14467)[0m Use iteration 21 instead with accuracy 
[2m[36m(Train_with_tune pid=14467)[0m 0.005612642301555001.
[2m[36m(Train_with_tune pid=14467)[0m 
[2m[36m(Train_with_tune pid=14467)[0m   _, eigvec = lobpcg(XH_X, X, tol=tol ** 2, maxiter=maxiter,
[2m[36m(Train_with_tune pid=14467)[0m [0.00561264]
[2m[36m(Train_with_tune pid=14467)[0m not reaching the requested tolerance 5.960464477539062e-07.
[2m[36m(Train_with_tune pid=14467)[0m   _, eigvec = lobpcg(XH_X, X, tol=tol ** 2, maxiter=maxiter,
[2m[36m(Train_with_tune pid=14467)[0m [0.01323469]
[2m[36m(Train_with_tune pid=14467)[0m not reaching the requested tolerance 5.960464477539062e-07.
[2m[36m(Train_with_tune pid=14467)[0m Use iteration 18 instead with accuracy 
[2m[36m(Train_with_tune pid=14467)[0m 0.005966110405363099.


[2m[36m(Train_with_tune pid=14467)[0m TE impossible
[2m[36m(Train_with_tune pid=14466)[0m coverage period: 2019-01-01 00:00:00 - 2020-01-01 00:00:00[32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m
[2m[36m(Train_with_tune pid=14466)[0m Time-step per hour: 4.0[32m [repeated 2x across cluster][0m
[2m[36m(Train_with_tune pid=14467)[0m start training
[2m[36m(Train_with_tune pid=14468)[0m Proportion of label with quantile order set to 1: 24.4%


Trial name,_metric
Train_with_tune_fc27c_00000,"{'Loss_model': 0.011454091917368629, 'MPIW': -6.288499355316162, 'PICP': 0.21180555555555555}"
Train_with_tune_fc27c_00001,"{'Loss_model': 0.010750323134873594, 'MPIW': 4.261815547943115, 'PICP': 0.45882936507936506}"
Train_with_tune_fc27c_00002,"{'Loss_model': 0.012302149124147873, 'MPIW': -0.8615270853042603, 'PICP': 0.30456349206349204}"


[2m[36m(Train_with_tune pid=14468)[0m TE impossible
[2m[36m(Train_with_tune pid=14466)[0m start training[32m [repeated 2x across cluster][0m
[2m[36m(Train_with_tune pid=14468)[0m Proportion of label with quantile order set to 1: 24.4%[32m [repeated 6x across cluster][0m
[2m[36m(Train_with_tune pid=14466)[0m Proportion of label with quantile order set to 1: 27.4%[32m [repeated 5x across cluster][0m
[2m[36m(Train_with_tune pid=14467)[0m Proportion of label with quantile order set to 1: 21.4%[32m [repeated 3x across cluster][0m


2024-05-14 19:55:03,474	INFO tune.py:1148 -- Total run time: 45.41 seconds (45.38 seconds for the tuning loop).


In [4]:
# Rank the trials by the model loss they reported (ascending, so the lowest loss comes first).
analysis.dataframe().sort_values(by='_metric/Loss_model')

Unnamed: 0,time_this_iter_s,done,training_iteration,trial_id,date,timestamp,time_total_s,pid,hostname,node_ip,...,config/dropout,config/embedding_dim,config/graph_conv_type,config/gso_type,config/lr,config/momentum,config/multi_embedding,config/stblock_num,config/weight_decay,logdir
1,4.324152,False,5,fc27c_00001,2024-05-14_19-55-03,1715709303,38.021509,14467,mbro-21-005,127.0.0.1,...,0.144057,6,cheb_graph_conv,rw_norm_lap,0.0003,0.98927,False,3,0.056769,/Users/romainrochas/ray_results/Train_with_tun...
0,2.601934,False,5,fc27c_00000,2024-05-14_19-54-54,1715709294,29.00449,14466,mbro-21-005,127.0.0.1,...,0.299226,2,graph_conv,sym_norm_lap,0.00125,0.89419,True,2,0.028513,/Users/romainrochas/ray_results/Train_with_tun...
2,1.954401,False,5,fc27c_00002,2024-05-14_19-54-50,1715709290,25.020832,14468,mbro-21-005,127.0.0.1,...,0.186454,5,graph_conv,sym_norm_lap,0.00385,0.894476,True,1,0.057037,/Users/romainrochas/ray_results/Train_with_tun...


In [5]:
def load_trainer(config,folder_path,file_name,args,nb_words_embedding = None,dic_class2rpz = None,Datasets = None,DataLoader_list = None,time_slots_labels=None,dic_rpz2class = None):
    """Build the fold-0 `Trainer`, reusing pre-loaded data whenever possible.

    Parameters
    ----------
    config : dict
        Hyper-parameter configuration. Its values are NOT copied onto `args`
        in this variant; only its keys are inspected to decide whether the
        data must be rebuilt.
        NOTE(review): presumably disabled because the notebook calls this
        function directly with the unresolved search-space dict — confirm
        before using it as a Ray Tune trainable.
    folder_path, file_name, args
        Forwarded to `load_init_trainer` when the data has to be (re)built.
    nb_words_embedding, dic_class2rpz, Datasets, DataLoader_list, time_slots_labels, dic_rpz2class
        Objects previously returned by `load_init_trainer`. They are rebuilt
        when `config` contains one of the module-level `critical_keys`, or
        when `Datasets` / `DataLoader_list` were not supplied.

    Returns
    -------
    The `Trainer` built from the first dataset, dataloader, model and optimizer.
    """
    reload_required = any(key in critical_keys for key in config)
    if reload_required:
        print("Need to run 'load_init_trainer' at each iteragtion, which may take some time")

    # Loading is slow (about 13 s), so it is skipped unless a critical key was
    # sampled or the caller did not provide the data (the defaults are None and
    # would otherwise fail on indexing below).
    if reload_required or Datasets is None or DataLoader_list is None:
        Datasets,DataLoader_list,dic_class2rpz,nb_words_embedding,time_slots_labels,dic_rpz2class = load_init_trainer(folder_path,file_name,args)

    # Load associated K_folds Models.
    # The models/optimizers only exist after this call; the previous version
    # indexed Model_list/Optimizer_list inside the reload branch, before they
    # were assigned, which raised UnboundLocalError.
    (loss_function,Model_list,Optimizer_list,args_embedding) = get_MultiModel_loss_args_emb_opts(args,nb_words_embedding,dic_class2rpz,n_vertex = len(Datasets[0].columns))
    dataset,dataloader,model,optimizer = Datasets[0],DataLoader_list[0],Model_list[0],Optimizer_list[0]

    trainer = Trainer(dataset,model,dataloader,
                    args,optimizer,loss_function,scheduler = None,
                    args_embedding=args_embedding,
                    save_dir = None,dic_class2rpz=dic_class2rpz)
    return(trainer)

def Train_with_tune(config):
    """Ray Tune trainable that runs train/validation on the module-level `trainer`.

    NOTE(review): `config` is not used, so every trial runs the same
    pre-built trainer, and that trainer has to be shipped to the workers
    together with this function — confirm this is intended.
    """
    trainer.train_and_valid()

In [6]:
# ASHA early-stopping scheduler, minimising the reported "Loss_model".
# NOTE(review): the result table of an earlier run exposes the loss as
# `_metric/Loss_model`; confirm the scheduler can resolve a top-level
# "Loss_model" key from what the trainer reports.
ray_scheduler = ASHAScheduler(
    metric="Loss_model",
    mode="min",
    max_t=args.epochs,  # Maximum of run epochs 
    grace_period=1,     # Minimum of run epochs 
    reduction_factor=2,  # 100*(1/reduction_factor) % of all trials are kept each time they are reduced
)

# Stop any Ray instance left over from a previous cell, then start a new one.
ray.shutdown()
ray.init()

2024-05-14 15:18:41,102	ERROR services.py:1207 -- Failed to start the dashboard , return code 0
2024-05-14 15:18:41,104	ERROR services.py:1232 -- Error should be written to 'dashboard.log' or 'dashboard.err'. We are printing the last 20 lines for you. See 'https://docs.ray.io/en/master/ray-observability/ray-logging.html#logging-directory-structure' to find where the log file is.
2024-05-14 15:18:41,115	ERROR services.py:1276 -- 
The last 20 lines of /tmp/ray/session_2024-05-14_15-18-38_597411_11134/logs/dashboard.log (it contains the error message from the dashboard): 
  File "/Users/romainrochas/opt/anaconda3/envs/tuning/lib/python3.11/site-packages/ray/dashboard/head.py", line 327, in run
    self.http_server = await self._configure_http_server(modules)
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/romainrochas/opt/anaconda3/envs/tuning/lib/python3.11/site-packages/ray/dashboard/head.py", line 158, in _configure_http_server
    http_server = HttpSer

0,1
Python version:,3.11.9
Ray version:,2.6.3


In [7]:
# Run outside Ray Tune: build the fold-0 trainer from the pre-loaded data
# objects and call train_and_valid() directly.
# NOTE(review): `config` here is the search-space dict itself, not a sampled configuration.
trainer = load_trainer(config,folder_path,file_name,args,nb_words_embedding,dic_class2rpz,Datasets,DataLoader_list, time_slots_labels, dic_rpz2class)
results_df = trainer.train_and_valid()
display(results_df)

# Validation losses recorded by the trainer (displayed as the cell output).
trainer.valid_loss

start training


None

[0.006075231594935296, 0.0054372804274501665]

In [9]:
# Launch the search with the Train_with_tune defined above.
# NOTE(review): the recorded output of this cell shows Ray aborting with
# "exceeded maximum protobuf size of 2GB"; presumably caused by serializing the
# module-level `trainer` that Train_with_tune references — confirm.
analysis = tune.run(
        Train_with_tune,
        config=config,
        num_samples=2,  # Increase num_samples for more random combinations
        #scheduler = ray_scheduler
    )

2024-05-14 15:19:25,860	INFO tune.py:657 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
[libprotobuf ERROR external/com_google_protobuf/src/google/protobuf/message_lite.cc:403] ray.rpc.InternalKVPutRequest exceeded maximum protobuf size of 2GB: 9410078683
E0514 15:22:30.083314000 140704557250496 call_op_set.h:322]            assertion failed: serializer_(msg_).ok()
*** SIGABRT received at time=1715692950 ***
PC: @     0x7ff811c137a2  (unknown)  __pthread_kill
[2024-05-14 15:22:30,085 E 11134 195502] logging.cc:361: *** SIGABRT received at time=1715692950 ***
[2024-05-14 15:22:30,105 E 11134 195502] logging.cc:361: PC: @     0x7ff811c137a2  (unknown)  __pthread_kill
Fatal Python error: Aborted

Stack (most recent call first):
  File "/Users/romainrochas/opt/anaconda3/envs/tuning/lib/python3.11/site-packages/ray/experimental/inter

: 

## Hyper Parameter Tuning sur le Fold 0

In [6]:
def Ray_Trainer(config,folder_path,file_name,args):
    """Train and validate the fold-0 model for one sampled configuration.

    Parameters
    ----------
    config : dict
        Sampled hyper-parameters. Every entry whose key is already an
        attribute of `args` is written onto `args` (mutated in place).
    folder_path, file_name
        Forwarded to `load_init_trainer` when the data has to be rebuilt.
    args
        Run configuration; updated from `config` and passed to the model
        factory and the `Trainer`.

    The data objects are rebuilt only when `config` contains one of the
    module-level `critical_keys`; otherwise the module-level `Datasets`,
    `DataLoader_list`, `dic_class2rpz` and `nb_words_embedding` are reused.

    Returns None.
    """
    print('start ray trainer')

    for key, value in config.items():
        if hasattr(args, key):
            setattr(args, key, value)

    # Loading is slow (about 13 s), so it is only redone when a sampled key requires it.
    # Distinct local names are used on purpose: assigning to `Datasets` & co.
    # here would make them function locals and the fallback to the
    # module-level objects would raise UnboundLocalError.
    if any(key in critical_keys for key in config):
        print("Need to run 'load_init_trainer' at each iteragtion, which may take some time")
        datasets, dataloaders, class2rpz, nb_words, _labels, _rpz2class = load_init_trainer(folder_path,file_name,args)
    else:
        datasets, dataloaders, class2rpz, nb_words = Datasets, DataLoader_list, dic_class2rpz, nb_words_embedding

    # Load associated K_folds Models:
    (loss_function,Model_list,Optimizer_list,args_embedding) = get_MultiModel_loss_args_emb_opts(args,nb_words,class2rpz,n_vertex = len(datasets[0].columns))

    # Only the first fold is used for tuning.
    dataset,dataloader,model,optimizer = datasets[0],dataloaders[0],Model_list[0],Optimizer_list[0]

    trainer = Trainer(dataset,model,dataloader,
                    args,optimizer,loss_function,scheduler = None,
                    args_embedding=args_embedding,
                    save_dir = None,dic_class2rpz=class2rpz)

    trainer.train_and_valid()


# Stop any Ray instance left over from a previous cell, then start a new one.
ray.shutdown()
ray.init()

# Disabled: HyperOpt search seeded with known-good configurations.
# NOTE(review): `current_best_params` is not defined anywhere in the visible
# notebook; define it before enabling this branch.
if False : 
    hyperopt_search = HyperOptSearch(
        metric="Loss_model", mode="min",
        points_to_evaluate=current_best_params)

# ASHA early-stopping scheduler, minimising the reported "Loss_model".
ray_scheduler = ASHAScheduler(
    metric="Loss_model",
    mode="min",
    max_t=args.epochs,  # Maximum of run epochs 
    grace_period=1,     # Minimum of run epochs 
    reduction_factor=2,  # 100*(1/reduction_factor) % of all trials are kept each time they are reduced
)

def Train_with_tuner(config):
    """Single-argument Ray Tune trainable wrapping `Ray_Trainer`.

    `folder_path`, `file_name` and `args` are read from module-level names.
    Nothing is returned.
    """
    Ray_Trainer(config, folder_path=folder_path, file_name=file_name, args=args)


# Run the search on fold 0 with the ASHA scheduler defined above.
analysis = tune.run(
        Train_with_tuner,
        config=config,
        num_samples=5,  # Increase num_samples for more random combinations
        scheduler = ray_scheduler
    )


In [4]:
# Load the data objects, then the associated K-fold models, loss and optimizers.
Datasets,DataLoader_list,dic_class2rpz,nb_words_embedding,time_slots_labels,dic_rpz2class = load_init_trainer(folder_path,file_name,args)
(loss_function,Model_list,Optimizer_list,args_embedding) = get_MultiModel_loss_args_emb_opts(args,nb_words_embedding,dic_class2rpz,n_vertex = len(Datasets[0].columns))
# Keep only the first fold's objects as module-level names.
dataset,dataloader,model,optimizer = Datasets[0],DataLoader_list[0],Model_list[0],Optimizer_list[0]

coverage period: 2019-01-01 00:00:00 - 2020-01-01 00:00:00
Time-step per hour: 4.0


In [5]:
def Ray_Trainer(config):
    """Ray Tune trainable built entirely from module-level fold-0 objects.

    A new `Trainer` is created from the module-level dataset, model,
    dataloader, optimizer and loss, then train/validation is run. Nothing is
    returned.

    NOTE(review): `config` is not used, so the sampled hyper-parameters have
    no effect on the trial — confirm this is intended.
    """
    trainer_options = dict(
        scheduler=None,
        args_embedding=args_embedding,
        save_dir=None,
        dic_class2rpz=dic_class2rpz,
    )
    Trainer(dataset, model, dataloader, args, optimizer, loss_function, **trainer_options).train_and_valid()



# Stop any Ray instance left over from a previous cell, then start a new one.
ray.shutdown()
ray.init()

# The Ray_Trainer defined in this cell takes the sampled `config` as its only
# argument, which is the signature Tune expects, so it is passed directly.
# (The previous lambda forwarded three extra positional arguments that this
# Ray_Trainer does not accept, and a stray argument-less `tune.run()` followed.)
analysis = tune.run(
        Ray_Trainer,
        config=config,
        num_samples=5,  # Increase num_samples for more random combinations
    )


## Choix des hyperparamètres en fonction du tuning, puis cross-validation sur les 5 folds restants

In [None]:
# Re-initialise `args` with the hyper-parameters selected by the tuning step.
# TODO: `args` is still a placeholder (Ellipsis) below; this cell cannot run
# until it is replaced by a real configuration, and Datasets / Model_list /
# Optimizer_list / loss_function must be rebuilt to match it.
#
#
args =  ... 
results_df = pd.DataFrame()
save_dir = get_save_directory(args)

# One trainer object handling all folds.
multimodeltrainer = MultiModelTrainer(Datasets,Model_list,DataLoader_list,args,Optimizer_list,loss_function,scheduler = None,args_embedding=args_embedding,ray= False,save_dir = save_dir,dic_class2rpz=dic_class2rpz)

# Run the K-fold validation and store the per-fold results.
(results_by_fold,mean_picp,mean_mpiw,dict_last_from_mean_of_folds,dict_best_from_mean_of_folds) = multimodeltrainer.K_fold_validation(mod_plot = 10)
results_by_fold.to_csv(f"{save_dir}results_by_fold.csv")

# Save results
results_df = build_results_df(results_df,args, mean_picp,mean_mpiw,dict_last_from_mean_of_folds,dict_best_from_mean_of_folds)
results_df.to_csv(f"{args.model_name}_{args.loss_function_type}_H{args.H}_D{args.D}_W{args.W}_E{args.epochs}_K_fold{args.K_fold}_Emb_dim{args.embedding_dim}FC1_17_8_FC2_8_4_save_results.csv")

# Get Parameters : 

In [None]:
# Load the base configuration of the selected architecture ('CNN' is the other value that was tried).
model_name = 'STGCN' #'CNN' 
args = get_args(model_name)
#args = get_args(model_name = model_name,learn_graph_structure = True)  # MTGNN

# Overrides: a single epoch and K_fold = 1.
args.epochs = 1
args.K_fold = 1

# MSE loss: single output, no alpha.
args.loss_function_type = 'MSE'
args.out_dim = 1
args.alpha = None
# Save directory derived from the configuration above.
main_dir = get_save_directory(args)

## Define fixed Dataset K_fold split for each trial: 

In [None]:
# Load dataset and invalid_dates (single_station is forced to False here, regardless of args).
dataset,invalid_dates = get_DataSet_and_invalid_dates(folder_path,file_name,args.W,args.D,args.H,args.step_ahead,single_station = False)

# Train / Valid / Test split and Normalize for K-fold 
# NOTE(review): the third value is named `time_slots_labels_list` here but
# `time_slots_labels` in the other cells — confirm which name later cells expect.
(Datasets,DataLoader_list,time_slots_labels_list,dic_class2rpz,dic_rpz2class,nb_words_embedding) =  dataset.split_K_fold(args,invalid_dates)

# Plot information about split and folds:
# NOTE(review): `plot_k_fold_split` is not among the imports visible at the top
# of this notebook — confirm where it comes from.
plot_k_fold_split(Datasets,invalid_dates)

# Test complet sur les 3 Top STGCN, avec sauvegarde du modèle :

In [None]:
# Accumulates one summary entry per tested calendar-embedding setup.
results_df = pd.DataFrame()

# Each tuple is one setup to evaluate; the commented line holds the full list of three, only one is active.
#for i,(calendar_class,position,specific_lr, type_calendar) in enumerate(zip([3,1,3],['input','input','input'],[True,True,False],['tuple','tuple','tuple'])):
for i,(calendar_class,position,specific_lr, type_calendar) in enumerate(zip([3],['input'],[True],['tuple'])):

    args.calendar_class = calendar_class
    args.position = position
    args.specific_lr = specific_lr
    args.type_calendar = type_calendar

    # The save directory is recomputed after args was updated for this setup.
    save_dir = get_save_directory(args)

    # Load dataset and invalid_dates (single_station is forced to False here).
    dataset,invalid_dates = get_DataSet_and_invalid_dates(folder_path,file_name,args.W,args.D,args.H,args.step_ahead,single_station = False)
    (Datasets,DataLoader_list,time_slots_labels,dic_class2rpz,dic_rpz2class,nb_words_embedding) =  dataset.split_K_fold(args,invalid_dates)

    # Load associated K_folds Models: 
    # NOTE(review): `n_vertex` is not passed here, unlike the tuning cells — confirm the default is suitable.
    (loss_function,Model_list,Optimizer_list,args_embedding) = get_MultiModel_loss_args_emb_opts(args,nb_words_embedding,dic_class2rpz)
    multimodeltrainer = MultiModelTrainer(Datasets,Model_list,DataLoader_list,args,Optimizer_list,loss_function,scheduler = None,args_embedding=args_embedding,ray= False,save_dir = save_dir,dic_class2rpz=dic_class2rpz)
    (results_by_fold,mean_picp,mean_mpiw,dict_last,dict_scores) = multimodeltrainer.K_fold_validation(mod_plot = 1)
    results_by_fold.to_csv(f"{save_dir}results_by_fold.csv")

    # Save results
    results_df = build_results_df(results_df,args, mean_picp,mean_mpiw,dict_last,dict_scores)

# Written once, after all setups have been evaluated; the frame is also displayed as the cell output.
results_df.to_csv('save_results.csv')
results_df

In [None]:
# ==== GET PARAMETERS ====
# Load the base configuration of the selected architecture ('CNN' is the other value that was tried).
model_name = 'STGCN' #'CNN' 
args = get_args(model_name)
#args = get_args(model_name = model_name,learn_graph_structure = True)  # MTGNN

# Modification : 
args.epochs = 300

# Save Directory:
# NOTE(review): `main_dir` is computed before H/W/D are overridden below and is
# not used again in this cell — confirm it is still needed.
main_dir = get_save_directory(args)
args.H = 0
args.W = 0
args.D = 0
args.L =args.H+args.W+args.D
args.single_station = True
# ==== TEST  ====
for K_fold in [5]:
    args.K_fold = K_fold
    # Summary frame restarted for each K_fold value.
    results_df = pd.DataFrame()

    # Each zipped tuple is one setup to evaluate; only one is active.
    #for i,(calendar_class,position,specific_lr, type_calendar) in enumerate(zip([3,1,3],['input','input','input'],[True,True,False],['tuple','tuple','tuple'])):
    #for i,(calendar_class,position,specific_lr, type_calendar) in enumerate(zip([3],['input'],[True],['tuple'])):
    for i,(embedding_dim,calendar_class,position,specific_lr, type_calendar,time_embedding) in enumerate(zip([3], # None
                                                                                                             [3], # 3
                                                                                               ['input'], # None
                                                                                               [False], # None 
                                                                                               ['tuple'], # None
                                                                                               [True] # False
                                                                                              )
                                                                                            ):
        args.embedding_dim = embedding_dim
        args.calendar_class = calendar_class
        args.position = position
        args.specific_lr = specific_lr
        args.type_calendar = type_calendar
        args.time_embedding = time_embedding

        # The save directory is recomputed after args was updated for this setup.
        save_dir = get_save_directory(args)

        # Load dataset and invalid_dates 
        dataset,invalid_dates = get_DataSet_and_invalid_dates(folder_path,file_name,args.W,args.D,args.H,args.step_ahead,single_station = args.single_station)
        (Datasets,DataLoader_list,time_slots_labels,dic_class2rpz,dic_rpz2class,nb_words_embedding) =  dataset.split_K_fold(args,invalid_dates)

        # Load associated K_folds Models: 
        # NOTE(review): `n_vertex` is not passed here, unlike the tuning cells — confirm the default is suitable.
        (loss_function,Model_list,Optimizer_list,args_embedding) = get_MultiModel_loss_args_emb_opts(args,nb_words_embedding,dic_class2rpz)
        multimodeltrainer = MultiModelTrainer(Datasets,Model_list,DataLoader_list,args,Optimizer_list,loss_function,scheduler = None,args_embedding=args_embedding,ray= False,save_dir = save_dir,dic_class2rpz=dic_class2rpz)
        
        (results_by_fold,mean_picp,mean_mpiw,dict_last_from_mean_of_folds,dict_best_from_mean_of_folds) = multimodeltrainer.K_fold_validation(mod_plot = 10)
        results_by_fold.to_csv(f"{save_dir}results_by_fold.csv")

        # Save results (the summary CSV is rewritten after every setup).
        results_df = build_results_df(results_df,args, mean_picp,mean_mpiw,dict_last_from_mean_of_folds,dict_best_from_mean_of_folds)
        results_df.to_csv(f"{args.model_name}_H{args.H}_D{args.D}_W{args.W}_E{args.epochs}_K_fold{args.K_fold}_Emb_dim{args.embedding_dim}FC1_17_8_FC2_8_4_save_results.csv")