In [1]:
#import: standard libraries, plus the classes
from modules.utils_model import SingleTaskModelTrainer, MultiTaskModelTrainer
from ray import tune
import torch
import pandas as pd
from modules.utils_data import get_graphs #will be changed later by Riccardo
from sklearn.model_selection import train_test_split

In [2]:
#just an example, we can think about different params/ranges
hp_search_config = {
        "lr": tune.loguniform(1e-5, 1e-2),
        "batch_size": tune.choice([8, 16]),
        "hidden_channels": tune.choice([32, 64, 128]),
        "num_layers": tune.choice([2, 3, 4]),
        "num_timesteps": tune.choice([1, 2, 3]),
        "gamma": tune.loguniform(0.9, 0.99),
        "Scheduler": tune.choice(["ReduceLROnPlateau", "ExponentialLR"]),
    }

In [3]:
#start with getting the data
train = pd.read_csv("train.csv")
train_graphs_DASH_charge_scaled = get_graphs(train,dash_charges=True,scaled =True,save_graphs = True)

Loading previously created graphs


In [4]:
#we need to split double: we use the first validation set to tune our hyperparameters, and then a second one to be used for early stopping of the final model. We could have smaller sets I think
train_data, val_data = train_test_split(train_graphs_DASH_charge_scaled, test_size=0.2, random_state=2000)
val1_data, val2_data = train_test_split(val_data, test_size=0.5, random_state=2000)

In [5]:
#because for some reason I run into memory issues, did not have this before, to fix. now it will give crap results for the full thing because much less data
from random import sample 
train_data_hp_opt = sample(train_data, 1000)
val_data_hp_opt = sample(val1_data, 100)

In [6]:
example_mtl_model = MultiTaskModelTrainer(sandbox=True,verbose=True,name='example_MTL',seed = 18012000,train_data = train_data_hp_opt,val_data=val_data_hp_opt, n_input_feautures=23)

In [7]:
example_mtl_model.tune_hyperparameters(config=hp_search_config,num_samples=1,max_num_epochs=1,gpus_per_trial=1,cpus_per_trial=16)

2024-03-26 19:04:35,527	INFO worker.py:1715 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
2024-03-26 19:04:37,783	INFO tune.py:583 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-03-26 19:04:45
Running for:,00:00:08.02
Memory:,10.1/62.5 GiB

Trial name,status,loc,Scheduler,batch_size,gamma,hidden_channels,lr,num_layers,num_timesteps,iter,total time (s),kendall_tau
train_model_with_ray_4f4d9_00000,TERMINATED,129.132.218.175:13978,ExponentialLR,8,0.958413,32,0.000606811,2,3,1,5.05224,0.311785


Trial name,kendall_tau,should_checkpoint
train_model_with_ray_4f4d9_00000,0.311785,True


[36m(func pid=13978)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/eruijsena/ray_results/train_model_with_ray_2024-03-26_19-04-37/train_model_with_ray_4f4d9_00000_0_Scheduler=ExponentialLR,batch_size=8,gamma=0.9584,hidden_channels=32,lr=0.0006,num_layers=2,num_2024-03-26_19-04-37/checkpoint_000000)
2024-03-26 19:04:45,834	INFO tune.py:1042 -- Total run time: 8.05 seconds (8.01 seconds for the tuning loop).


Best trial config: {'lr': 0.000606810841366676, 'batch_size': 8, 'hidden_channels': 32, 'num_layers': 2, 'num_timesteps': 3, 'gamma': 0.9584126887372598, 'Scheduler': 'ExponentialLR'}
Best trial final kendall_tau: 0.31178470060446595


In [9]:
#change the train and val data back first
example_mtl_model.train_data = train_data
example_mtl_model.val_data = val1_data

example_mtl_model.train_and_validate(num_epochs=50, save_models=True, es_patience=10, save_losses=True)

Epoch 1: Train Loss: 0.3887, Val Loss: 0.3948
Epoch 2: Train Loss: 0.3836, Val Loss: 0.3940
Epoch 3: Train Loss: 0.3798, Val Loss: 0.3908
Epoch 4: Train Loss: 0.3773, Val Loss: 0.3843
Epoch 5: Train Loss: 0.3746, Val Loss: 0.3797


([0.3886611156588732,
  0.3835504604358824,
  0.37977016480156744,
  0.37730875589800544,
  0.37464684307827734],
 [0.3948308053316108,
  0.3940248149450422,
  0.3908491695840577,
  0.384289342003958,
  0.3797187944511138])

In [10]:
torch.save(example_mtl_model.model,'model_full_test.pt')