In [24]:
import os
import sys

import torch
import torch.nn as nn
import torch.optim as optim

import numpy as np
import pandas as pd

from ray import tune
from ray.air import session
from ray.tune.schedulers import ASHAScheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau

from pathlib import Path

module_path = os.path.abspath(os.path.join(os.getcwd(), ".."))
if module_path not in sys.path:
    sys.path.append(module_path)

from src.models import *
from src.data import Data
from src.train import fit

In [25]:
# Dataset settings for this notebook.
# `data_dir` is the relative folder holding the data; `file_name` is the CSV
# handed to the training function through the Tune config.
data_dir = "../data"
file_name = "cleaned_data_1.csv"

# Column names: the timestamp column and the column the models predict.
datetime_variable = "Datetime"
target_variable = "Flow_Kalltveit"

In [26]:
def train_model(config):
    """Train a single model configuration and save the best network to disk.

    Parameters
    ----------
    config : dict
        One sampled hyperparameter configuration. Keys read here:
        ``arch``, ``sequence_length``, ``batch_size``, ``num_epochs``, ``lr``,
        ``weight_decay`` (the historical misspelling ``weigth_decay`` is still
        accepted), ``data_file``, ``datetime``, ``variables``,
        ``target_variable``, ``hidden_size`` and ``num_layers``.

    Returns
    -------
    The ``losses`` object returned by ``fit`` (second element of its result).

    Raises
    ------
    ValueError
        If ``config['arch']`` is not one of the supported architecture names.
    """
    # Validate the architecture before touching any data so that a bad name
    # fails immediately with a clear message instead of an unbound `net` later.
    arch = config['arch']
    supported_archs = (
        "FCN",
        "FCNTemporalAttention",
        "LSTM",
        "LSTMTemporalAttention",
        "LSTMSpatialAttention",
        "LSTMSpatialTemporalAttention",
    )
    if arch not in supported_archs:
        raise ValueError(
            "Unknown architecture {!r}; expected one of: {}".format(arch, ", ".join(supported_archs))
        )

    # Train on the GPU when one is available, otherwise fall back to the CPU.
    use_GPU = torch.cuda.is_available()
    device_name = "cuda" if use_GPU else "cpu"
    mode = {"name": device_name, "device": torch.device(device_name)}

    # Fixed train/validation/test split fractions.
    train_size = 0.7
    val_size = 0.2
    test_size = 0.1

    # Hyperparameters sampled by the tuner.
    sequence_length = config['sequence_length']
    batch_size = config['batch_size']
    num_epochs = config['num_epochs']
    lr = config['lr']
    # Prefer the correctly spelled key, but keep honouring the original
    # misspelling so existing search spaces continue to work.
    weight_decay = config['weight_decay'] if 'weight_decay' in config else config['weigth_decay']

    # Load the data set.
    data = Data(config['data_file'], config['datetime'])

    # Select variables to use (None/empty means no extra input columns).
    feature_columns = config['variables']
    target_variable = config['target_variable']
    X, y = data.data_transformation(
        sequence_length=sequence_length,
        target_variable=target_variable,
        columns_to_transformation=feature_columns,
    )

    # Split the data; only the training loader is shuffled.
    X_train, y_train, X_val, y_val, X_test, y_test = data.split_data(
        X, y, train_size=train_size, val_size=val_size, test_size=test_size
    )
    data_loader = {
        "train": data.create_dataloader(X_train, y_train, sequence_length, batch_size=batch_size, shuffle=True),
        "val": data.create_dataloader(X_val, y_val, sequence_length, batch_size=batch_size, shuffle=False),
        "test": data.create_dataloader(X_test, y_test, sequence_length, batch_size=batch_size, shuffle=False),
    }

    # Model inputs: one input per selected variable plus one more, or a single
    # input when no variables are selected.
    input_size = len(feature_columns) + 1 if feature_columns else 1
    hidden_size = config['hidden_size']
    num_layers = config['num_layers']
    output_size = 1

    # Every architecture takes the same four constructor arguments, so a
    # dispatch table replaces the repeated if/elif constructor calls.
    model_classes = {
        "FCN": FCN,
        "FCNTemporalAttention": FCNTemporalAttention,
        "LSTM": LSTM,
        "LSTMTemporalAttention": LSTMTemporalAttention,
        "LSTMSpatialAttention": LSTMSpatialAttention,
        "LSTMSpatialTemporalAttention": LSTMSpatialTemporalAttention,
    }
    net = model_classes[arch](input_size, hidden_size, num_layers, output_size)
    net.to(mode["device"])

    loss_function = nn.MSELoss().to(mode["device"])
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)

    # Halve the learning rate after 5 epochs without improvement of the
    # monitored (minimised) quantity.
    lr_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)

    # NOTE(review): use_amp=True is passed even on the CPU path — confirm that
    # `fit` handles mixed precision correctly when no GPU is present.
    best_net, losses = fit(net, loss_function, optimizer, data_loader, num_epochs, mode, lr_scheduler, use_amp=True)

    # Name the checkpoint after every config entry except 'variables'.
    run_name = '_'.join('{}-{}'.format(k, v) for k, v in config.items() if k not in ['variables'])

    # "../data" is resolved against the current working directory of the
    # process running this function; create it if missing so a finished
    # training run is not lost to a missing folder.
    out_dir = os.path.abspath("../data")
    os.makedirs(out_dir, exist_ok=True)
    torch.save(best_net, os.path.join(out_dir, run_name + '.pth'))

    return losses



In [27]:
from functools import partial  # not used by this cell any more; kept in case other cells rely on it

# Search space handed to Ray Tune. Plain values are passed through to
# `train_model` unchanged; `tune.*` entries are sampled / expanded per trial.
config = {
    "data_file": file_name,
    "datetime":  datetime_variable,
    "target_variable": target_variable,
    "arch": tune.grid_search(["LSTM"]), # "FCN", "FCNTemporalAttention", "LSTMTemporalAttention", "LSTM", "LSTMSpatialAttention", "LSTMSpatialTemporalAttention"
    "sequence_length": tune.choice([25]),
    'num_epochs': tune.choice([150, 200]),
    'num_layers': tune.choice([2, 3, 4]),
    "lr": tune.loguniform(1e-4, 1e-1),
    "weigth_decay": tune.choice([0, 0.001, 0.0001]),  # key spelling matches what train_model reads
    "batch_size": tune.choice([256, 512]),
    "hidden_size": tune.choice([32, 64]),
    "variables": tune.grid_search([
        None,
    ])
}

# Ask only for resources this machine reports: at most 12 CPUs, and a GPU only
# when CUDA is available (train_model falls back to the CPU otherwise).
# Requesting more than exists leaves the trial unable to be scheduled.
trial_cpus = min(12, os.cpu_count() or 1)
trial_gpus = 1 if torch.cuda.is_available() else 0

# NOTE(review): the recorded run of this cell failed with "The actor with name
# ImplicitFunc failed to import on the worker". Presumably the `src` package,
# which this notebook makes importable by editing sys.path in the driver
# process only, is not importable inside Ray worker processes — confirm, e.g.
# by putting the project root on PYTHONPATH before Ray starts.
analysis = tune.run(
    train_model,  # passed directly: partial() with no bound arguments added nothing
    config=config,
    resources_per_trial={"cpu": trial_cpus, "gpu": trial_gpus},
    num_samples=1,
)

0,1
Current time:,2023-04-02 14:35:02
Running for:,00:00:03.30
Memory:,14.3/31.9 GiB

Trial name,# failures,error file
train_model_c7c7b_00000,1,"C:\Users\magnu\ray_results\train_model_2023-04-02_14-34-58\train_model_c7c7b_00000_0_arch=LSTM,batch_size=512,hidden_size=64,lr=0.0044,num_epochs=200,num_layers=2,sequence_length=25,variabl_2023-04-02_14-34-58\error.txt"

Trial name,status,loc,arch,batch_size,hidden_size,lr,num_epochs,num_layers,sequence_length,variables,weigth_decay
train_model_c7c7b_00000,ERROR,,LSTM,512,64,0.00435276,200,2,25,,0


  File "python\ray\_raylet.pyx", line 850, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 902, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 857, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 861, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 803, in ray._raylet.execute_task.function_executor
  File "c:\Code\Master\hydro-ml\.venv\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "c:\Code\Master\hydro-ml\.venv\lib\site-packages\ray\_private\function_manager.py", line 586, in temporary_actor_method
    raise RuntimeError(
RuntimeError: The actor with name ImplicitFunc failed to import on the worker. This may be because needed library dependencies are not installed in the worker environment:

Traceback (most recent call last):
  File "c:\Code\Master\hydro-ml\.venv\lib\site-packages\ray\_private\function_manager.py", lin

Trial name,trial_id
train_model_c7c7b_00000,c7c7b_00000


TuneError: ('Trials did not complete', [train_model_c7c7b_00000])

[2m[36m(TemporaryActor pid=6996)[0m 2023-04-02 14:35:02,036	ERROR worker.py:772 -- Worker exits with an exit code None.
[2m[36m(TemporaryActor pid=6996)[0m  Traceback (most recent call last):
[2m[36m(TemporaryActor pid=6996)[0m   File "python\ray\_raylet.pyx", line 850, in ray._raylet.execute_task
[2m[36m(TemporaryActor pid=6996)[0m   File "python\ray\_raylet.pyx", line 902, in ray._raylet.execute_task
[2m[36m(TemporaryActor pid=6996)[0m   File "python\ray\_raylet.pyx", line 857, in ray._raylet.execute_task
[2m[36m(TemporaryActor pid=6996)[0m   File "python\ray\_raylet.pyx", line 861, in ray._raylet.execute_task
[2m[36m(TemporaryActor pid=6996)[0m   File "python\ray\_raylet.pyx", line 803, in ray._raylet.execute_task.function_executor
[2m[36m(TemporaryActor pid=6996)[0m   File "c:\Code\Master\hydro-ml\.venv\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
[2m[36m(TemporaryActor pid=6996)[0m     return method(__ray_actor,