In [1]:
import os
import sys

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from models import new_models
from config import load_data

from ray import tune
from ray.air.integrations.mlflow import MLflowLoggerCallback
from ray.tune.schedulers import ASHAScheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau

import mlflow
from mlflow.tracking import MlflowClient

# Training setup

In [2]:
def fit(net, loss_function, optimizer, data_loader, num_epochs, mode, lr_scheduler, use_amp=False):
    """Train ``net`` and report the size-weighted mean loss of each split to Ray Tune.

    Every epoch runs the "train" split followed by the "val" split; the "test"
    split is evaluated once, during the final epoch only. After each split the
    mean loss is sent through its own ``tune.report`` call (``train_loss``,
    ``val_loss`` or ``test_loss``).

    Args:
        net: Model to optimise. It is updated in place.
        loss_function: Callable ``loss_function(output, target)`` returning a scalar tensor.
        optimizer: Optimizer bound to ``net``'s parameters.
        data_loader: Mapping with the keys "train", "val" and "test", each an
            iterable of ``(X_batch, y_batch)`` pairs.
        num_epochs: Number of passes over the train/val splits.
        mode: Mapping whose "device" entry is the device batches are moved to.
        lr_scheduler: Scheduler stepped once per epoch with the validation loss
            (called as ``lr_scheduler.step(metrics=...)``).
        use_amp: Enables both gradient scaling and autocast (mixed precision).

    Returns:
        The same ``net`` object after training.

    Raises:
        ValueError: If a split yields no samples, so no mean loss can be computed.
    """
    # Gradient scaling and autocast are both driven by ``use_amp`` so that the
    # flag really switches mixed precision on and off.
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
    for epoch in range(num_epochs):
        # The test split is only evaluated once, in the last epoch.
        if epoch < num_epochs - 1:
            keys = ["train", "val"]
        else:
            keys = ["train", "val", "test"]
        for key in keys:
            is_training = key == "train"
            dataset_size = 0
            dataset_loss = 0.0
            net.train(is_training)  # train(False) is equivalent to eval()
            for X_batch, y_batch in tqdm(data_loader[key]):
                X_batch, y_batch = X_batch.to(mode["device"]), y_batch.to(mode["device"])
                # Autograd is only needed while training.
                with torch.set_grad_enabled(is_training):
                    with torch.cuda.amp.autocast(enabled=use_amp):
                        batch_output = net(X_batch.float())
                        batch_loss = loss_function(batch_output, y_batch)
                    if is_training:
                        scaler.scale(batch_loss).backward()
                        scaler.step(optimizer)
                        scaler.update()
                        optimizer.zero_grad()
                # Weight each batch by its size so a smaller final batch does
                # not skew the split average.
                dataset_size += y_batch.shape[0]
                dataset_loss += y_batch.shape[0] * batch_loss.item()

            if dataset_size == 0:
                raise ValueError("The '{}' data loader produced no samples.".format(key))
            dataset_loss /= dataset_size

            # Report results to Ray Tune
            if key == "train":
                tune.report(train_loss=dataset_loss)
            elif key == "val":
                # The plateau scheduler is driven by the validation loss.
                lr_scheduler.step(metrics=dataset_loss)
                tune.report(val_loss=dataset_loss)
            else:
                tune.report(test_loss=dataset_loss)
    return net

In [3]:
from config import load_data

def train_model(config, data_dir):
    """Ray Tune trainable: build the data loaders and model, train, and save the weights.

    Args:
        config: Trial configuration. Keys read here: 'arch', 'sequence_length',
            'batch_size', 'num_epochs', 'lr', 'weigth_decay' (spelling matches
            the search-space key), 'variables', 'target_variable',
            'hidden_size', 'num_layers', 'cwd' and 'weights_dir'.
        data_dir: Location handed to ``load_data``.

    Raises:
        ValueError: If ``config['arch']`` is not one of the supported model names.

    Side effects:
        Writes the trained ``state_dict`` to
        ``<cwd>/<weights_dir>/<key-value pairs of config>.pth``.
    """
    # Validate the architecture before doing any expensive work, so a bad name
    # fails immediately with a clear message instead of an UnboundLocalError
    # after the data has been loaded.
    supported_archs = (
        "FCN",
        "FCNTemporalAttention",
        "LSTM",
        "LSTMTemporalAttention",
        "LSTMSpatialTemporalAttention",
    )
    arch = config['arch']
    if arch not in supported_archs:
        raise ValueError(
            "Unsupported arch {!r}; expected one of: {}".format(arch, ", ".join(supported_archs))
        )

    use_GPU = torch.cuda.is_available()
    device = torch.device("cuda" if use_GPU else "cpu")
    mode = {"name": device.type, "device": device}

    # Fixed split fractions for train / validation / test.
    train_size = 0.7
    val_size = 0.2
    test_size = 0.1

    sequence_length = config['sequence_length']
    batch_size = config['batch_size']
    num_epochs = config['num_epochs']
    lr = config['lr']
    weight_decay = config['weigth_decay']
    lag_variables = config['variables']  # renamed locally: `vars` shadows a builtin

    ld = load_data(data_dir=data_dir, target_variable=config['target_variable'])

    X, y = ld.create_lagged_matrix(window_size=sequence_length, vars_to_lag=lag_variables)

    X_train, y_train, X_val, y_val, X_test, y_test = ld.split_data(
        X, y, train_size=train_size, val_size=val_size, test_size=test_size
    )

    data_loader = {
        "train": ld.create_dataloader(X_train, y_train, sequence_length, batch_size=batch_size, shuffle=True),
        "val": ld.create_dataloader(X_val, y_val, sequence_length, batch_size=batch_size, shuffle=True),
        "test": ld.create_dataloader(X_test, y_test, sequence_length, batch_size=batch_size, shuffle=False),
    }

    # Model inputs: one feature per configured variable plus one extra
    # (presumably the target series itself -- confirm against load_data).
    input_size = len(lag_variables) + 1 if lag_variables else 1
    hidden_size = config['hidden_size']
    num_layers = config['num_layers']
    output_size = 1

    # All supported models share the same constructor signature.
    net = getattr(new_models, arch)(input_size, hidden_size, num_layers, output_size)
    net.to(mode["device"])

    loss_function = nn.MSELoss().to(mode["device"])
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)

    # Halve the learning rate when the validation loss stops improving.
    lr_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)

    # CUDA AMP utilities only apply on a GPU; on CPU they warn and disable
    # themselves, so only request them when CUDA is available.
    best_net = fit(net, loss_function, optimizer, data_loader, num_epochs, mode, lr_scheduler, use_amp=use_GPU)

    # File name encodes every config entry except paths and the variable list.
    out_name = "_".join(
        '{}-{}'.format(k, v)
        for k, v in config.items()
        if k not in ('weights_dir', 'cwd', 'variables')
    )
    out_dir = os.path.join(config['cwd'], config['weights_dir'])
    os.makedirs(out_dir, exist_ok=True)
    torch.save(best_net.state_dict(), os.path.join(out_dir, out_name + '.pth'))

# MLflow setup

In [4]:
client = MlflowClient()
cwd = os.getcwd()
exp_base_name = "Test_of_interface"

# Find the first free experiment name of the form "<base>_<i>". A failing
# create_experiment call (e.g. the name is already taken) moves on to the next
# suffix.
created = False
for i in range(100):
    exp_name = "{}_{}".format(exp_base_name, i)
    try:
        experiment_id = client.create_experiment(exp_name)
    except (TypeError, mlflow.exceptions.MlflowException):
        continue
    created = True
    break

if not created:
    print("ERROR: Try new experiment name.")
    sys.exit(1)

# One weights folder per experiment. makedirs also creates the root folder, so
# this works on a fresh checkout where ./model_weights/ does not exist yet.
weights_root = "./model_weights/"
weights_dir = weights_root + exp_name + '/'
os.makedirs(weights_dir, exist_ok=True)

In [5]:
# Location of the input data; passed to load_data by train_model.
data_dir = "./data/"
# Series the models are trained to predict; passed to load_data as target_variable.
target_variable = "Q_Kalltveit"

# Start experiments

In [6]:
# Named groups of input-variable (column) names. The hyperparameter search
# combines some of these groups into its "variables" choices.

Discharge = [
    "Q_Kalltveit_uten_tapping",
    "Q_Lyngsaana",
]

HBV = [
    "Q_HBV_mean",
    "Q_HBV",
    "Evap_HBV",
    "SNOW_MELT_HBV",
    "PRECIP_HBV",
    "GR_WAT_HBV",
    "TEMP_HBV",
    "SOIL_WAT_HBV",
]

# All logger series, followed by smaller per-site subsets.
Loggers = [
    "Vannstand Lyngsåna",
    "Vanntemp. Hiafossen",
    "Vannstand Hiafossen",
    "Vannstand Kalltveit",
    "Vanntemp. Kalltveit kum",
    "Vanntemp. Hiavatn",
    "Vannstand Hiavatn",
    "Vanntemp. Musdalsvatn",
    "Vannstand Musdalsvatn",
    "Vanntemp. Musdalsvatn nedstrøms",
    "Vannstand Musdalsvatn nedstrøms",
    "Vanntemp. Viglesdalsvatn",
    "Vannstand Viglesdalsvatn",
    "Vanntemp. Lyngsåna",
    "Vanntemp. Kalltveit elv",
]
Loggers_1 = ["Vanntemp. Lyngsåna", "Vannstand Lyngsåna"]
Loggers_2 = ["Vanntemp. Kalltveit kum", "Vannstand Kalltveit"]
Loggers_3 = ["Vanntemp. Hiavatn", "Vannstand Hiavatn"]
Loggers_4 = ["Vanntemp. Musdalsvatn", "Vannstand Musdalsvatn"]
Loggers_5 = ["Vanntemp. Musdalsvatn nedstrøms", "Vannstand Musdalsvatn nedstrøms"]
Loggers_6 = ["Vanntemp. Viglesdalsvatn", "Vannstand Viglesdalsvatn"]
Loggers_7 = ["Vanntemp. Kalltveit elv"]
Loggers_8 = ["Vannstand Hiafossen"]

# Meteorological series: the full list, then one subset per station.
Meto = [
    "Nedbør Nilsebu",
    "Nedbør Fister",
    "Lufttemp Fister",
    "Lufttemp. Nilsebu",
    "RelHum Nilsebu",
    "Vindretning Nilsebu",
]
Meto_1 = [
    "Nedbør Nilsebu",
    "Lufttemp. Nilsebu",
    "RelHum Nilsebu",
    "Vindretning Nilsebu",
]
Meto_2 = ["Nedbør Fister", "Lufttemp Fister"]

# Precipitation-only subsets, one per station.
Precipitation_1 = ["Nedbør Nilsebu"]
Precipitation_2 = ["Nedbør Fister"]



In [7]:
from functools import partial

# Search space for the hyperparameter sweep. grid_search entries are expanded
# exhaustively; choice/loguniform entries are sampled.
config = {
    "mlflow_experiment_id": experiment_id,
    "weights_dir": weights_dir,
    "cwd": cwd,
    "target_variable": target_variable,
    # Names accepted by train_model: "FCN", "FCNTemporalAttention", "LSTM",
    # "LSTMTemporalAttention", "LSTMSpatialTemporalAttention"
    "arch": tune.grid_search(["LSTMSpatialTemporalAttention"]),
    "sequence_length": tune.grid_search([25]),
    'num_epochs': tune.grid_search([150]),
    'num_layers': tune.choice([2, 3, 4]),
    "lr": tune.loguniform(1e-4, 1e-1),
    # NOTE: the key is spelled "weigth_decay" because train_model reads it
    # under that exact name.
    "weigth_decay": tune.choice([0, 0.001, 0.0001]),
    "batch_size": tune.choice([256, 256*2]),
    "hidden_size": tune.grid_search([64]),
    "variables": tune.grid_search([
        Discharge + HBV + Precipitation_1,
        Discharge + HBV + Precipitation_2,
        Discharge + HBV + Precipitation_1 + Precipitation_2,
    ])
}

# Never request more resources than the host has; otherwise the trials could
# not be scheduled. train_model falls back to the CPU when CUDA is unavailable.
resources_per_trial = {
    "cpu": min(12, os.cpu_count() or 1),
    "gpu": 1 if torch.cuda.is_available() else 0,
}

analysis = tune.run(
    partial(train_model, data_dir=data_dir),
    config=config,
    resources_per_trial=resources_per_trial,
    num_samples=2,
    callbacks=[MLflowLoggerCallback(experiment_name=exp_name)],
)

2023-03-26 14:47:54,863	INFO worker.py:1538 -- Started a local Ray instance.


0,1
Current time:,2023-03-26 14:47:59
Running for:,00:00:03.49
Memory:,11.5/31.9 GiB

Trial name,status,loc,arch,batch_size,hidden_size,lr,num_epochs,num_layers,sequence_length,variables,weigth_decay
train_model_6e511_00000,RUNNING,127.0.0.1:3220,LSTMSpatialTemp_c3a0,256,64,0.000172304,150,3,25,['Q_Kalltveit_u_bd88,0.0001
train_model_6e511_00001,PENDING,,LSTMSpatialTemp_c3a0,256,64,0.000143719,150,4,25,['Q_Kalltveit_u_bf88,0.0
train_model_6e511_00002,PENDING,,LSTMSpatialTemp_c3a0,256,64,0.0201881,150,4,25,['Q_Kalltveit_u_5b08,0.0
train_model_6e511_00003,PENDING,,LSTMSpatialTemp_c3a0,256,64,0.0355483,150,3,25,['Q_Kalltveit_u_b9c8,0.0001


  0%|          | 0/76 [00:00<?, ?it/s]
  1%|▏         | 1/76 [00:00<00:36,  2.08it/s]
 18%|█▊        | 14/76 [00:00<00:02, 30.90it/s]
 36%|███▌      | 27/76 [00:00<00:00, 54.55it/s]
 53%|█████▎    | 40/76 [00:00<00:00, 72.44it/s]
 68%|██████▊   | 52/76 [00:00<00:00, 84.92it/s]
 84%|████████▍ | 64/76 [00:01<00:00, 94.39it/s]


Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,node_ip,pid,test_loss,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
train_model_6e511_00000,2023-03-26_14-50-10,False,,88e6c7a87eec4731947d6a65f5bbf8de,"0_arch=LSTMSpatialTemporalAttention,batch_size=256,hidden_size=64,lr=0.0002,num_epochs=150,num_layers=3,sequence_length=25,variables=Q_Kalltveit_uten_tapping_Q_Lyngsaana_Q_HBV_mean_Q_HBV_Evap_HBV_SNOW_MELT_HBV_PRECIP_HBV_GR_WAT_HBV_TEMP_HBV_SOIL_WAT_HBV_Nedb_r_Nilsebu_Lufttemp_Nilsebu_RelHum_Nilsebu_Vindretning_Nilsebu,weigth_decay=0.0001",DESKTOP-D4IVECG,301,127.0.0.1,3220,9.77203,130.999,0.104644,130.999,1679835010,0,,301,6e511_00000,0.00300431


100%|██████████| 76/76 [00:01<00:00, 68.11it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 345.80it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 16%|█▌        | 12/76 [00:00<00:00, 111.25it/s]
 32%|███▏      | 24/76 [00:00<00:00, 112.26it/s]
 49%|████▊     | 37/76 [00:00<00:00, 118.25it/s]
 64%|██████▍   | 49/76 [00:00<00:00, 110.36it/s]
 80%|████████  | 61/76 [00:00<00:00, 111.84it/s]
100%|██████████| 76/76 [00:00<00:00, 114.44it/s]
100%|██████████| 22/22 [00:00<00:00, 396.81it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 13%|█▎        | 10/76 [00:00<00:00, 97.56it/s]
 30%|███       | 23/76 [00:00<00:00, 109.40it/s]
 46%|████▌     | 35/76 [00:00<00:00, 110.84it/s]
 62%|██████▏   | 47/76 [00:00<00:00, 111.98it/s]
 78%|███████▊  | 59/76 [00:00<00:00, 114.67it/s]
 95%|█████████▍| 72/76 [00:00<00:00, 116.78it/s]
100%|██████████| 76/76 [00:00<00:00, 113.77it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 392.35it/s]
  0%|        

[2m[36m(func pid=17528)[0m Epoch 00010: reducing learning rate of group 0 to 1.0094e-02.


 16%|█▌        | 12/76 [00:00<00:00, 117.88it/s]
 32%|███▏      | 24/76 [00:00<00:00, 113.17it/s]
 47%|████▋     | 36/76 [00:00<00:00, 111.68it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 108.30it/s]
100%|██████████| 22/22 [00:00<00:00, 357.42it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 33%|███▎      | 25/76 [00:00<00:00, 119.03it/s]
 49%|████▊     | 37/76 [00:00<00:00, 118.52it/s]
 64%|██████▍   | 49/76 [00:00<00:00, 112.14it/s]
 80%|████████  | 61/76 [00:00<00:00, 113.56it/s]
100%|██████████| 76/76 [00:00<00:00, 115.70it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 372.19it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 16%|█▌        | 12/76 [00:00<00:00, 112.28it/s]
 32%|███▏      | 24/76 [00:00<00:00, 113.30it/s]
 47%|████▋     | 36/76 [00:00<00:00, 116.26it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 115.33it/s]
 80%|████████  | 61/76 [00:00<00:00, 117.97it/s]
 97%|█████████▋| 74/76 [00:00<00:00, 119.19it/s]
100%|██████████| 22/22 [00:00<00:00, 358.26it/s]
 

[2m[36m(func pid=17528)[0m Epoch 00016: reducing learning rate of group 0 to 5.0470e-03.


 32%|███▏      | 24/76 [00:00<00:00, 111.87it/s]
 47%|████▋     | 36/76 [00:00<00:00, 113.61it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 115.40it/s]
 79%|███████▉  | 60/76 [00:00<00:00, 115.45it/s]
100%|██████████| 76/76 [00:00<00:00, 115.70it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 365.46it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 16%|█▌        | 12/76 [00:00<00:00, 117.38it/s]
 32%|███▏      | 24/76 [00:00<00:00, 116.29it/s]
 49%|████▊     | 37/76 [00:00<00:00, 118.66it/s]
 66%|██████▌   | 50/76 [00:00<00:00, 121.16it/s]
 83%|████████▎ | 63/76 [00:00<00:00, 117.73it/s]
100%|██████████| 76/76 [00:00<00:00, 118.59it/s]
100%|██████████| 22/22 [00:00<00:00, 351.51it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 33%|███▎      | 25/76 [00:00<00:00, 122.57it/s]
 50%|█████     | 38/76 [00:00<00:00, 122.97it/s]
 67%|██████▋   | 51/76 [00:00<00:00, 120.93it/s]
 84%|████████▍ | 64/76 [00:00<00:00, 122.34it/s]
100%|██████████| 76/76 [00:00<00:00, 116.71it/s]
1

[2m[36m(func pid=17528)[0m Epoch 00067: reducing learning rate of group 0 to 2.5235e-03.


 16%|█▌        | 12/76 [00:00<00:00, 113.42it/s]
 32%|███▏      | 24/76 [00:00<00:00, 116.35it/s]
 47%|████▋     | 36/76 [00:00<00:00, 109.82it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 111.33it/s]
 79%|███████▉  | 60/76 [00:00<00:00, 112.57it/s]
100%|██████████| 76/76 [00:00<00:00, 113.25it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 332.92it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 16%|█▌        | 12/76 [00:00<00:00, 114.02it/s]
 32%|███▏      | 24/76 [00:00<00:00, 106.05it/s]
 47%|████▋     | 36/76 [00:00<00:00, 108.80it/s]
 62%|██████▏   | 47/76 [00:00<00:00, 106.69it/s]
 78%|███████▊  | 59/76 [00:00<00:00, 110.05it/s]
 93%|█████████▎| 71/76 [00:00<00:00, 112.19it/s]
100%|██████████| 76/76 [00:00<00:00, 108.40it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 351.79it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 14%|█▍        | 11/76 [00:00<00:00, 103.70it/s]
 29%|██▉       | 22/76 [00:00<00:00, 103.76it/s]
 43%|████▎ 

[2m[36m(func pid=17528)[0m Epoch 00080: reducing learning rate of group 0 to 1.2618e-03.


 32%|███▏      | 24/76 [00:00<00:00, 118.67it/s]
 47%|████▋     | 36/76 [00:00<00:00, 117.15it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 116.51it/s]
 79%|███████▉  | 60/76 [00:00<00:00, 112.74it/s]
100%|██████████| 76/76 [00:00<00:00, 113.64it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 348.76it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 16%|█▌        | 12/76 [00:00<00:00, 111.46it/s]
 32%|███▏      | 24/76 [00:00<00:00, 112.58it/s]
 47%|████▋     | 36/76 [00:00<00:00, 110.54it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 107.94it/s]
 79%|███████▉  | 60/76 [00:00<00:00, 108.71it/s]
100%|██████████| 76/76 [00:00<00:00, 108.19it/s]
100%|██████████| 22/22 [00:00<00:00, 343.09it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 14%|█▍        | 11/76 [00:00<00:00, 105.82it/s]
 29%|██▉       | 22/76 [00:00<00:00, 104.95it/s]
 45%|████▍     | 34/76 [00:00<00:00, 108.44it/s]
 61%|██████    | 46/76 [00:00<00:00, 109.87it/s]
 75%|███████▌  | 57/76 [00:00<00:00, 86.79it/s] 
1

[2m[36m(func pid=17528)[0m Epoch 00096: reducing learning rate of group 0 to 6.3088e-04.


 30%|███       | 23/76 [00:00<00:00, 109.72it/s]
 46%|████▌     | 35/76 [00:00<00:00, 112.85it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 115.76it/s]
 80%|████████  | 61/76 [00:00<00:00, 117.59it/s]
100%|██████████| 76/76 [00:00<00:00, 116.48it/s]
100%|██████████| 22/22 [00:00<00:00, 383.48it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 16%|█▌        | 12/76 [00:00<00:00, 118.84it/s]
 47%|████▋     | 36/76 [00:00<00:00, 115.02it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 116.33it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 362.53it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 16%|█▌        | 12/76 [00:00<00:00, 117.75it/s]
 32%|███▏      | 24/76 [00:00<00:00, 110.64it/s]
 47%|████▋     | 36/76 [00:00<00:00, 113.28it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 114.67it/s]
 80%|████████  | 61/76 [00:00<00:00, 118.04it/s]
100%|██████████| 76/76 [00:00<00:00, 116.80it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 341.68it/s]
  0%|      

[2m[36m(func pid=17528)[0m Epoch 00105: reducing learning rate of group 0 to 3.1544e-04.


  0%|          | 0/76 [00:00<?, ?it/s]
 30%|███       | 23/76 [00:00<00:00, 113.90it/s]
 46%|████▌     | 35/76 [00:00<00:00, 109.01it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 113.69it/s]
 79%|███████▉  | 60/76 [00:00<00:00, 114.43it/s]
 95%|█████████▍| 72/76 [00:00<00:00, 109.39it/s]
100%|██████████| 76/76 [00:00<00:00, 111.28it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 321.21it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 14%|█▍        | 11/76 [00:00<00:00, 104.16it/s]
 29%|██▉       | 22/76 [00:00<00:00, 104.92it/s]
 45%|████▍     | 34/76 [00:00<00:00, 111.11it/s]
 61%|██████    | 46/76 [00:00<00:00, 113.59it/s]
100%|██████████| 22/22 [00:00<00:00, 330.46it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 16%|█▌        | 12/76 [00:00<00:00, 114.19it/s]
 32%|███▏      | 24/76 [00:00<00:00, 108.63it/s]
 47%|████▋     | 36/76 [00:00<00:00, 111.50it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 113.01it/s]
 79%|███████▉  | 60/76 [00:00<00:00, 111.93it/s]
100%|██████

[2m[36m(func pid=17528)[0m Epoch 00117: reducing learning rate of group 0 to 1.5772e-04.


 30%|███       | 23/76 [00:00<00:00, 109.78it/s]
 45%|████▍     | 34/76 [00:00<00:00, 102.47it/s]
 59%|█████▉    | 45/76 [00:00<00:00, 102.46it/s]
 76%|███████▋  | 58/76 [00:00<00:00, 109.85it/s]
 92%|█████████▏| 70/76 [00:00<00:00, 109.47it/s]
100%|██████████| 76/76 [00:00<00:00, 107.50it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 328.10it/s]
 13%|█▎        | 10/76 [00:00<00:00, 98.60it/s]
 28%|██▊       | 21/76 [00:00<00:00, 104.68it/s]
 42%|████▏     | 32/76 [00:00<00:00, 106.71it/s]
 58%|█████▊    | 44/76 [00:00<00:00, 107.83it/s]
 74%|███████▎  | 56/76 [00:00<00:00, 110.08it/s]
100%|██████████| 22/22 [00:00<00:00, 354.59it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 14%|█▍        | 11/76 [00:00<00:00, 103.06it/s]
 30%|███       | 23/76 [00:00<00:00, 111.39it/s]
 46%|████▌     | 35/76 [00:00<00:00, 111.82it/s]
 62%|██████▏   | 47/76 [00:00<00:00, 107.46it/s]
 76%|███████▋  | 58/76 [00:00<00:00, 104.65it/s]
 91%|█████████ | 69/76 [00:00<00:00, 104.

[2m[36m(func pid=17528)[0m Epoch 00127: reducing learning rate of group 0 to 7.8860e-05.


 17%|█▋        | 13/76 [00:00<00:00, 117.70it/s]
 33%|███▎      | 25/76 [00:00<00:00, 118.73it/s]
 49%|████▊     | 37/76 [00:00<00:00, 90.00it/s] 
 64%|██████▍   | 49/76 [00:00<00:00, 97.22it/s]
 79%|███████▉  | 60/76 [00:00<00:00, 99.79it/s]
100%|██████████| 22/22 [00:00<00:00, 365.20it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 16%|█▌        | 12/76 [00:00<00:00, 115.54it/s]
 32%|███▏      | 24/76 [00:00<00:00, 116.31it/s]
 47%|████▋     | 36/76 [00:00<00:00, 115.68it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 117.18it/s]
 79%|███████▉  | 60/76 [00:00<00:00, 116.31it/s]
 95%|█████████▍| 72/76 [00:00<00:00, 117.09it/s]
100%|██████████| 76/76 [00:00<00:00, 116.05it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 369.22it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 16%|█▌        | 12/76 [00:00<00:00, 109.08it/s]
 32%|███▏      | 24/76 [00:00<00:00, 113.56it/s]
 47%|████▋     | 36/76 [00:00<00:00, 115.70it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 111.40it/s]
 79

[2m[36m(func pid=17528)[0m Epoch 00139: reducing learning rate of group 0 to 3.9430e-05.


 14%|█▍        | 11/76 [00:00<00:00, 109.02it/s]
 29%|██▉       | 22/76 [00:00<00:00, 109.30it/s]
 45%|████▍     | 34/76 [00:00<00:00, 114.16it/s]
 61%|██████    | 46/76 [00:00<00:00, 116.09it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 274.22it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 14%|█▍        | 11/76 [00:00<00:00, 105.98it/s]
 29%|██▉       | 22/76 [00:00<00:00, 106.32it/s]
 43%|████▎     | 33/76 [00:00<00:00, 106.07it/s]
 59%|█████▉    | 45/76 [00:00<00:00, 108.33it/s]
 75%|███████▌  | 57/76 [00:00<00:00, 111.33it/s]
100%|██████████| 76/76 [00:00<00:00, 110.23it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 333.79it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 16%|█▌        | 12/76 [00:00<00:00, 115.63it/s]
 32%|███▏      | 24/76 [00:00<00:00, 114.64it/s]
 47%|████▋     | 36/76 [00:00<00:00, 115.21it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 112.06it/s]
 79%|███████▉  | 60/76 [00:00<00:00, 113.03it/s]
100%|██████

[2m[36m(func pid=17528)[0m Epoch 00145: reducing learning rate of group 0 to 1.9715e-05.


 32%|███▏      | 24/76 [00:00<00:00, 113.54it/s]
 47%|████▋     | 36/76 [00:00<00:00, 104.94it/s]
 63%|██████▎   | 48/76 [00:00<00:00, 108.25it/s]
 78%|███████▊  | 59/76 [00:00<00:00, 108.24it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 353.92it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 16%|█▌        | 12/76 [00:00<00:00, 117.58it/s]
 32%|███▏      | 24/76 [00:00<00:00, 117.31it/s]
 47%|████▋     | 36/76 [00:00<00:00, 116.37it/s]
 79%|███████▉  | 60/76 [00:00<00:00, 116.48it/s]
 95%|█████████▍| 72/76 [00:00<00:00, 114.94it/s]
100%|██████████| 76/76 [00:00<00:00, 115.60it/s]
  0%|          | 0/22 [00:00<?, ?it/s]
100%|██████████| 22/22 [00:00<00:00, 316.73it/s]
  0%|          | 0/76 [00:00<?, ?it/s]
 14%|█▍        | 11/76 [00:00<00:00, 106.60it/s]
 29%|██▉       | 22/76 [00:00<00:00, 103.98it/s]
 45%|████▍     | 34/76 [00:00<00:00, 107.24it/s]
 59%|█████▉    | 45/76 [00:00<00:00, 104.33it/s]
 74%|███████▎  | 56/76 [00:00<00:00, 106.11it/s]
100%|██████

TuneError: ('Trials did not complete', [train_model_6e511_00000, train_model_6e511_00001, train_model_6e511_00003])