In [1]:
import os
import sys

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from models import new_models
from config import load_data

from ray import tune
from ray.air import session
from ray.air.integrations.mlflow import MLflowLoggerCallback
from ray.tune.schedulers import ASHAScheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau

import mlflow
from mlflow.tracking import MlflowClient

# Training setup

In [2]:
def fit(net, loss_function, optimizer, data_loader, num_epochs, mode, lr_scheduler, use_amp=False):
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp) # Mixed-precision support for compatible GPUs
    for epoch in range(num_epochs):
        if epoch < num_epochs - 1:
            keys = ["train", "val"]
        else:
            keys = ["train", "val", "test"]
        for key in keys:
            dataset_size = 0
            dataset_loss = 0.0
            if key == "train":
                net.train()
            else:
                net.eval()
            for X_batch, y_batch in tqdm(data_loader[key]):
                X_batch, y_batch = X_batch.to(mode["device"]), y_batch.to(mode["device"])
                with torch.set_grad_enabled(mode=(key=="train")): # Autograd activated only during training
                    with torch.cuda.amp.autocast(enabled=False): # Mixed-precision support for compatible GPUs
                        batch_output = net(X_batch.float())
                        batch_loss = loss_function(batch_output, y_batch)
                    if key == "train":
                        scaler.scale(batch_loss).backward()
                        scaler.step(optimizer) 	
                        scaler.update()
                        optimizer.zero_grad()
                dataset_size += y_batch.shape[0]
                dataset_loss += y_batch.shape[0] * batch_loss.item()

            dataset_loss /= dataset_size

            # Report results to Ray Tune
            if key == "train":
                tune.report(train_loss=dataset_loss)
            elif key == "val":
                # Update learning rate
                lr_scheduler.step(metrics=dataset_loss)
                tune.report(val_loss=dataset_loss)
            else:
                tune.report(test_loss=dataset_loss)
    return net

In [3]:
from config import load_data

def train_model(config):

    use_GPU = torch.cuda.is_available()
    if use_GPU:
        mode = {"name": "cuda", "device": torch.device("cuda")}
    else:
        mode = {"name": "cpu", "device": torch.device("cpu")}

    # Define hyperparameters
    train_size = 0.7
    val_size = 0.2
    test_size = 0.1

    sequence_length = config['sequence_length']
    batch_size = config['batch_size']
    num_epochs = config['num_epochs']
    lr = config['lr']
    weight_decay = config['weigth_decay']
    vars = config['variables']

    ld = load_data(data_dir = config['cwd'], target_variable = config['target_variable'])
    
    X, y = ld.create_lagged_matrix(window_size=sequence_length, vars_to_lag=vars)

    X_train, y_train, X_val, y_val, X_test, y_test = ld.split_data(X, y, train_size=train_size, val_size=val_size, test_size=test_size)

    train_dataloader = ld.create_dataloader(X_train, y_train, sequence_length, batch_size=batch_size, shuffle=True)
    val_dataloader = ld.create_dataloader(X_val, y_val, sequence_length, batch_size=batch_size, shuffle=False)
    test_dataloader = ld.create_dataloader(X_test, y_test, sequence_length, batch_size=batch_size, shuffle=False)
    
    # Model inputs
    if vars:
        input_size = len(vars) + 1
    else:
        input_size = 1
    hidden_size = config['hidden_size']
    num_layers = config['num_layers']
    output_size = 1

    if config['arch'] == "FCN":
        net = new_models.FCN(input_size,
                    hidden_size,
                    num_layers,
                    output_size,
                    )
    elif config['arch'] ==  "FCNTemporalAttention":
        net = new_models.FCNTemporalAttention(input_size,
                    hidden_size,
                    num_layers,
                    output_size,
                    )
    elif config['arch'] == "LSTM":
        net = new_models.LSTM(input_size,
                    hidden_size,
                    num_layers,
                    output_size,
                    )
    elif config['arch'] == "LSTMTemporalAttention":
        net = new_models.LSTMTemporalAttention(input_size,
                    hidden_size,
                    num_layers,
                    output_size,
                    )
    elif config['arch'] == "LSTMSpatialTemporalAttention":
        net = new_models.LSTMSpatialTemporalAttention(input_size,
                    hidden_size,
                    num_layers,
                    output_size,
                    )

    data_loader = {
    "train": train_dataloader,
    "val": val_dataloader,
    "test": test_dataloader,
    }
    
    net.to(mode["device"])

    loss_function = nn.MSELoss().to(mode["device"])
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)

    # Define your learning rate scheduler
    lr_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)
                                           
    best_net = fit(net, loss_function, optimizer, data_loader, num_epochs, mode, lr_scheduler, use_amp=True)
    out_name = ""
    for k, v in config.items():
        if not k in ['weights_dir', 'cwd', 'variables']:
            out_name += '{}-{}_'.format(k, v)
    torch.save(best_net, os.path.join(config['cwd'], config['weights_dir'], out_name[:-1] + '.pth'))

# MLFlow setup

In [4]:
client = MlflowClient()
cwd = os.getcwd()
exp_base_name = "Test_of_interface"

created = 0
for i in range(100):
    try:
        exp_name = exp_base_name+"_{}".format(i)
        experiment_id = client.create_experiment(exp_name)
        created=1
        break
    except (TypeError, mlflow.exceptions.MlflowException):
        continue

if not created:
    print("ERROR: Try new experiment name.")
    sys.exit(1)

weights_root = "./model_weights/"
weights_dir = weights_root+exp_name+'/'
os.mkdir(weights_dir)

In [5]:
checkpoint_dir = "./model_weights/"
target_variable = 'Flow_Kalltveit'

# Start experiments

In [6]:
Discharge = ['Flow_Tapping', 'Flow_Lyngsaana']
HBV = ['Mean_Flow_HBV',
 'Flow_HBV',
 'Evaporation_HBV',
 'SNOW_MELT_HBV',
 'Precipitation_HBV',
 'Groundwater_Storage_HBV',
 'Temperature_HBV',
 'Soil_Water_Storage_HBV']
Loggers = ['Water_Level_Lyngsaana',
 'Water_Temperature_Hiafossen',
 'Water_Level_Hiafossen',
 'Water_Level_Kalltveit',
 'Water_Temperature_Kalltveit_Kum',
 'Water_Temperature_Hiavatn',
 'Water_Level_Hiavatn',
 'Water_Temperature_Musdalsvatn',
 'Water_Level_Musdalsvatn',
 'Water_Temperature_Musdalsvatn_Downstream',
 'Water_Level_Musdalsvatn_Downstream',
 'Water_Temperature_Viglesdalsvatn',
 'Water_Level_Viglesdalsvatn',
 'Water_Temperature_Lyngsaana',
 'Water_Temperature_Kalltveit_River']
Loggers_1 = ['Water_Temperature_Lyngsaana', 'Water_Level_Lyngsaana']
Loggers_2 = ['Water_Temperature_Kalltveit_Kum', 'Water_Level_Kalltveit']
Loggers_3 = ['Water_Temperature_Hiavatn', 'Water_Level_Hiavatn']
Loggers_4 = ['Water_Temperature_Musdalsvatn', 'Water_Level_Musdalsvatn']
Loggers_5 = ['Water_Temperature_Musdalsvatn_Downstream',
 'Water_Level_Musdalsvatn_Downstream']
Loggers_6 = ['Water_Temperature_Viglesdalsvatn', 'Water_Level_Viglesdalsvatn']
Loggers_7 = ['Water_Temperature_Kalltveit_River']
Loggers_8 = ['Water_Level_Hiafossen']
Meto = ['Precipitation_Nilsebu',
 'Precipitation_Fister',
 'Air_Temperature_Fister',
 'Air_Temperature_Nilsebu',
 'Relative_Humidity_Nilsebu',
 'Wind_Direction_Nilsebu']
Meto_1 = ['Precipitation_Nilsebu',
 'Air_Temperature_Nilsebu',
 'Relative_Humidity_Nilsebu',
 'Wind_Direction_Nilsebu']
Meto_2 = ['Precipitation_Fister', 'Air_Temperature_Fister']
Meto_Pre_Air_Nilsebu = ['Precipitation_Nilsebu', 'Air_Temperature_Nilsebu']
Meto_Pre_Air_Fister = ['Precipitation_Fister', 'Air_Temperature_Fister']


In [7]:
from functools import partial

config = {
    "mlflow_experiment_id": experiment_id,
    "weights_dir": weights_dir,
    "cwd": cwd,
    "target_variable": target_variable,
    "arch": tune.grid_search(["LSTM"]), # "FCN", "FCNTemporalAttention", "LSTMTemporalAttention", "LSTM", "LSTMSpatialAttention", "LSTMSpatialTemporalAttention"
    "sequence_length": tune.choice([25]),
    'num_epochs': tune.choice([150, 200]),
    'num_layers': tune.choice([2, 3, 4]),
    "lr": tune.loguniform(1e-4, 1e-1),
    "weigth_decay": tune.choice([0, 0.001, 0.0001]),
    "batch_size": tune.choice([256, 512]),
    "hidden_size": tune.choice([32, 64]),
    "variables": tune.grid_search([
        None,
        #Loggers_1+Loggers_2+Loggers_3+Loggers_4+Loggers_5+Loggers_6+Meto_Pre_Air_Nilsebu+Meto_Pre_Air_Fister
    ])
}

analysis = tune.run(
    partial(train_model),
    config=config,
    resources_per_trial={"cpu": 12, "gpu": 1},
    num_samples=1,
    callbacks=[MLflowLoggerCallback(experiment_name=exp_name)],
)

2023-03-31 15:17:53,056	INFO worker.py:1538 -- Started a local Ray instance.


0,1
Current time:,2023-03-31 15:17:59
Running for:,00:00:03.67
Memory:,21.2/31.9 GiB

Trial name,status,loc,arch,batch_size,hidden_size,lr,num_epochs,num_layers,sequence_length,variables,weigth_decay
train_model_73167_00000,RUNNING,127.0.0.1:8756,LSTM,512,64,0.00107296,200,3,25,,0


  0%|          | 0/89 [00:00<?, ?it/s]
  1%|          | 1/89 [00:00<00:32,  2.74it/s]
 10%|█         | 9/89 [00:00<00:03, 24.43it/s]
 20%|██        | 18/89 [00:00<00:01, 42.40it/s]
 31%|███▏      | 28/89 [00:00<00:01, 57.92it/s]
 43%|████▎     | 38/89 [00:00<00:00, 69.32it/s]
 55%|█████▌    | 49/89 [00:00<00:00, 79.09it/s]
 67%|██████▋   | 60/89 [00:00<00:00, 86.00it/s]
 79%|███████▊  | 70/89 [00:01<00:00, 89.88it/s]
 91%|█████████ | 81/89 [00:01<00:00, 92.77it/s]


Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,val_loss,warmup_time
train_model_73167_00000,2023-03-31_15-18-14,False,,7b3ae4e8f0f2429c9ddd7e407b867812,DESKTOP-D4IVECG,20,127.0.0.1,8756,14.5336,0.178499,14.5336,1680268694,0,,20,73167_00000,2.70725,0.00394034


100%|██████████| 89/89 [00:01<00:00, 68.87it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 218.50it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 97.84it/s]
 22%|██▏       | 20/89 [00:00<00:01, 66.57it/s]
 34%|███▎      | 30/89 [00:00<00:00, 78.01it/s]
 46%|████▌     | 41/89 [00:00<00:00, 86.32it/s]
 58%|█████▊    | 52/89 [00:00<00:00, 90.86it/s]
 70%|██████▉   | 62/89 [00:00<00:00, 93.53it/s]
 81%|████████  | 72/89 [00:00<00:00, 94.68it/s]
 92%|█████████▏| 82/89 [00:00<00:00, 96.11it/s]
100%|██████████| 89/89 [00:00<00:00, 90.24it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 219.43it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 94.84it/s]
 22%|██▏       | 20/89 [00:00<00:00, 94.97it/s]
 34%|███▎      | 30/89 [00:00<00:00, 95.98it/s]
 45%|████▍     | 40/89 [00:00<00:00, 95.62it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 94.49it/s]
 67%|██████▋   | 60/89 [00

[2m[36m(func pid=8756)[0m Epoch 00021: reducing learning rate of group 0 to 5.3648e-04.


 11%|█         | 10/89 [00:00<00:00, 94.70it/s]
 22%|██▏       | 20/89 [00:00<00:00, 97.38it/s]
 34%|███▎      | 30/89 [00:00<00:00, 72.73it/s]
 45%|████▍     | 40/89 [00:00<00:00, 81.22it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 86.85it/s]
 67%|██████▋   | 60/89 [00:00<00:00, 90.66it/s]
 80%|███████▉  | 71/89 [00:00<00:00, 93.71it/s]
 91%|█████████ | 81/89 [00:00<00:00, 94.27it/s]
100%|██████████| 89/89 [00:00<00:00, 90.07it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 213.83it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 98.99it/s]
 24%|██▎       | 21/89 [00:00<00:00, 99.58it/s]
 36%|███▌      | 32/89 [00:00<00:00, 100.73it/s]
 48%|████▊     | 43/89 [00:00<00:00, 100.84it/s]
 61%|██████    | 54/89 [00:00<00:00, 98.26it/s] 
 72%|███████▏  | 64/89 [00:00<00:00, 98.00it/s]
 96%|█████████▌| 85/89 [00:00<00:00, 98.96it/s]
100%|██████████| 89/89 [00:00<00:00, 99.12it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
 91%|█████████▏

[2m[36m(func pid=8756)[0m Epoch 00040: reducing learning rate of group 0 to 2.6824e-04.


 11%|█         | 10/89 [00:00<00:00, 94.86it/s]
 24%|██▎       | 21/89 [00:00<00:00, 98.56it/s]
 36%|███▌      | 32/89 [00:00<00:00, 99.27it/s]
 48%|████▊     | 43/89 [00:00<00:00, 99.80it/s]
 60%|█████▉    | 53/89 [00:00<00:00, 99.04it/s]
 71%|███████   | 63/89 [00:00<00:00, 99.19it/s]
 83%|████████▎ | 74/89 [00:00<00:00, 100.15it/s]
100%|██████████| 89/89 [00:00<00:00, 99.77it/s] 
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 200.03it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 94.88it/s]
 22%|██▏       | 20/89 [00:00<00:00, 95.00it/s]
 34%|███▎      | 30/89 [00:00<00:00, 94.49it/s]
 45%|████▍     | 40/89 [00:00<00:00, 94.79it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 94.51it/s]
 67%|██████▋   | 60/89 [00:00<00:00, 94.54it/s]
 79%|███████▊  | 70/89 [00:00<00:00, 74.54it/s]
 89%|████████▉ | 79/89 [00:00<00:00, 78.41it/s]
100%|██████████| 89/89 [00:01<00:00, 85.70it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████|

[2m[36m(func pid=8756)[0m Epoch 00049: reducing learning rate of group 0 to 1.3412e-04.


 11%|█         | 10/89 [00:00<00:00, 98.13it/s]
 22%|██▏       | 20/89 [00:00<00:00, 97.95it/s]
 34%|███▎      | 30/89 [00:00<00:00, 98.43it/s]
 45%|████▍     | 40/89 [00:00<00:00, 98.99it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 99.31it/s]
 69%|██████▊   | 61/89 [00:00<00:00, 99.70it/s]
 80%|███████▉  | 71/89 [00:00<00:00, 99.51it/s]
 91%|█████████ | 81/89 [00:00<00:00, 98.66it/s]
100%|██████████| 89/89 [00:00<00:00, 98.38it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 201.68it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 93.04it/s]
 22%|██▏       | 20/89 [00:00<00:00, 95.36it/s]
 35%|███▍      | 31/89 [00:00<00:00, 98.02it/s]
 46%|████▌     | 41/89 [00:00<00:00, 97.70it/s]
 58%|█████▊    | 52/89 [00:00<00:00, 98.83it/s]
 71%|███████   | 63/89 [00:00<00:00, 99.80it/s]
 82%|████████▏ | 73/89 [00:00<00:00, 99.70it/s]
 93%|█████████▎| 83/89 [00:00<00:00, 98.88it/s]
100%|██████████| 89/89 [00:00<00:00, 98.19it/s]
  0%|    

[2m[36m(func pid=8756)[0m Epoch 00055: reducing learning rate of group 0 to 6.7060e-05.


 11%|█         | 10/89 [00:00<00:00, 96.20it/s]
 22%|██▏       | 20/89 [00:00<00:01, 67.90it/s]
 35%|███▍      | 31/89 [00:00<00:00, 81.42it/s]
 47%|████▋     | 42/89 [00:00<00:00, 88.92it/s]
 58%|█████▊    | 52/89 [00:00<00:00, 92.36it/s]
 71%|███████   | 63/89 [00:00<00:00, 94.90it/s]
 83%|████████▎ | 74/89 [00:00<00:00, 97.12it/s]
100%|██████████| 89/89 [00:00<00:00, 92.38it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 219.53it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 97.30it/s]
 22%|██▏       | 20/89 [00:00<00:00, 95.46it/s]
 34%|███▎      | 30/89 [00:00<00:00, 95.81it/s]
 45%|████▍     | 40/89 [00:00<00:00, 96.70it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 94.73it/s]
 67%|██████▋   | 60/89 [00:00<00:00, 95.99it/s]
 80%|███████▉  | 71/89 [00:00<00:00, 98.50it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 219.99it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00

[2m[36m(func pid=8756)[0m Epoch 00069: reducing learning rate of group 0 to 3.3530e-05.


 11%|█         | 10/89 [00:00<00:00, 91.29it/s]
 22%|██▏       | 20/89 [00:00<00:00, 92.07it/s]
 34%|███▎      | 30/89 [00:00<00:00, 92.30it/s]
 45%|████▍     | 40/89 [00:00<00:00, 91.80it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 93.55it/s]
 69%|██████▊   | 61/89 [00:00<00:00, 96.18it/s]
 80%|███████▉  | 71/89 [00:00<00:00, 96.96it/s]
 91%|█████████ | 81/89 [00:00<00:00, 95.08it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 96.02it/s]
 22%|██▏       | 20/89 [00:00<00:00, 96.81it/s]
 34%|███▎      | 30/89 [00:00<00:00, 97.71it/s]
 46%|████▌     | 41/89 [00:00<00:00, 98.61it/s]
 57%|█████▋    | 51/89 [00:00<00:00, 96.88it/s]
 69%|██████▊   | 61/89 [00:00<00:00, 96.67it/s]
 80%|███████▉  | 71/89 [00:00<00:00, 96.70it/s]
 91%|█████████ | 81/89 [00:00<00:00, 96.15it/s]
100%|██████████| 89/89 [00:00<00:00, 96.64it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 215.04it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 1

[2m[36m(func pid=8756)[0m Epoch 00075: reducing learning rate of group 0 to 1.6765e-05.


 11%|█         | 10/89 [00:00<00:00, 97.28it/s]
 22%|██▏       | 20/89 [00:00<00:00, 96.93it/s]
 34%|███▎      | 30/89 [00:00<00:00, 72.83it/s]
 46%|████▌     | 41/89 [00:00<00:00, 82.54it/s]
 57%|█████▋    | 51/89 [00:00<00:00, 87.53it/s]
 69%|██████▊   | 61/89 [00:00<00:00, 90.84it/s]
 81%|████████  | 72/89 [00:00<00:00, 94.05it/s]
 92%|█████████▏| 82/89 [00:00<00:00, 95.52it/s]
100%|██████████| 89/89 [00:00<00:00, 90.89it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 219.86it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 96.83it/s]
 22%|██▏       | 20/89 [00:00<00:00, 96.81it/s]
 34%|███▎      | 30/89 [00:00<00:00, 97.18it/s]
 45%|████▍     | 40/89 [00:00<00:00, 96.76it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 97.28it/s]
 67%|██████▋   | 60/89 [00:00<00:00, 96.26it/s]
 80%|███████▉  | 71/89 [00:00<00:00, 97.51it/s]
 91%|█████████ | 81/89 [00:00<00:00, 97.28it/s]
100%|██████████| 89/89 [00:00<00:00, 96.98it/s]
  0%|    

[2m[36m(func pid=8756)[0m Epoch 00081: reducing learning rate of group 0 to 8.3825e-06.


 11%|█         | 10/89 [00:00<00:00, 92.78it/s]
 22%|██▏       | 20/89 [00:00<00:00, 94.19it/s]
 34%|███▎      | 30/89 [00:00<00:00, 95.10it/s]
 57%|█████▋    | 51/89 [00:00<00:00, 97.27it/s]
 69%|██████▊   | 61/89 [00:00<00:00, 97.13it/s]
 81%|████████  | 72/89 [00:00<00:00, 98.46it/s]
 92%|█████████▏| 82/89 [00:00<00:00, 98.27it/s]
100%|██████████| 89/89 [00:00<00:00, 96.96it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 215.23it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 91.62it/s]
 22%|██▏       | 20/89 [00:00<00:01, 66.28it/s]
 35%|███▍      | 31/89 [00:00<00:00, 79.12it/s]
 46%|████▌     | 41/89 [00:00<00:00, 85.68it/s]
 58%|█████▊    | 52/89 [00:00<00:00, 90.80it/s]
 70%|██████▉   | 62/89 [00:00<00:00, 93.44it/s]
 81%|████████  | 72/89 [00:00<00:00, 94.81it/s]
 93%|█████████▎| 83/89 [00:00<00:00, 97.10it/s]
100%|██████████| 89/89 [00:00<00:00, 90.43it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 2

[2m[36m(func pid=8756)[0m Epoch 00088: reducing learning rate of group 0 to 4.1912e-06.


 21%|██▏       | 19/89 [00:00<00:00, 92.35it/s]
 33%|███▎      | 29/89 [00:00<00:00, 89.30it/s]
 44%|████▍     | 39/89 [00:00<00:00, 91.04it/s]
 55%|█████▌    | 49/89 [00:00<00:00, 93.67it/s]
 66%|██████▋   | 59/89 [00:00<00:00, 95.40it/s]
 79%|███████▊  | 70/89 [00:00<00:00, 96.78it/s]
 90%|████████▉ | 80/89 [00:00<00:00, 96.58it/s]
100%|██████████| 89/89 [00:01<00:00, 87.74it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 196.32it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 90.06it/s]
 22%|██▏       | 20/89 [00:00<00:00, 91.42it/s]
 34%|███▎      | 30/89 [00:00<00:00, 92.61it/s]
 45%|████▍     | 40/89 [00:00<00:00, 92.90it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 94.71it/s]
 67%|██████▋   | 60/89 [00:00<00:00, 95.18it/s]
 79%|███████▊  | 70/89 [00:00<00:00, 96.51it/s]
 90%|████████▉ | 80/89 [00:00<00:00, 96.98it/s]
100%|██████████| 89/89 [00:00<00:00, 95.21it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 2

[2m[36m(func pid=8756)[0m Epoch 00094: reducing learning rate of group 0 to 2.0956e-06.


  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 94.92it/s]
 22%|██▏       | 20/89 [00:00<00:00, 96.07it/s]
 34%|███▎      | 30/89 [00:00<00:00, 95.15it/s]
 45%|████▍     | 40/89 [00:00<00:00, 93.35it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 91.74it/s]
 67%|██████▋   | 60/89 [00:00<00:00, 91.25it/s]
 79%|███████▊  | 70/89 [00:00<00:00, 91.15it/s]
 90%|████████▉ | 80/89 [00:00<00:00, 92.15it/s]
100%|██████████| 89/89 [00:00<00:00, 92.69it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 195.98it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 93.36it/s]
 22%|██▏       | 20/89 [00:00<00:00, 94.42it/s]
 34%|███▎      | 30/89 [00:00<00:00, 95.16it/s]
 45%|████▍     | 40/89 [00:00<00:00, 95.58it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 96.15it/s]
 67%|██████▋   | 60/89 [00:00<00:00, 96.80it/s]
 79%|███████▊  | 70/89 [00:00<00:00, 77.30it/s]
100%|██████████| 89/89 [00:01<00:00, 88.61it/s]
  0%|          | 0

[2m[36m(func pid=8756)[0m Epoch 00100: reducing learning rate of group 0 to 1.0478e-06.


 11%|█         | 10/89 [00:00<00:00, 95.56it/s]
 22%|██▏       | 20/89 [00:00<00:00, 97.31it/s]
 34%|███▎      | 30/89 [00:00<00:00, 96.39it/s]
 45%|████▍     | 40/89 [00:00<00:00, 97.47it/s]
 57%|█████▋    | 51/89 [00:00<00:00, 99.02it/s]
 70%|██████▉   | 62/89 [00:00<00:00, 99.91it/s]
 81%|████████  | 72/89 [00:00<00:00, 98.79it/s]
 92%|█████████▏| 82/89 [00:00<00:00, 96.76it/s]
100%|██████████| 89/89 [00:00<00:00, 97.48it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 91.68it/s]
 24%|██▎       | 21/89 [00:00<00:00, 97.82it/s]
 35%|███▍      | 31/89 [00:00<00:00, 97.30it/s]
 46%|████▌     | 41/89 [00:00<00:00, 96.62it/s]
 57%|█████▋    | 51/89 [00:00<00:00, 96.38it/s]
 69%|██████▊   | 61/89 [00:00<00:00, 95.92it/s]
 80%|███████▉  | 71/89 [00:00<00:00, 95.49it/s]
 91%|█████████ | 81/89 [00:00<00:00, 96.23it/s]
100%|██████████| 89/89 [00:00<00:00, 96.07it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 204.57it/s]
  0%|    

[2m[36m(func pid=8756)[0m Epoch 00106: reducing learning rate of group 0 to 5.2391e-07.


 11%|█         | 10/89 [00:00<00:00, 96.01it/s]
 22%|██▏       | 20/89 [00:00<00:00, 97.66it/s]
 34%|███▎      | 30/89 [00:00<00:00, 98.53it/s]
 45%|████▍     | 40/89 [00:00<00:00, 98.29it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 77.07it/s]
 67%|██████▋   | 60/89 [00:00<00:00, 82.93it/s]
 79%|███████▊  | 70/89 [00:00<00:00, 86.82it/s]
 90%|████████▉ | 80/89 [00:00<00:00, 88.68it/s]
100%|██████████| 89/89 [00:00<00:00, 89.29it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 203.49it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 91.85it/s]
 22%|██▏       | 20/89 [00:00<00:00, 94.34it/s]
 34%|███▎      | 30/89 [00:00<00:00, 96.44it/s]
 45%|████▍     | 40/89 [00:00<00:00, 96.72it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 97.10it/s]
 69%|██████▊   | 61/89 [00:00<00:00, 98.58it/s]
 81%|████████  | 72/89 [00:00<00:00, 99.30it/s]
 92%|█████████▏| 82/89 [00:00<00:00, 98.09it/s]
100%|██████████| 89/89 [00:00<00:00, 97.71it/s]
  0%|    

[2m[36m(func pid=8756)[0m Epoch 00112: reducing learning rate of group 0 to 2.6195e-07.


 22%|██▏       | 20/89 [00:00<00:00, 97.95it/s]
 34%|███▎      | 30/89 [00:00<00:00, 96.84it/s]
 45%|████▍     | 40/89 [00:00<00:00, 97.97it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 97.57it/s]
 67%|██████▋   | 60/89 [00:00<00:00, 98.12it/s]
 80%|███████▉  | 71/89 [00:00<00:00, 98.72it/s]
 91%|█████████ | 81/89 [00:00<00:00, 98.97it/s]
100%|██████████| 89/89 [00:00<00:00, 98.40it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 212.57it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 95.30it/s]
 22%|██▏       | 20/89 [00:00<00:01, 67.54it/s]
 34%|███▎      | 30/89 [00:00<00:00, 79.10it/s]
 45%|████▍     | 40/89 [00:00<00:00, 85.59it/s]
 57%|█████▋    | 51/89 [00:00<00:00, 91.07it/s]
 69%|██████▊   | 61/89 [00:00<00:00, 91.93it/s]
 80%|███████▉  | 71/89 [00:00<00:00, 93.71it/s]
100%|██████████| 89/89 [00:00<00:00, 90.34it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 212.63it/s]
  0%|          | 

[2m[36m(func pid=8756)[0m Epoch 00118: reducing learning rate of group 0 to 1.3098e-07.


 11%|█         | 10/89 [00:00<00:00, 95.14it/s]
 22%|██▏       | 20/89 [00:00<00:00, 92.38it/s]
 34%|███▎      | 30/89 [00:00<00:00, 91.81it/s]
 45%|████▍     | 40/89 [00:00<00:00, 91.31it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 91.84it/s]
 67%|██████▋   | 60/89 [00:00<00:00, 90.92it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 202.04it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 98.01it/s]
 22%|██▏       | 20/89 [00:00<00:00, 95.80it/s]
 34%|███▎      | 30/89 [00:00<00:00, 95.49it/s]
 45%|████▍     | 40/89 [00:00<00:00, 96.55it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 96.67it/s]
 67%|██████▋   | 60/89 [00:00<00:00, 96.19it/s]
 79%|███████▊  | 70/89 [00:00<00:00, 95.83it/s]
 90%|████████▉ | 80/89 [00:00<00:00, 95.45it/s]
100%|██████████| 89/89 [00:00<00:00, 95.48it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 122.11it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00

[2m[36m(func pid=8756)[0m Epoch 00124: reducing learning rate of group 0 to 6.5488e-08.


 11%|█         | 10/89 [00:00<00:00, 97.42it/s]
 22%|██▏       | 20/89 [00:00<00:00, 97.51it/s]
 35%|███▍      | 31/89 [00:00<00:00, 98.72it/s]
 46%|████▌     | 41/89 [00:00<00:00, 98.43it/s]
 58%|█████▊    | 52/89 [00:00<00:00, 99.33it/s]
 70%|██████▉   | 62/89 [00:00<00:00, 98.99it/s]
 81%|████████  | 72/89 [00:00<00:00, 98.91it/s]
100%|██████████| 89/89 [00:00<00:00, 97.79it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 222.04it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 98.92it/s]
 22%|██▏       | 20/89 [00:00<00:00, 96.19it/s]
 34%|███▎      | 30/89 [00:00<00:00, 97.47it/s]
 46%|████▌     | 41/89 [00:00<00:00, 99.16it/s]
 58%|█████▊    | 52/89 [00:00<00:00, 99.54it/s]
 70%|██████▉   | 62/89 [00:00<00:00, 98.94it/s]
 81%|████████  | 72/89 [00:00<00:00, 98.77it/s]
100%|██████████| 89/89 [00:00<00:00, 97.91it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 201.60it/s]
  0%|          | 

[2m[36m(func pid=8756)[0m Epoch 00130: reducing learning rate of group 0 to 3.2744e-08.


  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 94.48it/s]
 22%|██▏       | 20/89 [00:00<00:00, 94.93it/s]
 34%|███▎      | 30/89 [00:00<00:00, 95.71it/s]
 45%|████▍     | 40/89 [00:00<00:00, 96.97it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 97.52it/s]
 69%|██████▊   | 61/89 [00:00<00:00, 98.42it/s]
 80%|███████▉  | 71/89 [00:00<00:00, 98.41it/s]
100%|██████████| 89/89 [00:00<00:00, 97.93it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 211.80it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 96.25it/s]
 22%|██▏       | 20/89 [00:00<00:00, 96.73it/s]
 34%|███▎      | 30/89 [00:00<00:00, 97.85it/s]
 45%|████▍     | 40/89 [00:00<00:00, 98.16it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 98.62it/s]
 69%|██████▊   | 61/89 [00:00<00:00, 98.97it/s]
 81%|████████  | 72/89 [00:00<00:00, 99.45it/s]
100%|██████████| 89/89 [00:00<00:00, 98.06it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 10%|█         | 9/89 [00:0

[2m[36m(func pid=8756)[0m Epoch 00136: reducing learning rate of group 0 to 1.6372e-08.


  0%|          | 0/89 [00:00<?, ?it/s]
 10%|█         | 9/89 [00:00<00:00, 88.22it/s]
 20%|██        | 18/89 [00:00<00:01, 60.85it/s]
 31%|███▏      | 28/89 [00:00<00:00, 74.22it/s]
 43%|████▎     | 38/89 [00:00<00:00, 81.88it/s]
 54%|█████▍    | 48/89 [00:00<00:00, 86.61it/s]
 65%|██████▌   | 58/89 [00:00<00:00, 89.14it/s]
 76%|███████▋  | 68/89 [00:00<00:00, 91.66it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 211.49it/s]
  0%|          | 0/89 [00:00<?, ?it/s]
 11%|█         | 10/89 [00:00<00:00, 96.28it/s]
 22%|██▏       | 20/89 [00:00<00:00, 97.19it/s]
 45%|████▍     | 40/89 [00:00<00:00, 96.77it/s]
 56%|█████▌    | 50/89 [00:00<00:00, 97.24it/s]
 67%|██████▋   | 60/89 [00:00<00:00, 97.11it/s]
 79%|███████▊  | 70/89 [00:00<00:00, 97.69it/s]
 90%|████████▉ | 80/89 [00:00<00:00, 97.94it/s]
100%|██████████| 89/89 [00:00<00:00, 97.40it/s]
  0%|          | 0/23 [00:00<?, ?it/s]
100%|██████████| 23/23 [00:00<00:00, 203.75it/s]
  0%|          | 0/89 [00:0