In [1]:
import os
import sys

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from models import new_models
from config import load_data

from ray import tune
from ray.air.integrations.mlflow import MLflowLoggerCallback
from ray.tune.schedulers import ASHAScheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau

import mlflow
from mlflow.tracking import MlflowClient

# Training setup

In [2]:
def fit(net, loss_function, optimizer, data_loader, num_epochs, mode, lr_scheduler, use_amp=False):
    """Train ``net`` and report the per-phase mean loss to Ray Tune.

    Every epoch runs a "train" pass followed by a "val" pass; the last epoch
    additionally runs a "test" pass. After each pass the sample-weighted mean
    loss is sent with its own ``tune.report`` call (``train_loss``,
    ``val_loss`` or ``test_loss``). The validation loss also drives
    ``lr_scheduler``.

    Args:
        net: Model, called as ``net(X_batch.float())``.
        loss_function: Callable ``loss_function(output, target)`` returning a
            scalar tensor.
        optimizer: Optimizer updating ``net``'s parameters.
        data_loader: Mapping with the keys "train", "val" and "test"; each
            value is an iterable yielding ``(X_batch, y_batch)`` tensors.
        num_epochs: Number of epochs to run.
        mode: Mapping whose "device" entry is the device batches are moved to.
        lr_scheduler: Object exposing ``step(metrics=...)``; called once per
            epoch with the validation loss.
        use_amp: Enables both the gradient scaler and autocast
            (mixed precision) when True.

    Returns:
        The same ``net`` object, in the state reached after the final epoch.
    """
    device = mode["device"]
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)  # Loss scaling for mixed precision

    for epoch in range(num_epochs):
        # The test split is only evaluated once, in the very last epoch.
        if epoch < num_epochs - 1:
            phases = ["train", "val"]
        else:
            phases = ["train", "val", "test"]

        for phase in phases:
            training = phase == "train"
            if training:
                net.train()
            else:
                net.eval()

            sample_count = 0
            weighted_loss_sum = 0.0
            for X_batch, y_batch in tqdm(data_loader[phase]):
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                with torch.set_grad_enabled(training):  # Autograd only while training
                    # Autocast must follow use_amp; it was previously hard-coded
                    # to False, which silently disabled mixed precision.
                    with torch.cuda.amp.autocast(enabled=use_amp):
                        batch_output = net(X_batch.float())
                        batch_loss = loss_function(batch_output, y_batch)
                    if training:
                        # Clear gradients first so nothing left over from
                        # before this step is accumulated into the update.
                        optimizer.zero_grad()
                        scaler.scale(batch_loss).backward()
                        scaler.step(optimizer)
                        scaler.update()
                # Weight by batch size so a smaller final batch does not skew the mean.
                batch_len = y_batch.shape[0]
                sample_count += batch_len
                weighted_loss_sum += batch_len * batch_loss.item()

            dataset_loss = weighted_loss_sum / sample_count

            # Report results to Ray Tune (one report call per phase)
            if phase == "train":
                tune.report(train_loss=dataset_loss)
            elif phase == "val":
                # Update learning rate from the validation loss
                lr_scheduler.step(metrics=dataset_loss)
                tune.report(val_loss=dataset_loss)
            else:
                tune.report(test_loss=dataset_loss)
    return net

In [3]:
from config import load_data

def train_model(config, data_dir):
    """Ray Tune trainable: build data and model from ``config``, train, save weights.

    Args:
        config: Trial configuration. Keys read here: 'arch',
            'sequence_length', 'batch_size', 'num_epochs', 'lr',
            'weight_decay' (or the legacy misspelling 'weigth_decay'),
            'variables', 'target_variable', 'hidden_size', 'num_layers',
            'cwd' and 'weights_dir'.
        data_dir: Forwarded to ``load_data`` as ``data_dir``.

    Raises:
        ValueError: If ``config['arch']`` is not a supported architecture name.
        KeyError: If a required configuration key is missing.
    """
    # Fail fast on an unknown architecture instead of hitting an unbound
    # ``net`` after the data has already been loaded.
    supported_archs = ("FCN", "FCNTemporalAttention", "LSTM", "LSTMTemporalAttention")
    arch = config['arch']
    if arch not in supported_archs:
        raise ValueError(
            "Unknown architecture {!r}; expected one of {}".format(arch, supported_archs)
        )

    use_GPU = torch.cuda.is_available()
    if use_GPU:
        mode = {"name": "cuda", "device": torch.device("cuda")}
    else:
        mode = {"name": "cpu", "device": torch.device("cpu")}

    # Hyperparameters
    # NOTE(review): the split proportions are whatever ``split_data`` uses by
    # default; the 0.6/0.2/0.2 values previously declared here were never
    # passed on -- confirm against ``load_data.split_data``.
    sequence_length = config['sequence_length']
    batch_size = config['batch_size']
    num_epochs = config['num_epochs']
    lr = config['lr']
    if 'weight_decay' in config:
        weight_decay = config['weight_decay']
    else:
        weight_decay = config['weigth_decay']  # legacy (misspelled) key
    lag_variables = config['variables']

    ld = load_data(data_dir=data_dir, target_variable=config['target_variable'])

    X, y = ld.create_lagged_matrix(window_size=sequence_length, vars_to_lag=lag_variables)

    X_train, y_train, X_val, y_val, X_test, y_test = ld.split_data(X, y)

    # Only the training split is shuffled; evaluation losses are
    # sample-weighted means, so ordering is irrelevant for val/test.
    data_loader = {
        "train": ld.create_dataloader(X_train, y_train, sequence_length, batch_size=batch_size, shuffle=True),
        "val": ld.create_dataloader(X_val, y_val, sequence_length, batch_size=batch_size, shuffle=False),
        "test": ld.create_dataloader(X_test, y_test, sequence_length, batch_size=batch_size, shuffle=False),
    }

    # Model inputs: one feature per entry in 'variables', plus one more
    # (presumably the target series itself -- verify against create_lagged_matrix).
    if lag_variables:
        input_size = len(lag_variables) + 1
    else:
        input_size = 1
    hidden_size = config['hidden_size']
    num_layers = config['num_layers']
    output_size = 1

    # The supported architecture names match the class names in ``new_models``.
    net = getattr(new_models, arch)(input_size, hidden_size, num_layers, output_size)
    net.to(mode["device"])

    loss_function = nn.MSELoss().to(mode["device"])
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)

    # Halve the learning rate when the validation loss stops improving
    lr_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)

    # Mixed precision is only requested when a GPU is actually in use.
    trained_net = fit(net, loss_function, optimizer, data_loader, num_epochs, mode, lr_scheduler, use_amp=use_GPU)

    # File name encodes every config entry except the path-like / list-valued ones.
    excluded_keys = ('weights_dir', 'cwd', 'variables')
    out_name = "_".join(
        '{}-{}'.format(k, v) for k, v in config.items() if k not in excluded_keys
    )
    out_dir = os.path.join(config['cwd'], config['weights_dir'])
    os.makedirs(out_dir, exist_ok=True)
    torch.save(trained_net.state_dict(), os.path.join(out_dir, out_name + '.pth'))

# MLflow setup

In [4]:
client = MlflowClient()
cwd = os.getcwd()
exp_base_name = "Test_of_interface"

# Try "<base>_0", "<base>_1", ... until MLflow accepts one as a new
# experiment. The last error is kept so a total failure is diagnosable.
created = False
last_error = None
for i in range(100):
    exp_name = "{}_{}".format(exp_base_name, i)
    try:
        experiment_id = client.create_experiment(exp_name)
    except (TypeError, mlflow.exceptions.MlflowException) as err:
        last_error = err
        continue
    created = True
    break

if not created:
    print("ERROR: Try new experiment name.")
    if last_error is not None:
        print("Last error while creating the experiment: {}".format(last_error))
    sys.exit(1)

# One weights directory per experiment. makedirs also creates the
# "./model_weights/" parent when it does not exist yet.
weights_root = "./model_weights/"
weights_dir = weights_root+exp_name+'/'
os.makedirs(weights_dir, exist_ok=True)

In [5]:
# Location of the input data, handed to `train_model` for every trial.
data_dir = "./data/"
# Name of the series the models are trained to predict.
target_variable = "Q_Kalltveit"

# Start experiments

In [6]:
from functools import partial

# Hyperparameter search space. `grid_search` entries are enumerated
# exhaustively; `choice` / `loguniform` entries are sampled per trial.
config = {
    "mlflow_experiment_id": experiment_id,
    "weights_dir": weights_dir,
    "cwd": cwd,
    "target_variable": target_variable,
    "arch": tune.grid_search(["LSTMTemporalAttention"]), # "FCN", "FCNTemporalAttention", 
    "sequence_length": tune.grid_search([25]),
    'num_epochs': tune.grid_search([150]),
    'num_layers': tune.choice([2, 3, 4]),
    "lr": tune.loguniform(1e-4, 1e-1),
    "weigth_decay": tune.choice([0, 0.001, 0.0001]),
    "batch_size": tune.choice([256, 256*2]),
    "hidden_size": tune.grid_search([64]),
    "variables": tune.grid_search([None, ['Nedbør Nilsebu'], ['Q_Lyngsaana'], ['Nedbør Nilsebu', 'Q_Lyngsaana']])
}

# Request only resources the host can provide: a fixed {"cpu": 12, "gpu": 1}
# can never be satisfied on a CPU-only or smaller machine, although
# `train_model` itself falls back to the CPU.
cpus_per_trial = min(12, os.cpu_count() or 1)
gpus_per_trial = 1 if torch.cuda.is_available() else 0

analysis = tune.run(
    partial(train_model, data_dir=data_dir),
    config=config,
    resources_per_trial={"cpu": cpus_per_trial, "gpu": gpus_per_trial},
    num_samples=1,
    callbacks=[MLflowLoggerCallback(experiment_name=exp_name)],
)

2023-03-24 19:16:11,632	INFO worker.py:1538 -- Started a local Ray instance.


0,1
Current time:,2023-03-24 19:16:17
Running for:,00:00:03.75
Memory:,14.4/31.9 GiB

Trial name,status,loc,arch,batch_size,hidden_size,lr,num_epochs,num_layers,sequence_length,variables,weigth_decay
train_model_f5d07_00000,RUNNING,127.0.0.1:17740,LSTM,256,64,0.00740814,150,4,25,,0.0001
train_model_f5d07_00001,PENDING,,LSTMTemporalAtt_c710,256,64,0.00117195,150,4,25,,0.001
train_model_f5d07_00002,PENDING,,LSTM,512,64,0.00479903,150,2,25,['Nedbør Nilsebu'],0.0
train_model_f5d07_00003,PENDING,,LSTMTemporalAtt_c710,256,64,0.028881,150,2,25,['Nedbør Nilsebu'],0.0001
train_model_f5d07_00004,PENDING,,LSTM,256,64,0.00441163,150,4,25,['Q_Lyngsaana'],0.0
train_model_f5d07_00005,PENDING,,LSTMTemporalAtt_c710,256,64,0.0019959,150,3,25,['Q_Lyngsaana'],0.001
train_model_f5d07_00006,PENDING,,LSTM,512,64,0.00149262,150,3,25,['Nedbør Nilseb_cb48,0.0001
train_model_f5d07_00007,PENDING,,LSTMTemporalAtt_c710,512,64,0.00105039,150,3,25,['Nedbør Nilseb_cb08,0.0001
train_model_f5d07_00008,PENDING,,LSTM,512,64,0.0885835,150,3,25,,0.0
train_model_f5d07_00009,PENDING,,LSTMTemporalAtt_c710,512,64,0.00732141,150,2,25,,0.0


  0%|          | 0/173 [00:00<?, ?it/s]
  1%|          | 1/173 [00:00<00:46,  3.67it/s]
  6%|▋         | 11/173 [00:00<00:04, 36.68it/s]
 13%|█▎        | 22/173 [00:00<00:02, 59.77it/s]
 20%|█▉        | 34/173 [00:00<00:01, 77.34it/s]
 27%|██▋       | 46/173 [00:00<00:01, 89.97it/s]
 34%|███▎      | 58/173 [00:00<00:01, 97.00it/s]
 40%|████      | 70/173 [00:00<00:01, 101.36it/s]
 47%|████▋     | 81/173 [00:01<00:00, 103.06it/s]
 53%|█████▎    | 92/173 [00:01<00:00, 103.52it/s]
 60%|██████    | 104/173 [00:01<00:00, 105.64it/s]
 66%|██████▋   | 115/173 [00:01<00:00, 106.83it/s]
 73%|███████▎  | 127/173 [00:01<00:00, 108.20it/s]
 80%|████████  | 139/173 [00:01<00:00, 109.57it/s]
 87%|████████▋ | 151/173 [00:01<00:00, 111.96it/s]
 94%|█████████▍| 163/173 [00:01<00:00, 113.13it/s]


Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,node_ip,pid,test_loss,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
train_model_f5d07_00000,2023-03-24_19-20-54,True,,9ba076c7d15540dfb02e4d9034804429,"0_arch=LSTM,batch_size=256,hidden_size=64,lr=0.0074,num_epochs=150,num_layers=4,sequence_length=25,variables=None,weigth_decay=0.0001",DESKTOP-D4IVECG,301,127.0.0.1,17740,0.789844,277.372,0.141862,277.372,1679682054,0,,301,f5d07_00000,0.0039072


100%|██████████| 173/173 [00:01<00:00, 94.53it/s] 
  0%|          | 0/50 [00:00<?, ?it/s]
 64%|██████▍   | 32/50 [00:00<00:00, 312.54it/s]
100%|██████████| 50/50 [00:00<00:00, 318.79it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 96.23it/s]
 12%|█▏        | 21/173 [00:00<00:01, 103.28it/s]
 18%|█▊        | 32/173 [00:00<00:01, 105.52it/s]
 25%|██▌       | 44/173 [00:00<00:01, 109.74it/s]
 32%|███▏      | 56/173 [00:00<00:01, 111.83it/s]
 39%|███▉      | 68/173 [00:00<00:00, 113.74it/s]
 46%|████▌     | 80/173 [00:00<00:00, 114.22it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 115.03it/s]
 60%|██████    | 104/173 [00:00<00:00, 114.02it/s]
 67%|██████▋   | 116/173 [00:01<00:00, 115.20it/s]
 74%|███████▍  | 128/173 [00:01<00:00, 114.50it/s]
 81%|████████  | 140/173 [00:01<00:00, 112.07it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 112.48it/s]
100%|██████████| 173/173 [00:01<00:00, 112.75it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:0

[2m[36m(func pid=17740)[0m Epoch 00027: reducing learning rate of group 0 to 3.7041e-03.


  0%|          | 0/173 [00:00<?, ?it/s]
 14%|█▍        | 24/173 [00:00<00:01, 116.76it/s]
 21%|██        | 36/173 [00:00<00:01, 117.18it/s]
 28%|██▊       | 48/173 [00:00<00:01, 114.80it/s]
 35%|███▍      | 60/173 [00:00<00:01, 112.92it/s]
 42%|████▏     | 72/173 [00:00<00:00, 112.34it/s]
 49%|████▊     | 84/173 [00:00<00:00, 112.99it/s]
 55%|█████▌    | 96/173 [00:00<00:00, 113.65it/s]
 69%|██████▉   | 119/173 [00:01<00:00, 108.53it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 109.56it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 110.93it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 112.07it/s]
 50%|█████     | 25/50 [00:00<00:00, 238.91it/s]
100%|██████████| 50/50 [00:00<00:00, 270.19it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 112.56it/s]
 14%|█▍        | 24/173 [00:00<00:01, 114.55it/s]
 21%|██        | 36/173 [00:00<00:01, 113.99it/s]
 28%|██▊       | 48/173 [00:00<00:01, 112.46it/s]
 35%|███▍      | 60/173 [00:00<00:00, 114.66it/s]
 42%|████▏     | 7

[2m[36m(func pid=17740)[0m Epoch 00042: reducing learning rate of group 0 to 1.8520e-03.


  7%|▋         | 12/173 [00:00<00:01, 109.19it/s]
 13%|█▎        | 23/173 [00:00<00:01, 106.88it/s]
 20%|█▉        | 34/173 [00:00<00:01, 98.69it/s] 
 26%|██▌       | 45/173 [00:00<00:01, 100.07it/s]
 33%|███▎      | 57/173 [00:00<00:01, 106.45it/s]
 39%|███▉      | 68/173 [00:00<00:00, 105.61it/s]
 46%|████▌     | 80/173 [00:00<00:00, 108.62it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 108.76it/s]
 60%|█████▉    | 103/173 [00:00<00:00, 110.79it/s]
 66%|██████▋   | 115/173 [00:01<00:00, 110.26it/s]
 73%|███████▎  | 127/173 [00:01<00:00, 112.48it/s]
 80%|████████  | 139/173 [00:01<00:00, 112.06it/s]
100%|██████████| 173/173 [00:01<00:00, 109.64it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 62%|██████▏   | 31/50 [00:00<00:00, 302.25it/s]
100%|██████████| 50/50 [00:00<00:00, 313.71it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 111.67it/s]
 14%|█▍        | 24/173 [00:00<00:01, 111.37it/s]
 21%|██        | 36/173 [00:00<00:01, 111.31it/s]
 28%|██▊       | 4

[2m[36m(func pid=17740)[0m Epoch 00061: reducing learning rate of group 0 to 9.2602e-04.


  7%|▋         | 12/173 [00:00<00:01, 114.19it/s]
 14%|█▍        | 24/173 [00:00<00:01, 81.61it/s] 
 20%|█▉        | 34/173 [00:00<00:01, 87.91it/s]
 27%|██▋       | 46/173 [00:00<00:01, 96.38it/s]
 34%|███▎      | 58/173 [00:00<00:01, 101.68it/s]
 40%|███▉      | 69/173 [00:00<00:01, 103.00it/s]
 46%|████▌     | 80/173 [00:00<00:00, 102.96it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 102.88it/s]
 59%|█████▉    | 102/173 [00:01<00:00, 104.73it/s]
 65%|██████▌   | 113/173 [00:01<00:00, 104.96it/s]
 72%|███████▏  | 124/173 [00:01<00:00, 104.42it/s]
 79%|███████▊  | 136/173 [00:01<00:00, 107.89it/s]
 85%|████████▍ | 147/173 [00:01<00:00, 107.94it/s]
 91%|█████████▏| 158/173 [00:01<00:00, 108.38it/s]
100%|██████████| 173/173 [00:01<00:00, 103.64it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 62%|██████▏   | 31/50 [00:00<00:00, 303.02it/s]
100%|██████████| 50/50 [00:00<00:00, 305.85it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 96.19it/s]
 12%|█▏        | 21

[2m[36m(func pid=17740)[0m Epoch 00075: reducing learning rate of group 0 to 4.6301e-04.


 14%|█▍        | 24/173 [00:00<00:01, 108.38it/s]
 21%|██        | 36/173 [00:00<00:01, 110.06it/s]
 28%|██▊       | 48/173 [00:00<00:01, 111.72it/s]
 35%|███▍      | 60/173 [00:00<00:01, 109.84it/s]
 42%|████▏     | 73/173 [00:00<00:00, 113.56it/s]
 49%|████▉     | 85/173 [00:00<00:00, 113.08it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 109.85it/s]
 63%|██████▎   | 109/173 [00:00<00:00, 109.93it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 112.14it/s]
 77%|███████▋  | 133/173 [00:01<00:00, 112.84it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 113.43it/s]
100%|██████████| 173/173 [00:01<00:00, 111.90it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 44%|████▍     | 22/50 [00:00<00:00, 117.07it/s]
100%|██████████| 50/50 [00:00<00:00, 179.74it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 100.33it/s]
 13%|█▎        | 22/173 [00:00<00:01, 104.55it/s]
 20%|█▉        | 34/173 [00:00<00:01, 107.54it/s]
 26%|██▌       | 45/173 [00:00<00:01, 108.44it/s]
 33%|███▎      | 5

[2m[36m(func pid=17740)[0m Epoch 00085: reducing learning rate of group 0 to 2.3150e-04.


  7%|▋         | 12/173 [00:00<00:01, 113.20it/s]
 14%|█▍        | 24/173 [00:00<00:01, 113.11it/s]
 21%|██        | 36/173 [00:00<00:01, 110.46it/s]
 28%|██▊       | 48/173 [00:00<00:01, 108.92it/s]
 35%|███▍      | 60/173 [00:00<00:01, 109.06it/s]
 41%|████      | 71/173 [00:00<00:00, 108.41it/s]
 47%|████▋     | 82/173 [00:00<00:00, 105.85it/s]
 54%|█████▍    | 93/173 [00:00<00:00, 105.58it/s]
 60%|██████    | 104/173 [00:00<00:00, 105.30it/s]
 66%|██████▋   | 115/173 [00:01<00:00, 105.14it/s]
 73%|███████▎  | 126/173 [00:01<00:00, 105.54it/s]
 79%|███████▉  | 137/173 [00:01<00:00, 105.36it/s]
 86%|████████▌ | 148/173 [00:01<00:00, 104.55it/s]
 92%|█████████▏| 160/173 [00:01<00:00, 106.58it/s]
100%|██████████| 173/173 [00:01<00:00, 102.55it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 66%|██████▌   | 33/50 [00:00<00:00, 314.59it/s]
100%|██████████| 50/50 [00:00<00:00, 280.70it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 106.95it/s]
 13%|█▎        |

[2m[36m(func pid=17740)[0m Epoch 00097: reducing learning rate of group 0 to 1.1575e-04.


 14%|█▍        | 24/173 [00:00<00:01, 110.81it/s]
 21%|██        | 36/173 [00:00<00:01, 110.01it/s]
 28%|██▊       | 48/173 [00:00<00:01, 109.82it/s]
 34%|███▍      | 59/173 [00:00<00:01, 109.70it/s]
 40%|████      | 70/173 [00:00<00:00, 108.78it/s]
 47%|████▋     | 81/173 [00:00<00:00, 109.16it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 109.09it/s]
 60%|█████▉    | 103/173 [00:00<00:00, 108.36it/s]
 66%|██████▋   | 115/173 [00:01<00:00, 110.22it/s]
 73%|███████▎  | 127/173 [00:01<00:00, 108.95it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 109.25it/s]
 87%|████████▋ | 150/173 [00:01<00:00, 109.78it/s]
 94%|█████████▎| 162/173 [00:01<00:00, 110.81it/s]
100%|██████████| 173/173 [00:01<00:00, 109.97it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 68%|██████▊   | 34/50 [00:00<00:00, 327.50it/s]
100%|██████████| 50/50 [00:00<00:00, 325.09it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 101.41it/s]
 13%|█▎        | 23/173 [00:00<00:01, 110.80it/s]
 20%|██        |

[2m[36m(func pid=17740)[0m Epoch 00114: reducing learning rate of group 0 to 5.7876e-05.


  7%|▋         | 12/173 [00:00<00:01, 110.97it/s]
 14%|█▍        | 24/173 [00:00<00:01, 114.68it/s]
 21%|██        | 36/173 [00:00<00:01, 110.28it/s]
 28%|██▊       | 48/173 [00:00<00:01, 106.70it/s]
 35%|███▍      | 60/173 [00:00<00:01, 108.85it/s]
 41%|████      | 71/173 [00:00<00:00, 105.95it/s]
 47%|████▋     | 82/173 [00:00<00:00, 105.39it/s]
 54%|█████▍    | 93/173 [00:00<00:00, 104.73it/s]
 60%|██████    | 104/173 [00:00<00:00, 104.46it/s]
 66%|██████▋   | 115/173 [00:01<00:00, 103.38it/s]
 73%|███████▎  | 127/173 [00:01<00:00, 108.16it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 108.35it/s]
 58%|█████▊    | 29/50 [00:00<00:00, 284.55it/s]
100%|██████████| 50/50 [00:00<00:00, 297.70it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 108.89it/s]
 13%|█▎        | 23/173 [00:00<00:01, 110.37it/s]
 20%|██        | 35/173 [00:00<00:01, 110.78it/s]
 27%|██▋       | 47/173 [00:00<00:01, 110.03it/s]
 34%|███▍      | 59/173 [00:00<00:01, 87.58it/s] 
 41%|███

[2m[36m(func pid=17740)[0m Epoch 00127: reducing learning rate of group 0 to 2.8938e-05.


 13%|█▎        | 23/173 [00:00<00:01, 109.89it/s]
 20%|█▉        | 34/173 [00:00<00:01, 108.06it/s]
 26%|██▌       | 45/173 [00:00<00:01, 106.32it/s]
 32%|███▏      | 56/173 [00:00<00:01, 103.98it/s]
 39%|███▊      | 67/173 [00:00<00:01, 104.44it/s]
 46%|████▌     | 79/173 [00:00<00:00, 107.12it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 106.39it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 106.16it/s]
 65%|██████▍   | 112/173 [00:01<00:00, 105.10it/s]
 72%|███████▏  | 124/173 [00:01<00:00, 105.77it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 106.07it/s]
 84%|████████▍ | 146/173 [00:01<00:00, 103.93it/s]
 91%|█████████ | 157/173 [00:01<00:00, 105.06it/s]
100%|██████████| 173/173 [00:01<00:00, 106.19it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 64%|██████▍   | 32/50 [00:00<00:00, 313.45it/s]
100%|██████████| 50/50 [00:00<00:00, 320.52it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 108.67it/s]
 13%|█▎        | 22/173 [00:00<00:01, 109.19it/s]
 20%|█▉        |

[2m[36m(func pid=17740)[0m Epoch 00133: reducing learning rate of group 0 to 1.4469e-05.


 13%|█▎        | 23/173 [00:00<00:01, 110.71it/s]
 20%|██        | 35/173 [00:00<00:01, 104.68it/s]
 27%|██▋       | 46/173 [00:00<00:01, 103.22it/s]
 34%|███▎      | 58/173 [00:00<00:01, 103.59it/s]
 40%|███▉      | 69/173 [00:00<00:01, 100.60it/s]
 46%|████▌     | 80/173 [00:00<00:00, 103.06it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 106.56it/s]
 60%|█████▉    | 103/173 [00:00<00:00, 106.72it/s]
 66%|██████▋   | 115/173 [00:01<00:00, 107.95it/s]
 73%|███████▎  | 127/173 [00:01<00:00, 108.88it/s]
 80%|████████  | 139/173 [00:01<00:00, 110.53it/s]
 87%|████████▋ | 151/173 [00:01<00:00, 109.45it/s]
 94%|█████████▍| 163/173 [00:01<00:00, 111.28it/s]
100%|██████████| 173/173 [00:01<00:00, 106.91it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 322.21it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 111.24it/s]
 14%|█▍        | 24/173 [00:00<00:01, 111.27it/s]
 21%|██        | 36/173 [00:00<00:01, 111.59it/s]
 28%|██▊       

[2m[36m(func pid=17740)[0m Epoch 00148: reducing learning rate of group 0 to 7.2345e-06.


 14%|█▍        | 25/173 [00:00<00:01, 119.61it/s]
 21%|██▏       | 37/173 [00:00<00:01, 115.66it/s]
 28%|██▊       | 49/173 [00:00<00:01, 110.54it/s]
 35%|███▌      | 61/173 [00:00<00:01, 108.97it/s]
 42%|████▏     | 73/173 [00:00<00:00, 110.02it/s]
 49%|████▉     | 85/173 [00:00<00:00, 108.49it/s]
 55%|█████▌    | 96/173 [00:00<00:00, 107.29it/s]
 62%|██████▏   | 108/173 [00:00<00:00, 109.80it/s]
 69%|██████▉   | 120/173 [00:01<00:00, 110.21it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 110.70it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 111.15it/s]
 90%|█████████ | 156/173 [00:01<00:00, 111.19it/s]
 97%|█████████▋| 168/173 [00:01<00:00, 111.31it/s]
100%|██████████| 173/173 [00:01<00:00, 111.00it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 62%|██████▏   | 31/50 [00:00<00:00, 305.20it/s]
100%|██████████| 50/50 [00:00<00:00, 302.19it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 111.84it/s]
 14%|█▍        | 24/173 [00:00<00:01, 112.60it/s]
 21%|██        |

[2m[36m(func pid=19928)[0m Epoch 00023: reducing learning rate of group 0 to 5.8598e-04.


 13%|█▎        | 22/173 [00:00<00:01, 98.47it/s] 
 18%|█▊        | 32/173 [00:00<00:01, 96.61it/s]
 25%|██▍       | 43/173 [00:00<00:01, 98.29it/s]
 31%|███       | 53/173 [00:00<00:01, 97.55it/s]
 37%|███▋      | 64/173 [00:00<00:01, 100.68it/s]
 43%|████▎     | 75/173 [00:00<00:00, 101.27it/s]
 50%|████▉     | 86/173 [00:00<00:00, 101.20it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 101.41it/s]
 62%|██████▏   | 108/173 [00:01<00:00, 101.83it/s]
 69%|██████▉   | 119/173 [00:01<00:00, 101.05it/s]
 75%|███████▌  | 130/173 [00:01<00:00, 100.89it/s]
 82%|████████▏ | 141/173 [00:01<00:00, 99.55it/s] 
 88%|████████▊ | 152/173 [00:01<00:00, 100.66it/s]
 94%|█████████▍| 163/173 [00:01<00:00, 99.45it/s] 
100%|██████████| 173/173 [00:01<00:00, 100.46it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 324.05it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 102.12it/s]
 13%|█▎        | 22/173 [00:00<00:01, 99.61it/s] 
 19%|█▉        | 3

[2m[36m(func pid=19928)[0m Epoch 00052: reducing learning rate of group 0 to 2.9299e-04.


  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 107.84it/s]
 13%|█▎        | 22/173 [00:00<00:01, 107.51it/s]
 19%|█▉        | 33/173 [00:00<00:01, 98.46it/s] 
 25%|██▌       | 44/173 [00:00<00:01, 100.10it/s]
 32%|███▏      | 55/173 [00:00<00:01, 102.14it/s]
 38%|███▊      | 66/173 [00:00<00:01, 103.69it/s]
 45%|████▍     | 77/173 [00:00<00:00, 103.48it/s]
 51%|█████     | 88/173 [00:00<00:00, 103.49it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 103.89it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 102.67it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 104.09it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 103.67it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 100.43it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 100.12it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 101.32it/s]
100%|██████████| 173/173 [00:01<00:00, 102.15it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 299.87it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [

[2m[36m(func pid=19928)[0m Epoch 00062: reducing learning rate of group 0 to 1.4649e-04.


 13%|█▎        | 22/173 [00:00<00:01, 108.94it/s]
 19%|█▉        | 33/173 [00:00<00:01, 105.41it/s]
 25%|██▌       | 44/173 [00:00<00:01, 102.96it/s]
 32%|███▏      | 55/173 [00:00<00:01, 103.60it/s]
 38%|███▊      | 66/173 [00:00<00:01, 103.88it/s]
 45%|████▍     | 77/173 [00:00<00:00, 104.26it/s]
 51%|█████     | 88/173 [00:00<00:00, 104.71it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 104.14it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 104.63it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 102.96it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 104.15it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 105.16it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 104.92it/s]
100%|██████████| 173/173 [00:01<00:00, 104.92it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 322.44it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 105.94it/s]
 13%|█▎        | 22/173 [00:00<00:01, 107.51it/s]
 19%|█▉        | 33/173 [00:00<00:01, 106.52it/s]
 25%|██▌       |

[2m[36m(func pid=19928)[0m Epoch 00075: reducing learning rate of group 0 to 7.3247e-05.


  6%|▋         | 11/173 [00:00<00:01, 104.96it/s]
 13%|█▎        | 22/173 [00:00<00:01, 105.45it/s]
 19%|█▉        | 33/173 [00:00<00:01, 102.53it/s]
 25%|██▌       | 44/173 [00:00<00:01, 102.16it/s]
 32%|███▏      | 55/173 [00:00<00:01, 101.12it/s]
 38%|███▊      | 66/173 [00:00<00:01, 101.35it/s]
 45%|████▍     | 77/173 [00:00<00:00, 100.90it/s]
 51%|█████     | 88/173 [00:00<00:00, 100.62it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 100.00it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 100.68it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 101.31it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 99.61it/s] 
 83%|████████▎ | 143/173 [00:01<00:00, 100.27it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 100.80it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 44%|████▍     | 22/50 [00:00<00:00, 124.14it/s]
100%|██████████| 50/50 [00:00<00:00, 188.07it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 103.65it/s]
 13%|█▎        | 22/173 [00:00<00:01, 104.83it/s]
 19%|█▉        | 3

[2m[36m(func pid=19928)[0m Epoch 00084: reducing learning rate of group 0 to 3.6624e-05.


  6%|▋         | 11/173 [00:00<00:01, 101.94it/s]
 13%|█▎        | 22/173 [00:00<00:01, 97.08it/s] 
 18%|█▊        | 32/173 [00:00<00:01, 94.32it/s]
 25%|██▍       | 43/173 [00:00<00:01, 97.88it/s]
 31%|███       | 53/173 [00:00<00:01, 96.32it/s]
 37%|███▋      | 64/173 [00:00<00:01, 97.62it/s]
 43%|████▎     | 74/173 [00:00<00:01, 96.23it/s]
 49%|████▊     | 84/173 [00:00<00:00, 95.34it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 95.29it/s]
 60%|██████    | 104/173 [00:01<00:00, 95.80it/s]
 66%|██████▌   | 114/173 [00:01<00:00, 94.64it/s]
 72%|███████▏  | 124/173 [00:01<00:00, 95.03it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 98.27it/s]
 84%|████████▍ | 146/173 [00:01<00:00, 99.63it/s]
 90%|█████████ | 156/173 [00:01<00:00, 97.26it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 314.48it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 103.33it/s]
 13%|█▎        | 22/173 [00:00<00:01, 94.17it/s] 
 18%|█▊        | 32/173 [00:0

[2m[36m(func pid=19928)[0m Epoch 00091: reducing learning rate of group 0 to 1.8312e-05.


 13%|█▎        | 22/173 [00:00<00:01, 106.06it/s]
 19%|█▉        | 33/173 [00:00<00:01, 100.71it/s]
 25%|██▌       | 44/173 [00:00<00:01, 101.14it/s]
 32%|███▏      | 55/173 [00:00<00:01, 101.35it/s]
 38%|███▊      | 66/173 [00:00<00:01, 101.78it/s]
 45%|████▍     | 77/173 [00:00<00:00, 101.31it/s]
 51%|█████     | 88/173 [00:00<00:00, 101.37it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 98.49it/s] 
 64%|██████▎   | 110/173 [00:01<00:00, 100.34it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 99.94it/s] 
 76%|███████▋  | 132/173 [00:01<00:00, 101.39it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 102.28it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 102.48it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 104.06it/s]
100%|██████████| 173/173 [00:01<00:00, 101.60it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 62%|██████▏   | 31/50 [00:00<00:00, 300.44it/s]
100%|██████████| 50/50 [00:00<00:00, 297.63it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 102.19it/s]
 13%|█▎        |

[2m[36m(func pid=19928)[0m Epoch 00099: reducing learning rate of group 0 to 9.1559e-06.


  6%|▋         | 11/173 [00:00<00:01, 104.85it/s]
 13%|█▎        | 22/173 [00:00<00:01, 103.66it/s]
 19%|█▉        | 33/173 [00:00<00:01, 102.76it/s]
 25%|██▌       | 44/173 [00:00<00:01, 102.26it/s]
 32%|███▏      | 55/173 [00:00<00:01, 102.24it/s]
 38%|███▊      | 66/173 [00:00<00:01, 104.10it/s]
 45%|████▍     | 77/173 [00:00<00:00, 103.96it/s]
 51%|█████     | 88/173 [00:00<00:00, 103.33it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 102.55it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 102.39it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 102.16it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 99.84it/s] 
 82%|████████▏ | 142/173 [00:01<00:00, 99.52it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 97.53it/s]
100%|██████████| 173/173 [00:01<00:00, 99.28it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 60%|██████    | 30/50 [00:00<00:00, 299.59it/s]
100%|██████████| 50/50 [00:00<00:00, 306.77it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 104.62it/s]
 13%|█▎        | 22/

[2m[36m(func pid=19928)[0m Epoch 00107: reducing learning rate of group 0 to 4.5779e-06.


  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 110.29it/s]
 14%|█▍        | 24/173 [00:00<00:01, 106.75it/s]
 20%|██        | 35/173 [00:00<00:01, 106.26it/s]
 27%|██▋       | 46/173 [00:00<00:01, 105.35it/s]
 33%|███▎      | 57/173 [00:00<00:01, 100.70it/s]
 39%|███▉      | 68/173 [00:00<00:01, 98.32it/s] 
 46%|████▌     | 79/173 [00:00<00:00, 99.16it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 100.56it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 102.11it/s]
 65%|██████▍   | 112/173 [00:01<00:00, 103.31it/s]
 77%|███████▋  | 134/173 [00:01<00:00, 102.18it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 99.38it/s] 
 90%|█████████ | 156/173 [00:01<00:00, 100.58it/s]
 97%|█████████▋| 167/173 [00:01<00:00, 101.84it/s]
100%|██████████| 173/173 [00:01<00:00, 101.69it/s]
 64%|██████▍   | 32/50 [00:00<00:00, 308.62it/s]
100%|██████████| 50/50 [00:00<00:00, 316.93it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 101.41it/s]
 13%|█▎        |

[2m[36m(func pid=19928)[0m Epoch 00117: reducing learning rate of group 0 to 2.2890e-06.


  7%|▋         | 12/173 [00:00<00:01, 111.20it/s]
 14%|█▍        | 24/173 [00:00<00:01, 106.96it/s]
 20%|██        | 35/173 [00:00<00:01, 104.65it/s]
 27%|██▋       | 46/173 [00:00<00:01, 102.52it/s]
 33%|███▎      | 57/173 [00:00<00:01, 100.97it/s]
 39%|███▉      | 68/173 [00:00<00:01, 99.92it/s] 
 46%|████▌     | 79/173 [00:00<00:00, 100.60it/s]
 58%|█████▊    | 100/173 [00:00<00:00, 99.22it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 96.55it/s]
 69%|██████▉   | 120/173 [00:01<00:00, 96.51it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 99.56it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 100.80it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 101.46it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 275.16it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 103.83it/s]
 13%|█▎        | 22/173 [00:00<00:01, 104.33it/s]
 19%|█▉        | 33/173 [00:00<00:01, 103.16it/s]
 25%|██▌       | 44/173 [00:00<00:01, 103.85it/s]
 32%|███▏      | 55/

[2m[36m(func pid=19928)[0m Epoch 00123: reducing learning rate of group 0 to 1.1445e-06.


  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 100.92it/s]
 13%|█▎        | 22/173 [00:00<00:01, 103.45it/s]
 19%|█▉        | 33/173 [00:00<00:01, 104.24it/s]
 25%|██▌       | 44/173 [00:00<00:01, 104.87it/s]
 32%|███▏      | 55/173 [00:00<00:01, 105.23it/s]
 38%|███▊      | 66/173 [00:00<00:01, 105.94it/s]
 45%|████▍     | 77/173 [00:00<00:00, 105.09it/s]
 51%|█████     | 88/173 [00:00<00:00, 105.53it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 104.87it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 103.98it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 104.00it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 104.35it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 104.04it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 101.40it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 269.94it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 95.18it/s]
 12%|█▏        | 21/173 [00:00<00:01, 97.57it/s]
 18%|█▊        | 31/173 [00:0

[2m[36m(func pid=19928)[0m Epoch 00129: reducing learning rate of group 0 to 5.7224e-07.


 12%|█▏        | 20/173 [00:00<00:01, 98.62it/s]
 18%|█▊        | 31/173 [00:00<00:01, 100.33it/s]
 24%|██▍       | 42/173 [00:00<00:01, 101.51it/s]
 31%|███       | 53/173 [00:00<00:01, 101.65it/s]
 37%|███▋      | 64/173 [00:00<00:01, 101.39it/s]
 43%|████▎     | 75/173 [00:00<00:00, 101.92it/s]
 50%|████▉     | 86/173 [00:00<00:00, 101.80it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 102.42it/s]
 62%|██████▏   | 108/173 [00:01<00:00, 101.92it/s]
 69%|██████▉   | 119/173 [00:01<00:00, 102.25it/s]
 75%|███████▌  | 130/173 [00:01<00:00, 102.36it/s]
 82%|████████▏ | 141/173 [00:01<00:00, 102.09it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 102.15it/s]
100%|██████████| 173/173 [00:01<00:00, 102.22it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 50%|█████     | 25/50 [00:00<00:00, 236.83it/s]
100%|██████████| 50/50 [00:00<00:00, 244.59it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  5%|▍         | 8/173 [00:00<00:03, 47.26it/s]
 11%|█         | 19/173 [00:00<00:02, 75.02it/s]
 17%|█▋        | 30/1

[2m[36m(func pid=19928)[0m Epoch 00135: reducing learning rate of group 0 to 2.8612e-07.


 12%|█▏        | 20/173 [00:00<00:01, 96.46it/s]
 18%|█▊        | 31/173 [00:00<00:01, 100.64it/s]
 24%|██▍       | 42/173 [00:00<00:01, 102.01it/s]
 31%|███       | 53/173 [00:00<00:01, 101.48it/s]
 37%|███▋      | 64/173 [00:00<00:01, 101.03it/s]
 43%|████▎     | 75/173 [00:00<00:00, 101.21it/s]
 50%|████▉     | 86/173 [00:00<00:00, 101.12it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 101.43it/s]
 62%|██████▏   | 108/173 [00:01<00:00, 101.86it/s]
 69%|██████▉   | 119/173 [00:01<00:00, 100.39it/s]
 75%|███████▌  | 130/173 [00:01<00:00, 101.68it/s]
 82%|████████▏ | 141/173 [00:01<00:00, 100.36it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 102.20it/s]
 94%|█████████▍| 163/173 [00:01<00:00, 102.99it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 241.31it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 100.51it/s]
 13%|█▎        | 22/173 [00:00<00:01, 101.84it/s]
 19%|█▉        | 33/173 [00:00<00:01, 103.02it/s]
 25%|██▌       | 

[2m[36m(func pid=19928)[0m Epoch 00141: reducing learning rate of group 0 to 1.4306e-07.


  6%|▋         | 11/173 [00:00<00:01, 102.00it/s]
 13%|█▎        | 22/173 [00:00<00:01, 102.25it/s]
 19%|█▉        | 33/173 [00:00<00:01, 101.73it/s]
 25%|██▌       | 44/173 [00:00<00:01, 100.06it/s]
 32%|███▏      | 55/173 [00:00<00:01, 101.67it/s]
 38%|███▊      | 66/173 [00:00<00:01, 101.41it/s]
 45%|████▍     | 77/173 [00:00<00:00, 102.83it/s]
 51%|█████     | 88/173 [00:00<00:00, 102.21it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 101.26it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 98.85it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 100.33it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 100.47it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 101.75it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 102.39it/s]
 56%|█████▌    | 28/50 [00:00<00:00, 271.67it/s]
100%|██████████| 50/50 [00:00<00:00, 264.32it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 97.82it/s]
 12%|█▏        | 21/173 [00:00<00:01, 99.96it/s]
 18%|█▊        | 32/173 [00:00<00:01, 101.63it/s]
 25%|██▍  

[2m[36m(func pid=19928)[0m Epoch 00147: reducing learning rate of group 0 to 7.1530e-08.


  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 100.84it/s]
 13%|█▎        | 22/173 [00:00<00:01, 103.75it/s]
 19%|█▉        | 33/173 [00:00<00:01, 104.16it/s]
 25%|██▌       | 44/173 [00:00<00:01, 103.79it/s]
 32%|███▏      | 55/173 [00:00<00:01, 104.00it/s]
 38%|███▊      | 66/173 [00:00<00:01, 104.82it/s]
 45%|████▍     | 77/173 [00:00<00:00, 105.16it/s]
 51%|█████     | 88/173 [00:00<00:00, 104.09it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 105.11it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 105.88it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 105.78it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 105.42it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 101.99it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 103.12it/s]
 54%|█████▍    | 27/50 [00:00<00:00, 262.24it/s]
100%|██████████| 50/50 [00:00<00:00, 263.02it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 92.80it/s]
 12%|█▏        | 20/173 [00:00<00:01, 96.76it/s]
 18%|█▊        | 31

[2m[36m(func pid=21376)[0m Epoch 00036: reducing learning rate of group 0 to 2.3995e-03.


 14%|█▍        | 12/87 [00:00<00:00, 117.58it/s]
 28%|██▊       | 24/87 [00:00<00:00, 116.21it/s]
 41%|████▏     | 36/87 [00:00<00:00, 117.39it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 118.62it/s]
 70%|███████   | 61/87 [00:00<00:00, 117.81it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 117.80it/s]
 98%|█████████▊| 85/87 [00:00<00:00, 118.05it/s]
100%|██████████| 87/87 [00:00<00:00, 106.85it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 242.91it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 112.44it/s]
 28%|██▊       | 24/87 [00:00<00:00, 113.44it/s]
 41%|████▏     | 36/87 [00:00<00:00, 115.38it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 115.78it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 116.04it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 116.18it/s]
100%|██████████| 87/87 [00:00<00:00, 115.73it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 238.85it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍    

[2m[36m(func pid=21376)[0m Epoch 00053: reducing learning rate of group 0 to 1.1998e-03.


 15%|█▍        | 13/87 [00:00<00:00, 121.53it/s]
 30%|██▉       | 26/87 [00:00<00:00, 116.41it/s]
 44%|████▎     | 38/87 [00:00<00:00, 115.81it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 114.52it/s]
 71%|███████▏  | 62/87 [00:00<00:00, 115.65it/s]
 85%|████████▌ | 74/87 [00:00<00:00, 114.47it/s]
100%|██████████| 87/87 [00:00<00:00, 116.08it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 236.21it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 106.73it/s]
 26%|██▋       | 23/87 [00:00<00:00, 109.95it/s]
 40%|████      | 35/87 [00:00<00:00, 112.96it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 111.00it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 112.97it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 114.76it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 109.56it/s]
 28%|██▊       | 24/87 [00:00<00:00, 114.88it/s]
 41%|████▏     | 36/87 [00:00<00:00, 115.16it/s]
 55%|█████▌

[2m[36m(func pid=21376)[0m Epoch 00063: reducing learning rate of group 0 to 5.9988e-04.


 28%|██▊       | 24/87 [00:00<00:00, 117.23it/s]
 41%|████▏     | 36/87 [00:00<00:00, 117.89it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 119.31it/s]
 70%|███████   | 61/87 [00:00<00:00, 119.23it/s]
 85%|████████▌ | 74/87 [00:00<00:00, 119.69it/s]
100%|██████████| 87/87 [00:00<00:00, 119.14it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 141.50it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 114.02it/s]
 28%|██▊       | 24/87 [00:00<00:00, 117.45it/s]
 41%|████▏     | 36/87 [00:00<00:00, 115.88it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 116.64it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 116.06it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 115.73it/s]
 97%|█████████▋| 84/87 [00:00<00:00, 116.50it/s]
100%|██████████| 87/87 [00:00<00:00, 116.24it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 101.25it/s]
 25%|██▌       | 22/87 [00:00<00:00, 106.06it/s]
 38%|███▊  

[2m[36m(func pid=21376)[0m Epoch 00069: reducing learning rate of group 0 to 2.9994e-04.


 14%|█▍        | 12/87 [00:00<00:00, 113.66it/s]
 28%|██▊       | 24/87 [00:00<00:00, 114.41it/s]
 41%|████▏     | 36/87 [00:00<00:00, 111.42it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 111.62it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 111.53it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 111.32it/s]
100%|██████████| 87/87 [00:00<00:00, 110.27it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 109.98it/s]
 25%|██▌       | 22/87 [00:00<00:00, 107.11it/s]
 39%|███▉      | 34/87 [00:00<00:00, 111.69it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 113.29it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 113.31it/s]
 80%|████████  | 70/87 [00:00<00:00, 114.64it/s]
100%|██████████| 87/87 [00:00<00:00, 114.03it/s]
 96%|█████████▌| 24/25 [00:00<00:00, 236.82it/s]
100%|██████████| 25/25 [00:00<00:00, 235.94it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 113.59it/s]
 28%|██▊       | 24/87 [00:00<00:00, 113.69it/s]
 

[2m[36m(func pid=21376)[0m Epoch 00087: reducing learning rate of group 0 to 1.4997e-04.


 28%|██▊       | 24/87 [00:00<00:00, 116.69it/s]
 41%|████▏     | 36/87 [00:00<00:00, 113.87it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 113.14it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 111.71it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 112.89it/s]
100%|██████████| 87/87 [00:00<00:00, 113.16it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 138.73it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 115.58it/s]
 28%|██▊       | 24/87 [00:00<00:00, 113.58it/s]
 41%|████▏     | 36/87 [00:00<00:00, 112.88it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 113.37it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 114.75it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 114.73it/s]
100%|██████████| 87/87 [00:00<00:00, 114.19it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 242.72it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 109.93it/s]
 26%|██▋       | 23/87 [00:00<00:00, 112.58it/s]
 40%|████  

[2m[36m(func pid=21376)[0m Epoch 00093: reducing learning rate of group 0 to 7.4985e-05.


 14%|█▍        | 12/87 [00:00<00:00, 114.32it/s]
 28%|██▊       | 24/87 [00:00<00:00, 103.55it/s]
 40%|████      | 35/87 [00:00<00:00, 105.10it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 108.72it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 110.66it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 111.49it/s]
100%|██████████| 87/87 [00:00<00:00, 111.48it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 139.39it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 108.04it/s]
 26%|██▋       | 23/87 [00:00<00:00, 113.53it/s]
 40%|████      | 35/87 [00:00<00:00, 108.07it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 111.10it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 112.46it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 113.92it/s]
100%|██████████| 87/87 [00:00<00:00, 112.84it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 237.96it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 113.44it/s]
 28%|██▊   

[2m[36m(func pid=21376)[0m Epoch 00099: reducing learning rate of group 0 to 3.7492e-05.


 15%|█▍        | 13/87 [00:00<00:00, 119.81it/s]
 29%|██▊       | 25/87 [00:00<00:00, 116.24it/s]
 43%|████▎     | 37/87 [00:00<00:00, 116.13it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 116.73it/s]
 70%|███████   | 61/87 [00:00<00:00, 115.22it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 114.61it/s]
100%|██████████| 87/87 [00:00<00:00, 115.84it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 137.72it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 112.95it/s]
 28%|██▊       | 24/87 [00:00<00:00, 113.12it/s]
 41%|████▏     | 36/87 [00:00<00:00, 115.14it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 112.71it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 111.36it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 109.27it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 115.72it/s]
 28%|██▊       | 24/87 [00:00<00:00, 116.28it/s]
 41%|████▏     | 36/87 [00:00<00:00, 112.13it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 111.90it/s]
 

[2m[36m(func pid=21376)[0m Epoch 00105: reducing learning rate of group 0 to 1.8746e-05.


 14%|█▍        | 12/87 [00:00<00:00, 115.41it/s]
 28%|██▊       | 24/87 [00:00<00:00, 113.55it/s]
 41%|████▏     | 36/87 [00:00<00:00, 115.42it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 110.00it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 108.04it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 104.76it/s]
100%|██████████| 87/87 [00:00<00:00, 109.60it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 76%|███████▌  | 19/25 [00:00<00:00, 125.74it/s]
100%|██████████| 25/25 [00:00<00:00, 142.16it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 103.51it/s]
 25%|██▌       | 22/87 [00:00<00:00, 101.54it/s]
 39%|███▉      | 34/87 [00:00<00:00, 107.96it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 111.03it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 113.04it/s]
 80%|████████  | 70/87 [00:00<00:00, 112.37it/s]
100%|██████████| 87/87 [00:00<00:00, 111.54it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 238.77it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍    

[2m[36m(func pid=21376)[0m Epoch 00111: reducing learning rate of group 0 to 9.3731e-06.


 28%|██▊       | 24/87 [00:00<00:00, 110.96it/s]
 41%|████▏     | 36/87 [00:00<00:00, 113.41it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 114.20it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 111.89it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 111.29it/s]
100%|██████████| 87/87 [00:00<00:00, 112.71it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 44%|████▍     | 11/25 [00:00<00:00, 90.30it/s]
100%|██████████| 25/25 [00:00<00:00, 139.77it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 109.85it/s]
 26%|██▋       | 23/87 [00:00<00:00, 113.27it/s]
 40%|████      | 35/87 [00:00<00:00, 114.95it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 114.18it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 113.72it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 112.09it/s]
100%|██████████| 87/87 [00:00<00:00, 113.88it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 227.50it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 112.43it/s]
 28%|██▊    

[2m[36m(func pid=21376)[0m Epoch 00117: reducing learning rate of group 0 to 4.6865e-06.


 14%|█▍        | 12/87 [00:00<00:00, 113.52it/s]
 28%|██▊       | 24/87 [00:00<00:00, 109.75it/s]
 41%|████▏     | 36/87 [00:00<00:00, 110.41it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 108.66it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 110.22it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 111.25it/s]
100%|██████████| 87/87 [00:00<00:00, 111.33it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 60%|██████    | 15/25 [00:00<00:00, 105.59it/s]
100%|██████████| 25/25 [00:00<00:00, 133.14it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 115.59it/s]
 28%|██▊       | 24/87 [00:00<00:00, 117.80it/s]
 43%|████▎     | 37/87 [00:00<00:00, 119.08it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 118.48it/s]
 70%|███████   | 61/87 [00:00<00:00, 118.58it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 117.48it/s]
100%|██████████| 87/87 [00:00<00:00, 118.02it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 96%|█████████▌| 24/25 [00:00<00:00, 237.43it/s]
100%|██████████| 25/25 [00:00<00:00, 238.09it/s]
 

[2m[36m(func pid=21376)[0m Epoch 00123: reducing learning rate of group 0 to 2.3433e-06.


 14%|█▍        | 12/87 [00:00<00:00, 113.26it/s]
 28%|██▊       | 24/87 [00:00<00:00, 113.86it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 116.23it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 114.98it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 115.80it/s]
 97%|█████████▋| 84/87 [00:00<00:00, 114.59it/s]
100%|██████████| 87/87 [00:00<00:00, 115.10it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 138.28it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 112.20it/s]
 28%|██▊       | 24/87 [00:00<00:00, 116.04it/s]
 41%|████▏     | 36/87 [00:00<00:00, 113.14it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 113.30it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 114.12it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 113.51it/s]
100%|██████████| 87/87 [00:00<00:00, 114.84it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 231.44it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 115.01it/s]
 28%|██▊   

[2m[36m(func pid=21376)[0m Epoch 00129: reducing learning rate of group 0 to 1.1716e-06.


 14%|█▍        | 12/87 [00:00<00:00, 115.30it/s]
 28%|██▊       | 24/87 [00:00<00:00, 116.64it/s]
 43%|████▎     | 37/87 [00:00<00:00, 118.89it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 119.23it/s]
 70%|███████   | 61/87 [00:00<00:00, 117.53it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 116.86it/s]
100%|██████████| 87/87 [00:00<00:00, 116.47it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 136.34it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 113.06it/s]
 28%|██▊       | 24/87 [00:00<00:00, 113.67it/s]
 41%|████▏     | 36/87 [00:00<00:00, 114.96it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 112.23it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 113.29it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 114.01it/s]
100%|██████████| 87/87 [00:00<00:00, 112.08it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 220.95it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 104.80it/s]
 26%|██▋   

[2m[36m(func pid=21376)[0m Epoch 00135: reducing learning rate of group 0 to 5.8582e-07.


 14%|█▍        | 12/87 [00:00<00:00, 110.32it/s]
 28%|██▊       | 24/87 [00:00<00:00, 110.08it/s]
 41%|████▏     | 36/87 [00:00<00:00, 110.19it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 109.73it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 109.48it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 112.13it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 133.06it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 113.38it/s]
 28%|██▊       | 24/87 [00:00<00:00, 112.95it/s]
 41%|████▏     | 36/87 [00:00<00:00, 112.81it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 110.99it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 113.23it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 113.98it/s]
100%|██████████| 87/87 [00:00<00:00, 113.32it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 238.43it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 112.67it/s]
 28%|██▊       | 24/87 [00:00<00:00, 113.51it/s]
 43%|████▎ 

[2m[36m(func pid=21376)[0m Epoch 00141: reducing learning rate of group 0 to 2.9291e-07.


 29%|██▊       | 25/87 [00:00<00:00, 118.11it/s]
 43%|████▎     | 37/87 [00:00<00:00, 118.26it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 117.65it/s]
 70%|███████   | 61/87 [00:00<00:00, 116.44it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 116.47it/s]
100%|██████████| 87/87 [00:00<00:00, 116.86it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 139.08it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 112.87it/s]
 28%|██▊       | 24/87 [00:00<00:00, 116.20it/s]
 41%|████▏     | 36/87 [00:00<00:00, 115.79it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 116.68it/s]
 70%|███████   | 61/87 [00:00<00:00, 118.57it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 118.07it/s]
100%|██████████| 87/87 [00:00<00:00, 117.89it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 238.64it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 113.13it/s]
 28%|██▊       | 24/87 [00:00<00:00, 115.14it/s]
 41%|████▏ 

[2m[36m(func pid=21376)[0m Epoch 00147: reducing learning rate of group 0 to 1.4645e-07.


 28%|██▊       | 24/87 [00:00<00:00, 116.83it/s]
 41%|████▏     | 36/87 [00:00<00:00, 114.86it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 115.76it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 115.34it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 115.23it/s]
100%|██████████| 87/87 [00:00<00:00, 115.51it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 44%|████▍     | 11/25 [00:00<00:00, 84.30it/s]
100%|██████████| 25/25 [00:00<00:00, 131.21it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 107.07it/s]
 25%|██▌       | 22/87 [00:00<00:00, 103.88it/s]
 38%|███▊      | 33/87 [00:00<00:00, 105.15it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 107.57it/s]
 66%|██████▌   | 57/87 [00:00<00:00, 110.37it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 111.34it/s]
100%|██████████| 87/87 [00:00<00:00, 109.87it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 237.99it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 105.59it/s]
 40%|████   

[2m[36m(func pid=21212)[0m Epoch 00009: reducing learning rate of group 0 to 1.4440e-02.


 16%|█▌        | 28/173 [00:00<00:01, 138.52it/s]
 24%|██▍       | 42/173 [00:00<00:00, 137.13it/s]
 32%|███▏      | 56/173 [00:00<00:00, 136.79it/s]
 40%|████      | 70/173 [00:00<00:00, 136.08it/s]
 49%|████▊     | 84/173 [00:00<00:00, 136.97it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 137.91it/s]
 65%|██████▌   | 113/173 [00:00<00:00, 137.74it/s]
 73%|███████▎  | 127/173 [00:00<00:00, 134.73it/s]
 82%|████████▏ | 141/173 [00:01<00:00, 133.96it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 134.50it/s]
 98%|█████████▊| 169/173 [00:01<00:00, 134.51it/s]
100%|██████████| 173/173 [00:01<00:00, 135.35it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 390.41it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 129.98it/s]
 16%|█▌        | 27/173 [00:00<00:01, 130.63it/s]
 24%|██▎       | 41/173 [00:00<00:00, 134.01it/s]
 32%|███▏      | 55/173 [00:00<00:00, 133.39it/s]
 40%|███▉      | 69/173 [00:00<00:00, 131.94it/s]
 48%|████▊     |

[2m[36m(func pid=21212)[0m Epoch 00024: reducing learning rate of group 0 to 7.2202e-03.


  8%|▊         | 14/173 [00:00<00:01, 138.79it/s]
 16%|█▌        | 28/173 [00:00<00:01, 137.75it/s]
 24%|██▍       | 42/173 [00:00<00:00, 136.37it/s]
 32%|███▏      | 56/173 [00:00<00:00, 135.91it/s]
 41%|████      | 71/173 [00:00<00:00, 137.95it/s]
 49%|████▉     | 85/173 [00:00<00:00, 136.84it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 137.49it/s]
 65%|██████▌   | 113/173 [00:00<00:00, 135.74it/s]
 73%|███████▎  | 127/173 [00:00<00:00, 135.83it/s]
 82%|████████▏ | 141/173 [00:01<00:00, 131.81it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 131.36it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 387.77it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  5%|▌         | 9/173 [00:00<00:01, 85.03it/s]
 13%|█▎        | 23/173 [00:00<00:01, 112.67it/s]
 21%|██▏       | 37/173 [00:00<00:01, 124.23it/s]
 29%|██▉       | 50/173 [00:00<00:00, 125.88it/s]
 36%|███▋      | 63/173 [00:00<00:00, 127.05it/s]
 44%|████▍     | 76/173 [00:00<00:00, 127.99it/s]
 52%|█████▏    | 90/

[2m[36m(func pid=21212)[0m Epoch 00030: reducing learning rate of group 0 to 3.6101e-03.


  8%|▊         | 13/173 [00:00<00:01, 129.24it/s]
 16%|█▌        | 27/173 [00:00<00:01, 131.67it/s]
 24%|██▎       | 41/173 [00:00<00:00, 133.92it/s]
 32%|███▏      | 55/173 [00:00<00:00, 133.68it/s]
 40%|███▉      | 69/173 [00:00<00:00, 134.86it/s]
 48%|████▊     | 83/173 [00:00<00:00, 135.74it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 136.49it/s]
 64%|██████▍   | 111/173 [00:00<00:00, 134.88it/s]
 72%|███████▏  | 125/173 [00:00<00:00, 134.36it/s]
 80%|████████  | 139/173 [00:01<00:00, 136.04it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 134.76it/s]
100%|██████████| 173/173 [00:01<00:00, 135.10it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 375.76it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 129.23it/s]
 15%|█▌        | 26/173 [00:00<00:01, 128.83it/s]
 23%|██▎       | 39/173 [00:00<00:01, 122.74it/s]
 30%|███       | 52/173 [00:00<00:01, 120.89it/s]
 38%|███▊      | 65/173 [00:00<00:00, 123.95it/s]
 46%|████▌     | 

[2m[36m(func pid=21212)[0m Epoch 00043: reducing learning rate of group 0 to 1.8051e-03.


  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 132.70it/s]
 16%|█▌        | 28/173 [00:00<00:01, 127.89it/s]
 24%|██▍       | 42/173 [00:00<00:01, 129.27it/s]
 32%|███▏      | 55/173 [00:00<00:00, 128.02it/s]
 40%|███▉      | 69/173 [00:00<00:00, 128.20it/s]
 47%|████▋     | 82/173 [00:00<00:00, 125.95it/s]
 55%|█████▌    | 96/173 [00:00<00:00, 127.84it/s]
 72%|███████▏  | 124/173 [00:00<00:00, 132.23it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 131.05it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 135.03it/s]
 97%|█████████▋| 167/173 [00:01<00:00, 134.83it/s]
100%|██████████| 173/173 [00:01<00:00, 130.64it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 409.07it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 118.78it/s]
 15%|█▌        | 26/173 [00:00<00:01, 127.26it/s]
 23%|██▎       | 39/173 [00:00<00:01, 127.45it/s]
 30%|███       | 52/173 [00:00<00:00, 123.95it/s]
 38%|███▊      | 65/173 [00

[2m[36m(func pid=21212)[0m Epoch 00053: reducing learning rate of group 0 to 9.0253e-04.


  8%|▊         | 14/173 [00:00<00:01, 129.31it/s]
 16%|█▌        | 28/173 [00:00<00:01, 135.37it/s]
 24%|██▍       | 42/173 [00:00<00:00, 135.05it/s]
 32%|███▏      | 56/173 [00:00<00:00, 134.07it/s]
 40%|████      | 70/173 [00:00<00:00, 132.26it/s]
 49%|████▊     | 84/173 [00:00<00:00, 129.04it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 128.20it/s]
 64%|██████▎   | 110/173 [00:00<00:00, 126.90it/s]
 72%|███████▏  | 124/173 [00:00<00:00, 128.99it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 130.74it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 130.56it/s]
 96%|█████████▌| 166/173 [00:01<00:00, 130.17it/s]
100%|██████████| 173/173 [00:01<00:00, 129.37it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 403.85it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 129.63it/s]
 16%|█▌        | 27/173 [00:00<00:01, 131.93it/s]
 24%|██▍       | 42/173 [00:00<00:00, 136.35it/s]
 32%|███▏      | 56/173 [00:00<00:00, 135.74it/s]
 41%|████      |

[2m[36m(func pid=21212)[0m Epoch 00061: reducing learning rate of group 0 to 4.5127e-04.


  8%|▊         | 14/173 [00:00<00:01, 134.52it/s]
 16%|█▌        | 28/173 [00:00<00:01, 92.97it/s] 
 24%|██▍       | 42/173 [00:00<00:01, 108.76it/s]
 32%|███▏      | 56/173 [00:00<00:01, 116.61it/s]
 40%|████      | 70/173 [00:00<00:00, 121.25it/s]
 49%|████▉     | 85/173 [00:00<00:00, 128.17it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 130.10it/s]
 65%|██████▌   | 113/173 [00:00<00:00, 131.08it/s]
 73%|███████▎  | 127/173 [00:01<00:00, 130.63it/s]
 82%|████████▏ | 141/173 [00:01<00:00, 125.29it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 122.64it/s]
100%|██████████| 173/173 [00:01<00:00, 121.83it/s]
 76%|███████▌  | 38/50 [00:00<00:00, 379.51it/s]
100%|██████████| 50/50 [00:00<00:00, 384.58it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 126.07it/s]
 15%|█▌        | 26/173 [00:00<00:01, 126.49it/s]
 23%|██▎       | 39/173 [00:00<00:01, 127.74it/s]
 30%|███       | 52/173 [00:00<00:00, 125.88it/s]
 38%|███▊      | 65/173 [00:00<00:00, 126.31it/s]
 46%|██

[2m[36m(func pid=21212)[0m Epoch 00067: reducing learning rate of group 0 to 2.2563e-04.


  8%|▊         | 14/173 [00:00<00:01, 136.01it/s]
 16%|█▌        | 28/173 [00:00<00:01, 134.81it/s]
 24%|██▍       | 42/173 [00:00<00:01, 130.98it/s]
 33%|███▎      | 57/173 [00:00<00:00, 134.84it/s]
 41%|████      | 71/173 [00:00<00:00, 135.56it/s]
 49%|████▉     | 85/173 [00:00<00:00, 136.28it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 133.90it/s]
 65%|██████▌   | 113/173 [00:00<00:00, 134.15it/s]
 73%|███████▎  | 127/173 [00:00<00:00, 131.38it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 132.68it/s]
100%|██████████| 173/173 [00:01<00:00, 130.86it/s]
 76%|███████▌  | 38/50 [00:00<00:00, 371.85it/s]
100%|██████████| 50/50 [00:00<00:00, 357.31it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 134.68it/s]
 16%|█▌        | 28/173 [00:00<00:01, 134.57it/s]
 24%|██▍       | 42/173 [00:00<00:01, 130.15it/s]
 32%|███▏      | 56/173 [00:00<00:00, 128.18it/s]
 40%|████      | 70/173 [00:00<00:00, 131.11it/s]
 49%|████▊     | 84/173 [00:00<00:00, 133.49it/s]
 57%|███

[2m[36m(func pid=21212)[0m Epoch 00077: reducing learning rate of group 0 to 1.1282e-04.


  8%|▊         | 13/173 [00:00<00:01, 125.18it/s]
 15%|█▌        | 26/173 [00:00<00:01, 122.94it/s]
 23%|██▎       | 39/173 [00:00<00:01, 124.97it/s]
 30%|███       | 52/173 [00:00<00:00, 123.54it/s]
 45%|████▌     | 78/173 [00:00<00:00, 125.24it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 125.31it/s]
 60%|██████    | 104/173 [00:00<00:00, 126.13it/s]
 68%|██████▊   | 117/173 [00:00<00:00, 126.55it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 128.26it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 127.28it/s]
 91%|█████████ | 157/173 [00:01<00:00, 127.72it/s]
100%|██████████| 173/173 [00:01<00:00, 126.19it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 384.06it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 127.51it/s]
 15%|█▌        | 26/173 [00:00<00:01, 125.28it/s]
 23%|██▎       | 39/173 [00:00<00:01, 125.70it/s]
 31%|███       | 53/173 [00:00<00:00, 128.65it/s]
 38%|███▊      | 66/173 [00:00<00:00, 126.47it/s]
 46%|████▌     |

[2m[36m(func pid=21212)[0m Epoch 00084: reducing learning rate of group 0 to 5.6408e-05.


 16%|█▌        | 28/173 [00:00<00:01, 131.28it/s]
 24%|██▍       | 42/173 [00:00<00:00, 133.96it/s]
 32%|███▏      | 56/173 [00:00<00:00, 120.94it/s]
 40%|████      | 70/173 [00:00<00:00, 124.13it/s]
 48%|████▊     | 83/173 [00:00<00:00, 124.38it/s]
 55%|█████▌    | 96/173 [00:00<00:00, 124.03it/s]
 63%|██████▎   | 109/173 [00:00<00:00, 125.81it/s]
 71%|███████   | 122/173 [00:00<00:00, 123.06it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 121.85it/s]
 86%|████████▌ | 148/173 [00:01<00:00, 123.14it/s]
 93%|█████████▎| 161/173 [00:01<00:00, 121.62it/s]
100%|██████████| 173/173 [00:01<00:00, 124.64it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 385.05it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 133.52it/s]
 16%|█▌        | 28/173 [00:00<00:01, 135.71it/s]
 24%|██▍       | 42/173 [00:00<00:00, 135.74it/s]
 33%|███▎      | 57/173 [00:00<00:00, 138.10it/s]
 41%|████      | 71/173 [00:00<00:00, 132.40it/s]
 49%|████▉     |

[2m[36m(func pid=21212)[0m Epoch 00090: reducing learning rate of group 0 to 2.8204e-05.


  8%|▊         | 13/173 [00:00<00:01, 126.61it/s]
 15%|█▌        | 26/173 [00:00<00:01, 125.03it/s]
 23%|██▎       | 39/173 [00:00<00:01, 126.64it/s]
 30%|███       | 52/173 [00:00<00:00, 126.86it/s]
 38%|███▊      | 65/173 [00:00<00:00, 125.29it/s]
 46%|████▌     | 79/173 [00:00<00:00, 128.04it/s]
 54%|█████▍    | 93/173 [00:00<00:00, 130.28it/s]
 62%|██████▏   | 107/173 [00:00<00:00, 132.09it/s]
 70%|██████▉   | 121/173 [00:00<00:00, 134.08it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 131.06it/s]
 86%|████████▌ | 149/173 [00:01<00:00, 104.76it/s]
 94%|█████████▎| 162/173 [00:01<00:00, 109.94it/s]
100%|██████████| 173/173 [00:01<00:00, 121.77it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 398.29it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 127.81it/s]
 16%|█▌        | 27/173 [00:00<00:01, 132.55it/s]
 24%|██▎       | 41/173 [00:00<00:01, 129.17it/s]
 32%|███▏      | 56/173 [00:00<00:00, 134.08it/s]
 40%|████      |

[2m[36m(func pid=21212)[0m Epoch 00096: reducing learning rate of group 0 to 1.4102e-05.


 17%|█▋        | 29/173 [00:00<00:01, 137.03it/s]
 25%|██▍       | 43/173 [00:00<00:00, 137.73it/s]
 33%|███▎      | 57/173 [00:00<00:00, 138.45it/s]
 41%|████      | 71/173 [00:00<00:00, 138.53it/s]
 49%|████▉     | 85/173 [00:00<00:00, 133.50it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 131.42it/s]
 65%|██████▌   | 113/173 [00:00<00:00, 133.91it/s]
 73%|███████▎  | 127/173 [00:00<00:00, 132.54it/s]
 74%|███████▍  | 37/50 [00:00<00:00, 362.58it/s]
100%|██████████| 50/50 [00:00<00:00, 354.29it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 129.91it/s]
 16%|█▌        | 27/173 [00:00<00:01, 132.77it/s]
 24%|██▎       | 41/173 [00:00<00:00, 135.64it/s]
 32%|███▏      | 55/173 [00:00<00:00, 137.10it/s]
 40%|███▉      | 69/173 [00:00<00:00, 136.19it/s]
 48%|████▊     | 83/173 [00:00<00:00, 135.56it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 135.97it/s]
 64%|██████▍   | 111/173 [00:00<00:00, 137.09it/s]
 80%|████████  | 139/173 [00:01<00:00, 138.23it/s]
 88%|███

[2m[36m(func pid=21212)[0m Epoch 00102: reducing learning rate of group 0 to 7.0510e-06.


  8%|▊         | 13/173 [00:00<00:01, 124.74it/s]
 15%|█▌        | 26/173 [00:00<00:01, 121.52it/s]
 23%|██▎       | 40/173 [00:00<00:01, 127.99it/s]
 31%|███       | 54/173 [00:00<00:00, 129.09it/s]
 39%|███▊      | 67/173 [00:00<00:00, 128.18it/s]
 47%|████▋     | 81/173 [00:00<00:00, 130.65it/s]
 55%|█████▍    | 95/173 [00:00<00:00, 132.11it/s]
 63%|██████▎   | 109/173 [00:00<00:00, 132.79it/s]
 71%|███████   | 123/173 [00:00<00:00, 132.93it/s]
 79%|███████▉  | 137/173 [00:01<00:00, 134.30it/s]
 87%|████████▋ | 151/173 [00:01<00:00, 134.78it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 133.90it/s]
100%|██████████| 173/173 [00:01<00:00, 130.46it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 76%|███████▌  | 38/50 [00:00<00:00, 376.94it/s]
100%|██████████| 50/50 [00:00<00:00, 381.77it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 130.52it/s]
 16%|█▌        | 28/173 [00:00<00:01, 133.28it/s]
 24%|██▍       | 42/173 [00:00<00:01, 130.84it/s]
 32%|███▏      | 

[2m[36m(func pid=21212)[0m Epoch 00108: reducing learning rate of group 0 to 3.5255e-06.


  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 131.98it/s]
 16%|█▌        | 28/173 [00:00<00:01, 134.36it/s]
 24%|██▍       | 42/173 [00:00<00:00, 131.54it/s]
 32%|███▏      | 56/173 [00:00<00:00, 129.39it/s]
 40%|████      | 70/173 [00:00<00:00, 132.27it/s]
 49%|████▊     | 84/173 [00:00<00:00, 128.72it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 126.80it/s]
 64%|██████▍   | 111/173 [00:00<00:00, 128.45it/s]
 72%|███████▏  | 124/173 [00:00<00:00, 127.77it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 128.68it/s]
 87%|████████▋ | 151/173 [00:01<00:00, 127.09it/s]
 95%|█████████▍| 164/173 [00:01<00:00, 126.61it/s]
100%|██████████| 173/173 [00:01<00:00, 129.12it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 78%|███████▊  | 39/50 [00:00<00:00, 383.27it/s]
100%|██████████| 50/50 [00:00<00:00, 382.29it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 127.81it/s]
 15%|█▌        | 26/173 [00:00<00:01, 126.75it/s]
 23%|██▎       | 40/173 [00

[2m[36m(func pid=21212)[0m Epoch 00114: reducing learning rate of group 0 to 1.7628e-06.


  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 132.66it/s]
 16%|█▌        | 28/173 [00:00<00:01, 133.68it/s]
 24%|██▍       | 42/173 [00:00<00:00, 133.36it/s]
 40%|████      | 70/173 [00:00<00:00, 133.14it/s]
 49%|████▊     | 84/173 [00:00<00:00, 133.18it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 136.78it/s]
 65%|██████▌   | 113/173 [00:00<00:00, 136.98it/s]
 73%|███████▎  | 127/173 [00:00<00:00, 136.11it/s]
 82%|████████▏ | 141/173 [00:01<00:00, 135.33it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 378.41it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 96.50it/s]
 12%|█▏        | 21/173 [00:00<00:01, 100.38it/s]
 20%|██        | 35/173 [00:00<00:01, 114.05it/s]
 28%|██▊       | 49/173 [00:00<00:01, 120.72it/s]
 36%|███▌      | 62/173 [00:00<00:01, 91.98it/s] 
 43%|████▎     | 74/173 [00:00<00:01, 98.33it/s]
 50%|████▉     | 86/173 [00:00<00:00, 102.43it/s]
 57%|█████▋    | 99/173 [00:00<

[2m[36m(func pid=21212)[0m Epoch 00120: reducing learning rate of group 0 to 8.8138e-07.


 14%|█▍        | 24/173 [00:00<00:01, 119.25it/s]
 22%|██▏       | 38/173 [00:00<00:01, 126.11it/s]
 29%|██▉       | 51/173 [00:00<00:01, 99.21it/s] 
 38%|███▊      | 65/173 [00:00<00:00, 110.68it/s]
 46%|████▌     | 79/173 [00:00<00:00, 117.98it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 124.84it/s]
 62%|██████▏   | 107/173 [00:00<00:00, 124.55it/s]
 77%|███████▋  | 134/173 [00:01<00:00, 127.04it/s]
 85%|████████▍ | 147/173 [00:01<00:00, 127.90it/s]
 92%|█████████▏| 160/173 [00:01<00:00, 126.77it/s]
100%|██████████| 173/173 [00:01<00:00, 121.64it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 384.25it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 128.46it/s]
 16%|█▌        | 27/173 [00:00<00:01, 124.62it/s]
 24%|██▎       | 41/173 [00:00<00:01, 128.32it/s]
 32%|███▏      | 55/173 [00:00<00:00, 129.82it/s]
 39%|███▉      | 68/173 [00:00<00:00, 128.07it/s]
 47%|████▋     | 82/173 [00:00<00:00, 129.83it/s]
 55%|█████▌    | 

[2m[36m(func pid=21212)[0m Epoch 00126: reducing learning rate of group 0 to 4.4069e-07.


  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 118.31it/s]
 15%|█▌        | 26/173 [00:00<00:01, 125.11it/s]
 23%|██▎       | 40/173 [00:00<00:01, 128.40it/s]
 31%|███       | 54/173 [00:00<00:00, 130.18it/s]
 39%|███▉      | 68/173 [00:00<00:00, 130.08it/s]
 47%|████▋     | 82/173 [00:00<00:00, 129.00it/s]
 55%|█████▍    | 95/173 [00:00<00:00, 129.03it/s]
 63%|██████▎   | 109/173 [00:00<00:00, 131.61it/s]
 72%|███████▏  | 124/173 [00:00<00:00, 135.09it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 134.92it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 135.13it/s]
100%|██████████| 173/173 [00:01<00:00, 132.38it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 400.05it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 135.65it/s]
 24%|██▍       | 42/173 [00:00<00:00, 137.49it/s]
 32%|███▏      | 56/173 [00:00<00:00, 137.13it/s]
 40%|████      | 70/173 [00:00<00:00, 134.32it/s]
 49%|████▊     | 84/173 [00

[2m[36m(func pid=21212)[0m Epoch 00132: reducing learning rate of group 0 to 2.2034e-07.


  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 131.11it/s]
 16%|█▌        | 28/173 [00:00<00:01, 114.33it/s]
 23%|██▎       | 40/173 [00:00<00:01, 111.68it/s]
 31%|███       | 54/173 [00:00<00:00, 120.12it/s]
 39%|███▊      | 67/173 [00:00<00:00, 123.05it/s]
 47%|████▋     | 81/173 [00:00<00:00, 126.93it/s]
 63%|██████▎   | 109/173 [00:00<00:00, 131.11it/s]
 71%|███████   | 123/173 [00:00<00:00, 132.57it/s]
 79%|███████▉  | 137/173 [00:01<00:00, 134.40it/s]
100%|██████████| 173/173 [00:01<00:00, 129.54it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 74%|███████▍  | 37/50 [00:00<00:00, 369.18it/s]
100%|██████████| 50/50 [00:00<00:00, 367.62it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 126.47it/s]
 16%|█▌        | 27/173 [00:00<00:01, 131.03it/s]
 24%|██▎       | 41/173 [00:00<00:01, 120.20it/s]
 31%|███       | 54/173 [00:00<00:00, 120.21it/s]
 39%|███▉      | 68/173 [00:00<00:00, 125.08it/s]
 48%|████▊     | 83/173 [00:0

[2m[36m(func pid=21212)[0m Epoch 00138: reducing learning rate of group 0 to 1.1017e-07.


 16%|█▌        | 27/173 [00:00<00:01, 126.76it/s]
 23%|██▎       | 40/173 [00:00<00:01, 125.09it/s]
 31%|███       | 53/173 [00:00<00:00, 124.31it/s]
 38%|███▊      | 66/173 [00:00<00:00, 123.13it/s]
 46%|████▌     | 79/173 [00:00<00:00, 124.15it/s]
 54%|█████▍    | 93/173 [00:00<00:00, 126.76it/s]
 62%|██████▏   | 107/173 [00:00<00:00, 128.60it/s]
 69%|██████▉   | 120/173 [00:00<00:00, 127.10it/s]
 77%|███████▋  | 134/173 [00:01<00:00, 129.94it/s]
 86%|████████▌ | 148/173 [00:01<00:00, 131.43it/s]
100%|██████████| 173/173 [00:01<00:00, 128.07it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 384.58it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 122.87it/s]
 24%|██▎       | 41/173 [00:00<00:00, 134.37it/s]
 32%|███▏      | 55/173 [00:00<00:00, 132.90it/s]
 40%|███▉      | 69/173 [00:00<00:00, 134.35it/s]
 48%|████▊     | 83/173 [00:00<00:00, 133.74it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 131.08it/s]
 64%|██████▍   | 

[2m[36m(func pid=21212)[0m Epoch 00144: reducing learning rate of group 0 to 5.5086e-08.


 16%|█▌        | 28/173 [00:00<00:01, 133.12it/s]
 24%|██▍       | 42/173 [00:00<00:00, 132.06it/s]
 32%|███▏      | 56/173 [00:00<00:00, 131.65it/s]
 40%|████      | 70/173 [00:00<00:00, 134.00it/s]
 49%|████▊     | 84/173 [00:00<00:00, 133.13it/s]
 57%|█████▋    | 98/173 [00:00<00:00, 131.39it/s]
 65%|██████▍   | 112/173 [00:00<00:00, 127.68it/s]
 73%|███████▎  | 126/173 [00:00<00:00, 130.74it/s]
 81%|████████  | 140/173 [00:01<00:00, 123.23it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 124.02it/s]
100%|██████████| 173/173 [00:01<00:00, 128.84it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 16%|█▌        | 8/50 [00:00<00:00, 79.31it/s]
100%|██████████| 50/50 [00:00<00:00, 246.28it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 137.08it/s]
 16%|█▌        | 28/173 [00:00<00:01, 134.30it/s]
 24%|██▍       | 42/173 [00:00<00:00, 134.32it/s]
 32%|███▏      | 56/173 [00:00<00:00, 133.58it/s]
 41%|████      | 71/173 [00:00<00:00, 136.68it/s]
 49%|████▉     | 85/

[2m[36m(func pid=12216)[0m Epoch 00021: reducing learning rate of group 0 to 2.2058e-03.


 14%|█▍        | 25/173 [00:00<00:01, 119.13it/s]
 21%|██▏       | 37/173 [00:00<00:01, 116.01it/s]
 28%|██▊       | 49/173 [00:00<00:01, 115.32it/s]
 35%|███▌      | 61/173 [00:00<00:00, 116.07it/s]
 42%|████▏     | 73/173 [00:00<00:00, 113.34it/s]
 49%|████▉     | 85/173 [00:00<00:00, 114.62it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 115.24it/s]
 63%|██████▎   | 109/173 [00:00<00:00, 114.88it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 116.17it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 116.40it/s]
 91%|█████████ | 157/173 [00:01<00:00, 117.15it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 299.48it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  5%|▌         | 9/173 [00:00<00:02, 81.79it/s]
 12%|█▏        | 21/173 [00:00<00:01, 98.74it/s]
 19%|█▉        | 33/173 [00:00<00:01, 104.05it/s]
 25%|██▌       | 44/173 [00:00<00:01, 105.93it/s]
 32%|███▏      | 56/173 [00:00<00:01, 107.47it/s]
 39%|███▊      | 67/173 [00:00<00:00, 108.22it/s]
 45%|████▌     | 78/1

[2m[36m(func pid=12216)[0m Epoch 00037: reducing learning rate of group 0 to 1.1029e-03.


  6%|▌         | 10/173 [00:00<00:01, 92.99it/s]
 12%|█▏        | 21/173 [00:00<00:01, 101.54it/s]
 25%|██▌       | 44/173 [00:00<00:01, 86.64it/s] 
 32%|███▏      | 56/173 [00:00<00:01, 95.34it/s]
 39%|███▊      | 67/173 [00:00<00:01, 98.99it/s]
 46%|████▌     | 79/173 [00:00<00:00, 103.54it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 105.49it/s]
 60%|█████▉    | 103/173 [00:01<00:00, 109.46it/s]
 67%|██████▋   | 116/173 [00:01<00:00, 112.72it/s]
 74%|███████▍  | 128/173 [00:01<00:00, 112.86it/s]
 81%|████████  | 140/173 [00:01<00:00, 114.47it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 115.64it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 117.72it/s]
100%|██████████| 173/173 [00:01<00:00, 106.47it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 324.85it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 118.43it/s]
 14%|█▍        | 24/173 [00:00<00:01, 116.84it/s]
 21%|██▏       | 37/173 [00:00<00:01, 118.33it/s]
 28%|██▊       | 4

[2m[36m(func pid=12216)[0m Epoch 00052: reducing learning rate of group 0 to 5.5145e-04.


  6%|▌         | 10/173 [00:00<00:01, 91.71it/s]
 13%|█▎        | 23/173 [00:00<00:01, 109.09it/s]
 20%|██        | 35/173 [00:00<00:01, 112.39it/s]
 27%|██▋       | 47/173 [00:00<00:01, 112.68it/s]
 34%|███▍      | 59/173 [00:00<00:00, 114.54it/s]
 41%|████      | 71/173 [00:00<00:00, 115.94it/s]
 48%|████▊     | 83/173 [00:00<00:00, 114.21it/s]
 55%|█████▍    | 95/173 [00:00<00:00, 113.58it/s]
 62%|██████▏   | 107/173 [00:00<00:00, 113.05it/s]
 69%|██████▉   | 119/173 [00:01<00:00, 110.57it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 89.67it/s] 
 83%|████████▎ | 143/173 [00:01<00:00, 95.30it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 100.60it/s]
100%|██████████| 173/173 [00:01<00:00, 106.84it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 325.08it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 117.71it/s]
 14%|█▍        | 24/173 [00:00<00:01, 119.00it/s]
 21%|██        | 36/173 [00:00<00:01, 117.31it/s]
 28%|██▊       | 4

[2m[36m(func pid=12216)[0m Epoch 00058: reducing learning rate of group 0 to 2.7573e-04.


  7%|▋         | 12/173 [00:00<00:01, 118.12it/s]
 14%|█▍        | 25/173 [00:00<00:01, 119.90it/s]
 21%|██▏       | 37/173 [00:00<00:01, 107.91it/s]
 28%|██▊       | 49/173 [00:00<00:01, 111.93it/s]
 36%|███▌      | 62/173 [00:00<00:00, 115.02it/s]
 43%|████▎     | 74/173 [00:00<00:00, 112.88it/s]
 50%|█████     | 87/173 [00:00<00:00, 116.11it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 117.24it/s]
 64%|██████▍   | 111/173 [00:00<00:00, 118.06it/s]
 71%|███████   | 123/173 [00:01<00:00, 118.56it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 118.41it/s]
 86%|████████▌ | 148/173 [00:01<00:00, 118.99it/s]
 93%|█████████▎| 161/173 [00:01<00:00, 119.85it/s]
100%|██████████| 173/173 [00:01<00:00, 117.08it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 66%|██████▌   | 33/50 [00:00<00:00, 325.29it/s]
100%|██████████| 50/50 [00:00<00:00, 324.13it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 117.66it/s]
 14%|█▍        | 25/173 [00:00<00:01, 119.71it/s]
 22%|██▏       | 

[2m[36m(func pid=12216)[0m Epoch 00064: reducing learning rate of group 0 to 1.3786e-04.


 13%|█▎        | 22/173 [00:00<00:01, 107.45it/s]
 19%|█▉        | 33/173 [00:00<00:01, 108.49it/s]
 26%|██▌       | 45/173 [00:00<00:01, 112.63it/s]
 34%|███▎      | 58/173 [00:00<00:00, 115.06it/s]
 40%|████      | 70/173 [00:00<00:00, 115.62it/s]
 47%|████▋     | 82/173 [00:00<00:00, 115.62it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 114.01it/s]
 61%|██████▏   | 106/173 [00:00<00:00, 113.88it/s]
 68%|██████▊   | 118/173 [00:01<00:00, 114.50it/s]
 75%|███████▌  | 130/173 [00:01<00:00, 114.56it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 114.38it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 113.78it/s]
100%|██████████| 173/173 [00:01<00:00, 113.45it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 312.34it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 114.26it/s]
 14%|█▍        | 24/173 [00:00<00:01, 116.93it/s]
 21%|██▏       | 37/173 [00:00<00:01, 118.04it/s]
 28%|██▊       | 49/173 [00:00<00:01, 116.74it/s]
 42%|████▏     |

[2m[36m(func pid=12216)[0m Epoch 00070: reducing learning rate of group 0 to 6.8932e-05.


  7%|▋         | 12/173 [00:00<00:01, 112.72it/s]
 14%|█▍        | 24/173 [00:00<00:01, 113.02it/s]
 21%|██        | 36/173 [00:00<00:01, 103.51it/s]
 28%|██▊       | 48/173 [00:00<00:01, 107.91it/s]
 35%|███▍      | 60/173 [00:00<00:01, 111.31it/s]
 42%|████▏     | 72/173 [00:00<00:00, 113.23it/s]
 49%|████▊     | 84/173 [00:00<00:00, 114.06it/s]
 55%|█████▌    | 96/173 [00:00<00:00, 115.66it/s]
 62%|██████▏   | 108/173 [00:00<00:00, 116.53it/s]
 69%|██████▉   | 120/173 [00:01<00:00, 117.06it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 116.89it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 118.03it/s]
 91%|█████████▏| 158/173 [00:01<00:00, 118.88it/s]
100%|██████████| 173/173 [00:01<00:00, 115.20it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 323.74it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 116.34it/s]
 14%|█▍        | 24/173 [00:00<00:01, 117.88it/s]
 21%|██        | 36/173 [00:00<00:01, 118.34it/s]
 28%|██▊       |

[2m[36m(func pid=12216)[0m Epoch 00076: reducing learning rate of group 0 to 3.4466e-05.


 13%|█▎        | 23/173 [00:00<00:01, 114.03it/s]
 21%|██        | 36/173 [00:00<00:01, 116.70it/s]
 28%|██▊       | 49/173 [00:00<00:01, 118.74it/s]
 35%|███▌      | 61/173 [00:00<00:00, 118.78it/s]
 42%|████▏     | 73/173 [00:00<00:00, 116.68it/s]
 49%|████▉     | 85/173 [00:00<00:00, 116.36it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 116.37it/s]
 63%|██████▎   | 109/173 [00:00<00:00, 115.08it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 114.86it/s]
 77%|███████▋  | 133/173 [00:01<00:00, 116.14it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 116.55it/s]
 91%|█████████ | 157/173 [00:01<00:00, 117.11it/s]
 98%|█████████▊| 169/173 [00:01<00:00, 116.68it/s]
100%|██████████| 173/173 [00:01<00:00, 116.23it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 62%|██████▏   | 31/50 [00:00<00:00, 303.74it/s]
100%|██████████| 50/50 [00:00<00:00, 303.20it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 111.85it/s]
 14%|█▍        | 24/173 [00:00<00:01, 105.31it/s]
 20%|██        |

[2m[36m(func pid=12216)[0m Epoch 00082: reducing learning rate of group 0 to 1.7233e-05.


 12%|█▏        | 20/173 [00:00<00:01, 95.75it/s]
 18%|█▊        | 32/173 [00:00<00:01, 104.67it/s]
 25%|██▍       | 43/173 [00:00<00:01, 104.26it/s]
 31%|███       | 54/173 [00:00<00:01, 105.94it/s]
 38%|███▊      | 65/173 [00:00<00:01, 105.51it/s]
 45%|████▍     | 77/173 [00:00<00:00, 109.49it/s]
 51%|█████▏    | 89/173 [00:00<00:00, 111.27it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 113.91it/s]
 65%|██████▌   | 113/173 [00:01<00:00, 113.79it/s]
 72%|███████▏  | 125/173 [00:01<00:00, 114.86it/s]
 86%|████████▌ | 149/173 [00:01<00:00, 113.01it/s]
 94%|█████████▎| 162/173 [00:01<00:00, 115.87it/s]
100%|██████████| 173/173 [00:01<00:00, 110.85it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 322.35it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 115.39it/s]
 14%|█▍        | 24/173 [00:00<00:01, 117.08it/s]
 21%|██        | 36/173 [00:00<00:01, 117.57it/s]
 28%|██▊       | 49/173 [00:00<00:01, 118.61it/s]
 36%|███▌      | 

[2m[36m(func pid=12216)[0m Epoch 00088: reducing learning rate of group 0 to 8.6165e-06.


  6%|▋         | 11/173 [00:00<00:01, 103.91it/s]
 13%|█▎        | 22/173 [00:00<00:01, 105.60it/s]
 19%|█▉        | 33/173 [00:00<00:01, 99.57it/s] 
 26%|██▌       | 45/173 [00:00<00:01, 104.34it/s]
 32%|███▏      | 56/173 [00:00<00:01, 101.45it/s]
 39%|███▉      | 68/173 [00:00<00:00, 105.04it/s]
 46%|████▌     | 79/173 [00:00<00:00, 106.23it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 107.46it/s]
 60%|█████▉    | 103/173 [00:00<00:00, 109.58it/s]
 66%|██████▋   | 115/173 [00:01<00:00, 109.34it/s]
 73%|███████▎  | 126/173 [00:01<00:00, 88.70it/s] 
 80%|███████▉  | 138/173 [00:01<00:00, 95.87it/s]
 87%|████████▋ | 150/173 [00:01<00:00, 99.96it/s]
 94%|█████████▎| 162/173 [00:01<00:00, 104.41it/s]
100%|██████████| 173/173 [00:01<00:00, 103.34it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 64%|██████▍   | 32/50 [00:00<00:00, 313.77it/s]
100%|██████████| 50/50 [00:00<00:00, 305.07it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 102.40it/s]
 13%|█▎        | 2

[2m[36m(func pid=12216)[0m Epoch 00094: reducing learning rate of group 0 to 4.3082e-06.


  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 101.70it/s]
 13%|█▎        | 22/173 [00:00<00:01, 90.39it/s] 
 19%|█▉        | 33/173 [00:00<00:01, 97.02it/s]
 26%|██▌       | 45/173 [00:00<00:01, 102.76it/s]
 32%|███▏      | 56/173 [00:00<00:01, 104.80it/s]
 39%|███▉      | 68/173 [00:00<00:00, 106.99it/s]
 46%|████▌     | 79/173 [00:00<00:00, 107.84it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 108.43it/s]
 60%|█████▉    | 103/173 [00:00<00:00, 109.45it/s]
 66%|██████▋   | 115/173 [00:01<00:00, 110.79it/s]
 74%|███████▍  | 128/173 [00:01<00:00, 113.76it/s]
 81%|████████  | 140/173 [00:01<00:00, 112.79it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 113.92it/s]
 95%|█████████▍| 164/173 [00:01<00:00, 114.05it/s]
100%|██████████| 173/173 [00:01<00:00, 109.17it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 64%|██████▍   | 32/50 [00:00<00:00, 311.25it/s]
100%|██████████| 50/50 [00:00<00:00, 316.80it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00

[2m[36m(func pid=12216)[0m Epoch 00100: reducing learning rate of group 0 to 2.1541e-06.


 12%|█▏        | 21/173 [00:00<00:01, 104.28it/s]
 19%|█▉        | 33/173 [00:00<00:01, 108.49it/s]
 26%|██▌       | 45/173 [00:00<00:01, 111.71it/s]
 33%|███▎      | 57/173 [00:00<00:01, 114.28it/s]
 40%|███▉      | 69/173 [00:00<00:00, 114.55it/s]
 47%|████▋     | 81/173 [00:00<00:00, 110.61it/s]
 54%|█████▍    | 93/173 [00:00<00:00, 111.71it/s]
 61%|██████    | 105/173 [00:00<00:00, 114.06it/s]
 68%|██████▊   | 117/173 [00:01<00:00, 115.40it/s]
 75%|███████▍  | 129/173 [00:01<00:00, 112.74it/s]
 82%|████████▏ | 141/173 [00:01<00:00, 111.74it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 111.55it/s]
100%|██████████| 173/173 [00:01<00:00, 111.70it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 316.10it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 114.05it/s]
 14%|█▍        | 24/173 [00:00<00:01, 116.13it/s]
 21%|██        | 36/173 [00:00<00:01, 115.88it/s]
 28%|██▊       | 48/173 [00:00<00:01, 112.68it/s]
 35%|███▍      |

[2m[36m(func pid=12216)[0m Epoch 00106: reducing learning rate of group 0 to 1.0771e-06.


  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 116.45it/s]
 14%|█▍        | 24/173 [00:00<00:01, 115.47it/s]
 21%|██        | 36/173 [00:00<00:01, 114.93it/s]
 28%|██▊       | 48/173 [00:00<00:01, 110.18it/s]
 35%|███▍      | 60/173 [00:00<00:01, 110.87it/s]
 42%|████▏     | 72/173 [00:00<00:00, 111.92it/s]
 49%|████▊     | 84/173 [00:00<00:00, 108.85it/s]
 55%|█████▌    | 96/173 [00:00<00:00, 111.80it/s]
 62%|██████▏   | 108/173 [00:00<00:00, 113.21it/s]
 69%|██████▉   | 120/173 [00:01<00:00, 110.62it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 110.77it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 108.66it/s]
 90%|█████████ | 156/173 [00:01<00:00, 110.22it/s]
 97%|█████████▋| 168/173 [00:01<00:00, 109.74it/s]
100%|██████████| 173/173 [00:01<00:00, 111.09it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 64%|██████▍   | 32/50 [00:00<00:00, 316.58it/s]
100%|██████████| 50/50 [00:00<00:00, 318.59it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [0

[2m[36m(func pid=12216)[0m Epoch 00112: reducing learning rate of group 0 to 5.3853e-07.


  7%|▋         | 12/173 [00:00<00:01, 117.40it/s]
 14%|█▍        | 24/173 [00:00<00:01, 104.94it/s]
 21%|██        | 36/173 [00:00<00:01, 109.85it/s]
 28%|██▊       | 49/173 [00:00<00:01, 114.39it/s]
 35%|███▌      | 61/173 [00:00<00:00, 115.90it/s]
 42%|████▏     | 73/173 [00:00<00:00, 117.18it/s]
 49%|████▉     | 85/173 [00:00<00:00, 118.07it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 115.96it/s]
 63%|██████▎   | 109/173 [00:00<00:00, 117.08it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 115.15it/s]
 77%|███████▋  | 133/173 [00:01<00:00, 116.16it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 117.28it/s]
 91%|█████████ | 157/173 [00:01<00:00, 117.51it/s]
100%|██████████| 173/173 [00:01<00:00, 115.61it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 66%|██████▌   | 33/50 [00:00<00:00, 322.50it/s]
100%|██████████| 50/50 [00:00<00:00, 320.00it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 104.76it/s]
 13%|█▎        | 23/173 [00:00<00:01, 110.62it/s]
 20%|██        | 

[2m[36m(func pid=12216)[0m Epoch 00118: reducing learning rate of group 0 to 2.6926e-07.


 14%|█▍        | 24/173 [00:00<00:01, 115.68it/s]
 21%|██        | 36/173 [00:00<00:01, 103.32it/s]
 28%|██▊       | 48/173 [00:00<00:01, 106.57it/s]
 35%|███▍      | 60/173 [00:00<00:01, 110.68it/s]
 42%|████▏     | 72/173 [00:00<00:00, 112.22it/s]
 49%|████▊     | 84/173 [00:00<00:00, 112.95it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 114.91it/s]
 63%|██████▎   | 109/173 [00:00<00:00, 114.22it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 114.01it/s]
 77%|███████▋  | 133/173 [00:01<00:00, 114.18it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 115.23it/s]
 91%|█████████ | 157/173 [00:01<00:00, 114.95it/s]
100%|██████████| 173/173 [00:01<00:00, 113.40it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 292.26it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 109.80it/s]
 13%|█▎        | 23/173 [00:00<00:01, 104.49it/s]
 20%|█▉        | 34/173 [00:00<00:01, 106.58it/s]
 27%|██▋       | 46/173 [00:00<00:01, 110.32it/s]
 34%|███▎      |

[2m[36m(func pid=12216)[0m Epoch 00124: reducing learning rate of group 0 to 1.3463e-07.


  6%|▌         | 10/173 [00:00<00:01, 94.84it/s]
 12%|█▏        | 21/173 [00:00<00:01, 99.99it/s]
 19%|█▉        | 33/173 [00:00<00:01, 105.85it/s]
 26%|██▌       | 45/173 [00:00<00:01, 110.44it/s]
 33%|███▎      | 57/173 [00:00<00:01, 112.54it/s]
 47%|████▋     | 81/173 [00:00<00:00, 114.39it/s]
 54%|█████▍    | 93/173 [00:00<00:00, 115.48it/s]
 61%|██████    | 105/173 [00:00<00:00, 115.73it/s]
 68%|██████▊   | 117/173 [00:01<00:00, 114.97it/s]
 75%|███████▍  | 129/173 [00:01<00:00, 114.33it/s]
 82%|████████▏ | 141/173 [00:01<00:00, 111.73it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 112.57it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 113.27it/s]
100%|██████████| 173/173 [00:01<00:00, 112.26it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 62%|██████▏   | 31/50 [00:00<00:00, 305.18it/s]
100%|██████████| 50/50 [00:00<00:00, 308.51it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 117.61it/s]
 14%|█▍        | 25/173 [00:00<00:01, 120.13it/s]
 22%|██▏       | 3

[2m[36m(func pid=12216)[0m Epoch 00130: reducing learning rate of group 0 to 6.7316e-08.


  6%|▌         | 10/173 [00:00<00:01, 92.27it/s]
 13%|█▎        | 22/173 [00:00<00:01, 103.63it/s]
 20%|█▉        | 34/173 [00:00<00:01, 109.30it/s]
 26%|██▌       | 45/173 [00:00<00:01, 107.89it/s]
 33%|███▎      | 57/173 [00:00<00:01, 109.42it/s]
 47%|████▋     | 81/173 [00:00<00:00, 112.11it/s]
 54%|█████▍    | 93/173 [00:00<00:00, 113.52it/s]
 61%|██████    | 105/173 [00:00<00:00, 114.93it/s]
 68%|██████▊   | 117/173 [00:01<00:00, 115.23it/s]
 75%|███████▍  | 129/173 [00:01<00:00, 115.32it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 117.17it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 118.03it/s]
100%|██████████| 173/173 [00:01<00:00, 113.38it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 20%|██        | 10/50 [00:00<00:00, 86.17it/s]
100%|██████████| 50/50 [00:00<00:00, 202.42it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 113.30it/s]
 14%|█▍        | 24/173 [00:00<00:01, 115.31it/s]
 21%|██        | 36/173 [00:00<00:01, 113.93it/s]
 28%|██▊       | 48

[2m[36m(func pid=12216)[0m Epoch 00136: reducing learning rate of group 0 to 3.3658e-08.


  6%|▋         | 11/173 [00:00<00:01, 104.42it/s]
 13%|█▎        | 23/173 [00:00<00:01, 110.06it/s]
 21%|██        | 36/173 [00:00<00:01, 114.79it/s]
 28%|██▊       | 49/173 [00:00<00:01, 116.76it/s]
 35%|███▌      | 61/173 [00:00<00:00, 117.72it/s]
 42%|████▏     | 73/173 [00:00<00:00, 118.14it/s]
 49%|████▉     | 85/173 [00:00<00:00, 117.16it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 116.91it/s]
 64%|██████▎   | 110/173 [00:00<00:00, 118.07it/s]
 71%|███████   | 123/173 [00:01<00:00, 119.41it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 118.77it/s]
 85%|████████▍ | 147/173 [00:01<00:00, 116.59it/s]
 92%|█████████▏| 160/173 [00:01<00:00, 118.04it/s]
100%|██████████| 173/173 [00:01<00:00, 117.15it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 320.26it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 116.40it/s]
 14%|█▍        | 24/173 [00:00<00:01, 118.46it/s]
 21%|██        | 36/173 [00:00<00:01, 119.07it/s]
 28%|██▊       |

[2m[36m(func pid=12216)[0m Epoch 00142: reducing learning rate of group 0 to 1.6829e-08.


 12%|█▏        | 20/173 [00:00<00:01, 99.21it/s]
 18%|█▊        | 32/173 [00:00<00:01, 104.54it/s]
 25%|██▍       | 43/173 [00:00<00:01, 104.86it/s]
 32%|███▏      | 55/173 [00:00<00:01, 108.98it/s]
 39%|███▊      | 67/173 [00:00<00:00, 110.69it/s]
 46%|████▌     | 79/173 [00:00<00:00, 112.77it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 111.80it/s]
 60%|█████▉    | 103/173 [00:00<00:00, 113.84it/s]
 66%|██████▋   | 115/173 [00:01<00:00, 113.30it/s]
 74%|███████▍  | 128/173 [00:01<00:00, 115.42it/s]
 81%|████████  | 140/173 [00:01<00:00, 115.26it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 115.61it/s]
100%|██████████| 173/173 [00:01<00:00, 112.08it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 64%|██████▍   | 32/50 [00:00<00:00, 319.01it/s]
100%|██████████| 50/50 [00:00<00:00, 323.74it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 109.60it/s]
 13%|█▎        | 22/173 [00:00<00:01, 106.30it/s]
 20%|█▉        | 34/173 [00:00<00:01, 110.49it/s]
 27%|██▋       | 4

[2m[36m(func pid=16172)[0m Epoch 00030: reducing learning rate of group 0 to 9.9795e-04.


 14%|█▍        | 24/173 [00:00<00:01, 116.81it/s]
 21%|██        | 36/173 [00:00<00:01, 112.63it/s]
 28%|██▊       | 48/173 [00:00<00:01, 111.52it/s]
 35%|███▍      | 60/173 [00:00<00:01, 112.93it/s]
 42%|████▏     | 72/173 [00:00<00:00, 115.19it/s]
 49%|████▉     | 85/173 [00:00<00:00, 117.10it/s]
 57%|█████▋    | 98/173 [00:00<00:00, 119.44it/s]
 64%|██████▍   | 111/173 [00:00<00:00, 119.77it/s]
 72%|███████▏  | 124/173 [00:01<00:00, 120.41it/s]
 79%|███████▉  | 137/173 [00:01<00:00, 120.30it/s]
 87%|████████▋ | 150/173 [00:01<00:00, 119.62it/s]
100%|██████████| 173/173 [00:01<00:00, 116.75it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 350.48it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 94.32it/s]
 13%|█▎        | 22/173 [00:00<00:01, 106.70it/s]
 20%|█▉        | 34/173 [00:00<00:01, 112.08it/s]
 27%|██▋       | 46/173 [00:00<00:01, 112.10it/s]
 34%|███▍      | 59/173 [00:00<00:00, 116.83it/s]
 41%|████      | 7

[2m[36m(func pid=16172)[0m Epoch 00043: reducing learning rate of group 0 to 4.9897e-04.


  7%|▋         | 12/173 [00:00<00:01, 111.87it/s]
 14%|█▍        | 24/173 [00:00<00:01, 110.31it/s]
 21%|██        | 36/173 [00:00<00:01, 106.25it/s]
 28%|██▊       | 48/173 [00:00<00:01, 107.74it/s]
 35%|███▍      | 60/173 [00:00<00:01, 109.31it/s]
 42%|████▏     | 72/173 [00:00<00:00, 111.26it/s]
 49%|████▉     | 85/173 [00:00<00:00, 114.67it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 113.80it/s]
 63%|██████▎   | 109/173 [00:00<00:00, 112.77it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 113.69it/s]
 77%|███████▋  | 133/173 [00:01<00:00, 102.43it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 103.85it/s]
 90%|█████████ | 156/173 [00:01<00:00, 106.89it/s]
 97%|█████████▋| 168/173 [00:01<00:00, 107.33it/s]
100%|██████████| 173/173 [00:01<00:00, 108.60it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 64%|██████▍   | 32/50 [00:00<00:00, 315.94it/s]
100%|██████████| 50/50 [00:00<00:00, 310.61it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 112.31it/s]
 14%|█▍        |

[2m[36m(func pid=16172)[0m Epoch 00056: reducing learning rate of group 0 to 2.4949e-04.


  6%|▋         | 11/173 [00:00<00:01, 108.99it/s]
 13%|█▎        | 22/173 [00:00<00:01, 105.22it/s]
 19%|█▉        | 33/173 [00:00<00:01, 105.60it/s]
 25%|██▌       | 44/173 [00:00<00:01, 78.74it/s] 
 32%|███▏      | 55/173 [00:00<00:01, 87.49it/s]
 38%|███▊      | 65/173 [00:00<00:01, 90.31it/s]
 44%|████▍     | 76/173 [00:00<00:01, 94.75it/s]
 50%|█████     | 87/173 [00:00<00:00, 97.54it/s]
 57%|█████▋    | 98/173 [00:01<00:00, 99.60it/s]
 63%|██████▎   | 109/173 [00:01<00:00, 102.61it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 103.62it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 101.80it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 102.52it/s]
 95%|█████████▍| 164/173 [00:01<00:00, 103.75it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 287.15it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 96.95it/s]
 12%|█▏        | 21/173 [00:00<00:01, 100.61it/s]
 18%|█▊        | 32/173 [00:00<00:01, 102.77it/s]
 25%|██▍       | 43/173

[2m[36m(func pid=16172)[0m Epoch 00067: reducing learning rate of group 0 to 1.2474e-04.


  7%|▋         | 12/173 [00:00<00:01, 113.39it/s]
 14%|█▍        | 24/173 [00:00<00:01, 109.66it/s]
 20%|██        | 35/173 [00:00<00:01, 109.80it/s]
 27%|██▋       | 46/173 [00:00<00:01, 108.23it/s]
 34%|███▎      | 58/173 [00:00<00:01, 110.20it/s]
 40%|████      | 70/173 [00:00<00:00, 112.51it/s]
 47%|████▋     | 82/173 [00:00<00:00, 111.76it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 109.51it/s]
 61%|██████    | 105/173 [00:00<00:00, 109.05it/s]
 67%|██████▋   | 116/173 [00:01<00:00, 107.74it/s]
 73%|███████▎  | 127/173 [00:01<00:00, 107.72it/s]
 80%|████████  | 139/173 [00:01<00:00, 109.04it/s]
 87%|████████▋ | 150/173 [00:01<00:00, 107.85it/s]
 93%|█████████▎| 161/173 [00:01<00:00, 106.61it/s]
100%|██████████| 173/173 [00:01<00:00, 109.01it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 68%|██████▊   | 34/50 [00:00<00:00, 330.73it/s]
100%|██████████| 50/50 [00:00<00:00, 334.73it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 113.92it/s]
 14%|█▍        |

[2m[36m(func pid=16172)[0m Epoch 00073: reducing learning rate of group 0 to 6.2372e-05.


 13%|█▎        | 22/173 [00:00<00:01, 104.19it/s]
 19%|█▉        | 33/173 [00:00<00:01, 105.40it/s]
 25%|██▌       | 44/173 [00:00<00:01, 106.71it/s]
 32%|███▏      | 55/173 [00:00<00:01, 102.78it/s]
 38%|███▊      | 66/173 [00:00<00:01, 103.74it/s]
 45%|████▍     | 77/173 [00:00<00:00, 104.41it/s]
 51%|█████     | 88/173 [00:00<00:00, 104.24it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 104.39it/s]
 64%|██████▍   | 111/173 [00:01<00:00, 106.50it/s]
 71%|███████   | 122/173 [00:01<00:00, 106.23it/s]
 77%|███████▋  | 133/173 [00:01<00:00, 106.75it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 106.75it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 105.53it/s]
 96%|█████████▌| 166/173 [00:01<00:00, 105.37it/s]
100%|██████████| 173/173 [00:01<00:00, 105.13it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 62%|██████▏   | 31/50 [00:00<00:00, 307.61it/s]
100%|██████████| 50/50 [00:00<00:00, 303.10it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 104.85it/s]
 13%|█▎        |

[2m[36m(func pid=16172)[0m Epoch 00080: reducing learning rate of group 0 to 3.1186e-05.


  7%|▋         | 12/173 [00:00<00:01, 116.18it/s]
 14%|█▍        | 24/173 [00:00<00:01, 116.65it/s]
 21%|██        | 36/173 [00:00<00:01, 116.49it/s]
 28%|██▊       | 48/173 [00:00<00:01, 115.16it/s]
 35%|███▍      | 60/173 [00:00<00:01, 112.51it/s]
 42%|████▏     | 72/173 [00:00<00:00, 111.32it/s]
 49%|████▊     | 84/173 [00:00<00:00, 111.85it/s]
 55%|█████▌    | 96/173 [00:00<00:00, 112.05it/s]
 62%|██████▏   | 108/173 [00:00<00:00, 112.06it/s]
 69%|██████▉   | 120/173 [00:01<00:00, 106.50it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 104.91it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 105.99it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 106.24it/s]
 95%|█████████▍| 164/173 [00:01<00:00, 106.09it/s]
 12%|█▏        | 6/50 [00:00<00:00, 58.83it/s]
 70%|███████   | 35/50 [00:00<00:00, 192.34it/s]
100%|██████████| 50/50 [00:00<00:00, 181.19it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 113.95it/s]
 14%|█▍        | 24/173 [00:00<00:01, 111.20it/s]
 21%|██  

[2m[36m(func pid=16172)[0m Epoch 00086: reducing learning rate of group 0 to 1.5593e-05.


  7%|▋         | 12/173 [00:00<00:01, 111.02it/s]
 14%|█▍        | 24/173 [00:00<00:01, 107.34it/s]
 21%|██        | 36/173 [00:00<00:01, 109.47it/s]
 28%|██▊       | 48/173 [00:00<00:01, 112.57it/s]
 35%|███▍      | 60/173 [00:00<00:00, 113.15it/s]
 42%|████▏     | 72/173 [00:00<00:00, 112.81it/s]
 49%|████▊     | 84/173 [00:00<00:00, 114.65it/s]
 55%|█████▌    | 96/173 [00:00<00:00, 113.55it/s]
 62%|██████▏   | 108/173 [00:00<00:00, 114.50it/s]
 69%|██████▉   | 120/173 [00:01<00:00, 115.75it/s]
 77%|███████▋  | 133/173 [00:01<00:00, 117.26it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 117.32it/s]
100%|██████████| 173/173 [00:01<00:00, 115.15it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 68%|██████▊   | 34/50 [00:00<00:00, 330.22it/s]
100%|██████████| 50/50 [00:00<00:00, 332.98it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 95.24it/s]
 12%|█▏        | 21/173 [00:00<00:01, 102.81it/s]
 18%|█▊        | 32/173 [00:00<00:01, 104.90it/s]
 25%|██▍       | 43

[2m[36m(func pid=16172)[0m Epoch 00092: reducing learning rate of group 0 to 7.7965e-06.


  6%|▋         | 11/173 [00:00<00:01, 109.98it/s]
 13%|█▎        | 22/173 [00:00<00:01, 109.98it/s]
 19%|█▉        | 33/173 [00:00<00:01, 109.55it/s]
 25%|██▌       | 44/173 [00:00<00:01, 109.30it/s]
 32%|███▏      | 56/173 [00:00<00:01, 112.35it/s]
 39%|███▉      | 68/173 [00:00<00:00, 113.72it/s]
 46%|████▌     | 80/173 [00:00<00:00, 115.39it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 114.67it/s]
 60%|██████    | 104/173 [00:00<00:00, 110.89it/s]
 67%|██████▋   | 116/173 [00:01<00:00, 109.08it/s]
 73%|███████▎  | 127/173 [00:01<00:00, 108.23it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 107.07it/s]
 87%|████████▋ | 150/173 [00:01<00:00, 107.76it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 320.24it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 105.55it/s]
 13%|█▎        | 23/173 [00:00<00:01, 109.20it/s]
 21%|██        | 36/173 [00:00<00:01, 113.61it/s]
 28%|██▊       | 49/173 [00:00<00:01, 117.14it/s]
 35%|███▌      | 

[2m[36m(func pid=16172)[0m Epoch 00098: reducing learning rate of group 0 to 3.8982e-06.


 13%|█▎        | 23/173 [00:00<00:01, 113.74it/s]
 20%|██        | 35/173 [00:00<00:01, 110.39it/s]
 27%|██▋       | 47/173 [00:00<00:01, 105.76it/s]
 34%|███▎      | 58/173 [00:00<00:01, 105.41it/s]
 40%|████      | 70/173 [00:00<00:00, 108.66it/s]
 47%|████▋     | 82/173 [00:00<00:00, 110.07it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 112.15it/s]
 61%|██████▏   | 106/173 [00:00<00:00, 110.79it/s]
 68%|██████▊   | 118/173 [00:01<00:00, 111.58it/s]
 75%|███████▌  | 130/173 [00:01<00:00, 112.04it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 113.38it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 112.99it/s]
 97%|█████████▋| 167/173 [00:01<00:00, 115.29it/s]
 68%|██████▊   | 34/50 [00:00<00:00, 336.70it/s]
100%|██████████| 50/50 [00:00<00:00, 332.13it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 113.39it/s]
 14%|█▍        | 24/173 [00:00<00:01, 112.59it/s]
 21%|██        | 36/173 [00:00<00:01, 109.68it/s]
 27%|██▋       | 47/173 [00:00<00:01, 107.98it/s]
 34%|█

[2m[36m(func pid=16172)[0m Epoch 00104: reducing learning rate of group 0 to 1.9491e-06.


  6%|▋         | 11/173 [00:00<00:01, 107.84it/s]
 13%|█▎        | 22/173 [00:00<00:01, 105.90it/s]
 19%|█▉        | 33/173 [00:00<00:01, 101.25it/s]
 25%|██▌       | 44/173 [00:00<00:01, 103.22it/s]
 32%|███▏      | 55/173 [00:00<00:01, 104.84it/s]
 38%|███▊      | 66/173 [00:00<00:01, 103.52it/s]
 45%|████▍     | 77/173 [00:00<00:00, 102.00it/s]
 51%|█████     | 88/173 [00:00<00:00, 99.92it/s] 
 57%|█████▋    | 99/173 [00:00<00:00, 101.64it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 101.98it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 102.06it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 100.72it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 100.25it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 101.32it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 103.15it/s]
100%|██████████| 173/173 [00:01<00:00, 102.13it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 58%|█████▊    | 29/50 [00:00<00:00, 288.97it/s]
100%|██████████| 50/50 [00:00<00:00, 296.90it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         |

[2m[36m(func pid=16172)[0m Epoch 00110: reducing learning rate of group 0 to 9.7456e-07.


  7%|▋         | 12/173 [00:00<00:01, 115.20it/s]
 14%|█▍        | 24/173 [00:00<00:01, 113.09it/s]
 21%|██        | 36/173 [00:00<00:01, 112.10it/s]
 28%|██▊       | 48/173 [00:00<00:01, 110.92it/s]
 35%|███▌      | 61/173 [00:00<00:00, 112.05it/s]
 42%|████▏     | 73/173 [00:00<00:00, 112.07it/s]
 49%|████▉     | 85/173 [00:00<00:00, 91.18it/s] 
 56%|█████▌    | 97/173 [00:00<00:00, 97.28it/s]
 63%|██████▎   | 109/173 [00:01<00:00, 102.41it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 106.01it/s]
 77%|███████▋  | 133/173 [00:01<00:00, 108.78it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 108.26it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 312.45it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 98.39it/s]
 13%|█▎        | 22/173 [00:00<00:01, 110.48it/s]
 20%|█▉        | 34/173 [00:00<00:01, 110.35it/s]
 27%|██▋       | 46/173 [00:00<00:01, 109.91it/s]
 33%|███▎      | 57/173 [00:00<00:01, 104.42it/s]
 40%|███▉      | 69/

[2m[36m(func pid=16172)[0m Epoch 00116: reducing learning rate of group 0 to 4.8728e-07.


  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 107.70it/s]
 13%|█▎        | 22/173 [00:00<00:01, 104.63it/s]
 19%|█▉        | 33/173 [00:00<00:01, 103.87it/s]
 26%|██▌       | 45/173 [00:00<00:01, 107.18it/s]
 32%|███▏      | 56/173 [00:00<00:01, 105.23it/s]
 39%|███▊      | 67/173 [00:00<00:01, 104.40it/s]
 45%|████▌     | 78/173 [00:00<00:00, 105.48it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 107.29it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 107.32it/s]
 65%|██████▍   | 112/173 [00:01<00:00, 107.01it/s]
 71%|███████   | 123/173 [00:01<00:00, 106.57it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 107.10it/s]
 85%|████████▍ | 147/173 [00:01<00:00, 108.89it/s]
 91%|█████████▏| 158/173 [00:01<00:00, 105.92it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 290.77it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 98.86it/s]
 12%|█▏        | 21/173 [00:00<00:01, 104.83it/s]
 18%|█▊        | 32/173 [00

[2m[36m(func pid=16172)[0m Epoch 00122: reducing learning rate of group 0 to 2.4364e-07.


  6%|▋         | 11/173 [00:00<00:01, 105.71it/s]
 13%|█▎        | 22/173 [00:00<00:01, 100.77it/s]
 19%|█▉        | 33/173 [00:00<00:01, 102.95it/s]
 25%|██▌       | 44/173 [00:00<00:01, 104.07it/s]
 32%|███▏      | 55/173 [00:00<00:01, 102.99it/s]
 38%|███▊      | 66/173 [00:00<00:01, 103.12it/s]
 45%|████▍     | 77/173 [00:00<00:00, 103.97it/s]
 51%|█████     | 88/173 [00:00<00:00, 104.32it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 103.64it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 104.49it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 100.44it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 102.49it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 101.20it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 100.35it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 102.22it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 294.50it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 108.98it/s]
 13%|█▎        | 22/173 [00:00<00:01, 106.48it/s]
 19%|█▉        |

[2m[36m(func pid=16172)[0m Epoch 00128: reducing learning rate of group 0 to 1.2182e-07.


  6%|▋         | 11/173 [00:00<00:01, 105.92it/s]
 13%|█▎        | 23/173 [00:00<00:01, 108.46it/s]
 20%|█▉        | 34/173 [00:00<00:01, 107.18it/s]
 27%|██▋       | 46/173 [00:00<00:01, 108.81it/s]
 40%|████      | 70/173 [00:00<00:00, 112.07it/s]
 47%|████▋     | 82/173 [00:00<00:00, 114.09it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 115.42it/s]
 61%|██████▏   | 106/173 [00:00<00:00, 115.26it/s]
 68%|██████▊   | 118/173 [00:01<00:00, 115.42it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 117.37it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 116.39it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 113.79it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 318.77it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 107.43it/s]
 13%|█▎        | 23/173 [00:00<00:01, 111.81it/s]
 20%|██        | 35/173 [00:00<00:01, 107.00it/s]
 27%|██▋       | 46/173 [00:00<00:01, 105.54it/s]
 33%|███▎      | 57/173 [00:00<00:01, 105.23it/s]
 39%|███▉      | 

[2m[36m(func pid=16172)[0m Epoch 00134: reducing learning rate of group 0 to 6.0910e-08.


  6%|▋         | 11/173 [00:00<00:01, 101.68it/s]
 13%|█▎        | 22/173 [00:00<00:01, 100.81it/s]
 19%|█▉        | 33/173 [00:00<00:01, 102.42it/s]
 25%|██▌       | 44/173 [00:00<00:01, 102.15it/s]
 32%|███▏      | 55/173 [00:00<00:01, 101.94it/s]
 38%|███▊      | 66/173 [00:00<00:01, 103.02it/s]
 45%|████▍     | 77/173 [00:00<00:00, 103.34it/s]
 51%|█████     | 88/173 [00:00<00:00, 102.59it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 103.51it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 103.28it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 102.85it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 99.18it/s] 
 83%|████████▎ | 143/173 [00:01<00:00, 99.01it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 100.76it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 100.87it/s]
100%|██████████| 173/173 [00:01<00:00, 101.82it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 60%|██████    | 30/50 [00:00<00:00, 297.03it/s]
100%|██████████| 50/50 [00:00<00:00, 199.28it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 

[2m[36m(func pid=16172)[0m Epoch 00140: reducing learning rate of group 0 to 3.0455e-08.


  7%|▋         | 12/173 [00:00<00:01, 108.15it/s]
 14%|█▍        | 24/173 [00:00<00:01, 109.30it/s]
 20%|██        | 35/173 [00:00<00:01, 109.37it/s]
 27%|██▋       | 46/173 [00:00<00:01, 107.09it/s]
 33%|███▎      | 57/173 [00:00<00:01, 107.72it/s]
 40%|███▉      | 69/173 [00:00<00:00, 109.09it/s]
 47%|████▋     | 81/173 [00:00<00:00, 109.73it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 107.32it/s]
 60%|█████▉    | 103/173 [00:00<00:00, 107.63it/s]
 66%|██████▋   | 115/173 [00:01<00:00, 111.08it/s]
 73%|███████▎  | 127/173 [00:01<00:00, 111.10it/s]
 80%|████████  | 139/173 [00:01<00:00, 112.19it/s]
 87%|████████▋ | 151/173 [00:01<00:00, 111.97it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 58%|█████▊    | 29/50 [00:00<00:00, 289.87it/s]
100%|██████████| 50/50 [00:00<00:00, 302.25it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 102.87it/s]
 13%|█▎        | 22/173 [00:00<00:01, 102.84it/s]
 20%|█▉        | 34/173 [00:00<00:01, 108.11it/s]
 26%|██▌       | 4

[2m[36m(func pid=16172)[0m Epoch 00146: reducing learning rate of group 0 to 1.5227e-08.


 13%|█▎        | 23/173 [00:00<00:01, 109.41it/s]
 20%|█▉        | 34/173 [00:00<00:01, 105.33it/s]
 27%|██▋       | 46/173 [00:00<00:01, 107.80it/s]
 33%|███▎      | 57/173 [00:00<00:01, 105.85it/s]
 39%|███▉      | 68/173 [00:00<00:00, 105.04it/s]
 46%|████▌     | 79/173 [00:00<00:00, 105.35it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 106.46it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 106.88it/s]
 65%|██████▌   | 113/173 [00:01<00:00, 108.34it/s]
 72%|███████▏  | 125/173 [00:01<00:00, 109.22it/s]
 79%|███████▊  | 136/173 [00:01<00:00, 108.13it/s]
 86%|████████▌ | 148/173 [00:01<00:00, 109.73it/s]
 92%|█████████▏| 160/173 [00:01<00:00, 111.59it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 312.55it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 97.15it/s]
 13%|█▎        | 22/173 [00:00<00:01, 106.64it/s]
 20%|█▉        | 34/173 [00:00<00:01, 111.63it/s]
 27%|██▋       | 46/173 [00:00<00:01, 112.68it/s]
 34%|███▎      | 

[2m[36m(func pid=22208)[0m Epoch 00069: reducing learning rate of group 0 to 7.4631e-04.


 11%|█▏        | 10/87 [00:00<00:00, 91.35it/s]
 23%|██▎       | 20/87 [00:00<00:00, 90.69it/s]
 34%|███▍      | 30/87 [00:00<00:00, 90.73it/s]
 46%|████▌     | 40/87 [00:00<00:00, 91.44it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 89.58it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 89.60it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 88.97it/s]
 90%|████████▉ | 78/87 [00:00<00:00, 88.49it/s]
100%|██████████| 87/87 [00:00<00:00, 89.02it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 181.11it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 21%|██        | 18/87 [00:00<00:00, 84.56it/s]
 31%|███       | 27/87 [00:00<00:00, 85.12it/s]
 41%|████▏     | 36/87 [00:00<00:00, 83.19it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 83.26it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 84.08it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 84.82it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 82.58it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 195.20it/s]
  0%|          | 

[2m[36m(func pid=22208)[0m Epoch 00080: reducing learning rate of group 0 to 3.7315e-04.


 11%|█▏        | 10/87 [00:00<00:00, 91.21it/s]
 23%|██▎       | 20/87 [00:00<00:01, 62.85it/s]
 33%|███▎      | 29/87 [00:00<00:00, 69.97it/s]
 45%|████▍     | 39/87 [00:00<00:00, 77.75it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 78.83it/s]
 66%|██████▌   | 57/87 [00:00<00:00, 81.24it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 83.11it/s]
 86%|████████▌ | 75/87 [00:00<00:00, 83.86it/s]
100%|██████████| 87/87 [00:01<00:00, 81.19it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 195.46it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 89.51it/s]
 21%|██        | 18/87 [00:00<00:00, 85.93it/s]
 31%|███       | 27/87 [00:00<00:00, 86.27it/s]
 43%|████▎     | 37/87 [00:00<00:00, 88.64it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 88.98it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 89.07it/s]
 75%|███████▍  | 65/87 [00:00<00:00, 89.10it/s]
 85%|████████▌ | 74/87 [00:00<00:00, 87.79it/s]
100%|██████████| 87/87 [00:00<00:00, 88.09it/s]
  0%|     

[2m[36m(func pid=22208)[0m Epoch 00092: reducing learning rate of group 0 to 1.8658e-04.


 10%|█         | 9/87 [00:00<00:00, 82.38it/s]
 21%|██        | 18/87 [00:00<00:00, 83.15it/s]
 31%|███       | 27/87 [00:00<00:00, 83.64it/s]
 41%|████▏     | 36/87 [00:00<00:00, 84.38it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 85.47it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 87.61it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 87.66it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 87.96it/s]
 94%|█████████▍| 82/87 [00:00<00:00, 86.81it/s]
100%|██████████| 87/87 [00:01<00:00, 86.60it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 198.81it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 88.15it/s]
 21%|██        | 18/87 [00:00<00:00, 86.43it/s]
 32%|███▏      | 28/87 [00:00<00:00, 89.47it/s]
 43%|████▎     | 37/87 [00:00<00:00, 89.43it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 87.47it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 87.41it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 86.88it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 85.76it/s]
 94%|██████

[2m[36m(func pid=22208)[0m Epoch 00098: reducing learning rate of group 0 to 9.3289e-05.


 21%|██        | 18/87 [00:00<00:00, 88.34it/s]
 31%|███       | 27/87 [00:00<00:00, 86.37it/s]
 43%|████▎     | 37/87 [00:00<00:00, 88.55it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 87.85it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 89.76it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 90.14it/s]
 87%|████████▋ | 76/87 [00:00<00:00, 89.26it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 80%|████████  | 20/25 [00:00<00:00, 190.19it/s]
100%|██████████| 25/25 [00:00<00:00, 178.56it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 77.48it/s]
 20%|█▉        | 17/87 [00:00<00:00, 83.79it/s]
 30%|██▉       | 26/87 [00:00<00:00, 84.06it/s]
 40%|████      | 35/87 [00:00<00:00, 80.54it/s]
 51%|█████     | 44/87 [00:00<00:00, 72.68it/s]
 60%|█████▉    | 52/87 [00:00<00:00, 74.10it/s]
 70%|███████   | 61/87 [00:00<00:00, 76.24it/s]
 80%|████████  | 70/87 [00:00<00:00, 78.51it/s]
 91%|█████████ | 79/87 [00:01<00:00, 78.94it/s]
100%|██████████| 87/87 [00:01<00:00, 78.79it/s]
  0%|    

[2m[36m(func pid=22208)[0m Epoch 00108: reducing learning rate of group 0 to 4.6644e-05.


 10%|█         | 9/87 [00:00<00:00, 78.90it/s]
 21%|██        | 18/87 [00:00<00:00, 81.44it/s]
 31%|███       | 27/87 [00:00<00:00, 82.26it/s]
 41%|████▏     | 36/87 [00:00<00:00, 84.27it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 83.67it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 85.59it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 83.90it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 83.13it/s]
 94%|█████████▍| 82/87 [00:00<00:00, 84.05it/s]
100%|██████████| 87/87 [00:01<00:00, 83.58it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 72%|███████▏  | 18/25 [00:00<00:00, 176.43it/s]
100%|██████████| 25/25 [00:00<00:00, 178.39it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 89.54it/s]
 22%|██▏       | 19/87 [00:00<00:00, 86.67it/s]
 32%|███▏      | 28/87 [00:00<00:00, 87.59it/s]
 44%|████▎     | 38/87 [00:00<00:00, 88.57it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 88.68it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 88.61it/s]
 75%|███████▍  | 65/87 [00:00<00:00, 87.67it/s]
 85%|████

[2m[36m(func pid=22208)[0m Epoch 00116: reducing learning rate of group 0 to 2.3322e-05.


 10%|█         | 9/87 [00:00<00:00, 86.96it/s]
 21%|██        | 18/87 [00:00<00:00, 86.05it/s]
 31%|███       | 27/87 [00:00<00:00, 86.69it/s]
 43%|████▎     | 37/87 [00:00<00:00, 88.80it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 87.86it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 88.03it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 87.70it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 87.20it/s]
 94%|█████████▍| 82/87 [00:00<00:00, 87.48it/s]
100%|██████████| 87/87 [00:00<00:00, 87.72it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 118.86it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 85.76it/s]
 22%|██▏       | 19/87 [00:00<00:00, 89.83it/s]
 32%|███▏      | 28/87 [00:00<00:00, 89.59it/s]
 44%|████▎     | 38/87 [00:00<00:00, 90.11it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 90.32it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 89.08it/s]
 77%|███████▋  | 67/87 [00:00<00:00, 88.62it/s]
 87%|████████▋ | 76/87 [00:00<00:00, 89.04it/s]
100%|██████

[2m[36m(func pid=22208)[0m Epoch 00122: reducing learning rate of group 0 to 1.1661e-05.


  9%|▉         | 8/87 [00:00<00:01, 71.55it/s]
 18%|█▊        | 16/87 [00:00<00:00, 74.98it/s]
 28%|██▊       | 24/87 [00:00<00:00, 75.81it/s]
 38%|███▊      | 33/87 [00:00<00:00, 77.98it/s]
 48%|████▊     | 42/87 [00:00<00:00, 79.00it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 78.37it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 80.91it/s]
 78%|███████▊  | 68/87 [00:00<00:00, 79.67it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 81.30it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 172.41it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  1%|          | 1/87 [00:00<00:09,  9.38it/s]
 11%|█▏        | 10/87 [00:00<00:01, 52.71it/s]
 21%|██        | 18/87 [00:00<00:01, 64.08it/s]
 31%|███       | 27/87 [00:00<00:00, 71.79it/s]
 41%|████▏     | 36/87 [00:00<00:00, 75.45it/s]
 51%|█████     | 44/87 [00:00<00:00, 76.71it/s]
 60%|█████▉    | 52/87 [00:00<00:00, 77.27it/s]
 70%|███████   | 61/87 [00:00<00:00, 78.46it/s]
 80%|████████  | 70/87 [00:00<00:00, 80.14it/s]
 91%|██████

[2m[36m(func pid=22208)[0m Epoch 00131: reducing learning rate of group 0 to 5.8305e-06.


  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 82.56it/s]
 21%|██        | 18/87 [00:00<00:00, 82.49it/s]
 31%|███       | 27/87 [00:00<00:00, 85.09it/s]
 41%|████▏     | 36/87 [00:00<00:00, 85.25it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 85.71it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 81.76it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 81.30it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 194.51it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 21%|██        | 18/87 [00:00<00:00, 87.31it/s]
 31%|███       | 27/87 [00:00<00:00, 88.37it/s]
 41%|████▏     | 36/87 [00:00<00:00, 85.72it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 86.26it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 85.77it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 87.17it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 87.64it/s]
100%|██████████| 87/87 [00:00<00:00, 87.71it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 195.59it/s]
  0%|          | 0/87 [00:0

[2m[36m(func pid=22208)[0m Epoch 00137: reducing learning rate of group 0 to 2.9153e-06.


  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 85.43it/s]
 21%|██        | 18/87 [00:00<00:00, 84.82it/s]
 31%|███       | 27/87 [00:00<00:00, 84.12it/s]
 41%|████▏     | 36/87 [00:00<00:00, 85.32it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 83.14it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 84.86it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 83.02it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 84.35it/s]
 93%|█████████▎| 81/87 [00:00<00:00, 85.47it/s]
100%|██████████| 87/87 [00:01<00:00, 84.34it/s]
 76%|███████▌  | 19/25 [00:00<00:00, 188.25it/s]
100%|██████████| 25/25 [00:00<00:00, 185.09it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 85.76it/s]
 21%|██        | 18/87 [00:00<00:00, 86.23it/s]
 31%|███       | 27/87 [00:00<00:00, 86.78it/s]
 41%|████▏     | 36/87 [00:00<00:00, 87.59it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 88.02it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 88.39it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 88.73it/s]
 84%|█████

[2m[36m(func pid=22208)[0m Epoch 00143: reducing learning rate of group 0 to 1.4576e-06.


 10%|█         | 9/87 [00:00<00:00, 87.78it/s]
 22%|██▏       | 19/87 [00:00<00:00, 89.49it/s]
 32%|███▏      | 28/87 [00:00<00:00, 85.90it/s]
 43%|████▎     | 37/87 [00:00<00:00, 87.20it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 86.90it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 85.84it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 84.53it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 83.47it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 156.51it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 83.41it/s]
 21%|██        | 18/87 [00:00<00:00, 87.09it/s]
 32%|███▏      | 28/87 [00:00<00:00, 90.58it/s]
 44%|████▎     | 38/87 [00:00<00:00, 90.26it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 88.46it/s]
 66%|██████▌   | 57/87 [00:00<00:00, 87.32it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 84.44it/s]
 86%|████████▌ | 75/87 [00:00<00:00, 84.94it/s]
100%|██████████| 87/87 [00:01<00:00, 86.37it/s]
 72%|███████▏  | 18/25 [00:00<00:00, 179.69it/s]
100%|█████

[2m[36m(func pid=22208)[0m Epoch 00149: reducing learning rate of group 0 to 7.2882e-07.


  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 88.93it/s]
 21%|██        | 18/87 [00:00<00:00, 89.09it/s]
 31%|███       | 27/87 [00:00<00:00, 87.02it/s]
 41%|████▏     | 36/87 [00:00<00:00, 87.24it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 67.63it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 73.08it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 76.55it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 79.03it/s]
100%|██████████| 87/87 [00:01<00:00, 80.68it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 180.46it/s]
  0%|          | 0/13 [00:00<?, ?it/s]
100%|██████████| 13/13 [00:00<00:00, 190.17it/s]
2023-03-24 19:45:58,866	ERROR trial_runner.py:1088 -- Trial train_model_f5d07_00006: Error processing event.
ray.exceptions.RayTaskError(RuntimeError): [36mray::ImplicitFunc.train()[39m (pid=22208, ip=127.0.0.1, repr=func)
  File "python\ray\_raylet.pyx", line 830, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 834, in ray._raylet.

[2m[36m(func pid=19100)[0m Epoch 00068: reducing learning rate of group 0 to 5.2519e-04.


 10%|█         | 9/87 [00:00<00:00, 82.28it/s]
 21%|██        | 18/87 [00:00<00:00, 81.93it/s]
 31%|███       | 27/87 [00:00<00:00, 82.57it/s]
 41%|████▏     | 36/87 [00:00<00:00, 82.70it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 83.03it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 83.46it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 83.53it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 84.48it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 76%|███████▌  | 19/25 [00:00<00:00, 105.10it/s]
100%|██████████| 25/25 [00:00<00:00, 116.36it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 80.85it/s]
 21%|██        | 18/87 [00:00<00:00, 80.87it/s]
 31%|███       | 27/87 [00:00<00:00, 79.46it/s]
 41%|████▏     | 36/87 [00:00<00:00, 80.68it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 80.42it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 78.86it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 80.47it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 80.95it/s]
 93%|█████████▎| 81/87 [00:01<00:00, 81.14it/s]
100%|█████

[2m[36m(func pid=19100)[0m Epoch 00087: reducing learning rate of group 0 to 2.6260e-04.


 10%|█         | 9/87 [00:00<00:00, 84.58it/s]
 21%|██        | 18/87 [00:00<00:00, 84.87it/s]
 31%|███       | 27/87 [00:00<00:00, 83.70it/s]
 41%|████▏     | 36/87 [00:00<00:00, 83.33it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 82.75it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 81.78it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 79.82it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 79.66it/s]
100%|██████████| 87/87 [00:01<00:00, 81.90it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 192.98it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 82.61it/s]
 21%|██        | 18/87 [00:00<00:00, 78.73it/s]
 30%|██▉       | 26/87 [00:00<00:00, 75.94it/s]
 39%|███▉      | 34/87 [00:00<00:00, 74.02it/s]
 48%|████▊     | 42/87 [00:00<00:00, 75.00it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 75.70it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 75.09it/s]
 77%|███████▋  | 67/87 [00:00<00:00, 77.00it/s]
 87%|████████▋ | 76/87 [00:00<00:00, 77.87it/s]
  0%|      

[2m[36m(func pid=19100)[0m Epoch 00101: reducing learning rate of group 0 to 1.3130e-04.


  6%|▌         | 5/87 [00:00<00:01, 47.61it/s]
 14%|█▍        | 12/87 [00:00<00:01, 59.19it/s]
 23%|██▎       | 20/87 [00:00<00:01, 64.95it/s]
 32%|███▏      | 28/87 [00:00<00:00, 70.37it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 75.17it/s]
 61%|██████    | 53/87 [00:00<00:00, 75.22it/s]
 70%|███████   | 61/87 [00:00<00:00, 75.68it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 76.57it/s]
 89%|████████▊ | 77/87 [00:01<00:00, 75.69it/s]
100%|██████████| 87/87 [00:01<00:00, 73.41it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 32%|███▏      | 8/25 [00:00<00:00, 62.43it/s]
100%|██████████| 25/25 [00:00<00:00, 111.36it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 73.40it/s]
 18%|█▊        | 16/87 [00:00<00:00, 75.76it/s]
 28%|██▊       | 24/87 [00:00<00:00, 77.45it/s]
 37%|███▋      | 32/87 [00:00<00:00, 76.30it/s]
 46%|████▌     | 40/87 [00:00<00:00, 75.73it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 75.15it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 73.92it/s]
 74%|███████

[2m[36m(func pid=19100)[0m Epoch 00129: reducing learning rate of group 0 to 6.5649e-05.


 20%|█▉        | 17/87 [00:00<00:00, 81.83it/s]
 30%|██▉       | 26/87 [00:00<00:00, 82.13it/s]
 40%|████      | 35/87 [00:00<00:00, 79.70it/s]
 49%|████▉     | 43/87 [00:00<00:00, 77.75it/s]
 60%|█████▉    | 52/87 [00:00<00:00, 78.34it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 78.68it/s]
 78%|███████▊  | 68/87 [00:00<00:00, 78.12it/s]
 87%|████████▋ | 76/87 [00:00<00:00, 77.95it/s]
100%|██████████| 87/87 [00:01<00:00, 79.41it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 76%|███████▌  | 19/25 [00:00<00:00, 186.37it/s]
100%|██████████| 25/25 [00:00<00:00, 186.24it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 81.92it/s]
 21%|██        | 18/87 [00:00<00:00, 81.58it/s]
 31%|███       | 27/87 [00:00<00:00, 81.20it/s]
 41%|████▏     | 36/87 [00:00<00:00, 82.02it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 81.09it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 81.15it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 81.37it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 81.74it/s]
  0%|    

[2m[36m(func pid=19100)[0m Epoch 00138: reducing learning rate of group 0 to 3.2825e-05.


 10%|█         | 9/87 [00:00<00:00, 83.13it/s]
 21%|██        | 18/87 [00:00<00:00, 80.94it/s]
 31%|███       | 27/87 [00:00<00:00, 82.85it/s]
 41%|████▏     | 36/87 [00:00<00:00, 82.38it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 82.68it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 82.27it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 79.61it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 79.08it/s]
100%|██████████| 87/87 [00:01<00:00, 79.71it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 184.43it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 74.51it/s]
 20%|█▉        | 17/87 [00:00<00:00, 77.34it/s]
 30%|██▉       | 26/87 [00:00<00:00, 79.78it/s]
 39%|███▉      | 34/87 [00:00<00:00, 79.83it/s]
 48%|████▊     | 42/87 [00:00<00:00, 78.65it/s]
 59%|█████▊    | 51/87 [00:00<00:00, 80.26it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 79.94it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 81.13it/s]
 90%|████████▉ | 78/87 [00:00<00:00, 81.56it/s]
100%|██████

[2m[36m(func pid=19100)[0m Epoch 00144: reducing learning rate of group 0 to 1.6412e-05.


 18%|█▊        | 16/87 [00:00<00:00, 76.81it/s]
 28%|██▊       | 24/87 [00:00<00:00, 76.36it/s]
 37%|███▋      | 32/87 [00:00<00:00, 76.72it/s]
 46%|████▌     | 40/87 [00:00<00:00, 75.36it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 74.48it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 75.74it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 75.16it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 76.62it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 166.67it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 72.14it/s]
 18%|█▊        | 16/87 [00:00<00:00, 72.20it/s]
 28%|██▊       | 24/87 [00:00<00:00, 74.57it/s]
 37%|███▋      | 32/87 [00:00<00:00, 75.26it/s]
 46%|████▌     | 40/87 [00:00<00:00, 75.78it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 76.69it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 77.71it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 78.02it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 77.14it/s]
 92%|█████████▏| 80/87 [00:01<00:00, 76.34it/s]
100%|█████

[2m[36m(func pid=15968)[0m Epoch 00012: reducing learning rate of group 0 to 4.4292e-02.


 10%|█         | 9/87 [00:00<00:00, 89.24it/s]
 21%|██        | 18/87 [00:00<00:00, 89.68it/s]
 31%|███       | 27/87 [00:00<00:00, 87.35it/s]
 41%|████▏     | 36/87 [00:00<00:00, 86.44it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 87.66it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 91.20it/s]
 75%|███████▍  | 65/87 [00:00<00:00, 90.84it/s]
 86%|████████▌ | 75/87 [00:00<00:00, 93.46it/s]
100%|██████████| 87/87 [00:00<00:00, 90.73it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 114.12it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 85.60it/s]
 21%|██        | 18/87 [00:00<00:00, 87.65it/s]
 31%|███       | 27/87 [00:00<00:00, 87.18it/s]
 43%|████▎     | 37/87 [00:00<00:00, 87.73it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 88.31it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 87.03it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 86.19it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 85.41it/s]
 94%|█████████▍| 82/87 [00:00<00:00, 85.29it/s]
100%|██████

[2m[36m(func pid=15968)[0m Epoch 00018: reducing learning rate of group 0 to 2.2146e-02.


 21%|██        | 18/87 [00:00<00:00, 85.95it/s]
 31%|███       | 27/87 [00:00<00:00, 82.59it/s]
 41%|████▏     | 36/87 [00:00<00:00, 83.21it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 84.98it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 87.05it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 87.75it/s]
 85%|████████▌ | 74/87 [00:00<00:00, 89.75it/s]
 97%|█████████▋| 84/87 [00:00<00:00, 90.72it/s]
100%|██████████| 87/87 [00:00<00:00, 87.78it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 120.86it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 92.12it/s]
 23%|██▎       | 20/87 [00:00<00:00, 94.19it/s]
 34%|███▍      | 30/87 [00:00<00:00, 93.47it/s]
 46%|████▌     | 40/87 [00:00<00:00, 94.21it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 93.36it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 93.02it/s]
 80%|████████  | 70/87 [00:00<00:00, 94.30it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 95.17it/s]
100%|██████████| 87/87 [00:00<00:00, 94.46it/s]
  0%|    

[2m[36m(func pid=15968)[0m Epoch 00024: reducing learning rate of group 0 to 1.1073e-02.


 11%|█▏        | 10/87 [00:00<00:00, 95.55it/s]
 23%|██▎       | 20/87 [00:00<00:00, 93.11it/s]
 34%|███▍      | 30/87 [00:00<00:00, 92.22it/s]
 46%|████▌     | 40/87 [00:00<00:00, 90.11it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 89.52it/s]
 80%|████████  | 70/87 [00:00<00:00, 92.31it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 92.40it/s]
100%|██████████| 87/87 [00:00<00:00, 92.20it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 122.89it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 93.02it/s]
 23%|██▎       | 20/87 [00:00<00:00, 93.13it/s]
 34%|███▍      | 30/87 [00:00<00:00, 89.09it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 88.93it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 89.51it/s]
 78%|███████▊  | 68/87 [00:00<00:00, 89.39it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 88.80it/s]
100%|██████████| 87/87 [00:00<00:00, 89.14it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 196.66it/s]
  0%|          | 

[2m[36m(func pid=15968)[0m Epoch 00054: reducing learning rate of group 0 to 5.5365e-03.


 10%|█         | 9/87 [00:00<00:00, 88.07it/s]
 22%|██▏       | 19/87 [00:00<00:00, 90.27it/s]
 33%|███▎      | 29/87 [00:00<00:00, 90.13it/s]
 45%|████▍     | 39/87 [00:00<00:00, 92.81it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 93.39it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 92.45it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 93.86it/s]
 91%|█████████ | 79/87 [00:00<00:00, 93.13it/s]
100%|██████████| 87/87 [00:00<00:00, 92.81it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 120.88it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 89.28it/s]
 23%|██▎       | 20/87 [00:00<00:00, 91.20it/s]
 34%|███▍      | 30/87 [00:00<00:00, 88.86it/s]
 46%|████▌     | 40/87 [00:00<00:00, 90.04it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 91.18it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 91.70it/s]
 80%|████████  | 70/87 [00:00<00:00, 89.78it/s]
 91%|█████████ | 79/87 [00:00<00:00, 88.84it/s]
100%|██████████| 87/87 [00:00<00:00, 89.60it/s]
  0%|     

[2m[36m(func pid=15968)[0m Epoch 00060: reducing learning rate of group 0 to 2.7682e-03.


 23%|██▎       | 20/87 [00:00<00:00, 91.53it/s]
 34%|███▍      | 30/87 [00:00<00:00, 92.46it/s]
 46%|████▌     | 40/87 [00:00<00:00, 92.19it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 89.77it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 90.32it/s]
 80%|████████  | 70/87 [00:00<00:00, 88.93it/s]
 91%|█████████ | 79/87 [00:00<00:00, 87.54it/s]
100%|██████████| 87/87 [00:00<00:00, 89.04it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 108.09it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 89.97it/s]
 21%|██        | 18/87 [00:00<00:00, 85.55it/s]
 31%|███       | 27/87 [00:00<00:00, 85.95it/s]
 41%|████▏     | 36/87 [00:00<00:00, 85.58it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 86.83it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 85.70it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 86.35it/s]
 93%|█████████▎| 81/87 [00:00<00:00, 87.17it/s]
 72%|███████▏  | 18/25 [00:00<00:00, 171.22it/s]
100%|██████████| 25/25 [00:00<00:00, 174.36it/s]
  0%|   

[2m[36m(func pid=15968)[0m Epoch 00066: reducing learning rate of group 0 to 1.3841e-03.


  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 89.12it/s]
 22%|██▏       | 19/87 [00:00<00:00, 89.92it/s]
 33%|███▎      | 29/87 [00:00<00:00, 91.50it/s]
 45%|████▍     | 39/87 [00:00<00:00, 89.23it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 89.45it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 90.65it/s]
 78%|███████▊  | 68/87 [00:00<00:00, 89.20it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 89.33it/s]
100%|██████████| 87/87 [00:00<00:00, 89.71it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 117.14it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 92.54it/s]
 23%|██▎       | 20/87 [00:00<00:00, 94.41it/s]
 34%|███▍      | 30/87 [00:00<00:00, 93.59it/s]
 46%|████▌     | 40/87 [00:00<00:00, 94.47it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 93.20it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 93.97it/s]
 80%|████████  | 70/87 [00:00<00:00, 94.56it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 93.98it/s]
100%|██████████| 87

[2m[36m(func pid=15968)[0m Epoch 00073: reducing learning rate of group 0 to 6.9206e-04.


 23%|██▎       | 20/87 [00:00<00:00, 94.71it/s]
 34%|███▍      | 30/87 [00:00<00:00, 95.85it/s]
 46%|████▌     | 40/87 [00:00<00:00, 95.24it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 95.26it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 95.43it/s]
 80%|████████  | 70/87 [00:00<00:00, 94.86it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 94.68it/s]
100%|██████████| 87/87 [00:00<00:00, 95.10it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 204.64it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 91.71it/s]
 23%|██▎       | 20/87 [00:00<00:00, 92.22it/s]
 34%|███▍      | 30/87 [00:00<00:00, 93.56it/s]
 46%|████▌     | 40/87 [00:00<00:00, 91.85it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 92.39it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 90.25it/s]
 80%|████████  | 70/87 [00:00<00:00, 91.52it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 178.81it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:

[2m[36m(func pid=15968)[0m Epoch 00084: reducing learning rate of group 0 to 3.4603e-04.


  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 76.84it/s]
 20%|█▉        | 17/87 [00:00<00:00, 83.98it/s]
 31%|███       | 27/87 [00:00<00:00, 86.87it/s]
 43%|████▎     | 37/87 [00:00<00:00, 88.26it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 90.92it/s]
 66%|██████▌   | 57/87 [00:00<00:00, 92.18it/s]
 77%|███████▋  | 67/87 [00:00<00:00, 93.72it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 94.60it/s]
100%|██████████| 87/87 [00:00<00:00, 91.77it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 112.81it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 82.87it/s]
 22%|██▏       | 19/87 [00:00<00:00, 86.94it/s]
 33%|███▎      | 29/87 [00:00<00:00, 88.86it/s]
 45%|████▍     | 39/87 [00:00<00:00, 89.98it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 91.80it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 91.60it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 91.81it/s]
100%|██████████| 87/87 [00:00<00:00, 90.44it/s]
  0%|          | 0/2

[2m[36m(func pid=15968)[0m Epoch 00095: reducing learning rate of group 0 to 1.7301e-04.


 11%|█▏        | 10/87 [00:00<00:00, 90.29it/s]
 23%|██▎       | 20/87 [00:00<00:00, 90.62it/s]
 34%|███▍      | 30/87 [00:00<00:00, 91.61it/s]
 46%|████▌     | 40/87 [00:00<00:00, 90.86it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 92.26it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 92.36it/s]
 80%|████████  | 70/87 [00:00<00:00, 93.55it/s]
100%|██████████| 87/87 [00:00<00:00, 93.45it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 90.99it/s]
 23%|██▎       | 20/87 [00:00<00:00, 89.11it/s]
 33%|███▎      | 29/87 [00:00<00:00, 87.96it/s]
 44%|████▎     | 38/87 [00:00<00:00, 88.53it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 88.90it/s]
 66%|██████▌   | 57/87 [00:00<00:00, 91.05it/s]
 77%|███████▋  | 67/87 [00:00<00:00, 91.88it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 91.32it/s]
100%|██████████| 87/87 [00:00<00:00, 90.59it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 114.75it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9

[2m[36m(func pid=15968)[0m Epoch 00101: reducing learning rate of group 0 to 8.6507e-05.


 21%|██        | 18/87 [00:00<00:00, 85.39it/s]
 31%|███       | 27/87 [00:00<00:00, 86.09it/s]
 41%|████▏     | 36/87 [00:00<00:00, 87.23it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 87.93it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 89.86it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 88.81it/s]
 85%|████████▌ | 74/87 [00:00<00:00, 90.54it/s]
100%|██████████| 87/87 [00:00<00:00, 89.60it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 80%|████████  | 20/25 [00:00<00:00, 190.62it/s]
100%|██████████| 25/25 [00:00<00:00, 189.31it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 84.07it/s]
 22%|██▏       | 19/87 [00:00<00:00, 91.08it/s]
 33%|███▎      | 29/87 [00:00<00:00, 87.85it/s]
 44%|████▎     | 38/87 [00:00<00:00, 86.87it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 86.18it/s]
 66%|██████▌   | 57/87 [00:00<00:00, 88.36it/s]
 77%|███████▋  | 67/87 [00:00<00:00, 88.95it/s]
 87%|████████▋ | 76/87 [00:00<00:00, 89.26it/s]
100%|██████████| 87/87 [00:00<00:00, 88.93it/s]
  0%|    

[2m[36m(func pid=15968)[0m Epoch 00114: reducing learning rate of group 0 to 4.3254e-05.


 23%|██▎       | 20/87 [00:00<00:00, 96.25it/s]
 34%|███▍      | 30/87 [00:00<00:00, 92.95it/s]
 46%|████▌     | 40/87 [00:00<00:00, 92.46it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 91.55it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 91.11it/s]
 80%|████████  | 70/87 [00:00<00:00, 90.99it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 91.01it/s]
100%|██████████| 87/87 [00:00<00:00, 90.99it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 118.47it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 96.64it/s]
 23%|██▎       | 20/87 [00:00<00:00, 95.67it/s]
 34%|███▍      | 30/87 [00:00<00:00, 95.58it/s]
 46%|████▌     | 40/87 [00:00<00:00, 93.01it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 92.44it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 93.20it/s]
 80%|████████  | 70/87 [00:00<00:00, 92.68it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 92.16it/s]
100%|██████████| 87/87 [00:00<00:00, 92.98it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 2

[2m[36m(func pid=15968)[0m Epoch 00125: reducing learning rate of group 0 to 2.1627e-05.


 11%|█▏        | 10/87 [00:00<00:00, 92.22it/s]
 23%|██▎       | 20/87 [00:00<00:00, 93.41it/s]
 34%|███▍      | 30/87 [00:00<00:00, 92.68it/s]
 46%|████▌     | 40/87 [00:00<00:00, 93.30it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 92.73it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 93.03it/s]
 80%|████████  | 70/87 [00:00<00:00, 92.86it/s]
100%|██████████| 87/87 [00:00<00:00, 93.00it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 92.08it/s]
 23%|██▎       | 20/87 [00:00<00:00, 93.30it/s]
 34%|███▍      | 30/87 [00:00<00:00, 90.34it/s]
 46%|████▌     | 40/87 [00:00<00:00, 87.03it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 85.86it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 87.55it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 88.91it/s]
 90%|████████▉ | 78/87 [00:00<00:00, 88.38it/s]
100%|██████████| 87/87 [00:00<00:00, 89.10it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 72%|███████▏  | 18/25 [00:00<00:00, 101.95it/s]
100%|██████████| 25/25 [00:00<00:00, 115.08it/s]
  0%|   

[2m[36m(func pid=15968)[0m Epoch 00131: reducing learning rate of group 0 to 1.0813e-05.


 10%|█         | 9/87 [00:00<00:00, 86.74it/s]
 22%|██▏       | 19/87 [00:00<00:00, 89.73it/s]
 32%|███▏      | 28/87 [00:00<00:00, 88.68it/s]
 44%|████▎     | 38/87 [00:00<00:00, 89.87it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 91.74it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 91.48it/s]
 78%|███████▊  | 68/87 [00:00<00:00, 92.68it/s]
 90%|████████▉ | 78/87 [00:00<00:00, 93.02it/s]
100%|██████████| 87/87 [00:00<00:00, 91.99it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 198.03it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 89.19it/s]
 22%|██▏       | 19/87 [00:00<00:00, 92.79it/s]
 33%|███▎      | 29/87 [00:00<00:00, 93.57it/s]
 45%|████▍     | 39/87 [00:00<00:00, 92.50it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 92.59it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 92.53it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 92.38it/s]
 91%|█████████ | 79/87 [00:00<00:00, 92.58it/s]
100%|██████████| 87/87 [00:00<00:00, 91.90it/s]
  0%|      

[2m[36m(func pid=15968)[0m Epoch 00137: reducing learning rate of group 0 to 5.4067e-06.


 11%|█▏        | 10/87 [00:00<00:00, 92.54it/s]
 23%|██▎       | 20/87 [00:00<00:00, 94.87it/s]
 34%|███▍      | 30/87 [00:00<00:00, 92.72it/s]
 46%|████▌     | 40/87 [00:00<00:00, 92.63it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 92.89it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 93.11it/s]
 80%|████████  | 70/87 [00:00<00:00, 92.15it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 91.44it/s]
100%|██████████| 87/87 [00:00<00:00, 92.52it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 84.60it/s]
 21%|██        | 18/87 [00:00<00:00, 83.85it/s]
 31%|███       | 27/87 [00:00<00:00, 80.16it/s]
 41%|████▏     | 36/87 [00:00<00:00, 82.94it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 85.22it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 86.32it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 87.31it/s]
 85%|████████▌ | 74/87 [00:00<00:00, 89.30it/s]
 95%|█████████▌| 83/87 [00:00<00:00, 86.91it/s]
100%|██████████| 87/87 [00:01<00:00, 86.18it/s]
  0%|      

[2m[36m(func pid=15968)[0m Epoch 00143: reducing learning rate of group 0 to 2.7034e-06.


 10%|█         | 9/87 [00:00<00:00, 87.96it/s]
 22%|██▏       | 19/87 [00:00<00:00, 90.23it/s]
 33%|███▎      | 29/87 [00:00<00:00, 91.11it/s]
 45%|████▍     | 39/87 [00:00<00:00, 91.18it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 91.69it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 92.03it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 90.82it/s]
 91%|█████████ | 79/87 [00:00<00:00, 90.82it/s]
100%|██████████| 87/87 [00:00<00:00, 91.24it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 195.97it/s]
 10%|█         | 9/87 [00:00<00:00, 88.66it/s]
 22%|██▏       | 19/87 [00:00<00:00, 90.78it/s]
 33%|███▎      | 29/87 [00:00<00:00, 92.38it/s]
 45%|████▍     | 39/87 [00:00<00:00, 90.81it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 90.57it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 89.55it/s]
 78%|███████▊  | 68/87 [00:00<00:00, 89.65it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 88.71it/s]
100%|██████████| 87/87 [00:00<00:00, 90.31it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████

[2m[36m(func pid=15968)[0m Epoch 00149: reducing learning rate of group 0 to 1.3517e-06.


 11%|█▏        | 10/87 [00:00<00:00, 93.55it/s]
 23%|██▎       | 20/87 [00:00<00:00, 92.97it/s]
 34%|███▍      | 30/87 [00:00<00:00, 91.23it/s]
 46%|████▌     | 40/87 [00:00<00:00, 88.89it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 89.84it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 90.79it/s]
 80%|████████  | 70/87 [00:00<00:00, 91.62it/s]
100%|██████████| 87/87 [00:00<00:00, 91.39it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 200.10it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  1%|          | 1/87 [00:00<00:24,  3.57it/s]
 13%|█▎        | 11/87 [00:00<00:02, 34.86it/s]
 24%|██▍       | 21/87 [00:00<00:01, 54.00it/s]
 34%|███▍      | 30/87 [00:00<00:00, 64.10it/s]
 47%|████▋     | 41/87 [00:00<00:00, 75.99it/s]
 60%|█████▉    | 52/87 [00:00<00:00, 83.91it/s]
100%|██████████| 87/87 [00:01<00:00, 74.94it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 88%|████████▊ | 22/25 [00:00<00:00, 217.90it/s]
100%|██████████| 25/25 [00:00<00:00, 126.95it/s]
  0%|          | 

[2m[36m(func pid=21008)[0m Epoch 00037: reducing learning rate of group 0 to 3.6607e-03.


 25%|██▌       | 22/87 [00:00<00:00, 108.78it/s]
 38%|███▊      | 33/87 [00:00<00:00, 105.82it/s]
 51%|█████     | 44/87 [00:00<00:00, 103.90it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 99.52it/s] 
 76%|███████▌  | 66/87 [00:00<00:00, 101.17it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 101.99it/s]
100%|██████████| 87/87 [00:00<00:00, 103.63it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 218.50it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 96.06it/s]
 24%|██▍       | 21/87 [00:00<00:00, 102.40it/s]
 37%|███▋      | 32/87 [00:00<00:00, 103.35it/s]
 49%|████▉     | 43/87 [00:00<00:00, 101.49it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 103.03it/s]
 75%|███████▍  | 65/87 [00:00<00:00, 102.73it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 214.09it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 102.22it/s]
 25%|██▌       | 22/87 [00:00<00:00, 99.49it/s] 
 37%|███▋   

[2m[36m(func pid=21008)[0m Epoch 00055: reducing learning rate of group 0 to 1.8304e-03.


 13%|█▎        | 11/87 [00:00<00:00, 104.74it/s]
 25%|██▌       | 22/87 [00:00<00:00, 102.30it/s]
 38%|███▊      | 33/87 [00:00<00:00, 97.21it/s] 
 49%|████▉     | 43/87 [00:00<00:00, 96.07it/s]
 61%|██████    | 53/87 [00:00<00:00, 94.56it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 96.03it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 94.57it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 194.25it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 91.00it/s]
 23%|██▎       | 20/87 [00:00<00:00, 90.51it/s]
 34%|███▍      | 30/87 [00:00<00:00, 94.21it/s]
 46%|████▌     | 40/87 [00:00<00:00, 94.56it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 95.85it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 96.83it/s]
 80%|████████  | 70/87 [00:00<00:00, 97.57it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 96.68it/s]
100%|██████████| 87/87 [00:00<00:00, 95.57it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 195.08it/s]
  0%|         

[2m[36m(func pid=21008)[0m Epoch 00073: reducing learning rate of group 0 to 9.1518e-04.


 23%|██▎       | 20/87 [00:00<00:00, 94.89it/s]
 34%|███▍      | 30/87 [00:00<00:00, 94.66it/s]
 46%|████▌     | 40/87 [00:00<00:00, 96.30it/s]
 59%|█████▊    | 51/87 [00:00<00:00, 98.51it/s]
 71%|███████▏  | 62/87 [00:00<00:00, 101.15it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 102.71it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 193.01it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 102.90it/s]
 25%|██▌       | 22/87 [00:00<00:00, 97.37it/s] 
 38%|███▊      | 33/87 [00:00<00:00, 101.57it/s]
 51%|█████     | 44/87 [00:00<00:00, 99.81it/s] 
 63%|██████▎   | 55/87 [00:00<00:00, 97.31it/s]
 75%|███████▍  | 65/87 [00:00<00:00, 94.62it/s]
 86%|████████▌ | 75/87 [00:00<00:00, 95.40it/s]
100%|██████████| 87/87 [00:00<00:00, 97.91it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 223.20it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 91.43it/s]
 23%|██▎       | 20/

[2m[36m(func pid=21008)[0m Epoch 00079: reducing learning rate of group 0 to 4.5759e-04.


 11%|█▏        | 10/87 [00:00<00:00, 96.07it/s]
 23%|██▎       | 20/87 [00:00<00:00, 97.25it/s]
 36%|███▌      | 31/87 [00:00<00:00, 100.62it/s]
 48%|████▊     | 42/87 [00:00<00:00, 97.45it/s] 
 61%|██████    | 53/87 [00:00<00:00, 99.82it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 97.70it/s]
 85%|████████▌ | 74/87 [00:00<00:00, 96.90it/s]
100%|██████████| 87/87 [00:00<00:00, 97.31it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 219.57it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 103.69it/s]
 25%|██▌       | 22/87 [00:00<00:00, 101.97it/s]
 51%|█████     | 44/87 [00:00<00:00, 102.24it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 102.41it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 103.32it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 103.75it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 88%|████████▊ | 22/25 [00:00<00:00, 215.83it/s]
100%|██████████| 25/25 [00:00<00:00, 209.22it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 

[2m[36m(func pid=21008)[0m Epoch 00085: reducing learning rate of group 0 to 2.2879e-04.


 13%|█▎        | 11/87 [00:00<00:00, 105.55it/s]
 25%|██▌       | 22/87 [00:00<00:00, 103.99it/s]
 38%|███▊      | 33/87 [00:00<00:00, 98.68it/s] 
 49%|████▉     | 43/87 [00:00<00:00, 95.91it/s]
 61%|██████    | 53/87 [00:00<00:00, 95.42it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 94.18it/s]
 85%|████████▌ | 74/87 [00:00<00:00, 97.53it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 179.31it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 85.07it/s]
 22%|██▏       | 19/87 [00:00<00:00, 89.84it/s]
 33%|███▎      | 29/87 [00:00<00:00, 94.31it/s]
 46%|████▌     | 40/87 [00:00<00:00, 99.32it/s]
 59%|█████▊    | 51/87 [00:00<00:00, 100.93it/s]
 71%|███████▏  | 62/87 [00:00<00:00, 103.71it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 104.29it/s]
100%|██████████| 87/87 [00:00<00:00, 101.04it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 223.37it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/

[2m[36m(func pid=21008)[0m Epoch 00091: reducing learning rate of group 0 to 1.1440e-04.


 13%|█▎        | 11/87 [00:00<00:00, 103.89it/s]
 25%|██▌       | 22/87 [00:00<00:00, 102.17it/s]
 38%|███▊      | 33/87 [00:00<00:00, 104.26it/s]
 51%|█████     | 44/87 [00:00<00:00, 104.03it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 105.77it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 107.14it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 107.81it/s]
100%|██████████| 87/87 [00:00<00:00, 106.25it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 221.47it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 106.70it/s]
 25%|██▌       | 22/87 [00:00<00:00, 104.02it/s]
 38%|███▊      | 33/87 [00:00<00:00, 102.80it/s]
 51%|█████     | 44/87 [00:00<00:00, 102.12it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 101.34it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 100.85it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 100.30it/s]
100%|██████████| 87/87 [00:00<00:00, 100.45it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 217.65it/s]
 

[2m[36m(func pid=21008)[0m Epoch 00097: reducing learning rate of group 0 to 5.7198e-05.


 24%|██▍       | 21/87 [00:00<00:00, 101.44it/s]
 37%|███▋      | 32/87 [00:00<00:00, 101.61it/s]
 49%|████▉     | 43/87 [00:00<00:00, 104.02it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 104.87it/s]
 75%|███████▍  | 65/87 [00:00<00:00, 105.53it/s]
 87%|████████▋ | 76/87 [00:00<00:00, 104.76it/s]
100%|██████████| 87/87 [00:00<00:00, 104.05it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 220.86it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 105.49it/s]
 25%|██▌       | 22/87 [00:00<00:00, 102.51it/s]
 38%|███▊      | 33/87 [00:00<00:00, 103.57it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 105.73it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 105.38it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 104.84it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 208.79it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 95.30it/s]
 23%|██▎       | 20/87 [00:00<00:00, 91.85it/s]
 34%|███▍    

[2m[36m(func pid=21008)[0m Epoch 00107: reducing learning rate of group 0 to 2.8599e-05.


 25%|██▌       | 22/87 [00:00<00:00, 99.98it/s] 
 37%|███▋      | 32/87 [00:00<00:00, 98.80it/s]
 49%|████▉     | 43/87 [00:00<00:00, 100.94it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 101.24it/s]
 75%|███████▍  | 65/87 [00:00<00:00, 103.79it/s]
 87%|████████▋ | 76/87 [00:00<00:00, 103.76it/s]
100%|██████████| 87/87 [00:00<00:00, 102.31it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 206.19it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 90.78it/s]
 23%|██▎       | 20/87 [00:00<00:00, 94.16it/s]
 34%|███▍      | 30/87 [00:00<00:00, 96.48it/s]
 47%|████▋     | 41/87 [00:00<00:00, 101.39it/s]
 60%|█████▉    | 52/87 [00:00<00:00, 101.01it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 102.74it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 123.04it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 99.69it/s]
 24%|██▍       | 21/87 [00:00<00:00, 103.35it/s]
 37%|███▋      |

[2m[36m(func pid=21008)[0m Epoch 00131: reducing learning rate of group 0 to 1.4300e-05.


 13%|█▎        | 11/87 [00:00<00:00, 100.83it/s]
 25%|██▌       | 22/87 [00:00<00:00, 102.59it/s]
 38%|███▊      | 33/87 [00:00<00:00, 100.98it/s]
 51%|█████     | 44/87 [00:00<00:00, 101.33it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 100.50it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 100.96it/s]
100%|██████████| 87/87 [00:00<00:00, 102.35it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 228.69it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 100.51it/s]
 25%|██▌       | 22/87 [00:00<00:00, 100.70it/s]
 38%|███▊      | 33/87 [00:00<00:00, 103.53it/s]
 51%|█████     | 44/87 [00:00<00:00, 103.71it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 105.00it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 104.14it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 105.07it/s]
100%|██████████| 87/87 [00:00<00:00, 98.48it/s] 
  0%|          | 0/25 [00:00<?, ?it/s]
 64%|██████▍   | 16/25 [00:00<00:00, 101.99it/s]
100%|██████████| 25/25 [00:00<00:00, 124.99it/s]
 

[2m[36m(func pid=21008)[0m Epoch 00137: reducing learning rate of group 0 to 7.1498e-06.


 25%|██▌       | 22/87 [00:00<00:00, 102.93it/s]
 38%|███▊      | 33/87 [00:00<00:00, 100.15it/s]
 51%|█████     | 44/87 [00:00<00:00, 98.36it/s] 
 62%|██████▏   | 54/87 [00:00<00:00, 94.95it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 95.03it/s]
 86%|████████▌ | 75/87 [00:00<00:00, 98.44it/s]
100%|██████████| 87/87 [00:00<00:00, 99.91it/s] 
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 225.17it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 104.11it/s]
 25%|██▌       | 22/87 [00:00<00:00, 106.89it/s]
 38%|███▊      | 33/87 [00:00<00:00, 106.65it/s]
 51%|█████     | 44/87 [00:00<00:00, 106.31it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 107.52it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 32%|███▏      | 8/25 [00:00<00:00, 68.73it/s]
100%|██████████| 25/25 [00:00<00:00, 126.89it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 93.25it/s]
 24%|██▍       | 21/87 [00:00<00:00, 102.80it/s]
 37%|███▋      | 

[2m[36m(func pid=21008)[0m Epoch 00143: reducing learning rate of group 0 to 3.5749e-06.


  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 102.11it/s]
 25%|██▌       | 22/87 [00:00<00:00, 103.02it/s]
 38%|███▊      | 33/87 [00:00<00:00, 103.35it/s]
 51%|█████     | 44/87 [00:00<00:00, 105.87it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 106.52it/s]
 77%|███████▋  | 67/87 [00:00<00:00, 108.17it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 209.20it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 93.66it/s]
 24%|██▍       | 21/87 [00:00<00:00, 98.59it/s]
 38%|███▊      | 33/87 [00:00<00:00, 104.37it/s]
 51%|█████     | 44/87 [00:00<00:00, 105.70it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 107.69it/s]
 77%|███████▋  | 67/87 [00:00<00:00, 106.83it/s]
 90%|████████▉ | 78/87 [00:00<00:00, 106.68it/s]
100%|██████████| 87/87 [00:00<00:00, 105.62it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 125.86it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 

[2m[36m(func pid=4728)[0m Epoch 00035: reducing learning rate of group 0 to 7.9993e-04.


  8%|▊         | 13/173 [00:00<00:01, 120.50it/s]
 15%|█▌        | 26/173 [00:00<00:01, 120.23it/s]
 23%|██▎       | 39/173 [00:00<00:01, 122.94it/s]
 30%|███       | 52/173 [00:00<00:00, 122.84it/s]
 38%|███▊      | 65/173 [00:00<00:00, 120.65it/s]
 45%|████▌     | 78/173 [00:00<00:00, 119.84it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 122.22it/s]
 60%|██████    | 104/173 [00:00<00:00, 121.65it/s]
 68%|██████▊   | 117/173 [00:00<00:00, 123.02it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 125.26it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 126.03it/s]
 91%|█████████▏| 158/173 [00:01<00:00, 126.49it/s]
100%|██████████| 173/173 [00:01<00:00, 123.84it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 18%|█▊        | 9/50 [00:00<00:00, 86.56it/s]
  8%|▊         | 13/173 [00:00<00:01, 128.53it/s]
 15%|█▌        | 26/173 [00:00<00:01, 127.14it/s]
 23%|██▎       | 39/173 [00:00<00:01, 118.18it/s]
 30%|███       | 52/173 [00:00<00:01, 120.93it/s]
 38%|███▊      | 66/173 [00:00<00:00, 124.06it/s]
 46%|███

[2m[36m(func pid=4728)[0m Epoch 00047: reducing learning rate of group 0 to 3.9996e-04.


  8%|▊         | 13/173 [00:00<00:01, 123.90it/s]
 15%|█▌        | 26/173 [00:00<00:01, 119.71it/s]
 23%|██▎       | 39/173 [00:00<00:01, 121.48it/s]
 30%|███       | 52/173 [00:00<00:01, 120.23it/s]
 38%|███▊      | 65/173 [00:00<00:00, 123.58it/s]
 46%|████▌     | 79/173 [00:00<00:00, 127.21it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 126.91it/s]
 61%|██████▏   | 106/173 [00:00<00:00, 128.52it/s]
 69%|██████▉   | 120/173 [00:00<00:00, 130.44it/s]
 77%|███████▋  | 134/173 [00:01<00:00, 131.08it/s]
 86%|████████▌ | 148/173 [00:01<00:00, 128.68it/s]
 93%|█████████▎| 161/173 [00:01<00:00, 125.83it/s]
100%|██████████| 173/173 [00:01<00:00, 125.48it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 66%|██████▌   | 33/50 [00:00<00:00, 323.40it/s]
100%|██████████| 50/50 [00:00<00:00, 320.08it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 121.71it/s]
 15%|█▌        | 26/173 [00:00<00:01, 126.24it/s]
 23%|██▎       | 39/173 [00:00<00:01, 123.06it/s]
 31%|███       | 

[2m[36m(func pid=4728)[0m Epoch 00074: reducing learning rate of group 0 to 9.9991e-05.


  8%|▊         | 13/173 [00:00<00:01, 120.78it/s]
 15%|█▌        | 26/173 [00:00<00:01, 124.52it/s]
 23%|██▎       | 39/173 [00:00<00:01, 118.95it/s]
 30%|███       | 52/173 [00:00<00:01, 120.74it/s]
 38%|███▊      | 65/173 [00:00<00:00, 120.22it/s]
 45%|████▌     | 78/173 [00:00<00:00, 117.98it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 122.93it/s]
 61%|██████▏   | 106/173 [00:00<00:00, 126.43it/s]
 69%|██████▉   | 119/173 [00:00<00:00, 123.89it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 124.57it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 124.87it/s]
 91%|█████████▏| 158/173 [00:01<00:00, 121.95it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 264.89it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 112.00it/s]
 14%|█▍        | 24/173 [00:00<00:01, 111.29it/s]
 21%|██▏       | 37/173 [00:00<00:01, 117.06it/s]
 28%|██▊       | 49/173 [00:00<00:01, 88.84it/s] 
 36%|███▌      | 62/173 [00:00<00:01, 100.15it/s]
 43%|████▎     | 

[2m[36m(func pid=4728)[0m Epoch 00080: reducing learning rate of group 0 to 4.9995e-05.


 15%|█▌        | 26/173 [00:00<00:01, 129.58it/s]
 23%|██▎       | 39/173 [00:00<00:01, 93.53it/s] 
 30%|███       | 52/173 [00:00<00:01, 103.23it/s]
 38%|███▊      | 66/173 [00:00<00:00, 112.92it/s]
 46%|████▌     | 79/173 [00:00<00:00, 116.35it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 120.02it/s]
 61%|██████    | 105/173 [00:00<00:00, 121.52it/s]
 68%|██████▊   | 118/173 [00:01<00:00, 122.27it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 124.13it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 124.86it/s]
 68%|██████▊   | 34/50 [00:00<00:00, 333.26it/s]
100%|██████████| 50/50 [00:00<00:00, 333.06it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 111.61it/s]
 14%|█▍        | 24/173 [00:00<00:01, 114.09it/s]
 21%|██        | 36/173 [00:00<00:01, 114.99it/s]
 28%|██▊       | 48/173 [00:00<00:01, 115.12it/s]
 35%|███▍      | 60/173 [00:00<00:00, 116.03it/s]
 42%|████▏     | 73/173 [00:00<00:00, 118.57it/s]
 57%|█████▋    | 98/173 [00:00<00:00, 120.51it/s]
 64%|███

[2m[36m(func pid=4728)[0m Epoch 00092: reducing learning rate of group 0 to 2.4998e-05.


  6%|▌         | 10/173 [00:00<00:01, 99.98it/s]
 13%|█▎        | 23/173 [00:00<00:01, 117.54it/s]
 20%|██        | 35/173 [00:00<00:01, 117.57it/s]
 27%|██▋       | 47/173 [00:00<00:01, 115.94it/s]
 34%|███▍      | 59/173 [00:00<00:00, 116.48it/s]
 42%|████▏     | 73/173 [00:00<00:00, 121.40it/s]
 50%|█████     | 87/173 [00:00<00:00, 123.90it/s]
 58%|█████▊    | 100/173 [00:00<00:00, 125.72it/s]
 66%|██████▌   | 114/173 [00:00<00:00, 127.87it/s]
 73%|███████▎  | 127/173 [00:01<00:00, 125.72it/s]
 81%|████████  | 140/173 [00:01<00:00, 124.85it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 122.57it/s]
100%|██████████| 173/173 [00:01<00:00, 120.97it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 68%|██████▊   | 34/50 [00:00<00:00, 339.25it/s]
  5%|▌         | 9/173 [00:00<00:01, 82.36it/s]
 11%|█         | 19/173 [00:00<00:01, 88.38it/s]
 17%|█▋        | 30/173 [00:00<00:01, 96.95it/s]
 24%|██▍       | 42/173 [00:00<00:01, 104.85it/s]
 31%|███       | 54/173 [00:00<00:01, 108.35it/s]
 38%|███▊  

[2m[36m(func pid=4728)[0m Epoch 00098: reducing learning rate of group 0 to 1.2499e-05.


  6%|▋         | 11/173 [00:00<00:01, 104.76it/s]
 13%|█▎        | 23/173 [00:00<00:01, 109.01it/s]
 20%|██        | 35/173 [00:00<00:01, 111.55it/s]
 27%|██▋       | 47/173 [00:00<00:01, 110.17it/s]
 35%|███▍      | 60/173 [00:00<00:00, 114.04it/s]
 42%|████▏     | 72/173 [00:00<00:00, 115.45it/s]
 49%|████▊     | 84/173 [00:00<00:00, 114.92it/s]
 62%|██████▏   | 108/173 [00:00<00:00, 114.85it/s]
 69%|██████▉   | 120/173 [00:01<00:00, 113.99it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 113.14it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 112.49it/s]
 90%|█████████ | 156/173 [00:01<00:00, 110.82it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 316.32it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 116.54it/s]
 14%|█▍        | 24/173 [00:00<00:01, 112.59it/s]
 21%|██        | 36/173 [00:00<00:01, 109.77it/s]
 28%|██▊       | 48/173 [00:00<00:01, 112.54it/s]
 35%|███▌      | 61/173 [00:00<00:00, 115.86it/s]
 43%|████▎     | 

[2m[36m(func pid=4728)[0m Epoch 00108: reducing learning rate of group 0 to 6.2494e-06.


  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 93.40it/s]
 12%|█▏        | 21/173 [00:00<00:01, 97.75it/s]
 19%|█▉        | 33/173 [00:00<00:01, 106.31it/s]
 26%|██▌       | 45/173 [00:00<00:01, 109.71it/s]
 40%|███▉      | 69/173 [00:00<00:00, 112.48it/s]
 47%|████▋     | 81/173 [00:00<00:00, 109.12it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 108.03it/s]
 60%|█████▉    | 103/173 [00:00<00:00, 106.42it/s]
 66%|██████▌   | 114/173 [00:01<00:00, 104.32it/s]
 72%|███████▏  | 125/173 [00:01<00:00, 102.13it/s]
 79%|███████▊  | 136/173 [00:01<00:00, 102.94it/s]
 85%|████████▍ | 147/173 [00:01<00:00, 103.17it/s]
 91%|█████████▏| 158/173 [00:01<00:00, 98.88it/s] 
100%|██████████| 173/173 [00:01<00:00, 104.58it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 322.30it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 111.43it/s]
 14%|█▍        | 25/173 [00:00<00:01, 116.57it/s]
 21%|██▏       | 37/173 [00

[2m[36m(func pid=4728)[0m Epoch 00114: reducing learning rate of group 0 to 3.1247e-06.


  6%|▋         | 11/173 [00:00<00:01, 107.71it/s]
 13%|█▎        | 23/173 [00:00<00:01, 112.29it/s]
 20%|██        | 35/173 [00:00<00:01, 106.55it/s]
 27%|██▋       | 46/173 [00:00<00:01, 102.87it/s]
 34%|███▎      | 58/173 [00:00<00:01, 105.89it/s]
 41%|████      | 71/173 [00:00<00:00, 111.09it/s]
 48%|████▊     | 83/173 [00:00<00:00, 112.69it/s]
 55%|█████▍    | 95/173 [00:00<00:00, 113.58it/s]
 62%|██████▏   | 107/173 [00:00<00:00, 115.10it/s]
 69%|██████▉   | 119/173 [00:01<00:00, 115.38it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 113.30it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 92.96it/s] 
 90%|█████████ | 156/173 [00:01<00:00, 100.30it/s]
100%|██████████| 173/173 [00:01<00:00, 107.19it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 311.70it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 116.40it/s]
 14%|█▍        | 25/173 [00:00<00:01, 118.96it/s]
 21%|██▏       | 37/173 [00:00<00:01, 115.98it/s]
 28%|██▊       |

[2m[36m(func pid=4728)[0m Epoch 00120: reducing learning rate of group 0 to 1.5624e-06.


  6%|▋         | 11/173 [00:00<00:01, 106.50it/s]
 13%|█▎        | 23/173 [00:00<00:01, 111.96it/s]
 20%|██        | 35/173 [00:00<00:01, 114.49it/s]
 27%|██▋       | 47/173 [00:00<00:01, 112.35it/s]
 34%|███▍      | 59/173 [00:00<00:01, 113.38it/s]
 41%|████      | 71/173 [00:00<00:00, 114.76it/s]
 48%|████▊     | 83/173 [00:00<00:00, 114.68it/s]
 55%|█████▍    | 95/173 [00:00<00:00, 114.80it/s]
 62%|██████▏   | 107/173 [00:00<00:00, 112.70it/s]
 69%|██████▉   | 119/173 [00:01<00:00, 106.90it/s]
 75%|███████▌  | 130/173 [00:01<00:00, 103.52it/s]
 82%|████████▏ | 141/173 [00:01<00:00, 103.09it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 101.73it/s]
 94%|█████████▍| 163/173 [00:01<00:00, 102.11it/s]
100%|██████████| 173/173 [00:01<00:00, 107.05it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 56%|█████▌    | 28/50 [00:00<00:00, 271.84it/s]
100%|██████████| 50/50 [00:00<00:00, 224.83it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  5%|▌         | 9/173 [00:00<00:01, 85.85it/s]
 11%|█         | 1

[2m[36m(func pid=4728)[0m Epoch 00126: reducing learning rate of group 0 to 7.8118e-07.


  6%|▋         | 11/173 [00:00<00:01, 99.95it/s]
 13%|█▎        | 22/173 [00:00<00:01, 105.28it/s]
 19%|█▉        | 33/173 [00:00<00:01, 107.21it/s]
 25%|██▌       | 44/173 [00:00<00:01, 105.31it/s]
 32%|███▏      | 55/173 [00:00<00:01, 104.54it/s]
 45%|████▌     | 78/173 [00:00<00:00, 109.10it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 109.70it/s]
 59%|█████▉    | 102/173 [00:00<00:00, 110.47it/s]
 66%|██████▌   | 114/173 [00:01<00:00, 110.41it/s]
 73%|███████▎  | 126/173 [00:01<00:00, 106.95it/s]
 79%|███████▉  | 137/173 [00:01<00:00, 105.44it/s]
 86%|████████▌ | 148/173 [00:01<00:00, 103.78it/s]
 92%|█████████▏| 159/173 [00:01<00:00, 103.38it/s]
100%|██████████| 173/173 [00:01<00:00, 106.09it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 305.47it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 102.98it/s]
 13%|█▎        | 22/173 [00:00<00:01, 104.86it/s]
 20%|█▉        | 34/173 [00:00<00:01, 107.06it/s]
 26%|██▌       |

[2m[36m(func pid=4728)[0m Epoch 00132: reducing learning rate of group 0 to 3.9059e-07.


  7%|▋         | 12/173 [00:00<00:01, 116.34it/s]
 14%|█▍        | 24/173 [00:00<00:01, 115.68it/s]
 21%|██        | 36/173 [00:00<00:01, 111.72it/s]
 28%|██▊       | 48/173 [00:00<00:01, 111.82it/s]
 35%|███▍      | 60/173 [00:00<00:01, 111.20it/s]
 42%|████▏     | 72/173 [00:00<00:00, 111.94it/s]
 49%|████▉     | 85/173 [00:00<00:00, 116.09it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 121.94it/s]
 65%|██████▍   | 112/173 [00:00<00:00, 123.37it/s]
 72%|███████▏  | 125/173 [00:01<00:00, 123.73it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 122.44it/s]
 87%|████████▋ | 151/173 [00:01<00:00, 122.29it/s]
 95%|█████████▍| 164/173 [00:01<00:00, 118.32it/s]
100%|██████████| 173/173 [00:01<00:00, 117.42it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 330.05it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 123.04it/s]
 15%|█▌        | 26/173 [00:00<00:01, 121.25it/s]
 23%|██▎       | 39/173 [00:00<00:01, 124.09it/s]
 30%|███       |

[2m[36m(func pid=4728)[0m Epoch 00138: reducing learning rate of group 0 to 1.9529e-07.


 12%|█▏        | 20/173 [00:00<00:01, 86.85it/s]
 18%|█▊        | 31/173 [00:00<00:01, 94.31it/s]
 24%|██▍       | 42/173 [00:00<00:01, 98.26it/s]
 30%|███       | 52/173 [00:00<00:01, 95.21it/s]
 37%|███▋      | 64/173 [00:00<00:01, 100.21it/s]
 43%|████▎     | 75/173 [00:00<00:00, 102.08it/s]
 50%|█████     | 87/173 [00:00<00:00, 105.75it/s]
 57%|█████▋    | 98/173 [00:00<00:00, 103.97it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 107.79it/s]
 71%|███████   | 122/173 [00:01<00:00, 110.69it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 114.60it/s]
 85%|████████▍ | 147/173 [00:01<00:00, 114.44it/s]
 92%|█████████▏| 160/173 [00:01<00:00, 117.70it/s]
100%|██████████| 173/173 [00:01<00:00, 107.80it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 70%|███████   | 35/50 [00:00<00:00, 337.98it/s]
100%|██████████| 50/50 [00:00<00:00, 335.84it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 121.44it/s]
 15%|█▌        | 26/173 [00:00<00:01, 121.57it/s]
 23%|██▎       | 39/1

[2m[36m(func pid=4728)[0m Epoch 00144: reducing learning rate of group 0 to 9.7647e-08.


 15%|█▌        | 26/173 [00:00<00:01, 118.24it/s]
 22%|██▏       | 38/173 [00:00<00:01, 86.76it/s] 
 30%|███       | 52/173 [00:00<00:01, 100.50it/s]
 38%|███▊      | 65/173 [00:00<00:01, 107.30it/s]
 45%|████▍     | 77/173 [00:00<00:00, 110.92it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 117.39it/s]
 61%|██████    | 105/173 [00:00<00:00, 121.27it/s]
 68%|██████▊   | 118/173 [00:01<00:00, 123.23it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 122.47it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 121.65it/s]
 91%|█████████ | 157/173 [00:01<00:00, 119.50it/s]
100%|██████████| 173/173 [00:01<00:00, 114.51it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 312.89it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 116.01it/s]
 14%|█▍        | 25/173 [00:00<00:01, 120.50it/s]
 23%|██▎       | 39/173 [00:00<00:01, 125.40it/s]
 30%|███       | 52/173 [00:00<00:00, 125.69it/s]
 38%|███▊      | 66/173 [00:00<00:00, 127.08it/s]
 46%|████▌     |

[2m[36m(func pid=4728)[0m Epoch 00150: reducing learning rate of group 0 to 4.8824e-08.


100%|██████████| 25/25 [00:00<00:00, 287.93it/s]
2023-03-24 20:00:08,752	ERROR trial_runner.py:1088 -- Trial train_model_f5d07_00010: Error processing event.
ray.exceptions.RayTaskError(RuntimeError): [36mray::ImplicitFunc.train()[39m (pid=4728, ip=127.0.0.1, repr=func)
  File "python\ray\_raylet.pyx", line 830, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 834, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 780, in ray._raylet.execute_task.function_executor
  File "c:\Code\hydro-ml\my_env\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "c:\Code\hydro-ml\my_env\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Code\hydro-ml\my_env\lib\site-packages\ray\tune\trainable\trainable.py", line 367, in train
    raise skipped from exception_cause(skipped)
  File "c:\Code\hydro

[2m[36m(func pid=1624)[0m Epoch 00019: reducing learning rate of group 0 to 2.9733e-03.


  8%|▊         | 13/173 [00:00<00:01, 124.67it/s]
 15%|█▌        | 26/173 [00:00<00:01, 120.74it/s]
 23%|██▎       | 39/173 [00:00<00:01, 121.77it/s]
 30%|███       | 52/173 [00:00<00:01, 116.05it/s]
 37%|███▋      | 64/173 [00:00<00:00, 113.20it/s]
 44%|████▍     | 76/173 [00:00<00:00, 112.59it/s]
 51%|█████     | 88/173 [00:00<00:00, 112.98it/s]
 58%|█████▊    | 100/173 [00:00<00:00, 112.12it/s]
 65%|██████▍   | 112/173 [00:00<00:00, 113.12it/s]
 72%|███████▏  | 125/173 [00:01<00:00, 116.33it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 118.91it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 123.73it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 125.18it/s]
100%|██████████| 173/173 [00:01<00:00, 118.72it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 72%|███████▏  | 36/50 [00:00<00:00, 355.04it/s]
100%|██████████| 50/50 [00:00<00:00, 359.61it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 132.11it/s]
 16%|█▌        | 28/173 [00:00<00:01, 133.24it/s]
 24%|██▍       |

[2m[36m(func pid=1624)[0m Epoch 00045: reducing learning rate of group 0 to 7.4332e-04.


 16%|█▌        | 27/173 [00:00<00:01, 130.90it/s]
 24%|██▎       | 41/173 [00:00<00:01, 130.81it/s]
 32%|███▏      | 55/173 [00:00<00:00, 129.72it/s]
 39%|███▉      | 68/173 [00:00<00:00, 125.80it/s]
 47%|████▋     | 81/173 [00:00<00:00, 125.36it/s]
 55%|█████▍    | 95/173 [00:00<00:00, 127.17it/s]
 63%|██████▎   | 109/173 [00:00<00:00, 128.75it/s]
 79%|███████▊  | 136/173 [00:01<00:00, 129.52it/s]
 86%|████████▌ | 149/173 [00:01<00:00, 128.18it/s]
 94%|█████████▎| 162/173 [00:01<00:00, 127.63it/s]
100%|██████████| 173/173 [00:01<00:00, 127.31it/s]
 74%|███████▍  | 37/50 [00:00<00:00, 365.16it/s]
100%|██████████| 50/50 [00:00<00:00, 351.38it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 114.07it/s]
 14%|█▍        | 24/173 [00:00<00:01, 113.15it/s]
 21%|██        | 36/173 [00:00<00:01, 112.02it/s]
 28%|██▊       | 48/173 [00:00<00:01, 113.09it/s]
 36%|███▌      | 62/173 [00:00<00:00, 119.59it/s]
 43%|████▎     | 75/173 [00:00<00:01, 96.43it/s] 
 51%|██

[2m[36m(func pid=1624)[0m Epoch 00051: reducing learning rate of group 0 to 3.7166e-04.


 15%|█▌        | 26/173 [00:00<00:01, 125.09it/s]
 23%|██▎       | 39/173 [00:00<00:01, 125.38it/s]
 30%|███       | 52/173 [00:00<00:00, 126.95it/s]
 38%|███▊      | 65/173 [00:00<00:01, 99.14it/s] 
 46%|████▌     | 79/173 [00:00<00:00, 109.41it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 113.65it/s]
 61%|██████    | 105/173 [00:00<00:00, 118.10it/s]
 69%|██████▉   | 119/173 [00:01<00:00, 122.06it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 123.55it/s]
 84%|████████▍ | 146/173 [00:01<00:00, 126.40it/s]
 99%|█████████▉| 172/173 [00:01<00:00, 125.63it/s]
100%|██████████| 173/173 [00:01<00:00, 120.39it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 348.18it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 115.24it/s]
 22%|██▏       | 38/173 [00:00<00:01, 124.50it/s]
 29%|██▉       | 51/173 [00:00<00:01, 121.80it/s]
 37%|███▋      | 64/173 [00:00<00:00, 123.87it/s]
 45%|████▍     | 77/173 [00:00<00:00, 125.46it/s]
 53%|█████▎    |

[2m[36m(func pid=1624)[0m Epoch 00057: reducing learning rate of group 0 to 1.8583e-04.


  6%|▌         | 10/173 [00:00<00:01, 93.85it/s]
 12%|█▏        | 20/173 [00:00<00:01, 92.55it/s]
 18%|█▊        | 32/173 [00:00<00:01, 103.22it/s]
 25%|██▌       | 44/173 [00:00<00:01, 108.42it/s]
 32%|███▏      | 55/173 [00:00<00:01, 108.91it/s]
 39%|███▊      | 67/173 [00:00<00:00, 108.29it/s]
 46%|████▌     | 79/173 [00:00<00:00, 109.74it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 103.06it/s]
 59%|█████▉    | 102/173 [00:00<00:00, 106.15it/s]
 66%|██████▌   | 114/173 [00:01<00:00, 109.48it/s]
 73%|███████▎  | 126/173 [00:01<00:00, 111.90it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 110.96it/s]
 87%|████████▋ | 151/173 [00:01<00:00, 113.89it/s]
 95%|█████████▍| 164/173 [00:01<00:00, 115.84it/s]
100%|██████████| 173/173 [00:01<00:00, 110.17it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 357.13it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 118.87it/s]
 14%|█▍        | 25/173 [00:00<00:01, 124.81it/s]
 22%|██▏       | 

[2m[36m(func pid=1624)[0m Epoch 00063: reducing learning rate of group 0 to 9.2915e-05.


 13%|█▎        | 23/173 [00:00<00:01, 113.31it/s]
 20%|██        | 35/173 [00:00<00:01, 113.64it/s]
 28%|██▊       | 48/173 [00:00<00:01, 117.43it/s]
 35%|███▍      | 60/173 [00:00<00:00, 118.28it/s]
 42%|████▏     | 73/173 [00:00<00:00, 121.76it/s]
 50%|████▉     | 86/173 [00:00<00:00, 122.76it/s]
 58%|█████▊    | 100/173 [00:00<00:00, 125.67it/s]
 66%|██████▌   | 114/173 [00:00<00:00, 127.13it/s]
 74%|███████▍  | 128/173 [00:01<00:00, 129.82it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 130.50it/s]
 90%|█████████ | 156/173 [00:01<00:00, 129.07it/s]
100%|██████████| 173/173 [00:01<00:00, 124.98it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 76%|███████▌  | 38/50 [00:00<00:00, 370.28it/s]
100%|██████████| 50/50 [00:00<00:00, 364.84it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 123.80it/s]
 16%|█▌        | 27/173 [00:00<00:01, 130.34it/s]
 24%|██▎       | 41/173 [00:00<00:01, 127.48it/s]
 39%|███▊      | 67/173 [00:00<00:00, 127.40it/s]
 46%|████▌     | 

[2m[36m(func pid=1624)[0m Epoch 00069: reducing learning rate of group 0 to 4.6458e-05.


  6%|▌         | 10/173 [00:00<00:01, 95.11it/s]
 12%|█▏        | 20/173 [00:00<00:01, 94.04it/s]
 18%|█▊        | 31/173 [00:00<00:01, 97.31it/s]
 32%|███▏      | 56/173 [00:00<00:01, 111.57it/s]
 40%|███▉      | 69/173 [00:00<00:00, 115.37it/s]
 47%|████▋     | 81/173 [00:00<00:00, 116.40it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 119.43it/s]
 61%|██████▏   | 106/173 [00:00<00:00, 118.56it/s]
 69%|██████▉   | 119/173 [00:01<00:00, 119.61it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 118.94it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 118.97it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 118.82it/s]
 97%|█████████▋| 167/173 [00:01<00:00, 117.47it/s]
100%|██████████| 173/173 [00:01<00:00, 114.26it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 72%|███████▏  | 36/50 [00:00<00:00, 352.61it/s]
100%|██████████| 50/50 [00:00<00:00, 345.01it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 109.72it/s]
 13%|█▎        | 23/173 [00:00<00:01, 111.23it/s]
 21%|██        | 36

[2m[36m(func pid=1624)[0m Epoch 00075: reducing learning rate of group 0 to 2.3229e-05.


 13%|█▎        | 23/173 [00:00<00:01, 109.43it/s]
 20%|█▉        | 34/173 [00:00<00:01, 109.50it/s]
 27%|██▋       | 46/173 [00:00<00:01, 113.01it/s]
 34%|███▎      | 58/173 [00:00<00:01, 114.71it/s]
 41%|████      | 71/173 [00:00<00:00, 117.20it/s]
 48%|████▊     | 83/173 [00:00<00:00, 118.09it/s]
 55%|█████▍    | 95/173 [00:00<00:00, 117.91it/s]
 62%|██████▏   | 108/173 [00:00<00:00, 118.65it/s]
 69%|██████▉   | 120/173 [00:01<00:00, 114.77it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 112.89it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 113.29it/s]
 90%|█████████ | 156/173 [00:01<00:00, 110.97it/s]
100%|██████████| 173/173 [00:01<00:00, 113.30it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 44%|████▍     | 22/50 [00:00<00:00, 145.93it/s]
100%|██████████| 50/50 [00:00<00:00, 213.90it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 109.37it/s]
 13%|█▎        | 23/173 [00:00<00:01, 112.88it/s]
 20%|██        | 35/173 [00:00<00:01, 115.68it/s]
 28%|██▊       | 

[2m[36m(func pid=1624)[0m Epoch 00081: reducing learning rate of group 0 to 1.1614e-05.


 13%|█▎        | 23/173 [00:00<00:01, 111.54it/s]
 20%|██        | 35/173 [00:00<00:01, 113.83it/s]
 27%|██▋       | 47/173 [00:00<00:01, 105.20it/s]
 34%|███▎      | 58/173 [00:00<00:01, 97.68it/s] 
 39%|███▉      | 68/173 [00:00<00:01, 97.82it/s]
 46%|████▌     | 79/173 [00:00<00:00, 100.78it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 101.98it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 103.86it/s]
 65%|██████▌   | 113/173 [00:01<00:00, 108.03it/s]
 72%|███████▏  | 125/173 [00:01<00:00, 110.51it/s]
 79%|███████▉  | 137/173 [00:01<00:00, 112.34it/s]
 86%|████████▌ | 149/173 [00:01<00:00, 111.64it/s]
 93%|█████████▎| 161/173 [00:01<00:00, 112.37it/s]
100%|██████████| 173/173 [00:01<00:00, 108.15it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 66%|██████▌   | 33/50 [00:00<00:00, 323.82it/s]
100%|██████████| 50/50 [00:00<00:00, 327.01it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 120.53it/s]
 15%|█▌        | 26/173 [00:00<00:01, 116.56it/s]
 22%|██▏       | 

[2m[36m(func pid=1624)[0m Epoch 00087: reducing learning rate of group 0 to 5.8072e-06.


  8%|▊         | 13/173 [00:00<00:01, 121.89it/s]
 15%|█▌        | 26/173 [00:00<00:01, 101.57it/s]
 23%|██▎       | 40/173 [00:00<00:01, 114.77it/s]
 31%|███       | 53/173 [00:00<00:01, 118.52it/s]
 38%|███▊      | 66/173 [00:00<00:00, 121.90it/s]
 46%|████▌     | 80/173 [00:00<00:00, 124.72it/s]
 54%|█████▍    | 93/173 [00:00<00:00, 122.54it/s]
 61%|██████▏   | 106/173 [00:00<00:00, 120.69it/s]
 69%|██████▉   | 119/173 [00:01<00:00, 118.26it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 118.38it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 116.77it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 116.76it/s]
100%|██████████| 173/173 [00:01<00:00, 117.85it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 337.72it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 116.48it/s]
 14%|█▍        | 25/173 [00:00<00:01, 124.31it/s]
 22%|██▏       | 38/173 [00:00<00:01, 118.36it/s]
 29%|██▉       | 51/173 [00:00<00:01, 119.22it/s]
 37%|███▋      |

[2m[36m(func pid=1624)[0m Epoch 00093: reducing learning rate of group 0 to 2.9036e-06.


  6%|▋         | 11/173 [00:00<00:01, 103.84it/s]
 13%|█▎        | 22/173 [00:00<00:01, 97.18it/s] 
 19%|█▉        | 33/173 [00:00<00:01, 100.11it/s]
 27%|██▋       | 46/173 [00:00<00:01, 108.47it/s]
 34%|███▍      | 59/173 [00:00<00:01, 113.48it/s]
 41%|████      | 71/173 [00:00<00:00, 111.62it/s]
 48%|████▊     | 83/173 [00:00<00:00, 113.74it/s]
 55%|█████▍    | 95/173 [00:00<00:00, 114.33it/s]
 62%|██████▏   | 108/173 [00:00<00:00, 118.62it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 121.38it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 122.34it/s]
 86%|████████▌ | 148/173 [00:01<00:00, 121.98it/s]
 93%|█████████▎| 161/173 [00:01<00:00, 119.72it/s]
100%|██████████| 173/173 [00:01<00:00, 115.10it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 293.95it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 93.47it/s]
 13%|█▎        | 22/173 [00:00<00:01, 105.16it/s]
 20%|█▉        | 34/173 [00:00<00:01, 108.68it/s]
 27%|██▋       | 

[2m[36m(func pid=1624)[0m Epoch 00099: reducing learning rate of group 0 to 1.4518e-06.


  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 115.21it/s]
 14%|█▍        | 24/173 [00:00<00:01, 116.52it/s]
 21%|██        | 36/173 [00:00<00:01, 117.74it/s]
 36%|███▋      | 63/173 [00:00<00:00, 123.39it/s]
 45%|████▍     | 77/173 [00:00<00:00, 126.59it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 126.66it/s]
 60%|█████▉    | 103/173 [00:00<00:00, 127.18it/s]
 67%|██████▋   | 116/173 [00:00<00:00, 125.69it/s]
 75%|███████▍  | 129/173 [00:01<00:00, 124.10it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 122.56it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 124.04it/s]
100%|██████████| 173/173 [00:01<00:00, 123.57it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 340.00it/s]
  8%|▊         | 13/173 [00:00<00:01, 129.87it/s]
 15%|█▌        | 26/173 [00:00<00:01, 127.66it/s]
 23%|██▎       | 39/173 [00:00<00:01, 123.58it/s]
 30%|███       | 52/173 [00:00<00:00, 123.27it/s]
 38%|███▊      | 65/173 [00:00<00:00, 120.95it/s]
 45%|████▌     |

[2m[36m(func pid=1624)[0m Epoch 00105: reducing learning rate of group 0 to 7.2590e-07.


 14%|█▍        | 24/173 [00:00<00:01, 110.28it/s]
 21%|██        | 36/173 [00:00<00:01, 114.49it/s]
 28%|██▊       | 48/173 [00:00<00:01, 114.19it/s]
 35%|███▍      | 60/173 [00:00<00:00, 115.65it/s]
 42%|████▏     | 72/173 [00:00<00:00, 116.62it/s]
 49%|████▉     | 85/173 [00:00<00:00, 119.04it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 95.20it/s] 
 64%|██████▎   | 110/173 [00:01<00:00, 102.84it/s]
 71%|███████   | 123/173 [00:01<00:00, 108.12it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 110.12it/s]
 86%|████████▌ | 148/173 [00:01<00:00, 115.38it/s]
100%|██████████| 173/173 [00:01<00:00, 113.34it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 373.81it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 125.33it/s]
 15%|█▌        | 26/173 [00:00<00:01, 125.11it/s]
 23%|██▎       | 39/173 [00:00<00:01, 126.80it/s]
 30%|███       | 52/173 [00:00<00:00, 123.08it/s]
 38%|███▊      | 65/173 [00:00<00:00, 117.77it/s]
 45%|████▍     | 

[2m[36m(func pid=1624)[0m Epoch 00111: reducing learning rate of group 0 to 3.6295e-07.


  8%|▊         | 13/173 [00:00<00:01, 121.79it/s]
 15%|█▌        | 26/173 [00:00<00:01, 115.17it/s]
 23%|██▎       | 39/173 [00:00<00:01, 120.52it/s]
 30%|███       | 52/173 [00:00<00:00, 123.13it/s]
 38%|███▊      | 65/173 [00:00<00:00, 123.01it/s]
 46%|████▌     | 79/173 [00:00<00:00, 125.60it/s]
 54%|█████▍    | 93/173 [00:00<00:00, 128.12it/s]
 61%|██████▏   | 106/173 [00:00<00:00, 128.54it/s]
 69%|██████▉   | 119/173 [00:00<00:00, 121.02it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 121.91it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 120.41it/s]
 91%|█████████▏| 158/173 [00:01<00:00, 122.75it/s]
100%|██████████| 173/173 [00:01<00:00, 123.07it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 74%|███████▍  | 37/50 [00:00<00:00, 361.35it/s]
100%|██████████| 50/50 [00:00<00:00, 368.97it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 126.38it/s]
 15%|█▌        | 26/173 [00:00<00:01, 123.39it/s]
 23%|██▎       | 40/173 [00:00<00:01, 126.88it/s]
 31%|███       | 

[2m[36m(func pid=1624)[0m Epoch 00123: reducing learning rate of group 0 to 9.0737e-08.


  8%|▊         | 14/173 [00:00<00:01, 130.35it/s]
 16%|█▌        | 28/173 [00:00<00:01, 130.90it/s]
 32%|███▏      | 55/173 [00:00<00:00, 127.85it/s]
 39%|███▉      | 68/173 [00:00<00:00, 127.54it/s]
 47%|████▋     | 82/173 [00:00<00:00, 129.88it/s]
 55%|█████▍    | 95/173 [00:00<00:00, 129.16it/s]
 63%|██████▎   | 109/173 [00:00<00:00, 129.42it/s]
 71%|███████   | 122/173 [00:00<00:00, 126.78it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 126.59it/s]
 86%|████████▌ | 149/173 [00:01<00:00, 129.07it/s]
100%|██████████| 173/173 [00:01<00:00, 129.38it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 115.70it/s]
 14%|█▍        | 25/173 [00:00<00:01, 117.84it/s]
 21%|██▏       | 37/173 [00:00<00:01, 113.20it/s]
 28%|██▊       | 49/173 [00:00<00:01, 104.10it/s]
 36%|███▌      | 62/173 [00:00<00:01, 109.67it/s]
 43%|████▎     | 74/173 [00:00<00:00, 111.45it/s]
 50%|████▉     | 86/173 [00:00<00:00, 112.70it/s]
 57%|█████▋    |

[2m[36m(func pid=1624)[0m Epoch 00129: reducing learning rate of group 0 to 4.5369e-08.


  7%|▋         | 12/173 [00:00<00:01, 112.68it/s]
 14%|█▍        | 24/173 [00:00<00:01, 116.76it/s]
 21%|██        | 36/173 [00:00<00:01, 117.85it/s]
 28%|██▊       | 48/173 [00:00<00:01, 114.25it/s]
 35%|███▍      | 60/173 [00:00<00:00, 114.17it/s]
 42%|████▏     | 73/173 [00:00<00:00, 118.75it/s]
 50%|████▉     | 86/173 [00:00<00:00, 120.61it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 119.90it/s]
 65%|██████▍   | 112/173 [00:00<00:00, 119.62it/s]
 72%|███████▏  | 125/173 [00:01<00:00, 120.90it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 123.13it/s]
 87%|████████▋ | 151/173 [00:01<00:00, 122.33it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 331.47it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  5%|▍         | 8/173 [00:00<00:04, 38.64it/s]
 19%|█▉        | 33/173 [00:00<00:01, 91.06it/s]
 27%|██▋       | 46/173 [00:00<00:01, 102.67it/s]
 34%|███▍      | 59/173 [00:00<00:01, 111.07it/s]
 42%|████▏     | 73/173 [00:00<00:00, 119.83it/s]
 50%|████▉     | 86/1

[2m[36m(func pid=1624)[0m Epoch 00135: reducing learning rate of group 0 to 2.2684e-08.


 16%|█▌        | 28/173 [00:00<00:01, 132.46it/s]
 24%|██▍       | 42/173 [00:00<00:01, 125.08it/s]
 32%|███▏      | 55/173 [00:00<00:00, 122.49it/s]
 40%|███▉      | 69/173 [00:00<00:00, 125.11it/s]
 47%|████▋     | 82/173 [00:00<00:00, 126.64it/s]
 55%|█████▌    | 96/173 [00:00<00:00, 128.78it/s]
 63%|██████▎   | 109/173 [00:00<00:00, 127.19it/s]
 71%|███████   | 122/173 [00:00<00:00, 123.99it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 123.52it/s]
 93%|█████████▎| 161/173 [00:01<00:00, 124.42it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 359.78it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 126.19it/s]
 15%|█▌        | 26/173 [00:00<00:01, 90.18it/s] 
 23%|██▎       | 39/173 [00:00<00:01, 102.14it/s]
 30%|███       | 52/173 [00:00<00:01, 109.74it/s]
 38%|███▊      | 65/173 [00:00<00:00, 115.23it/s]
 45%|████▍     | 77/173 [00:00<00:00, 116.21it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 118.09it/s]
 60%|██████    | 1

[2m[36m(func pid=1624)[0m Epoch 00141: reducing learning rate of group 0 to 1.1342e-08.


 14%|█▍        | 24/173 [00:00<00:01, 117.39it/s]
 21%|██        | 36/173 [00:00<00:01, 110.07it/s]
 28%|██▊       | 48/173 [00:00<00:01, 110.70it/s]
 35%|███▍      | 60/173 [00:00<00:01, 109.51it/s]
 42%|████▏     | 72/173 [00:00<00:00, 110.38it/s]
 49%|████▊     | 84/173 [00:00<00:00, 111.98it/s]
 55%|█████▌    | 96/173 [00:00<00:00, 113.00it/s]
 62%|██████▏   | 108/173 [00:00<00:00, 112.15it/s]
 69%|██████▉   | 120/173 [00:01<00:00, 112.59it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 110.98it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 109.28it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 107.42it/s]
 97%|█████████▋| 167/173 [00:01<00:00, 107.86it/s]
100%|██████████| 173/173 [00:01<00:00, 110.19it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 64%|██████▍   | 32/50 [00:00<00:00, 317.15it/s]
100%|██████████| 50/50 [00:00<00:00, 310.92it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 115.54it/s]
 14%|█▍        | 24/173 [00:00<00:01, 111.71it/s]
 21%|██        |

[2m[36m(func pid=21200)[0m Epoch 00140: reducing learning rate of group 0 to 1.3188e-04.


 18%|█▊        | 16/87 [00:00<00:00, 73.22it/s]
 28%|██▊       | 24/87 [00:00<00:00, 70.16it/s]
 37%|███▋      | 32/87 [00:00<00:00, 73.29it/s]
 46%|████▌     | 40/87 [00:00<00:00, 75.14it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 76.76it/s]
 66%|██████▌   | 57/87 [00:00<00:00, 77.45it/s]
 75%|███████▍  | 65/87 [00:00<00:00, 77.08it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 76.40it/s]
 93%|█████████▎| 81/87 [00:01<00:00, 76.02it/s]
100%|██████████| 87/87 [00:01<00:00, 75.81it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 32%|███▏      | 8/25 [00:00<00:00, 63.58it/s]
100%|██████████| 25/25 [00:00<00:00, 109.01it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 72.53it/s]
 18%|█▊        | 16/87 [00:00<00:00, 71.16it/s]
 28%|██▊       | 24/87 [00:00<00:00, 72.37it/s]
 37%|███▋      | 32/87 [00:00<00:00, 72.30it/s]
 46%|████▌     | 40/87 [00:00<00:00, 72.98it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 74.99it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 75.31it/s]
 74%|██████

[2m[36m(func pid=14268)[0m Epoch 00012: reducing learning rate of group 0 to 1.8324e-02.


  6%|▌         | 10/173 [00:00<00:01, 91.78it/s]
 12%|█▏        | 20/173 [00:00<00:01, 93.31it/s]
 18%|█▊        | 31/173 [00:00<00:01, 96.79it/s]
 24%|██▍       | 42/173 [00:00<00:01, 98.40it/s]
 31%|███       | 53/173 [00:00<00:01, 100.13it/s]
 37%|███▋      | 64/173 [00:00<00:01, 99.62it/s] 
 43%|████▎     | 74/173 [00:00<00:01, 95.91it/s]
 49%|████▊     | 84/173 [00:00<00:00, 94.38it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 94.01it/s]
 60%|██████    | 104/173 [00:01<00:00, 94.13it/s]
 66%|██████▌   | 114/173 [00:01<00:00, 94.15it/s]
 72%|███████▏  | 124/173 [00:01<00:00, 93.48it/s]
 77%|███████▋  | 134/173 [00:01<00:00, 90.64it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 93.97it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 94.68it/s]
100%|██████████| 173/173 [00:01<00:00, 94.85it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 301.51it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
 12%|█▏        | 21/173 [00:00<00:01, 98.80it/s]
 18%|█▊        | 31/173 [00:00

[2m[36m(func pid=14268)[0m Epoch 00032: reducing learning rate of group 0 to 9.1619e-03.


  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 93.48it/s]
 12%|█▏        | 20/173 [00:00<00:01, 89.47it/s]
 17%|█▋        | 30/173 [00:00<00:01, 93.20it/s]
 29%|██▉       | 50/173 [00:00<00:01, 95.17it/s]
 35%|███▍      | 60/173 [00:00<00:01, 96.75it/s]
 41%|████      | 71/173 [00:00<00:01, 98.43it/s]
 47%|████▋     | 81/173 [00:00<00:00, 95.52it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 94.72it/s]
 58%|█████▊    | 101/173 [00:01<00:00, 96.07it/s]
 64%|██████▍   | 111/173 [00:01<00:00, 93.57it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 95.20it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 96.71it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 96.52it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 96.09it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 294.32it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 91.67it/s]
 12%|█▏        | 20/173 [00:00<00:01, 85.56it/s]
 23%|██▎       | 40/173 [00:00<00:01, 93.2

[2m[36m(func pid=14268)[0m Epoch 00045: reducing learning rate of group 0 to 4.5809e-03.


  6%|▌         | 10/173 [00:00<00:01, 93.39it/s]
 12%|█▏        | 20/173 [00:00<00:01, 88.91it/s]
 18%|█▊        | 31/173 [00:00<00:01, 92.97it/s]
 24%|██▎       | 41/173 [00:00<00:01, 94.42it/s]
 29%|██▉       | 51/173 [00:00<00:01, 92.85it/s]
 35%|███▌      | 61/173 [00:00<00:01, 93.12it/s]
 41%|████      | 71/173 [00:00<00:01, 92.95it/s]
 47%|████▋     | 81/173 [00:00<00:00, 92.24it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 91.60it/s]
 58%|█████▊    | 101/173 [00:01<00:00, 91.37it/s]
 64%|██████▍   | 111/173 [00:01<00:00, 91.49it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 91.71it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 88.91it/s]
 81%|████████  | 140/173 [00:01<00:00, 88.69it/s]
 87%|████████▋ | 150/173 [00:01<00:00, 90.83it/s]
 92%|█████████▏| 160/173 [00:01<00:00, 92.18it/s]
 98%|█████████▊| 170/173 [00:01<00:00, 92.79it/s]
100%|██████████| 173/173 [00:01<00:00, 91.78it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 275.19it/s]
  0%|          | 0/17

[2m[36m(func pid=14268)[0m Epoch 00056: reducing learning rate of group 0 to 2.2905e-03.


  6%|▌         | 10/173 [00:00<00:01, 92.68it/s]
 12%|█▏        | 20/173 [00:00<00:01, 91.53it/s]
 18%|█▊        | 31/173 [00:00<00:01, 95.01it/s]
 24%|██▎       | 41/173 [00:00<00:01, 71.77it/s]
 29%|██▉       | 51/173 [00:00<00:01, 77.76it/s]
 36%|███▌      | 62/173 [00:00<00:01, 84.99it/s]
 42%|████▏     | 72/173 [00:00<00:01, 88.98it/s]
 48%|████▊     | 83/173 [00:00<00:00, 94.48it/s]
 54%|█████▍    | 93/173 [00:01<00:00, 95.19it/s]
 60%|██████    | 104/173 [00:01<00:00, 96.37it/s]
 66%|██████▌   | 114/173 [00:01<00:00, 97.00it/s]
 72%|███████▏  | 124/173 [00:01<00:00, 96.39it/s]
 77%|███████▋  | 134/173 [00:01<00:00, 95.02it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 92.90it/s]
100%|██████████| 173/173 [00:01<00:00, 91.54it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 52%|█████▏    | 26/50 [00:00<00:00, 250.65it/s]
100%|██████████| 50/50 [00:00<00:00, 263.76it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  5%|▌         | 9/173 [00:00<00:01, 88.56it/s]
 10%|█         | 18/173 [00:00<00:

[2m[36m(func pid=14268)[0m Epoch 00071: reducing learning rate of group 0 to 1.1452e-03.


 11%|█         | 19/173 [00:00<00:01, 89.93it/s]
 17%|█▋        | 29/173 [00:00<00:01, 90.89it/s]
 23%|██▎       | 39/173 [00:00<00:01, 85.23it/s]
 28%|██▊       | 48/173 [00:00<00:01, 85.82it/s]
 34%|███▎      | 58/173 [00:00<00:01, 87.97it/s]
 39%|███▉      | 68/173 [00:00<00:01, 89.44it/s]
 45%|████▌     | 78/173 [00:00<00:01, 91.37it/s]
 51%|█████     | 88/173 [00:00<00:00, 91.33it/s]
 57%|█████▋    | 98/173 [00:01<00:00, 92.04it/s]
 62%|██████▏   | 108/173 [00:01<00:00, 92.24it/s]
 68%|██████▊   | 118/173 [00:01<00:00, 91.97it/s]
 74%|███████▍  | 128/173 [00:01<00:00, 90.39it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 92.11it/s]
 86%|████████▌ | 148/173 [00:01<00:00, 91.57it/s]
 91%|█████████▏| 158/173 [00:01<00:00, 92.55it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 291.19it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  5%|▌         | 9/173 [00:00<00:01, 88.33it/s]
 10%|█         | 18/173 [00:00<00:01, 83.68it/s]
 16%|█▌        | 28/173 [00:00<00:

[2m[36m(func pid=14268)[0m Epoch 00077: reducing learning rate of group 0 to 5.7262e-04.


  5%|▌         | 9/173 [00:00<00:01, 88.03it/s]
 10%|█         | 18/173 [00:00<00:01, 88.88it/s]
 16%|█▌        | 28/173 [00:00<00:01, 90.11it/s]
 22%|██▏       | 38/173 [00:00<00:01, 92.10it/s]
 28%|██▊       | 48/173 [00:00<00:01, 92.26it/s]
 34%|███▍      | 59/173 [00:00<00:01, 94.97it/s]
 40%|███▉      | 69/173 [00:00<00:01, 94.21it/s]
 46%|████▌     | 79/173 [00:00<00:00, 94.67it/s]
 51%|█████▏    | 89/173 [00:00<00:00, 95.61it/s]
 57%|█████▋    | 99/173 [00:01<00:00, 95.56it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 96.66it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 98.31it/s]
 82%|████████▏ | 141/173 [00:01<00:00, 96.31it/s]
 87%|████████▋ | 151/173 [00:01<00:00, 97.05it/s]
 94%|█████████▎| 162/173 [00:01<00:00, 98.14it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 259.05it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 98.96it/s]
 12%|█▏        | 20/173 [00:00<00:01, 95.70it/s]
 17%|█▋        | 30/173 [00:00<00:0

[2m[36m(func pid=14268)[0m Epoch 00094: reducing learning rate of group 0 to 2.8631e-04.


  6%|▌         | 10/173 [00:00<00:01, 90.35it/s]
 12%|█▏        | 20/173 [00:00<00:01, 91.18it/s]
 17%|█▋        | 30/173 [00:00<00:01, 91.02it/s]
 23%|██▎       | 40/173 [00:00<00:01, 90.18it/s]
 29%|██▉       | 50/173 [00:00<00:01, 90.47it/s]
 35%|███▍      | 60/173 [00:00<00:01, 89.47it/s]
 41%|████      | 71/173 [00:00<00:01, 93.91it/s]
 47%|████▋     | 81/173 [00:00<00:00, 92.05it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 93.30it/s]
 58%|█████▊    | 101/173 [00:01<00:00, 91.31it/s]
 64%|██████▍   | 111/173 [00:01<00:00, 91.88it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 90.62it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 93.70it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 92.05it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 92.54it/s]
100%|██████████| 173/173 [00:01<00:00, 91.99it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 58%|█████▊    | 29/50 [00:00<00:00, 289.84it/s]
100%|██████████| 50/50 [00:00<00:00, 290.36it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  5%|▌         | 9/173 [00:00<00

[2m[36m(func pid=14268)[0m Epoch 00100: reducing learning rate of group 0 to 1.4315e-04.


  6%|▌         | 10/173 [00:00<00:01, 91.95it/s]
 12%|█▏        | 20/173 [00:00<00:01, 90.19it/s]
 17%|█▋        | 30/173 [00:00<00:01, 90.16it/s]
 23%|██▎       | 40/173 [00:00<00:01, 90.05it/s]
 34%|███▍      | 59/173 [00:00<00:01, 88.94it/s]
 40%|███▉      | 69/173 [00:00<00:01, 91.71it/s]
 46%|████▌     | 79/173 [00:00<00:01, 90.93it/s]
 51%|█████▏    | 89/173 [00:00<00:00, 92.59it/s]
 57%|█████▋    | 99/173 [00:01<00:00, 93.40it/s]
 63%|██████▎   | 109/173 [00:01<00:00, 93.97it/s]
 69%|██████▉   | 119/173 [00:01<00:00, 75.09it/s]
 80%|████████  | 139/173 [00:01<00:00, 84.89it/s]
 86%|████████▌ | 149/173 [00:01<00:00, 88.51it/s]
 92%|█████████▏| 159/173 [00:01<00:00, 89.43it/s]
100%|██████████| 173/173 [00:01<00:00, 88.71it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 54%|█████▍    | 27/50 [00:00<00:00, 264.79it/s]
100%|██████████| 50/50 [00:00<00:00, 270.45it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 90.31it/s]
 12%|█▏        | 20/173 [00:00<00

[2m[36m(func pid=14268)[0m Epoch 00110: reducing learning rate of group 0 to 7.1577e-05.


  5%|▌         | 9/173 [00:00<00:01, 87.29it/s]
 16%|█▌        | 27/173 [00:00<00:01, 88.44it/s]
 21%|██        | 36/173 [00:00<00:01, 87.67it/s]
 26%|██▌       | 45/173 [00:00<00:01, 87.00it/s]
 32%|███▏      | 55/173 [00:00<00:01, 90.14it/s]
 38%|███▊      | 65/173 [00:00<00:01, 92.69it/s]
 43%|████▎     | 75/173 [00:00<00:01, 94.72it/s]
 49%|████▉     | 85/173 [00:01<00:01, 75.63it/s]
 55%|█████▍    | 95/173 [00:01<00:00, 78.95it/s]
 61%|██████    | 105/173 [00:01<00:00, 82.87it/s]
 66%|██████▋   | 115/173 [00:01<00:00, 85.47it/s]
 72%|███████▏  | 125/173 [00:01<00:00, 87.55it/s]
 77%|███████▋  | 134/173 [00:01<00:00, 87.05it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 90.44it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 92.96it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 289.09it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  5%|▌         | 9/173 [00:00<00:01, 86.19it/s]
 10%|█         | 18/173 [00:00<00:01, 87.28it/s]
 16%|█▌        | 28/173 [00:00<00:0

[2m[36m(func pid=14268)[0m Epoch 00116: reducing learning rate of group 0 to 3.5789e-05.


 12%|█▏        | 20/173 [00:00<00:01, 92.35it/s]
 17%|█▋        | 30/173 [00:00<00:01, 89.94it/s]
 23%|██▎       | 40/173 [00:00<00:01, 89.91it/s]
 28%|██▊       | 49/173 [00:00<00:01, 89.77it/s]
 34%|███▍      | 59/173 [00:00<00:01, 90.05it/s]
 40%|███▉      | 69/173 [00:00<00:01, 90.90it/s]
 46%|████▌     | 79/173 [00:00<00:01, 89.57it/s]
 51%|█████     | 88/173 [00:00<00:00, 89.33it/s]
 61%|██████▏   | 106/173 [00:01<00:00, 88.69it/s]
 67%|██████▋   | 116/173 [00:01<00:00, 90.11it/s]
 73%|███████▎  | 126/173 [00:01<00:00, 92.08it/s]
 79%|███████▊  | 136/173 [00:01<00:00, 92.88it/s]
 84%|████████▍ | 146/173 [00:01<00:00, 94.07it/s]
 90%|█████████ | 156/173 [00:01<00:00, 93.10it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 52%|█████▏    | 26/50 [00:00<00:00, 259.91it/s]
100%|██████████| 50/50 [00:00<00:00, 267.45it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 94.31it/s]
 12%|█▏        | 20/173 [00:00<00:01, 82.09it/s]
 17%|█▋        | 29/173 [00:00<00

[2m[36m(func pid=14268)[0m Epoch 00122: reducing learning rate of group 0 to 1.7894e-05.


  6%|▌         | 10/173 [00:00<00:01, 93.40it/s]
 12%|█▏        | 20/173 [00:00<00:01, 96.19it/s]
 17%|█▋        | 30/173 [00:00<00:01, 91.45it/s]
 23%|██▎       | 40/173 [00:00<00:01, 94.32it/s]
 29%|██▉       | 50/173 [00:00<00:01, 94.85it/s]
 35%|███▌      | 61/173 [00:00<00:01, 97.31it/s]
 42%|████▏     | 72/173 [00:00<00:01, 98.51it/s]
 48%|████▊     | 83/173 [00:00<00:00, 99.53it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 100.61it/s]
 61%|██████    | 105/173 [00:01<00:00, 97.81it/s]
 67%|██████▋   | 116/173 [00:01<00:00, 99.95it/s]
 79%|███████▉  | 137/173 [00:01<00:00, 99.13it/s]
 86%|████████▌ | 148/173 [00:01<00:00, 99.30it/s]
 92%|█████████▏| 159/173 [00:01<00:00, 99.59it/s]
100%|██████████| 173/173 [00:01<00:00, 97.57it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 58%|█████▊    | 29/50 [00:00<00:00, 285.88it/s]
100%|██████████| 50/50 [00:00<00:00, 286.77it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 89.56it/s]
 11%|█         | 19/173 [00:00<0

[2m[36m(func pid=14268)[0m Epoch 00128: reducing learning rate of group 0 to 8.9471e-06.


 12%|█▏        | 20/173 [00:00<00:01, 95.76it/s]
 17%|█▋        | 30/173 [00:00<00:01, 92.03it/s]
 23%|██▎       | 40/173 [00:00<00:01, 93.95it/s]
 29%|██▉       | 51/173 [00:00<00:01, 96.09it/s]
 35%|███▌      | 61/173 [00:00<00:01, 95.58it/s]
 41%|████      | 71/173 [00:00<00:01, 95.77it/s]
 47%|████▋     | 81/173 [00:00<00:00, 93.09it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 90.78it/s]
 58%|█████▊    | 101/173 [00:01<00:00, 91.74it/s]
 65%|██████▍   | 112/173 [00:01<00:00, 94.68it/s]
 71%|███████   | 123/173 [00:01<00:00, 96.58it/s]
 77%|███████▋  | 133/173 [00:01<00:00, 96.73it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 97.42it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 96.73it/s]
 94%|█████████▍| 163/173 [00:01<00:00, 97.62it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 58%|█████▊    | 29/50 [00:00<00:00, 284.90it/s]
100%|██████████| 50/50 [00:00<00:00, 287.50it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  5%|▌         | 9/173 [00:00<00:01, 86.67it/s]
 10%|█         | 18/173 [00:00<00

[2m[36m(func pid=14268)[0m Epoch 00134: reducing learning rate of group 0 to 4.4736e-06.


  5%|▌         | 9/173 [00:00<00:01, 88.18it/s]
 10%|█         | 18/173 [00:00<00:01, 88.78it/s]
 16%|█▌        | 27/173 [00:00<00:01, 88.84it/s]
 27%|██▋       | 47/173 [00:00<00:01, 91.42it/s]
 33%|███▎      | 57/173 [00:00<00:01, 87.43it/s]
 38%|███▊      | 66/173 [00:00<00:01, 87.12it/s]
 43%|████▎     | 75/173 [00:00<00:01, 87.98it/s]
 49%|████▉     | 85/173 [00:00<00:00, 90.01it/s]
 55%|█████▍    | 95/173 [00:01<00:00, 89.24it/s]
 61%|██████    | 105/173 [00:01<00:00, 90.07it/s]
 66%|██████▋   | 115/173 [00:01<00:00, 90.75it/s]
 72%|███████▏  | 125/173 [00:01<00:00, 91.35it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 91.72it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 92.35it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 92.32it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 93.40it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 50%|█████     | 25/50 [00:00<00:00, 246.03it/s]
100%|██████████| 50/50 [00:00<00:00, 180.02it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
 10%|█         | 18/173 [00:00<00

[2m[36m(func pid=14268)[0m Epoch 00140: reducing learning rate of group 0 to 2.2368e-06.


 12%|█▏        | 20/173 [00:00<00:01, 96.88it/s]
 17%|█▋        | 30/173 [00:00<00:01, 94.51it/s]
 23%|██▎       | 40/173 [00:00<00:01, 92.61it/s]
 29%|██▉       | 50/173 [00:00<00:01, 94.43it/s]
 35%|███▌      | 61/173 [00:00<00:01, 97.18it/s]
 41%|████      | 71/173 [00:00<00:01, 97.85it/s]
 47%|████▋     | 81/173 [00:00<00:00, 96.67it/s]
 59%|█████▉    | 102/173 [00:01<00:00, 98.54it/s]
 65%|██████▍   | 112/173 [00:01<00:00, 97.99it/s]
 71%|███████   | 122/173 [00:01<00:00, 97.99it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 98.19it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 98.36it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 98.51it/s]
100%|██████████| 173/173 [00:01<00:00, 97.16it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 60%|██████    | 30/50 [00:00<00:00, 293.86it/s]
100%|██████████| 50/50 [00:00<00:00, 298.69it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 90.96it/s]
 12%|█▏        | 20/173 [00:00<00:01, 88.04it/s]
 17%|█▋        | 30/173 [00:00<0

[2m[36m(func pid=14268)[0m Epoch 00146: reducing learning rate of group 0 to 1.1184e-06.


 12%|█▏        | 20/173 [00:00<00:01, 88.24it/s]
 17%|█▋        | 29/173 [00:00<00:01, 86.71it/s]
 22%|██▏       | 38/173 [00:00<00:01, 86.35it/s]
 28%|██▊       | 48/173 [00:00<00:01, 91.01it/s]
 34%|███▎      | 58/173 [00:00<00:01, 92.72it/s]
 39%|███▉      | 68/173 [00:00<00:01, 91.84it/s]
 45%|████▌     | 78/173 [00:00<00:01, 93.72it/s]
 51%|█████     | 88/173 [00:00<00:00, 95.40it/s]
 57%|█████▋    | 98/173 [00:01<00:00, 96.20it/s]
 62%|██████▏   | 108/173 [00:01<00:00, 94.84it/s]
 69%|██████▉   | 119/173 [00:01<00:00, 96.72it/s]
 75%|███████▍  | 129/173 [00:01<00:00, 94.34it/s]
 80%|████████  | 139/173 [00:01<00:00, 95.88it/s]
 86%|████████▌ | 149/173 [00:01<00:00, 96.33it/s]
 92%|█████████▏| 159/173 [00:01<00:00, 97.06it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 56%|█████▌    | 28/50 [00:00<00:00, 276.95it/s]
100%|██████████| 50/50 [00:00<00:00, 283.85it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 95.27it/s]
 12%|█▏        | 20/173 [00:00<00

[2m[36m(func pid=11132)[0m Epoch 00109: reducing learning rate of group 0 to 1.1865e-04.


 13%|█▎        | 22/173 [00:00<00:01, 97.35it/s] 
 18%|█▊        | 32/173 [00:00<00:01, 95.42it/s]
 24%|██▍       | 42/173 [00:00<00:01, 96.96it/s]
 31%|███       | 53/173 [00:00<00:01, 98.54it/s]
 37%|███▋      | 64/173 [00:00<00:01, 100.45it/s]
 43%|████▎     | 75/173 [00:00<00:00, 103.07it/s]
 50%|████▉     | 86/173 [00:00<00:00, 103.68it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 100.09it/s]
 62%|██████▏   | 108/173 [00:01<00:00, 100.44it/s]
 69%|██████▉   | 119/173 [00:01<00:00, 100.25it/s]
 75%|███████▌  | 130/173 [00:01<00:00, 100.42it/s]
 82%|████████▏ | 141/173 [00:01<00:00, 99.77it/s] 
 87%|████████▋ | 151/173 [00:01<00:00, 98.48it/s]
 93%|█████████▎| 161/173 [00:01<00:00, 98.33it/s]
 52%|█████▏    | 26/50 [00:00<00:00, 258.77it/s]
100%|██████████| 50/50 [00:00<00:00, 274.31it/s]
  6%|▌         | 10/173 [00:00<00:01, 99.01it/s]
 12%|█▏        | 20/173 [00:00<00:01, 98.27it/s]
 17%|█▋        | 30/173 [00:00<00:01, 94.03it/s]
 23%|██▎       | 40/173 [00:00<00:01, 95.91it/s]
 29%|

[2m[36m(func pid=11132)[0m Epoch 00116: reducing learning rate of group 0 to 5.9324e-05.


  6%|▌         | 10/173 [00:00<00:01, 87.33it/s]
 12%|█▏        | 20/173 [00:00<00:01, 90.14it/s]
 17%|█▋        | 30/173 [00:00<00:01, 86.45it/s]
 23%|██▎       | 39/173 [00:00<00:01, 87.14it/s]
 28%|██▊       | 49/173 [00:00<00:01, 89.68it/s]
 35%|███▍      | 60/173 [00:00<00:01, 94.22it/s]
 41%|████      | 71/173 [00:00<00:01, 97.75it/s]
 47%|████▋     | 81/173 [00:00<00:00, 97.52it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 100.12it/s]
 60%|█████▉    | 103/173 [00:01<00:00, 100.68it/s]
 66%|██████▌   | 114/173 [00:01<00:00, 98.74it/s] 
 72%|███████▏  | 124/173 [00:01<00:00, 98.84it/s]
 77%|███████▋  | 134/173 [00:01<00:00, 98.04it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 98.59it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 97.27it/s]
 95%|█████████▍| 164/173 [00:01<00:00, 97.78it/s]
100%|██████████| 173/173 [00:01<00:00, 95.79it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 22%|██▏       | 11/50 [00:00<00:00, 89.11it/s]
100%|██████████| 50/50 [00:00<00:00, 196.10it/s]
  0%|          | 0/1

[2m[36m(func pid=11132)[0m Epoch 00122: reducing learning rate of group 0 to 2.9662e-05.


 13%|█▎        | 22/173 [00:00<00:01, 98.67it/s] 
 18%|█▊        | 32/173 [00:00<00:01, 95.73it/s]
 25%|██▍       | 43/173 [00:00<00:01, 97.93it/s]
 31%|███       | 54/173 [00:00<00:01, 100.70it/s]
 38%|███▊      | 65/173 [00:00<00:01, 101.99it/s]
 44%|████▍     | 76/173 [00:00<00:00, 102.94it/s]
 50%|█████     | 87/173 [00:00<00:00, 103.54it/s]
 57%|█████▋    | 98/173 [00:00<00:00, 102.37it/s]
 63%|██████▎   | 109/173 [00:01<00:00, 102.55it/s]
 69%|██████▉   | 120/173 [00:01<00:00, 103.32it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 105.13it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 106.23it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 105.73it/s]
100%|██████████| 173/173 [00:01<00:00, 103.13it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 314.84it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00:00<00:01, 99.13it/s]
 12%|█▏        | 21/173 [00:00<00:01, 103.64it/s]
 18%|█▊        | 32/173 [00:00<00:01, 102.41it/s]
 25%|██▍       | 43

[2m[36m(func pid=21148)[0m Epoch 00061: reducing learning rate of group 0 to 2.9903e-04.


  8%|▊         | 13/173 [00:00<00:01, 119.91it/s]
 14%|█▍        | 25/173 [00:00<00:01, 83.36it/s] 
 21%|██        | 36/173 [00:00<00:01, 92.72it/s]
 27%|██▋       | 46/173 [00:00<00:01, 94.68it/s]
 33%|███▎      | 57/173 [00:00<00:01, 99.04it/s]
 40%|███▉      | 69/173 [00:00<00:00, 104.66it/s]
 47%|████▋     | 81/173 [00:00<00:00, 106.48it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 111.02it/s]
 62%|██████▏   | 107/173 [00:01<00:00, 116.30it/s]
 69%|██████▉   | 119/173 [00:01<00:00, 116.02it/s]
 77%|███████▋  | 133/173 [00:01<00:00, 120.96it/s]
 84%|████████▍ | 146/173 [00:01<00:00, 119.08it/s]
 92%|█████████▏| 159/173 [00:01<00:00, 120.29it/s]
100%|██████████| 173/173 [00:01<00:00, 111.15it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 344.34it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
 15%|█▌        | 26/173 [00:00<00:01, 125.81it/s]
 23%|██▎       | 39/173 [00:00<00:01, 123.17it/s]
 30%|███       | 52/173 [00:00<00:00, 124.46it/s]
 38%|███▊      | 65

[2m[36m(func pid=21148)[0m Epoch 00074: reducing learning rate of group 0 to 1.4952e-04.


 13%|█▎        | 23/173 [00:00<00:01, 112.96it/s]
 20%|██        | 35/173 [00:00<00:01, 114.78it/s]
 27%|██▋       | 47/173 [00:00<00:01, 104.68it/s]
 34%|███▎      | 58/173 [00:00<00:01, 106.39it/s]
 40%|████      | 70/173 [00:00<00:00, 108.78it/s]
 47%|████▋     | 82/173 [00:00<00:00, 110.72it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 109.98it/s]
 61%|██████▏   | 106/173 [00:00<00:00, 109.42it/s]
 68%|██████▊   | 118/173 [00:01<00:00, 111.30it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 111.10it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 110.43it/s]
 96%|█████████▌| 166/173 [00:01<00:00, 111.60it/s]
100%|██████████| 173/173 [00:01<00:00, 110.41it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 68%|██████▊   | 34/50 [00:00<00:00, 324.28it/s]
100%|██████████| 50/50 [00:00<00:00, 331.11it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 109.65it/s]
 13%|█▎        | 23/173 [00:00<00:01, 112.59it/s]
 20%|██        | 35/173 [00:00<00:01, 114.61it/s]
 35%|███▍      | 

[2m[36m(func pid=21148)[0m Epoch 00095: reducing learning rate of group 0 to 7.4758e-05.


  7%|▋         | 12/173 [00:00<00:01, 112.28it/s]
 14%|█▍        | 25/173 [00:00<00:01, 117.85it/s]
 21%|██▏       | 37/173 [00:00<00:01, 116.05it/s]
 36%|███▌      | 62/173 [00:00<00:00, 118.49it/s]
 43%|████▎     | 75/173 [00:00<00:00, 121.16it/s]
 51%|█████     | 88/173 [00:00<00:00, 119.14it/s]
 58%|█████▊    | 100/173 [00:00<00:00, 118.34it/s]
 65%|██████▍   | 112/173 [00:00<00:00, 116.85it/s]
 72%|███████▏  | 124/173 [00:01<00:00, 115.62it/s]
 79%|███████▊  | 136/173 [00:01<00:00, 92.54it/s] 
 86%|████████▌ | 148/173 [00:01<00:00, 99.28it/s]
 92%|█████████▏| 160/173 [00:01<00:00, 104.44it/s]
100%|██████████| 173/173 [00:01<00:00, 110.47it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 372.48it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 127.68it/s]
 15%|█▌        | 26/173 [00:00<00:01, 121.55it/s]
 23%|██▎       | 39/173 [00:00<00:01, 121.87it/s]
 30%|███       | 52/173 [00:00<00:00, 122.59it/s]
 38%|███▊      |

[2m[36m(func pid=21148)[0m Epoch 00108: reducing learning rate of group 0 to 3.7379e-05.


  7%|▋         | 12/173 [00:00<00:01, 109.71it/s]
 14%|█▍        | 24/173 [00:00<00:01, 112.54it/s]
 28%|██▊       | 48/173 [00:00<00:01, 110.36it/s]
 35%|███▍      | 60/173 [00:00<00:01, 109.37it/s]
 41%|████      | 71/173 [00:00<00:00, 107.59it/s]
 47%|████▋     | 82/173 [00:00<00:00, 106.90it/s]
 54%|█████▍    | 93/173 [00:00<00:00, 105.62it/s]
 60%|██████    | 104/173 [00:00<00:00, 105.96it/s]
 66%|██████▋   | 115/173 [00:01<00:00, 104.72it/s]
 73%|███████▎  | 126/173 [00:01<00:00, 101.54it/s]
 86%|████████▌ | 148/173 [00:01<00:00, 102.53it/s]
 92%|█████████▏| 160/173 [00:01<00:00, 106.35it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 321.83it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 103.78it/s]
 20%|█▉        | 34/173 [00:00<00:01, 108.28it/s]
 26%|██▌       | 45/173 [00:00<00:01, 108.14it/s]
 33%|███▎      | 57/173 [00:00<00:01, 111.34it/s]
 40%|███▉      | 69/173 [00:00<00:00, 108.25it/s]
 47%|████▋     | 

[2m[36m(func pid=21148)[0m Epoch 00123: reducing learning rate of group 0 to 1.8689e-05.


  8%|▊         | 13/173 [00:00<00:01, 123.44it/s]
 15%|█▌        | 26/173 [00:00<00:01, 122.98it/s]
 23%|██▎       | 39/173 [00:00<00:01, 122.11it/s]
 30%|███       | 52/173 [00:00<00:00, 123.07it/s]
 38%|███▊      | 66/173 [00:00<00:00, 127.51it/s]
 46%|████▌     | 79/173 [00:00<00:00, 127.42it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 126.26it/s]
 61%|██████    | 105/173 [00:00<00:00, 125.50it/s]
 68%|██████▊   | 118/173 [00:00<00:00, 125.61it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 126.94it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 127.16it/s]
 91%|█████████▏| 158/173 [00:01<00:00, 126.29it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 364.70it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
 13%|█▎        | 23/173 [00:00<00:01, 113.40it/s]
 21%|██        | 36/173 [00:00<00:01, 116.22it/s]
 28%|██▊       | 48/173 [00:00<00:01, 115.87it/s]
 35%|███▌      | 61/173 [00:00<00:00, 119.18it/s]
 42%|████▏     | 73/173 [00:00<00:00, 116.37it/s]
 49%|████▉     | 

[2m[36m(func pid=21148)[0m Epoch 00129: reducing learning rate of group 0 to 9.3447e-06.


  7%|▋         | 12/173 [00:00<00:01, 116.10it/s]
 14%|█▍        | 24/173 [00:00<00:01, 115.23it/s]
 28%|██▊       | 49/173 [00:00<00:01, 117.86it/s]
 35%|███▌      | 61/173 [00:00<00:00, 116.38it/s]
 42%|████▏     | 73/173 [00:00<00:00, 114.19it/s]
 50%|████▉     | 86/173 [00:00<00:00, 116.02it/s]
 57%|█████▋    | 98/173 [00:00<00:00, 116.31it/s]
 64%|██████▍   | 111/173 [00:00<00:00, 118.52it/s]
 71%|███████   | 123/173 [00:01<00:00, 118.60it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 113.64it/s]
 92%|█████████▏| 160/173 [00:01<00:00, 117.13it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 358.82it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  5%|▍         | 8/173 [00:00<00:03, 51.40it/s]
 12%|█▏        | 20/173 [00:00<00:01, 82.13it/s]
 17%|█▋        | 30/173 [00:00<00:01, 89.07it/s]
 24%|██▍       | 42/173 [00:00<00:01, 97.65it/s]
 38%|███▊      | 66/173 [00:00<00:00, 107.68it/s]
 46%|████▌     | 79/173 [00:00<00:00, 112.75it/s]
 53%|█████▎    | 91/173

[2m[36m(func pid=21148)[0m Epoch 00135: reducing learning rate of group 0 to 4.6724e-06.


  8%|▊         | 13/173 [00:00<00:01, 124.29it/s]
 15%|█▌        | 26/173 [00:00<00:01, 116.40it/s]
 31%|███       | 53/173 [00:00<00:00, 122.51it/s]
 39%|███▊      | 67/173 [00:00<00:00, 124.89it/s]
 47%|████▋     | 81/173 [00:00<00:00, 127.20it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 126.72it/s]
 62%|██████▏   | 108/173 [00:00<00:00, 128.29it/s]
 70%|██████▉   | 121/173 [00:00<00:00, 125.04it/s]
 77%|███████▋  | 134/173 [00:01<00:00, 124.77it/s]
 85%|████████▍ | 147/173 [00:01<00:00, 124.76it/s]
 93%|█████████▎| 161/173 [00:01<00:00, 126.69it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 386.69it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 127.24it/s]
 16%|█▌        | 27/173 [00:00<00:01, 130.02it/s]
 23%|██▎       | 40/173 [00:00<00:01, 118.59it/s]
 31%|███       | 53/173 [00:00<00:00, 120.83it/s]
 38%|███▊      | 66/173 [00:00<00:00, 120.27it/s]
 46%|████▌     | 79/173 [00:00<00:00, 120.75it/s]
 53%|█████▎    | 

[2m[36m(func pid=21148)[0m Epoch 00144: reducing learning rate of group 0 to 2.3362e-06.


  7%|▋         | 12/173 [00:00<00:01, 111.81it/s]
 14%|█▍        | 24/173 [00:00<00:01, 111.35it/s]
 21%|██        | 36/173 [00:00<00:01, 110.14it/s]
 28%|██▊       | 48/173 [00:00<00:01, 109.77it/s]
 35%|███▍      | 60/173 [00:00<00:01, 111.04it/s]
 42%|████▏     | 72/173 [00:00<00:00, 111.04it/s]
 49%|████▊     | 84/173 [00:00<00:00, 112.00it/s]
 55%|█████▌    | 96/173 [00:00<00:00, 112.47it/s]
 62%|██████▏   | 108/173 [00:00<00:00, 112.44it/s]
 69%|██████▉   | 120/173 [00:01<00:00, 112.52it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 112.20it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 111.91it/s]
 90%|█████████ | 156/173 [00:01<00:00, 113.23it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 12%|█▏        | 6/50 [00:00<00:00, 57.91it/s]
100%|██████████| 50/50 [00:00<00:00, 211.65it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 111.68it/s]
 14%|█▍        | 24/173 [00:00<00:01, 104.69it/s]
 21%|██        | 36/173 [00:00<00:01, 107.72it/s]
 28%|██▊       | 49/

[2m[36m(func pid=21148)[0m Epoch 00150: reducing learning rate of group 0 to 1.1681e-06.


2023-03-24 20:23:36,196	ERROR trial_runner.py:1088 -- Trial train_model_f5d07_00015: Error processing event.
ray.exceptions.RayTaskError(RuntimeError): [36mray::ImplicitFunc.train()[39m (pid=21148, ip=127.0.0.1, repr=func)
  File "python\ray\_raylet.pyx", line 830, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 834, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 780, in ray._raylet.execute_task.function_executor
  File "c:\Code\hydro-ml\my_env\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "c:\Code\hydro-ml\my_env\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Code\hydro-ml\my_env\lib\site-packages\ray\tune\trainable\trainable.py", line 367, in train
    raise skipped from exception_cause(skipped)
  File "c:\Code\hydro-ml\my_env\lib\site-packages\ray\tune\trainable\

TuneError: ('Trials did not complete', [train_model_f5d07_00001, train_model_f5d07_00002, train_model_f5d07_00003, train_model_f5d07_00005, train_model_f5d07_00006, train_model_f5d07_00007, train_model_f5d07_00009, train_model_f5d07_00010, train_model_f5d07_00011, train_model_f5d07_00012, train_model_f5d07_00013, train_model_f5d07_00014, train_model_f5d07_00015])