In [1]:
import os
import sys

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from models import new_models
from config import load_data

from ray import tune
from ray.air.integrations.mlflow import MLflowLoggerCallback
from ray.tune.schedulers import ASHAScheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau

import mlflow
from mlflow.tracking import MlflowClient

# Training setup

In [2]:
def fit(net, loss_function, optimizer, data_loader, num_epochs, mode, lr_scheduler, use_amp=False):
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp) # Mixed-precision support for compatible GPUs
    for epoch in range(num_epochs):
        if epoch < num_epochs - 1:
            keys = ["train", "val"]
        else:
            keys = ["train", "val", "test"]
        for key in keys:
            dataset_size = 0
            dataset_loss = 0.0
            if key == "train":
                net.train()
            else:
                net.eval()
            for X_batch, y_batch in tqdm(data_loader[key]):
                X_batch, y_batch = X_batch.to(mode["device"]), y_batch.to(mode["device"])
                with torch.set_grad_enabled(mode=(key=="train")): # Autograd activated only during training
                    with torch.cuda.amp.autocast(enabled=False): # Mixed-precision support for compatible GPUs
                        batch_output = net(X_batch.float())
                        batch_loss = loss_function(batch_output, y_batch)
                    if key == "train":
                        scaler.scale(batch_loss).backward()
                        scaler.step(optimizer) 	
                        scaler.update()
                        optimizer.zero_grad()
                dataset_size += y_batch.shape[0]
                dataset_loss += y_batch.shape[0] * batch_loss.item()

            dataset_loss /= dataset_size

            # Report results to Ray Tune
            if key == "train":
                tune.report(train_loss=dataset_loss)
            elif key == "val":
                # Update learning rate
                lr_scheduler.step(metrics=dataset_loss)
                tune.report(val_loss=dataset_loss)
            else:
                tune.report(test_loss=dataset_loss)
    return net

In [3]:
from config import load_data

def train_model(config, data_dir):

    use_GPU = torch.cuda.is_available()
    if use_GPU:
        mode = {"name": "cuda", "device": torch.device("cuda")}
    else:
        mode = {"name": "cpu", "device": torch.device("cpu")}

    # Define hyperparameters
    train_size = 0.7
    val_size = 0.2
    test_size = 0.1

    sequence_length = config['sequence_length']
    batch_size = config['batch_size']
    num_epochs = config['num_epochs']
    lr = config['lr']
    weight_decay = config['weigth_decay']
    vars = config['variables']

    ld = load_data(data_dir = data_dir, target_variable = config['target_variable'])
    
    X, y = ld.create_lagged_matrix(window_size=sequence_length, vars_to_lag=vars)

    X_train, y_train, X_val, y_val, X_test, y_test = ld.split_data(X, y, train_size=train_size, val_size=val_size, test_size=test_size)

    train_dataloader = ld.create_dataloader(X_train, y_train, sequence_length, batch_size=batch_size, shuffle=True)
    val_dataloader = ld.create_dataloader(X_val, y_val, sequence_length, batch_size=batch_size, shuffle=True)
    test_dataloader = ld.create_dataloader(X_test, y_test, sequence_length, batch_size=batch_size, shuffle=False)
    
    # Model inputs
    if vars:
        input_size = len(vars) + 1
    else:
        input_size = 1
    hidden_size = config['hidden_size']
    num_layers = config['num_layers']
    output_size = 1

    if config['arch'] == "FCN":
        net = new_models.FCN(input_size,
                    hidden_size,
                    num_layers,
                    output_size,
                    )
    elif config['arch'] ==  "FCNTemporalAttention":
        net = new_models.FCNTemporalAttention(input_size,
                    hidden_size,
                    num_layers,
                    output_size,
                    )
    elif config['arch'] == "LSTM":
        net = new_models.LSTM(input_size,
                    hidden_size,
                    num_layers,
                    output_size,
                    )
    elif config['arch'] == "LSTMTemporalAttention":
        net = new_models.LSTMTemporalAttention(input_size,
                    hidden_size,
                    num_layers,
                    output_size,
                    )

    data_loader = {
    "train": train_dataloader,
    "val": val_dataloader,
    "test": test_dataloader,
    }
    
    net.to(mode["device"])

    loss_function = nn.MSELoss().to(mode["device"])
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)

    # Define your learning rate scheduler
    lr_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)
                                           
    best_net = fit(net, loss_function, optimizer, data_loader, num_epochs, mode, lr_scheduler, use_amp=True)
    out_name = ""
    for k, v in config.items():
        if not k in ['weights_dir', 'cwd', 'variables']:
            out_name += '{}-{}_'.format(k, v)
    torch.save(best_net.state_dict(), os.path.join(config['cwd'], config['weights_dir'], out_name[:-1] + '.pth'))

# MLFlow setup

In [4]:
client = MlflowClient()
cwd = os.getcwd()
exp_base_name = "Test_of_interface"

created = 0
for i in range(100):
    try:
        exp_name = exp_base_name+"_{}".format(i)
        experiment_id = client.create_experiment(exp_name)
        created=1
        break
    except (TypeError, mlflow.exceptions.MlflowException):
        continue

if not created:
    print("ERROR: Try new experiment name.")
    sys.exit(1)

weights_root = "./model_weights/"
weights_dir = weights_root+exp_name+'/'
os.mkdir(weights_dir)

In [5]:
data_dir = "./data/"
target_variable = 'Q_Kalltveit'

# Start experiments

In [6]:
from functools import partial

config = {
    "mlflow_experiment_id": experiment_id,
    "weights_dir": weights_dir,
    "cwd": cwd,
    "target_variable": target_variable,
    "arch": tune.grid_search(["LSTMTemporalAttention"]), # "FCN", "FCNTemporalAttention", 
    "sequence_length": tune.grid_search([25]),
    'num_epochs': tune.grid_search([150]),
    'num_layers': tune.choice([2, 3, 4]),
    "lr": tune.loguniform(1e-4, 1e-1),
    "weigth_decay": tune.choice([0, 0.001, 0.0001]),
    "batch_size": tune.choice([256, 256*2]),
    "hidden_size": tune.grid_search([64]),
    "variables": tune.grid_search([
        None, 
        ["Air Temperature Nilsebu"], 
        ["Vanntemp. Lyngsåna"], 
        ["Relative Humidity Nilsebu"], 
        ["Wind Direction Nilsebu"], 
        ["Wind Speed Nilsebu"], 
        ["Air Temperature Nilsebu", "Vanntemp. Lyngsåna"]
        ["Air Temperature Nilsebu","Relative Humidity Nilsebu","Wind Direction Nilsebu"]
        ["Air Temperature Nilsebu","Relative Humidity Nilsebu","Wind Direction Nilsebu","Wind Speed Nilsebu"]
        ["Air Temperature Nilsebu","Vanntemp. Lyngsåna","Relative Humidity Nilsebu","Wind Direction Nilsebu","Wind Speed Nilsebu"]
        ])
}

["Wind Speed Nilsebu", "Wind Direction Nilsebu", "Relative Humidity Nilsebu", "Air Temperature Nilsebu", "Water Temperature Kalltveit Kum", "Water Level Kalltveit", "Vanntemp. Lyngsåna"]

analysis = tune.run(
    partial(train_model, data_dir=data_dir),
    config=config,
    resources_per_trial={"cpu": 12, "gpu": 1},
    num_samples=2,
    callbacks=[MLflowLoggerCallback(experiment_name=exp_name)],
)

2023-03-25 00:02:48,906	INFO worker.py:1538 -- Started a local Ray instance.


0,1
Current time:,2023-03-25 00:02:56
Running for:,00:00:04.15
Memory:,14.6/31.9 GiB

Trial name,status,loc,arch,batch_size,hidden_size,lr,num_epochs,num_layers,sequence_length,variables,weigth_decay
train_model_00d49_00000,RUNNING,127.0.0.1:10704,LSTMTemporalAtt_50d0,512,64,0.00971619,150,2,25,,0.001
train_model_00d49_00001,PENDING,,LSTMTemporalAtt_50d0,256,64,0.000307932,150,2,25,['Q_Lyngsaana'],0.0001
train_model_00d49_00002,PENDING,,LSTMTemporalAtt_50d0,512,64,0.000533051,150,3,25,['Q_Kalltveit_u_6108,0.001
train_model_00d49_00003,PENDING,,LSTMTemporalAtt_50d0,512,64,0.00169172,150,3,25,['TEMP_HBV'],0.0001
train_model_00d49_00004,PENDING,,LSTMTemporalAtt_50d0,256,64,0.00187616,150,2,25,"['Q_Lyngsaana',_6888",0.0
train_model_00d49_00005,PENDING,,LSTMTemporalAtt_50d0,256,64,0.0743784,150,4,25,"['Q_Lyngsaana',_6748",0.001
train_model_00d49_00006,PENDING,,LSTMTemporalAtt_50d0,256,64,0.0611585,150,2,25,"['Q_Lyngsaana',_b5c8",0.001
train_model_00d49_00007,PENDING,,LSTMTemporalAtt_50d0,512,64,0.000830929,150,3,25,,0.0
train_model_00d49_00008,PENDING,,LSTMTemporalAtt_50d0,256,64,0.000119253,150,3,25,['Q_Lyngsaana'],0.001
train_model_00d49_00009,PENDING,,LSTMTemporalAtt_50d0,512,64,0.0584233,150,2,25,['Q_Kalltveit_u_6188,0.0


  0%|          | 0/87 [00:00<?, ?it/s]
  1%|          | 1/87 [00:00<00:23,  3.61it/s]
 11%|█▏        | 10/87 [00:00<00:02, 32.65it/s]
 21%|██        | 18/87 [00:00<00:01, 47.00it/s]
 30%|██▉       | 26/87 [00:00<00:01, 57.09it/s]
 41%|████▏     | 36/87 [00:00<00:00, 68.43it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 76.80it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 83.60it/s]
 76%|███████▌  | 66/87 [00:01<00:00, 87.97it/s]
 87%|████████▋ | 76/87 [00:01<00:00, 90.96it/s]


Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,train_loss,training_iteration,trial_id,warmup_time
train_model_00d49_00000,2023-03-25_00-03-05,False,,d58bc3666fdd48679064d0b5303ffdd3,DESKTOP-D4IVECG,11,127.0.0.1,10704,9.14198,0.967369,9.14198,1679698985,0,,3.30604,11,00d49_00000,0.0147445


[2m[36m(func pid=10704)[0m Epoch 00025: reducing learning rate of group 0 to 4.8581e-03.


 11%|█▏        | 10/87 [00:00<00:00, 95.74it/s]
 23%|██▎       | 20/87 [00:00<00:00, 95.57it/s]
 34%|███▍      | 30/87 [00:00<00:00, 95.96it/s]
 46%|████▌     | 40/87 [00:00<00:00, 93.27it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 95.18it/s]
 70%|███████   | 61/87 [00:00<00:00, 97.50it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 216.93it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 96.27it/s]
 24%|██▍       | 21/87 [00:00<00:00, 98.75it/s]
 36%|███▌      | 31/87 [00:00<00:00, 96.58it/s]
 47%|████▋     | 41/87 [00:00<00:00, 97.64it/s]
 60%|█████▉    | 52/87 [00:00<00:00, 97.45it/s]
 71%|███████▏  | 62/87 [00:00<00:00, 98.23it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 98.73it/s]
100%|██████████| 87/87 [00:00<00:00, 98.64it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 219.00it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 97.75it/s]
 24%|██▍       | 21/87 [00

[2m[36m(func pid=10704)[0m Epoch 00032: reducing learning rate of group 0 to 2.4290e-03.


 11%|█▏        | 10/87 [00:00<00:00, 98.11it/s]
 23%|██▎       | 20/87 [00:00<00:00, 93.15it/s]
 34%|███▍      | 30/87 [00:00<00:00, 93.63it/s]
 46%|████▌     | 40/87 [00:00<00:00, 92.94it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 93.09it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 93.96it/s]
 80%|████████  | 70/87 [00:00<00:00, 93.41it/s]
100%|██████████| 87/87 [00:00<00:00, 91.38it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 197.56it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 86.34it/s]
 22%|██▏       | 19/87 [00:00<00:00, 91.80it/s]
 33%|███▎      | 29/87 [00:00<00:00, 92.22it/s]
 45%|████▍     | 39/87 [00:00<00:00, 92.68it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 94.53it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 93.29it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 93.78it/s]
100%|██████████| 87/87 [00:00<00:00, 93.34it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 127.31it/s]
  0%|          | 0

[2m[36m(func pid=10704)[0m Epoch 00044: reducing learning rate of group 0 to 1.2145e-03.


 25%|██▌       | 22/87 [00:00<00:00, 100.17it/s]
 38%|███▊      | 33/87 [00:00<00:00, 99.68it/s] 
 49%|████▉     | 43/87 [00:00<00:00, 99.05it/s]
 61%|██████    | 53/87 [00:00<00:00, 98.57it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 97.49it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 97.16it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 226.56it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 94.46it/s]
 23%|██▎       | 20/87 [00:00<00:00, 95.52it/s]
 34%|███▍      | 30/87 [00:00<00:00, 95.19it/s]
 47%|████▋     | 41/87 [00:00<00:00, 97.41it/s]
 59%|█████▊    | 51/87 [00:00<00:00, 97.44it/s]
 70%|███████   | 61/87 [00:00<00:00, 96.97it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 94.57it/s]
 93%|█████████▎| 81/87 [00:00<00:00, 93.44it/s]
100%|██████████| 87/87 [00:00<00:00, 95.27it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 112.81it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [

[2m[36m(func pid=10704)[0m Epoch 00051: reducing learning rate of group 0 to 6.0726e-04.


 24%|██▍       | 21/87 [00:00<00:00, 98.63it/s]
 36%|███▌      | 31/87 [00:00<00:00, 98.31it/s]
 48%|████▊     | 42/87 [00:00<00:00, 101.44it/s]
 61%|██████    | 53/87 [00:00<00:00, 96.32it/s] 
 72%|███████▏  | 63/87 [00:00<00:00, 95.01it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 93.18it/s]
100%|██████████| 87/87 [00:00<00:00, 95.86it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 76%|███████▌  | 19/25 [00:00<00:00, 117.81it/s]
100%|██████████| 25/25 [00:00<00:00, 134.13it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 98.94it/s]
 25%|██▌       | 22/87 [00:00<00:00, 102.79it/s]
 38%|███▊      | 33/87 [00:00<00:00, 101.99it/s]
 51%|█████     | 44/87 [00:00<00:00, 101.27it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 101.86it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 101.43it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 100.32it/s]
100%|██████████| 87/87 [00:00<00:00, 101.22it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 227.42it/s]
  0%|  

[2m[36m(func pid=10704)[0m Epoch 00062: reducing learning rate of group 0 to 3.0363e-04.


 11%|█▏        | 10/87 [00:00<00:00, 90.66it/s]
 23%|██▎       | 20/87 [00:00<00:00, 94.19it/s]
 34%|███▍      | 30/87 [00:00<00:00, 95.10it/s]
 46%|████▌     | 40/87 [00:00<00:00, 96.95it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 98.00it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 96.57it/s]
 80%|████████  | 70/87 [00:00<00:00, 97.60it/s]
100%|██████████| 87/87 [00:00<00:00, 97.70it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 227.90it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 93.38it/s]
 23%|██▎       | 20/87 [00:00<00:00, 96.68it/s]
 34%|███▍      | 30/87 [00:00<00:00, 97.70it/s]
 46%|████▌     | 40/87 [00:00<00:00, 96.99it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 97.93it/s]
 70%|███████   | 61/87 [00:00<00:00, 99.16it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 98.26it/s]
100%|██████████| 87/87 [00:00<00:00, 98.65it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 60%|██████    | 15/25 [00:00<00:00, 97.94it/s]
100%|██████████| 2

[2m[36m(func pid=10704)[0m Epoch 00069: reducing learning rate of group 0 to 1.5182e-04.


 23%|██▎       | 20/87 [00:00<00:00, 95.91it/s]
 36%|███▌      | 31/87 [00:00<00:00, 99.04it/s]
 47%|████▋     | 41/87 [00:00<00:00, 97.62it/s]
 59%|█████▊    | 51/87 [00:00<00:00, 97.56it/s]
 71%|███████▏  | 62/87 [00:00<00:00, 99.26it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 100.78it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 124.89it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 83.85it/s]
 21%|██        | 18/87 [00:00<00:00, 87.19it/s]
 31%|███       | 27/87 [00:00<00:00, 87.35it/s]
 43%|████▎     | 37/87 [00:00<00:00, 90.36it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 91.19it/s]
 66%|██████▌   | 57/87 [00:00<00:00, 89.39it/s]
 77%|███████▋  | 67/87 [00:00<00:00, 90.58it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 90.80it/s]
100%|██████████| 87/87 [00:00<00:00, 88.43it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 203.21it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00

[2m[36m(func pid=10704)[0m Epoch 00084: reducing learning rate of group 0 to 3.7954e-05.


 11%|█▏        | 10/87 [00:00<00:00, 93.41it/s]
 23%|██▎       | 20/87 [00:00<00:00, 92.44it/s]
 34%|███▍      | 30/87 [00:00<00:00, 92.53it/s]
 46%|████▌     | 40/87 [00:00<00:00, 89.60it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 92.18it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 91.75it/s]
 80%|████████  | 70/87 [00:00<00:00, 92.69it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 90.99it/s]
100%|██████████| 87/87 [00:00<00:00, 91.28it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 127.09it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 93.50it/s]
 23%|██▎       | 20/87 [00:00<00:00, 95.65it/s]
 36%|███▌      | 31/87 [00:00<00:00, 99.39it/s]
 47%|████▋     | 41/87 [00:00<00:00, 98.53it/s]
 59%|█████▊    | 51/87 [00:00<00:00, 97.28it/s]
 70%|███████   | 61/87 [00:00<00:00, 97.80it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 99.17it/s]
100%|██████████| 87/87 [00:00<00:00, 98.63it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 2

[2m[36m(func pid=10704)[0m Epoch 00091: reducing learning rate of group 0 to 1.8977e-05.


 13%|█▎        | 11/87 [00:00<00:00, 102.26it/s]
 25%|██▌       | 22/87 [00:00<00:00, 100.66it/s]
 38%|███▊      | 33/87 [00:00<00:00, 96.72it/s] 
 49%|████▉     | 43/87 [00:00<00:00, 97.91it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 97.68it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 98.31it/s]
 95%|█████████▌| 83/87 [00:00<00:00, 97.16it/s]
100%|██████████| 87/87 [00:00<00:00, 97.75it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 213.60it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 102.51it/s]
 25%|██▌       | 22/87 [00:00<00:00, 98.99it/s] 
 37%|███▋      | 32/87 [00:00<00:00, 98.92it/s]
 48%|████▊     | 42/87 [00:00<00:00, 98.38it/s]
 60%|█████▉    | 52/87 [00:00<00:00, 96.47it/s]
 71%|███████▏  | 62/87 [00:00<00:00, 95.44it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 95.95it/s]
100%|██████████| 87/87 [00:00<00:00, 96.21it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 217.28it/s]
  0%|       

[2m[36m(func pid=10704)[0m Epoch 00097: reducing learning rate of group 0 to 9.4885e-06.


 10%|█         | 9/87 [00:00<00:00, 88.70it/s]
 21%|██        | 18/87 [00:00<00:00, 88.87it/s]
 32%|███▏      | 28/87 [00:00<00:00, 89.49it/s]
 44%|████▎     | 38/87 [00:00<00:00, 90.42it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 90.86it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 94.27it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 93.82it/s]
100%|██████████| 87/87 [00:00<00:00, 91.40it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 212.51it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 84.25it/s]
 22%|██▏       | 19/87 [00:00<00:00, 89.05it/s]
 33%|███▎      | 29/87 [00:00<00:00, 93.12it/s]
 45%|████▍     | 39/87 [00:00<00:00, 92.66it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 92.19it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 93.46it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 92.65it/s]
100%|██████████| 87/87 [00:00<00:00, 92.20it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 210.05it/s]
  0%|          | 0/

[2m[36m(func pid=10704)[0m Epoch 00104: reducing learning rate of group 0 to 4.7442e-06.


 11%|█▏        | 10/87 [00:00<00:00, 94.25it/s]
 23%|██▎       | 20/87 [00:00<00:00, 93.24it/s]
 34%|███▍      | 30/87 [00:00<00:00, 94.98it/s]
 46%|████▌     | 40/87 [00:00<00:00, 96.09it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 95.49it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 96.21it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 213.70it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 89.42it/s]
 23%|██▎       | 20/87 [00:00<00:00, 91.75it/s]
 34%|███▍      | 30/87 [00:00<00:00, 91.37it/s]
 46%|████▌     | 40/87 [00:00<00:00, 92.78it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 94.61it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 95.83it/s]
 80%|████████  | 70/87 [00:00<00:00, 96.25it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 97.02it/s]
100%|██████████| 87/87 [00:00<00:00, 95.61it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 128.74it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00

[2m[36m(func pid=10704)[0m Epoch 00125: reducing learning rate of group 0 to 2.3721e-06.


  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 93.77it/s]
 23%|██▎       | 20/87 [00:00<00:00, 93.54it/s]
 34%|███▍      | 30/87 [00:00<00:00, 93.57it/s]
 46%|████▌     | 40/87 [00:00<00:00, 93.41it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 94.77it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 94.64it/s]
 80%|████████  | 70/87 [00:00<00:00, 95.93it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 211.71it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 89.16it/s]
 23%|██▎       | 20/87 [00:00<00:00, 92.49it/s]
 36%|███▌      | 31/87 [00:00<00:00, 96.44it/s]
 47%|████▋     | 41/87 [00:00<00:00, 92.78it/s]
 59%|█████▊    | 51/87 [00:00<00:00, 91.88it/s]
 70%|███████   | 61/87 [00:00<00:00, 91.55it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 92.74it/s]
 93%|█████████▎| 81/87 [00:00<00:00, 88.32it/s]
100%|██████████| 87/87 [00:01<00:00, 86.88it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:

[2m[36m(func pid=10704)[0m Epoch 00131: reducing learning rate of group 0 to 1.1861e-06.


 11%|█▏        | 10/87 [00:00<00:00, 92.69it/s]
 23%|██▎       | 20/87 [00:00<00:00, 94.11it/s]
 34%|███▍      | 30/87 [00:00<00:00, 95.16it/s]
 46%|████▌     | 40/87 [00:00<00:00, 96.56it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 96.76it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 96.22it/s]
 80%|████████  | 70/87 [00:00<00:00, 93.00it/s]
100%|██████████| 87/87 [00:00<00:00, 94.79it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 218.75it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 90.40it/s]
 24%|██▍       | 21/87 [00:00<00:00, 94.61it/s]
 36%|███▌      | 31/87 [00:00<00:00, 93.71it/s]
 47%|████▋     | 41/87 [00:00<00:00, 95.99it/s]
 59%|█████▊    | 51/87 [00:00<00:00, 94.47it/s]
 70%|███████   | 61/87 [00:00<00:00, 93.50it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 92.81it/s]
100%|██████████| 87/87 [00:00<00:00, 92.08it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 121.98it/s]
  0%|          | 

[2m[36m(func pid=10704)[0m Epoch 00137: reducing learning rate of group 0 to 5.9303e-07.


  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 93.39it/s]
 23%|██▎       | 20/87 [00:00<00:00, 92.41it/s]
 34%|███▍      | 30/87 [00:00<00:00, 90.68it/s]
 46%|████▌     | 40/87 [00:00<00:00, 92.10it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 86.14it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 88.10it/s]
 80%|████████  | 70/87 [00:00<00:00, 90.26it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 91.28it/s]
100%|██████████| 87/87 [00:00<00:00, 90.64it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 222.29it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 95.95it/s]
 23%|██▎       | 20/87 [00:00<00:00, 93.89it/s]
 34%|███▍      | 30/87 [00:00<00:00, 95.47it/s]
 46%|████▌     | 40/87 [00:00<00:00, 93.96it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 94.53it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 93.93it/s]
 80%|████████  | 70/87 [00:00<00:00, 94.17it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 94.51it/s]
100%|██████████| 8

[2m[36m(func pid=1772)[0m Epoch 00088: reducing learning rate of group 0 to 1.5397e-04.


  8%|▊         | 14/173 [00:00<00:01, 132.58it/s]
 16%|█▌        | 28/173 [00:00<00:01, 134.57it/s]
 24%|██▍       | 42/173 [00:00<00:00, 134.50it/s]
 32%|███▏      | 56/173 [00:00<00:00, 126.85it/s]
 40%|████      | 70/173 [00:00<00:00, 129.53it/s]
 49%|████▊     | 84/173 [00:00<00:00, 130.01it/s]
 57%|█████▋    | 98/173 [00:00<00:00, 129.53it/s]
 64%|██████▍   | 111/173 [00:00<00:00, 127.29it/s]
 72%|███████▏  | 125/173 [00:00<00:00, 130.70it/s]
 81%|████████  | 140/173 [00:01<00:00, 134.05it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 135.45it/s]
100%|██████████| 173/173 [00:01<00:00, 132.68it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 384.47it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 128.58it/s]
 16%|█▌        | 27/173 [00:00<00:01, 135.24it/s]
 24%|██▎       | 41/173 [00:00<00:01, 125.69it/s]
 31%|███       | 54/173 [00:00<00:00, 120.46it/s]
 39%|███▊      | 67/173 [00:00<00:00, 121.58it/s]
 46%|████▌     | 

[2m[36m(func pid=1772)[0m Epoch 00103: reducing learning rate of group 0 to 7.6983e-05.


  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 132.95it/s]
 16%|█▌        | 28/173 [00:00<00:01, 136.64it/s]
 24%|██▍       | 42/173 [00:00<00:01, 114.38it/s]
 32%|███▏      | 56/173 [00:00<00:00, 122.88it/s]
 40%|████      | 70/173 [00:00<00:00, 126.00it/s]
 49%|████▊     | 84/173 [00:00<00:00, 127.77it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 125.63it/s]
 64%|██████▎   | 110/173 [00:00<00:00, 122.79it/s]
 71%|███████   | 123/173 [00:00<00:00, 122.13it/s]
 79%|███████▊  | 136/173 [00:01<00:00, 122.65it/s]
 87%|████████▋ | 150/173 [00:01<00:00, 126.97it/s]
 76%|███████▌  | 38/50 [00:00<00:00, 371.81it/s]
100%|██████████| 50/50 [00:00<00:00, 374.49it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 138.01it/s]
 16%|█▌        | 28/173 [00:00<00:01, 130.05it/s]
 24%|██▍       | 42/173 [00:00<00:01, 126.28it/s]
 32%|███▏      | 55/173 [00:00<00:01, 117.11it/s]
 39%|███▉      | 68/173 [00:00<00:00, 118.50it/s]
 46%|████▌     | 8

[2m[36m(func pid=1772)[0m Epoch 00115: reducing learning rate of group 0 to 3.8491e-05.


  8%|▊         | 13/173 [00:00<00:01, 128.73it/s]
 16%|█▌        | 27/173 [00:00<00:01, 132.79it/s]
 24%|██▎       | 41/173 [00:00<00:00, 135.66it/s]
 32%|███▏      | 55/173 [00:00<00:00, 133.73it/s]
 40%|███▉      | 69/173 [00:00<00:01, 103.18it/s]
 48%|████▊     | 83/173 [00:00<00:00, 112.43it/s]
 57%|█████▋    | 98/173 [00:00<00:00, 121.33it/s]
 65%|██████▌   | 113/173 [00:00<00:00, 128.02it/s]
 74%|███████▍  | 128/173 [00:01<00:00, 132.44it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 133.35it/s]
 90%|█████████ | 156/173 [00:01<00:00, 133.49it/s]
100%|██████████| 173/173 [00:01<00:00, 127.96it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 390.47it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 133.06it/s]
 16%|█▌        | 28/173 [00:00<00:01, 135.72it/s]
 24%|██▍       | 42/173 [00:00<00:00, 135.01it/s]
 32%|███▏      | 56/173 [00:00<00:00, 136.34it/s]
 40%|████      | 70/173 [00:00<00:00, 134.81it/s]
 49%|████▊     | 

[2m[36m(func pid=1772)[0m Epoch 00121: reducing learning rate of group 0 to 1.9246e-05.


 13%|█▎        | 22/173 [00:00<00:01, 105.25it/s]
 20%|█▉        | 34/173 [00:00<00:01, 107.68it/s]
 26%|██▌       | 45/173 [00:00<00:01, 106.47it/s]
 32%|███▏      | 56/173 [00:00<00:01, 102.72it/s]
 39%|███▊      | 67/173 [00:00<00:01, 103.87it/s]
 45%|████▌     | 78/173 [00:00<00:00, 105.31it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 107.16it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 106.93it/s]
 66%|██████▌   | 114/173 [00:01<00:00, 111.99it/s]
 73%|███████▎  | 126/173 [00:01<00:00, 113.28it/s]
 80%|████████  | 139/173 [00:01<00:00, 115.23it/s]
 87%|████████▋ | 151/173 [00:01<00:00, 112.80it/s]
 94%|█████████▍| 163/173 [00:01<00:00, 114.05it/s]
100%|██████████| 173/173 [00:01<00:00, 109.99it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 70%|███████   | 35/50 [00:00<00:00, 340.64it/s]
100%|██████████| 50/50 [00:00<00:00, 346.24it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 120.63it/s]
 15%|█▌        | 26/173 [00:00<00:01, 115.88it/s]
 22%|██▏       |

[2m[36m(func pid=1772)[0m Epoch 00145: reducing learning rate of group 0 to 9.6229e-06.


  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 122.22it/s]
 16%|█▌        | 27/173 [00:00<00:01, 130.99it/s]
 24%|██▎       | 41/173 [00:00<00:01, 126.08it/s]
 32%|███▏      | 56/173 [00:00<00:00, 132.41it/s]
 40%|████      | 70/173 [00:00<00:00, 128.94it/s]
 49%|████▊     | 84/173 [00:00<00:00, 130.04it/s]
 57%|█████▋    | 98/173 [00:00<00:00, 128.59it/s]
 64%|██████▍   | 111/173 [00:00<00:00, 127.97it/s]
 72%|███████▏  | 125/173 [00:00<00:00, 130.38it/s]
 80%|████████  | 139/173 [00:01<00:00, 127.89it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 129.20it/s]
 97%|█████████▋| 167/173 [00:01<00:00, 130.20it/s]
100%|██████████| 50/50 [00:00<00:00, 371.59it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 92.94it/s]
 13%|█▎        | 23/173 [00:00<00:01, 106.96it/s]
 21%|██        | 36/173 [00:00<00:01, 114.98it/s]
 28%|██▊       | 48/173 [00:00<00:01, 115.98it/s]
 35%|███▍      | 60/173 [00:00<00:00, 114.58it/s]
 43%|████▎     | 

[2m[36m(func pid=19772)[0m Epoch 00089: reducing learning rate of group 0 to 2.6653e-04.


 21%|██        | 18/87 [00:00<00:00, 84.92it/s]
 31%|███       | 27/87 [00:00<00:00, 85.29it/s]
 41%|████▏     | 36/87 [00:00<00:00, 81.79it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 81.14it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 82.26it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 82.33it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 82.94it/s]
 93%|█████████▎| 81/87 [00:00<00:00, 84.20it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 195.18it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 86.39it/s]
 21%|██        | 18/87 [00:00<00:00, 87.47it/s]
 31%|███       | 27/87 [00:00<00:00, 87.88it/s]
 41%|████▏     | 36/87 [00:00<00:00, 85.91it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 65.96it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 70.77it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 75.27it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 77.41it/s]
100%|██████████| 87/87 [00:01<00:00, 78.56it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25

[2m[36m(func pid=19772)[0m Epoch 00096: reducing learning rate of group 0 to 1.3326e-04.


 10%|█         | 9/87 [00:00<00:00, 87.41it/s]
 21%|██        | 18/87 [00:00<00:00, 86.94it/s]
 31%|███       | 27/87 [00:00<00:00, 86.70it/s]
 41%|████▏     | 36/87 [00:00<00:00, 86.30it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 85.25it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 85.06it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 85.09it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 85.25it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 84%|████████▍ | 21/25 [00:00<00:00, 200.54it/s]
100%|██████████| 25/25 [00:00<00:00, 122.69it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 87.44it/s]
 21%|██        | 18/87 [00:00<00:00, 88.42it/s]
 31%|███       | 27/87 [00:00<00:00, 86.02it/s]
 41%|████▏     | 36/87 [00:00<00:00, 87.20it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 87.77it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 87.97it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 87.00it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 87.14it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25

[2m[36m(func pid=19772)[0m Epoch 00112: reducing learning rate of group 0 to 6.6631e-05.


 10%|█         | 9/87 [00:00<00:00, 86.42it/s]
 21%|██        | 18/87 [00:00<00:00, 86.93it/s]
 31%|███       | 27/87 [00:00<00:00, 87.88it/s]
 41%|████▏     | 36/87 [00:00<00:00, 81.98it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 83.32it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 84.19it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 85.47it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 85.87it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 185.60it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:00, 79.97it/s]
 20%|█▉        | 17/87 [00:00<00:00, 81.13it/s]
 30%|██▉       | 26/87 [00:00<00:00, 81.84it/s]
 40%|████      | 35/87 [00:00<00:00, 83.63it/s]
 51%|█████     | 44/87 [00:00<00:00, 83.53it/s]
 61%|██████    | 53/87 [00:00<00:00, 82.92it/s]
 71%|███████▏  | 62/87 [00:00<00:00, 84.34it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 83.68it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 183.41it/s]
  0%|          | 0/

[2m[36m(func pid=19772)[0m Epoch 00123: reducing learning rate of group 0 to 3.3316e-05.


 10%|█         | 9/87 [00:00<00:00, 89.13it/s]
 22%|██▏       | 19/87 [00:00<00:00, 90.19it/s]
 33%|███▎      | 29/87 [00:00<00:00, 86.17it/s]
 44%|████▎     | 38/87 [00:00<00:00, 84.92it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 84.44it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 83.64it/s]
 75%|███████▍  | 65/87 [00:00<00:00, 83.77it/s]
 85%|████████▌ | 74/87 [00:00<00:00, 84.49it/s]
100%|██████████| 87/87 [00:01<00:00, 85.81it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 80%|████████  | 20/25 [00:00<00:00, 194.30it/s]
100%|██████████| 25/25 [00:00<00:00, 193.96it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 85.75it/s]
 21%|██        | 18/87 [00:00<00:01, 60.41it/s]
 31%|███       | 27/87 [00:00<00:00, 69.66it/s]
 41%|████▏     | 36/87 [00:00<00:00, 76.42it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 81.12it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 83.56it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 84.80it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25

[2m[36m(func pid=19772)[0m Epoch 00129: reducing learning rate of group 0 to 1.6658e-05.


  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 82.98it/s]
 21%|██        | 18/87 [00:00<00:00, 83.91it/s]
 31%|███       | 27/87 [00:00<00:00, 82.88it/s]
 41%|████▏     | 36/87 [00:00<00:00, 83.04it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 81.27it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 82.93it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 83.64it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 196.28it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 82.89it/s]
 21%|██        | 18/87 [00:00<00:00, 85.18it/s]
 31%|███       | 27/87 [00:00<00:00, 85.10it/s]
 41%|████▏     | 36/87 [00:00<00:00, 86.17it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 86.74it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 85.83it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 85.26it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 85.33it/s]
 93%|█████████▎| 81/87 [00:00<00:00, 86.56it/s]
100%|██████████| 87/87 [00:01<00:00, 86.07it/s]
  0%|          | 0/2

[2m[36m(func pid=19772)[0m Epoch 00138: reducing learning rate of group 0 to 8.3289e-06.


 21%|██        | 18/87 [00:00<00:00, 88.33it/s]
 31%|███       | 27/87 [00:00<00:00, 87.82it/s]
 41%|████▏     | 36/87 [00:00<00:00, 85.90it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 85.67it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 85.88it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 83.69it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 81.97it/s]
 93%|█████████▎| 81/87 [00:00<00:00, 82.82it/s]
100%|██████████| 87/87 [00:01<00:00, 84.56it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 193.47it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 78.33it/s]
 20%|█▉        | 17/87 [00:00<00:00, 81.42it/s]
 30%|██▉       | 26/87 [00:00<00:00, 79.94it/s]
 40%|████      | 35/87 [00:00<00:00, 83.02it/s]
 51%|█████     | 44/87 [00:00<00:00, 85.15it/s]
 61%|██████    | 53/87 [00:00<00:00, 86.48it/s]
 71%|███████▏  | 62/87 [00:00<00:00, 85.39it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 85.26it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 85.68it/s]
100%|█████

  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:03, 26.17it/s]
 18%|█▊        | 16/87 [00:00<00:01, 43.61it/s]
 26%|██▋       | 23/87 [00:00<00:01, 51.33it/s]
 36%|███▌      | 31/87 [00:00<00:00, 60.05it/s]
 46%|████▌     | 40/87 [00:00<00:00, 68.57it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 73.29it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 78.12it/s]
 77%|███████▋  | 67/87 [00:01<00:00, 79.98it/s]
100%|██████████| 87/87 [00:01<00:00, 65.39it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 188.70it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  3%|▎         | 3/87 [00:00<00:03, 24.16it/s]
 14%|█▍        | 12/87 [00:00<00:01, 58.64it/s]
 24%|██▍       | 21/87 [00:00<00:00, 70.18it/s]
 34%|███▍      | 30/87 [00:00<00:00, 77.05it/s]
 45%|████▍     | 39/87 [00:00<00:00, 79.28it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 79.89it/s]
 66%|██████▌   | 57/87 [00:00<00:00, 81.76it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 82.49it/s]
 86%|████████▌ | 75/

[2m[36m(func pid=20408)[0m Epoch 00055: reducing learning rate of group 0 to 8.4586e-04.


 21%|██        | 18/87 [00:00<00:00, 87.75it/s]
 32%|███▏      | 28/87 [00:00<00:00, 88.91it/s]
 43%|████▎     | 37/87 [00:00<00:00, 86.97it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 86.27it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 87.88it/s]
 75%|███████▍  | 65/87 [00:00<00:00, 88.51it/s]
 85%|████████▌ | 74/87 [00:00<00:00, 88.45it/s]
100%|██████████| 87/87 [00:00<00:00, 87.18it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 199.35it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 83.90it/s]
 21%|██        | 18/87 [00:00<00:00, 83.58it/s]
 31%|███       | 27/87 [00:00<00:00, 84.63it/s]
 41%|████▏     | 36/87 [00:00<00:00, 85.55it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 86.15it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 85.63it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 86.44it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 85.62it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 181.90it/s]
  0%|          | 0

[2m[36m(func pid=20408)[0m Epoch 00076: reducing learning rate of group 0 to 4.2293e-04.


 18%|█▊        | 16/87 [00:00<00:00, 76.70it/s]
 29%|██▊       | 25/87 [00:00<00:00, 79.81it/s]
 39%|███▉      | 34/87 [00:00<00:00, 80.21it/s]
 49%|████▉     | 43/87 [00:00<00:00, 79.79it/s]
 59%|█████▊    | 51/87 [00:00<00:00, 77.90it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 77.35it/s]
 77%|███████▋  | 67/87 [00:00<00:00, 77.35it/s]
 86%|████████▌ | 75/87 [00:00<00:00, 77.59it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 172.18it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  2%|▏         | 2/87 [00:00<00:04, 18.08it/s]
 13%|█▎        | 11/87 [00:00<00:01, 55.67it/s]
 23%|██▎       | 20/87 [00:00<00:00, 70.16it/s]
 33%|███▎      | 29/87 [00:00<00:00, 74.76it/s]
 45%|████▍     | 39/87 [00:00<00:00, 81.27it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 83.44it/s]
 66%|██████▌   | 57/87 [00:00<00:00, 84.65it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 85.08it/s]
 86%|████████▌ | 75/87 [00:00<00:00, 83.92it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 64%|██████▍   | 16

[2m[36m(func pid=20408)[0m Epoch 00086: reducing learning rate of group 0 to 2.1146e-04.


 22%|██▏       | 19/87 [00:00<00:00, 90.08it/s]
 33%|███▎      | 29/87 [00:00<00:00, 88.04it/s]
 44%|████▎     | 38/87 [00:00<00:00, 88.40it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 87.72it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 87.93it/s]
 75%|███████▍  | 65/87 [00:00<00:00, 82.71it/s]
 85%|████████▌ | 74/87 [00:00<00:00, 79.04it/s]
 95%|█████████▌| 83/87 [00:00<00:00, 79.47it/s]
100%|██████████| 87/87 [00:01<00:00, 83.53it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 32%|███▏      | 8/25 [00:00<00:00, 64.03it/s]
100%|██████████| 25/25 [00:00<00:00, 116.23it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 84.86it/s]
 21%|██        | 18/87 [00:00<00:00, 83.85it/s]
 31%|███       | 27/87 [00:00<00:00, 82.98it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 85.11it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 85.39it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 85.70it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 83.96it/s]
 93%|█████████▎| 81/87 [00:00<00:00, 81.09it/s]
100%|██████

[2m[36m(func pid=20408)[0m Epoch 00103: reducing learning rate of group 0 to 1.0573e-04.


 20%|█▉        | 17/87 [00:00<00:00, 79.64it/s]
 29%|██▊       | 25/87 [00:00<00:01, 52.02it/s]
 40%|████      | 35/87 [00:00<00:00, 63.82it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 71.95it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 75.31it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 77.94it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 80.47it/s]
 93%|█████████▎| 81/87 [00:01<00:00, 82.31it/s]
100%|██████████| 87/87 [00:01<00:00, 75.58it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 177.33it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 81.06it/s]
 21%|██        | 18/87 [00:00<00:00, 83.35it/s]
 31%|███       | 27/87 [00:00<00:00, 83.63it/s]
 41%|████▏     | 36/87 [00:00<00:00, 86.00it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 85.08it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 86.40it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 86.72it/s]
100%|██████████| 87/87 [00:01<00:00, 85.60it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25

[2m[36m(func pid=20408)[0m Epoch 00118: reducing learning rate of group 0 to 5.2866e-05.


 10%|█         | 9/87 [00:00<00:00, 87.47it/s]
 21%|██        | 18/87 [00:00<00:00, 88.02it/s]
 31%|███       | 27/87 [00:00<00:00, 87.47it/s]
 41%|████▏     | 36/87 [00:00<00:00, 86.76it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 84.19it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 81.45it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 81.62it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 81.26it/s]
 93%|█████████▎| 81/87 [00:00<00:00, 83.02it/s]
100%|██████████| 87/87 [00:01<00:00, 83.83it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 206.52it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 83.64it/s]
 21%|██        | 18/87 [00:00<00:00, 84.36it/s]
 31%|███       | 27/87 [00:00<00:00, 86.83it/s]
 41%|████▏     | 36/87 [00:00<00:00, 85.45it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 87.56it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 87.69it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 87.53it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 87.51it/s]
100%|██████

[2m[36m(func pid=20408)[0m Epoch 00124: reducing learning rate of group 0 to 2.6433e-05.


  9%|▉         | 8/87 [00:00<00:00, 79.17it/s]
 20%|█▉        | 17/87 [00:00<00:00, 81.24it/s]
 30%|██▉       | 26/87 [00:00<00:00, 82.20it/s]
 40%|████      | 35/87 [00:00<00:00, 83.51it/s]
 51%|█████     | 44/87 [00:00<00:00, 84.84it/s]
 61%|██████    | 53/87 [00:00<00:00, 86.35it/s]
 71%|███████▏  | 62/87 [00:00<00:00, 86.27it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 87.06it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 87.12it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 182.05it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 87.49it/s]
 21%|██        | 18/87 [00:00<00:00, 86.40it/s]
 31%|███       | 27/87 [00:00<00:00, 86.86it/s]
 41%|████▏     | 36/87 [00:00<00:00, 86.70it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 86.30it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 86.75it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 86.82it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 87.15it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/

[2m[36m(func pid=20408)[0m Epoch 00130: reducing learning rate of group 0 to 1.3217e-05.


 10%|█         | 9/87 [00:00<00:00, 79.00it/s]
 21%|██        | 18/87 [00:00<00:00, 81.68it/s]
 31%|███       | 27/87 [00:00<00:00, 81.50it/s]
 41%|████▏     | 36/87 [00:00<00:00, 80.56it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 81.12it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 80.50it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 79.72it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 80.24it/s]
 93%|█████████▎| 81/87 [00:01<00:00, 79.71it/s]
100%|██████████| 87/87 [00:01<00:00, 80.33it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 186.89it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 75.15it/s]
 20%|█▉        | 17/87 [00:00<00:00, 80.34it/s]
 31%|███       | 27/87 [00:00<00:00, 85.32it/s]
 41%|████▏     | 36/87 [00:00<00:00, 85.12it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 86.78it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 86.74it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 85.85it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 85.83it/s]
100%|██████

[2m[36m(func pid=20408)[0m Epoch 00136: reducing learning rate of group 0 to 6.6083e-06.


 11%|█▏        | 10/87 [00:00<00:00, 90.25it/s]
 23%|██▎       | 20/87 [00:00<00:00, 90.15it/s]
 34%|███▍      | 30/87 [00:00<00:00, 89.35it/s]
 45%|████▍     | 39/87 [00:00<00:00, 89.18it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 90.82it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 91.37it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 91.00it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 177.38it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 85.18it/s]
 21%|██        | 18/87 [00:00<00:00, 87.19it/s]
 31%|███       | 27/87 [00:00<00:00, 87.70it/s]
 41%|████▏     | 36/87 [00:00<00:00, 88.27it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 87.30it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 84.01it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 84.35it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 83.89it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 196.94it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:0

[2m[36m(func pid=20408)[0m Epoch 00142: reducing learning rate of group 0 to 3.3041e-06.


  9%|▉         | 8/87 [00:00<00:00, 79.23it/s]
 20%|█▉        | 17/87 [00:00<00:00, 82.99it/s]
 31%|███       | 27/87 [00:00<00:00, 86.29it/s]
 41%|████▏     | 36/87 [00:00<00:00, 84.71it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 85.50it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 86.78it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 87.79it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 88.38it/s]
 93%|█████████▎| 81/87 [00:00<00:00, 88.65it/s]
100%|██████████| 87/87 [00:00<00:00, 87.15it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 125.62it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 85.48it/s]
 21%|██        | 18/87 [00:00<00:00, 86.21it/s]
 31%|███       | 27/87 [00:00<00:00, 83.95it/s]
 41%|████▏     | 36/87 [00:00<00:00, 83.85it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 85.44it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 84.72it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 86.70it/s]
 95%|█████████▌| 83/87 [00:00<00:00, 88.27it/s]
100%|██████

[2m[36m(func pid=20408)[0m Epoch 00148: reducing learning rate of group 0 to 1.6521e-06.


 10%|█         | 9/87 [00:00<00:00, 87.35it/s]
 21%|██        | 18/87 [00:00<00:00, 88.49it/s]
 32%|███▏      | 28/87 [00:00<00:00, 90.25it/s]
 44%|████▎     | 38/87 [00:00<00:00, 87.19it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 88.06it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 85.95it/s]
 75%|███████▍  | 65/87 [00:00<00:00, 86.26it/s]
 86%|████████▌ | 75/87 [00:00<00:00, 87.79it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 196.93it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 85.30it/s]
 21%|██        | 18/87 [00:00<00:00, 83.52it/s]
 31%|███       | 27/87 [00:00<00:00, 82.40it/s]
 41%|████▏     | 36/87 [00:00<00:00, 80.71it/s]
 61%|██████    | 53/87 [00:00<00:00, 68.17it/s]
 70%|███████   | 61/87 [00:00<00:00, 71.34it/s]
 80%|████████  | 70/87 [00:00<00:00, 75.05it/s]
 92%|█████████▏| 80/87 [00:01<00:00, 79.89it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 192.74it/s]
  0%|          | 0/

[2m[36m(func pid=21096)[0m Epoch 00030: reducing learning rate of group 0 to 9.3808e-04.


  8%|▊         | 14/173 [00:00<00:01, 138.91it/s]
 16%|█▌        | 28/173 [00:00<00:01, 139.24it/s]
 25%|██▍       | 43/173 [00:00<00:00, 139.63it/s]
 34%|███▎      | 58/173 [00:00<00:00, 140.79it/s]
 42%|████▏     | 73/173 [00:00<00:00, 142.71it/s]
 51%|█████     | 88/173 [00:00<00:00, 144.68it/s]
 60%|█████▉    | 103/173 [00:00<00:00, 145.55it/s]
 68%|██████▊   | 118/173 [00:00<00:00, 146.04it/s]
 77%|███████▋  | 133/173 [00:00<00:00, 146.74it/s]
100%|██████████| 173/173 [00:01<00:00, 144.89it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 76%|███████▌  | 38/50 [00:00<00:00, 379.83it/s]
100%|██████████| 50/50 [00:00<00:00, 302.44it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  9%|▊         | 15/173 [00:00<00:01, 144.02it/s]
 17%|█▋        | 30/173 [00:00<00:00, 144.28it/s]
 26%|██▌       | 45/173 [00:00<00:00, 143.48it/s]
 35%|███▍      | 60/173 [00:00<00:00, 145.55it/s]
 43%|████▎     | 75/173 [00:00<00:00, 145.41it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 145.12it/s]
 61%|██████    | 10

[2m[36m(func pid=21096)[0m Epoch 00046: reducing learning rate of group 0 to 4.6904e-04.


  9%|▊         | 15/173 [00:00<00:01, 145.63it/s]
 17%|█▋        | 30/173 [00:00<00:00, 145.53it/s]
 26%|██▌       | 45/173 [00:00<00:00, 145.64it/s]
 35%|███▍      | 60/173 [00:00<00:00, 145.10it/s]
 44%|████▍     | 76/173 [00:00<00:00, 146.70it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 145.91it/s]
 61%|██████▏   | 106/173 [00:00<00:00, 146.28it/s]
 70%|██████▉   | 121/173 [00:00<00:00, 146.01it/s]
 79%|███████▊  | 136/173 [00:00<00:00, 145.55it/s]
 87%|████████▋ | 151/173 [00:01<00:00, 143.77it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 370.90it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 137.12it/s]
 16%|█▌        | 28/173 [00:00<00:01, 138.16it/s]
 24%|██▍       | 42/173 [00:00<00:00, 137.20it/s]
 33%|███▎      | 57/173 [00:00<00:00, 139.51it/s]
 42%|████▏     | 72/173 [00:00<00:00, 140.64it/s]
 50%|█████     | 87/173 [00:00<00:00, 142.38it/s]
 59%|█████▉    | 102/173 [00:00<00:00, 111.09it/s]
 67%|██████▋   | 

[2m[36m(func pid=21096)[0m Epoch 00055: reducing learning rate of group 0 to 2.3452e-04.


  9%|▊         | 15/173 [00:00<00:01, 142.40it/s]
 17%|█▋        | 30/173 [00:00<00:00, 145.87it/s]
 26%|██▌       | 45/173 [00:00<00:00, 144.44it/s]
 35%|███▍      | 60/173 [00:00<00:00, 144.28it/s]
 43%|████▎     | 75/173 [00:00<00:00, 143.83it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 140.54it/s]
 61%|██████    | 105/173 [00:00<00:00, 138.92it/s]
 69%|██████▉   | 119/173 [00:00<00:00, 137.57it/s]
 77%|███████▋  | 134/173 [00:00<00:00, 138.02it/s]
 86%|████████▌ | 148/173 [00:01<00:00, 137.81it/s]
 94%|█████████▍| 163/173 [00:01<00:00, 139.33it/s]
100%|██████████| 173/173 [00:01<00:00, 139.92it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 378.55it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  9%|▊         | 15/173 [00:00<00:01, 143.53it/s]
 17%|█▋        | 30/173 [00:00<00:01, 141.27it/s]
 26%|██▌       | 45/173 [00:00<00:00, 144.75it/s]
 35%|███▍      | 60/173 [00:00<00:00, 145.10it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 146.05it/s]
 61%|██████    |

[2m[36m(func pid=21096)[0m Epoch 00061: reducing learning rate of group 0 to 1.1726e-04.


 17%|█▋        | 30/173 [00:00<00:00, 147.91it/s]
 26%|██▌       | 45/173 [00:00<00:00, 144.77it/s]
 35%|███▍      | 60/173 [00:00<00:00, 141.24it/s]
 43%|████▎     | 75/173 [00:00<00:00, 141.57it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 141.57it/s]
 61%|██████    | 105/173 [00:00<00:00, 136.47it/s]
 69%|██████▉   | 119/173 [00:00<00:00, 133.89it/s]
 77%|███████▋  | 133/173 [00:00<00:00, 132.85it/s]
 85%|████████▍ | 147/173 [00:01<00:00, 133.82it/s]
 93%|█████████▎| 161/173 [00:01<00:00, 132.64it/s]
100%|██████████| 173/173 [00:01<00:00, 135.64it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 72%|███████▏  | 36/50 [00:00<00:00, 345.64it/s]
100%|██████████| 50/50 [00:00<00:00, 287.36it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 106.02it/s]
 13%|█▎        | 22/173 [00:00<00:01, 107.00it/s]
 19%|█▉        | 33/173 [00:00<00:01, 105.12it/s]
 34%|███▎      | 58/173 [00:00<00:01, 113.10it/s]
 40%|████      | 70/173 [00:00<00:00, 111.76it/s]
 48%|████▊     | 

[2m[36m(func pid=21096)[0m Epoch 00067: reducing learning rate of group 0 to 5.8630e-05.


 15%|█▌        | 26/173 [00:00<00:01, 127.47it/s]
 23%|██▎       | 39/173 [00:00<00:01, 125.97it/s]
 30%|███       | 52/173 [00:00<00:00, 125.57it/s]
 38%|███▊      | 65/173 [00:00<00:00, 124.19it/s]
 45%|████▌     | 78/173 [00:00<00:00, 125.22it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 125.57it/s]
 60%|██████    | 104/173 [00:00<00:00, 124.60it/s]
 68%|██████▊   | 117/173 [00:00<00:00, 125.18it/s]
 75%|███████▌  | 130/173 [00:01<00:00, 125.45it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 122.45it/s]
 90%|█████████ | 156/173 [00:01<00:00, 118.39it/s]
 97%|█████████▋| 168/173 [00:01<00:00, 118.49it/s]
100%|██████████| 173/173 [00:01<00:00, 122.60it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 68%|██████▊   | 34/50 [00:00<00:00, 335.61it/s]
100%|██████████| 50/50 [00:00<00:00, 214.24it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 102.83it/s]
 13%|█▎        | 22/173 [00:00<00:01, 104.52it/s]
 19%|█▉        | 33/173 [00:00<00:01, 105.17it/s]
 25%|██▌       |

[2m[36m(func pid=21096)[0m Epoch 00074: reducing learning rate of group 0 to 2.9315e-05.


  0%|          | 0/173 [00:00<?, ?it/s]
  9%|▊         | 15/173 [00:00<00:01, 144.26it/s]
 17%|█▋        | 30/173 [00:00<00:00, 143.53it/s]
 26%|██▌       | 45/173 [00:00<00:00, 140.04it/s]
 35%|███▍      | 60/173 [00:00<00:00, 140.59it/s]
 43%|████▎     | 75/173 [00:00<00:00, 141.48it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 140.64it/s]
 61%|██████    | 105/173 [00:00<00:00, 139.63it/s]
 69%|██████▉   | 120/173 [00:00<00:00, 140.59it/s]
 78%|███████▊  | 135/173 [00:00<00:00, 141.79it/s]
 87%|████████▋ | 150/173 [00:01<00:00, 137.16it/s]
 95%|█████████▍| 164/173 [00:01<00:00, 136.04it/s]
100%|██████████| 173/173 [00:01<00:00, 137.18it/s]
 70%|███████   | 35/50 [00:00<00:00, 346.86it/s]
100%|██████████| 50/50 [00:00<00:00, 339.84it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 108.45it/s]
 14%|█▍        | 24/173 [00:00<00:01, 120.86it/s]
 22%|██▏       | 38/173 [00:00<00:01, 125.82it/s]
 29%|██▉       | 51/173 [00:00<00:00, 126.97it/s]
 37%|███▋      |

[2m[36m(func pid=21096)[0m Epoch 00080: reducing learning rate of group 0 to 1.4657e-05.


 17%|█▋        | 29/173 [00:00<00:01, 137.56it/s]
 25%|██▍       | 43/173 [00:00<00:00, 130.73it/s]
 33%|███▎      | 57/173 [00:00<00:00, 132.72it/s]
 42%|████▏     | 72/173 [00:00<00:00, 135.96it/s]
 50%|████▉     | 86/173 [00:00<00:00, 134.61it/s]
 58%|█████▊    | 100/173 [00:00<00:00, 135.05it/s]
 66%|██████▌   | 114/173 [00:00<00:00, 134.89it/s]
 74%|███████▍  | 128/173 [00:00<00:00, 135.29it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 133.86it/s]
 90%|█████████ | 156/173 [00:01<00:00, 132.89it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 78%|███████▊  | 39/50 [00:00<00:00, 378.18it/s]
100%|██████████| 50/50 [00:00<00:00, 364.43it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 131.18it/s]
 17%|█▋        | 29/173 [00:00<00:01, 137.16it/s]
 25%|██▍       | 43/173 [00:00<00:00, 132.92it/s]
 34%|███▎      | 58/173 [00:00<00:00, 136.43it/s]
 42%|████▏     | 72/173 [00:00<00:00, 134.39it/s]
 50%|█████     | 87/173 [00:00<00:00, 136.78it/s]
 58%|█████▊    | 1

[2m[36m(func pid=21096)[0m Epoch 00086: reducing learning rate of group 0 to 7.3287e-06.


 16%|█▌        | 27/173 [00:00<00:01, 129.91it/s]
 23%|██▎       | 40/173 [00:00<00:01, 129.18it/s]
 31%|███       | 53/173 [00:00<00:00, 128.62it/s]
 38%|███▊      | 66/173 [00:00<00:00, 126.94it/s]
 46%|████▌     | 79/173 [00:00<00:00, 125.78it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 119.85it/s]
 61%|██████    | 105/173 [00:00<00:00, 118.33it/s]
 68%|██████▊   | 117/173 [00:00<00:00, 118.46it/s]
 75%|███████▌  | 130/173 [00:01<00:00, 120.85it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 125.08it/s]
100%|██████████| 173/173 [00:01<00:00, 124.35it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 329.11it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 124.81it/s]
 16%|█▌        | 27/173 [00:00<00:01, 130.72it/s]
 24%|██▎       | 41/173 [00:00<00:00, 133.60it/s]
 32%|███▏      | 55/173 [00:00<00:00, 132.42it/s]
 40%|███▉      | 69/173 [00:00<00:00, 129.01it/s]
 48%|████▊     | 83/173 [00:00<00:00, 130.27it/s]
 56%|█████▌    | 

[2m[36m(func pid=21096)[0m Epoch 00092: reducing learning rate of group 0 to 3.6644e-06.


 14%|█▍        | 24/173 [00:00<00:01, 121.44it/s]
 22%|██▏       | 38/173 [00:00<00:01, 126.84it/s]
 29%|██▉       | 51/173 [00:00<00:00, 127.07it/s]
 37%|███▋      | 64/173 [00:00<00:00, 126.22it/s]
 45%|████▌     | 78/173 [00:00<00:00, 129.57it/s]
 54%|█████▍    | 93/173 [00:00<00:00, 134.08it/s]
 62%|██████▏   | 107/173 [00:00<00:00, 135.53it/s]
 70%|██████▉   | 121/173 [00:00<00:00, 135.56it/s]
 78%|███████▊  | 135/173 [00:01<00:00, 135.71it/s]
 87%|████████▋ | 150/173 [00:01<00:00, 137.51it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 140.36it/s]
100%|██████████| 173/173 [00:01<00:00, 132.97it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 78%|███████▊  | 39/50 [00:00<00:00, 387.32it/s]
100%|██████████| 50/50 [00:00<00:00, 384.23it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  9%|▊         | 15/173 [00:00<00:01, 143.06it/s]
 17%|█▋        | 30/173 [00:00<00:01, 135.05it/s]
 25%|██▌       | 44/173 [00:00<00:00, 136.70it/s]
 34%|███▎      | 58/173 [00:00<00:00, 135.96it/s]
 42%|████▏     | 

[2m[36m(func pid=21096)[0m Epoch 00098: reducing learning rate of group 0 to 1.8322e-06.


 16%|█▌        | 28/173 [00:00<00:01, 135.31it/s]
 24%|██▍       | 42/173 [00:00<00:00, 135.65it/s]
 32%|███▏      | 56/173 [00:00<00:00, 135.02it/s]
 40%|████      | 70/173 [00:00<00:00, 136.04it/s]
 49%|████▉     | 85/173 [00:00<00:00, 139.62it/s]
 58%|█████▊    | 100/173 [00:00<00:00, 141.63it/s]
 66%|██████▋   | 115/173 [00:00<00:00, 141.12it/s]
 75%|███████▌  | 130/173 [00:00<00:00, 140.67it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 140.77it/s]
100%|██████████| 173/173 [00:01<00:00, 139.82it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 14%|█▍        | 7/50 [00:00<00:00, 68.19it/s]
100%|██████████| 50/50 [00:00<00:00, 233.90it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 136.47it/s]
 17%|█▋        | 29/173 [00:00<00:01, 139.52it/s]
 25%|██▌       | 44/173 [00:00<00:00, 141.51it/s]
 34%|███▍      | 59/173 [00:00<00:00, 141.60it/s]
 43%|████▎     | 74/173 [00:00<00:00, 135.91it/s]
 51%|█████     | 88/173 [00:00<00:00, 135.59it/s]
 59%|█████▉    | 102

[2m[36m(func pid=21096)[0m Epoch 00104: reducing learning rate of group 0 to 9.1609e-07.


  7%|▋         | 12/173 [00:00<00:01, 117.55it/s]
 14%|█▍        | 25/173 [00:00<00:01, 119.26it/s]
 21%|██▏       | 37/173 [00:00<00:01, 101.55it/s]
 28%|██▊       | 48/173 [00:00<00:01, 97.14it/s] 
 34%|███▎      | 58/173 [00:00<00:01, 96.42it/s]
 46%|████▌     | 80/173 [00:00<00:00, 101.08it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 98.99it/s] 
 59%|█████▉    | 102/173 [00:01<00:00, 101.68it/s]
 67%|██████▋   | 116/173 [00:01<00:00, 111.63it/s]
 75%|███████▌  | 130/173 [00:01<00:00, 118.38it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 123.96it/s]
 91%|█████████▏| 158/173 [00:01<00:00, 126.66it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 310.69it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 131.97it/s]
 16%|█▌        | 28/173 [00:00<00:01, 132.12it/s]
 24%|██▍       | 42/173 [00:00<00:00, 133.24it/s]
 32%|███▏      | 56/173 [00:00<00:00, 130.37it/s]
 41%|████      | 71/173 [00:00<00:00, 134.27it/s]
 49%|████▉     | 8

[2m[36m(func pid=21096)[0m Epoch 00110: reducing learning rate of group 0 to 4.5805e-07.


  9%|▊         | 15/173 [00:00<00:01, 141.61it/s]
 17%|█▋        | 30/173 [00:00<00:01, 141.52it/s]
 26%|██▌       | 45/173 [00:00<00:00, 143.99it/s]
 35%|███▍      | 60/173 [00:00<00:00, 142.93it/s]
 43%|████▎     | 75/173 [00:00<00:00, 140.65it/s]
 61%|██████    | 105/173 [00:00<00:00, 141.82it/s]
 69%|██████▉   | 120/173 [00:00<00:00, 141.79it/s]
 78%|███████▊  | 135/173 [00:00<00:00, 140.41it/s]
 87%|████████▋ | 150/173 [00:01<00:00, 136.80it/s]
 95%|█████████▍| 164/173 [00:01<00:00, 136.16it/s]
100%|██████████| 173/173 [00:01<00:00, 139.39it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 76%|███████▌  | 38/50 [00:00<00:00, 379.50it/s]
100%|██████████| 50/50 [00:00<00:00, 386.75it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 131.70it/s]
 17%|█▋        | 29/173 [00:00<00:01, 140.80it/s]
 25%|██▌       | 44/173 [00:00<00:00, 138.77it/s]
 34%|███▍      | 59/173 [00:00<00:00, 141.04it/s]
 43%|████▎     | 74/173 [00:00<00:00, 140.50it/s]
 51%|█████▏    | 

[2m[36m(func pid=21096)[0m Epoch 00116: reducing learning rate of group 0 to 2.2902e-07.


 16%|█▌        | 28/173 [00:00<00:01, 134.06it/s]
 24%|██▍       | 42/173 [00:00<00:01, 127.10it/s]
 32%|███▏      | 55/173 [00:00<00:00, 126.71it/s]
 40%|███▉      | 69/173 [00:00<00:00, 128.23it/s]
 47%|████▋     | 82/173 [00:00<00:00, 128.32it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 132.05it/s]
 64%|██████▍   | 111/173 [00:00<00:00, 123.82it/s]
 72%|███████▏  | 124/173 [00:00<00:00, 125.55it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 127.18it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 128.30it/s]
100%|██████████| 173/173 [00:01<00:00, 127.69it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 357.20it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 138.62it/s]
 16%|█▌        | 28/173 [00:00<00:01, 137.84it/s]
 24%|██▍       | 42/173 [00:00<00:00, 134.65it/s]
 32%|███▏      | 56/173 [00:00<00:00, 135.23it/s]
 40%|████      | 70/173 [00:00<00:00, 135.78it/s]
 49%|████▉     | 85/173 [00:00<00:00, 137.87it/s]
 57%|█████▋    | 

[2m[36m(func pid=21096)[0m Epoch 00122: reducing learning rate of group 0 to 1.1451e-07.


  8%|▊         | 13/173 [00:00<00:01, 126.39it/s]
 15%|█▌        | 26/173 [00:00<00:01, 124.92it/s]
 23%|██▎       | 40/173 [00:00<00:01, 130.42it/s]
 31%|███       | 54/173 [00:00<00:00, 129.65it/s]
 39%|███▉      | 68/173 [00:00<00:00, 131.77it/s]
 47%|████▋     | 82/173 [00:00<00:00, 132.95it/s]
 55%|█████▌    | 96/173 [00:00<00:00, 132.07it/s]
 64%|██████▎   | 110/173 [00:00<00:00, 131.13it/s]
 72%|███████▏  | 124/173 [00:00<00:00, 130.09it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 130.69it/s]
 88%|████████▊ | 152/173 [00:01<00:00, 130.38it/s]
 96%|█████████▌| 166/173 [00:01<00:00, 130.22it/s]
100%|██████████| 173/173 [00:01<00:00, 129.81it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 364.32it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 125.03it/s]
 15%|█▌        | 26/173 [00:00<00:01, 125.64it/s]
 23%|██▎       | 39/173 [00:00<00:01, 123.15it/s]
 30%|███       | 52/173 [00:00<00:00, 124.84it/s]
 38%|███▊      |

[2m[36m(func pid=21096)[0m Epoch 00128: reducing learning rate of group 0 to 5.7256e-08.


 17%|█▋        | 29/173 [00:00<00:01, 140.13it/s]
 25%|██▌       | 44/173 [00:00<00:00, 135.66it/s]
 34%|███▎      | 58/173 [00:00<00:00, 135.72it/s]
 42%|████▏     | 73/173 [00:00<00:00, 138.28it/s]
 51%|█████     | 88/173 [00:00<00:00, 141.44it/s]
 60%|█████▉    | 103/173 [00:00<00:00, 143.45it/s]
 68%|██████▊   | 118/173 [00:00<00:00, 144.19it/s]
 77%|███████▋  | 133/173 [00:00<00:00, 144.89it/s]
 86%|████████▌ | 148/173 [00:01<00:00, 144.78it/s]
100%|██████████| 173/173 [00:01<00:00, 141.95it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 380.79it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  9%|▊         | 15/173 [00:00<00:01, 145.21it/s]
 26%|██▌       | 45/173 [00:00<00:00, 145.68it/s]
 35%|███▍      | 60/173 [00:00<00:00, 146.12it/s]
 43%|████▎     | 75/173 [00:00<00:00, 144.48it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 145.36it/s]
 61%|██████    | 105/173 [00:00<00:00, 144.43it/s]
 69%|██████▉   | 120/173 [00:00<00:00, 141.81it/s]
 78%|███████▊  

[2m[36m(func pid=21096)[0m Epoch 00134: reducing learning rate of group 0 to 2.8628e-08.


 17%|█▋        | 30/173 [00:00<00:00, 144.11it/s]
 26%|██▌       | 45/173 [00:00<00:00, 143.51it/s]
 35%|███▍      | 60/173 [00:00<00:00, 141.60it/s]
 43%|████▎     | 75/173 [00:00<00:00, 142.32it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 143.09it/s]
 61%|██████    | 105/173 [00:00<00:00, 141.84it/s]
 69%|██████▉   | 120/173 [00:00<00:00, 142.97it/s]
 78%|███████▊  | 135/173 [00:00<00:00, 141.19it/s]
 87%|████████▋ | 150/173 [00:01<00:00, 140.26it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 140.13it/s]
100%|██████████| 173/173 [00:01<00:00, 132.43it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 394.02it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  9%|▊         | 15/173 [00:00<00:01, 141.75it/s]
 17%|█▋        | 30/173 [00:00<00:01, 142.79it/s]
 26%|██▌       | 45/173 [00:00<00:00, 139.21it/s]
 35%|███▍      | 60/173 [00:00<00:00, 142.14it/s]
 43%|████▎     | 75/173 [00:00<00:00, 142.80it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 142.89it/s]
 61%|██████    |

[2m[36m(func pid=21096)[0m Epoch 00140: reducing learning rate of group 0 to 1.4314e-08.


  8%|▊         | 14/173 [00:00<00:01, 136.04it/s]
 17%|█▋        | 29/173 [00:00<00:01, 139.99it/s]
 25%|██▌       | 44/173 [00:00<00:00, 141.37it/s]
 34%|███▍      | 59/173 [00:00<00:00, 141.45it/s]
 43%|████▎     | 74/173 [00:00<00:00, 139.04it/s]
 51%|█████     | 88/173 [00:00<00:00, 137.60it/s]
 60%|█████▉    | 103/173 [00:00<00:00, 140.18it/s]
 68%|██████▊   | 118/173 [00:00<00:00, 136.52it/s]
 76%|███████▋  | 132/173 [00:00<00:00, 135.91it/s]
 84%|████████▍ | 146/173 [00:01<00:00, 136.63it/s]
 92%|█████████▏| 160/173 [00:01<00:00, 135.36it/s]
100%|██████████| 173/173 [00:01<00:00, 136.75it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 370.57it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 130.79it/s]
 16%|█▌        | 28/173 [00:00<00:01, 129.89it/s]
 24%|██▍       | 42/173 [00:00<00:00, 132.20it/s]
 40%|████      | 70/173 [00:00<00:00, 133.48it/s]
 49%|████▊     | 84/173 [00:00<00:00, 132.26it/s]
 57%|█████▋    |

[2m[36m(func pid=23148)[0m Epoch 00010: reducing learning rate of group 0 to 3.7189e-02.


 12%|█▏        | 21/173 [00:00<00:01, 101.41it/s]
 19%|█▉        | 33/173 [00:00<00:01, 105.54it/s]
 25%|██▌       | 44/173 [00:00<00:01, 106.68it/s]
 32%|███▏      | 55/173 [00:00<00:01, 106.42it/s]
 39%|███▊      | 67/173 [00:00<00:00, 107.33it/s]
 45%|████▌     | 78/173 [00:00<00:00, 106.56it/s]
 51%|█████▏    | 89/173 [00:00<00:00, 105.97it/s]
 58%|█████▊    | 100/173 [00:00<00:00, 104.95it/s]
 64%|██████▍   | 111/173 [00:01<00:00, 105.29it/s]
 71%|███████   | 122/173 [00:01<00:00, 104.21it/s]
 77%|███████▋  | 133/173 [00:01<00:00, 104.07it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 105.12it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 105.62it/s]
100%|██████████| 173/173 [00:01<00:00, 104.84it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 322.11it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 101.25it/s]
 13%|█▎        | 22/173 [00:00<00:01, 99.57it/s] 
 19%|█▉        | 33/173 [00:00<00:01, 103.06it/s]
 25%|██▌       

[2m[36m(func pid=23148)[0m Epoch 00038: reducing learning rate of group 0 to 1.8595e-02.


  6%|▌         | 10/173 [00:00<00:01, 97.22it/s]
 12%|█▏        | 20/173 [00:00<00:01, 89.08it/s]
 17%|█▋        | 30/173 [00:00<00:01, 90.91it/s]
 23%|██▎       | 40/173 [00:00<00:01, 92.95it/s]
 29%|██▉       | 50/173 [00:00<00:01, 94.80it/s]
 35%|███▌      | 61/173 [00:00<00:01, 98.05it/s]
 42%|████▏     | 72/173 [00:00<00:01, 99.59it/s]
 48%|████▊     | 83/173 [00:00<00:00, 100.71it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 99.58it/s] 
 61%|██████    | 105/173 [00:01<00:00, 100.66it/s]
 67%|██████▋   | 116/173 [00:01<00:00, 100.63it/s]
 73%|███████▎  | 127/173 [00:01<00:00, 100.20it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 100.38it/s]
 86%|████████▌ | 149/173 [00:01<00:00, 100.00it/s]
 92%|█████████▏| 160/173 [00:01<00:00, 100.29it/s]
100%|██████████| 173/173 [00:01<00:00, 98.70it/s] 
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 298.42it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 101.10it/s]
 13%|█▎        | 22/17

[2m[36m(func pid=23148)[0m Epoch 00050: reducing learning rate of group 0 to 9.2973e-03.


 12%|█▏        | 20/173 [00:00<00:01, 98.39it/s]
 17%|█▋        | 30/173 [00:00<00:01, 95.13it/s]
 23%|██▎       | 40/173 [00:00<00:01, 95.54it/s]
 29%|██▉       | 50/173 [00:00<00:01, 96.23it/s]
 35%|███▍      | 60/173 [00:00<00:01, 96.21it/s]
 40%|████      | 70/173 [00:00<00:01, 96.75it/s]
 47%|████▋     | 81/173 [00:00<00:00, 98.41it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 100.44it/s]
 60%|█████▉    | 103/173 [00:01<00:00, 99.62it/s]
 66%|██████▌   | 114/173 [00:01<00:00, 100.69it/s]
 72%|███████▏  | 125/173 [00:01<00:00, 101.28it/s]
 79%|███████▊  | 136/173 [00:01<00:00, 102.04it/s]
 85%|████████▍ | 147/173 [00:01<00:00, 100.88it/s]
 91%|█████████▏| 158/173 [00:01<00:00, 81.79it/s] 
 97%|█████████▋| 168/173 [00:01<00:00, 85.70it/s]
100%|██████████| 173/173 [00:01<00:00, 94.63it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 60%|██████    | 30/50 [00:00<00:00, 296.94it/s]
100%|██████████| 50/50 [00:00<00:00, 303.34it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [

[2m[36m(func pid=23148)[0m Epoch 00064: reducing learning rate of group 0 to 4.6486e-03.


 13%|█▎        | 22/173 [00:00<00:01, 102.67it/s]
 19%|█▉        | 33/173 [00:00<00:01, 99.66it/s] 
 25%|██▌       | 44/173 [00:00<00:01, 100.11it/s]
 32%|███▏      | 55/173 [00:00<00:01, 90.55it/s] 
 38%|███▊      | 65/173 [00:00<00:01, 92.44it/s]
 43%|████▎     | 75/173 [00:00<00:01, 93.04it/s]
 49%|████▉     | 85/173 [00:00<00:00, 94.71it/s]
 55%|█████▍    | 95/173 [00:00<00:00, 93.77it/s]
 61%|██████▏   | 106/173 [00:01<00:00, 96.21it/s]
 67%|██████▋   | 116/173 [00:01<00:00, 97.06it/s]
 73%|███████▎  | 127/173 [00:01<00:00, 99.08it/s]
 79%|███████▉  | 137/173 [00:01<00:00, 98.13it/s]
 85%|████████▍ | 147/173 [00:01<00:00, 97.83it/s]
 91%|█████████ | 157/173 [00:01<00:00, 97.73it/s]
 97%|█████████▋| 167/173 [00:01<00:00, 98.38it/s]
100%|██████████| 173/173 [00:01<00:00, 97.00it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 62%|██████▏   | 31/50 [00:00<00:00, 301.48it/s]
100%|██████████| 50/50 [00:00<00:00, 299.42it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▌         | 10/173 [00

[2m[36m(func pid=23148)[0m Epoch 00072: reducing learning rate of group 0 to 2.3243e-03.


  0%|          | 0/173 [00:00<?, ?it/s]
  5%|▌         | 9/173 [00:00<00:01, 86.06it/s]
 10%|█         | 18/173 [00:00<00:01, 83.74it/s]
 16%|█▌        | 28/173 [00:00<00:01, 89.75it/s]
 22%|██▏       | 38/173 [00:00<00:01, 91.23it/s]
 28%|██▊       | 48/173 [00:00<00:01, 93.60it/s]
 34%|███▎      | 58/173 [00:00<00:01, 93.55it/s]
 40%|███▉      | 69/173 [00:00<00:01, 96.12it/s]
 46%|████▌     | 79/173 [00:00<00:00, 96.77it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 98.78it/s]
 58%|█████▊    | 101/173 [00:01<00:00, 99.34it/s]
 65%|██████▍   | 112/173 [00:01<00:00, 99.67it/s]
 71%|███████   | 122/173 [00:01<00:00, 98.06it/s]
 77%|███████▋  | 133/173 [00:01<00:00, 98.98it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 99.25it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 56%|█████▌    | 28/50 [00:00<00:00, 276.29it/s]
100%|██████████| 50/50 [00:00<00:00, 272.12it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  4%|▍         | 7/173 [00:00<00:02, 62.97it/s]
 10%|▉         | 17/173 [00:00<00:01, 81.59it

[2m[36m(func pid=23148)[0m Epoch 00091: reducing learning rate of group 0 to 1.1622e-03.


  6%|▋         | 11/173 [00:00<00:01, 103.02it/s]
 13%|█▎        | 22/173 [00:00<00:01, 103.05it/s]
 19%|█▉        | 33/173 [00:00<00:01, 94.24it/s] 
 25%|██▍       | 43/173 [00:00<00:01, 92.41it/s]
 31%|███       | 54/173 [00:00<00:01, 96.27it/s]
 38%|███▊      | 65/173 [00:00<00:01, 99.38it/s]
 44%|████▍     | 76/173 [00:00<00:00, 100.78it/s]
 50%|█████     | 87/173 [00:00<00:00, 102.35it/s]
 57%|█████▋    | 98/173 [00:00<00:00, 103.14it/s]
 63%|██████▎   | 109/173 [00:01<00:00, 102.36it/s]
 69%|██████▉   | 120/173 [00:01<00:00, 103.42it/s]
 76%|███████▌  | 131/173 [00:01<00:00, 100.66it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 98.56it/s] 
 88%|████████▊ | 152/173 [00:01<00:00, 95.86it/s]
 94%|█████████▎| 162/173 [00:01<00:00, 96.01it/s]
100%|██████████| 173/173 [00:01<00:00, 96.34it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 22%|██▏       | 11/50 [00:00<00:00, 77.58it/s]
 68%|██████▊   | 34/50 [00:00<00:00, 151.76it/s]
100%|██████████| 50/50 [00:00<00:00, 159.94it/s]
  0%|         

[2m[36m(func pid=23148)[0m Epoch 00108: reducing learning rate of group 0 to 5.8108e-04.


 11%|█         | 19/173 [00:00<00:01, 91.18it/s]
 17%|█▋        | 29/173 [00:00<00:01, 91.52it/s]
 23%|██▎       | 39/173 [00:00<00:01, 93.90it/s]
 29%|██▉       | 50/173 [00:00<00:01, 98.21it/s]
 35%|███▌      | 61/173 [00:00<00:01, 101.05it/s]
 42%|████▏     | 72/173 [00:00<00:00, 102.31it/s]
 48%|████▊     | 83/173 [00:00<00:00, 103.70it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 104.20it/s]
 61%|██████    | 105/173 [00:01<00:00, 105.45it/s]
 67%|██████▋   | 116/173 [00:01<00:00, 105.46it/s]
 73%|███████▎  | 127/173 [00:01<00:00, 103.42it/s]
 80%|███████▉  | 138/173 [00:01<00:00, 104.26it/s]
 86%|████████▌ | 149/173 [00:01<00:00, 103.42it/s]
 92%|█████████▏| 160/173 [00:01<00:00, 104.55it/s]
100%|██████████| 173/173 [00:01<00:00, 101.94it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 62%|██████▏   | 31/50 [00:00<00:00, 307.13it/s]
100%|██████████| 50/50 [00:00<00:00, 305.68it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 104.07it/s]
 13%|█▎        | 22/

[2m[36m(func pid=23148)[0m Epoch 00117: reducing learning rate of group 0 to 2.9054e-04.


 13%|█▎        | 22/173 [00:00<00:01, 106.41it/s]
 19%|█▉        | 33/173 [00:00<00:01, 107.80it/s]
 25%|██▌       | 44/173 [00:00<00:01, 106.36it/s]
 32%|███▏      | 55/173 [00:00<00:01, 105.28it/s]
 38%|███▊      | 66/173 [00:00<00:01, 106.50it/s]
 45%|████▍     | 77/173 [00:00<00:00, 106.74it/s]
 51%|█████     | 88/173 [00:00<00:00, 106.11it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 106.64it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 107.22it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 87.03it/s] 
 76%|███████▋  | 132/173 [00:01<00:00, 92.42it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 95.54it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 98.98it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 100.91it/s]
100%|██████████| 173/173 [00:01<00:00, 101.53it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 62%|██████▏   | 31/50 [00:00<00:00, 307.97it/s]
100%|██████████| 50/50 [00:00<00:00, 312.47it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 106.54it/s]
 13%|█▎        | 22

[2m[36m(func pid=23148)[0m Epoch 00124: reducing learning rate of group 0 to 1.4527e-04.


  6%|▋         | 11/173 [00:00<00:01, 107.00it/s]
 19%|█▉        | 33/173 [00:00<00:01, 106.42it/s]
 25%|██▌       | 44/173 [00:00<00:01, 106.33it/s]
 32%|███▏      | 55/173 [00:00<00:01, 106.49it/s]
 38%|███▊      | 66/173 [00:00<00:01, 106.86it/s]
 45%|████▍     | 77/173 [00:00<00:00, 107.23it/s]
 51%|█████     | 88/173 [00:00<00:00, 105.45it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 105.63it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 105.58it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 105.68it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 106.34it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 106.79it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 107.03it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 107.90it/s]
100%|██████████| 173/173 [00:01<00:00, 106.64it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 318.02it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 103.20it/s]
 13%|█▎        | 22/173 [00:00<00:01, 105.00it/s]
 19%|█▉        

[2m[36m(func pid=23148)[0m Epoch 00130: reducing learning rate of group 0 to 7.2635e-05.


 13%|█▎        | 23/173 [00:00<00:01, 109.33it/s]
 20%|█▉        | 34/173 [00:00<00:01, 107.55it/s]
 26%|██▌       | 45/173 [00:00<00:01, 106.93it/s]
 32%|███▏      | 56/173 [00:00<00:01, 106.49it/s]
 39%|███▊      | 67/173 [00:00<00:00, 106.22it/s]
 45%|████▌     | 78/173 [00:00<00:00, 106.28it/s]
 51%|█████▏    | 89/173 [00:00<00:00, 106.94it/s]
 64%|██████▍   | 111/173 [00:01<00:00, 107.11it/s]
 71%|███████   | 122/173 [00:01<00:00, 105.87it/s]
 77%|███████▋  | 133/173 [00:01<00:00, 104.82it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 105.99it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 106.17it/s]
 96%|█████████▌| 166/173 [00:01<00:00, 106.45it/s]
100%|██████████| 173/173 [00:01<00:00, 106.38it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 62%|██████▏   | 31/50 [00:00<00:00, 309.91it/s]
100%|██████████| 50/50 [00:00<00:00, 312.38it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 107.48it/s]
 13%|█▎        | 22/173 [00:00<00:01, 105.20it/s]
 19%|█▉        |

[2m[36m(func pid=23148)[0m Epoch 00136: reducing learning rate of group 0 to 3.6318e-05.


  6%|▋         | 11/173 [00:00<00:01, 109.02it/s]
 13%|█▎        | 22/173 [00:00<00:01, 106.00it/s]
 19%|█▉        | 33/173 [00:00<00:01, 106.48it/s]
 25%|██▌       | 44/173 [00:00<00:01, 105.84it/s]
 32%|███▏      | 55/173 [00:00<00:01, 105.76it/s]
 38%|███▊      | 66/173 [00:00<00:01, 106.39it/s]
 45%|████▍     | 77/173 [00:00<00:00, 107.14it/s]
 51%|█████     | 88/173 [00:00<00:00, 107.19it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 106.70it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 106.13it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 106.79it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 107.25it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 107.50it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 106.95it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 106.71it/s]
100%|██████████| 173/173 [00:01<00:00, 106.63it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 313.86it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 100.91it/s]
 13%|█▎        

[2m[36m(func pid=23148)[0m Epoch 00146: reducing learning rate of group 0 to 1.8159e-05.


  6%|▋         | 11/173 [00:00<00:01, 108.67it/s]
 13%|█▎        | 22/173 [00:00<00:01, 107.00it/s]
 19%|█▉        | 33/173 [00:00<00:01, 107.08it/s]
 25%|██▌       | 44/173 [00:00<00:01, 106.04it/s]
 32%|███▏      | 55/173 [00:00<00:01, 106.18it/s]
 38%|███▊      | 66/173 [00:00<00:01, 105.41it/s]
 45%|████▍     | 77/173 [00:00<00:00, 105.45it/s]
 51%|█████     | 88/173 [00:00<00:00, 105.30it/s]
 64%|██████▎   | 110/173 [00:01<00:00, 104.89it/s]
 70%|██████▉   | 121/173 [00:01<00:00, 105.43it/s]
 76%|███████▋  | 132/173 [00:01<00:00, 106.43it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 106.45it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 106.38it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 105.60it/s]
 62%|██████▏   | 31/50 [00:00<00:00, 303.92it/s]
100%|██████████| 50/50 [00:00<00:00, 306.96it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  5%|▌         | 9/173 [00:00<00:01, 89.42it/s]
 12%|█▏        | 20/173 [00:00<00:01, 97.27it/s]
 18%|█▊        | 31/173 [00:00<00:01, 99.19it/s]
 24%|██▍  

[2m[36m(func pid=14196)[0m Epoch 00072: reducing learning rate of group 0 to 4.1546e-04.


 23%|██▎       | 20/87 [00:00<00:00, 94.24it/s]
 34%|███▍      | 30/87 [00:00<00:00, 94.49it/s]
 46%|████▌     | 40/87 [00:00<00:00, 93.37it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 93.70it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 93.36it/s]
 80%|████████  | 70/87 [00:00<00:00, 91.99it/s]
100%|██████████| 87/87 [00:00<00:00, 93.83it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 72%|███████▏  | 18/25 [00:00<00:00, 112.48it/s]
100%|██████████| 25/25 [00:00<00:00, 129.14it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 89.98it/s]
 22%|██▏       | 19/87 [00:00<00:00, 93.33it/s]
 33%|███▎      | 29/87 [00:00<00:00, 93.03it/s]
 45%|████▍     | 39/87 [00:00<00:00, 92.58it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 92.53it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 92.68it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 92.96it/s]
 91%|█████████ | 79/87 [00:00<00:00, 93.08it/s]
100%|██████████| 87/87 [00:00<00:00, 92.94it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 2

[2m[36m(func pid=14196)[0m Epoch 00086: reducing learning rate of group 0 to 2.0773e-04.


 10%|█         | 9/87 [00:00<00:00, 87.49it/s]
 22%|██▏       | 19/87 [00:00<00:00, 91.50it/s]
 33%|███▎      | 29/87 [00:00<00:00, 91.97it/s]
 45%|████▍     | 39/87 [00:00<00:00, 92.59it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 91.69it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 92.15it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 91.62it/s]
 91%|█████████ | 79/87 [00:00<00:00, 93.00it/s]
100%|██████████| 87/87 [00:00<00:00, 92.66it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 210.44it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 94.59it/s]
 23%|██▎       | 20/87 [00:00<00:00, 95.31it/s]
 34%|███▍      | 30/87 [00:00<00:00, 94.57it/s]
 46%|████▌     | 40/87 [00:00<00:00, 94.25it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 94.11it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 94.79it/s]
 80%|████████  | 70/87 [00:00<00:00, 94.30it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 93.66it/s]
100%|██████████| 87/87 [00:00<00:00, 94.54it/s]
  0%|     

[2m[36m(func pid=14196)[0m Epoch 00101: reducing learning rate of group 0 to 1.0387e-04.


 23%|██▎       | 20/87 [00:00<00:00, 95.95it/s]
 34%|███▍      | 30/87 [00:00<00:00, 94.48it/s]
 46%|████▌     | 40/87 [00:00<00:00, 94.41it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 93.46it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 92.57it/s]
 80%|████████  | 70/87 [00:00<00:00, 90.97it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 90.69it/s]
100%|██████████| 87/87 [00:00<00:00, 92.54it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 88%|████████▊ | 22/25 [00:00<00:00, 210.06it/s]
100%|██████████| 25/25 [00:00<00:00, 206.73it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 93.32it/s]
 23%|██▎       | 20/87 [00:00<00:00, 91.08it/s]
 34%|███▍      | 30/87 [00:00<00:00, 92.85it/s]
 46%|████▌     | 40/87 [00:00<00:00, 92.98it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 93.25it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 93.89it/s]
 80%|████████  | 70/87 [00:00<00:00, 92.96it/s]
100%|██████████| 87/87 [00:00<00:00, 93.32it/s]
100%|██████████| 25/25 [00:00<00:00, 127.90it/s]
  0%|  

[2m[36m(func pid=14196)[0m Epoch 00110: reducing learning rate of group 0 to 5.1933e-05.


 11%|█▏        | 10/87 [00:00<00:00, 97.15it/s]
 23%|██▎       | 20/87 [00:00<00:00, 96.31it/s]
 34%|███▍      | 30/87 [00:00<00:00, 95.92it/s]
 46%|████▌     | 40/87 [00:00<00:00, 96.17it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 95.10it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 93.96it/s]
 80%|████████  | 70/87 [00:00<00:00, 94.41it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 93.19it/s]
100%|██████████| 87/87 [00:00<00:00, 93.66it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 208.84it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 90.73it/s]
 23%|██▎       | 20/87 [00:00<00:00, 92.41it/s]
 34%|███▍      | 30/87 [00:00<00:00, 93.76it/s]
 46%|████▌     | 40/87 [00:00<00:00, 93.28it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 94.66it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 94.05it/s]
 80%|████████  | 70/87 [00:00<00:00, 93.10it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 91.72it/s]
100%|██████████| 87/87 [00:00<00:00, 93.04it/s]
  0%|    

[2m[36m(func pid=14196)[0m Epoch 00116: reducing learning rate of group 0 to 2.5967e-05.


 10%|█         | 9/87 [00:00<00:00, 87.95it/s]
 21%|██        | 18/87 [00:00<00:00, 82.27it/s]
 31%|███       | 27/87 [00:00<00:00, 84.91it/s]
 41%|████▏     | 36/87 [00:00<00:00, 86.25it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 86.10it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 85.96it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 88.09it/s]
 84%|████████▍ | 73/87 [00:00<00:00, 88.13it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 84%|████████▍ | 21/25 [00:00<00:00, 202.45it/s]
100%|██████████| 25/25 [00:00<00:00, 203.39it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 90.11it/s]
 23%|██▎       | 20/87 [00:00<00:00, 86.22it/s]
 34%|███▍      | 30/87 [00:00<00:00, 89.09it/s]
 45%|████▍     | 39/87 [00:00<00:00, 88.36it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 86.41it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 88.26it/s]
 77%|███████▋  | 67/87 [00:00<00:00, 88.60it/s]
 87%|████████▋ | 76/87 [00:00<00:00, 88.93it/s]
100%|██████████| 87/87 [00:00<00:00, 89.23it/s]
  0%|    

[2m[36m(func pid=14196)[0m Epoch 00127: reducing learning rate of group 0 to 1.2983e-05.


100%|██████████| 25/25 [00:00<00:00, 134.31it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 94.07it/s]
 23%|██▎       | 20/87 [00:00<00:00, 94.13it/s]
 34%|███▍      | 30/87 [00:00<00:00, 94.27it/s]
 46%|████▌     | 40/87 [00:00<00:00, 91.97it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 93.51it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 92.54it/s]
 80%|████████  | 70/87 [00:00<00:00, 91.44it/s]
100%|██████████| 87/87 [00:00<00:00, 92.75it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 206.94it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:00, 86.50it/s]
 22%|██▏       | 19/87 [00:00<00:00, 91.27it/s]
 33%|███▎      | 29/87 [00:00<00:00, 93.46it/s]
 45%|████▍     | 39/87 [00:00<00:00, 92.04it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 91.41it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 91.71it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 92.97it/s]
 91%|█████████ | 79/87 [00:00<00:00, 92.03it/s]
100%|██████████| 8

[2m[36m(func pid=14196)[0m Epoch 00134: reducing learning rate of group 0 to 6.4916e-06.


 23%|██▎       | 20/87 [00:00<00:00, 92.13it/s]
 34%|███▍      | 30/87 [00:00<00:00, 90.52it/s]
 46%|████▌     | 40/87 [00:00<00:00, 91.94it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 91.96it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 92.63it/s]
 80%|████████  | 70/87 [00:00<00:00, 93.57it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 93.54it/s]
 80%|████████  | 20/25 [00:00<00:00, 194.32it/s]
100%|██████████| 25/25 [00:00<00:00, 196.02it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 90.97it/s]
 23%|██▎       | 20/87 [00:00<00:00, 86.72it/s]
 34%|███▍      | 30/87 [00:00<00:00, 88.95it/s]
 46%|████▌     | 40/87 [00:00<00:00, 90.46it/s]
 57%|█████▋    | 50/87 [00:00<00:00, 91.10it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 91.70it/s]
 80%|████████  | 70/87 [00:00<00:00, 92.22it/s]
100%|██████████| 87/87 [00:00<00:00, 91.81it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 60%|██████    | 15/25 [00:00<00:00, 100.63it/s]
100%|██████████| 25/25 [00:00<00:00, 126.17it/s]
  0%| 

[2m[36m(func pid=7240)[0m Epoch 00148: reducing learning rate of group 0 to 5.9626e-05.


  8%|▊         | 13/173 [00:00<00:01, 129.03it/s]
 15%|█▌        | 26/173 [00:00<00:01, 125.77it/s]
 23%|██▎       | 39/173 [00:00<00:01, 125.31it/s]
 30%|███       | 52/173 [00:00<00:00, 122.89it/s]
 38%|███▊      | 65/173 [00:00<00:00, 121.55it/s]
 45%|████▌     | 78/173 [00:00<00:00, 120.93it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 122.44it/s]
 60%|██████    | 104/173 [00:00<00:00, 121.70it/s]
 68%|██████▊   | 117/173 [00:00<00:00, 120.95it/s]
 75%|███████▌  | 130/173 [00:01<00:00, 121.90it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 122.10it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 356.33it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 119.65it/s]
 14%|█▍        | 25/173 [00:00<00:01, 117.64it/s]
 22%|██▏       | 38/173 [00:00<00:01, 120.95it/s]
 29%|██▉       | 51/173 [00:00<00:01, 119.85it/s]
 37%|███▋      | 64/173 [00:00<00:00, 120.50it/s]
 45%|████▍     | 77/173 [00:00<00:00, 120.75it/s]
 52%|█████▏    | 9

[2m[36m(func pid=3668)[0m Epoch 00013: reducing learning rate of group 0 to 2.9212e-02.


 28%|██▊       | 24/87 [00:00<00:00, 111.72it/s]
 41%|████▏     | 36/87 [00:00<00:00, 110.20it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 109.78it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 110.27it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 110.28it/s]
100%|██████████| 87/87 [00:00<00:00, 110.94it/s]
100%|██████████| 25/25 [00:00<00:00, 244.83it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 109.18it/s]
 26%|██▋       | 23/87 [00:00<00:00, 110.89it/s]
 40%|████      | 35/87 [00:00<00:00, 110.41it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 108.84it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 108.43it/s]
 80%|████████  | 70/87 [00:00<00:00, 109.25it/s]
 93%|█████████▎| 81/87 [00:00<00:00, 108.61it/s]
100%|██████████| 87/87 [00:00<00:00, 109.15it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 25%|██▌       | 22/87 [00:00<00:00, 108.39it/s]
 38%|███▊      | 33/87 [00:00<00:00, 104.75it/s]
 51%|█████     | 44/87 [00:00<00:00, 106.37it/s]
 

[2m[36m(func pid=3668)[0m Epoch 00033: reducing learning rate of group 0 to 1.4606e-02.


 26%|██▋       | 23/87 [00:00<00:00, 110.03it/s]
 39%|███▉      | 34/87 [00:00<00:00, 109.20it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 107.71it/s]
 66%|██████▌   | 57/87 [00:00<00:00, 109.23it/s]
 78%|███████▊  | 68/87 [00:00<00:00, 108.85it/s]
100%|██████████| 87/87 [00:00<00:00, 109.14it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 137.36it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 107.74it/s]
 25%|██▌       | 22/87 [00:00<00:00, 106.85it/s]
 39%|███▉      | 34/87 [00:00<00:00, 109.57it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 110.20it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 109.38it/s]
 79%|███████▉  | 69/87 [00:00<00:00, 108.93it/s]
 93%|█████████▎| 81/87 [00:00<00:00, 110.34it/s]
100%|██████████| 87/87 [00:00<00:00, 109.81it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 105.80it/s]
 25%|██▌       | 22/87 [00:00<00:00, 107.70it/s]
 38%|███▊  

[2m[36m(func pid=3668)[0m Epoch 00066: reducing learning rate of group 0 to 7.3029e-03.


 26%|██▋       | 23/87 [00:00<00:00, 110.12it/s]
 40%|████      | 35/87 [00:00<00:00, 108.50it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 109.30it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 108.70it/s]
 80%|████████  | 70/87 [00:00<00:00, 109.57it/s]
100%|██████████| 87/87 [00:00<00:00, 109.48it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 109.59it/s]
 26%|██▋       | 23/87 [00:00<00:00, 107.10it/s]
 39%|███▉      | 34/87 [00:00<00:00, 104.25it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 105.95it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 106.60it/s]
 77%|███████▋  | 67/87 [00:00<00:00, 105.87it/s]
 90%|████████▉ | 78/87 [00:00<00:00, 103.40it/s]
100%|██████████| 87/87 [00:00<00:00, 105.63it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 232.44it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 109.15it/s]
 26%|██▋       | 23/87 [00:00<00:00, 112.96it/s]
 40%|████  

[2m[36m(func pid=3668)[0m Epoch 00079: reducing learning rate of group 0 to 3.6515e-03.


 26%|██▋       | 23/87 [00:00<00:00, 112.70it/s]
 40%|████      | 35/87 [00:00<00:00, 112.04it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 109.09it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 107.54it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 238.33it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:00, 79.92it/s]
 33%|███▎      | 29/87 [00:00<00:00, 95.51it/s]
 47%|████▋     | 41/87 [00:00<00:00, 103.58it/s]
 60%|█████▉    | 52/87 [00:00<00:00, 105.02it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 107.12it/s]
 86%|████████▌ | 75/87 [00:00<00:00, 106.38it/s]
100%|██████████| 87/87 [00:00<00:00, 102.98it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 243.19it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 105.82it/s]
 25%|██▌       | 22/87 [00:00<00:00, 106.70it/s]
 39%|███▉      | 34/87 [00:00<00:00, 108.09it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 110.25it/s]
 67%|██████▋  

[2m[36m(func pid=3668)[0m Epoch 00085: reducing learning rate of group 0 to 1.8257e-03.


 26%|██▋       | 23/87 [00:00<00:00, 107.65it/s]
 39%|███▉      | 34/87 [00:00<00:00, 106.26it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 106.15it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 105.82it/s]
 77%|███████▋  | 67/87 [00:00<00:00, 104.62it/s]
 90%|████████▉ | 78/87 [00:00<00:00, 106.00it/s]
100%|██████████| 87/87 [00:00<00:00, 106.69it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 229.83it/s]
 13%|█▎        | 11/87 [00:00<00:00, 108.90it/s]
 25%|██▌       | 22/87 [00:00<00:00, 108.37it/s]
 39%|███▉      | 34/87 [00:00<00:00, 109.74it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 110.96it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 111.42it/s]
 80%|████████  | 70/87 [00:00<00:00, 110.35it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 218.49it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 103.81it/s]
 26%|██▋       | 23/87 [00:00<00:00, 108.74it/s]
 40%|████      | 35/87 [00:00<00:00, 110.74it/s]
 

[2m[36m(func pid=3668)[0m Epoch 00091: reducing learning rate of group 0 to 9.1286e-04.


 25%|██▌       | 22/87 [00:00<00:00, 104.40it/s]
 38%|███▊      | 33/87 [00:00<00:00, 102.65it/s]
 51%|█████     | 44/87 [00:00<00:00, 105.17it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 105.60it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 220.22it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 11%|█▏        | 10/87 [00:00<00:00, 94.78it/s]
 24%|██▍       | 21/87 [00:00<00:00, 103.28it/s]
 38%|███▊      | 33/87 [00:00<00:00, 107.93it/s]
 51%|█████     | 44/87 [00:00<00:00, 108.32it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 108.89it/s]
 77%|███████▋  | 67/87 [00:00<00:00, 109.08it/s]
 91%|█████████ | 79/87 [00:00<00:00, 110.01it/s]
100%|██████████| 87/87 [00:00<00:00, 107.81it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 245.47it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 104.93it/s]
 26%|██▋       | 23/87 [00:00<00:00, 108.39it/s]
 40%|████      | 35/87 [00:00<00:00, 110.79it/s]
 54%|█████▍ 

[2m[36m(func pid=3668)[0m Epoch 00104: reducing learning rate of group 0 to 4.5643e-04.


 26%|██▋       | 23/87 [00:00<00:00, 112.65it/s]
 40%|████      | 35/87 [00:00<00:00, 112.95it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 111.86it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 110.48it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 110.75it/s]
100%|██████████| 25/25 [00:00<00:00, 238.81it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 10%|█         | 9/87 [00:00<00:01, 77.13it/s]
 21%|██        | 18/87 [00:00<00:00, 83.34it/s]
 34%|███▍      | 30/87 [00:00<00:00, 94.83it/s]
 48%|████▊     | 42/87 [00:00<00:00, 101.11it/s]
 62%|██████▏   | 54/87 [00:00<00:00, 104.60it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 107.29it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 106.82it/s]
100%|██████████| 87/87 [00:00<00:00, 102.86it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 134.05it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 106.55it/s]
 25%|██▌       | 22/87 [00:00<00:00, 107.91it/s]
 38%|███▊      | 33/87 [00:00<00:00, 108.42it/s]
 52%|

[2m[36m(func pid=3668)[0m Epoch 00114: reducing learning rate of group 0 to 2.2822e-04.


 28%|██▊       | 24/87 [00:00<00:00, 115.42it/s]
 41%|████▏     | 36/87 [00:00<00:00, 113.91it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 109.94it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 107.19it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 108.79it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 64%|██████▍   | 16/25 [00:00<00:00, 104.49it/s]
100%|██████████| 25/25 [00:00<00:00, 129.97it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 102.68it/s]
 25%|██▌       | 22/87 [00:00<00:00, 105.75it/s]
 38%|███▊      | 33/87 [00:00<00:00, 105.82it/s]
 51%|█████     | 44/87 [00:00<00:00, 104.84it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 105.95it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 105.94it/s]
100%|██████████| 87/87 [00:00<00:00, 105.81it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 229.48it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 110.60it/s]
 28%|██▊       | 24/87 [00:00<00:00, 109.03it/s]
 40%|████  

[2m[36m(func pid=3668)[0m Epoch 00120: reducing learning rate of group 0 to 1.1411e-04.


 26%|██▋       | 23/87 [00:00<00:00, 110.01it/s]
 40%|████      | 35/87 [00:00<00:00, 109.03it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 109.33it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 110.43it/s]
 80%|████████  | 70/87 [00:00<00:00, 112.04it/s]
100%|██████████| 87/87 [00:00<00:00, 111.00it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 140.87it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 105.42it/s]
 26%|██▋       | 23/87 [00:00<00:00, 108.93it/s]
 40%|████      | 35/87 [00:00<00:00, 110.85it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 110.64it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 109.72it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 110.18it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 243.56it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 106.10it/s]
 26%|██▋       | 23/87 [00:00<00:00, 109.67it/s]
 40%|████      | 35/87 [00:00<00:00, 110.25it/s]
 54%|█████▍

[2m[36m(func pid=3668)[0m Epoch 00126: reducing learning rate of group 0 to 5.7054e-05.


 26%|██▋       | 23/87 [00:00<00:00, 111.41it/s]
 40%|████      | 35/87 [00:00<00:00, 110.61it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 109.07it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 107.68it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 72%|███████▏  | 18/25 [00:00<00:00, 112.63it/s]
100%|██████████| 25/25 [00:00<00:00, 126.86it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 103.47it/s]
 26%|██▋       | 23/87 [00:00<00:00, 107.01it/s]
 39%|███▉      | 34/87 [00:00<00:00, 105.26it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 107.17it/s]
 66%|██████▌   | 57/87 [00:00<00:00, 107.44it/s]
 78%|███████▊  | 68/87 [00:00<00:00, 108.21it/s]
 91%|█████████ | 79/87 [00:00<00:00, 105.75it/s]
100%|██████████| 87/87 [00:00<00:00, 106.49it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 230.68it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 25%|██▌       | 22/87 [00:00<00:00, 105.98it/s]
 39%|███▉      | 34/87 [00:00<00:00, 108.86it/s]
 52%|█████▏

[2m[36m(func pid=3668)[0m Epoch 00132: reducing learning rate of group 0 to 2.8527e-05.


 13%|█▎        | 11/87 [00:00<00:00, 102.74it/s]
 26%|██▋       | 23/87 [00:00<00:00, 107.23it/s]
 40%|████      | 35/87 [00:00<00:00, 108.78it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 110.19it/s]
 68%|██████▊   | 59/87 [00:00<00:00, 110.14it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 110.09it/s]
 95%|█████████▌| 83/87 [00:00<00:00, 109.43it/s]
100%|██████████| 87/87 [00:00<00:00, 109.13it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 64%|██████▍   | 16/25 [00:00<00:00, 114.64it/s]
100%|██████████| 25/25 [00:00<00:00, 138.27it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 110.90it/s]
 28%|██▊       | 24/87 [00:00<00:00, 110.06it/s]
 41%|████▏     | 36/87 [00:00<00:00, 111.00it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 111.37it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 110.92it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 110.54it/s]
100%|██████████| 87/87 [00:00<00:00, 111.39it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 236.89it/s]
 

[2m[36m(func pid=3668)[0m Epoch 00141: reducing learning rate of group 0 to 1.4263e-05.


 13%|█▎        | 11/87 [00:00<00:00, 108.61it/s]
 25%|██▌       | 22/87 [00:00<00:00, 106.20it/s]
 39%|███▉      | 34/87 [00:00<00:00, 108.20it/s]
 53%|█████▎    | 46/87 [00:00<00:00, 110.52it/s]
 67%|██████▋   | 58/87 [00:00<00:00, 110.05it/s]
 80%|████████  | 70/87 [00:00<00:00, 111.72it/s]
100%|██████████| 87/87 [00:00<00:00, 110.29it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 137.27it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 14%|█▍        | 12/87 [00:00<00:00, 111.10it/s]
 28%|██▊       | 24/87 [00:00<00:00, 111.72it/s]
 41%|████▏     | 36/87 [00:00<00:00, 112.66it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 111.43it/s]
 69%|██████▉   | 60/87 [00:00<00:00, 111.61it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 111.54it/s]
 97%|█████████▋| 84/87 [00:00<00:00, 111.01it/s]
100%|██████████| 87/87 [00:00<00:00, 111.23it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 232.98it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎    

[2m[36m(func pid=3668)[0m Epoch 00147: reducing learning rate of group 0 to 7.1317e-06.


 25%|██▌       | 22/87 [00:00<00:00, 106.73it/s]
 38%|███▊      | 33/87 [00:00<00:00, 103.14it/s]
 52%|█████▏    | 45/87 [00:00<00:00, 104.72it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 106.15it/s]
 78%|███████▊  | 68/87 [00:00<00:00, 107.37it/s]
 92%|█████████▏| 80/87 [00:00<00:00, 108.38it/s]
100%|██████████| 87/87 [00:00<00:00, 107.37it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 44%|████▍     | 11/25 [00:00<00:00, 90.84it/s]
100%|██████████| 25/25 [00:00<00:00, 137.35it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
 13%|█▎        | 11/87 [00:00<00:00, 107.89it/s]
 25%|██▌       | 22/87 [00:00<00:00, 106.56it/s]
 38%|███▊      | 33/87 [00:00<00:00, 105.19it/s]
 51%|█████     | 44/87 [00:00<00:00, 105.13it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 106.50it/s]
 76%|███████▌  | 66/87 [00:00<00:00, 106.82it/s]
 89%|████████▊ | 77/87 [00:00<00:00, 106.13it/s]
100%|██████████| 87/87 [00:00<00:00, 106.83it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 235.95it/s]
  

[2m[36m(func pid=1200)[0m Epoch 00038: reducing learning rate of group 0 to 5.2839e-04.


 18%|█▊        | 31/173 [00:00<00:00, 151.85it/s]
 27%|██▋       | 47/173 [00:00<00:00, 146.38it/s]
 36%|███▌      | 62/173 [00:00<00:00, 145.21it/s]
 45%|████▍     | 77/173 [00:00<00:00, 145.28it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 144.74it/s]
 62%|██████▏   | 107/173 [00:00<00:00, 144.64it/s]
 71%|███████   | 123/173 [00:00<00:00, 146.56it/s]
 80%|███████▉  | 138/173 [00:00<00:00, 144.61it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 143.65it/s]
100%|██████████| 173/173 [00:01<00:00, 144.94it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 84%|████████▍ | 42/50 [00:00<00:00, 419.62it/s]
100%|██████████| 50/50 [00:00<00:00, 415.98it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 133.93it/s]
 17%|█▋        | 29/173 [00:00<00:01, 137.96it/s]
 25%|██▍       | 43/173 [00:00<00:00, 137.99it/s]
 34%|███▎      | 58/173 [00:00<00:00, 140.01it/s]
 42%|████▏     | 73/173 [00:00<00:00, 140.64it/s]
 51%|█████     | 88/173 [00:00<00:00, 140.89it/s]
 60%|█████▉    | 1

[2m[36m(func pid=1200)[0m Epoch 00048: reducing learning rate of group 0 to 2.6420e-04.


 16%|█▌        | 27/173 [00:00<00:01, 133.88it/s]
 24%|██▎       | 41/173 [00:00<00:00, 134.14it/s]
 32%|███▏      | 55/173 [00:00<00:00, 132.56it/s]
 40%|████      | 70/173 [00:00<00:00, 137.14it/s]
 49%|████▊     | 84/173 [00:00<00:00, 135.29it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 137.03it/s]
 66%|██████▌   | 114/173 [00:00<00:00, 140.46it/s]
 75%|███████▍  | 129/173 [00:00<00:00, 142.01it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 142.77it/s]
 92%|█████████▏| 159/173 [00:01<00:00, 140.08it/s]
100%|██████████| 173/173 [00:01<00:00, 138.83it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 76%|███████▌  | 38/50 [00:00<00:00, 373.41it/s]
100%|██████████| 50/50 [00:00<00:00, 375.45it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 125.19it/s]
 16%|█▌        | 27/173 [00:00<00:01, 130.79it/s]
 24%|██▎       | 41/173 [00:00<00:00, 134.76it/s]
 32%|███▏      | 56/173 [00:00<00:00, 138.08it/s]
 49%|████▉     | 85/173 [00:00<00:00, 139.76it/s]
 58%|█████▊    | 1

[2m[36m(func pid=1200)[0m Epoch 00074: reducing learning rate of group 0 to 1.3210e-04.


 18%|█▊        | 32/173 [00:00<00:00, 153.97it/s]
 28%|██▊       | 48/173 [00:00<00:00, 150.59it/s]
 37%|███▋      | 64/173 [00:00<00:00, 144.27it/s]
 46%|████▌     | 79/173 [00:00<00:00, 145.20it/s]
 63%|██████▎   | 109/173 [00:00<00:00, 147.34it/s]
 72%|███████▏  | 124/173 [00:00<00:00, 146.75it/s]
 80%|████████  | 139/173 [00:00<00:00, 144.75it/s]
 89%|████████▉ | 154/173 [00:01<00:00, 146.10it/s]
100%|██████████| 173/173 [00:01<00:00, 146.26it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 411.51it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  9%|▊         | 15/173 [00:00<00:01, 142.62it/s]
 17%|█▋        | 30/173 [00:00<00:01, 139.12it/s]
 26%|██▌       | 45/173 [00:00<00:00, 140.91it/s]
 35%|███▌      | 61/173 [00:00<00:00, 143.31it/s]
 44%|████▍     | 76/173 [00:00<00:00, 145.42it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 145.40it/s]
 61%|██████▏   | 106/173 [00:00<00:00, 144.00it/s]
 70%|██████▉   | 121/173 [00:00<00:00, 145.45it/s]
 79%|███████▉  

[2m[36m(func pid=1200)[0m Epoch 00090: reducing learning rate of group 0 to 6.6049e-05.


  0%|          | 0/173 [00:00<?, ?it/s]
  9%|▉         | 16/173 [00:00<00:01, 151.44it/s]
 18%|█▊        | 32/173 [00:00<00:00, 149.93it/s]
 27%|██▋       | 47/173 [00:00<00:00, 147.24it/s]
 36%|███▌      | 62/173 [00:00<00:00, 146.99it/s]
 45%|████▍     | 77/173 [00:00<00:00, 145.32it/s]
 54%|█████▍    | 93/173 [00:00<00:00, 147.44it/s]
 62%|██████▏   | 108/173 [00:00<00:00, 147.57it/s]
 71%|███████   | 123/173 [00:00<00:00, 147.51it/s]
 80%|███████▉  | 138/173 [00:00<00:00, 147.66it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 121.19it/s]
 97%|█████████▋| 168/173 [00:01<00:00, 128.57it/s]
100%|██████████| 173/173 [00:01<00:00, 139.04it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 399.56it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  9%|▊         | 15/173 [00:00<00:01, 148.42it/s]
 17%|█▋        | 30/173 [00:00<00:00, 145.95it/s]
 26%|██▌       | 45/173 [00:00<00:00, 144.97it/s]
 35%|███▍      | 60/173 [00:00<00:00, 144.89it/s]
 43%|████▎     | 75/173 [0

[2m[36m(func pid=1200)[0m Epoch 00102: reducing learning rate of group 0 to 3.3024e-05.


  9%|▊         | 15/173 [00:00<00:01, 148.92it/s]
 17%|█▋        | 30/173 [00:00<00:00, 147.82it/s]
 26%|██▌       | 45/173 [00:00<00:00, 144.61it/s]
 35%|███▍      | 60/173 [00:00<00:00, 144.76it/s]
 44%|████▍     | 76/173 [00:00<00:00, 146.54it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 147.02it/s]
 61%|██████▏   | 106/173 [00:00<00:00, 143.90it/s]
 70%|██████▉   | 121/173 [00:00<00:00, 141.89it/s]
 79%|███████▊  | 136/173 [00:00<00:00, 143.00it/s]
 87%|████████▋ | 151/173 [00:01<00:00, 141.38it/s]
100%|██████████| 173/173 [00:01<00:00, 144.32it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 411.13it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  9%|▊         | 15/173 [00:00<00:01, 148.56it/s]
 17%|█▋        | 30/173 [00:00<00:00, 145.22it/s]
 26%|██▌       | 45/173 [00:00<00:00, 144.72it/s]
 35%|███▍      | 60/173 [00:00<00:00, 144.02it/s]
 43%|████▎     | 75/173 [00:00<00:00, 143.62it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 145.19it/s]
 61%|██████▏   | 

[2m[36m(func pid=1200)[0m Epoch 00113: reducing learning rate of group 0 to 1.6512e-05.


 18%|█▊        | 31/173 [00:00<00:00, 150.78it/s]
 27%|██▋       | 47/173 [00:00<00:00, 143.47it/s]
 36%|███▌      | 62/173 [00:00<00:00, 139.17it/s]
 44%|████▍     | 76/173 [00:00<00:00, 137.48it/s]
 53%|█████▎    | 92/173 [00:00<00:00, 142.12it/s]
 62%|██████▏   | 107/173 [00:00<00:00, 143.66it/s]
 71%|███████   | 122/173 [00:00<00:00, 144.16it/s]
 79%|███████▉  | 137/173 [00:00<00:00, 141.71it/s]
 88%|████████▊ | 153/173 [00:01<00:00, 145.57it/s]
 97%|█████████▋| 168/173 [00:01<00:00, 145.40it/s]
100%|██████████| 173/173 [00:01<00:00, 143.61it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 410.11it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 132.04it/s]
 25%|██▌       | 44/173 [00:00<00:00, 142.67it/s]
 34%|███▍      | 59/173 [00:00<00:00, 141.75it/s]
 43%|████▎     | 74/173 [00:00<00:00, 140.61it/s]
 51%|█████▏    | 89/173 [00:00<00:00, 142.20it/s]
 61%|██████    | 105/173 [00:00<00:00, 144.70it/s]
 69%|██████▉   

[2m[36m(func pid=1200)[0m Epoch 00119: reducing learning rate of group 0 to 8.2561e-06.


 16%|█▌        | 28/173 [00:00<00:01, 137.57it/s]
 24%|██▍       | 42/173 [00:00<00:00, 137.97it/s]
 32%|███▏      | 56/173 [00:00<00:00, 137.54it/s]
 41%|████      | 71/173 [00:00<00:00, 137.84it/s]
 49%|████▉     | 85/173 [00:00<00:00, 137.71it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 136.54it/s]
 66%|██████▌   | 114/173 [00:00<00:00, 138.29it/s]
 75%|███████▍  | 129/173 [00:00<00:00, 141.41it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 141.02it/s]
 92%|█████████▏| 159/173 [00:01<00:00, 141.98it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 404.04it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 122.53it/s]
 16%|█▌        | 28/173 [00:00<00:01, 138.21it/s]
 24%|██▍       | 42/173 [00:00<00:01, 104.00it/s]
 33%|███▎      | 57/173 [00:00<00:00, 117.85it/s]
 41%|████      | 71/173 [00:00<00:00, 123.18it/s]
 49%|████▉     | 85/173 [00:00<00:00, 127.65it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 131.03it/s]
 66%|██████▌   | 1

[2m[36m(func pid=1200)[0m Epoch 00125: reducing learning rate of group 0 to 4.1281e-06.


 15%|█▌        | 26/173 [00:00<00:01, 93.84it/s] 
 24%|██▎       | 41/173 [00:00<00:01, 113.41it/s]
 32%|███▏      | 56/173 [00:00<00:00, 123.51it/s]
 50%|████▉     | 86/173 [00:00<00:00, 134.45it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 138.33it/s]
 68%|██████▊   | 117/173 [00:00<00:00, 141.20it/s]
 76%|███████▋  | 132/173 [00:00<00:00, 143.63it/s]
 85%|████████▍ | 147/173 [00:01<00:00, 144.19it/s]
 94%|█████████▍| 163/173 [00:01<00:00, 145.94it/s]
100%|██████████| 173/173 [00:01<00:00, 135.26it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 414.36it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  9%|▉         | 16/173 [00:00<00:01, 148.19it/s]
 18%|█▊        | 31/173 [00:00<00:00, 147.60it/s]
 35%|███▌      | 61/173 [00:00<00:00, 148.04it/s]
 44%|████▍     | 76/173 [00:00<00:00, 147.61it/s]
 53%|█████▎    | 91/173 [00:00<00:00, 146.95it/s]
 61%|██████▏   | 106/173 [00:00<00:00, 147.83it/s]
 71%|███████   | 122/173 [00:00<00:00, 149.81it/s]
 79%|███████▉ 

[2m[36m(func pid=1200)[0m Epoch 00131: reducing learning rate of group 0 to 2.0640e-06.


  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 138.28it/s]
 17%|█▋        | 29/173 [00:00<00:00, 144.56it/s]
 25%|██▌       | 44/173 [00:00<00:00, 143.06it/s]
 34%|███▍      | 59/173 [00:00<00:00, 144.54it/s]
 43%|████▎     | 75/173 [00:00<00:00, 146.97it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 146.04it/s]
 61%|██████    | 105/173 [00:00<00:00, 143.77it/s]
 69%|██████▉   | 120/173 [00:00<00:00, 142.64it/s]
 78%|███████▊  | 135/173 [00:00<00:00, 142.56it/s]
 87%|████████▋ | 150/173 [00:01<00:00, 144.03it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 392.22it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 121.57it/s]
 16%|█▌        | 27/173 [00:00<00:01, 130.68it/s]
 24%|██▍       | 42/173 [00:00<00:00, 137.99it/s]
 33%|███▎      | 57/173 [00:00<00:00, 141.56it/s]
 42%|████▏     | 72/173 [00:00<00:00, 144.16it/s]
 50%|█████     | 87/173 [00:00<00:00, 144.22it/s]
 59%|█████▉    | 102/173 [00

[2m[36m(func pid=1200)[0m Epoch 00137: reducing learning rate of group 0 to 1.0320e-06.


 17%|█▋        | 29/173 [00:00<00:01, 138.22it/s]
 25%|██▌       | 44/173 [00:00<00:00, 141.07it/s]
 34%|███▍      | 59/173 [00:00<00:00, 142.67it/s]
 43%|████▎     | 74/173 [00:00<00:00, 142.86it/s]
 51%|█████▏    | 89/173 [00:00<00:00, 144.82it/s]
 69%|██████▉   | 119/173 [00:00<00:00, 144.44it/s]
 77%|███████▋  | 134/173 [00:00<00:00, 146.11it/s]
 86%|████████▌ | 149/173 [00:01<00:00, 146.88it/s]
 95%|█████████▍| 164/173 [00:01<00:00, 147.37it/s]
100%|██████████| 173/173 [00:01<00:00, 144.48it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 416.00it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 129.89it/s]
 17%|█▋        | 29/173 [00:00<00:01, 138.10it/s]
 25%|██▌       | 44/173 [00:00<00:00, 142.41it/s]
 34%|███▍      | 59/173 [00:00<00:00, 143.42it/s]
 43%|████▎     | 74/173 [00:00<00:00, 141.29it/s]
 51%|█████▏    | 89/173 [00:00<00:00, 143.64it/s]
 60%|██████    | 104/173 [00:00<00:00, 143.45it/s]
 69%|██████▉   |

[2m[36m(func pid=1200)[0m Epoch 00143: reducing learning rate of group 0 to 5.1601e-07.


  9%|▊         | 15/173 [00:00<00:01, 142.55it/s]
 17%|█▋        | 30/173 [00:00<00:00, 145.23it/s]
 26%|██▌       | 45/173 [00:00<00:00, 143.90it/s]
 35%|███▍      | 60/173 [00:00<00:00, 145.83it/s]
 43%|████▎     | 75/173 [00:00<00:00, 146.76it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 146.49it/s]
 61%|██████    | 105/173 [00:00<00:00, 145.76it/s]
 69%|██████▉   | 120/173 [00:00<00:00, 146.01it/s]
 78%|███████▊  | 135/173 [00:00<00:00, 146.42it/s]
 87%|████████▋ | 150/173 [00:01<00:00, 146.52it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 388.86it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 133.98it/s]
 16%|█▌        | 28/173 [00:00<00:01, 136.98it/s]
 25%|██▍       | 43/173 [00:00<00:00, 141.08it/s]
 34%|███▎      | 58/173 [00:00<00:00, 142.71it/s]
 42%|████▏     | 73/173 [00:00<00:00, 143.67it/s]
 51%|█████     | 88/173 [00:00<00:00, 144.88it/s]
 60%|██████    | 104/173 [00:00<00:00, 147.24it/s]
 69%|██████▉   | 

[2m[36m(func pid=1200)[0m Epoch 00149: reducing learning rate of group 0 to 2.5800e-07.


 15%|█▌        | 26/173 [00:00<00:01, 120.86it/s]
 23%|██▎       | 39/173 [00:00<00:01, 116.64it/s]
 32%|███▏      | 55/173 [00:00<00:00, 129.16it/s]
 40%|████      | 70/173 [00:00<00:00, 132.89it/s]
 49%|████▉     | 85/173 [00:00<00:00, 137.02it/s]
 58%|█████▊    | 100/173 [00:00<00:00, 139.16it/s]
 66%|██████▌   | 114/173 [00:00<00:00, 136.58it/s]
 75%|███████▍  | 129/173 [00:00<00:00, 138.48it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 140.17it/s]
 92%|█████████▏| 160/173 [00:01<00:00, 142.79it/s]
100%|██████████| 173/173 [00:01<00:00, 128.91it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 407.01it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  1%|          | 1/173 [00:00<00:45,  3.82it/s]
  9%|▊         | 15/173 [00:00<00:03, 50.66it/s]
 17%|█▋        | 29/173 [00:00<00:01, 78.60it/s]
 25%|██▌       | 44/173 [00:00<00:01, 98.58it/s]
 34%|███▎      | 58/173 [00:00<00:01, 111.15it/s]
 42%|████▏     | 72/173 [00:00<0

[2m[36m(func pid=15868)[0m Epoch 00041: reducing learning rate of group 0 to 3.7449e-04.


  8%|▊         | 13/173 [00:00<00:01, 122.75it/s]
 16%|█▌        | 27/173 [00:00<00:01, 128.63it/s]
 24%|██▎       | 41/173 [00:00<00:01, 131.28it/s]
 40%|███▉      | 69/173 [00:00<00:00, 135.73it/s]
 48%|████▊     | 83/173 [00:00<00:00, 137.14it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 110.62it/s]
 65%|██████▍   | 112/173 [00:00<00:00, 119.21it/s]
 73%|███████▎  | 127/173 [00:01<00:00, 125.63it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 129.34it/s]
 91%|█████████ | 157/173 [00:01<00:00, 132.12it/s]
 99%|█████████▉| 171/173 [00:01<00:00, 133.80it/s]
100%|██████████| 173/173 [00:01<00:00, 128.94it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 82%|████████▏ | 41/50 [00:00<00:00, 409.46it/s]
100%|██████████| 50/50 [00:00<00:00, 404.54it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 138.64it/s]
 17%|█▋        | 29/173 [00:00<00:01, 142.02it/s]
 25%|██▌       | 44/173 [00:00<00:00, 141.77it/s]
 34%|███▍      | 59/173 [00:00<00:00, 142.11it/s]
 43%|████▎     | 

[2m[36m(func pid=15868)[0m Epoch 00049: reducing learning rate of group 0 to 1.8725e-04.


 16%|█▌        | 28/173 [00:00<00:01, 135.53it/s]
 24%|██▍       | 42/173 [00:00<00:00, 136.36it/s]
 32%|███▏      | 56/173 [00:00<00:00, 135.78it/s]
 41%|████      | 71/173 [00:00<00:00, 138.75it/s]
 50%|████▉     | 86/173 [00:00<00:00, 140.59it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 141.68it/s]
 67%|██████▋   | 116/173 [00:00<00:00, 142.22it/s]
 76%|███████▌  | 131/173 [00:00<00:00, 141.96it/s]
 93%|█████████▎| 161/173 [00:01<00:00, 144.68it/s]
100%|██████████| 173/173 [00:01<00:00, 141.21it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 82%|████████▏ | 41/50 [00:00<00:00, 409.35it/s]
100%|██████████| 50/50 [00:00<00:00, 411.90it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 138.56it/s]
 17%|█▋        | 29/173 [00:00<00:01, 140.31it/s]
 25%|██▌       | 44/173 [00:00<00:00, 141.37it/s]
 34%|███▍      | 59/173 [00:00<00:00, 142.56it/s]
 43%|████▎     | 74/173 [00:00<00:00, 143.15it/s]
 60%|██████    | 104/173 [00:00<00:00, 143.63it/s]
 69%|██████▉   | 

[2m[36m(func pid=15868)[0m Epoch 00066: reducing learning rate of group 0 to 9.3623e-05.


 17%|█▋        | 29/173 [00:00<00:01, 143.01it/s]
 25%|██▌       | 44/173 [00:00<00:00, 142.11it/s]
 34%|███▍      | 59/173 [00:00<00:00, 140.40it/s]
 43%|████▎     | 74/173 [00:00<00:00, 140.40it/s]
 51%|█████▏    | 89/173 [00:00<00:00, 140.98it/s]
 60%|██████    | 104/173 [00:00<00:00, 142.43it/s]
 69%|██████▉   | 119/173 [00:00<00:00, 142.78it/s]
 77%|███████▋  | 134/173 [00:00<00:00, 143.11it/s]
 86%|████████▌ | 149/173 [00:01<00:00, 143.56it/s]
100%|██████████| 173/173 [00:01<00:00, 142.48it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 355.54it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 126.03it/s]
 16%|█▌        | 27/173 [00:00<00:01, 132.04it/s]
 24%|██▎       | 41/173 [00:00<00:00, 134.32it/s]
 32%|███▏      | 56/173 [00:00<00:00, 137.80it/s]
 41%|████      | 71/173 [00:00<00:00, 139.67it/s]
 50%|████▉     | 86/173 [00:00<00:00, 140.70it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 139.61it/s]
 66%|██████▋   |

[2m[36m(func pid=15868)[0m Epoch 00072: reducing learning rate of group 0 to 4.6812e-05.


  8%|▊         | 14/173 [00:00<00:01, 136.11it/s]
 17%|█▋        | 29/173 [00:00<00:01, 138.17it/s]
 25%|██▍       | 43/173 [00:00<00:00, 137.38it/s]
 33%|███▎      | 57/173 [00:00<00:00, 136.35it/s]
 41%|████      | 71/173 [00:00<00:00, 131.91it/s]
 49%|████▉     | 85/173 [00:00<00:00, 131.41it/s]
 58%|█████▊    | 100/173 [00:00<00:00, 135.93it/s]
 66%|██████▋   | 115/173 [00:00<00:00, 138.81it/s]
 75%|███████▍  | 129/173 [00:00<00:00, 137.26it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 137.65it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 393.15it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 100.26it/s]
 13%|█▎        | 22/173 [00:00<00:01, 100.63it/s]
 21%|██        | 36/173 [00:00<00:01, 115.70it/s]
 29%|██▉       | 51/173 [00:00<00:00, 126.29it/s]
 38%|███▊      | 65/173 [00:00<00:00, 129.33it/s]
 46%|████▌     | 79/173 [00:00<00:00, 130.91it/s]
 54%|█████▍    | 94/173 [00:00<00:00, 135.12it/s]
 63%|██████▎   | 1

[2m[36m(func pid=15868)[0m Epoch 00080: reducing learning rate of group 0 to 2.3406e-05.


 17%|█▋        | 30/173 [00:00<00:00, 143.45it/s]
 26%|██▌       | 45/173 [00:00<00:00, 140.73it/s]
 35%|███▍      | 60/173 [00:00<00:00, 137.01it/s]
 43%|████▎     | 74/173 [00:00<00:00, 136.56it/s]
 51%|█████     | 88/173 [00:00<00:00, 134.97it/s]
 68%|██████▊   | 117/173 [00:00<00:00, 137.55it/s]
 76%|███████▌  | 131/173 [00:00<00:00, 136.32it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 136.40it/s]
 92%|█████████▏| 159/173 [00:01<00:00, 136.47it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 390.83it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 129.44it/s]
 17%|█▋        | 29/173 [00:00<00:01, 134.14it/s]
 25%|██▍       | 43/173 [00:00<00:01, 125.40it/s]
 33%|███▎      | 57/173 [00:00<00:00, 128.73it/s]
 41%|████      | 71/173 [00:00<00:00, 131.35it/s]
 49%|████▉     | 85/173 [00:00<00:00, 130.49it/s]
 58%|█████▊    | 100/173 [00:00<00:00, 135.02it/s]
 66%|██████▋   | 115/173 [00:00<00:00, 137.55it/s]
 75%|███████▌  |

[2m[36m(func pid=15868)[0m Epoch 00086: reducing learning rate of group 0 to 1.1703e-05.


 17%|█▋        | 29/173 [00:00<00:01, 137.74it/s]
 25%|██▍       | 43/173 [00:00<00:00, 137.42it/s]
 33%|███▎      | 57/173 [00:00<00:00, 137.23it/s]
 41%|████      | 71/173 [00:00<00:00, 138.11it/s]
 50%|████▉     | 86/173 [00:00<00:00, 140.50it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 140.74it/s]
 67%|██████▋   | 116/173 [00:00<00:00, 139.76it/s]
 75%|███████▌  | 130/173 [00:00<00:00, 136.58it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 133.50it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 393.83it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 122.34it/s]
 16%|█▌        | 28/173 [00:00<00:01, 133.30it/s]
 25%|██▍       | 43/173 [00:00<00:00, 136.29it/s]
 33%|███▎      | 57/173 [00:00<00:00, 134.56it/s]
 41%|████      | 71/173 [00:00<00:00, 131.84it/s]
 49%|████▉     | 85/173 [00:00<00:00, 130.38it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 127.79it/s]
 65%|██████▌   | 113/173 [00:00<00:00, 130.67it/s]
 73%|███████▎  | 

[2m[36m(func pid=15868)[0m Epoch 00092: reducing learning rate of group 0 to 5.8515e-06.


  8%|▊         | 14/173 [00:00<00:01, 137.08it/s]
 16%|█▌        | 28/173 [00:00<00:01, 138.62it/s]
 24%|██▍       | 42/173 [00:00<00:00, 138.94it/s]
 32%|███▏      | 56/173 [00:00<00:00, 137.85it/s]
 41%|████      | 71/173 [00:00<00:00, 138.59it/s]
 49%|████▉     | 85/173 [00:00<00:00, 138.09it/s]
 57%|█████▋    | 99/173 [00:00<00:00, 138.65it/s]
 65%|██████▌   | 113/173 [00:00<00:00, 137.76it/s]
 74%|███████▍  | 128/173 [00:00<00:00, 139.82it/s]
 82%|████████▏ | 142/173 [00:01<00:00, 139.47it/s]
100%|██████████| 173/173 [00:01<00:00, 139.18it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 395.35it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 120.22it/s]
 16%|█▌        | 28/173 [00:00<00:01, 132.47it/s]
 25%|██▍       | 43/173 [00:00<00:00, 137.88it/s]
 34%|███▎      | 58/173 [00:00<00:00, 140.57it/s]
 42%|████▏     | 73/173 [00:00<00:00, 141.25it/s]
 51%|█████     | 88/173 [00:00<00:00, 142.32it/s]
 60%|█████▉    | 1

[2m[36m(func pid=15868)[0m Epoch 00098: reducing learning rate of group 0 to 2.9257e-06.


  8%|▊         | 13/173 [00:00<00:01, 129.11it/s]
 25%|██▍       | 43/173 [00:00<00:00, 140.86it/s]
 34%|███▎      | 58/173 [00:00<00:00, 136.50it/s]
 42%|████▏     | 73/173 [00:00<00:00, 136.48it/s]
 50%|█████     | 87/173 [00:00<00:00, 134.38it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 135.87it/s]
 66%|██████▋   | 115/173 [00:00<00:00, 133.45it/s]
 75%|███████▌  | 130/173 [00:00<00:00, 136.42it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 138.79it/s]
100%|██████████| 173/173 [00:01<00:00, 136.67it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 24%|██▍       | 12/50 [00:00<00:00, 110.21it/s]
100%|██████████| 50/50 [00:00<00:00, 238.16it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
 16%|█▌        | 28/173 [00:00<00:01, 133.75it/s]
 25%|██▍       | 43/173 [00:00<00:00, 136.61it/s]
 34%|███▎      | 58/173 [00:00<00:00, 139.73it/s]
 42%|████▏     | 72/173 [00:00<00:00, 138.87it/s]
 50%|████▉     | 86/173 [00:00<00:00, 138.76it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 140.95it/s]
 67%|██████▋   | 

[2m[36m(func pid=15868)[0m Epoch 00104: reducing learning rate of group 0 to 1.4629e-06.


 16%|█▌        | 28/173 [00:00<00:01, 130.13it/s]
 25%|██▍       | 43/173 [00:00<00:00, 136.25it/s]
 33%|███▎      | 57/173 [00:00<00:00, 131.36it/s]
 42%|████▏     | 72/173 [00:00<00:00, 135.73it/s]
 50%|█████     | 87/173 [00:00<00:00, 138.40it/s]
 59%|█████▉    | 102/173 [00:00<00:00, 138.85it/s]
 67%|██████▋   | 116/173 [00:00<00:00, 137.97it/s]
 76%|███████▌  | 131/173 [00:00<00:00, 139.00it/s]
 84%|████████▍ | 145/173 [00:01<00:00, 138.95it/s]
 92%|█████████▏| 159/173 [00:01<00:00, 138.05it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 396.72it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 128.14it/s]
 15%|█▌        | 26/173 [00:00<00:01, 128.28it/s]
 24%|██▎       | 41/173 [00:00<00:00, 132.59it/s]
 32%|███▏      | 55/173 [00:00<00:00, 133.49it/s]
 40%|███▉      | 69/173 [00:00<00:00, 133.89it/s]
 48%|████▊     | 83/173 [00:00<00:00, 133.67it/s]
 56%|█████▌    | 97/173 [00:00<00:00, 135.64it/s]
 64%|██████▍   | 

[2m[36m(func pid=15868)[0m Epoch 00110: reducing learning rate of group 0 to 7.3143e-07.


  9%|▊         | 15/173 [00:00<00:01, 140.92it/s]
 17%|█▋        | 30/173 [00:00<00:01, 139.10it/s]
 25%|██▌       | 44/173 [00:00<00:00, 137.14it/s]
 34%|███▎      | 58/173 [00:00<00:00, 135.61it/s]
 42%|████▏     | 72/173 [00:00<00:00, 135.74it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 138.03it/s]
 66%|██████▋   | 115/173 [00:00<00:00, 137.56it/s]
 75%|███████▌  | 130/173 [00:00<00:00, 139.64it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 138.76it/s]
 91%|█████████▏| 158/173 [00:01<00:00, 139.11it/s]
 82%|████████▏ | 41/50 [00:00<00:00, 398.12it/s]
100%|██████████| 50/50 [00:00<00:00, 393.64it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 128.46it/s]
 16%|█▌        | 28/173 [00:00<00:01, 132.77it/s]
 25%|██▍       | 43/173 [00:00<00:00, 137.87it/s]
 33%|███▎      | 57/173 [00:00<00:00, 137.93it/s]
 50%|████▉     | 86/173 [00:00<00:00, 137.51it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 138.71it/s]
 66%|██████▋   | 115/173 [00:00<00:00, 137.78it/s]
 75%|

[2m[36m(func pid=15868)[0m Epoch 00116: reducing learning rate of group 0 to 3.6572e-07.


 17%|█▋        | 30/173 [00:00<00:01, 142.49it/s]
 26%|██▌       | 45/173 [00:00<00:00, 141.95it/s]
 35%|███▍      | 60/173 [00:00<00:00, 140.44it/s]
 43%|████▎     | 75/173 [00:00<00:00, 141.28it/s]
 52%|█████▏    | 90/173 [00:00<00:00, 142.28it/s]
 61%|██████    | 105/173 [00:00<00:00, 142.56it/s]
 69%|██████▉   | 120/173 [00:00<00:00, 142.49it/s]
 78%|███████▊  | 135/173 [00:00<00:00, 142.07it/s]
 87%|████████▋ | 150/173 [00:01<00:00, 141.15it/s]
 95%|█████████▌| 165/173 [00:01<00:00, 141.21it/s]
 80%|████████  | 40/50 [00:00<00:00, 396.38it/s]
100%|██████████| 50/50 [00:00<00:00, 390.79it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  7%|▋         | 12/173 [00:00<00:01, 117.28it/s]
 15%|█▌        | 26/173 [00:00<00:01, 126.86it/s]
 24%|██▎       | 41/173 [00:00<00:00, 134.81it/s]
 32%|███▏      | 55/173 [00:00<00:00, 136.37it/s]
 40%|████      | 70/173 [00:00<00:00, 139.17it/s]
 49%|████▉     | 85/173 [00:00<00:00, 139.57it/s]
 58%|█████▊    | 100/173 [00:00<00:00, 141.05it/s]
 66%|█

[2m[36m(func pid=15868)[0m Epoch 00122: reducing learning rate of group 0 to 1.8286e-07.


 16%|█▌        | 27/173 [00:00<00:01, 131.08it/s]
 24%|██▎       | 41/173 [00:00<00:00, 134.30it/s]
 32%|███▏      | 55/173 [00:00<00:00, 133.68it/s]
 40%|████      | 70/173 [00:00<00:00, 136.70it/s]
 49%|████▊     | 84/173 [00:00<00:00, 137.27it/s]
 57%|█████▋    | 98/173 [00:00<00:00, 137.62it/s]
 65%|██████▍   | 112/173 [00:00<00:00, 137.11it/s]
 73%|███████▎  | 127/173 [00:00<00:00, 139.10it/s]
 82%|████████▏ | 141/173 [00:01<00:00, 132.79it/s]
 90%|████████▉ | 155/173 [00:01<00:00, 130.69it/s]
100%|██████████| 173/173 [00:01<00:00, 133.50it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 375.94it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 129.98it/s]
 16%|█▌        | 28/173 [00:00<00:01, 134.78it/s]
 24%|██▍       | 42/173 [00:00<00:00, 134.12it/s]
 33%|███▎      | 57/173 [00:00<00:00, 137.01it/s]
 41%|████      | 71/173 [00:00<00:00, 135.25it/s]
 49%|████▉     | 85/173 [00:00<00:00, 134.23it/s]
 57%|█████▋    | 

[2m[36m(func pid=15868)[0m Epoch 00128: reducing learning rate of group 0 to 9.1429e-08.


  8%|▊         | 14/173 [00:00<00:01, 139.20it/s]
 16%|█▌        | 28/173 [00:00<00:01, 135.81it/s]
 24%|██▍       | 42/173 [00:00<00:00, 137.63it/s]
 33%|███▎      | 57/173 [00:00<00:00, 140.33it/s]
 42%|████▏     | 72/173 [00:00<00:00, 139.30it/s]
 50%|████▉     | 86/173 [00:00<00:00, 112.04it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 121.39it/s]
 66%|██████▋   | 115/173 [00:00<00:00, 125.80it/s]
 75%|███████▍  | 129/173 [00:01<00:00, 129.37it/s]
 83%|████████▎ | 143/173 [00:01<00:00, 132.09it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
 80%|████████  | 40/50 [00:00<00:00, 399.10it/s]
100%|██████████| 50/50 [00:00<00:00, 395.99it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  6%|▋         | 11/173 [00:00<00:01, 108.70it/s]
 14%|█▍        | 25/173 [00:00<00:01, 126.63it/s]
 23%|██▎       | 39/173 [00:00<00:01, 132.12it/s]
 31%|███       | 53/173 [00:00<00:00, 133.17it/s]
 39%|███▊      | 67/173 [00:00<00:00, 135.17it/s]
 47%|████▋     | 81/173 [00:00<00:00, 131.92it/s]
 55%|█████▍    | 95

[2m[36m(func pid=15868)[0m Epoch 00134: reducing learning rate of group 0 to 4.5715e-08.


  0%|          | 0/173 [00:00<?, ?it/s]
  9%|▊         | 15/173 [00:00<00:01, 140.83it/s]
 17%|█▋        | 30/173 [00:00<00:01, 137.48it/s]
 25%|██▌       | 44/173 [00:00<00:01, 119.81it/s]
 34%|███▍      | 59/173 [00:00<00:00, 128.21it/s]
 43%|████▎     | 74/173 [00:00<00:00, 133.56it/s]
 51%|█████     | 88/173 [00:00<00:00, 134.24it/s]
 59%|█████▉    | 102/173 [00:00<00:00, 134.92it/s]
 67%|██████▋   | 116/173 [00:00<00:00, 135.66it/s]
 76%|███████▌  | 131/173 [00:00<00:00, 138.21it/s]
 84%|████████▍ | 146/173 [00:01<00:00, 139.66it/s]
 93%|█████████▎| 161/173 [00:01<00:00, 141.39it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 397.32it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 123.63it/s]
 16%|█▌        | 28/173 [00:00<00:01, 132.94it/s]
 25%|██▍       | 43/173 [00:00<00:00, 136.87it/s]
 33%|███▎      | 57/173 [00:00<00:00, 136.95it/s]
 41%|████      | 71/173 [00:00<00:00, 137.53it/s]
 49%|████▉     | 85/173 [00

[2m[36m(func pid=15868)[0m Epoch 00140: reducing learning rate of group 0 to 2.2857e-08.


  9%|▊         | 15/173 [00:00<00:01, 141.32it/s]
 17%|█▋        | 30/173 [00:00<00:01, 133.00it/s]
 26%|██▌       | 45/173 [00:00<00:00, 135.93it/s]
 34%|███▍      | 59/173 [00:00<00:00, 129.67it/s]
 42%|████▏     | 73/173 [00:00<00:00, 129.58it/s]
 50%|█████     | 87/173 [00:00<00:00, 132.49it/s]
 58%|█████▊    | 101/173 [00:00<00:00, 134.06it/s]
 66%|██████▋   | 115/173 [00:00<00:00, 132.61it/s]
 75%|███████▍  | 129/173 [00:00<00:00, 134.78it/s]
 83%|████████▎ | 144/173 [00:01<00:00, 136.97it/s]
 92%|█████████▏| 159/173 [00:01<00:00, 139.10it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 390.75it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 14/173 [00:00<00:01, 133.55it/s]
 16%|█▌        | 28/173 [00:00<00:01, 128.08it/s]
 25%|██▍       | 43/173 [00:00<00:00, 133.89it/s]
 33%|███▎      | 57/173 [00:00<00:00, 135.95it/s]
 41%|████      | 71/173 [00:00<00:00, 136.94it/s]
 49%|████▉     | 85/173 [00:00<00:00, 137.34it/s]
 58%|█████▊    | 

[2m[36m(func pid=15868)[0m Epoch 00146: reducing learning rate of group 0 to 1.1429e-08.


  9%|▊         | 15/173 [00:00<00:01, 140.37it/s]
 17%|█▋        | 30/173 [00:00<00:01, 138.81it/s]
 25%|██▌       | 44/173 [00:00<00:00, 136.18it/s]
 34%|███▍      | 59/173 [00:00<00:00, 139.13it/s]
 42%|████▏     | 73/173 [00:00<00:00, 137.22it/s]
 51%|█████     | 88/173 [00:00<00:00, 139.07it/s]
 59%|█████▉    | 102/173 [00:00<00:00, 139.31it/s]
 68%|██████▊   | 117/173 [00:00<00:00, 140.50it/s]
 76%|███████▋  | 132/173 [00:00<00:00, 137.33it/s]
 85%|████████▍ | 147/173 [00:01<00:00, 139.75it/s]
 94%|█████████▎| 162/173 [00:01<00:00, 140.73it/s]
100%|██████████| 173/173 [00:01<00:00, 139.26it/s]
  0%|          | 0/50 [00:00<?, ?it/s]
100%|██████████| 50/50 [00:00<00:00, 389.36it/s]
  0%|          | 0/173 [00:00<?, ?it/s]
  8%|▊         | 13/173 [00:00<00:01, 127.10it/s]
 16%|█▌        | 28/173 [00:00<00:01, 136.74it/s]
 24%|██▍       | 42/173 [00:00<00:00, 135.68it/s]
 32%|███▏      | 56/173 [00:00<00:00, 135.26it/s]
 40%|████      | 70/173 [00:00<00:00, 135.15it/s]
 49%|████▊     |

[2m[36m(func pid=10100)[0m Epoch 00020: reducing learning rate of group 0 to 3.1266e-03.


 17%|█▋        | 15/87 [00:00<00:00, 75.34it/s]
 26%|██▋       | 23/87 [00:00<00:00, 74.70it/s]
 36%|███▌      | 31/87 [00:00<00:00, 75.91it/s]
 45%|████▍     | 39/87 [00:00<00:00, 76.27it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 76.42it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 76.82it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 75.91it/s]
 91%|█████████ | 79/87 [00:01<00:00, 76.72it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 165.27it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  8%|▊         | 7/87 [00:00<00:01, 64.08it/s]
 17%|█▋        | 15/87 [00:00<00:01, 69.42it/s]
 26%|██▋       | 23/87 [00:00<00:00, 73.24it/s]
 36%|███▌      | 31/87 [00:00<00:00, 73.83it/s]
 45%|████▍     | 39/87 [00:00<00:00, 74.74it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 71.95it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 72.38it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 72.97it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 73.26it/s]
 91%|█████████ | 79/87 [00:01<00:00, 74.94it/s]
100%|█████

[2m[36m(func pid=10100)[0m Epoch 00035: reducing learning rate of group 0 to 1.5633e-03.


 20%|█▉        | 17/87 [00:00<00:00, 76.92it/s]
 29%|██▊       | 25/87 [00:00<00:00, 77.18it/s]
 38%|███▊      | 33/87 [00:00<00:00, 76.57it/s]
 47%|████▋     | 41/87 [00:00<00:00, 74.64it/s]
 56%|█████▋    | 49/87 [00:00<00:00, 72.64it/s]
 66%|██████▌   | 57/87 [00:00<00:00, 71.17it/s]
 75%|███████▍  | 65/87 [00:00<00:00, 71.51it/s]
 84%|████████▍ | 73/87 [00:01<00:00, 70.49it/s]
 93%|█████████▎| 81/87 [00:01<00:00, 72.70it/s]
100%|██████████| 87/87 [00:01<00:00, 73.49it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 183.96it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 71.99it/s]
 18%|█▊        | 16/87 [00:00<00:00, 72.78it/s]
 28%|██▊       | 24/87 [00:00<00:00, 71.87it/s]
 37%|███▋      | 32/87 [00:00<00:00, 71.91it/s]
 46%|████▌     | 40/87 [00:00<00:00, 73.06it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 73.42it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 72.38it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 74.04it/s]
 83%|█████

[2m[36m(func pid=10100)[0m Epoch 00044: reducing learning rate of group 0 to 7.8164e-04.


 28%|██▊       | 24/87 [00:00<00:00, 74.74it/s]
 37%|███▋      | 32/87 [00:00<00:00, 76.43it/s]
 46%|████▌     | 40/87 [00:00<00:00, 77.11it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 77.07it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 77.05it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 75.38it/s]
100%|██████████| 87/87 [00:01<00:00, 75.98it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 177.34it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 74.16it/s]
 18%|█▊        | 16/87 [00:00<00:00, 71.20it/s]
 28%|██▊       | 24/87 [00:00<00:00, 70.19it/s]
 37%|███▋      | 32/87 [00:00<00:00, 70.47it/s]
 46%|████▌     | 40/87 [00:00<00:00, 71.53it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 72.57it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 73.09it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 74.67it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 73.61it/s]
 92%|█████████▏| 80/87 [00:01<00:00, 73.97it/s]
100%|██████████| 87/87 [00:01<00:00, 73.48it/s]
  0%|     

[2m[36m(func pid=10100)[0m Epoch 00065: reducing learning rate of group 0 to 3.9082e-04.


 18%|█▊        | 16/87 [00:00<00:00, 74.87it/s]
 28%|██▊       | 24/87 [00:00<00:00, 74.50it/s]
 37%|███▋      | 32/87 [00:00<00:00, 72.76it/s]
 46%|████▌     | 40/87 [00:00<00:00, 73.48it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 74.35it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 75.00it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 75.42it/s]
 92%|█████████▏| 80/87 [00:01<00:00, 75.82it/s]
100%|██████████| 87/87 [00:01<00:00, 75.00it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 72%|███████▏  | 18/25 [00:00<00:00, 179.32it/s]
100%|██████████| 25/25 [00:00<00:00, 181.01it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 74.29it/s]
 18%|█▊        | 16/87 [00:00<00:00, 75.28it/s]
 28%|██▊       | 24/87 [00:00<00:00, 75.66it/s]
 37%|███▋      | 32/87 [00:00<00:00, 76.10it/s]
 46%|████▌     | 40/87 [00:00<00:00, 76.68it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 76.30it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 76.22it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 75.33it/s]
 83%|████

[2m[36m(func pid=10100)[0m Epoch 00073: reducing learning rate of group 0 to 1.9541e-04.


  9%|▉         | 8/87 [00:00<00:01, 72.11it/s]
 18%|█▊        | 16/87 [00:00<00:00, 74.01it/s]
 28%|██▊       | 24/87 [00:00<00:00, 74.73it/s]
 37%|███▋      | 32/87 [00:00<00:00, 74.33it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 75.25it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 75.36it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 75.17it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 75.57it/s]
 92%|█████████▏| 80/87 [00:01<00:00, 75.23it/s]
100%|██████████| 87/87 [00:01<00:00, 75.33it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 76%|███████▌  | 19/25 [00:00<00:00, 183.25it/s]
100%|██████████| 25/25 [00:00<00:00, 181.66it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 74.00it/s]
 18%|█▊        | 16/87 [00:00<00:00, 75.37it/s]
 28%|██▊       | 24/87 [00:00<00:00, 75.67it/s]
 37%|███▋      | 32/87 [00:00<00:00, 74.83it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 76.58it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 76.77it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 76.98it/s]
 83%|█████

[2m[36m(func pid=10100)[0m Epoch 00083: reducing learning rate of group 0 to 9.7705e-05.


 18%|█▊        | 16/87 [00:00<00:00, 77.81it/s]
 28%|██▊       | 24/87 [00:00<00:00, 76.03it/s]
 37%|███▋      | 32/87 [00:00<00:00, 76.97it/s]
 46%|████▌     | 40/87 [00:00<00:00, 75.40it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 76.09it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 76.83it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 77.34it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 77.46it/s]
 92%|█████████▏| 80/87 [00:01<00:00, 75.46it/s]
100%|██████████| 87/87 [00:01<00:00, 76.22it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 183.23it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 76.32it/s]
 18%|█▊        | 16/87 [00:00<00:00, 78.11it/s]
 28%|██▊       | 24/87 [00:00<00:00, 75.76it/s]
 37%|███▋      | 32/87 [00:00<00:00, 72.49it/s]
 46%|████▌     | 40/87 [00:00<00:00, 73.30it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 72.79it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 73.72it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 74.02it/s]
 83%|█████

[2m[36m(func pid=10100)[0m Epoch 00090: reducing learning rate of group 0 to 4.8852e-05.


  9%|▉         | 8/87 [00:00<00:01, 75.39it/s]
 18%|█▊        | 16/87 [00:00<00:00, 76.97it/s]
 28%|██▊       | 24/87 [00:00<00:00, 74.52it/s]
 37%|███▋      | 32/87 [00:00<00:00, 75.97it/s]
 46%|████▌     | 40/87 [00:00<00:00, 75.56it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 75.10it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 76.09it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 75.47it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 76.19it/s]
 92%|█████████▏| 80/87 [00:01<00:00, 75.28it/s]
100%|██████████| 87/87 [00:01<00:00, 75.77it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 117.15it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 75.33it/s]
 18%|█▊        | 16/87 [00:00<00:00, 76.99it/s]
 28%|██▊       | 24/87 [00:00<00:00, 77.16it/s]
 37%|███▋      | 32/87 [00:00<00:00, 77.63it/s]
 46%|████▌     | 40/87 [00:00<00:00, 77.42it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 77.02it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 76.93it/s]
 74%|██████

[2m[36m(func pid=10100)[0m Epoch 00096: reducing learning rate of group 0 to 2.4426e-05.


  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 75.72it/s]
 18%|█▊        | 16/87 [00:00<00:00, 74.70it/s]
 28%|██▊       | 24/87 [00:00<00:00, 73.13it/s]
 37%|███▋      | 32/87 [00:00<00:00, 73.15it/s]
 46%|████▌     | 40/87 [00:00<00:00, 74.27it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 74.21it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 74.04it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 75.50it/s]
 92%|█████████▏| 80/87 [00:01<00:00, 75.70it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 114.99it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 72.11it/s]
 18%|█▊        | 16/87 [00:00<00:00, 74.84it/s]
 28%|██▊       | 24/87 [00:00<00:00, 73.82it/s]
 37%|███▋      | 32/87 [00:00<00:00, 73.37it/s]
 46%|████▌     | 40/87 [00:00<00:00, 73.63it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 74.25it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 74.63it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 75.77it/s]
 83%|████████▎ | 72/

[2m[36m(func pid=10100)[0m Epoch 00102: reducing learning rate of group 0 to 1.2213e-05.


  9%|▉         | 8/87 [00:00<00:01, 72.68it/s]
 18%|█▊        | 16/87 [00:00<00:00, 74.26it/s]
 28%|██▊       | 24/87 [00:00<00:00, 68.61it/s]
 37%|███▋      | 32/87 [00:00<00:00, 69.80it/s]
 46%|████▌     | 40/87 [00:00<00:00, 71.37it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 72.78it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 73.26it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 72.46it/s]
 83%|████████▎ | 72/87 [00:01<00:00, 71.66it/s]
 92%|█████████▏| 80/87 [00:01<00:00, 71.97it/s]
100%|██████████| 87/87 [00:01<00:00, 71.96it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 32%|███▏      | 8/25 [00:00<00:00, 67.22it/s]
100%|██████████| 25/25 [00:00<00:00, 117.21it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 76.26it/s]
 18%|█▊        | 16/87 [00:00<00:00, 74.29it/s]
 28%|██▊       | 24/87 [00:00<00:00, 73.61it/s]
 37%|███▋      | 32/87 [00:00<00:00, 73.65it/s]
 46%|████▌     | 40/87 [00:00<00:00, 74.83it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 76.21it/s]
 64%|██████▍

[2m[36m(func pid=10100)[0m Epoch 00108: reducing learning rate of group 0 to 6.1066e-06.


  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 71.32it/s]
 18%|█▊        | 16/87 [00:00<00:00, 71.80it/s]
 28%|██▊       | 24/87 [00:00<00:00, 68.61it/s]
 37%|███▋      | 32/87 [00:00<00:00, 71.57it/s]
 46%|████▌     | 40/87 [00:00<00:00, 70.86it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 72.77it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 72.12it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 70.74it/s]
 83%|████████▎ | 72/87 [00:01<00:00, 71.15it/s]
 92%|█████████▏| 80/87 [00:01<00:00, 73.27it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 72%|███████▏  | 18/25 [00:00<00:00, 99.54it/s]
100%|██████████| 25/25 [00:00<00:00, 113.13it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  8%|▊         | 7/87 [00:00<00:01, 63.69it/s]
 17%|█▋        | 15/87 [00:00<00:01, 67.42it/s]
 26%|██▋       | 23/87 [00:00<00:00, 69.10it/s]
 36%|███▌      | 31/87 [00:00<00:00, 71.78it/s]
 45%|████▍     | 39/87 [00:00<00:00, 73.12it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 73.19it/s]
 63%|██████▎   | 55/

[2m[36m(func pid=10100)[0m Epoch 00114: reducing learning rate of group 0 to 3.0533e-06.


  9%|▉         | 8/87 [00:00<00:01, 73.35it/s]
 18%|█▊        | 16/87 [00:00<00:00, 74.03it/s]
 28%|██▊       | 24/87 [00:00<00:00, 74.16it/s]
 37%|███▋      | 32/87 [00:00<00:00, 74.46it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 75.84it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 75.59it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 75.66it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 74.95it/s]
 92%|█████████▏| 80/87 [00:01<00:00, 75.56it/s]
100%|██████████| 87/87 [00:01<00:00, 75.24it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 119.11it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 74.51it/s]
 18%|█▊        | 16/87 [00:00<00:00, 75.00it/s]
 28%|██▊       | 24/87 [00:00<00:00, 76.91it/s]
 46%|████▌     | 40/87 [00:00<00:00, 76.69it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 76.28it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 76.28it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 75.74it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 74.38it/s]
 92%|██████

[2m[36m(func pid=10100)[0m Epoch 00120: reducing learning rate of group 0 to 1.5266e-06.


  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 73.35it/s]
 18%|█▊        | 16/87 [00:00<00:00, 73.35it/s]
 37%|███▋      | 32/87 [00:00<00:00, 75.64it/s]
 46%|████▌     | 40/87 [00:00<00:00, 75.22it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 76.11it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 76.84it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 76.87it/s]
100%|██████████| 87/87 [00:01<00:00, 74.70it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 32%|███▏      | 8/25 [00:00<00:00, 63.35it/s]
100%|██████████| 25/25 [00:00<00:00, 102.44it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 68.81it/s]
 18%|█▊        | 16/87 [00:00<00:00, 71.47it/s]
 28%|██▊       | 24/87 [00:00<00:00, 73.25it/s]
 37%|███▋      | 32/87 [00:00<00:00, 74.08it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 74.78it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 75.01it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 76.11it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 75.45it/s]
 92%|█████████▏| 80/8

[2m[36m(func pid=10100)[0m Epoch 00126: reducing learning rate of group 0 to 7.6332e-07.


  9%|▉         | 8/87 [00:00<00:01, 70.84it/s]
 18%|█▊        | 16/87 [00:00<00:00, 72.27it/s]
 28%|██▊       | 24/87 [00:00<00:00, 74.56it/s]
 37%|███▋      | 32/87 [00:00<00:00, 73.97it/s]
 46%|████▌     | 40/87 [00:00<00:00, 74.69it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 75.30it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 75.84it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 74.27it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 75.16it/s]
 92%|█████████▏| 80/87 [00:01<00:00, 74.24it/s]
100%|██████████| 87/87 [00:01<00:00, 74.83it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 72%|███████▏  | 18/25 [00:00<00:00, 102.92it/s]
100%|██████████| 25/25 [00:00<00:00, 116.92it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 75.46it/s]
 18%|█▊        | 16/87 [00:00<00:00, 75.56it/s]
 28%|██▊       | 24/87 [00:00<00:00, 74.89it/s]
 37%|███▋      | 32/87 [00:00<00:00, 73.47it/s]
 46%|████▌     | 40/87 [00:00<00:00, 74.88it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 73.47it/s]
 64%|█████

[2m[36m(func pid=10100)[0m Epoch 00132: reducing learning rate of group 0 to 3.8166e-07.


 18%|█▊        | 16/87 [00:00<00:00, 77.20it/s]
 28%|██▊       | 24/87 [00:00<00:00, 74.27it/s]
 37%|███▋      | 32/87 [00:00<00:00, 71.55it/s]
 46%|████▌     | 40/87 [00:00<00:00, 72.04it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 73.59it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 74.85it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 71.88it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 73.34it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
100%|██████████| 25/25 [00:00<00:00, 108.51it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 71.34it/s]
 18%|█▊        | 16/87 [00:00<00:00, 74.16it/s]
 28%|██▊       | 24/87 [00:00<00:00, 68.88it/s]
 36%|███▌      | 31/87 [00:00<00:00, 66.82it/s]
 45%|████▍     | 39/87 [00:00<00:00, 68.74it/s]
 54%|█████▍    | 47/87 [00:00<00:00, 69.89it/s]
 63%|██████▎   | 55/87 [00:00<00:00, 71.14it/s]
 72%|███████▏  | 63/87 [00:00<00:00, 73.21it/s]
 82%|████████▏ | 71/87 [00:00<00:00, 72.44it/s]
 91%|█████████ | 79/87 [00:01<00:00, 72.70it/s]
100%|█████

[2m[36m(func pid=10100)[0m Epoch 00138: reducing learning rate of group 0 to 1.9083e-07.


 18%|█▊        | 16/87 [00:00<00:00, 74.26it/s]
 28%|██▊       | 24/87 [00:00<00:00, 75.49it/s]
 37%|███▋      | 32/87 [00:00<00:00, 75.17it/s]
 46%|████▌     | 40/87 [00:00<00:00, 75.81it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 76.15it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 75.73it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 76.19it/s]
 92%|█████████▏| 80/87 [00:01<00:00, 76.15it/s]
100%|██████████| 87/87 [00:01<00:00, 76.09it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 32%|███▏      | 8/25 [00:00<00:00, 66.73it/s]
100%|██████████| 25/25 [00:00<00:00, 117.39it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 74.25it/s]
 18%|█▊        | 16/87 [00:00<00:00, 76.22it/s]
 28%|██▊       | 24/87 [00:00<00:00, 75.49it/s]
 37%|███▋      | 32/87 [00:00<00:00, 76.29it/s]
 46%|████▌     | 40/87 [00:00<00:00, 76.27it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 76.29it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 76.52it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 76.79it/s]
 83%|██████

[2m[36m(func pid=10100)[0m Epoch 00144: reducing learning rate of group 0 to 9.5415e-08.


  9%|▉         | 8/87 [00:00<00:01, 75.48it/s]
 18%|█▊        | 16/87 [00:00<00:00, 75.03it/s]
 28%|██▊       | 24/87 [00:00<00:00, 72.74it/s]
 37%|███▋      | 32/87 [00:00<00:00, 73.78it/s]
 46%|████▌     | 40/87 [00:00<00:00, 75.17it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 75.02it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 75.68it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 76.28it/s]
 83%|████████▎ | 72/87 [00:00<00:00, 75.89it/s]
  0%|          | 0/25 [00:00<?, ?it/s]
 72%|███████▏  | 18/25 [00:00<00:00, 101.72it/s]
100%|██████████| 25/25 [00:00<00:00, 115.26it/s]
  0%|          | 0/87 [00:00<?, ?it/s]
  9%|▉         | 8/87 [00:00<00:01, 71.98it/s]
 18%|█▊        | 16/87 [00:00<00:00, 73.25it/s]
 28%|██▊       | 24/87 [00:00<00:00, 74.37it/s]
 37%|███▋      | 32/87 [00:00<00:00, 74.33it/s]
 46%|████▌     | 40/87 [00:00<00:00, 75.27it/s]
 55%|█████▌    | 48/87 [00:00<00:00, 76.61it/s]
 64%|██████▍   | 56/87 [00:00<00:00, 76.47it/s]
 74%|███████▎  | 64/87 [00:00<00:00, 76.36it/s]
 83%|█████

TuneError: ('Trials did not complete', [train_model_00d49_00006, train_model_00d49_00013])