In [1]:
import csv
import json
import logging
import os
import sys
from collections import defaultdict
from copy import deepcopy
from functools import reduce
from logging import Logger
from typing import Callable, Dict, List, Tuple, Literal, Union

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from rdkit import Chem
from tensorboardX import SummaryWriter
from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler, ExponentialLR
from tqdm import tqdm, trange

import chemprop
from chemprop.args import TrainArgs, PredictArgs, HyperoptArgs
from chemprop.constants import TEST_SCORES_FILE_NAME, TRAIN_LOGGER_NAME, MODEL_FILE_NAME, HYPEROPT_LOGGER_NAME
from chemprop.data import get_task_names, get_data, validate_dataset_type, MoleculeDataset, MoleculeDataLoader
from chemprop.features import set_extra_atom_fdim, set_extra_bond_fdim
from chemprop.train import run_training, train, predict
from chemprop.utils import create_logger, makedirs, load_args, update_prediction_args, load_checkpoint, load_scalers



## Training Args

In [2]:
# Build the chemprop training configuration in code: start from a default
# TrainArgs instance, apply the overrides below in order, then let
# process_args() finalize the object.
args = TrainArgs()

train_overrides = {
    # Common
    'smiles_columns': ['SMILES'],
    'gpu': 1,  # presumably the CUDA device index -- adjust for the local machine
    'features_path': ["../../Data/ML_data/Data_split/Features/RN_train.npz"],
    'no_features_scaling': True,

    # TrainArg
    'data_path': "../../Data/ML_data/Data_split/train.csv",
    'target_columns': ['G1','G2','G3','G4','G5','G6','G7','G8','G9','G10','G11','G12','G13'],
    'dataset_type': 'classification',  # one of 'regression', 'classification', 'multiclass'
    'separate_val_path': "../../Data/ML_data/Data_split/val.csv",
    'separate_test_path': "../../Data/ML_data/Data_split/test.csv",
    'save_dir': '../../Results/Trained_model/DMPNN_RN_Ensemble_5',

    # Model arguments
    'separate_val_features_path': ["../../Data/ML_data/Data_split/Features/RN_val.npz"],
    'separate_test_features_path': ["../../Data/ML_data/Data_split/Features/RN_test.npz"],
    'config_path': "../../Data/ML_data/Data_split/Opt_hyperpars/DMPNN_RN.json",
    'ensemble_size': 5,
}

# dicts preserve insertion order, so attributes are set in the order listed above.
for option_name, option_value in train_overrides.items():
    setattr(args, option_name, option_value)

args.process_args()

## Load data

In [3]:
# Logger: quiet and verbose file
logger = create_logger(name=TRAIN_LOGGER_NAME, save_dir=args.save_dir, quiet=args.quiet)
if logger is not None:
    debug, info = logger.debug, logger.info
else:
    # Fall back to plain printing when no logger is available.
    debug = info = print

# Initialize relevant variables.
# init_seed / save_dir capture the base values before any later cell
# overwrites args.seed / args.save_dir.
init_seed = args.seed
save_dir = args.save_dir
args.task_names = get_task_names(path=args.data_path, smiles_columns=args.smiles_columns,
                                 target_columns=args.target_columns, ignore_columns=args.ignore_columns)


# Print args
# debug('Args')
# debug({key:value for key, value in args.__dict__.items() if not key.startswith('_') and not callable(key)})

# Save args (only the output directory is created; saving args.json is disabled)
makedirs(args.save_dir)
#     args.save(os.path.join(args.save_dir, 'args.json'))

# Get data
debug('Loading data')
data = get_data(
    path=args.data_path,
    args=args,
    smiles_columns=args.smiles_columns,
    logger=logger,
    skip_none_targets=True
)
validate_dataset_type(data, dataset_type=args.dataset_type)
args.features_size = data.features_size()

# Propagate the sizes of any extra atom/bond inputs into args.
# set_extra_atom_fdim / set_extra_bond_fdim come from chemprop.features
# (imported in the imports cell); they were previously referenced here
# without being imported, which raised NameError on these branches.
if args.atom_descriptors == 'descriptor':
    args.atom_descriptors_size = data.atom_descriptors_size()
    # NOTE(review): this increment is applied again every time the cell is
    # re-run without re-creating args -- restart the kernel or re-run the
    # args cell first.
    args.ffn_hidden_size += args.atom_descriptors_size
elif args.atom_descriptors == 'feature':
    args.atom_features_size = data.atom_features_size()
    set_extra_atom_fdim(args.atom_features_size)
if args.bond_features_path is not None:
    args.bond_features_size = data.bond_features_size()
    set_extra_bond_fdim(args.bond_features_size)

debug(f'Number of tasks = {args.num_tasks}')

Loading data
37773it [00:00, 138056.15it/s]
100%|██████████| 37773/37773 [00:00<00:00, 206116.23it/s]
100%|██████████| 37773/37773 [00:04<00:00, 8348.60it/s] 
Number of tasks = 13


## Training

In [4]:
# Cross-validation loop, following chemprop's cross_validate:
# each fold trains with its own seed (init_seed + fold index) and writes
# into its own sub-directory fold_<n> under the base save_dir.
all_scores = {}
for fold_num in range(args.num_folds):
    info(f'Fold {fold_num}')

    # Per-fold seed and output directory.
    fold_dir = os.path.join(save_dir, f'fold_{fold_num}')
    args.seed = init_seed + fold_num
    args.save_dir = fold_dir
    makedirs(fold_dir)

    # Reset the dataset's features/targets before each run.
    data.reset_features_and_targets()
    model_scores = run_training(args, data, logger)

    # Collect this fold's scores: metric name -> list with one entry per fold.
    for metric, scores in model_scores.items():
        all_scores.setdefault(metric, []).append(scores)

Fold 0
Splitting data with seed 0
4723it [00:00, 186670.85it/s]
100%|██████████| 4723/4723 [00:00<00:00, 205949.85it/s]
100%|██████████| 4723/4723 [00:00<00:00, 8216.36it/s]
4721it [00:00, 186530.29it/s]
100%|██████████| 4721/4721 [00:00<00:00, 214755.42it/s]
100%|██████████| 4721/4721 [00:00<00:00, 8311.52it/s]
Class sizes
G1 0: 96.70%, 1: 3.30%
G2 0: 97.37%, 1: 2.63%
G3 0: 96.61%, 1: 3.39%
G4 0: 95.81%, 1: 4.19%
G5 0: 95.92%, 1: 4.08%
G6 0: 96.50%, 1: 3.50%
G7 0: 95.89%, 1: 4.11%
G8 0: 96.01%, 1: 3.99%
G9 0: 96.67%, 1: 3.33%
G10 0: 96.67%, 1: 3.33%
G11 0: 93.72%, 1: 6.28%
G12 0: 96.58%, 1: 3.42%
G13 0: 94.79%, 1: 5.21%
Total size = 37,773 | train size = 37,773 | val size = 4,721 | test size = 4,723
Building model 0
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.35000000000000003, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=1100, bias=False)

  1%|          | 4/756 [00:00<01:07, 11.21it/s][ALoss = 1.8982e-01, PNorm = 51.1379, GNorm = 0.1752, lr_0 = 5.5417e-04

  1%|          | 8/756 [00:00<00:37, 19.69it/s][A
  2%|▏         | 12/756 [00:00<00:30, 24.20it/s][ALoss = 1.8663e-01, PNorm = 51.1419, GNorm = 0.0992, lr_0 = 5.6013e-04

  2%|▏         | 16/756 [00:00<00:25, 28.62it/s][A
  3%|▎         | 20/756 [00:00<00:24, 30.43it/s][ALoss = 1.2350e-01, PNorm = 51.1587, GNorm = 0.1133, lr_0 = 5.6609e-04

  3%|▎         | 25/756 [00:00<00:21, 33.59it/s][A
  4%|▍         | 29/756 [00:01<00:21, 33.69it/s][A
  4%|▍         | 34/756 [00:01<00:20, 34.98it/s][ALoss = 1.6582e-01, PNorm = 51.1555, GNorm = 0.1259, lr_0 = 5.7205e-04

  5%|▌         | 38/756 [00:01<00:20, 35.80it/s][A
  6%|▌         | 42/756 [00:01<00:19, 36.21it/s][ALoss = 1.4670e-01, PNorm = 51.1698, GNorm = 0.0888, lr_0 = 5.7801e-04

  6%|▌         | 46/756 [00:01<00:19, 36.39it/s][A
  7%|▋         | 50/756 [00:01<00:19, 36.60it/s][A
  7%|▋         | 54/756 [00:

  1%|          | 5/756 [00:00<00:54, 13.69it/s][A
  1%|          | 9/756 [00:00<00:36, 20.64it/s][ALoss = 1.7451e-01, PNorm = 53.2559, GNorm = 0.0996, lr_0 = 9.9859e-04

  2%|▏         | 13/756 [00:00<00:29, 25.43it/s][A
  2%|▏         | 17/756 [00:00<00:25, 28.71it/s][ALoss = 1.8309e-01, PNorm = 53.2886, GNorm = 0.1104, lr_0 = 9.9750e-04

  3%|▎         | 21/756 [00:00<00:23, 31.24it/s][A
  3%|▎         | 25/756 [00:00<00:21, 33.26it/s][A
  4%|▍         | 29/756 [00:01<00:20, 34.72it/s][ALoss = 1.2966e-01, PNorm = 53.3583, GNorm = 0.0750, lr_0 = 9.9641e-04

  4%|▍         | 33/756 [00:01<00:20, 35.48it/s][A
  5%|▍         | 37/756 [00:01<00:19, 36.19it/s][ALoss = 1.3311e-01, PNorm = 53.3893, GNorm = 0.0897, lr_0 = 9.9533e-04

  5%|▌         | 41/756 [00:01<00:19, 36.50it/s][A
  6%|▌         | 45/756 [00:01<00:19, 36.83it/s][A
  6%|▋         | 49/756 [00:01<00:18, 37.26it/s][ALoss = 1.4696e-01, PNorm = 53.4260, GNorm = 0.1306, lr_0 = 9.9424e-04

  7%|▋         | 53/756 [00:


 55%|█████▍    | 413/756 [00:11<00:09, 35.21it/s][A
 55%|█████▌    | 417/756 [00:11<00:09, 34.90it/s][ALoss = 1.7504e-01, PNorm = 55.0915, GNorm = 0.0799, lr_0 = 9.5497e-04

 56%|█████▌    | 421/756 [00:11<00:09, 34.97it/s][A
 56%|█████▌    | 425/756 [00:12<00:09, 35.83it/s][A
 57%|█████▋    | 429/756 [00:12<00:09, 36.02it/s][ALoss = 1.5678e-01, PNorm = 55.1858, GNorm = 0.0908, lr_0 = 9.5393e-04

 57%|█████▋    | 433/756 [00:12<00:08, 36.13it/s][A
 58%|█████▊    | 437/756 [00:12<00:08, 36.24it/s][ALoss = 1.6297e-01, PNorm = 55.2474, GNorm = 0.0875, lr_0 = 9.5289e-04

 58%|█████▊    | 441/756 [00:12<00:08, 36.21it/s][A
 59%|█████▉    | 445/756 [00:12<00:08, 36.29it/s][A
 59%|█████▉    | 449/756 [00:12<00:08, 36.52it/s][ALoss = 1.3256e-01, PNorm = 55.3183, GNorm = 0.1758, lr_0 = 9.5186e-04

 60%|█████▉    | 453/756 [00:12<00:08, 36.84it/s][A
 60%|██████    | 457/756 [00:12<00:08, 37.17it/s][ALoss = 1.5904e-01, PNorm = 55.3340, GNorm = 0.1346, lr_0 = 9.5082e-04

 61%|██████  


 60%|██████    | 455/756 [00:14<00:08, 36.27it/s][A
 61%|██████    | 459/756 [00:14<00:08, 34.55it/s][ALoss = 1.7157e-01, PNorm = 58.5143, GNorm = 0.1126, lr_0 = 8.7528e-04

 61%|██████▏   | 464/756 [00:14<00:07, 36.93it/s][A
 62%|██████▏   | 468/756 [00:14<00:08, 34.62it/s][A
 63%|██████▎   | 473/756 [00:14<00:09, 29.52it/s][ALoss = 1.7897e-01, PNorm = 58.5581, GNorm = 0.1212, lr_0 = 8.7433e-04

 63%|██████▎   | 478/756 [00:14<00:08, 32.35it/s][A
 64%|██████▍   | 482/756 [00:15<00:10, 26.41it/s][ALoss = 1.4035e-01, PNorm = 58.6024, GNorm = 0.1001, lr_0 = 8.7338e-04

 64%|██████▍   | 486/756 [00:15<00:09, 29.04it/s][A
 65%|██████▍   | 490/756 [00:15<00:09, 29.39it/s][ALoss = 1.8138e-01, PNorm = 58.6117, GNorm = 0.0973, lr_0 = 8.7243e-04

 65%|██████▌   | 495/756 [00:15<00:07, 32.66it/s][A
 66%|██████▌   | 499/756 [00:15<00:08, 31.87it/s][ALoss = 1.2849e-01, PNorm = 58.6689, GNorm = 0.0680, lr_0 = 8.7148e-04

 67%|██████▋   | 504/756 [00:15<00:07, 34.87it/s][A
 67%|██████▋ 


 68%|██████▊   | 511/756 [00:14<00:06, 36.77it/s][A
 68%|██████▊   | 515/756 [00:14<00:06, 35.38it/s][ALoss = 1.3864e-01, PNorm = 61.9253, GNorm = 0.0938, lr_0 = 8.0128e-04

 69%|██████▊   | 519/756 [00:14<00:06, 36.21it/s][A
 69%|██████▉   | 523/756 [00:15<00:06, 34.53it/s][A
 70%|██████▉   | 528/756 [00:15<00:06, 36.44it/s][ALoss = 1.4422e-01, PNorm = 61.9716, GNorm = 0.0901, lr_0 = 8.0041e-04

 70%|███████   | 532/756 [00:15<00:06, 34.75it/s][A
 71%|███████   | 537/756 [00:15<00:05, 36.54it/s][ALoss = 1.6096e-01, PNorm = 62.0063, GNorm = 0.1063, lr_0 = 7.9954e-04

 72%|███████▏  | 541/756 [00:15<00:06, 34.74it/s][A
 72%|███████▏  | 546/756 [00:15<00:06, 34.14it/s][ALoss = 1.1840e-01, PNorm = 62.0416, GNorm = 0.1226, lr_0 = 7.9867e-04

 73%|███████▎  | 551/756 [00:15<00:05, 36.11it/s][A
 73%|███████▎  | 555/756 [00:16<00:05, 35.13it/s][ALoss = 1.8717e-01, PNorm = 62.0735, GNorm = 0.1397, lr_0 = 7.9780e-04

 74%|███████▍  | 560/756 [00:16<00:05, 37.01it/s][A
 75%|███████▍


 73%|███████▎  | 553/756 [00:15<00:06, 29.62it/s][A
 74%|███████▎  | 557/756 [00:16<00:06, 31.60it/s][A
 74%|███████▍  | 561/756 [00:16<00:06, 30.90it/s][ALoss = 1.2976e-01, PNorm = 65.6134, GNorm = 0.1180, lr_0 = 7.3441e-04

 75%|███████▍  | 565/756 [00:16<00:05, 32.30it/s][A
 75%|███████▌  | 569/756 [00:16<00:05, 31.43it/s][ALoss = 1.3908e-01, PNorm = 65.6489, GNorm = 0.0935, lr_0 = 7.3362e-04

 76%|███████▌  | 573/756 [00:16<00:05, 33.10it/s][A
 76%|███████▋  | 577/756 [00:16<00:05, 34.07it/s][A
 77%|███████▋  | 581/756 [00:16<00:05, 29.96it/s][ALoss = 1.2284e-01, PNorm = 65.7032, GNorm = 0.1120, lr_0 = 7.3282e-04

 77%|███████▋  | 585/756 [00:16<00:05, 31.14it/s][A
 78%|███████▊  | 589/756 [00:17<00:05, 32.98it/s][ALoss = 1.3206e-01, PNorm = 65.7694, GNorm = 0.1063, lr_0 = 7.3202e-04

 78%|███████▊  | 593/756 [00:17<00:04, 33.16it/s][A
 79%|███████▉  | 597/756 [00:17<00:04, 34.09it/s][A
 79%|███████▉  | 601/756 [00:17<00:04, 34.33it/s][ALoss = 1.9889e-01, PNorm = 65.8

 74%|███████▍  | 562/756 [00:18<00:05, 33.09it/s][A
 75%|███████▍  | 566/756 [00:18<00:05, 32.51it/s][ALoss = 1.2851e-01, PNorm = 68.9577, GNorm = 0.0916, lr_0 = 6.7599e-04

 75%|███████▌  | 570/756 [00:18<00:05, 33.45it/s][A
 76%|███████▌  | 574/756 [00:18<00:05, 32.04it/s][ALoss = 1.3481e-01, PNorm = 69.0240, GNorm = 0.1475, lr_0 = 6.7526e-04

 76%|███████▋  | 578/756 [00:18<00:05, 33.28it/s][A
 77%|███████▋  | 582/756 [00:18<00:05, 32.10it/s][A
 78%|███████▊  | 586/756 [00:18<00:05, 33.38it/s][ALoss = 1.1490e-01, PNorm = 69.0676, GNorm = 0.1025, lr_0 = 6.7452e-04

 78%|███████▊  | 590/756 [00:19<00:05, 31.92it/s][A
 79%|███████▊  | 594/756 [00:19<00:04, 33.28it/s][ALoss = 1.4773e-01, PNorm = 69.1176, GNorm = 0.2267, lr_0 = 6.7379e-04

 79%|███████▉  | 598/756 [00:19<00:04, 31.64it/s][A
 80%|███████▉  | 602/756 [00:19<00:04, 32.80it/s][A
 80%|████████  | 606/756 [00:19<00:04, 31.87it/s][ALoss = 1.6135e-01, PNorm = 69.1544, GNorm = 0.1252, lr_0 = 6.7306e-04

 81%|████████ 


 76%|███████▌  | 575/756 [00:16<00:05, 36.12it/s][A
 77%|███████▋  | 579/756 [00:16<00:04, 35.68it/s][ALoss = 1.3609e-01, PNorm = 71.9660, GNorm = 0.1018, lr_0 = 6.2161e-04

 77%|███████▋  | 583/756 [00:17<00:04, 35.63it/s][A
 78%|███████▊  | 587/756 [00:17<00:04, 35.17it/s][A
 78%|███████▊  | 591/756 [00:17<00:04, 35.59it/s][ALoss = 1.2321e-01, PNorm = 72.0028, GNorm = 0.1095, lr_0 = 6.2094e-04

 79%|███████▊  | 595/756 [00:17<00:04, 34.76it/s][A
 79%|███████▉  | 599/756 [00:17<00:04, 34.99it/s][ALoss = 1.2306e-01, PNorm = 72.0383, GNorm = 0.1567, lr_0 = 6.2026e-04

 80%|███████▉  | 603/756 [00:17<00:04, 34.35it/s][A
 80%|████████  | 607/756 [00:17<00:04, 34.89it/s][A
 81%|████████  | 611/756 [00:17<00:04, 34.71it/s][ALoss = 9.8325e-02, PNorm = 72.0794, GNorm = 0.1175, lr_0 = 6.1958e-04

 81%|████████▏ | 615/756 [00:18<00:04, 35.02it/s][A
 82%|████████▏ | 619/756 [00:18<00:03, 34.67it/s][ALoss = 1.5837e-01, PNorm = 72.1102, GNorm = 0.1235, lr_0 = 6.1891e-04

 82%|████████


 78%|███████▊  | 587/756 [00:17<00:05, 32.72it/s][A
 78%|███████▊  | 591/756 [00:18<00:04, 33.87it/s][A
 79%|███████▊  | 595/756 [00:18<00:04, 32.88it/s][ALoss = 1.5447e-01, PNorm = 74.6322, GNorm = 0.1156, lr_0 = 5.7154e-04

 79%|███████▉  | 599/756 [00:18<00:04, 33.60it/s][A
 80%|███████▉  | 603/756 [00:18<00:04, 32.64it/s][ALoss = 1.4249e-01, PNorm = 74.6787, GNorm = 0.1740, lr_0 = 5.7092e-04

 80%|████████  | 607/756 [00:18<00:04, 34.04it/s][A
 81%|████████  | 611/756 [00:18<00:04, 30.40it/s][A
 81%|████████▏ | 615/756 [00:18<00:04, 32.72it/s][ALoss = 1.2813e-01, PNorm = 74.7185, GNorm = 0.0950, lr_0 = 5.7030e-04

 82%|████████▏ | 619/756 [00:18<00:05, 26.22it/s][A
 82%|████████▏ | 623/756 [00:19<00:04, 29.14it/s][ALoss = 1.2562e-01, PNorm = 74.7554, GNorm = 0.1588, lr_0 = 5.6968e-04

 83%|████████▎ | 627/756 [00:19<00:04, 29.58it/s][A
 84%|████████▎ | 632/756 [00:19<00:03, 32.37it/s][A
 84%|████████▍ | 636/756 [00:19<00:03, 31.56it/s][ALoss = 1.5976e-01, PNorm = 74.7


 27%|██▋       | 201/756 [00:06<00:15, 35.11it/s][A
 27%|██▋       | 205/756 [00:06<00:16, 33.61it/s][A
 28%|██▊       | 210/756 [00:06<00:16, 33.36it/s][ALoss = 1.1745e-01, PNorm = 76.0095, GNorm = 0.1631, lr_0 = 5.4897e-04

 28%|██▊       | 214/756 [00:06<00:15, 34.74it/s][A
 29%|██▉       | 218/756 [00:06<00:15, 34.01it/s][ALoss = 1.4252e-01, PNorm = 76.0516, GNorm = 0.1329, lr_0 = 5.4837e-04

 29%|██▉       | 222/756 [00:07<00:15, 34.89it/s][A
 30%|██▉       | 226/756 [00:07<00:15, 34.36it/s][ALoss = 1.1480e-01, PNorm = 76.0875, GNorm = 0.2545, lr_0 = 5.4777e-04

 31%|███       | 231/756 [00:07<00:14, 36.34it/s][A
 31%|███       | 235/756 [00:07<00:14, 35.09it/s][A
 32%|███▏      | 239/756 [00:07<00:14, 35.83it/s][ALoss = 1.3020e-01, PNorm = 76.1242, GNorm = 0.1348, lr_0 = 5.4718e-04

 32%|███▏      | 243/756 [00:07<00:14, 34.76it/s][A
 33%|███▎      | 247/756 [00:07<00:14, 35.12it/s][ALoss = 1.0717e-01, PNorm = 76.1707, GNorm = 0.0797, lr_0 = 5.4658e-04

 33%|███▎    


 29%|██▊       | 217/756 [00:06<00:14, 36.19it/s][A
 29%|██▉       | 221/756 [00:06<00:14, 37.05it/s][A
 30%|██▉       | 225/756 [00:06<00:14, 36.60it/s][ALoss = 2.0230e-01, PNorm = 78.4109, GNorm = 0.3430, lr_0 = 5.0475e-04

 30%|███       | 229/756 [00:06<00:14, 37.05it/s][A
 31%|███       | 233/756 [00:07<00:14, 36.66it/s][ALoss = 1.5960e-01, PNorm = 78.4485, GNorm = 0.1924, lr_0 = 5.0420e-04

 31%|███▏      | 237/756 [00:07<00:14, 36.72it/s][A
 32%|███▏      | 241/756 [00:07<00:14, 35.80it/s][A
 32%|███▏      | 245/756 [00:07<00:13, 36.61it/s][ALoss = 1.3722e-01, PNorm = 78.4966, GNorm = 0.1320, lr_0 = 5.0365e-04

 33%|███▎      | 249/756 [00:07<00:14, 33.94it/s][A
 33%|███▎      | 253/756 [00:07<00:14, 35.15it/s][ALoss = 1.0914e-01, PNorm = 78.5299, GNorm = 0.1737, lr_0 = 5.0310e-04

 34%|███▍      | 257/756 [00:07<00:15, 32.25it/s][A
 35%|███▍      | 261/756 [00:07<00:14, 34.20it/s][A
 35%|███▌      | 265/756 [00:08<00:15, 32.49it/s][ALoss = 1.5886e-01, PNorm = 78.5


 83%|████████▎ | 628/756 [00:19<00:03, 35.24it/s][A
 84%|████████▎ | 633/756 [00:19<00:03, 34.84it/s][ALoss = 1.7669e-01, PNorm = 79.6018, GNorm = 0.3188, lr_0 = 4.8270e-04

 84%|████████▍ | 637/756 [00:19<00:03, 35.66it/s][A
 85%|████████▍ | 641/756 [00:19<00:03, 34.32it/s][ALoss = 1.5658e-01, PNorm = 79.6243, GNorm = 0.2156, lr_0 = 4.8218e-04

 85%|████████▌ | 646/756 [00:19<00:03, 36.55it/s][A
 86%|████████▌ | 650/756 [00:19<00:02, 35.96it/s][A
 87%|████████▋ | 655/756 [00:19<00:02, 37.95it/s][ALoss = 1.3670e-01, PNorm = 79.6595, GNorm = 0.2092, lr_0 = 4.8165e-04

 87%|████████▋ | 659/756 [00:19<00:02, 36.39it/s][A
 88%|████████▊ | 664/756 [00:20<00:02, 38.12it/s][ALoss = 8.4703e-02, PNorm = 79.7074, GNorm = 0.0903, lr_0 = 4.8113e-04

 88%|████████▊ | 668/756 [00:20<00:02, 34.55it/s][A
 89%|████████▉ | 673/756 [00:20<00:02, 34.92it/s][ALoss = 1.7765e-01, PNorm = 79.7302, GNorm = 0.1889, lr_0 = 4.8060e-04

 90%|████████▉ | 677/756 [00:20<00:02, 35.60it/s][A
 90%|████████


 85%|████████▍ | 642/756 [00:18<00:03, 35.82it/s][A
 85%|████████▌ | 646/756 [00:18<00:03, 36.34it/s][ALoss = 1.4743e-01, PNorm = 81.6967, GNorm = 0.2924, lr_0 = 4.4387e-04

 86%|████████▌ | 650/756 [00:19<00:02, 35.89it/s][A
 87%|████████▋ | 654/756 [00:19<00:02, 35.75it/s][A
 87%|████████▋ | 658/756 [00:19<00:02, 35.46it/s][ALoss = 1.4682e-01, PNorm = 81.7133, GNorm = 0.1294, lr_0 = 4.4339e-04

 88%|████████▊ | 662/756 [00:19<00:02, 36.03it/s][A
 88%|████████▊ | 666/756 [00:19<00:02, 36.09it/s][ALoss = 1.7002e-01, PNorm = 81.7309, GNorm = 0.1370, lr_0 = 4.4290e-04

 89%|████████▊ | 670/756 [00:19<00:02, 36.13it/s][A
 89%|████████▉ | 674/756 [00:19<00:02, 36.26it/s][A
 90%|████████▉ | 678/756 [00:19<00:02, 36.74it/s][ALoss = 1.3313e-01, PNorm = 81.7647, GNorm = 0.1118, lr_0 = 4.4242e-04

 90%|█████████ | 682/756 [00:19<00:02, 36.53it/s][A
 91%|█████████ | 686/756 [00:20<00:01, 36.78it/s][ALoss = 9.0046e-02, PNorm = 81.8025, GNorm = 0.1818, lr_0 = 4.4194e-04

 91%|████████


 33%|███▎      | 246/756 [00:07<00:15, 33.54it/s][A
 33%|███▎      | 250/756 [00:07<00:14, 34.54it/s][A
 34%|███▎      | 254/756 [00:07<00:16, 30.16it/s][ALoss = 1.2039e-01, PNorm = 82.7520, GNorm = 0.1419, lr_0 = 4.2676e-04

 34%|███▍      | 258/756 [00:07<00:15, 32.03it/s][A
 35%|███▍      | 262/756 [00:07<00:15, 32.85it/s][ALoss = 1.5483e-01, PNorm = 82.7761, GNorm = 0.1469, lr_0 = 4.2629e-04

 35%|███▌      | 266/756 [00:07<00:14, 34.38it/s][A
 36%|███▌      | 270/756 [00:08<00:14, 34.63it/s][A
 36%|███▌      | 274/756 [00:08<00:13, 35.81it/s][ALoss = 1.2527e-01, PNorm = 82.8022, GNorm = 0.1391, lr_0 = 4.2583e-04

 37%|███▋      | 278/756 [00:08<00:13, 35.61it/s][A
 37%|███▋      | 282/756 [00:08<00:12, 36.60it/s][ALoss = 2.0243e-01, PNorm = 82.8125, GNorm = 0.2296, lr_0 = 4.2536e-04

 38%|███▊      | 286/756 [00:08<00:13, 35.70it/s][A
 38%|███▊      | 290/756 [00:08<00:12, 36.70it/s][A
 39%|███▉      | 294/756 [00:08<00:12, 36.04it/s][ALoss = 1.7254e-01, PNorm = 82.8

 34%|███▎      | 254/756 [00:07<00:14, 34.79it/s][A
 34%|███▍      | 258/756 [00:07<00:13, 35.63it/s][ALoss = 1.4770e-01, PNorm = 84.4443, GNorm = 0.2495, lr_0 = 3.9281e-04

 35%|███▍      | 262/756 [00:07<00:14, 35.10it/s][A
 35%|███▌      | 266/756 [00:07<00:13, 36.30it/s][ALoss = 1.0896e-01, PNorm = 84.4680, GNorm = 0.2092, lr_0 = 3.9238e-04

 36%|███▌      | 270/756 [00:07<00:13, 35.73it/s][A
 36%|███▌      | 274/756 [00:08<00:13, 36.60it/s][A
 37%|███▋      | 278/756 [00:08<00:13, 36.17it/s][ALoss = 1.3583e-01, PNorm = 84.4901, GNorm = 0.1625, lr_0 = 3.9195e-04

 37%|███▋      | 282/756 [00:08<00:12, 36.53it/s][A
 38%|███▊      | 286/756 [00:08<00:12, 36.23it/s][ALoss = 1.2843e-01, PNorm = 84.5174, GNorm = 0.2197, lr_0 = 3.9153e-04

 38%|███▊      | 290/756 [00:08<00:12, 36.83it/s][A
 39%|███▉      | 294/756 [00:08<00:12, 36.49it/s][A
 39%|███▉      | 298/756 [00:08<00:12, 37.25it/s][ALoss = 1.4066e-01, PNorm = 84.5414, GNorm = 0.3479, lr_0 = 3.9110e-04

 40%|███▉     


 35%|███▌      | 266/756 [00:07<00:14, 34.72it/s][A
 36%|███▌      | 270/756 [00:07<00:13, 35.12it/s][ALoss = 1.1767e-01, PNorm = 86.0868, GNorm = 0.2894, lr_0 = 3.6121e-04

 36%|███▌      | 274/756 [00:08<00:13, 35.40it/s][A
 37%|███▋      | 278/756 [00:08<00:13, 36.22it/s][A
 37%|███▋      | 282/756 [00:08<00:13, 36.12it/s][ALoss = 1.0695e-01, PNorm = 86.1088, GNorm = 0.1340, lr_0 = 3.6082e-04

 38%|███▊      | 286/756 [00:08<00:13, 36.13it/s][A
 38%|███▊      | 290/756 [00:08<00:12, 36.22it/s][ALoss = 1.4482e-01, PNorm = 86.1258, GNorm = 0.4439, lr_0 = 3.6042e-04

 39%|███▉      | 294/756 [00:08<00:12, 36.38it/s][A
 39%|███▉      | 298/756 [00:08<00:12, 35.84it/s][A
 40%|███▉      | 302/756 [00:08<00:12, 35.88it/s][ALoss = 1.0522e-01, PNorm = 86.1476, GNorm = 0.3024, lr_0 = 3.6003e-04

 40%|████      | 306/756 [00:08<00:12, 35.22it/s][A
 41%|████      | 310/756 [00:09<00:12, 35.60it/s][ALoss = 8.6654e-02, PNorm = 86.1747, GNorm = 0.0849, lr_0 = 3.5964e-04

 42%|████▏   

 37%|███▋      | 278/756 [00:08<00:13, 34.51it/s][ALoss = 1.2120e-01, PNorm = 87.4784, GNorm = 0.2447, lr_0 = 3.3248e-04

 37%|███▋      | 282/756 [00:08<00:13, 34.07it/s][A
 38%|███▊      | 286/756 [00:08<00:13, 34.34it/s][ALoss = 1.0188e-01, PNorm = 87.5022, GNorm = 0.3351, lr_0 = 3.3211e-04

 38%|███▊      | 290/756 [00:08<00:13, 33.86it/s][A
 39%|███▉      | 294/756 [00:08<00:13, 34.75it/s][A
 39%|███▉      | 298/756 [00:09<00:13, 33.06it/s][ALoss = 1.2255e-01, PNorm = 87.5164, GNorm = 0.1931, lr_0 = 3.3175e-04

 40%|███▉      | 302/756 [00:09<00:13, 34.35it/s][A
 40%|████      | 306/756 [00:09<00:12, 35.03it/s][ALoss = 1.2539e-01, PNorm = 87.5266, GNorm = 0.3979, lr_0 = 3.3139e-04

 41%|████      | 310/756 [00:09<00:12, 35.47it/s][A
 42%|████▏     | 314/756 [00:09<00:12, 35.06it/s][A
 42%|████▏     | 318/756 [00:09<00:14, 30.63it/s][ALoss = 1.2888e-01, PNorm = 87.5404, GNorm = 0.3627, lr_0 = 3.3103e-04

 43%|████▎     | 322/756 [00:09<00:13, 31.09it/s][A
 43%|████▎    


 90%|█████████ | 682/756 [00:20<00:02, 34.70it/s][A
 91%|█████████ | 686/756 [00:20<00:01, 35.56it/s][ALoss = 1.3410e-01, PNorm = 88.1755, GNorm = 0.2917, lr_0 = 3.1795e-04

 91%|█████████▏| 690/756 [00:20<00:01, 35.45it/s][A
 92%|█████████▏| 694/756 [00:20<00:01, 35.86it/s][A
 92%|█████████▏| 698/756 [00:20<00:01, 35.93it/s][ALoss = 1.3320e-01, PNorm = 88.1962, GNorm = 0.1220, lr_0 = 3.1761e-04

 93%|█████████▎| 702/756 [00:20<00:01, 36.20it/s][A
 93%|█████████▎| 706/756 [00:20<00:01, 36.20it/s][ALoss = 1.4909e-01, PNorm = 88.2093, GNorm = 0.2979, lr_0 = 3.1726e-04

 94%|█████████▍| 710/756 [00:20<00:01, 36.60it/s][A
 94%|█████████▍| 714/756 [00:21<00:01, 36.20it/s][A
 95%|█████████▍| 718/756 [00:21<00:01, 36.71it/s][ALoss = 1.6889e-01, PNorm = 88.2175, GNorm = 0.1981, lr_0 = 3.1692e-04

 96%|█████████▌| 722/756 [00:21<00:00, 35.71it/s][A
 96%|█████████▌| 726/756 [00:21<00:00, 36.10it/s][ALoss = 1.2016e-01, PNorm = 88.2340, GNorm = 0.4337, lr_0 = 3.1657e-04

 97%|████████


 39%|███▉      | 294/756 [00:09<00:14, 32.38it/s][A
 39%|███▉      | 298/756 [00:09<00:14, 31.78it/s][ALoss = 1.8595e-01, PNorm = 88.8072, GNorm = 0.2008, lr_0 = 3.0540e-04

 40%|████      | 303/756 [00:10<00:13, 34.40it/s][A
 41%|████      | 307/756 [00:10<00:14, 31.57it/s][A
 41%|████      | 311/756 [00:10<00:13, 33.59it/s][ALoss = 1.3058e-01, PNorm = 88.8239, GNorm = 0.3079, lr_0 = 3.0506e-04

 42%|████▏     | 315/756 [00:10<00:13, 31.61it/s][A
 42%|████▏     | 319/756 [00:10<00:13, 33.16it/s][ALoss = 1.1954e-01, PNorm = 88.8432, GNorm = 0.2157, lr_0 = 3.0473e-04

 43%|████▎     | 323/756 [00:10<00:13, 30.99it/s][A
 43%|████▎     | 327/756 [00:10<00:13, 32.72it/s][A
 44%|████▍     | 331/756 [00:10<00:13, 31.50it/s][ALoss = 1.2356e-01, PNorm = 88.8582, GNorm = 0.3363, lr_0 = 3.0440e-04

 44%|████▍     | 335/756 [00:11<00:12, 33.46it/s][A
 45%|████▍     | 339/756 [00:11<00:13, 30.14it/s][ALoss = 1.0774e-01, PNorm = 88.8776, GNorm = 0.2056, lr_0 = 3.0407e-04

 45%|████▌   

 45%|████▍     | 337/756 [00:10<00:11, 35.10it/s][ALoss = 1.6617e-01, PNorm = 89.9886, GNorm = 0.2425, lr_0 = 2.8019e-04

 45%|████▌     | 341/756 [00:10<00:12, 33.96it/s][A
 46%|████▌     | 346/756 [00:10<00:11, 35.85it/s][ALoss = 1.2176e-01, PNorm = 90.0034, GNorm = 0.3661, lr_0 = 2.7988e-04

 46%|████▋     | 350/756 [00:10<00:11, 34.72it/s][A
 47%|████▋     | 355/756 [00:11<00:11, 33.66it/s][ALoss = 1.3966e-01, PNorm = 90.0158, GNorm = 0.3126, lr_0 = 2.7958e-04

 48%|████▊     | 360/756 [00:11<00:10, 36.02it/s][A
 48%|████▊     | 364/756 [00:11<00:11, 34.60it/s][ALoss = 1.5357e-01, PNorm = 90.0251, GNorm = 0.3804, lr_0 = 2.7927e-04

 49%|████▉     | 369/756 [00:11<00:10, 36.58it/s][A
 49%|████▉     | 373/756 [00:11<00:11, 33.29it/s][ALoss = 1.3251e-01, PNorm = 90.0378, GNorm = 0.2835, lr_0 = 2.7897e-04

 50%|█████     | 378/756 [00:11<00:10, 35.64it/s][A
 51%|█████     | 382/756 [00:11<00:10, 34.79it/s][A
 51%|█████     | 386/756 [00:11<00:10, 36.09it/s][ALoss = 1.3539e-


 53%|█████▎    | 403/756 [00:12<00:11, 30.41it/s][A
 54%|█████▍    | 408/756 [00:12<00:10, 33.52it/s][ALoss = 1.1594e-01, PNorm = 91.0494, GNorm = 0.4857, lr_0 = 2.5597e-04

 54%|█████▍    | 412/756 [00:12<00:11, 30.90it/s][A
 55%|█████▌    | 416/756 [00:12<00:10, 32.95it/s][A
 56%|█████▌    | 420/756 [00:13<00:10, 30.96it/s][ALoss = 1.1201e-01, PNorm = 91.0570, GNorm = 0.2821, lr_0 = 2.5569e-04

 56%|█████▌    | 424/756 [00:13<00:10, 33.01it/s][A
 57%|█████▋    | 428/756 [00:13<00:10, 32.02it/s][ALoss = 1.4709e-01, PNorm = 91.0575, GNorm = 0.5875, lr_0 = 2.5541e-04

 57%|█████▋    | 432/756 [00:13<00:09, 33.75it/s][A
 58%|█████▊    | 436/756 [00:13<00:10, 31.76it/s][A
 58%|█████▊    | 441/756 [00:13<00:09, 34.60it/s][ALoss = 1.2799e-01, PNorm = 91.0622, GNorm = 0.3093, lr_0 = 2.5513e-04

 59%|█████▉    | 445/756 [00:13<00:09, 32.56it/s][A
 60%|█████▉    | 450/756 [00:14<00:09, 31.15it/s][ALoss = 1.4586e-01, PNorm = 91.0725, GNorm = 0.2718, lr_0 = 2.5485e-04

 60%|██████  


 58%|█████▊    | 439/756 [00:12<00:08, 36.02it/s][A
 59%|█████▊    | 443/756 [00:12<00:08, 36.34it/s][ALoss = 1.3843e-01, PNorm = 91.9342, GNorm = 0.6029, lr_0 = 2.3484e-04

 59%|█████▉    | 447/756 [00:12<00:08, 36.37it/s][A
 60%|█████▉    | 451/756 [00:12<00:08, 36.37it/s][A
 60%|██████    | 455/756 [00:12<00:08, 36.51it/s][ALoss = 9.4802e-02, PNorm = 91.9497, GNorm = 0.2976, lr_0 = 2.3458e-04

 61%|██████    | 459/756 [00:12<00:08, 36.49it/s][A
 61%|██████    | 463/756 [00:13<00:07, 36.72it/s][ALoss = 1.2882e-01, PNorm = 91.9618, GNorm = 0.1779, lr_0 = 2.3433e-04

 62%|██████▏   | 467/756 [00:13<00:07, 36.47it/s][A
 62%|██████▏   | 471/756 [00:13<00:07, 36.51it/s][A
 63%|██████▎   | 475/756 [00:13<00:07, 36.52it/s][ALoss = 9.7883e-02, PNorm = 91.9737, GNorm = 0.4996, lr_0 = 2.3407e-04

 63%|██████▎   | 479/756 [00:13<00:07, 36.34it/s][A
 64%|██████▍   | 483/756 [00:13<00:07, 36.42it/s][ALoss = 1.4218e-01, PNorm = 91.9827, GNorm = 0.2646, lr_0 = 2.3382e-04

 64%|██████▍ 

 59%|█████▉    | 449/756 [00:13<00:08, 34.94it/s][ALoss = 1.4012e-01, PNorm = 92.6527, GNorm = 0.3594, lr_0 = 2.1618e-04

 60%|█████▉    | 453/756 [00:13<00:08, 35.42it/s][A
 60%|██████    | 457/756 [00:13<00:08, 35.69it/s][ALoss = 1.2775e-01, PNorm = 92.6561, GNorm = 0.1502, lr_0 = 2.1594e-04

 61%|██████    | 461/756 [00:13<00:08, 35.44it/s][A
 62%|██████▏   | 465/756 [00:13<00:08, 34.98it/s][A
 62%|██████▏   | 469/756 [00:13<00:08, 35.00it/s][ALoss = 1.4092e-01, PNorm = 92.6612, GNorm = 0.3292, lr_0 = 2.1571e-04

 63%|██████▎   | 473/756 [00:14<00:08, 34.41it/s][A
 63%|██████▎   | 477/756 [00:14<00:08, 34.59it/s][ALoss = 1.1423e-01, PNorm = 92.6708, GNorm = 0.3096, lr_0 = 2.1547e-04

 64%|██████▎   | 481/756 [00:14<00:07, 34.53it/s][A
 64%|██████▍   | 485/756 [00:14<00:07, 34.16it/s][A
 65%|██████▍   | 489/756 [00:14<00:07, 34.35it/s][ALoss = 1.3382e-01, PNorm = 92.6863, GNorm = 0.2631, lr_0 = 2.1524e-04

 65%|██████▌   | 493/756 [00:14<00:07, 34.47it/s][A
 66%|██████▌  


  7%|▋         | 50/756 [00:01<00:21, 32.66it/s][A
  7%|▋         | 54/756 [00:01<00:21, 33.35it/s][ALoss = 1.0658e-01, PNorm = 92.9759, GNorm = 0.2867, lr_0 = 2.0784e-04

  8%|▊         | 58/756 [00:02<00:22, 30.66it/s][A
  8%|▊         | 62/756 [00:02<00:21, 31.67it/s][ALoss = 1.0492e-01, PNorm = 92.9899, GNorm = 0.2744, lr_0 = 2.0762e-04

  9%|▊         | 66/756 [00:02<00:21, 32.71it/s][A
  9%|▉         | 70/756 [00:02<00:20, 33.39it/s][A
 10%|▉         | 74/756 [00:02<00:19, 34.57it/s][ALoss = 1.2890e-01, PNorm = 92.9983, GNorm = 0.2863, lr_0 = 2.0739e-04

 10%|█         | 78/756 [00:02<00:19, 34.76it/s][A
 11%|█         | 82/756 [00:02<00:18, 35.51it/s][ALoss = 1.2167e-01, PNorm = 93.0047, GNorm = 0.2431, lr_0 = 2.0717e-04

 11%|█▏        | 86/756 [00:02<00:18, 35.67it/s][A
 12%|█▏        | 90/756 [00:02<00:18, 36.24it/s][A
 12%|█▏        | 94/756 [00:03<00:18, 36.04it/s][ALoss = 1.0983e-01, PNorm = 93.0121, GNorm = 0.3900, lr_0 = 2.0694e-04

 13%|█▎        | 98/756 [


  8%|▊         | 63/756 [00:02<00:19, 35.43it/s][A
  9%|▉         | 67/756 [00:02<00:20, 33.19it/s][ALoss = 1.0415e-01, PNorm = 93.5503, GNorm = 0.2185, lr_0 = 1.9112e-04

 10%|▉         | 72/756 [00:02<00:19, 35.48it/s][A
 10%|█         | 76/756 [00:02<00:20, 33.20it/s][ALoss = 1.2276e-01, PNorm = 93.5566, GNorm = 0.1721, lr_0 = 1.9092e-04

 11%|█         | 80/756 [00:02<00:19, 34.59it/s][A
 11%|█         | 84/756 [00:02<00:20, 32.23it/s][A
 12%|█▏        | 89/756 [00:03<00:21, 30.85it/s][ALoss = 1.0632e-01, PNorm = 93.5622, GNorm = 0.2454, lr_0 = 1.9071e-04

 12%|█▏        | 94/756 [00:03<00:19, 33.57it/s][A
 13%|█▎        | 98/756 [00:03<00:20, 31.56it/s][ALoss = 1.0547e-01, PNorm = 93.5694, GNorm = 0.1744, lr_0 = 1.9050e-04

 13%|█▎        | 102/756 [00:03<00:19, 33.47it/s][A
 14%|█▍        | 106/756 [00:03<00:20, 31.93it/s][ALoss = 1.4074e-01, PNorm = 93.5745, GNorm = 0.4007, lr_0 = 1.9029e-04

 15%|█▍        | 110/756 [00:03<00:19, 33.66it/s][A
 15%|█▌        | 114/7


 15%|█▌        | 115/756 [00:03<00:21, 29.33it/s][A
 16%|█▌        | 120/756 [00:03<00:19, 32.66it/s][A
 16%|█▋        | 124/756 [00:03<00:19, 32.56it/s][ALoss = 1.2869e-01, PNorm = 94.1108, GNorm = 0.5271, lr_0 = 1.7497e-04

 17%|█▋        | 129/756 [00:04<00:18, 34.41it/s][A
 18%|█▊        | 133/756 [00:04<00:20, 30.88it/s][ALoss = 1.1482e-01, PNorm = 94.1177, GNorm = 0.1493, lr_0 = 1.7477e-04

 18%|█▊        | 137/756 [00:04<00:18, 32.62it/s][A
 19%|█▊        | 141/756 [00:04<00:18, 32.83it/s][ALoss = 1.2675e-01, PNorm = 94.1263, GNorm = 0.2318, lr_0 = 1.7458e-04

 19%|█▉        | 145/756 [00:04<00:17, 34.62it/s][A
 20%|█▉        | 149/756 [00:04<00:20, 29.38it/s][A
 20%|██        | 153/756 [00:04<00:18, 31.86it/s][ALoss = 1.4753e-01, PNorm = 94.1293, GNorm = 0.5528, lr_0 = 1.7439e-04

 21%|██        | 157/756 [00:04<00:18, 31.95it/s][A
 21%|██▏       | 161/756 [00:05<00:17, 33.67it/s][ALoss = 1.2748e-01, PNorm = 94.1363, GNorm = 0.4653, lr_0 = 1.7420e-04

 22%|██▏     


 71%|███████   | 536/756 [00:15<00:05, 36.68it/s][A
 71%|███████▏  | 540/756 [00:15<00:06, 35.08it/s][ALoss = 1.1859e-01, PNorm = 94.3802, GNorm = 0.1764, lr_0 = 1.6714e-04

 72%|███████▏  | 545/756 [00:16<00:05, 36.56it/s][A
 73%|███████▎  | 549/756 [00:16<00:05, 35.60it/s][A
 73%|███████▎  | 554/756 [00:16<00:05, 37.20it/s][ALoss = 1.1095e-01, PNorm = 94.3839, GNorm = 0.4285, lr_0 = 1.6696e-04

 74%|███████▍  | 558/756 [00:16<00:06, 29.65it/s][A
 74%|███████▍  | 563/756 [00:16<00:06, 30.98it/s][ALoss = 1.0207e-01, PNorm = 94.3921, GNorm = 0.2260, lr_0 = 1.6678e-04

 75%|███████▌  | 568/756 [00:16<00:05, 33.48it/s][A
 76%|███████▌  | 572/756 [00:16<00:05, 32.75it/s][ALoss = 1.1671e-01, PNorm = 94.3977, GNorm = 0.5016, lr_0 = 1.6660e-04

 76%|███████▌  | 576/756 [00:17<00:05, 34.34it/s][A
 77%|███████▋  | 580/756 [00:17<00:05, 31.95it/s][ALoss = 1.0658e-01, PNorm = 94.4012, GNorm = 0.6832, lr_0 = 1.6641e-04

 77%|███████▋  | 585/756 [00:17<00:05, 34.17it/s][A
 78%|███████▊

 73%|███████▎  | 555/756 [00:16<00:05, 34.33it/s][ALoss = 1.0340e-01, PNorm = 94.8310, GNorm = 0.1862, lr_0 = 1.5370e-04

 74%|███████▍  | 559/756 [00:16<00:05, 34.38it/s][A
 74%|███████▍  | 563/756 [00:16<00:05, 34.21it/s][A
 75%|███████▌  | 567/756 [00:16<00:05, 34.71it/s][ALoss = 9.4568e-02, PNorm = 94.8396, GNorm = 0.1955, lr_0 = 1.5353e-04

 76%|███████▌  | 571/756 [00:16<00:05, 34.72it/s][A
 76%|███████▌  | 575/756 [00:16<00:05, 35.29it/s][ALoss = 1.2944e-01, PNorm = 94.8457, GNorm = 0.4614, lr_0 = 1.5336e-04

 77%|███████▋  | 579/756 [00:17<00:05, 35.33it/s][A
 77%|███████▋  | 583/756 [00:17<00:04, 36.03it/s][A
 78%|███████▊  | 587/756 [00:17<00:04, 35.69it/s][ALoss = 1.2929e-01, PNorm = 94.8500, GNorm = 0.5176, lr_0 = 1.5319e-04

 78%|███████▊  | 591/756 [00:17<00:04, 35.98it/s][A
 79%|███████▊  | 595/756 [00:17<00:04, 35.70it/s][ALoss = 1.3600e-01, PNorm = 94.8559, GNorm = 0.5588, lr_0 = 1.5303e-04

 79%|███████▉  | 599/756 [00:17<00:04, 36.40it/s][A
 80%|███████▉ 


 79%|███████▊  | 594/756 [00:18<00:04, 33.22it/s][A
 79%|███████▉  | 598/756 [00:18<00:04, 34.80it/s][A
 80%|███████▉  | 602/756 [00:18<00:04, 33.69it/s][ALoss = 8.3198e-02, PNorm = 95.2655, GNorm = 0.1124, lr_0 = 1.4085e-04

 80%|████████  | 607/756 [00:18<00:04, 35.75it/s][A
 81%|████████  | 611/756 [00:18<00:04, 34.43it/s][ALoss = 1.2111e-01, PNorm = 95.2660, GNorm = 0.1800, lr_0 = 1.4070e-04

 81%|████████▏ | 616/756 [00:18<00:03, 36.39it/s][A
 82%|████████▏ | 620/756 [00:18<00:03, 34.83it/s][ALoss = 1.3541e-01, PNorm = 95.2655, GNorm = 0.1976, lr_0 = 1.4055e-04

 83%|████████▎ | 625/756 [00:18<00:03, 36.59it/s][A
 83%|████████▎ | 629/756 [00:19<00:03, 34.97it/s][ALoss = 1.1441e-01, PNorm = 95.2674, GNorm = 0.4894, lr_0 = 1.4039e-04

 84%|████████▍ | 634/756 [00:19<00:03, 34.42it/s][A
 85%|████████▍ | 639/756 [00:19<00:03, 36.26it/s][A
 85%|████████▌ | 643/756 [00:19<00:03, 33.99it/s][ALoss = 1.1128e-01, PNorm = 95.2741, GNorm = 0.1763, lr_0 = 1.4024e-04

 86%|████████


 28%|██▊       | 210/756 [00:06<00:16, 33.65it/s][A
 28%|██▊       | 214/756 [00:06<00:17, 30.23it/s][A
 29%|██▉       | 218/756 [00:06<00:16, 31.81it/s][ALoss = 9.0070e-02, PNorm = 95.4228, GNorm = 0.3920, lr_0 = 1.3528e-04

 29%|██▉       | 222/756 [00:06<00:16, 33.35it/s][A
 30%|██▉       | 226/756 [00:06<00:16, 31.20it/s][ALoss = 1.3476e-01, PNorm = 95.4260, GNorm = 0.1509, lr_0 = 1.3513e-04

 30%|███       | 230/756 [00:06<00:16, 32.39it/s][A
 31%|███       | 234/756 [00:07<00:15, 33.29it/s][A
 31%|███▏      | 238/756 [00:07<00:15, 34.38it/s][ALoss = 1.5881e-01, PNorm = 95.4278, GNorm = 0.2362, lr_0 = 1.3498e-04

 32%|███▏      | 242/756 [00:07<00:14, 34.48it/s][A
 33%|███▎      | 246/756 [00:07<00:14, 35.08it/s][ALoss = 1.4994e-01, PNorm = 95.4287, GNorm = 0.4241, lr_0 = 1.3483e-04

 33%|███▎      | 250/756 [00:07<00:14, 34.97it/s][A
 34%|███▎      | 254/756 [00:07<00:14, 35.64it/s][A
 34%|███▍      | 258/756 [00:07<00:14, 35.02it/s][ALoss = 1.5114e-01, PNorm = 95.4

 28%|██▊       | 214/756 [00:06<00:18, 28.71it/s][A
 29%|██▉       | 218/756 [00:06<00:17, 30.48it/s][A
 29%|██▉       | 222/756 [00:06<00:16, 32.37it/s][ALoss = 1.2467e-01, PNorm = 95.7378, GNorm = 0.4259, lr_0 = 1.2453e-04

 30%|██▉       | 226/756 [00:06<00:15, 33.27it/s][A
 30%|███       | 230/756 [00:06<00:15, 34.43it/s][ALoss = 1.3011e-01, PNorm = 95.7427, GNorm = 0.5297, lr_0 = 1.2439e-04

 31%|███       | 234/756 [00:07<00:15, 34.76it/s][A
 31%|███▏      | 238/756 [00:07<00:14, 35.69it/s][A
 32%|███▏      | 242/756 [00:07<00:14, 35.61it/s][ALoss = 1.1811e-01, PNorm = 95.7466, GNorm = 0.5209, lr_0 = 1.2426e-04

 33%|███▎      | 246/756 [00:07<00:14, 35.89it/s][A
 33%|███▎      | 250/756 [00:07<00:14, 35.67it/s][ALoss = 1.1067e-01, PNorm = 95.7511, GNorm = 0.1760, lr_0 = 1.2412e-04

 34%|███▎      | 254/756 [00:07<00:13, 35.87it/s][A
 34%|███▍      | 258/756 [00:07<00:13, 35.71it/s][A
 35%|███▍      | 262/756 [00:07<00:13, 36.33it/s][ALoss = 1.2143e-01, PNorm = 95.75

 29%|██▉       | 218/756 [00:06<00:15, 35.18it/s][A
 29%|██▉       | 222/756 [00:06<00:15, 33.47it/s][A
 30%|██▉       | 226/756 [00:06<00:15, 34.82it/s][ALoss = 9.8345e-02, PNorm = 96.0318, GNorm = 0.1723, lr_0 = 1.1462e-04

 30%|███       | 230/756 [00:07<00:15, 33.20it/s][A
 31%|███       | 234/756 [00:07<00:14, 34.85it/s][ALoss = 1.2885e-01, PNorm = 96.0358, GNorm = 0.7133, lr_0 = 1.1450e-04

 31%|███▏      | 238/756 [00:07<00:15, 34.05it/s][A
 32%|███▏      | 242/756 [00:07<00:14, 35.10it/s][A
 33%|███▎      | 246/756 [00:07<00:15, 32.82it/s][ALoss = 1.1924e-01, PNorm = 96.0369, GNorm = 0.6000, lr_0 = 1.1437e-04

 33%|███▎      | 250/756 [00:07<00:14, 33.94it/s][A
 34%|███▎      | 254/756 [00:07<00:15, 32.57it/s][ALoss = 9.0466e-02, PNorm = 96.0421, GNorm = 0.7493, lr_0 = 1.1425e-04

 34%|███▍      | 258/756 [00:07<00:14, 33.89it/s][A
 35%|███▍      | 262/756 [00:07<00:14, 33.74it/s][A
 35%|███▌      | 266/756 [00:08<00:14, 34.79it/s][ALoss = 1.2915e-01, PNorm = 96.04

 31%|███       | 234/756 [00:07<00:14, 35.00it/s][A
 31%|███▏      | 238/756 [00:07<00:14, 35.85it/s][ALoss = 1.4513e-01, PNorm = 96.2924, GNorm = 0.6409, lr_0 = 1.0540e-04

 32%|███▏      | 242/756 [00:07<00:14, 35.48it/s][A
 33%|███▎      | 246/756 [00:07<00:14, 36.15it/s][A
 33%|███▎      | 250/756 [00:07<00:14, 35.96it/s][ALoss = 1.1605e-01, PNorm = 96.2937, GNorm = 0.6037, lr_0 = 1.0529e-04

 34%|███▎      | 254/756 [00:07<00:13, 36.43it/s][A
 34%|███▍      | 258/756 [00:07<00:13, 35.96it/s][ALoss = 9.1772e-02, PNorm = 96.2972, GNorm = 0.2710, lr_0 = 1.0517e-04

 35%|███▍      | 262/756 [00:07<00:13, 36.43it/s][A
 35%|███▌      | 266/756 [00:08<00:13, 35.94it/s][A
 36%|███▌      | 270/756 [00:08<00:13, 36.44it/s][ALoss = 1.1286e-01, PNorm = 96.3008, GNorm = 0.4940, lr_0 = 1.0506e-04

 36%|███▌      | 274/756 [00:08<00:13, 35.86it/s][A
 37%|███▋      | 278/756 [00:08<00:13, 36.38it/s][ALoss = 1.2669e-01, PNorm = 96.3033, GNorm = 0.4448, lr_0 = 1.0494e-04

 37%|███▋     


 26%|██▌       | 193/756 [00:06<00:15, 35.35it/s][A
 26%|██▌       | 197/756 [00:06<00:15, 35.33it/s][ALoss = 1.3754e-01, PNorm = 50.5665, GNorm = 0.3380, lr_0 = 2.1980e-04

 27%|██▋       | 201/756 [00:06<00:15, 36.01it/s][A
 27%|██▋       | 205/756 [00:06<00:15, 35.68it/s][A
 28%|██▊       | 209/756 [00:06<00:15, 35.60it/s][ALoss = 2.0161e-01, PNorm = 50.5707, GNorm = 0.1851, lr_0 = 2.2576e-04

 28%|██▊       | 213/756 [00:06<00:15, 35.52it/s][A
 29%|██▊       | 217/756 [00:06<00:15, 33.97it/s][ALoss = 1.7063e-01, PNorm = 50.5770, GNorm = 0.1722, lr_0 = 2.3172e-04

 29%|██▉       | 221/756 [00:06<00:15, 33.66it/s][A
 30%|██▉       | 225/756 [00:06<00:16, 33.15it/s][A
 30%|███       | 229/756 [00:07<00:15, 33.41it/s][ALoss = 1.9787e-01, PNorm = 50.5804, GNorm = 0.2736, lr_0 = 2.3768e-04

 31%|███       | 233/756 [00:07<00:18, 28.32it/s][A
 31%|███▏      | 237/756 [00:07<00:17, 29.91it/s][ALoss = 1.8438e-01, PNorm = 50.5879, GNorm = 0.4002, lr_0 = 2.4364e-04

 32%|███▏    

 29%|██▊       | 216/756 [00:06<00:17, 31.52it/s][A
 29%|██▉       | 220/756 [00:06<00:18, 29.40it/s][ALoss = 1.8682e-01, PNorm = 51.3861, GNorm = 0.1333, lr_0 = 6.8530e-04

 30%|██▉       | 225/756 [00:07<00:16, 32.42it/s][A
 30%|███       | 229/756 [00:07<00:16, 31.99it/s][A
 31%|███       | 234/756 [00:07<00:15, 34.75it/s][ALoss = 1.8534e-01, PNorm = 51.4027, GNorm = 0.1270, lr_0 = 6.9126e-04

 31%|███▏      | 238/756 [00:07<00:17, 29.25it/s][A
 32%|███▏      | 243/756 [00:07<00:16, 30.33it/s][ALoss = 1.5059e-01, PNorm = 51.4390, GNorm = 0.0785, lr_0 = 6.9722e-04

 33%|███▎      | 248/756 [00:07<00:15, 33.09it/s][A
 33%|███▎      | 252/756 [00:07<00:15, 32.63it/s][ALoss = 1.8092e-01, PNorm = 51.4362, GNorm = 0.2753, lr_0 = 7.0318e-04

 34%|███▍      | 257/756 [00:07<00:14, 34.94it/s][A
 35%|███▍      | 261/756 [00:08<00:14, 33.88it/s][ALoss = 1.8593e-01, PNorm = 51.4541, GNorm = 0.1403, lr_0 = 7.0914e-04

 35%|███▌      | 266/756 [00:08<00:13, 35.99it/s][A
 36%|███▌     

 35%|███▌      | 265/756 [00:07<00:13, 36.03it/s][A
 36%|███▌      | 269/756 [00:08<00:13, 36.19it/s][ALoss = 1.4420e-01, PNorm = 54.1434, GNorm = 0.0958, lr_0 = 9.7070e-04

 36%|███▌      | 273/756 [00:08<00:13, 36.18it/s][A
 37%|███▋      | 277/756 [00:08<00:13, 36.44it/s][ALoss = 1.7634e-01, PNorm = 54.1573, GNorm = 0.0962, lr_0 = 9.6965e-04

 37%|███▋      | 281/756 [00:08<00:13, 36.52it/s][A
 38%|███▊      | 285/756 [00:08<00:12, 36.71it/s][A
 38%|███▊      | 289/756 [00:08<00:12, 36.63it/s][ALoss = 1.7356e-01, PNorm = 54.1989, GNorm = 0.0951, lr_0 = 9.6859e-04

 39%|███▉      | 293/756 [00:08<00:12, 36.26it/s][A
 39%|███▉      | 297/756 [00:08<00:12, 36.52it/s][ALoss = 2.1071e-01, PNorm = 54.2089, GNorm = 0.1008, lr_0 = 9.6754e-04

 40%|███▉      | 301/756 [00:08<00:12, 36.40it/s][A
 40%|████      | 305/756 [00:09<00:12, 36.29it/s][A
 41%|████      | 309/756 [00:09<00:12, 36.07it/s][ALoss = 1.2445e-01, PNorm = 54.2973, GNorm = 0.0991, lr_0 = 9.6648e-04

 41%|████▏    


 36%|███▌      | 274/756 [00:09<00:15, 30.94it/s][A
 37%|███▋      | 278/756 [00:09<00:14, 32.31it/s][A
 37%|███▋      | 282/756 [00:09<00:18, 25.65it/s][ALoss = 1.7001e-01, PNorm = 56.8619, GNorm = 0.2936, lr_0 = 8.9261e-04

 38%|███▊      | 286/756 [00:09<00:16, 28.29it/s][A
 38%|███▊      | 290/756 [00:09<00:16, 29.11it/s][ALoss = 1.4173e-01, PNorm = 56.9038, GNorm = 0.0837, lr_0 = 8.9164e-04

 39%|███▉      | 295/756 [00:09<00:14, 32.58it/s][A
 40%|███▉      | 299/756 [00:09<00:16, 28.40it/s][A
 40%|████      | 303/756 [00:10<00:14, 30.84it/s][ALoss = 1.7141e-01, PNorm = 56.9490, GNorm = 0.2575, lr_0 = 8.9067e-04

 41%|████      | 307/756 [00:10<00:14, 30.68it/s][A
 41%|████      | 311/756 [00:10<00:13, 32.56it/s][ALoss = 1.4857e-01, PNorm = 56.9567, GNorm = 0.1059, lr_0 = 8.8970e-04

 42%|████▏     | 315/756 [00:10<00:14, 31.21it/s][A
 42%|████▏     | 319/756 [00:10<00:13, 32.90it/s][A
 43%|████▎     | 323/756 [00:10<00:13, 32.16it/s][ALoss = 1.5323e-01, PNorm = 57.0

 37%|███▋      | 281/756 [00:08<00:12, 37.21it/s][A
 38%|███▊      | 285/756 [00:08<00:12, 36.82it/s][ALoss = 1.1920e-01, PNorm = 60.1993, GNorm = 0.0776, lr_0 = 8.2161e-04

 38%|███▊      | 289/756 [00:08<00:12, 37.09it/s][A
 39%|███▉      | 293/756 [00:08<00:12, 36.27it/s][A
 39%|███▉      | 297/756 [00:08<00:12, 36.89it/s][ALoss = 1.1513e-01, PNorm = 60.2379, GNorm = 0.1594, lr_0 = 8.2071e-04

 40%|███▉      | 301/756 [00:08<00:12, 36.28it/s][A
 40%|████      | 305/756 [00:09<00:12, 36.53it/s][ALoss = 1.6206e-01, PNorm = 60.2427, GNorm = 0.0938, lr_0 = 8.1982e-04

 41%|████      | 309/756 [00:09<00:12, 35.46it/s][A
 41%|████▏     | 313/756 [00:09<00:12, 35.70it/s][A
 42%|████▏     | 317/756 [00:09<00:12, 35.13it/s][ALoss = 1.0545e-01, PNorm = 60.2815, GNorm = 0.0733, lr_0 = 8.1893e-04

 42%|████▏     | 321/756 [00:09<00:12, 35.35it/s][A
 43%|████▎     | 325/756 [00:09<00:12, 35.02it/s][ALoss = 1.1703e-01, PNorm = 60.3250, GNorm = 0.0957, lr_0 = 8.1804e-04

 44%|████▎    

 40%|████      | 304/756 [00:08<00:12, 35.08it/s][A
 41%|████      | 308/756 [00:09<00:13, 32.99it/s][ALoss = 1.3907e-01, PNorm = 63.3145, GNorm = 0.0954, lr_0 = 7.5469e-04

 41%|████▏     | 313/756 [00:09<00:13, 32.62it/s][A
 42%|████▏     | 318/756 [00:09<00:12, 35.13it/s][A
 43%|████▎     | 322/756 [00:09<00:12, 34.19it/s][ALoss = 1.9246e-01, PNorm = 63.3559, GNorm = 0.1441, lr_0 = 7.5387e-04

 43%|████▎     | 326/756 [00:09<00:12, 35.49it/s][A
 44%|████▎     | 330/756 [00:09<00:12, 34.13it/s][ALoss = 1.4792e-01, PNorm = 63.4263, GNorm = 0.0928, lr_0 = 7.5305e-04

 44%|████▍     | 334/756 [00:09<00:11, 35.37it/s][A
 45%|████▍     | 338/756 [00:09<00:12, 34.38it/s][ALoss = 1.1758e-01, PNorm = 63.5005, GNorm = 0.0684, lr_0 = 7.5223e-04

 45%|████▌     | 343/756 [00:10<00:11, 36.27it/s][A
 46%|████▌     | 347/756 [00:10<00:11, 34.73it/s][A
 47%|████▋     | 352/756 [00:10<00:11, 36.52it/s][ALoss = 1.1585e-01, PNorm = 63.5442, GNorm = 0.0793, lr_0 = 7.5141e-04

 47%|████▋    


 45%|████▍     | 338/756 [00:10<00:12, 32.95it/s][A
 45%|████▌     | 342/756 [00:11<00:13, 31.35it/s][A
 46%|████▌     | 346/756 [00:11<00:12, 32.69it/s][ALoss = 1.3638e-01, PNorm = 66.9592, GNorm = 0.1437, lr_0 = 6.9239e-04

 46%|████▋     | 350/756 [00:11<00:15, 26.28it/s][A
 47%|████▋     | 354/756 [00:11<00:16, 24.40it/s][A
 47%|████▋     | 357/756 [00:11<00:15, 25.19it/s][ALoss = 1.5150e-01, PNorm = 66.9953, GNorm = 0.2624, lr_0 = 6.9164e-04

 48%|████▊     | 362/756 [00:11<00:13, 28.64it/s][A
 48%|████▊     | 366/756 [00:12<00:15, 24.43it/s][ALoss = 1.9912e-01, PNorm = 67.0163, GNorm = 0.1569, lr_0 = 6.9088e-04

 49%|████▉     | 370/756 [00:12<00:14, 27.19it/s][A
 49%|████▉     | 373/756 [00:12<00:14, 27.26it/s][ALoss = 1.5225e-01, PNorm = 67.0747, GNorm = 0.1159, lr_0 = 6.9013e-04

 50%|█████     | 378/756 [00:12<00:12, 30.07it/s][A
 51%|█████     | 382/756 [00:12<00:12, 30.13it/s][A
 51%|█████     | 386/756 [00:12<00:13, 27.90it/s][ALoss = 1.6574e-01, PNorm = 67.1

 46%|████▋     | 350/756 [00:10<00:11, 34.20it/s][ALoss = 1.7334e-01, PNorm = 70.4989, GNorm = 0.3508, lr_0 = 6.3738e-04

 47%|████▋     | 354/756 [00:10<00:11, 35.14it/s][A
 47%|████▋     | 358/756 [00:10<00:11, 34.87it/s][ALoss = 1.6712e-01, PNorm = 70.5504, GNorm = 0.2476, lr_0 = 6.3669e-04

 48%|████▊     | 362/756 [00:10<00:11, 35.64it/s][A
 48%|████▊     | 366/756 [00:10<00:11, 35.25it/s][A
 49%|████▉     | 370/756 [00:11<00:10, 36.12it/s][ALoss = 1.6334e-01, PNorm = 70.6109, GNorm = 0.1280, lr_0 = 6.3599e-04

 49%|████▉     | 374/756 [00:11<00:10, 35.24it/s][A
 50%|█████     | 378/756 [00:11<00:10, 36.07it/s][ALoss = 1.1276e-01, PNorm = 70.6756, GNorm = 0.0844, lr_0 = 6.3530e-04

 51%|█████     | 382/756 [00:11<00:10, 35.50it/s][A
 51%|█████     | 386/756 [00:11<00:10, 36.51it/s][A
 52%|█████▏    | 390/756 [00:11<00:10, 35.83it/s][ALoss = 1.9515e-01, PNorm = 70.6917, GNorm = 0.1822, lr_0 = 6.3461e-04

 52%|█████▏    | 394/756 [00:11<00:09, 36.38it/s][A
 53%|█████▎   


100%|█████████▉| 754/756 [00:22<00:00, 35.95it/s][A
                                                 [A
  0%|          | 0/95 [00:00<?, ?it/s][A
  1%|          | 1/95 [00:00<00:35,  2.63it/s][A
  8%|▊         | 8/95 [00:00<00:04, 20.98it/s][A
 19%|█▉        | 18/95 [00:00<00:01, 41.50it/s][A
 27%|██▋       | 26/95 [00:00<00:01, 52.15it/s][A
 37%|███▋      | 35/95 [00:00<00:00, 62.64it/s][A
 47%|████▋     | 45/95 [00:00<00:00, 71.61it/s][A
 57%|█████▋    | 54/95 [00:01<00:00, 59.16it/s][A
 64%|██████▍   | 61/95 [00:01<00:00, 52.09it/s][A
 71%|███████   | 67/95 [00:01<00:00, 50.28it/s][A
 78%|███████▊  | 74/95 [00:01<00:00, 45.63it/s][A
 87%|████████▋ | 83/95 [00:01<00:00, 54.59it/s][A
 95%|█████████▍| 90/95 [00:01<00:00, 53.78it/s][A
                                               [AValidation auc = 0.775363
 27%|██▋       | 8/30 [03:17<09:06, 24.84s/it]Epoch 8

  0%|          | 0/756 [00:00<?, ?it/s][A
  0%|          | 1/756 [00:00<04:24,  2.86it/s][A
  1%|          |

  9%|▉         | 9/95 [00:00<00:04, 18.04it/s][A
 18%|█▊        | 17/95 [00:00<00:03, 25.46it/s][A
 26%|██▋       | 25/95 [00:01<00:02, 31.02it/s][A
 35%|███▍      | 33/95 [00:01<00:01, 39.76it/s][A
 43%|████▎     | 41/95 [00:01<00:01, 47.52it/s][A
 52%|█████▏    | 49/95 [00:01<00:00, 54.64it/s][A
 61%|██████    | 58/95 [00:01<00:00, 62.68it/s][A
 69%|██████▉   | 66/95 [00:01<00:00, 66.08it/s][A
 78%|███████▊  | 74/95 [00:01<00:00, 67.22it/s][A
 88%|████████▊ | 84/95 [00:01<00:00, 74.64it/s][A
 99%|█████████▉| 94/95 [00:01<00:00, 79.87it/s][A
                                               [AValidation auc = 0.771962
 30%|███       | 9/30 [03:41<08:36, 24.58s/it]Epoch 9

  0%|          | 0/756 [00:00<?, ?it/s][ALoss = 1.6021e-01, PNorm = 75.1961, GNorm = 0.1538, lr_0 = 5.6167e-04

  0%|          | 1/756 [00:00<04:24,  2.85it/s][A
  1%|          | 5/756 [00:00<00:57, 13.17it/s][A
  1%|          | 8/756 [00:00<00:46, 16.12it/s][ALoss = 1.1741e-01, PNorm = 75.2399, GNorm = 

 35%|███▍      | 33/95 [00:01<00:01, 44.50it/s][A
 43%|████▎     | 41/95 [00:01<00:01, 49.24it/s][A
 52%|█████▏    | 49/95 [00:01<00:00, 53.26it/s][A
 60%|██████    | 57/95 [00:01<00:00, 57.96it/s][A
 68%|██████▊   | 65/95 [00:01<00:00, 60.44it/s][A
 77%|███████▋  | 73/95 [00:01<00:00, 61.55it/s][A
 86%|████████▋ | 82/95 [00:01<00:00, 67.64it/s][A
 98%|█████████▊| 93/95 [00:01<00:00, 77.10it/s][A
                                               [AValidation auc = 0.765484
 33%|███▎      | 10/30 [04:05<08:08, 24.41s/it]Epoch 10

  0%|          | 0/756 [00:00<?, ?it/s][A
  0%|          | 1/756 [00:00<04:37,  2.72it/s][A
  1%|          | 5/756 [00:00<00:58, 12.89it/s][ALoss = 1.5602e-01, PNorm = 77.9960, GNorm = 0.1666, lr_0 = 5.1699e-04

  1%|          | 9/756 [00:00<00:39, 18.95it/s][A
  2%|▏         | 13/756 [00:00<00:31, 23.94it/s][ALoss = 1.4215e-01, PNorm = 78.0223, GNorm = 0.1639, lr_0 = 5.1643e-04

  2%|▏         | 17/756 [00:00<00:27, 26.43it/s][A
  3%|▎         | 21


 50%|████▉     | 376/756 [00:12<00:12, 30.94it/s][A
 50%|█████     | 380/756 [00:12<00:12, 29.78it/s][A
 51%|█████     | 384/756 [00:12<00:11, 31.84it/s][ALoss = 1.3755e-01, PNorm = 79.2823, GNorm = 0.1853, lr_0 = 4.9603e-04

 51%|█████▏    | 388/756 [00:12<00:11, 30.74it/s][A
 52%|█████▏    | 392/756 [00:12<00:11, 32.62it/s][ALoss = 1.2655e-01, PNorm = 79.3184, GNorm = 0.1416, lr_0 = 4.9549e-04

 52%|█████▏    | 396/756 [00:12<00:11, 31.71it/s][A
 53%|█████▎    | 401/756 [00:13<00:10, 32.55it/s][A
 54%|█████▎    | 405/756 [00:13<00:10, 32.87it/s][ALoss = 1.3986e-01, PNorm = 79.3526, GNorm = 0.1245, lr_0 = 4.9495e-04

 54%|█████▍    | 409/756 [00:13<00:13, 26.51it/s][A
 55%|█████▍    | 413/756 [00:13<00:12, 28.52it/s][ALoss = 9.1722e-02, PNorm = 79.3907, GNorm = 0.2529, lr_0 = 4.9441e-04

 55%|█████▌    | 417/756 [00:13<00:11, 29.28it/s][A
 56%|█████▌    | 421/756 [00:13<00:11, 29.03it/s][A
 56%|█████▌    | 425/756 [00:13<00:10, 30.13it/s][ALoss = 1.4605e-01, PNorm = 79.4

 51%|█████▏    | 389/756 [00:12<00:12, 30.08it/s][ALoss = 1.4997e-01, PNorm = 81.3732, GNorm = 0.5235, lr_0 = 4.5662e-04

 52%|█████▏    | 393/756 [00:12<00:12, 30.12it/s][A
 53%|█████▎    | 397/756 [00:13<00:11, 31.35it/s][ALoss = 1.4016e-01, PNorm = 81.4013, GNorm = 0.1529, lr_0 = 4.5612e-04

 53%|█████▎    | 401/756 [00:13<00:11, 31.57it/s][A
 54%|█████▎    | 405/756 [00:13<00:10, 32.82it/s][A
 54%|█████▍    | 409/756 [00:13<00:10, 32.36it/s][ALoss = 1.4060e-01, PNorm = 81.4338, GNorm = 0.3315, lr_0 = 4.5563e-04

 55%|█████▍    | 413/756 [00:13<00:10, 32.92it/s][A
 55%|█████▌    | 417/756 [00:13<00:10, 32.35it/s][ALoss = 1.2560e-01, PNorm = 81.4587, GNorm = 0.1308, lr_0 = 4.5513e-04

 56%|█████▌    | 421/756 [00:13<00:10, 33.23it/s][A
 56%|█████▌    | 425/756 [00:13<00:10, 32.62it/s][A
 57%|█████▋    | 429/756 [00:13<00:09, 33.71it/s][ALoss = 1.8137e-01, PNorm = 81.4818, GNorm = 0.2193, lr_0 = 4.5464e-04

 57%|█████▋    | 433/756 [00:14<00:09, 32.86it/s][A
 58%|█████▊   

 54%|█████▎    | 405/756 [00:12<00:10, 33.42it/s][A
 54%|█████▍    | 409/756 [00:12<00:10, 31.83it/s][A
 55%|█████▍    | 413/756 [00:12<00:10, 33.83it/s][ALoss = 1.3726e-01, PNorm = 83.5033, GNorm = 0.2897, lr_0 = 4.1938e-04

 55%|█████▌    | 417/756 [00:13<00:10, 31.58it/s][A
 56%|█████▌    | 422/756 [00:13<00:09, 33.89it/s][ALoss = 1.7802e-01, PNorm = 83.5227, GNorm = 0.2637, lr_0 = 4.1893e-04

 56%|█████▋    | 426/756 [00:13<00:10, 31.80it/s][A
 57%|█████▋    | 430/756 [00:13<00:09, 33.41it/s][A
 57%|█████▋    | 434/756 [00:13<00:09, 32.41it/s][ALoss = 1.8808e-01, PNorm = 83.5395, GNorm = 0.2364, lr_0 = 4.1847e-04

 58%|█████▊    | 438/756 [00:13<00:09, 34.00it/s][A
 58%|█████▊    | 442/756 [00:13<00:09, 32.58it/s][ALoss = 1.3754e-01, PNorm = 83.5831, GNorm = 0.3426, lr_0 = 4.1802e-04

 59%|█████▉    | 446/756 [00:13<00:09, 33.92it/s][A
 60%|█████▉    | 450/756 [00:14<00:09, 32.54it/s][A
 60%|██████    | 454/756 [00:14<00:08, 34.00it/s][ALoss = 1.2501e-01, PNorm = 83.61


  2%|▏         | 12/756 [00:00<00:34, 21.77it/s][A
  2%|▏         | 17/756 [00:00<00:26, 27.95it/s][ALoss = 1.4785e-01, PNorm = 84.3481, GNorm = 0.2829, lr_0 = 4.0321e-04

  3%|▎         | 21/756 [00:00<00:25, 28.33it/s][A
  3%|▎         | 26/756 [00:01<00:22, 32.48it/s][ALoss = 1.5133e-01, PNorm = 84.3758, GNorm = 0.2523, lr_0 = 4.0277e-04

  4%|▍         | 30/756 [00:01<00:23, 31.20it/s][A
  5%|▍         | 35/756 [00:01<00:21, 34.14it/s][A
  5%|▌         | 39/756 [00:01<00:22, 32.57it/s][ALoss = 1.1870e-01, PNorm = 84.4096, GNorm = 0.2337, lr_0 = 4.0234e-04

  6%|▌         | 43/756 [00:01<00:20, 34.02it/s][A
  6%|▌         | 47/756 [00:01<00:21, 32.34it/s][ALoss = 1.6069e-01, PNorm = 84.4320, GNorm = 0.2019, lr_0 = 4.0190e-04

  7%|▋         | 51/756 [00:01<00:20, 33.92it/s][A
  7%|▋         | 55/756 [00:01<00:21, 32.48it/s][ALoss = 1.4552e-01, PNorm = 84.4535, GNorm = 0.2832, lr_0 = 4.0146e-04

  8%|▊         | 60/756 [00:02<00:25, 26.92it/s][A
  8%|▊         | 64/756 [


  9%|▊         | 65/756 [00:02<00:20, 33.75it/s][A
  9%|▉         | 69/756 [00:02<00:19, 34.94it/s][A
 10%|▉         | 73/756 [00:02<00:20, 33.60it/s][ALoss = 1.2888e-01, PNorm = 86.2758, GNorm = 0.1353, lr_0 = 3.6916e-04

 10%|█         | 77/756 [00:02<00:19, 34.57it/s][A
 11%|█         | 81/756 [00:02<00:19, 33.89it/s][ALoss = 1.4612e-01, PNorm = 86.3023, GNorm = 0.1544, lr_0 = 3.6876e-04

 11%|█         | 85/756 [00:02<00:19, 34.95it/s][A
 12%|█▏        | 89/756 [00:03<00:19, 34.29it/s][A
 12%|█▏        | 93/756 [00:03<00:18, 35.24it/s][ALoss = 1.5238e-01, PNorm = 86.3300, GNorm = 0.2520, lr_0 = 3.6836e-04

 13%|█▎        | 97/756 [00:03<00:19, 33.81it/s][A
 13%|█▎        | 101/756 [00:03<00:18, 34.83it/s][ALoss = 1.4697e-01, PNorm = 86.3635, GNorm = 0.1929, lr_0 = 3.6796e-04

 14%|█▍        | 105/756 [00:03<00:19, 33.97it/s][A
 14%|█▍        | 109/756 [00:03<00:18, 35.39it/s][A
 15%|█▍        | 113/756 [00:03<00:18, 34.68it/s][ALoss = 1.3263e-01, PNorm = 86.3965, GNor


 10%|█         | 79/756 [00:02<00:20, 32.89it/s][A
 11%|█         | 83/756 [00:02<00:22, 29.61it/s][A
 12%|█▏        | 87/756 [00:03<00:20, 31.90it/s][ALoss = 1.6565e-01, PNorm = 87.7391, GNorm = 0.2221, lr_0 = 3.3943e-04

 12%|█▏        | 91/756 [00:03<00:22, 29.14it/s][A
 13%|█▎        | 95/756 [00:03<00:21, 31.35it/s][ALoss = 1.3843e-01, PNorm = 87.7550, GNorm = 0.5373, lr_0 = 3.3906e-04

 13%|█▎        | 99/756 [00:03<00:21, 29.87it/s][A
 14%|█▍        | 104/756 [00:03<00:19, 32.90it/s][A
 14%|█▍        | 108/756 [00:03<00:20, 31.91it/s][ALoss = 1.2017e-01, PNorm = 87.7808, GNorm = 0.1377, lr_0 = 3.3869e-04

 15%|█▍        | 112/756 [00:03<00:19, 33.62it/s][A
 15%|█▌        | 116/756 [00:04<00:21, 30.30it/s][ALoss = 1.2189e-01, PNorm = 87.8044, GNorm = 0.1841, lr_0 = 3.3832e-04

 16%|█▌        | 120/756 [00:04<00:19, 32.49it/s][A
 16%|█▋        | 124/756 [00:04<00:21, 29.40it/s][ALoss = 8.5668e-02, PNorm = 87.8264, GNorm = 0.2154, lr_0 = 3.3795e-04

 17%|█▋        | 12

 17%|█▋        | 125/756 [00:03<00:18, 34.19it/s][A
 17%|█▋        | 129/756 [00:04<00:19, 31.85it/s][ALoss = 1.0912e-01, PNorm = 89.0938, GNorm = 0.2089, lr_0 = 3.1110e-04

 18%|█▊        | 133/756 [00:04<00:20, 30.16it/s][A
 18%|█▊        | 137/756 [00:04<00:23, 26.69it/s][A
 19%|█▊        | 141/756 [00:04<00:21, 29.12it/s][ALoss = 1.0985e-01, PNorm = 89.1189, GNorm = 0.1823, lr_0 = 3.1076e-04

 19%|█▉        | 145/756 [00:04<00:20, 30.39it/s][A
 20%|█▉        | 149/756 [00:04<00:18, 32.14it/s][ALoss = 1.1149e-01, PNorm = 89.1284, GNorm = 0.2886, lr_0 = 3.1043e-04

 20%|██        | 153/756 [00:04<00:18, 32.62it/s][A
 21%|██        | 157/756 [00:04<00:17, 33.85it/s][A
 21%|██▏       | 161/756 [00:05<00:17, 34.10it/s][ALoss = 1.6509e-01, PNorm = 89.1378, GNorm = 0.1602, lr_0 = 3.1009e-04

 22%|██▏       | 165/756 [00:05<00:17, 34.21it/s][A
 22%|██▏       | 169/756 [00:05<00:17, 34.24it/s][ALoss = 1.4046e-01, PNorm = 89.1565, GNorm = 0.5457, lr_0 = 3.0975e-04

 23%|██▎      

 18%|█▊        | 133/756 [00:05<00:21, 29.38it/s][A
 18%|█▊        | 137/756 [00:05<00:20, 29.83it/s][ALoss = 1.1303e-01, PNorm = 90.2591, GNorm = 0.3088, lr_0 = 2.8636e-04

 19%|█▊        | 141/756 [00:05<00:21, 29.22it/s][A
 19%|█▉        | 145/756 [00:05<00:20, 29.23it/s][ALoss = 1.2406e-01, PNorm = 90.2701, GNorm = 0.4145, lr_0 = 2.8604e-04

 20%|█▉        | 149/756 [00:05<00:21, 28.86it/s][A
 20%|██        | 153/756 [00:05<00:20, 29.38it/s][A
 21%|██        | 157/756 [00:05<00:20, 29.05it/s][ALoss = 1.3624e-01, PNorm = 90.2794, GNorm = 0.1487, lr_0 = 2.8573e-04

 21%|██▏       | 161/756 [00:06<00:20, 29.59it/s][A
 22%|██▏       | 165/756 [00:06<00:20, 29.43it/s][ALoss = 1.3601e-01, PNorm = 90.2965, GNorm = 0.1970, lr_0 = 2.8542e-04

 22%|██▏       | 169/756 [00:06<00:19, 30.00it/s][A
 23%|██▎       | 173/756 [00:06<00:19, 30.28it/s][A
 23%|██▎       | 177/756 [00:06<00:18, 30.75it/s][ALoss = 1.2324e-01, PNorm = 90.3132, GNorm = 0.1926, lr_0 = 2.8511e-04

 24%|██▍      

 19%|█▉        | 145/756 [00:04<00:18, 33.27it/s][A
 20%|█▉        | 149/756 [00:05<00:19, 30.73it/s][ALoss = 1.4673e-01, PNorm = 91.3040, GNorm = 0.2830, lr_0 = 2.6332e-04

 20%|██        | 154/756 [00:05<00:20, 29.96it/s][A
 21%|██        | 159/756 [00:05<00:18, 32.95it/s][ALoss = 1.0932e-01, PNorm = 91.3114, GNorm = 0.4170, lr_0 = 2.6303e-04

 22%|██▏       | 163/756 [00:05<00:18, 32.63it/s][A
 22%|██▏       | 168/756 [00:05<00:16, 35.33it/s][ALoss = 1.4170e-01, PNorm = 91.3211, GNorm = 0.6710, lr_0 = 2.6275e-04

 23%|██▎       | 172/756 [00:05<00:17, 34.29it/s][A
 23%|██▎       | 177/756 [00:05<00:15, 36.45it/s][A
 24%|██▍       | 181/756 [00:06<00:16, 35.26it/s][ALoss = 1.1049e-01, PNorm = 91.3343, GNorm = 0.3454, lr_0 = 2.6246e-04

 25%|██▍       | 186/756 [00:06<00:16, 34.78it/s][A
 25%|██▌       | 191/756 [00:06<00:15, 36.59it/s][ALoss = 1.4992e-01, PNorm = 91.3472, GNorm = 0.3172, lr_0 = 2.6217e-04

 26%|██▌       | 195/756 [00:06<00:15, 35.20it/s][A
 26%|██▋      

 26%|██▌       | 193/756 [00:06<00:20, 27.28it/s][ALoss = 1.2858e-01, PNorm = 92.2983, GNorm = 0.3650, lr_0 = 2.4132e-04

 26%|██▌       | 197/756 [00:07<00:19, 28.51it/s][A
 27%|██▋       | 201/756 [00:07<00:18, 29.55it/s][A
 27%|██▋       | 205/756 [00:07<00:18, 30.12it/s][ALoss = 1.2764e-01, PNorm = 92.3094, GNorm = 0.4136, lr_0 = 2.4106e-04

 28%|██▊       | 209/756 [00:07<00:17, 30.50it/s][A
 28%|██▊       | 213/756 [00:07<00:17, 30.89it/s][ALoss = 1.0533e-01, PNorm = 92.3226, GNorm = 0.2680, lr_0 = 2.4079e-04

 29%|██▊       | 217/756 [00:07<00:18, 28.55it/s][A
 29%|██▉       | 221/756 [00:07<00:18, 29.63it/s][A
 30%|██▉       | 225/756 [00:07<00:17, 29.95it/s][ALoss = 1.1775e-01, PNorm = 92.3360, GNorm = 0.2185, lr_0 = 2.4053e-04

 30%|███       | 229/756 [00:08<00:17, 30.44it/s][A
 31%|███       | 233/756 [00:08<00:17, 30.28it/s][ALoss = 1.1999e-01, PNorm = 92.3417, GNorm = 0.1542, lr_0 = 2.4027e-04

 31%|███▏      | 237/756 [00:08<00:17, 30.41it/s][A
 32%|███▏     


 79%|███████▉  | 597/756 [00:20<00:05, 29.48it/s][A
 79%|███████▉  | 601/756 [00:20<00:05, 29.89it/s][A
 80%|████████  | 605/756 [00:20<00:05, 27.74it/s][ALoss = 1.2443e-01, PNorm = 92.7399, GNorm = 0.2156, lr_0 = 2.3078e-04

 81%|████████  | 609/756 [00:20<00:06, 23.81it/s][A
 81%|████████  | 613/756 [00:21<00:06, 21.83it/s][ALoss = 1.1716e-01, PNorm = 92.7473, GNorm = 0.2025, lr_0 = 2.3053e-04

 82%|████████▏ | 617/756 [00:21<00:06, 22.14it/s][A
 82%|████████▏ | 621/756 [00:21<00:05, 24.09it/s][A
 83%|████████▎ | 625/756 [00:21<00:05, 24.64it/s][ALoss = 1.4275e-01, PNorm = 92.7573, GNorm = 0.2595, lr_0 = 2.3028e-04

 83%|████████▎ | 629/756 [00:21<00:04, 26.26it/s][A
 84%|████████▎ | 633/756 [00:21<00:04, 27.35it/s][ALoss = 1.4135e-01, PNorm = 92.7669, GNorm = 0.2726, lr_0 = 2.3003e-04

 84%|████████▍ | 637/756 [00:21<00:04, 28.38it/s][A
 85%|████████▍ | 641/756 [00:22<00:03, 29.21it/s][A
 85%|████████▌ | 645/756 [00:22<00:03, 29.90it/s][ALoss = 9.9432e-02, PNorm = 92.7

 85%|████████▌ | 645/756 [00:19<00:03, 34.66it/s][A
 86%|████████▌ | 649/756 [00:19<00:02, 35.92it/s][ALoss = 1.1505e-01, PNorm = 93.5322, GNorm = 0.2128, lr_0 = 2.1152e-04

 86%|████████▋ | 653/756 [00:19<00:03, 34.20it/s][A
 87%|████████▋ | 657/756 [00:19<00:02, 35.53it/s][ALoss = 1.4781e-01, PNorm = 93.5386, GNorm = 0.1380, lr_0 = 2.1129e-04

 87%|████████▋ | 661/756 [00:19<00:02, 33.21it/s][A
 88%|████████▊ | 666/756 [00:19<00:02, 32.84it/s][ALoss = 1.2426e-01, PNorm = 93.5465, GNorm = 0.3661, lr_0 = 2.1106e-04

 89%|████████▉ | 671/756 [00:20<00:02, 34.88it/s][A
 89%|████████▉ | 675/756 [00:20<00:02, 33.59it/s][A
 90%|████████▉ | 679/756 [00:20<00:02, 35.00it/s][ALoss = 1.2800e-01, PNorm = 93.5562, GNorm = 0.3802, lr_0 = 2.1083e-04

 90%|█████████ | 683/756 [00:20<00:02, 33.73it/s][A
 91%|█████████ | 688/756 [00:20<00:01, 35.98it/s][ALoss = 1.2650e-01, PNorm = 93.5621, GNorm = 0.1622, lr_0 = 2.1060e-04

 92%|█████████▏| 692/756 [00:20<00:01, 34.49it/s][A
 92%|█████████

 87%|████████▋ | 661/756 [00:19<00:02, 34.01it/s][A
 88%|████████▊ | 665/756 [00:19<00:02, 34.27it/s][ALoss = 1.2577e-01, PNorm = 94.1802, GNorm = 0.3199, lr_0 = 1.9448e-04

 88%|████████▊ | 669/756 [00:19<00:02, 34.24it/s][A
 89%|████████▉ | 673/756 [00:19<00:02, 34.46it/s][ALoss = 1.5754e-01, PNorm = 94.1877, GNorm = 0.2832, lr_0 = 1.9427e-04

 90%|████████▉ | 677/756 [00:19<00:02, 34.43it/s][A
 90%|█████████ | 681/756 [00:20<00:02, 28.27it/s][A
 91%|█████████ | 685/756 [00:20<00:02, 29.93it/s][ALoss = 1.0369e-01, PNorm = 94.1963, GNorm = 0.1938, lr_0 = 1.9406e-04

 91%|█████████ | 689/756 [00:20<00:02, 31.17it/s][A
 92%|█████████▏| 693/756 [00:20<00:01, 32.20it/s][ALoss = 1.1996e-01, PNorm = 94.2049, GNorm = 0.2589, lr_0 = 1.9385e-04

 92%|█████████▏| 697/756 [00:20<00:01, 32.74it/s][A
 93%|█████████▎| 701/756 [00:20<00:01, 33.73it/s][A
 93%|█████████▎| 705/756 [00:20<00:01, 34.36it/s][ALoss = 1.2350e-01, PNorm = 94.2146, GNorm = 0.4728, lr_0 = 1.9364e-04

 94%|█████████

 88%|████████▊ | 664/756 [00:19<00:02, 33.25it/s][A
 88%|████████▊ | 668/756 [00:19<00:02, 33.48it/s][ALoss = 1.2579e-01, PNorm = 94.7855, GNorm = 0.1254, lr_0 = 1.7903e-04

 89%|████████▉ | 672/756 [00:19<00:02, 33.11it/s][A
 89%|████████▉ | 676/756 [00:19<00:02, 33.71it/s][ALoss = 1.6675e-01, PNorm = 94.7840, GNorm = 0.4099, lr_0 = 1.7884e-04

 90%|████████▉ | 680/756 [00:19<00:02, 33.84it/s][A
 90%|█████████ | 684/756 [00:20<00:02, 34.14it/s][A
 91%|█████████ | 688/756 [00:20<00:01, 34.18it/s][ALoss = 1.1960e-01, PNorm = 94.7893, GNorm = 0.2805, lr_0 = 1.7864e-04

 92%|█████████▏| 692/756 [00:20<00:01, 34.23it/s][A
 92%|█████████▏| 696/756 [00:20<00:01, 34.46it/s][ALoss = 7.2697e-02, PNorm = 94.8039, GNorm = 0.1661, lr_0 = 1.7845e-04

 93%|█████████▎| 700/756 [00:20<00:01, 34.69it/s][A
 93%|█████████▎| 704/756 [00:20<00:01, 34.43it/s][A
 94%|█████████▎| 708/756 [00:20<00:01, 34.70it/s][ALoss = 1.4119e-01, PNorm = 94.8160, GNorm = 0.5284, lr_0 = 1.7825e-04

 94%|█████████

 89%|████████▉ | 675/756 [00:21<00:02, 29.39it/s][A
 90%|████████▉ | 680/756 [00:21<00:02, 32.61it/s][A
 90%|█████████ | 684/756 [00:21<00:02, 32.26it/s][ALoss = 1.1044e-01, PNorm = 95.3078, GNorm = 0.2656, lr_0 = 1.6461e-04

 91%|█████████ | 688/756 [00:21<00:02, 33.80it/s][A
 92%|█████████▏| 692/756 [00:21<00:01, 33.02it/s][ALoss = 1.1461e-01, PNorm = 95.3202, GNorm = 0.1843, lr_0 = 1.6443e-04

 92%|█████████▏| 696/756 [00:21<00:01, 34.30it/s][A
 93%|█████████▎| 700/756 [00:22<00:01, 33.05it/s][A
 93%|█████████▎| 704/756 [00:22<00:01, 34.16it/s][ALoss = 1.2328e-01, PNorm = 95.3291, GNorm = 0.3861, lr_0 = 1.6425e-04

 94%|█████████▎| 708/756 [00:22<00:01, 32.70it/s][A
 94%|█████████▍| 712/756 [00:22<00:01, 33.71it/s][ALoss = 1.4910e-01, PNorm = 95.3336, GNorm = 0.4901, lr_0 = 1.6408e-04

 95%|█████████▍| 716/756 [00:22<00:01, 32.12it/s][A
 95%|█████████▌| 720/756 [00:22<00:01, 33.44it/s][A
 96%|█████████▌| 724/756 [00:22<00:00, 32.41it/s][ALoss = 1.0869e-01, PNorm = 95.33

 92%|█████████▏| 695/756 [00:21<00:01, 33.85it/s][ALoss = 9.7221e-02, PNorm = 95.7639, GNorm = 0.3965, lr_0 = 1.5137e-04

 92%|█████████▏| 699/756 [00:21<00:01, 32.75it/s][A
 93%|█████████▎| 703/756 [00:21<00:01, 34.34it/s][A
 94%|█████████▎| 707/756 [00:21<00:01, 33.11it/s][ALoss = 1.0575e-01, PNorm = 95.7703, GNorm = 0.3779, lr_0 = 1.5120e-04

 94%|█████████▍| 711/756 [00:21<00:01, 34.45it/s][A
 95%|█████████▍| 715/756 [00:22<00:01, 32.02it/s][ALoss = 1.2563e-01, PNorm = 95.7734, GNorm = 0.2653, lr_0 = 1.5104e-04

 95%|█████████▌| 719/756 [00:22<00:01, 33.63it/s][A
 96%|█████████▌| 723/756 [00:22<00:01, 32.80it/s][A
 96%|█████████▌| 727/756 [00:22<00:00, 34.54it/s][ALoss = 1.6693e-01, PNorm = 95.7727, GNorm = 0.4203, lr_0 = 1.5088e-04

 97%|█████████▋| 731/756 [00:22<00:00, 33.28it/s][A
 97%|█████████▋| 735/756 [00:22<00:00, 34.76it/s][ALoss = 1.2798e-01, PNorm = 95.7781, GNorm = 0.3254, lr_0 = 1.5071e-04

 98%|█████████▊| 739/756 [00:22<00:00, 33.22it/s][A
 98%|█████████

 93%|█████████▎| 701/756 [00:20<00:01, 34.89it/s][ALoss = 8.9283e-02, PNorm = 96.1714, GNorm = 0.2633, lr_0 = 1.3933e-04

 93%|█████████▎| 705/756 [00:20<00:01, 35.25it/s][A
 94%|█████████▍| 709/756 [00:20<00:01, 35.37it/s][A
 94%|█████████▍| 713/756 [00:20<00:01, 35.62it/s][ALoss = 1.2184e-01, PNorm = 96.1765, GNorm = 0.3632, lr_0 = 1.3918e-04

 95%|█████████▍| 717/756 [00:21<00:01, 35.02it/s][A
 95%|█████████▌| 721/756 [00:21<00:00, 35.44it/s][ALoss = 1.1397e-01, PNorm = 96.1792, GNorm = 0.1852, lr_0 = 1.3903e-04

 96%|█████████▌| 725/756 [00:21<00:00, 34.85it/s][A
 96%|█████████▋| 729/756 [00:21<00:00, 35.18it/s][A
 97%|█████████▋| 733/756 [00:21<00:00, 34.80it/s][ALoss = 1.5495e-01, PNorm = 96.1806, GNorm = 0.4628, lr_0 = 1.3887e-04

 97%|█████████▋| 737/756 [00:21<00:00, 34.80it/s][A
 98%|█████████▊| 741/756 [00:21<00:00, 34.60it/s][ALoss = 1.3132e-01, PNorm = 96.1831, GNorm = 0.3976, lr_0 = 1.3872e-04

 99%|█████████▊| 745/756 [00:21<00:00, 35.15it/s][A
 99%|█████████


 41%|████      | 309/756 [00:09<00:14, 31.49it/s][A
 41%|████▏     | 313/756 [00:09<00:15, 27.81it/s][A
 42%|████▏     | 317/756 [00:09<00:14, 30.32it/s][ALoss = 1.3600e-01, PNorm = 96.3599, GNorm = 0.3692, lr_0 = 1.3381e-04

 42%|████▏     | 321/756 [00:09<00:13, 31.35it/s][A
 43%|████▎     | 325/756 [00:09<00:12, 33.26it/s][ALoss = 1.3246e-01, PNorm = 96.3646, GNorm = 0.5605, lr_0 = 1.3366e-04

 44%|████▎     | 329/756 [00:10<00:12, 33.37it/s][A
 44%|████▍     | 333/756 [00:10<00:12, 34.64it/s][A
 45%|████▍     | 337/756 [00:10<00:12, 34.39it/s][ALoss = 9.4132e-02, PNorm = 96.3730, GNorm = 0.3905, lr_0 = 1.3352e-04

 45%|████▌     | 341/756 [00:10<00:11, 35.07it/s][A
 46%|████▌     | 345/756 [00:10<00:11, 34.74it/s][ALoss = 1.6688e-01, PNorm = 96.3751, GNorm = 0.1326, lr_0 = 1.3337e-04

 46%|████▌     | 349/756 [00:10<00:11, 35.31it/s][A
 47%|████▋     | 353/756 [00:10<00:11, 34.87it/s][A
 47%|████▋     | 357/756 [00:10<00:11, 35.74it/s][ALoss = 9.3625e-02, PNorm = 96.3

 42%|████▏     | 317/756 [00:09<00:12, 34.59it/s][A
 42%|████▏     | 321/756 [00:09<00:12, 35.25it/s][ALoss = 1.2311e-01, PNorm = 96.6821, GNorm = 0.1993, lr_0 = 1.2318e-04

 43%|████▎     | 325/756 [00:09<00:12, 34.45it/s][A
 44%|████▎     | 329/756 [00:09<00:11, 35.64it/s][ALoss = 1.3009e-01, PNorm = 96.6848, GNorm = 0.3597, lr_0 = 1.2305e-04

 44%|████▍     | 333/756 [00:09<00:12, 34.73it/s][A
 45%|████▍     | 337/756 [00:10<00:11, 35.26it/s][A
 45%|████▌     | 341/756 [00:10<00:11, 34.64it/s][ALoss = 9.7934e-02, PNorm = 96.6918, GNorm = 0.5135, lr_0 = 1.2291e-04

 46%|████▌     | 345/756 [00:10<00:11, 35.08it/s][A
 46%|████▌     | 349/756 [00:10<00:11, 35.01it/s][ALoss = 1.2864e-01, PNorm = 96.6957, GNorm = 0.3088, lr_0 = 1.2278e-04

 47%|████▋     | 353/756 [00:10<00:11, 35.71it/s][A
 47%|████▋     | 357/756 [00:10<00:11, 35.17it/s][A
 48%|████▊     | 361/756 [00:10<00:10, 36.23it/s][ALoss = 1.2783e-01, PNorm = 96.6992, GNorm = 0.5226, lr_0 = 1.2264e-04

 48%|████▊    


 46%|████▌     | 349/756 [00:10<00:12, 33.23it/s][A
 47%|████▋     | 354/756 [00:11<00:12, 33.20it/s][ALoss = 1.5578e-01, PNorm = 97.0003, GNorm = 0.2287, lr_0 = 1.1301e-04

 47%|████▋     | 359/756 [00:11<00:11, 35.32it/s][A
 48%|████▊     | 363/756 [00:11<00:11, 34.25it/s][ALoss = 1.1840e-01, PNorm = 97.0017, GNorm = 0.4369, lr_0 = 1.1289e-04

 49%|████▊     | 368/756 [00:11<00:10, 36.22it/s][A
 49%|████▉     | 372/756 [00:11<00:11, 34.07it/s][A
 50%|████▉     | 377/756 [00:11<00:10, 36.18it/s][ALoss = 9.5883e-02, PNorm = 97.0074, GNorm = 0.5936, lr_0 = 1.1277e-04

 50%|█████     | 381/756 [00:11<00:10, 34.62it/s][A
 51%|█████     | 386/756 [00:12<00:11, 33.55it/s][ALoss = 1.1561e-01, PNorm = 97.0139, GNorm = 0.3724, lr_0 = 1.1264e-04

 52%|█████▏    | 390/756 [00:12<00:10, 34.67it/s][A
 52%|█████▏    | 394/756 [00:12<00:10, 33.04it/s][ALoss = 1.4335e-01, PNorm = 97.0155, GNorm = 0.8625, lr_0 = 1.1252e-04

 53%|█████▎    | 398/756 [00:12<00:10, 34.61it/s][A
 53%|█████▎  

 50%|█████     | 381/756 [00:11<00:10, 35.35it/s][ALoss = 9.0329e-02, PNorm = 97.2641, GNorm = 0.6465, lr_0 = 1.0381e-04

 51%|█████     | 385/756 [00:11<00:10, 35.11it/s][A
 51%|█████▏    | 389/756 [00:11<00:10, 35.08it/s][ALoss = 1.4091e-01, PNorm = 97.2678, GNorm = 0.3403, lr_0 = 1.0369e-04

 52%|█████▏    | 393/756 [00:11<00:10, 34.94it/s][A
 53%|█████▎    | 397/756 [00:11<00:10, 35.57it/s][A
 53%|█████▎    | 401/756 [00:11<00:09, 35.61it/s][ALoss = 9.0224e-02, PNorm = 97.2712, GNorm = 0.2215, lr_0 = 1.0358e-04

 54%|█████▎    | 405/756 [00:12<00:09, 35.97it/s][A
 54%|█████▍    | 409/756 [00:12<00:09, 34.88it/s][ALoss = 1.1447e-01, PNorm = 97.2752, GNorm = 0.4935, lr_0 = 1.0347e-04

 55%|█████▍    | 413/756 [00:12<00:09, 35.06it/s][A
 55%|█████▌    | 417/756 [00:12<00:09, 35.46it/s][A
 56%|█████▌    | 421/756 [00:12<00:11, 30.19it/s][ALoss = 1.1942e-01, PNorm = 97.2778, GNorm = 0.1722, lr_0 = 1.0336e-04

 56%|█████▌    | 425/756 [00:12<00:10, 32.10it/s][A
 57%|█████▋   

 44%|████▎     | 330/756 [00:10<00:12, 32.88it/s][A
 44%|████▍     | 335/756 [00:10<00:11, 35.18it/s][A
 45%|████▍     | 339/756 [00:10<00:14, 28.17it/s][ALoss = 1.0170e-01, PNorm = 50.6808, GNorm = 0.1272, lr_0 = 3.0325e-04

 45%|████▌     | 343/756 [00:10<00:13, 30.70it/s][A
 46%|████▌     | 347/756 [00:10<00:13, 31.11it/s][ALoss = 1.7207e-01, PNorm = 50.6774, GNorm = 0.1264, lr_0 = 3.0921e-04

 46%|████▋     | 351/756 [00:10<00:12, 33.13it/s][A
 47%|████▋     | 355/756 [00:10<00:12, 32.74it/s][A
 47%|████▋     | 359/756 [00:10<00:11, 34.01it/s][ALoss = 1.5702e-01, PNorm = 50.6835, GNorm = 0.1593, lr_0 = 3.1517e-04

 48%|████▊     | 363/756 [00:11<00:11, 32.78it/s][A
 49%|████▊     | 367/756 [00:11<00:11, 34.04it/s][ALoss = 1.3195e-01, PNorm = 50.7006, GNorm = 0.4024, lr_0 = 3.2113e-04

 49%|████▉     | 371/756 [00:11<00:11, 32.40it/s][A
 50%|████▉     | 375/756 [00:11<00:11, 33.80it/s][A
 50%|█████     | 379/756 [00:11<00:11, 32.87it/s][ALoss = 1.4195e-01, PNorm = 50.69


 98%|█████████▊| 743/756 [00:22<00:00, 33.20it/s][A
 99%|█████████▉| 747/756 [00:22<00:00, 34.16it/s][ALoss = 1.8439e-01, PNorm = 51.0454, GNorm = 0.1917, lr_0 = 5.4762e-04

 99%|█████████▉| 751/756 [00:22<00:00, 35.09it/s][A
100%|█████████▉| 755/756 [00:22<00:00, 36.18it/s][A
                                                 [A
  0%|          | 0/95 [00:00<?, ?it/s][A
  1%|          | 1/95 [00:00<00:33,  2.78it/s][A
  5%|▌         | 5/95 [00:00<00:06, 13.27it/s][A
 13%|█▎        | 12/95 [00:00<00:02, 28.94it/s][A
 20%|██        | 19/95 [00:00<00:01, 40.34it/s][A
 27%|██▋       | 26/95 [00:00<00:01, 48.09it/s][A
 36%|███▌      | 34/95 [00:00<00:01, 55.83it/s][A
 44%|████▍     | 42/95 [00:01<00:00, 60.06it/s][A
 53%|█████▎    | 50/95 [00:01<00:00, 48.45it/s][A
 61%|██████    | 58/95 [00:01<00:00, 44.98it/s][A
 69%|██████▉   | 66/95 [00:01<00:00, 44.26it/s][A
 78%|███████▊  | 74/95 [00:01<00:00, 43.92it/s][A
 89%|████████▉ | 85/95 [00:01<00:00, 56.24it/s][A
            


 46%|████▌     | 347/756 [00:11<00:12, 32.61it/s][A
 46%|████▋     | 351/756 [00:11<00:12, 32.07it/s][ALoss = 1.3234e-01, PNorm = 51.9156, GNorm = 0.0990, lr_0 = 7.6278e-04

 47%|████▋     | 355/756 [00:11<00:12, 33.32it/s][A
 47%|████▋     | 359/756 [00:11<00:12, 32.54it/s][A
 48%|████▊     | 363/756 [00:11<00:11, 33.95it/s][ALoss = 1.8828e-01, PNorm = 51.9263, GNorm = 0.1429, lr_0 = 7.6874e-04

 49%|████▊     | 367/756 [00:11<00:11, 32.55it/s][A
 49%|████▉     | 371/756 [00:11<00:11, 33.76it/s][ALoss = 1.6106e-01, PNorm = 51.9915, GNorm = 0.1643, lr_0 = 7.7470e-04

 50%|████▉     | 375/756 [00:12<00:11, 32.30it/s][A
 50%|█████     | 379/756 [00:12<00:11, 33.65it/s][A
 51%|█████     | 383/756 [00:12<00:11, 32.99it/s][ALoss = 1.6271e-01, PNorm = 52.0068, GNorm = 0.2883, lr_0 = 7.8066e-04

 51%|█████     | 387/756 [00:12<00:10, 34.08it/s][A
 52%|█████▏    | 391/756 [00:12<00:11, 32.97it/s][ALoss = 1.4199e-01, PNorm = 52.0434, GNorm = 0.0851, lr_0 = 7.8662e-04

 52%|█████▏  


 47%|████▋     | 352/756 [00:10<00:11, 34.69it/s][A
 47%|████▋     | 356/756 [00:10<00:11, 33.86it/s][ALoss = 1.5573e-01, PNorm = 54.8016, GNorm = 0.0828, lr_0 = 9.6123e-04

 48%|████▊     | 360/756 [00:10<00:11, 34.88it/s][A
 48%|████▊     | 364/756 [00:10<00:11, 34.41it/s][A
 49%|████▊     | 368/756 [00:10<00:11, 34.99it/s][ALoss = 1.3843e-01, PNorm = 54.8601, GNorm = 0.0811, lr_0 = 9.6019e-04

 49%|████▉     | 372/756 [00:11<00:11, 34.22it/s][A
 50%|████▉     | 376/756 [00:11<00:10, 35.06it/s][ALoss = 1.5351e-01, PNorm = 54.8976, GNorm = 0.0797, lr_0 = 9.5914e-04

 50%|█████     | 380/756 [00:11<00:10, 34.31it/s][A
 51%|█████     | 384/756 [00:11<00:10, 35.43it/s][A
 51%|█████▏    | 388/756 [00:11<00:10, 34.67it/s][ALoss = 1.4140e-01, PNorm = 54.9289, GNorm = 0.1324, lr_0 = 9.5810e-04

 52%|█████▏    | 392/756 [00:11<00:10, 35.44it/s][A
 52%|█████▏    | 396/756 [00:11<00:10, 35.07it/s][ALoss = 1.6678e-01, PNorm = 54.9592, GNorm = 0.1004, lr_0 = 9.5705e-04

 53%|█████▎  


 48%|████▊     | 365/756 [00:11<00:12, 32.33it/s][A
 49%|████▉     | 369/756 [00:12<00:12, 31.59it/s][A
 49%|████▉     | 373/756 [00:12<00:11, 32.90it/s][ALoss = 1.6383e-01, PNorm = 58.4231, GNorm = 0.1058, lr_0 = 8.8390e-04

 50%|████▉     | 377/756 [00:12<00:12, 31.57it/s][A
 50%|█████     | 381/756 [00:12<00:11, 32.83it/s][ALoss = 1.4596e-01, PNorm = 58.4698, GNorm = 0.0876, lr_0 = 8.8294e-04

 51%|█████     | 385/756 [00:12<00:11, 31.50it/s][A
 51%|█████▏    | 389/756 [00:12<00:11, 32.62it/s][A
 52%|█████▏    | 393/756 [00:12<00:11, 31.32it/s][ALoss = 1.4684e-01, PNorm = 58.5094, GNorm = 0.0865, lr_0 = 8.8198e-04

 53%|█████▎    | 397/756 [00:12<00:11, 32.60it/s][A
 53%|█████▎    | 401/756 [00:13<00:11, 31.19it/s][ALoss = 1.5325e-01, PNorm = 58.5518, GNorm = 0.1981, lr_0 = 8.8102e-04

 54%|█████▎    | 405/756 [00:13<00:10, 32.17it/s][A
 54%|█████▍    | 409/756 [00:13<00:11, 31.12it/s][A
 55%|█████▍    | 413/756 [00:13<00:10, 32.70it/s][ALoss = 1.2958e-01, PNorm = 58.6

 50%|████▉     | 377/756 [00:11<00:11, 34.16it/s][ALoss = 1.2585e-01, PNorm = 61.5821, GNorm = 0.0766, lr_0 = 8.1359e-04

 50%|█████     | 381/756 [00:11<00:12, 30.57it/s][A
 51%|█████     | 385/756 [00:11<00:11, 32.36it/s][ALoss = 1.9631e-01, PNorm = 61.5943, GNorm = 0.1431, lr_0 = 8.1271e-04

 51%|█████▏    | 389/756 [00:11<00:11, 32.95it/s][A
 52%|█████▏    | 393/756 [00:11<00:11, 31.41it/s][A
 53%|█████▎    | 397/756 [00:11<00:10, 32.66it/s][ALoss = 1.5047e-01, PNorm = 61.6418, GNorm = 0.1170, lr_0 = 8.1182e-04

 53%|█████▎    | 401/756 [00:11<00:10, 33.82it/s][A
 54%|█████▎    | 405/756 [00:11<00:10, 34.53it/s][ALoss = 1.3557e-01, PNorm = 61.7279, GNorm = 0.0829, lr_0 = 8.1094e-04

 54%|█████▍    | 409/756 [00:12<00:09, 35.29it/s][A
 55%|█████▍    | 413/756 [00:12<00:09, 35.59it/s][A
 55%|█████▌    | 417/756 [00:12<00:09, 36.19it/s][ALoss = 1.9459e-01, PNorm = 61.7532, GNorm = 0.1093, lr_0 = 8.1006e-04

 56%|█████▌    | 421/756 [00:12<00:09, 36.13it/s][A
 56%|█████▌   


 51%|█████     | 385/756 [00:11<00:10, 35.63it/s][A
 51%|█████▏    | 389/756 [00:11<00:10, 35.64it/s][ALoss = 1.4868e-01, PNorm = 64.5597, GNorm = 0.1578, lr_0 = 7.4814e-04

 52%|█████▏    | 393/756 [00:11<00:10, 36.00it/s][A
 53%|█████▎    | 397/756 [00:11<00:10, 35.81it/s][A
 53%|█████▎    | 401/756 [00:11<00:09, 36.32it/s][ALoss = 1.4162e-01, PNorm = 64.5953, GNorm = 0.1209, lr_0 = 7.4733e-04

 54%|█████▎    | 405/756 [00:12<00:09, 35.85it/s][A
 54%|█████▍    | 409/756 [00:12<00:09, 36.20it/s][ALoss = 1.4905e-01, PNorm = 64.6414, GNorm = 0.2630, lr_0 = 7.4651e-04

 55%|█████▍    | 413/756 [00:12<00:09, 35.83it/s][A
 55%|█████▌    | 417/756 [00:12<00:09, 36.18it/s][A
 56%|█████▌    | 421/756 [00:12<00:09, 36.18it/s][ALoss = 1.1577e-01, PNorm = 64.6819, GNorm = 0.1868, lr_0 = 7.4570e-04

 56%|█████▌    | 425/756 [00:12<00:09, 36.59it/s][A
 57%|█████▋    | 429/756 [00:12<00:08, 36.73it/s][ALoss = 1.8111e-01, PNorm = 64.7098, GNorm = 0.1111, lr_0 = 7.4489e-04

 57%|█████▋  


 53%|█████▎    | 399/756 [00:11<00:10, 34.01it/s][A
 53%|█████▎    | 403/756 [00:11<00:10, 34.46it/s][A
 54%|█████▍    | 407/756 [00:11<00:10, 34.25it/s][ALoss = 1.3801e-01, PNorm = 67.9199, GNorm = 0.0949, lr_0 = 6.8788e-04

 54%|█████▍    | 411/756 [00:11<00:09, 34.85it/s][A
 55%|█████▍    | 415/756 [00:12<00:09, 34.86it/s][ALoss = 1.2125e-01, PNorm = 67.9526, GNorm = 0.0930, lr_0 = 6.8713e-04

 55%|█████▌    | 419/756 [00:12<00:09, 35.52it/s][A
 56%|█████▌    | 423/756 [00:12<00:09, 35.63it/s][A
 56%|█████▋    | 427/756 [00:12<00:09, 35.89it/s][ALoss = 1.4025e-01, PNorm = 67.9722, GNorm = 0.2359, lr_0 = 6.8638e-04

 57%|█████▋    | 431/756 [00:12<00:09, 35.34it/s][A
 58%|█████▊    | 435/756 [00:12<00:09, 35.61it/s][ALoss = 1.6879e-01, PNorm = 67.9772, GNorm = 0.1339, lr_0 = 6.8563e-04

 58%|█████▊    | 439/756 [00:12<00:09, 35.17it/s][A
 59%|█████▊    | 443/756 [00:12<00:08, 35.61it/s][A
 59%|█████▉    | 447/756 [00:12<00:08, 35.10it/s][ALoss = 1.4524e-01, PNorm = 68.0


  1%|          | 4/756 [00:00<01:09, 10.87it/s][A
  1%|          | 9/756 [00:00<00:34, 21.49it/s][ALoss = 1.3976e-01, PNorm = 69.4326, GNorm = 0.1521, lr_0 = 6.6143e-04

  2%|▏         | 12/756 [00:00<00:31, 23.60it/s][A
  2%|▏         | 16/756 [00:00<00:26, 28.30it/s][A
  3%|▎         | 20/756 [00:00<00:31, 23.29it/s][ALoss = 1.7841e-01, PNorm = 69.4706, GNorm = 0.0959, lr_0 = 6.6071e-04

  3%|▎         | 24/756 [00:01<00:27, 26.86it/s][A
  4%|▎         | 28/756 [00:01<00:26, 27.75it/s][ALoss = 1.4533e-01, PNorm = 69.5285, GNorm = 0.2160, lr_0 = 6.5999e-04

  4%|▍         | 32/756 [00:01<00:23, 30.34it/s][A
  5%|▍         | 36/756 [00:01<00:29, 24.39it/s][A
  5%|▌         | 40/756 [00:01<00:25, 27.71it/s][ALoss = 1.4422e-01, PNorm = 69.5765, GNorm = 0.1483, lr_0 = 6.5927e-04

  6%|▌         | 44/756 [00:01<00:25, 28.29it/s][A
  6%|▋         | 49/756 [00:01<00:22, 31.67it/s][ALoss = 1.2773e-01, PNorm = 69.6171, GNorm = 0.1960, lr_0 = 6.5855e-04

  7%|▋         | 53/756 [00


 55%|█████▍    | 414/756 [00:12<00:09, 35.52it/s][A
 55%|█████▌    | 418/756 [00:12<00:09, 36.38it/s][ALoss = 1.3949e-01, PNorm = 70.9831, GNorm = 0.1168, lr_0 = 6.3254e-04

 56%|█████▌    | 422/756 [00:12<00:09, 35.94it/s][A
 56%|█████▋    | 426/756 [00:12<00:09, 36.13it/s][A
 57%|█████▋    | 430/756 [00:12<00:10, 29.71it/s][ALoss = 1.5243e-01, PNorm = 71.0425, GNorm = 0.0813, lr_0 = 6.3185e-04

 57%|█████▋    | 434/756 [00:13<00:11, 27.61it/s][A
 58%|█████▊    | 438/756 [00:13<00:10, 29.29it/s][ALoss = 1.3088e-01, PNorm = 71.0891, GNorm = 0.1241, lr_0 = 6.3116e-04

 58%|█████▊    | 442/756 [00:13<00:10, 31.02it/s][A
 59%|█████▉    | 446/756 [00:13<00:09, 31.92it/s][A
 60%|█████▉    | 450/756 [00:13<00:09, 33.36it/s][ALoss = 1.2766e-01, PNorm = 71.1317, GNorm = 0.4976, lr_0 = 6.3048e-04

 60%|██████    | 454/756 [00:13<00:08, 33.57it/s][A
 61%|██████    | 458/756 [00:13<00:10, 28.50it/s][ALoss = 1.1476e-01, PNorm = 71.1705, GNorm = 0.1710, lr_0 = 6.2979e-04

 61%|██████  


 57%|█████▋    | 428/756 [00:12<00:08, 37.38it/s][A
 57%|█████▋    | 432/756 [00:12<00:08, 36.65it/s][A
 58%|█████▊    | 436/756 [00:12<00:08, 37.48it/s][ALoss = 1.7638e-01, PNorm = 74.3102, GNorm = 0.2510, lr_0 = 5.8159e-04

 58%|█████▊    | 440/756 [00:13<00:08, 36.01it/s][A
 59%|█████▊    | 444/756 [00:13<00:08, 36.70it/s][ALoss = 1.3760e-01, PNorm = 74.3667, GNorm = 0.2502, lr_0 = 5.8096e-04

 59%|█████▉    | 448/756 [00:13<00:08, 35.19it/s][A
 60%|█████▉    | 452/756 [00:13<00:08, 36.49it/s][A
 60%|██████    | 456/756 [00:13<00:08, 35.65it/s][ALoss = 1.3902e-01, PNorm = 74.4255, GNorm = 0.2837, lr_0 = 5.8032e-04

 61%|██████    | 460/756 [00:13<00:08, 36.56it/s][A
 61%|██████▏   | 464/756 [00:13<00:08, 35.60it/s][ALoss = 1.0865e-01, PNorm = 74.4789, GNorm = 0.1407, lr_0 = 5.7969e-04

 62%|██████▏   | 468/756 [00:13<00:07, 36.59it/s][A
 62%|██████▏   | 472/756 [00:13<00:07, 36.10it/s][A
 63%|██████▎   | 476/756 [00:14<00:07, 36.95it/s][ALoss = 1.1418e-01, PNorm = 74.5


  4%|▍         | 31/756 [00:01<00:21, 33.57it/s][A
  5%|▍         | 35/756 [00:01<00:21, 33.01it/s][A
  5%|▌         | 40/756 [00:01<00:20, 35.17it/s][ALoss = 1.5168e-01, PNorm = 75.5780, GNorm = 0.2038, lr_0 = 5.5923e-04

  6%|▌         | 44/756 [00:01<00:20, 34.49it/s][A
  6%|▋         | 49/756 [00:01<00:19, 36.56it/s][ALoss = 1.5007e-01, PNorm = 75.6124, GNorm = 0.1565, lr_0 = 5.5862e-04

  7%|▋         | 53/756 [00:01<00:20, 35.13it/s][A
  8%|▊         | 57/756 [00:01<00:19, 35.98it/s][ALoss = 1.5621e-01, PNorm = 75.6668, GNorm = 0.3139, lr_0 = 5.5801e-04

  8%|▊         | 61/756 [00:02<00:20, 34.49it/s][A
  9%|▊         | 65/756 [00:02<00:19, 35.74it/s][A
  9%|▉         | 69/756 [00:02<00:19, 34.59it/s][ALoss = 1.3772e-01, PNorm = 75.7011, GNorm = 0.1814, lr_0 = 5.5740e-04

 10%|▉         | 73/756 [00:02<00:19, 35.56it/s][A
 10%|█         | 77/756 [00:02<00:19, 34.01it/s][ALoss = 1.2019e-01, PNorm = 75.7318, GNorm = 0.1249, lr_0 = 5.5680e-04

 11%|█         | 81/756 [


 58%|█████▊    | 442/756 [00:13<00:08, 35.12it/s][A
 59%|█████▉    | 446/756 [00:13<00:08, 35.26it/s][A
 60%|█████▉    | 450/756 [00:13<00:08, 34.99it/s][ALoss = 1.1702e-01, PNorm = 76.9527, GNorm = 0.1608, lr_0 = 5.3480e-04

 60%|██████    | 454/756 [00:13<00:08, 34.63it/s][A
 61%|██████    | 458/756 [00:13<00:08, 34.57it/s][ALoss = 1.8541e-01, PNorm = 76.9958, GNorm = 0.4456, lr_0 = 5.3422e-04

 61%|██████    | 462/756 [00:13<00:08, 34.54it/s][A
 62%|██████▏   | 466/756 [00:13<00:08, 34.95it/s][A
 62%|██████▏   | 470/756 [00:13<00:08, 35.23it/s][ALoss = 1.4052e-01, PNorm = 77.0297, GNorm = 0.1495, lr_0 = 5.3364e-04

 63%|██████▎   | 474/756 [00:14<00:08, 35.14it/s][A
 63%|██████▎   | 478/756 [00:14<00:07, 35.59it/s][ALoss = 1.5033e-01, PNorm = 77.0716, GNorm = 0.1699, lr_0 = 5.3306e-04

 64%|██████▍   | 482/756 [00:14<00:07, 35.41it/s][A
 64%|██████▍   | 486/756 [00:14<00:07, 35.21it/s][A
 65%|██████▍   | 490/756 [00:14<00:07, 35.57it/s][ALoss = 1.3724e-01, PNorm = 77.1


  6%|▌         | 47/756 [00:01<00:20, 34.40it/s][A
  7%|▋         | 51/756 [00:01<00:21, 33.54it/s][ALoss = 1.2134e-01, PNorm = 78.1460, GNorm = 0.1890, lr_0 = 5.1418e-04

  7%|▋         | 56/756 [00:01<00:19, 35.61it/s][A
  8%|▊         | 60/756 [00:02<00:20, 34.59it/s][A
  9%|▊         | 65/756 [00:02<00:18, 36.40it/s][ALoss = 1.7402e-01, PNorm = 78.1585, GNorm = 0.0973, lr_0 = 5.1362e-04

  9%|▉         | 69/756 [00:02<00:19, 34.71it/s][A
 10%|▉         | 74/756 [00:02<00:19, 35.79it/s][ALoss = 1.6838e-01, PNorm = 78.1864, GNorm = 0.1740, lr_0 = 5.1306e-04

 10%|█         | 78/756 [00:02<00:19, 34.91it/s][A
 11%|█         | 82/756 [00:02<00:20, 33.44it/s][ALoss = 1.5162e-01, PNorm = 78.2325, GNorm = 0.1645, lr_0 = 5.1250e-04

 11%|█▏        | 86/756 [00:02<00:20, 32.37it/s][A
 12%|█▏        | 90/756 [00:02<00:19, 33.70it/s][A
 12%|█▏        | 94/756 [00:03<00:23, 28.41it/s][ALoss = 1.4480e-01, PNorm = 78.2718, GNorm = 0.1089, lr_0 = 5.1195e-04

 13%|█▎        | 98/756 [

  7%|▋         | 56/756 [00:02<00:24, 28.76it/s][ALoss = 1.1864e-01, PNorm = 80.5932, GNorm = 0.2778, lr_0 = 4.7333e-04

  8%|▊         | 60/756 [00:02<00:25, 27.51it/s][A
  9%|▊         | 65/756 [00:02<00:22, 31.32it/s][A
  9%|▉         | 69/756 [00:02<00:24, 27.89it/s][ALoss = 1.3688e-01, PNorm = 80.6135, GNorm = 0.2201, lr_0 = 4.7282e-04

 10%|▉         | 74/756 [00:02<00:23, 28.87it/s][A
 10%|█         | 79/756 [00:02<00:21, 32.22it/s][ALoss = 1.1152e-01, PNorm = 80.6461, GNorm = 0.1283, lr_0 = 4.7230e-04

 11%|█         | 83/756 [00:03<00:21, 31.79it/s][A
 12%|█▏        | 87/756 [00:03<00:19, 33.53it/s][ALoss = 1.4778e-01, PNorm = 80.6718, GNorm = 0.0858, lr_0 = 4.7179e-04

 12%|█▏        | 91/756 [00:03<00:20, 32.29it/s][A
 13%|█▎        | 95/756 [00:03<00:19, 33.80it/s][A
 13%|█▎        | 99/756 [00:03<00:20, 32.71it/s][ALoss = 1.3295e-01, PNorm = 80.7033, GNorm = 0.1264, lr_0 = 4.7127e-04

 14%|█▎        | 103/756 [00:03<00:19, 33.95it/s][A
 14%|█▍        | 107/756 

 10%|█         | 76/756 [00:02<00:19, 34.13it/s][A
 11%|█         | 81/756 [00:02<00:18, 36.24it/s][ALoss = 1.3633e-01, PNorm = 83.0774, GNorm = 0.1941, lr_0 = 4.3473e-04

 11%|█         | 85/756 [00:02<00:19, 35.21it/s][A
 12%|█▏        | 89/756 [00:02<00:18, 36.44it/s][A
 12%|█▏        | 93/756 [00:03<00:18, 35.37it/s][ALoss = 1.6434e-01, PNorm = 83.1051, GNorm = 0.3553, lr_0 = 4.3426e-04

 13%|█▎        | 97/756 [00:03<00:18, 36.51it/s][A
 13%|█▎        | 101/756 [00:03<00:18, 35.26it/s][ALoss = 1.5501e-01, PNorm = 83.1379, GNorm = 0.1785, lr_0 = 4.3379e-04

 14%|█▍        | 105/756 [00:03<00:17, 36.24it/s][A
 14%|█▍        | 109/756 [00:03<00:18, 35.06it/s][A
 15%|█▌        | 114/756 [00:03<00:18, 35.21it/s][ALoss = 1.3670e-01, PNorm = 83.1865, GNorm = 0.3452, lr_0 = 4.3331e-04

 16%|█▌        | 118/756 [00:03<00:17, 36.12it/s][A
 16%|█▌        | 122/756 [00:03<00:17, 35.49it/s][ALoss = 1.2474e-01, PNorm = 83.2249, GNorm = 0.2102, lr_0 = 4.3284e-04

 17%|█▋        | 126

 11%|█▏        | 86/756 [00:02<00:19, 34.89it/s][ALoss = 1.0501e-01, PNorm = 84.9997, GNorm = 0.1803, lr_0 = 4.0015e-04

 12%|█▏        | 90/756 [00:02<00:18, 35.34it/s][A
 12%|█▏        | 94/756 [00:03<00:18, 34.90it/s][A
 13%|█▎        | 98/756 [00:03<00:18, 35.34it/s][ALoss = 1.0916e-01, PNorm = 85.0206, GNorm = 0.1888, lr_0 = 3.9971e-04

 13%|█▎        | 102/756 [00:03<00:18, 34.50it/s][A
 14%|█▍        | 106/756 [00:03<00:18, 35.34it/s][ALoss = 1.3203e-01, PNorm = 85.0333, GNorm = 0.1397, lr_0 = 3.9928e-04

 15%|█▍        | 110/756 [00:03<00:18, 34.49it/s][A
 15%|█▌        | 114/756 [00:03<00:18, 35.34it/s][A
 16%|█▌        | 118/756 [00:03<00:18, 34.57it/s][ALoss = 1.4267e-01, PNorm = 85.0466, GNorm = 0.1852, lr_0 = 3.9885e-04

 16%|█▌        | 122/756 [00:03<00:17, 35.34it/s][A
 17%|█▋        | 126/756 [00:03<00:18, 34.43it/s][ALoss = 1.0303e-01, PNorm = 85.0690, GNorm = 0.0921, lr_0 = 3.9841e-04

 17%|█▋        | 130/756 [00:04<00:21, 29.29it/s][A
 18%|█▊        | 1


 14%|█▍        | 104/756 [00:03<00:20, 31.70it/s][A
 14%|█▍        | 108/756 [00:03<00:21, 29.81it/s][A
 15%|█▍        | 113/756 [00:03<00:19, 32.98it/s][ALoss = 1.4796e-01, PNorm = 86.7895, GNorm = 0.1799, lr_0 = 3.6756e-04

 15%|█▌        | 117/756 [00:03<00:22, 27.93it/s][A
 16%|█▌        | 122/756 [00:04<00:21, 29.18it/s][ALoss = 1.1309e-01, PNorm = 86.8170, GNorm = 0.2300, lr_0 = 3.6716e-04

 17%|█▋        | 126/756 [00:04<00:20, 31.42it/s][A
 17%|█▋        | 130/756 [00:04<00:21, 29.68it/s][ALoss = 1.0498e-01, PNorm = 86.8497, GNorm = 0.1005, lr_0 = 3.6676e-04

 18%|█▊        | 135/756 [00:04<00:19, 32.51it/s][A
 18%|█▊        | 139/756 [00:04<00:20, 29.92it/s][ALoss = 1.2208e-01, PNorm = 86.8759, GNorm = 0.3962, lr_0 = 3.6636e-04

 19%|█▉        | 144/756 [00:04<00:18, 32.78it/s][A
 20%|█▉        | 148/756 [00:04<00:19, 31.21it/s][A
 20%|██        | 153/756 [00:05<00:17, 33.70it/s][ALoss = 1.4681e-01, PNorm = 86.8963, GNorm = 0.4046, lr_0 = 3.6596e-04

 21%|██      


 71%|███████   | 538/756 [00:16<00:07, 29.71it/s][A
 72%|███████▏  | 543/756 [00:16<00:06, 32.78it/s][ALoss = 1.4018e-01, PNorm = 87.7695, GNorm = 0.3034, lr_0 = 3.5074e-04

 72%|███████▏  | 547/756 [00:16<00:06, 32.30it/s][A
 73%|███████▎  | 552/756 [00:16<00:05, 34.65it/s][ALoss = 1.2568e-01, PNorm = 87.7891, GNorm = 0.2440, lr_0 = 3.5036e-04

 74%|███████▎  | 556/756 [00:16<00:07, 27.70it/s][A
 74%|███████▍  | 560/756 [00:17<00:06, 30.07it/s][ALoss = 9.6698e-02, PNorm = 87.8173, GNorm = 0.1986, lr_0 = 3.4998e-04

 75%|███████▍  | 564/756 [00:17<00:06, 29.95it/s][A
 75%|███████▌  | 568/756 [00:17<00:05, 32.13it/s][A
 76%|███████▌  | 572/756 [00:17<00:05, 31.33it/s][ALoss = 1.0237e-01, PNorm = 87.8441, GNorm = 0.0965, lr_0 = 3.4960e-04

 76%|███████▌  | 576/756 [00:17<00:05, 33.46it/s][A
 77%|███████▋  | 580/756 [00:17<00:05, 32.80it/s][ALoss = 9.2613e-02, PNorm = 87.8654, GNorm = 0.1557, lr_0 = 3.4922e-04

 77%|███████▋  | 585/756 [00:17<00:04, 35.24it/s][A
 78%|███████▊

 78%|███████▊  | 591/756 [00:17<00:04, 36.83it/s][A
 79%|███████▊  | 595/756 [00:18<00:04, 35.48it/s][ALoss = 1.3055e-01, PNorm = 89.3322, GNorm = 0.3163, lr_0 = 3.2109e-04

 79%|███████▉  | 600/756 [00:18<00:04, 37.20it/s][A
 80%|███████▉  | 604/756 [00:18<00:04, 35.84it/s][ALoss = 1.2727e-01, PNorm = 89.3559, GNorm = 0.1897, lr_0 = 3.2074e-04

 81%|████████  | 609/756 [00:18<00:03, 37.59it/s][A
 81%|████████  | 613/756 [00:18<00:03, 36.44it/s][A
 82%|████████▏ | 618/756 [00:18<00:03, 35.45it/s][ALoss = 1.2835e-01, PNorm = 89.3783, GNorm = 0.2638, lr_0 = 3.2039e-04

 82%|████████▏ | 623/756 [00:18<00:03, 37.40it/s][A
 83%|████████▎ | 627/756 [00:18<00:03, 35.78it/s][ALoss = 1.2289e-01, PNorm = 89.3972, GNorm = 0.2243, lr_0 = 3.2004e-04

 84%|████████▎ | 632/756 [00:19<00:03, 37.45it/s][A
 84%|████████▍ | 636/756 [00:19<00:03, 36.11it/s][ALoss = 1.0282e-01, PNorm = 89.4130, GNorm = 0.1294, lr_0 = 3.1969e-04

 85%|████████▍ | 641/756 [00:19<00:03, 37.65it/s][A
 85%|████████▌


 81%|████████  | 613/756 [00:18<00:04, 32.79it/s][A
 82%|████████▏ | 617/756 [00:18<00:04, 34.06it/s][A
 82%|████████▏ | 621/756 [00:18<00:04, 28.36it/s][ALoss = 1.5083e-01, PNorm = 90.6244, GNorm = 0.3138, lr_0 = 2.9493e-04

 83%|████████▎ | 625/756 [00:18<00:04, 30.38it/s][A
 83%|████████▎ | 629/756 [00:18<00:04, 31.17it/s][ALoss = 1.1900e-01, PNorm = 90.6448, GNorm = 0.4040, lr_0 = 2.9461e-04

 84%|████████▎ | 633/756 [00:18<00:03, 32.59it/s][A
 84%|████████▍ | 637/756 [00:18<00:03, 32.56it/s][A
 85%|████████▍ | 641/756 [00:19<00:03, 33.39it/s][ALoss = 1.0363e-01, PNorm = 90.6665, GNorm = 0.2001, lr_0 = 2.9429e-04

 85%|████████▌ | 645/756 [00:19<00:03, 32.37it/s][A
 86%|████████▌ | 649/756 [00:19<00:03, 33.87it/s][ALoss = 1.3277e-01, PNorm = 90.6755, GNorm = 0.1807, lr_0 = 2.9397e-04

 86%|████████▋ | 653/756 [00:19<00:03, 33.72it/s][A
 87%|████████▋ | 657/756 [00:19<00:02, 34.45it/s][A
 87%|████████▋ | 661/756 [00:19<00:02, 34.13it/s][ALoss = 1.1385e-01, PNorm = 90.6

 82%|████████▏ | 622/756 [00:18<00:03, 35.39it/s][A
 83%|████████▎ | 626/756 [00:18<00:03, 35.44it/s][ALoss = 1.1150e-01, PNorm = 91.8120, GNorm = 0.4329, lr_0 = 2.7147e-04

 83%|████████▎ | 630/756 [00:18<00:03, 34.82it/s][A
 84%|████████▍ | 634/756 [00:18<00:03, 34.80it/s][ALoss = 1.3135e-01, PNorm = 91.8267, GNorm = 0.1163, lr_0 = 2.7118e-04

 84%|████████▍ | 638/756 [00:18<00:03, 35.24it/s][A
 85%|████████▍ | 642/756 [00:18<00:03, 35.56it/s][A
 85%|████████▌ | 646/756 [00:18<00:03, 35.70it/s][ALoss = 1.4857e-01, PNorm = 91.8362, GNorm = 0.4513, lr_0 = 2.7088e-04

 86%|████████▌ | 650/756 [00:18<00:02, 35.69it/s][A
 87%|████████▋ | 654/756 [00:19<00:02, 36.08it/s][ALoss = 1.2657e-01, PNorm = 91.8495, GNorm = 0.1931, lr_0 = 2.7059e-04

 87%|████████▋ | 658/756 [00:19<00:02, 36.04it/s][A
 88%|████████▊ | 662/756 [00:19<00:02, 36.18it/s][A
 88%|████████▊ | 666/756 [00:19<00:02, 36.25it/s][ALoss = 1.2636e-01, PNorm = 91.8637, GNorm = 0.1820, lr_0 = 2.7029e-04

 89%|████████▊

 83%|████████▎ | 629/756 [00:18<00:03, 35.32it/s][ALoss = 1.2663e-01, PNorm = 92.7198, GNorm = 0.3290, lr_0 = 2.4991e-04

 84%|████████▎ | 633/756 [00:18<00:03, 35.98it/s][A
 84%|████████▍ | 637/756 [00:18<00:03, 35.77it/s][A
 85%|████████▍ | 641/756 [00:18<00:03, 35.97it/s][ALoss = 1.6945e-01, PNorm = 92.7305, GNorm = 0.2506, lr_0 = 2.4963e-04

 85%|████████▌ | 645/756 [00:18<00:03, 35.62it/s][A
 86%|████████▌ | 649/756 [00:18<00:02, 36.05it/s][ALoss = 1.5613e-01, PNorm = 92.7450, GNorm = 0.3966, lr_0 = 2.4936e-04

 86%|████████▋ | 653/756 [00:18<00:02, 35.47it/s][A
 87%|████████▋ | 657/756 [00:19<00:02, 36.09it/s][A
 87%|████████▋ | 661/756 [00:19<00:02, 35.82it/s][ALoss = 1.2884e-01, PNorm = 92.7577, GNorm = 0.3729, lr_0 = 2.4909e-04

 88%|████████▊ | 665/756 [00:19<00:02, 36.32it/s][A
 88%|████████▊ | 669/756 [00:19<00:02, 36.05it/s][ALoss = 1.3395e-01, PNorm = 92.7666, GNorm = 0.2297, lr_0 = 2.4882e-04

 89%|████████▉ | 673/756 [00:19<00:02, 36.34it/s][A
 90%|████████▉

 84%|████████▎ | 632/756 [00:18<00:03, 36.47it/s][A
 84%|████████▍ | 636/756 [00:18<00:03, 36.74it/s][ALoss = 1.2984e-01, PNorm = 93.6098, GNorm = 0.6274, lr_0 = 2.3003e-04

 85%|████████▍ | 640/756 [00:18<00:03, 36.17it/s][A
 85%|████████▌ | 644/756 [00:18<00:03, 36.54it/s][ALoss = 1.5370e-01, PNorm = 93.6194, GNorm = 0.2418, lr_0 = 2.2978e-04

 86%|████████▌ | 648/756 [00:18<00:03, 35.68it/s][A
 86%|████████▌ | 652/756 [00:18<00:02, 35.49it/s][A
 87%|████████▋ | 656/756 [00:18<00:02, 35.48it/s][ALoss = 1.5757e-01, PNorm = 93.6297, GNorm = 0.2150, lr_0 = 2.2953e-04

 87%|████████▋ | 660/756 [00:18<00:02, 35.33it/s][A
 88%|████████▊ | 664/756 [00:19<00:02, 35.15it/s][ALoss = 1.2648e-01, PNorm = 93.6453, GNorm = 0.3400, lr_0 = 2.2928e-04

 88%|████████▊ | 668/756 [00:19<00:02, 35.28it/s][A
 89%|████████▉ | 672/756 [00:19<00:02, 35.41it/s][A
 89%|████████▉ | 676/756 [00:19<00:02, 33.57it/s][ALoss = 1.3414e-01, PNorm = 93.6526, GNorm = 0.4861, lr_0 = 2.2903e-04

 90%|████████▉


 33%|███▎      | 253/756 [00:07<00:14, 33.96it/s][A
 34%|███▍      | 257/756 [00:07<00:17, 27.88it/s][ALoss = 1.2439e-01, PNorm = 93.9855, GNorm = 0.4793, lr_0 = 2.2070e-04

 35%|███▍      | 262/756 [00:08<00:15, 31.89it/s][A
 35%|███▌      | 266/756 [00:08<00:15, 32.22it/s][ALoss = 1.3427e-01, PNorm = 93.9916, GNorm = 0.2530, lr_0 = 2.2046e-04

 36%|███▌      | 271/756 [00:08<00:14, 34.39it/s][A
 36%|███▋      | 275/756 [00:08<00:14, 33.73it/s][A
 37%|███▋      | 279/756 [00:08<00:13, 35.09it/s][ALoss = 1.0222e-01, PNorm = 94.0010, GNorm = 0.2117, lr_0 = 2.2022e-04

 37%|███▋      | 283/756 [00:08<00:14, 33.54it/s][A
 38%|███▊      | 287/756 [00:08<00:13, 35.13it/s][ALoss = 9.0699e-02, PNorm = 94.0167, GNorm = 0.2433, lr_0 = 2.1998e-04

 38%|███▊      | 291/756 [00:08<00:13, 34.09it/s][A
 39%|███▉      | 296/756 [00:08<00:12, 36.21it/s][A
 40%|███▉      | 300/756 [00:09<00:12, 35.29it/s][ALoss = 1.2270e-01, PNorm = 94.0290, GNorm = 0.2300, lr_0 = 2.1974e-04

 40%|████    


 89%|████████▉ | 672/756 [00:19<00:02, 35.77it/s][A
 89%|████████▉ | 676/756 [00:19<00:02, 30.72it/s][A
 90%|████████▉ | 680/756 [00:20<00:02, 32.77it/s][ALoss = 1.1997e-01, PNorm = 94.3753, GNorm = 0.3666, lr_0 = 2.1083e-04

 90%|█████████ | 684/756 [00:20<00:02, 30.94it/s][A
 91%|█████████ | 688/756 [00:20<00:02, 32.77it/s][ALoss = 1.3331e-01, PNorm = 94.3786, GNorm = 0.4919, lr_0 = 2.1060e-04

 92%|█████████▏| 692/756 [00:20<00:01, 32.77it/s][A
 92%|█████████▏| 696/756 [00:20<00:01, 34.20it/s][A
 93%|█████████▎| 700/756 [00:20<00:01, 28.17it/s][ALoss = 1.3398e-01, PNorm = 94.3874, GNorm = 0.1576, lr_0 = 2.1037e-04

 93%|█████████▎| 704/756 [00:20<00:01, 30.11it/s][A
 94%|█████████▎| 708/756 [00:21<00:01, 30.70it/s][ALoss = 1.1152e-01, PNorm = 94.3997, GNorm = 0.3161, lr_0 = 2.1014e-04

 94%|█████████▍| 712/756 [00:21<00:01, 32.47it/s][A
 95%|█████████▍| 716/756 [00:21<00:01, 32.88it/s][A
 95%|█████████▌| 720/756 [00:21<00:01, 34.70it/s][ALoss = 1.0506e-01, PNorm = 94.4

 90%|████████▉ | 680/756 [00:20<00:02, 35.79it/s][A
 90%|█████████ | 684/756 [00:20<00:02, 34.99it/s][ALoss = 1.1307e-01, PNorm = 95.0488, GNorm = 0.4947, lr_0 = 1.9406e-04

 91%|█████████ | 688/756 [00:20<00:01, 35.60it/s][A
 92%|█████████▏| 692/756 [00:20<00:01, 34.61it/s][ALoss = 1.2283e-01, PNorm = 95.0570, GNorm = 0.3632, lr_0 = 1.9385e-04

 92%|█████████▏| 696/756 [00:20<00:01, 35.32it/s][A
 93%|█████████▎| 700/756 [00:20<00:01, 34.63it/s][A
 93%|█████████▎| 704/756 [00:20<00:01, 35.46it/s][ALoss = 1.1026e-01, PNorm = 95.0651, GNorm = 0.3674, lr_0 = 1.9364e-04

 94%|█████████▎| 708/756 [00:20<00:01, 34.44it/s][A
 94%|█████████▍| 712/756 [00:20<00:01, 35.34it/s][ALoss = 1.0301e-01, PNorm = 95.0752, GNorm = 0.1678, lr_0 = 1.9343e-04

 95%|█████████▍| 716/756 [00:21<00:01, 34.95it/s][A
 95%|█████████▌| 720/756 [00:21<00:01, 35.99it/s][A
 96%|█████████▌| 724/756 [00:21<00:00, 34.98it/s][ALoss = 7.9600e-02, PNorm = 95.0866, GNorm = 0.1139, lr_0 = 1.9322e-04

 96%|█████████

 91%|█████████ | 689/756 [00:20<00:01, 36.21it/s][ALoss = 1.3423e-01, PNorm = 95.6610, GNorm = 0.2200, lr_0 = 1.7864e-04

 92%|█████████▏| 693/756 [00:20<00:01, 35.79it/s][A
 92%|█████████▏| 697/756 [00:20<00:01, 36.26it/s][ALoss = 9.3708e-02, PNorm = 95.6689, GNorm = 0.2014, lr_0 = 1.7845e-04

 93%|█████████▎| 701/756 [00:20<00:01, 36.06it/s][A
 93%|█████████▎| 705/756 [00:20<00:01, 36.47it/s][A
 94%|█████████▍| 709/756 [00:20<00:01, 36.30it/s][ALoss = 1.2970e-01, PNorm = 95.6738, GNorm = 0.2702, lr_0 = 1.7825e-04

 94%|█████████▍| 713/756 [00:20<00:01, 36.46it/s][A
 95%|█████████▍| 717/756 [00:21<00:01, 36.13it/s][ALoss = 1.0635e-01, PNorm = 95.6784, GNorm = 0.2863, lr_0 = 1.7806e-04

 95%|█████████▌| 721/756 [00:21<00:00, 35.57it/s][A
 96%|█████████▌| 725/756 [00:21<00:00, 35.35it/s][A
 96%|█████████▋| 729/756 [00:21<00:00, 35.47it/s][ALoss = 1.5011e-01, PNorm = 95.6798, GNorm = 0.2330, lr_0 = 1.7787e-04

 97%|█████████▋| 733/756 [00:21<00:00, 34.90it/s][A
 97%|█████████

 93%|█████████▎| 701/756 [00:22<00:01, 34.62it/s][ALoss = 1.2338e-01, PNorm = 96.1857, GNorm = 0.3071, lr_0 = 1.6425e-04

 93%|█████████▎| 705/756 [00:22<00:01, 33.23it/s][A
 94%|█████████▍| 709/756 [00:22<00:01, 34.23it/s][A
 94%|█████████▍| 713/756 [00:22<00:01, 33.04it/s][ALoss = 1.1396e-01, PNorm = 96.1967, GNorm = 0.3811, lr_0 = 1.6408e-04

 95%|█████████▍| 717/756 [00:22<00:01, 33.84it/s][A
 95%|█████████▌| 721/756 [00:22<00:01, 32.41it/s][ALoss = 1.3753e-01, PNorm = 96.2018, GNorm = 0.4503, lr_0 = 1.6390e-04

 96%|█████████▌| 725/756 [00:22<00:00, 33.63it/s][A
 96%|█████████▋| 729/756 [00:22<00:00, 32.97it/s][A
 97%|█████████▋| 733/756 [00:23<00:00, 34.35it/s][ALoss = 1.2259e-01, PNorm = 96.2090, GNorm = 0.4329, lr_0 = 1.6372e-04

 97%|█████████▋| 737/756 [00:23<00:00, 28.46it/s][A
 98%|█████████▊| 741/756 [00:23<00:00, 30.66it/s][ALoss = 9.1779e-02, PNorm = 96.2194, GNorm = 0.3102, lr_0 = 1.6354e-04

 99%|█████████▊| 745/756 [00:23<00:00, 32.41it/s][A
 99%|█████████

 93%|█████████▎| 705/756 [00:23<00:01, 30.88it/s][ALoss = 1.0017e-01, PNorm = 96.6471, GNorm = 0.2650, lr_0 = 1.5120e-04

 94%|█████████▍| 709/756 [00:23<00:01, 32.54it/s][A
 94%|█████████▍| 713/756 [00:23<00:01, 31.59it/s][A
 95%|█████████▍| 717/756 [00:23<00:01, 33.11it/s][ALoss = 1.0559e-01, PNorm = 96.6543, GNorm = 0.3618, lr_0 = 1.5104e-04

 95%|█████████▌| 721/756 [00:23<00:01, 31.71it/s][A
 96%|█████████▌| 725/756 [00:23<00:00, 32.55it/s][ALoss = 1.0008e-01, PNorm = 96.6580, GNorm = 0.1163, lr_0 = 1.5088e-04

 96%|█████████▋| 729/756 [00:23<00:00, 31.18it/s][A
 97%|█████████▋| 733/756 [00:23<00:00, 32.14it/s][A
 97%|█████████▋| 737/756 [00:24<00:00, 25.74it/s][ALoss = 1.1875e-01, PNorm = 96.6613, GNorm = 0.7487, lr_0 = 1.5071e-04

 98%|█████████▊| 741/756 [00:24<00:00, 28.14it/s][A
 99%|█████████▊| 745/756 [00:24<00:00, 29.57it/s][ALoss = 7.6206e-02, PNorm = 96.6652, GNorm = 0.2743, lr_0 = 1.5055e-04

 99%|█████████▉| 749/756 [00:24<00:00, 31.20it/s][A
100%|█████████

 94%|█████████▍| 713/756 [00:21<00:01, 35.03it/s][ALoss = 1.5345e-01, PNorm = 97.0460, GNorm = 0.4808, lr_0 = 1.3918e-04

 95%|█████████▍| 717/756 [00:21<00:01, 29.15it/s][A
 95%|█████████▌| 721/756 [00:21<00:01, 31.12it/s][ALoss = 1.4705e-01, PNorm = 97.0453, GNorm = 0.5203, lr_0 = 1.3903e-04

 96%|█████████▌| 725/756 [00:21<00:00, 31.24it/s][A
 96%|█████████▋| 729/756 [00:21<00:00, 32.91it/s][A
 97%|█████████▋| 733/756 [00:21<00:00, 32.72it/s][ALoss = 8.1232e-02, PNorm = 97.0492, GNorm = 0.2652, lr_0 = 1.3887e-04

 97%|█████████▋| 737/756 [00:22<00:00, 33.67it/s][A
 98%|█████████▊| 741/756 [00:22<00:00, 26.54it/s][ALoss = 1.3917e-01, PNorm = 97.0571, GNorm = 0.3089, lr_0 = 1.3872e-04

 99%|█████████▊| 746/756 [00:22<00:00, 29.69it/s][A
 99%|█████████▉| 751/756 [00:22<00:00, 32.93it/s][ALoss = 1.2676e-01, PNorm = 97.0629, GNorm = 0.6174, lr_0 = 1.3857e-04

100%|█████████▉| 755/756 [00:22<00:00, 33.82it/s][A
                                                 [A
  0%|         


 95%|█████████▌| 721/756 [00:20<00:01, 34.70it/s][A
 96%|█████████▌| 725/756 [00:20<00:00, 35.37it/s][ALoss = 9.6619e-02, PNorm = 97.3824, GNorm = 0.3955, lr_0 = 1.2797e-04

 96%|█████████▋| 729/756 [00:20<00:00, 35.61it/s][A
 97%|█████████▋| 733/756 [00:21<00:00, 35.91it/s][A
 97%|█████████▋| 737/756 [00:21<00:00, 36.13it/s][ALoss = 8.6459e-02, PNorm = 97.3880, GNorm = 0.3191, lr_0 = 1.2783e-04

 98%|█████████▊| 741/756 [00:21<00:00, 35.93it/s][A
 99%|█████████▊| 745/756 [00:21<00:00, 36.29it/s][ALoss = 8.3833e-02, PNorm = 97.3942, GNorm = 0.3382, lr_0 = 1.2769e-04

 99%|█████████▉| 749/756 [00:21<00:00, 36.03it/s][A
100%|█████████▉| 753/756 [00:21<00:00, 36.16it/s][A
                                                 [A
  0%|          | 0/95 [00:00<?, ?it/s][A
  1%|          | 1/95 [00:00<00:36,  2.56it/s][A
  9%|▉         | 9/95 [00:00<00:03, 21.62it/s][A
 18%|█▊        | 17/95 [00:00<00:02, 35.89it/s][A
 26%|██▋       | 25/95 [00:00<00:01, 44.12it/s][A
 35%|███▍      

 96%|█████████▋| 729/756 [00:21<00:00, 34.97it/s][ALoss = 1.1833e-01, PNorm = 97.6803, GNorm = 0.4674, lr_0 = 1.1780e-04

 97%|█████████▋| 733/756 [00:21<00:00, 34.79it/s][A
 97%|█████████▋| 737/756 [00:22<00:00, 35.19it/s][A
 98%|█████████▊| 741/756 [00:22<00:00, 34.79it/s][ALoss = 1.2559e-01, PNorm = 97.6809, GNorm = 0.3492, lr_0 = 1.1767e-04

 99%|█████████▊| 745/756 [00:22<00:00, 35.52it/s][A
 99%|█████████▉| 749/756 [00:22<00:00, 35.69it/s][ALoss = 1.0056e-01, PNorm = 97.6829, GNorm = 0.3744, lr_0 = 1.1754e-04

100%|█████████▉| 753/756 [00:22<00:00, 36.24it/s][A
                                                 [A
  0%|          | 0/95 [00:00<?, ?it/s][A
  1%|          | 1/95 [00:00<00:40,  2.30it/s][A
  9%|▉         | 9/95 [00:00<00:04, 19.14it/s][A
 18%|█▊        | 17/95 [00:00<00:02, 29.73it/s][A
 26%|██▋       | 25/95 [00:00<00:01, 38.29it/s][A
 35%|███▍      | 33/95 [00:01<00:01, 45.18it/s][A
 43%|████▎     | 41/95 [00:01<00:01, 50.58it/s][A
 52%|█████▏    | 49/


 47%|████▋     | 358/756 [00:10<00:11, 33.65it/s][A
 48%|████▊     | 362/756 [00:11<00:12, 32.16it/s][A
 48%|████▊     | 366/756 [00:11<00:11, 33.75it/s][ALoss = 1.2577e-01, PNorm = 97.8184, GNorm = 0.2444, lr_0 = 1.1289e-04

 49%|████▉     | 370/756 [00:11<00:14, 26.54it/s][A
 49%|████▉     | 374/756 [00:11<00:13, 29.23it/s][ALoss = 9.1292e-02, PNorm = 97.8228, GNorm = 0.2203, lr_0 = 1.1277e-04

 50%|█████     | 378/756 [00:11<00:12, 29.14it/s][A
 51%|█████     | 382/756 [00:11<00:11, 31.20it/s][A
 51%|█████     | 386/756 [00:11<00:12, 30.82it/s][ALoss = 1.0784e-01, PNorm = 97.8278, GNorm = 0.2841, lr_0 = 1.1264e-04

 52%|█████▏    | 390/756 [00:12<00:11, 33.02it/s][A
 52%|█████▏    | 394/756 [00:12<00:11, 31.95it/s][ALoss = 1.1942e-01, PNorm = 97.8286, GNorm = 0.1689, lr_0 = 1.1252e-04

 53%|█████▎    | 398/756 [00:12<00:10, 33.78it/s][A
 53%|█████▎    | 402/756 [00:12<00:10, 32.80it/s][A
 54%|█████▍    | 407/756 [00:12<00:09, 35.18it/s][ALoss = 9.8295e-02, PNorm = 97.8

 52%|█████▏    | 396/756 [00:12<00:10, 33.24it/s][A
 53%|█████▎    | 400/756 [00:12<00:10, 34.79it/s][ALoss = 8.5632e-02, PNorm = 98.1099, GNorm = 0.3066, lr_0 = 1.0358e-04

 53%|█████▎    | 404/756 [00:13<00:10, 33.04it/s][A
 54%|█████▍    | 408/756 [00:13<00:10, 33.96it/s][ALoss = 9.6119e-02, PNorm = 98.1138, GNorm = 0.5235, lr_0 = 1.0347e-04

 54%|█████▍    | 412/756 [00:13<00:10, 32.29it/s][A
 55%|█████▌    | 416/756 [00:13<00:10, 33.44it/s][A
 56%|█████▌    | 420/756 [00:13<00:10, 32.34it/s][ALoss = 1.4075e-01, PNorm = 98.1121, GNorm = 0.3069, lr_0 = 1.0336e-04

 56%|█████▌    | 424/756 [00:13<00:10, 33.04it/s][A
 57%|█████▋    | 428/756 [00:13<00:10, 32.58it/s][ALoss = 8.2778e-02, PNorm = 98.1129, GNorm = 0.2453, lr_0 = 1.0324e-04

 57%|█████▋    | 432/756 [00:13<00:09, 34.30it/s][A
 58%|█████▊    | 436/756 [00:14<00:09, 33.64it/s][A
 58%|█████▊    | 440/756 [00:14<00:08, 35.24it/s][ALoss = 1.3467e-01, PNorm = 98.1147, GNorm = 0.5342, lr_0 = 1.0313e-04

 59%|█████▊   

Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Moving model to cuda
Model 2 test auc = 0.808919                    
Building model 3
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.35000000000000003, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=1100, bias=False)
        (W_h): Linear(in_features=1100, out_features=1100, bias=False)
        (W_o): Linear(in_features=1233, out_features=1100, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.35000000000000003, inplace=False)
    (1


  1%|          | 5/756 [00:00<01:00, 12.38it/s][A
  1%|          | 9/756 [00:00<00:40, 18.37it/s][A
  2%|▏         | 14/756 [00:00<00:29, 25.53it/s][ALoss = 1.6113e-01, PNorm = 51.1570, GNorm = 0.1277, lr_0 = 5.6013e-04

  2%|▏         | 18/756 [00:00<00:27, 26.81it/s][A
  3%|▎         | 22/756 [00:00<00:24, 29.74it/s][ALoss = 1.5178e-01, PNorm = 51.1649, GNorm = 0.1298, lr_0 = 5.6609e-04

  3%|▎         | 26/756 [00:01<00:24, 29.40it/s][A
  4%|▍         | 30/756 [00:01<00:22, 31.84it/s][A
  4%|▍         | 34/756 [00:01<00:23, 30.97it/s][ALoss = 1.4615e-01, PNorm = 51.1851, GNorm = 0.0872, lr_0 = 5.7205e-04

  5%|▌         | 38/756 [00:01<00:22, 32.34it/s][A
  6%|▌         | 42/756 [00:01<00:22, 31.05it/s][ALoss = 1.2762e-01, PNorm = 51.2064, GNorm = 0.1068, lr_0 = 5.7801e-04

  6%|▌         | 46/756 [00:01<00:21, 32.49it/s][A
  7%|▋         | 50/756 [00:01<00:22, 31.16it/s][A
  7%|▋         | 54/756 [00:01<00:21, 32.81it/s][ALoss = 1.4366e-01, PNorm = 51.2148, GNorm = 0.


 55%|█████▌    | 417/756 [00:13<00:10, 31.57it/s][A
 56%|█████▌    | 421/756 [00:13<00:10, 33.30it/s][ALoss = 1.4934e-01, PNorm = 52.1615, GNorm = 0.1063, lr_0 = 8.0450e-04

 56%|█████▌    | 425/756 [00:13<00:10, 31.75it/s][A
 57%|█████▋    | 429/756 [00:13<00:09, 33.32it/s][A
 57%|█████▋    | 433/756 [00:13<00:10, 32.25it/s][ALoss = 1.5066e-01, PNorm = 52.1842, GNorm = 0.1008, lr_0 = 8.1046e-04

 58%|█████▊    | 437/756 [00:13<00:09, 33.81it/s][A
 58%|█████▊    | 441/756 [00:14<00:09, 32.54it/s][ALoss = 1.4398e-01, PNorm = 52.2336, GNorm = 0.0781, lr_0 = 8.1642e-04

 59%|█████▉    | 445/756 [00:14<00:09, 33.64it/s][A
 59%|█████▉    | 449/756 [00:14<00:09, 32.07it/s][A
 60%|█████▉    | 453/756 [00:14<00:09, 33.26it/s][ALoss = 1.4128e-01, PNorm = 52.2673, GNorm = 0.1118, lr_0 = 8.2238e-04

 60%|██████    | 457/756 [00:14<00:09, 31.37it/s][A
 61%|██████    | 461/756 [00:14<00:08, 33.00it/s][ALoss = 1.6210e-01, PNorm = 52.2840, GNorm = 0.1023, lr_0 = 8.2834e-04

 62%|██████▏ 


  3%|▎         | 21/756 [00:00<00:25, 28.69it/s][A
  3%|▎         | 25/756 [00:01<00:24, 29.70it/s][A
  4%|▍         | 29/756 [00:01<00:23, 31.60it/s][ALoss = 1.6187e-01, PNorm = 53.6451, GNorm = 0.1007, lr_0 = 9.9641e-04

  4%|▍         | 33/756 [00:01<00:22, 31.60it/s][A
  5%|▍         | 37/756 [00:01<00:22, 32.07it/s][ALoss = 1.8091e-01, PNorm = 53.7012, GNorm = 0.2460, lr_0 = 9.9533e-04

  5%|▌         | 41/756 [00:01<00:22, 31.56it/s][A
  6%|▌         | 45/756 [00:01<00:22, 31.92it/s][A
  6%|▋         | 49/756 [00:01<00:22, 31.75it/s][ALoss = 1.3529e-01, PNorm = 53.7601, GNorm = 0.1036, lr_0 = 9.9424e-04

  7%|▋         | 53/756 [00:01<00:21, 32.18it/s][A
  8%|▊         | 57/756 [00:02<00:22, 31.61it/s][ALoss = 1.2692e-01, PNorm = 53.8232, GNorm = 0.2091, lr_0 = 9.9316e-04

  8%|▊         | 61/756 [00:02<00:21, 32.22it/s][A
  9%|▊         | 65/756 [00:02<00:21, 31.79it/s][A
  9%|▉         | 69/756 [00:02<00:20, 32.75it/s][ALoss = 1.1602e-01, PNorm = 53.8435, GNorm = 

  3%|▎         | 26/756 [00:01<00:26, 28.07it/s][A
  4%|▍         | 30/756 [00:01<00:23, 30.65it/s][ALoss = 1.1815e-01, PNorm = 56.8016, GNorm = 0.0826, lr_0 = 9.1725e-04

  4%|▍         | 34/756 [00:01<00:23, 30.98it/s][A
  5%|▌         | 38/756 [00:01<00:22, 32.57it/s][A
  6%|▌         | 42/756 [00:01<00:22, 32.31it/s][ALoss = 1.6186e-01, PNorm = 56.8241, GNorm = 0.1028, lr_0 = 9.1625e-04

  6%|▌         | 46/756 [00:01<00:21, 33.61it/s][A
  7%|▋         | 50/756 [00:01<00:21, 33.39it/s][ALoss = 1.3137e-01, PNorm = 56.8992, GNorm = 0.1164, lr_0 = 9.1525e-04

  7%|▋         | 54/756 [00:02<00:20, 34.24it/s][A
  8%|▊         | 58/756 [00:02<00:20, 33.49it/s][A
  8%|▊         | 62/756 [00:02<00:20, 34.66it/s][ALoss = 1.8380e-01, PNorm = 56.9489, GNorm = 0.0917, lr_0 = 9.1426e-04

  9%|▊         | 66/756 [00:02<00:20, 34.05it/s][A
  9%|▉         | 70/756 [00:02<00:19, 35.22it/s][ALoss = 1.6204e-01, PNorm = 57.0098, GNorm = 0.1059, lr_0 = 9.1326e-04

 10%|▉         | 74/756 [0


 58%|█████▊    | 435/756 [00:12<00:09, 34.33it/s][A
 58%|█████▊    | 439/756 [00:12<00:09, 35.15it/s][A
 59%|█████▊    | 443/756 [00:13<00:10, 28.84it/s][ALoss = 1.4831e-01, PNorm = 58.6291, GNorm = 0.0991, lr_0 = 8.7719e-04

 59%|█████▉    | 447/756 [00:13<00:09, 31.02it/s][A
 60%|█████▉    | 451/756 [00:13<00:09, 31.40it/s][ALoss = 1.4677e-01, PNorm = 58.6955, GNorm = 0.1040, lr_0 = 8.7624e-04

 60%|██████    | 455/756 [00:13<00:09, 32.78it/s][A
 61%|██████    | 459/756 [00:13<00:09, 32.93it/s][A
 61%|██████    | 463/756 [00:13<00:08, 34.36it/s][ALoss = 1.7740e-01, PNorm = 58.7216, GNorm = 0.1005, lr_0 = 8.7528e-04

 62%|██████▏   | 467/756 [00:13<00:09, 29.66it/s][A
 62%|██████▏   | 471/756 [00:14<00:08, 32.15it/s][ALoss = 1.7339e-01, PNorm = 58.7736, GNorm = 0.0906, lr_0 = 8.7433e-04

 63%|██████▎   | 475/756 [00:14<00:08, 33.27it/s][A
 63%|██████▎   | 480/756 [00:14<00:07, 35.83it/s][ALoss = 1.7153e-01, PNorm = 58.7953, GNorm = 0.1098, lr_0 = 8.7338e-04

 64%|██████▍ 


 60%|█████▉    | 451/756 [00:14<00:09, 30.79it/s][A
 60%|██████    | 455/756 [00:14<00:09, 32.78it/s][ALoss = 1.7943e-01, PNorm = 61.9933, GNorm = 0.0906, lr_0 = 8.0653e-04

 61%|██████    | 459/756 [00:14<00:09, 32.33it/s][A
 61%|██████    | 463/756 [00:14<00:08, 33.29it/s][A
 62%|██████▏   | 467/756 [00:15<00:08, 32.66it/s][ALoss = 1.4184e-01, PNorm = 62.0594, GNorm = 0.0926, lr_0 = 8.0566e-04

 62%|██████▏   | 471/756 [00:15<00:08, 33.34it/s][A
 63%|██████▎   | 475/756 [00:15<00:08, 32.59it/s][ALoss = 1.1385e-01, PNorm = 62.1190, GNorm = 0.1189, lr_0 = 8.0478e-04

 63%|██████▎   | 479/756 [00:15<00:08, 33.83it/s][A
 64%|██████▍   | 483/756 [00:15<00:08, 33.25it/s][A
 64%|██████▍   | 487/756 [00:15<00:07, 34.47it/s][ALoss = 1.4512e-01, PNorm = 62.1144, GNorm = 0.0906, lr_0 = 8.0390e-04

 65%|██████▍   | 491/756 [00:15<00:07, 33.76it/s][A
 65%|██████▌   | 495/756 [00:15<00:07, 34.71it/s][ALoss = 1.4342e-01, PNorm = 62.1395, GNorm = 0.1023, lr_0 = 8.0303e-04

 66%|██████▌ 

 61%|██████    | 462/756 [00:13<00:08, 36.28it/s][ALoss = 1.3412e-01, PNorm = 64.8118, GNorm = 0.0966, lr_0 = 7.4246e-04

 62%|██████▏   | 466/756 [00:13<00:08, 36.05it/s][A
 62%|██████▏   | 470/756 [00:14<00:07, 36.25it/s][ALoss = 1.5315e-01, PNorm = 64.8513, GNorm = 0.1465, lr_0 = 7.4165e-04

 63%|██████▎   | 474/756 [00:14<00:07, 35.92it/s][A
 63%|██████▎   | 478/756 [00:14<00:07, 36.20it/s][A
 64%|██████▍   | 482/756 [00:14<00:07, 36.17it/s][ALoss = 1.4939e-01, PNorm = 64.9120, GNorm = 0.1344, lr_0 = 7.4084e-04

 64%|██████▍   | 486/756 [00:14<00:07, 36.13it/s][A
 65%|██████▍   | 490/756 [00:14<00:07, 36.23it/s][ALoss = 1.4451e-01, PNorm = 64.9748, GNorm = 0.1370, lr_0 = 7.4004e-04

 65%|██████▌   | 494/756 [00:14<00:07, 36.10it/s][A
 66%|██████▌   | 498/756 [00:14<00:07, 36.06it/s][A
 66%|██████▋   | 502/756 [00:14<00:07, 36.25it/s][ALoss = 1.8125e-01, PNorm = 65.0413, GNorm = 0.2109, lr_0 = 7.3923e-04

 67%|██████▋   | 506/756 [00:15<00:06, 35.85it/s][A
 67%|██████▋  


  9%|▉         | 69/756 [00:02<00:18, 36.36it/s][A
 10%|▉         | 73/756 [00:02<00:18, 36.73it/s][A
 10%|█         | 77/756 [00:02<00:18, 36.56it/s][ALoss = 1.1904e-01, PNorm = 66.5330, GNorm = 0.0789, lr_0 = 7.1305e-04

 11%|█         | 81/756 [00:02<00:18, 36.78it/s][A
 11%|█         | 85/756 [00:02<00:18, 36.82it/s][ALoss = 1.5101e-01, PNorm = 66.5776, GNorm = 0.1257, lr_0 = 7.1228e-04

 12%|█▏        | 89/756 [00:02<00:18, 36.86it/s][A
 12%|█▏        | 93/756 [00:02<00:18, 36.65it/s][A
 13%|█▎        | 97/756 [00:03<00:17, 36.89it/s][ALoss = 1.1887e-01, PNorm = 66.6100, GNorm = 0.1515, lr_0 = 7.1150e-04

 13%|█▎        | 101/756 [00:03<00:18, 35.94it/s][A
 14%|█▍        | 105/756 [00:03<00:17, 36.35it/s][ALoss = 1.6742e-01, PNorm = 66.6391, GNorm = 0.1182, lr_0 = 7.1073e-04

 14%|█▍        | 109/756 [00:03<00:20, 32.30it/s][A
 15%|█▍        | 113/756 [00:03<00:22, 28.37it/s][A
 15%|█▌        | 117/756 [00:03<00:21, 30.16it/s][ALoss = 1.2984e-01, PNorm = 66.6899, GNo

 10%|▉         | 72/756 [00:02<00:19, 34.79it/s][A
 10%|█         | 76/756 [00:02<00:19, 35.78it/s][A
 11%|█         | 80/756 [00:02<00:19, 35.38it/s][ALoss = 1.3806e-01, PNorm = 69.6911, GNorm = 0.2776, lr_0 = 6.5640e-04

 11%|█         | 84/756 [00:02<00:18, 36.09it/s][A
 12%|█▏        | 88/756 [00:02<00:18, 35.96it/s][ALoss = 1.1218e-01, PNorm = 69.7495, GNorm = 0.1673, lr_0 = 6.5569e-04

 12%|█▏        | 92/756 [00:02<00:19, 33.30it/s][A
 13%|█▎        | 96/756 [00:03<00:19, 33.41it/s][A
 13%|█▎        | 100/756 [00:03<00:19, 34.30it/s][ALoss = 1.3446e-01, PNorm = 69.8047, GNorm = 0.1322, lr_0 = 6.5498e-04

 14%|█▍        | 104/756 [00:03<00:19, 33.91it/s][A
 14%|█▍        | 108/756 [00:03<00:18, 34.97it/s][ALoss = 1.5600e-01, PNorm = 69.8446, GNorm = 0.1121, lr_0 = 6.5426e-04

 15%|█▍        | 112/756 [00:03<00:18, 34.69it/s][A
 15%|█▌        | 116/756 [00:03<00:18, 35.52it/s][A
 16%|█▌        | 120/756 [00:03<00:21, 29.11it/s][ALoss = 1.7699e-01, PNorm = 69.8966, GNo

 11%|█         | 80/756 [00:02<00:20, 33.59it/s][A
 11%|█         | 84/756 [00:02<00:19, 33.69it/s][ALoss = 1.4194e-01, PNorm = 72.6518, GNorm = 0.1350, lr_0 = 6.0419e-04

 12%|█▏        | 88/756 [00:02<00:19, 34.69it/s][A
 12%|█▏        | 92/756 [00:02<00:19, 34.14it/s][A
 13%|█▎        | 96/756 [00:03<00:18, 35.33it/s][ALoss = 1.7609e-01, PNorm = 72.6968, GNorm = 0.1222, lr_0 = 6.0353e-04

 13%|█▎        | 100/756 [00:03<00:19, 34.44it/s][A
 14%|█▍        | 104/756 [00:03<00:18, 35.49it/s][ALoss = 9.8155e-02, PNorm = 72.7526, GNorm = 0.1249, lr_0 = 6.0287e-04

 14%|█▍        | 108/756 [00:03<00:18, 34.84it/s][A
 15%|█▍        | 112/756 [00:03<00:18, 35.47it/s][A
 15%|█▌        | 116/756 [00:03<00:18, 34.65it/s][ALoss = 1.1417e-01, PNorm = 72.8007, GNorm = 0.1252, lr_0 = 6.0222e-04

 16%|█▌        | 120/756 [00:03<00:17, 35.36it/s][A
 16%|█▋        | 124/756 [00:03<00:18, 34.85it/s][ALoss = 1.1624e-01, PNorm = 72.8336, GNorm = 0.2085, lr_0 = 6.0156e-04

 17%|█▋        | 12

 12%|█▏        | 89/756 [00:02<00:18, 36.35it/s][ALoss = 1.6567e-01, PNorm = 75.4087, GNorm = 0.2598, lr_0 = 5.5619e-04

 12%|█▏        | 93/756 [00:02<00:18, 35.59it/s][A
 13%|█▎        | 97/756 [00:03<00:18, 36.26it/s][ALoss = 1.3998e-01, PNorm = 75.4438, GNorm = 0.2471, lr_0 = 5.5558e-04

 13%|█▎        | 101/756 [00:03<00:18, 35.73it/s][A
 14%|█▍        | 105/756 [00:03<00:17, 36.34it/s][A
 14%|█▍        | 109/756 [00:03<00:17, 36.01it/s][ALoss = 1.4850e-01, PNorm = 75.4899, GNorm = 0.1524, lr_0 = 5.5498e-04

 15%|█▍        | 113/756 [00:03<00:17, 36.44it/s][A
 15%|█▌        | 117/756 [00:03<00:17, 35.92it/s][ALoss = 9.5190e-02, PNorm = 75.5600, GNorm = 0.2268, lr_0 = 5.5437e-04

 16%|█▌        | 121/756 [00:03<00:17, 36.48it/s][A
 17%|█▋        | 125/756 [00:03<00:17, 36.33it/s][A
 17%|█▋        | 129/756 [00:03<00:17, 36.82it/s][ALoss = 1.7279e-01, PNorm = 75.5832, GNorm = 0.2019, lr_0 = 5.5377e-04

 18%|█▊        | 133/756 [00:04<00:17, 36.20it/s][A
 18%|█▊        | 


 65%|██████▌   | 493/756 [00:14<00:07, 34.90it/s][A
 66%|██████▌   | 497/756 [00:14<00:07, 35.73it/s][ALoss = 1.7608e-01, PNorm = 76.7959, GNorm = 0.1464, lr_0 = 5.3190e-04

 66%|██████▋   | 501/756 [00:14<00:07, 35.03it/s][A
 67%|██████▋   | 505/756 [00:14<00:07, 35.41it/s][A
 67%|██████▋   | 509/756 [00:14<00:07, 35.08it/s][ALoss = 1.1956e-01, PNorm = 76.8578, GNorm = 0.1494, lr_0 = 5.3132e-04

 68%|██████▊   | 513/756 [00:14<00:06, 35.31it/s][A
 68%|██████▊   | 517/756 [00:14<00:06, 35.28it/s][ALoss = 1.6053e-01, PNorm = 76.8882, GNorm = 0.2490, lr_0 = 5.3074e-04

 69%|██████▉   | 521/756 [00:15<00:06, 36.01it/s][A
 69%|██████▉   | 525/756 [00:15<00:06, 36.07it/s][A
 70%|██████▉   | 529/756 [00:15<00:06, 36.64it/s][ALoss = 1.1605e-01, PNorm = 76.9204, GNorm = 0.1512, lr_0 = 5.3016e-04

 71%|███████   | 533/756 [00:15<00:06, 35.77it/s][A
 71%|███████   | 537/756 [00:15<00:06, 36.05it/s][ALoss = 1.7767e-01, PNorm = 76.9422, GNorm = 0.1062, lr_0 = 5.2958e-04

 72%|███████▏


 13%|█▎        | 98/756 [00:03<00:20, 32.61it/s][A
 14%|█▎        | 103/756 [00:03<00:18, 35.14it/s][ALoss = 1.2468e-01, PNorm = 77.7784, GNorm = 0.1868, lr_0 = 5.1139e-04

 14%|█▍        | 107/756 [00:03<00:20, 32.34it/s][A
 15%|█▍        | 112/756 [00:03<00:18, 35.21it/s][ALoss = 1.8545e-01, PNorm = 77.8103, GNorm = 0.3967, lr_0 = 5.1083e-04

 15%|█▌        | 116/756 [00:03<00:20, 30.88it/s][A
 16%|█▌        | 121/756 [00:03<00:18, 34.10it/s][A
 17%|█▋        | 125/756 [00:04<00:20, 30.60it/s][ALoss = 1.4800e-01, PNorm = 77.8362, GNorm = 0.1110, lr_0 = 5.1028e-04

 17%|█▋        | 130/756 [00:04<00:21, 29.41it/s][A
 18%|█▊        | 135/756 [00:04<00:19, 32.40it/s][ALoss = 1.2325e-01, PNorm = 77.8790, GNorm = 0.1729, lr_0 = 5.0972e-04

 18%|█▊        | 139/756 [00:04<00:19, 31.72it/s][A
 19%|█▉        | 144/756 [00:04<00:17, 34.51it/s][ALoss = 1.3346e-01, PNorm = 77.9115, GNorm = 0.1006, lr_0 = 5.0917e-04

 20%|█▉        | 148/756 [00:04<00:18, 33.09it/s][A
 20%|██       

 20%|██        | 154/756 [00:05<00:17, 33.48it/s][A
 21%|██        | 158/756 [00:05<00:18, 32.09it/s][ALoss = 1.3090e-01, PNorm = 80.3081, GNorm = 0.1662, lr_0 = 4.6820e-04

 21%|██▏       | 162/756 [00:05<00:17, 33.65it/s][A
 22%|██▏       | 166/756 [00:05<00:18, 32.61it/s][ALoss = 1.5823e-01, PNorm = 80.3394, GNorm = 0.1698, lr_0 = 4.6769e-04

 22%|██▏       | 170/756 [00:05<00:17, 34.14it/s][A
 23%|██▎       | 174/756 [00:05<00:17, 32.64it/s][A
 24%|██▎       | 179/756 [00:05<00:16, 35.18it/s][ALoss = 1.1635e-01, PNorm = 80.3752, GNorm = 0.2149, lr_0 = 4.6719e-04

 24%|██▍       | 183/756 [00:05<00:17, 33.08it/s][A
 25%|██▍       | 188/756 [00:06<00:17, 32.53it/s][ALoss = 1.3558e-01, PNorm = 80.4093, GNorm = 0.2222, lr_0 = 4.6668e-04

 26%|██▌       | 193/756 [00:06<00:16, 34.74it/s][A
 26%|██▌       | 197/756 [00:06<00:16, 33.03it/s][ALoss = 1.4935e-01, PNorm = 80.4375, GNorm = 0.2647, lr_0 = 4.6617e-04

 27%|██▋       | 201/756 [00:06<00:16, 34.61it/s][A
 27%|██▋      

 22%|██▏       | 169/756 [00:05<00:18, 31.23it/s][A
 23%|██▎       | 173/756 [00:05<00:18, 31.59it/s][ALoss = 1.1921e-01, PNorm = 82.2756, GNorm = 0.2022, lr_0 = 4.3049e-04

 23%|██▎       | 177/756 [00:05<00:18, 31.79it/s][A
 24%|██▍       | 181/756 [00:06<00:17, 32.48it/s][ALoss = 1.5454e-01, PNorm = 82.2945, GNorm = 0.1358, lr_0 = 4.3002e-04

 24%|██▍       | 185/756 [00:06<00:17, 32.11it/s][A
 25%|██▌       | 189/756 [00:06<00:17, 32.61it/s][A
 26%|██▌       | 193/756 [00:06<00:17, 32.65it/s][ALoss = 1.1638e-01, PNorm = 82.3220, GNorm = 0.3090, lr_0 = 4.2955e-04

 26%|██▌       | 197/756 [00:06<00:16, 32.95it/s][A
 27%|██▋       | 201/756 [00:06<00:17, 32.51it/s][ALoss = 1.3809e-01, PNorm = 82.3461, GNorm = 0.2358, lr_0 = 4.2909e-04

 27%|██▋       | 205/756 [00:06<00:17, 32.17it/s][A
 28%|██▊       | 209/756 [00:06<00:17, 32.02it/s][A
 28%|██▊       | 213/756 [00:07<00:17, 31.86it/s][ALoss = 1.6320e-01, PNorm = 82.3631, GNorm = 0.3567, lr_0 = 4.2862e-04

 29%|██▊      


 24%|██▍       | 181/756 [00:05<00:16, 34.91it/s][A
 25%|██▍       | 186/756 [00:05<00:16, 34.56it/s][ALoss = 1.3289e-01, PNorm = 84.2073, GNorm = 0.1522, lr_0 = 3.9582e-04

 25%|██▌       | 191/756 [00:05<00:15, 36.49it/s][A
 26%|██▌       | 195/756 [00:06<00:15, 35.31it/s][ALoss = 1.2345e-01, PNorm = 84.2306, GNorm = 0.2805, lr_0 = 3.9538e-04

 26%|██▋       | 200/756 [00:06<00:15, 36.93it/s][A
 27%|██▋       | 204/756 [00:06<00:15, 35.43it/s][A
 28%|██▊       | 209/756 [00:06<00:14, 37.28it/s][ALoss = 1.6806e-01, PNorm = 84.2415, GNorm = 0.2495, lr_0 = 3.9495e-04

 28%|██▊       | 213/756 [00:06<00:15, 35.31it/s][A
 29%|██▉       | 218/756 [00:06<00:15, 34.11it/s][ALoss = 1.6230e-01, PNorm = 84.2592, GNorm = 0.2502, lr_0 = 3.9452e-04

 29%|██▉       | 223/756 [00:06<00:14, 36.03it/s][A
 30%|███       | 227/756 [00:07<00:15, 34.16it/s][ALoss = 1.1327e-01, PNorm = 84.3039, GNorm = 0.1870, lr_0 = 3.9409e-04

 31%|███       | 231/756 [00:07<00:14, 35.43it/s][A
 31%|███     


 81%|████████  | 613/756 [00:18<00:04, 34.69it/s][A
 82%|████████▏ | 618/756 [00:18<00:04, 31.82it/s][ALoss = 1.0337e-01, PNorm = 85.3008, GNorm = 0.1414, lr_0 = 3.7770e-04

 82%|████████▏ | 623/756 [00:19<00:03, 34.36it/s][A
 83%|████████▎ | 627/756 [00:19<00:04, 31.46it/s][ALoss = 1.3920e-01, PNorm = 85.3088, GNorm = 0.1910, lr_0 = 3.7729e-04

 84%|████████▎ | 632/756 [00:19<00:03, 34.31it/s][A
 84%|████████▍ | 636/756 [00:19<00:03, 32.02it/s][ALoss = 1.0490e-01, PNorm = 85.3228, GNorm = 0.1433, lr_0 = 3.7688e-04

 85%|████████▍ | 640/756 [00:19<00:03, 33.76it/s][A
 85%|████████▌ | 644/756 [00:19<00:03, 32.84it/s][A
 86%|████████▌ | 649/756 [00:19<00:03, 35.22it/s][ALoss = 1.1028e-01, PNorm = 85.3495, GNorm = 0.1388, lr_0 = 3.7647e-04

 86%|████████▋ | 653/756 [00:19<00:03, 31.71it/s][A
 87%|████████▋ | 658/756 [00:20<00:03, 30.60it/s][ALoss = 1.5207e-01, PNorm = 85.3601, GNorm = 0.1791, lr_0 = 3.7606e-04

 88%|████████▊ | 663/756 [00:20<00:02, 33.35it/s][A
 88%|████████

 85%|████████▍ | 640/756 [00:19<00:03, 35.54it/s][ALoss = 1.2253e-01, PNorm = 86.9730, GNorm = 0.1948, lr_0 = 3.4694e-04

 85%|████████▌ | 644/756 [00:19<00:03, 34.28it/s][A
 86%|████████▌ | 648/756 [00:19<00:03, 35.39it/s][A
 86%|████████▌ | 652/756 [00:19<00:03, 34.05it/s][ALoss = 1.3877e-01, PNorm = 86.9849, GNorm = 0.1149, lr_0 = 3.4656e-04

 87%|████████▋ | 656/756 [00:19<00:02, 35.19it/s][A
 87%|████████▋ | 660/756 [00:19<00:02, 34.44it/s][ALoss = 1.0205e-01, PNorm = 87.0036, GNorm = 0.1691, lr_0 = 3.4619e-04

 88%|████████▊ | 664/756 [00:20<00:02, 34.92it/s][A
 88%|████████▊ | 668/756 [00:20<00:02, 33.73it/s][A
 89%|████████▉ | 672/756 [00:20<00:02, 35.14it/s][ALoss = 1.5705e-01, PNorm = 87.0201, GNorm = 0.1854, lr_0 = 3.4581e-04

 89%|████████▉ | 676/756 [00:20<00:02, 33.63it/s][A
 90%|████████▉ | 680/756 [00:20<00:02, 35.07it/s][ALoss = 9.8647e-02, PNorm = 87.0433, GNorm = 0.1730, lr_0 = 3.4543e-04

 90%|█████████ | 684/756 [00:20<00:02, 33.60it/s][A
 91%|█████████


 86%|████████▌ | 651/756 [00:19<00:03, 34.76it/s][A
 87%|████████▋ | 655/756 [00:19<00:02, 35.42it/s][ALoss = 1.0656e-01, PNorm = 88.2971, GNorm = 0.3238, lr_0 = 3.1900e-04

 87%|████████▋ | 659/756 [00:19<00:02, 34.50it/s][A
 88%|████████▊ | 663/756 [00:19<00:02, 35.57it/s][A
 88%|████████▊ | 667/756 [00:19<00:02, 35.13it/s][ALoss = 1.3272e-01, PNorm = 88.3133, GNorm = 0.3749, lr_0 = 3.1865e-04

 89%|████████▉ | 671/756 [00:20<00:02, 35.98it/s][A
 89%|████████▉ | 675/756 [00:20<00:02, 35.56it/s][ALoss = 2.1543e-01, PNorm = 88.3113, GNorm = 0.2608, lr_0 = 3.1830e-04

 90%|████████▉ | 679/756 [00:20<00:02, 36.34it/s][A
 90%|█████████ | 683/756 [00:20<00:02, 35.76it/s][A
 91%|█████████ | 687/756 [00:20<00:01, 36.68it/s][ALoss = 1.3940e-01, PNorm = 88.3345, GNorm = 0.2218, lr_0 = 3.1795e-04

 91%|█████████▏| 691/756 [00:20<00:01, 35.78it/s][A
 92%|█████████▏| 695/756 [00:20<00:01, 36.47it/s][ALoss = 1.2174e-01, PNorm = 88.3610, GNorm = 0.2255, lr_0 = 3.1761e-04

 92%|████████

 87%|████████▋ | 657/756 [00:21<00:03, 29.14it/s][A
 87%|████████▋ | 661/756 [00:21<00:03, 30.88it/s][ALoss = 1.3201e-01, PNorm = 89.4979, GNorm = 0.2266, lr_0 = 2.9365e-04

 88%|████████▊ | 665/756 [00:21<00:03, 29.15it/s][A
 88%|████████▊ | 669/756 [00:21<00:02, 30.98it/s][ALoss = 1.3426e-01, PNorm = 89.5125, GNorm = 0.2325, lr_0 = 2.9333e-04

 89%|████████▉ | 673/756 [00:21<00:02, 29.79it/s][A
 90%|████████▉ | 677/756 [00:22<00:02, 31.64it/s][A
 90%|█████████ | 681/756 [00:22<00:02, 30.55it/s][ALoss = 1.0674e-01, PNorm = 89.5275, GNorm = 0.2050, lr_0 = 2.9301e-04

 91%|█████████ | 685/756 [00:22<00:02, 32.00it/s][A
 91%|█████████ | 689/756 [00:22<00:02, 30.03it/s][ALoss = 1.4881e-01, PNorm = 89.5382, GNorm = 0.1372, lr_0 = 2.9269e-04

 92%|█████████▏| 693/756 [00:22<00:01, 31.67it/s][A
 92%|█████████▏| 697/756 [00:22<00:01, 30.24it/s][A
 93%|█████████▎| 701/756 [00:22<00:01, 32.03it/s][ALoss = 1.2590e-01, PNorm = 89.5493, GNorm = 0.3613, lr_0 = 2.9238e-04

 93%|█████████

 93%|█████████▎| 705/756 [00:22<00:02, 25.24it/s][ALoss = 1.4033e-01, PNorm = 90.5468, GNorm = 0.4470, lr_0 = 2.6912e-04

 94%|█████████▍| 710/756 [00:22<00:01, 29.02it/s][A
 94%|█████████▍| 714/756 [00:22<00:01, 28.26it/s][ALoss = 1.2526e-01, PNorm = 90.5668, GNorm = 0.2865, lr_0 = 2.6883e-04

 95%|█████████▍| 718/756 [00:22<00:01, 30.78it/s][A
 96%|█████████▌| 722/756 [00:23<00:01, 30.17it/s][A
 96%|█████████▌| 727/756 [00:23<00:00, 32.99it/s][ALoss = 1.3911e-01, PNorm = 90.5804, GNorm = 0.1481, lr_0 = 2.6853e-04

 97%|█████████▋| 731/756 [00:23<00:00, 29.08it/s][A
 97%|█████████▋| 736/756 [00:23<00:00, 32.72it/s][ALoss = 9.3662e-02, PNorm = 90.5971, GNorm = 0.2510, lr_0 = 2.6824e-04

 98%|█████████▊| 740/756 [00:23<00:00, 30.05it/s][A
 99%|█████████▊| 745/756 [00:23<00:00, 31.36it/s][ALoss = 1.1812e-01, PNorm = 90.6099, GNorm = 0.4881, lr_0 = 2.6795e-04

 99%|█████████▉| 749/756 [00:23<00:00, 33.22it/s][A
100%|█████████▉| 753/756 [00:24<00:00, 34.34it/s][A
              


 43%|████▎     | 324/756 [00:10<00:13, 32.78it/s][A
 44%|████▎     | 329/756 [00:10<00:12, 34.94it/s][ALoss = 1.3821e-01, PNorm = 91.0692, GNorm = 0.2937, lr_0 = 2.5821e-04

 44%|████▍     | 333/756 [00:10<00:12, 32.83it/s][A
 45%|████▍     | 337/756 [00:10<00:12, 34.55it/s][A
 45%|████▌     | 341/756 [00:10<00:12, 32.55it/s][ALoss = 1.5111e-01, PNorm = 91.0734, GNorm = 0.3845, lr_0 = 2.5793e-04

 46%|████▌     | 345/756 [00:10<00:12, 34.15it/s][A
 46%|████▌     | 349/756 [00:11<00:12, 32.91it/s][ALoss = 1.3856e-01, PNorm = 91.0778, GNorm = 0.3075, lr_0 = 2.5765e-04

 47%|████▋     | 353/756 [00:11<00:11, 34.32it/s][A
 47%|████▋     | 357/756 [00:11<00:13, 30.11it/s][A
 48%|████▊     | 361/756 [00:11<00:12, 32.31it/s][ALoss = 1.6198e-01, PNorm = 91.0900, GNorm = 0.4697, lr_0 = 2.5736e-04

 48%|████▊     | 365/756 [00:11<00:12, 30.66it/s][A
 49%|████▉     | 370/756 [00:11<00:12, 30.45it/s][ALoss = 1.4729e-01, PNorm = 91.1067, GNorm = 0.3652, lr_0 = 2.5708e-04

 49%|████▉   


 50%|█████     | 379/756 [00:11<00:11, 33.85it/s][A
 51%|█████     | 384/756 [00:11<00:10, 36.16it/s][ALoss = 1.2649e-01, PNorm = 92.0295, GNorm = 0.3290, lr_0 = 2.3638e-04

 51%|█████▏    | 388/756 [00:11<00:10, 34.65it/s][A
 52%|█████▏    | 393/756 [00:11<00:09, 36.55it/s][ALoss = 1.6668e-01, PNorm = 92.0445, GNorm = 0.5567, lr_0 = 2.3612e-04

 53%|█████▎    | 397/756 [00:11<00:10, 34.79it/s][A
 53%|█████▎    | 402/756 [00:12<00:09, 36.85it/s][A
 54%|█████▎    | 406/756 [00:12<00:09, 35.18it/s][ALoss = 1.1567e-01, PNorm = 92.0611, GNorm = 0.2366, lr_0 = 2.3586e-04

 54%|█████▍    | 410/756 [00:12<00:09, 36.39it/s][A
 55%|█████▍    | 414/756 [00:12<00:09, 35.30it/s][ALoss = 1.1066e-01, PNorm = 92.0776, GNorm = 0.1699, lr_0 = 2.3561e-04

 55%|█████▌    | 419/756 [00:12<00:10, 33.54it/s][A
 56%|█████▌    | 423/756 [00:12<00:09, 35.02it/s][ALoss = 1.3103e-01, PNorm = 92.0901, GNorm = 0.3879, lr_0 = 2.3535e-04

 56%|█████▋    | 427/756 [00:12<00:09, 33.53it/s][A
 57%|█████▋  

 54%|█████▍    | 409/756 [00:12<00:10, 32.66it/s][ALoss = 1.3139e-01, PNorm = 92.8177, GNorm = 0.2840, lr_0 = 2.1712e-04

 55%|█████▍    | 413/756 [00:12<00:10, 33.62it/s][A
 55%|█████▌    | 417/756 [00:12<00:09, 33.95it/s][ALoss = 1.2787e-01, PNorm = 92.8212, GNorm = 0.1879, lr_0 = 2.1689e-04

 56%|█████▌    | 421/756 [00:12<00:09, 34.48it/s][A
 56%|█████▌    | 425/756 [00:13<00:09, 33.74it/s][A
 57%|█████▋    | 429/756 [00:13<00:09, 34.39it/s][ALoss = 1.2461e-01, PNorm = 92.8317, GNorm = 0.2081, lr_0 = 2.1665e-04

 57%|█████▋    | 433/756 [00:13<00:09, 33.77it/s][A
 58%|█████▊    | 437/756 [00:13<00:09, 34.46it/s][ALoss = 1.0444e-01, PNorm = 92.8487, GNorm = 0.1772, lr_0 = 2.1642e-04

 58%|█████▊    | 441/756 [00:13<00:09, 33.98it/s][A
 59%|█████▉    | 445/756 [00:13<00:09, 34.55it/s][A
 59%|█████▉    | 449/756 [00:13<00:08, 34.34it/s][ALoss = 1.1255e-01, PNorm = 92.8578, GNorm = 0.3448, lr_0 = 2.1618e-04

 60%|█████▉    | 453/756 [00:13<00:08, 34.68it/s][A
 60%|██████   


  2%|▏         | 16/756 [00:00<00:26, 27.88it/s][A
  3%|▎         | 20/756 [00:00<00:24, 30.33it/s][A
  3%|▎         | 24/756 [00:01<00:22, 32.62it/s][ALoss = 1.1261e-01, PNorm = 93.1465, GNorm = 0.3965, lr_0 = 2.0852e-04

  4%|▎         | 28/756 [00:01<00:21, 33.15it/s][A
  4%|▍         | 32/756 [00:01<00:21, 33.95it/s][ALoss = 1.1947e-01, PNorm = 93.1542, GNorm = 0.3196, lr_0 = 2.0830e-04

  5%|▍         | 36/756 [00:01<00:21, 33.74it/s][A
  5%|▌         | 40/756 [00:01<00:20, 34.29it/s][A
  6%|▌         | 44/756 [00:01<00:25, 27.66it/s][ALoss = 1.1402e-01, PNorm = 93.1589, GNorm = 0.2025, lr_0 = 2.0807e-04

  6%|▋         | 48/756 [00:01<00:23, 29.58it/s][A
  7%|▋         | 52/756 [00:02<00:28, 24.94it/s][ALoss = 1.5313e-01, PNorm = 93.1600, GNorm = 0.3291, lr_0 = 2.0784e-04

  7%|▋         | 56/756 [00:02<00:25, 28.00it/s][A
  8%|▊         | 60/756 [00:02<00:23, 29.89it/s][A
  8%|▊         | 64/756 [00:02<00:21, 31.92it/s][ALoss = 1.5813e-01, PNorm = 93.1645, GNorm = 

  3%|▎         | 21/756 [00:01<00:25, 28.42it/s][A
  3%|▎         | 25/756 [00:01<00:23, 30.61it/s][A
  4%|▍         | 29/756 [00:01<00:22, 32.09it/s][ALoss = 8.4182e-02, PNorm = 93.7561, GNorm = 0.3916, lr_0 = 1.9196e-04

  4%|▍         | 33/756 [00:01<00:21, 32.94it/s][A
  5%|▍         | 37/756 [00:01<00:21, 33.71it/s][ALoss = 1.1594e-01, PNorm = 93.7702, GNorm = 0.3659, lr_0 = 1.9175e-04

  5%|▌         | 41/756 [00:01<00:21, 33.93it/s][A
  6%|▌         | 45/756 [00:01<00:20, 34.37it/s][A
  6%|▋         | 49/756 [00:01<00:20, 34.87it/s][ALoss = 1.3843e-01, PNorm = 93.7740, GNorm = 0.4615, lr_0 = 1.9154e-04

  7%|▋         | 53/756 [00:01<00:19, 35.39it/s][A
  8%|▊         | 57/756 [00:02<00:19, 35.80it/s][ALoss = 1.1808e-01, PNorm = 93.7760, GNorm = 0.3122, lr_0 = 1.9133e-04

  8%|▊         | 61/756 [00:02<00:19, 36.18it/s][A
  9%|▊         | 65/756 [00:02<00:19, 36.21it/s][A
  9%|▉         | 69/756 [00:02<00:18, 36.63it/s][ALoss = 1.1357e-01, PNorm = 93.7849, GNorm = 0

  4%|▍         | 29/756 [00:01<00:22, 32.54it/s][A
  4%|▍         | 34/756 [00:01<00:22, 31.62it/s][ALoss = 1.2449e-01, PNorm = 94.3081, GNorm = 0.3975, lr_0 = 1.7669e-04

  5%|▌         | 38/756 [00:01<00:21, 33.54it/s][A
  6%|▌         | 42/756 [00:01<00:21, 32.59it/s][ALoss = 1.0254e-01, PNorm = 94.3206, GNorm = 0.2669, lr_0 = 1.7650e-04

  6%|▌         | 47/756 [00:01<00:20, 35.21it/s][A
  7%|▋         | 51/756 [00:01<00:20, 34.48it/s][ALoss = 1.1333e-01, PNorm = 94.3274, GNorm = 0.2661, lr_0 = 1.7630e-04

  7%|▋         | 56/756 [00:01<00:19, 36.58it/s][A
  8%|▊         | 60/756 [00:02<00:20, 34.77it/s][ALoss = 1.2892e-01, PNorm = 94.3334, GNorm = 0.3428, lr_0 = 1.7611e-04

  9%|▊         | 65/756 [00:02<00:18, 36.47it/s][A
  9%|▉         | 69/756 [00:02<00:19, 34.87it/s][A
 10%|▉         | 74/756 [00:02<00:20, 34.08it/s][ALoss = 1.5600e-01, PNorm = 94.3399, GNorm = 0.4130, lr_0 = 1.7592e-04

 10%|█         | 78/756 [00:02<00:19, 35.50it/s][A
 11%|█         | 82/756 [0

 11%|█         | 81/756 [00:02<00:20, 33.41it/s][A
 11%|█         | 85/756 [00:03<00:21, 31.52it/s][ALoss = 1.4395e-01, PNorm = 94.8560, GNorm = 0.2405, lr_0 = 1.6177e-04

 12%|█▏        | 90/756 [00:03<00:20, 31.90it/s][A
 13%|█▎        | 95/756 [00:03<00:19, 34.53it/s][ALoss = 1.2881e-01, PNorm = 94.8598, GNorm = 0.3258, lr_0 = 1.6159e-04

 13%|█▎        | 99/756 [00:03<00:19, 33.11it/s][A
 14%|█▎        | 103/756 [00:03<00:18, 34.55it/s][A
 14%|█▍        | 107/756 [00:03<00:20, 31.64it/s][ALoss = 1.2384e-01, PNorm = 94.8658, GNorm = 0.4112, lr_0 = 1.6142e-04

 15%|█▍        | 111/756 [00:03<00:19, 33.56it/s][A
 15%|█▌        | 115/756 [00:03<00:20, 30.86it/s][ALoss = 8.8365e-02, PNorm = 94.8770, GNorm = 0.3296, lr_0 = 1.6124e-04

 16%|█▌        | 119/756 [00:04<00:19, 32.85it/s][A
 16%|█▋        | 123/756 [00:04<00:19, 32.09it/s][A
 17%|█▋        | 128/756 [00:04<00:17, 34.89it/s][ALoss = 1.2660e-01, PNorm = 94.8812, GNorm = 0.2752, lr_0 = 1.6106e-04

 17%|█▋        | 13


 68%|██████▊   | 511/756 [00:15<00:06, 36.25it/s][A
 68%|██████▊   | 515/756 [00:15<00:06, 34.92it/s][ALoss = 9.7675e-02, PNorm = 95.1041, GNorm = 0.2910, lr_0 = 1.5437e-04

 69%|██████▉   | 520/756 [00:15<00:06, 36.89it/s][A
 69%|██████▉   | 524/756 [00:16<00:06, 35.23it/s][ALoss = 1.3757e-01, PNorm = 95.1071, GNorm = 0.5159, lr_0 = 1.5420e-04

 70%|██████▉   | 529/756 [00:16<00:06, 37.01it/s][A
 71%|███████   | 533/756 [00:16<00:06, 35.32it/s][A
 71%|███████   | 538/756 [00:16<00:06, 34.63it/s][ALoss = 7.7593e-02, PNorm = 95.1133, GNorm = 0.1293, lr_0 = 1.5403e-04

 72%|███████▏  | 543/756 [00:16<00:05, 36.48it/s][A
 72%|███████▏  | 547/756 [00:16<00:05, 34.84it/s][ALoss = 1.0834e-01, PNorm = 95.1201, GNorm = 0.1143, lr_0 = 1.5386e-04

 73%|███████▎  | 552/756 [00:16<00:05, 36.61it/s][A
 74%|███████▎  | 556/756 [00:16<00:05, 35.12it/s][ALoss = 1.0655e-01, PNorm = 95.1253, GNorm = 0.2809, lr_0 = 1.5370e-04

 74%|███████▍  | 561/756 [00:17<00:05, 36.96it/s][A
 75%|███████▍


 71%|███████   | 537/756 [00:17<00:07, 27.88it/s][A
 72%|███████▏  | 541/756 [00:17<00:07, 29.84it/s][ALoss = 1.1325e-01, PNorm = 95.5024, GNorm = 0.4636, lr_0 = 1.4178e-04

 72%|███████▏  | 545/756 [00:18<00:07, 27.62it/s][A
 73%|███████▎  | 549/756 [00:18<00:06, 29.77it/s][A
 73%|███████▎  | 553/756 [00:18<00:07, 27.96it/s][ALoss = 1.3170e-01, PNorm = 95.5078, GNorm = 0.3526, lr_0 = 1.4162e-04

 74%|███████▎  | 557/756 [00:18<00:06, 29.43it/s][A
 74%|███████▍  | 561/756 [00:18<00:06, 28.44it/s][ALoss = 1.3376e-01, PNorm = 95.5154, GNorm = 0.6594, lr_0 = 1.4147e-04

 75%|███████▍  | 565/756 [00:18<00:06, 30.33it/s][A
 75%|███████▌  | 569/756 [00:18<00:06, 30.48it/s][A
 76%|███████▌  | 573/756 [00:18<00:05, 32.03it/s][ALoss = 1.2704e-01, PNorm = 95.5200, GNorm = 0.2975, lr_0 = 1.4132e-04

 76%|███████▋  | 577/756 [00:19<00:05, 30.90it/s][A
 77%|███████▋  | 581/756 [00:19<00:05, 32.42it/s][ALoss = 9.7431e-02, PNorm = 95.5247, GNorm = 0.2334, lr_0 = 1.4116e-04

 77%|███████▋


 19%|█▊        | 141/756 [00:04<00:17, 34.68it/s][A
 19%|█▉        | 145/756 [00:04<00:17, 35.05it/s][ALoss = 1.2184e-01, PNorm = 95.6919, GNorm = 0.1976, lr_0 = 1.3631e-04

 20%|█▉        | 149/756 [00:04<00:17, 34.61it/s][A
 20%|██        | 153/756 [00:04<00:17, 35.19it/s][A
 21%|██        | 157/756 [00:04<00:16, 35.50it/s][ALoss = 1.1132e-01, PNorm = 95.6944, GNorm = 0.3040, lr_0 = 1.3616e-04

 21%|██▏       | 161/756 [00:05<00:16, 35.27it/s][A
 22%|██▏       | 165/756 [00:05<00:17, 34.70it/s][ALoss = 1.3908e-01, PNorm = 95.7019, GNorm = 0.5533, lr_0 = 1.3601e-04

 22%|██▏       | 169/756 [00:05<00:17, 34.40it/s][A
 23%|██▎       | 173/756 [00:05<00:17, 34.05it/s][A
 23%|██▎       | 177/756 [00:05<00:16, 34.47it/s][ALoss = 1.3631e-01, PNorm = 95.7038, GNorm = 0.3815, lr_0 = 1.3587e-04

 24%|██▍       | 181/756 [00:05<00:16, 34.18it/s][A
 24%|██▍       | 185/756 [00:05<00:16, 34.67it/s][ALoss = 1.2041e-01, PNorm = 95.7056, GNorm = 0.2837, lr_0 = 1.3572e-04

 25%|██▌     

 19%|█▉        | 143/756 [00:04<00:17, 35.24it/s][A
 19%|█▉        | 147/756 [00:04<00:17, 34.18it/s][A
 20%|█▉        | 151/756 [00:04<00:17, 35.13it/s][ALoss = 1.1179e-01, PNorm = 96.0337, GNorm = 0.3099, lr_0 = 1.2548e-04

 21%|██        | 155/756 [00:04<00:17, 33.98it/s][A
 21%|██        | 159/756 [00:05<00:16, 35.22it/s][ALoss = 1.0141e-01, PNorm = 96.0399, GNorm = 0.2873, lr_0 = 1.2535e-04

 22%|██▏       | 163/756 [00:05<00:17, 34.11it/s][A
 22%|██▏       | 167/756 [00:05<00:16, 35.33it/s][A
 23%|██▎       | 171/756 [00:05<00:17, 34.02it/s][ALoss = 1.1923e-01, PNorm = 96.0458, GNorm = 0.5660, lr_0 = 1.2521e-04

 23%|██▎       | 175/756 [00:05<00:16, 35.12it/s][A
 24%|██▎       | 179/756 [00:05<00:16, 34.08it/s][ALoss = 1.2012e-01, PNorm = 96.0498, GNorm = 0.4026, lr_0 = 1.2507e-04

 24%|██▍       | 183/756 [00:05<00:16, 35.00it/s][A
 25%|██▍       | 187/756 [00:05<00:16, 33.99it/s][A
 25%|██▌       | 191/756 [00:06<00:16, 35.23it/s][ALoss = 1.0805e-01, PNorm = 96.05

 21%|██        | 158/756 [00:04<00:16, 35.86it/s][A
 21%|██▏       | 162/756 [00:04<00:16, 35.69it/s][A
 22%|██▏       | 166/756 [00:04<00:16, 36.39it/s][ALoss = 1.1639e-01, PNorm = 96.3237, GNorm = 0.3001, lr_0 = 1.1537e-04

 22%|██▏       | 170/756 [00:05<00:16, 35.82it/s][A
 23%|██▎       | 174/756 [00:05<00:15, 36.52it/s][ALoss = 1.3728e-01, PNorm = 96.3298, GNorm = 0.4598, lr_0 = 1.1525e-04

 24%|██▎       | 178/756 [00:05<00:18, 31.99it/s][A
 24%|██▍       | 182/756 [00:05<00:18, 31.18it/s][A
 25%|██▍       | 186/756 [00:05<00:20, 27.64it/s][ALoss = 1.2789e-01, PNorm = 96.3345, GNorm = 0.6901, lr_0 = 1.1512e-04

 25%|██▌       | 190/756 [00:05<00:18, 29.81it/s][A
 26%|██▌       | 194/756 [00:05<00:18, 29.90it/s][ALoss = 9.7372e-02, PNorm = 96.3399, GNorm = 0.3838, lr_0 = 1.1500e-04

 26%|██▋       | 199/756 [00:05<00:16, 33.10it/s][A
 27%|██▋       | 203/756 [00:06<00:16, 34.01it/s][A
 27%|██▋       | 207/756 [00:06<00:15, 35.46it/s][ALoss = 1.1022e-01, PNorm = 96.34

 23%|██▎       | 177/756 [00:05<00:21, 27.41it/s][A
 24%|██▍       | 181/756 [00:06<00:19, 29.78it/s][ALoss = 9.1712e-02, PNorm = 96.5991, GNorm = 0.3741, lr_0 = 1.0609e-04

 24%|██▍       | 185/756 [00:06<00:19, 28.63it/s][A
 25%|██▌       | 190/756 [00:06<00:17, 31.85it/s][ALoss = 1.0537e-01, PNorm = 96.6035, GNorm = 0.4690, lr_0 = 1.0598e-04

 26%|██▌       | 194/756 [00:06<00:18, 30.39it/s][A
 26%|██▌       | 198/756 [00:06<00:17, 32.52it/s][ALoss = 1.0420e-01, PNorm = 96.6094, GNorm = 0.4770, lr_0 = 1.0586e-04

 27%|██▋       | 202/756 [00:06<00:18, 30.06it/s][A
 27%|██▋       | 207/756 [00:06<00:16, 32.98it/s][A
 28%|██▊       | 211/756 [00:07<00:17, 31.07it/s][ALoss = 1.1417e-01, PNorm = 96.6133, GNorm = 0.2333, lr_0 = 1.0575e-04

 28%|██▊       | 215/756 [00:07<00:16, 32.93it/s][A
 29%|██▉       | 219/756 [00:07<00:18, 29.24it/s][ALoss = 1.2577e-01, PNorm = 96.6148, GNorm = 0.5884, lr_0 = 1.0563e-04

 30%|██▉       | 224/756 [00:07<00:16, 32.29it/s][A
 30%|███      

 21%|██        | 157/756 [00:04<00:17, 34.64it/s][ALoss = 2.2466e-01, PNorm = 50.5361, GNorm = 0.2322, lr_0 = 1.9596e-04

 21%|██▏       | 161/756 [00:05<00:17, 34.65it/s][A
 22%|██▏       | 165/756 [00:05<00:16, 34.81it/s][A
 22%|██▏       | 169/756 [00:05<00:16, 34.97it/s][ALoss = 1.9619e-01, PNorm = 50.5455, GNorm = 0.2630, lr_0 = 2.0192e-04

 23%|██▎       | 173/756 [00:05<00:16, 34.72it/s][A
 23%|██▎       | 177/756 [00:05<00:16, 34.98it/s][ALoss = 1.4687e-01, PNorm = 50.5510, GNorm = 0.2360, lr_0 = 2.0788e-04

 24%|██▍       | 181/756 [00:05<00:16, 35.40it/s][A
 24%|██▍       | 185/756 [00:05<00:15, 36.00it/s][A
 25%|██▌       | 189/756 [00:05<00:15, 36.12it/s][ALoss = 2.0553e-01, PNorm = 50.5505, GNorm = 0.4038, lr_0 = 2.1384e-04

 26%|██▌       | 193/756 [00:05<00:15, 35.81it/s][A
 26%|██▌       | 197/756 [00:06<00:15, 35.96it/s][ALoss = 1.8313e-01, PNorm = 50.5646, GNorm = 0.2325, lr_0 = 2.1980e-04

 27%|██▋       | 201/756 [00:06<00:15, 35.77it/s][A
 27%|██▋      


 74%|███████▍  | 561/756 [00:16<00:05, 35.75it/s][A
 75%|███████▍  | 565/756 [00:16<00:05, 35.10it/s][A
 75%|███████▌  | 569/756 [00:16<00:05, 34.39it/s][ALoss = 1.6272e-01, PNorm = 50.8520, GNorm = 0.1104, lr_0 = 4.4033e-04

 76%|███████▌  | 573/756 [00:16<00:05, 34.25it/s][A
 76%|███████▋  | 577/756 [00:17<00:05, 34.24it/s][ALoss = 1.6650e-01, PNorm = 50.8580, GNorm = 0.1011, lr_0 = 4.4629e-04

 77%|███████▋  | 581/756 [00:17<00:05, 34.18it/s][A
 77%|███████▋  | 585/756 [00:17<00:04, 34.41it/s][A
 78%|███████▊  | 589/756 [00:17<00:04, 34.56it/s][ALoss = 1.3505e-01, PNorm = 50.8787, GNorm = 0.1728, lr_0 = 4.5225e-04

 78%|███████▊  | 593/756 [00:17<00:04, 34.49it/s][A
 79%|███████▉  | 597/756 [00:17<00:04, 34.72it/s][ALoss = 1.7012e-01, PNorm = 50.8906, GNorm = 0.3925, lr_0 = 4.5821e-04

 79%|███████▉  | 601/756 [00:17<00:04, 34.61it/s][A
 80%|████████  | 605/756 [00:17<00:04, 34.65it/s][A
 81%|████████  | 609/756 [00:17<00:04, 34.72it/s][ALoss = 1.8189e-01, PNorm = 50.8


 22%|██▏       | 167/756 [00:05<00:17, 33.11it/s][A
 23%|██▎       | 171/756 [00:05<00:18, 31.81it/s][ALoss = 1.4871e-01, PNorm = 51.3778, GNorm = 0.1002, lr_0 = 6.5550e-04

 23%|██▎       | 175/756 [00:05<00:17, 33.16it/s][A
 24%|██▎       | 179/756 [00:06<00:22, 25.47it/s][A
 24%|██▍       | 183/756 [00:06<00:20, 28.25it/s][ALoss = 1.1677e-01, PNorm = 51.4163, GNorm = 0.0916, lr_0 = 6.6146e-04

 25%|██▍       | 187/756 [00:06<00:20, 28.13it/s][A
 25%|██▌       | 191/756 [00:06<00:18, 30.39it/s][ALoss = 1.7275e-01, PNorm = 51.4197, GNorm = 0.1182, lr_0 = 6.6742e-04

 26%|██▌       | 195/756 [00:06<00:18, 29.97it/s][A
 26%|██▋       | 199/756 [00:06<00:17, 32.17it/s][A
 27%|██▋       | 203/756 [00:06<00:17, 31.15it/s][ALoss = 1.4413e-01, PNorm = 51.4825, GNorm = 0.1113, lr_0 = 6.7338e-04

 27%|██▋       | 207/756 [00:06<00:16, 32.67it/s][A
 28%|██▊       | 211/756 [00:06<00:17, 31.50it/s][ALoss = 1.9793e-01, PNorm = 51.5168, GNorm = 0.2043, lr_0 = 6.7934e-04

 28%|██▊     


 76%|███████▋  | 578/756 [00:18<00:05, 30.21it/s][A
 77%|███████▋  | 583/756 [00:18<00:05, 32.94it/s][ALoss = 1.5562e-01, PNorm = 52.4712, GNorm = 0.0837, lr_0 = 8.9987e-04

 78%|███████▊  | 587/756 [00:18<00:05, 30.88it/s][A
 78%|███████▊  | 591/756 [00:18<00:05, 32.94it/s][ALoss = 1.4514e-01, PNorm = 52.5001, GNorm = 0.1510, lr_0 = 9.0583e-04

 79%|███████▊  | 595/756 [00:19<00:05, 32.03it/s][A
 79%|███████▉  | 600/756 [00:19<00:04, 34.47it/s][A
 80%|███████▉  | 604/756 [00:19<00:04, 32.67it/s][ALoss = 1.2681e-01, PNorm = 52.5221, GNorm = 0.1785, lr_0 = 9.1179e-04

 81%|████████  | 609/756 [00:19<00:04, 35.09it/s][A
 81%|████████  | 613/756 [00:19<00:04, 33.70it/s][ALoss = 1.4444e-01, PNorm = 52.5460, GNorm = 0.1356, lr_0 = 9.1775e-04

 82%|████████▏ | 618/756 [00:19<00:04, 33.09it/s][A
 82%|████████▏ | 623/756 [00:19<00:03, 35.13it/s][ALoss = 1.5661e-01, PNorm = 52.5811, GNorm = 0.1199, lr_0 = 9.2371e-04

 83%|████████▎ | 627/756 [00:20<00:03, 33.76it/s][A
 83%|████████


 78%|███████▊  | 590/756 [00:17<00:04, 33.90it/s][A
 79%|███████▊  | 594/756 [00:17<00:04, 34.99it/s][A
 79%|███████▉  | 598/756 [00:17<00:04, 34.45it/s][ALoss = 1.1658e-01, PNorm = 55.5714, GNorm = 0.0839, lr_0 = 9.3643e-04

 80%|███████▉  | 602/756 [00:17<00:04, 35.13it/s][A
 80%|████████  | 606/756 [00:17<00:04, 34.54it/s][ALoss = 1.2255e-01, PNorm = 55.6014, GNorm = 0.1281, lr_0 = 9.3541e-04

 81%|████████  | 610/756 [00:18<00:04, 35.20it/s][A
 81%|████████  | 614/756 [00:18<00:04, 35.10it/s][A
 82%|████████▏ | 618/756 [00:18<00:03, 36.28it/s][ALoss = 1.4780e-01, PNorm = 55.6288, GNorm = 0.1265, lr_0 = 9.3439e-04

 82%|████████▏ | 622/756 [00:18<00:03, 35.25it/s][A
 83%|████████▎ | 626/756 [00:18<00:03, 35.92it/s][ALoss = 1.4454e-01, PNorm = 55.6748, GNorm = 0.1318, lr_0 = 9.3338e-04

 83%|████████▎ | 630/756 [00:18<00:03, 34.60it/s][A
 84%|████████▍ | 634/756 [00:18<00:03, 35.25it/s][A
 84%|████████▍ | 638/756 [00:18<00:03, 34.11it/s][ALoss = 1.5152e-01, PNorm = 55.6

 80%|███████▉  | 602/756 [00:17<00:04, 35.54it/s][ALoss = 1.3691e-01, PNorm = 59.0277, GNorm = 0.1432, lr_0 = 8.6204e-04

 80%|████████  | 606/756 [00:17<00:04, 35.16it/s][A
 81%|████████  | 610/756 [00:17<00:04, 35.92it/s][ALoss = 1.2173e-01, PNorm = 59.0599, GNorm = 0.0757, lr_0 = 8.6110e-04

 81%|████████  | 614/756 [00:17<00:04, 34.93it/s][A
 82%|████████▏ | 618/756 [00:17<00:03, 35.48it/s][A
 82%|████████▏ | 622/756 [00:17<00:03, 34.58it/s][ALoss = 1.4254e-01, PNorm = 59.1003, GNorm = 0.1358, lr_0 = 8.6016e-04

 83%|████████▎ | 626/756 [00:17<00:03, 35.43it/s][A
 83%|████████▎ | 630/756 [00:17<00:03, 34.96it/s][ALoss = 1.6338e-01, PNorm = 59.1338, GNorm = 0.1851, lr_0 = 8.5922e-04

 84%|████████▍ | 634/756 [00:18<00:03, 35.63it/s][A
 84%|████████▍ | 638/756 [00:18<00:03, 34.93it/s][A
 85%|████████▍ | 642/756 [00:18<00:03, 35.81it/s][ALoss = 1.7835e-01, PNorm = 59.1735, GNorm = 0.1515, lr_0 = 8.5829e-04

 85%|████████▌ | 646/756 [00:18<00:03, 35.03it/s][A
 86%|████████▌


 81%|████████  | 609/756 [00:18<00:04, 33.18it/s][A
 81%|████████  | 613/756 [00:18<00:04, 34.06it/s][A
 82%|████████▏ | 617/756 [00:19<00:04, 33.16it/s][ALoss = 1.4488e-01, PNorm = 62.8363, GNorm = 0.1392, lr_0 = 7.9260e-04

 82%|████████▏ | 621/756 [00:19<00:03, 34.12it/s][A
 83%|████████▎ | 625/756 [00:19<00:03, 33.67it/s][ALoss = 2.0504e-01, PNorm = 62.8720, GNorm = 0.1073, lr_0 = 7.9174e-04

 83%|████████▎ | 629/756 [00:19<00:03, 34.38it/s][A
 84%|████████▎ | 633/756 [00:19<00:03, 33.85it/s][A
 84%|████████▍ | 637/756 [00:19<00:03, 34.24it/s][ALoss = 1.5671e-01, PNorm = 62.9276, GNorm = 0.0995, lr_0 = 7.9088e-04

 85%|████████▍ | 641/756 [00:19<00:03, 33.32it/s][A
 85%|████████▌ | 645/756 [00:19<00:03, 34.26it/s][ALoss = 1.4899e-01, PNorm = 62.9617, GNorm = 0.1408, lr_0 = 7.9001e-04

 86%|████████▌ | 649/756 [00:20<00:03, 33.72it/s][A
 86%|████████▋ | 653/756 [00:20<00:02, 34.42it/s][A
 87%|████████▋ | 657/756 [00:20<00:02, 33.49it/s][ALoss = 1.4728e-01, PNorm = 63.0


 28%|██▊       | 215/756 [00:06<00:14, 36.41it/s][A
 29%|██▉       | 219/756 [00:06<00:15, 35.55it/s][ALoss = 1.7569e-01, PNorm = 64.3339, GNorm = 0.1501, lr_0 = 7.6212e-04

 29%|██▉       | 223/756 [00:06<00:14, 36.07it/s][A
 30%|███       | 227/756 [00:06<00:14, 35.55it/s][A
 31%|███       | 231/756 [00:07<00:14, 36.39it/s][ALoss = 1.6393e-01, PNorm = 64.3784, GNorm = 0.3517, lr_0 = 7.6129e-04

 31%|███       | 235/756 [00:07<00:14, 35.76it/s][A
 32%|███▏      | 239/756 [00:07<00:14, 36.56it/s][ALoss = 1.4814e-01, PNorm = 64.4231, GNorm = 0.1179, lr_0 = 7.6046e-04

 32%|███▏      | 243/756 [00:07<00:14, 35.46it/s][A
 33%|███▎      | 247/756 [00:07<00:14, 36.33it/s][A
 33%|███▎      | 251/756 [00:07<00:14, 35.50it/s][ALoss = 1.4308e-01, PNorm = 64.4901, GNorm = 0.1240, lr_0 = 7.5964e-04

 34%|███▎      | 255/756 [00:07<00:13, 36.05it/s][A
 34%|███▍      | 259/756 [00:07<00:14, 35.33it/s][ALoss = 1.4621e-01, PNorm = 64.5545, GNorm = 0.1209, lr_0 = 7.5881e-04

 35%|███▍    


 32%|███▏      | 239/756 [00:07<00:16, 31.38it/s][A
 32%|███▏      | 243/756 [00:08<00:19, 26.36it/s][ALoss = 1.7185e-01, PNorm = 67.6004, GNorm = 0.2786, lr_0 = 6.9997e-04

 33%|███▎      | 248/756 [00:08<00:16, 30.27it/s][A
 33%|███▎      | 252/756 [00:08<00:17, 29.07it/s][A
 34%|███▍      | 257/756 [00:08<00:15, 32.21it/s][ALoss = 1.7907e-01, PNorm = 67.6380, GNorm = 0.1429, lr_0 = 6.9921e-04

 35%|███▍      | 261/756 [00:08<00:16, 29.12it/s][A
 35%|███▌      | 266/756 [00:08<00:17, 27.88it/s][ALoss = 1.3144e-01, PNorm = 67.7114, GNorm = 0.1144, lr_0 = 6.9845e-04

 36%|███▌      | 270/756 [00:08<00:16, 30.37it/s][A
 36%|███▌      | 274/756 [00:09<00:17, 27.46it/s][ALoss = 2.0095e-01, PNorm = 67.7566, GNorm = 0.1371, lr_0 = 6.9769e-04

 37%|███▋      | 278/756 [00:09<00:15, 29.89it/s][A
 37%|███▋      | 282/756 [00:09<00:16, 28.44it/s][A
 38%|███▊      | 287/756 [00:09<00:14, 31.59it/s][ALoss = 1.1571e-01, PNorm = 67.8301, GNorm = 0.1938, lr_0 = 6.9693e-04

 38%|███▊    


 88%|████████▊ | 669/756 [00:21<00:02, 33.53it/s][A
 89%|████████▉ | 674/756 [00:21<00:02, 33.20it/s][ALoss = 1.5656e-01, PNorm = 69.3728, GNorm = 0.1733, lr_0 = 6.6794e-04

 90%|████████▉ | 679/756 [00:21<00:02, 35.38it/s][A
 90%|█████████ | 683/756 [00:21<00:02, 33.94it/s][ALoss = 1.2706e-01, PNorm = 69.4414, GNorm = 0.1324, lr_0 = 6.6722e-04

 91%|█████████ | 688/756 [00:21<00:01, 35.88it/s][A
 92%|█████████▏| 692/756 [00:21<00:01, 34.53it/s][A
 92%|█████████▏| 697/756 [00:21<00:01, 36.75it/s][ALoss = 1.2154e-01, PNorm = 69.4721, GNorm = 0.0896, lr_0 = 6.6649e-04

 93%|█████████▎| 701/756 [00:22<00:01, 30.15it/s][A
 93%|█████████▎| 706/756 [00:22<00:01, 31.20it/s][ALoss = 1.5913e-01, PNorm = 69.4893, GNorm = 0.0961, lr_0 = 6.6576e-04

 94%|█████████▍| 711/756 [00:22<00:01, 33.60it/s][A
 95%|█████████▍| 715/756 [00:22<00:01, 32.82it/s][ALoss = 1.4461e-01, PNorm = 69.5214, GNorm = 0.1123, lr_0 = 6.6504e-04

 95%|█████████▌| 720/756 [00:22<00:01, 35.35it/s][A
 96%|████████

 91%|█████████ | 686/756 [00:19<00:02, 34.72it/s][A
 91%|█████████▏| 690/756 [00:20<00:01, 34.69it/s][ALoss = 1.4183e-01, PNorm = 72.3895, GNorm = 0.1341, lr_0 = 6.1421e-04

 92%|█████████▏| 694/756 [00:20<00:01, 34.62it/s][A
 92%|█████████▏| 698/756 [00:20<00:01, 34.48it/s][ALoss = 1.4819e-01, PNorm = 72.4282, GNorm = 0.1592, lr_0 = 6.1354e-04

 93%|█████████▎| 702/756 [00:20<00:01, 34.54it/s][A
 93%|█████████▎| 706/756 [00:20<00:01, 34.50it/s][A
 94%|█████████▍| 710/756 [00:20<00:01, 35.03it/s][ALoss = 1.3595e-01, PNorm = 72.4634, GNorm = 0.2197, lr_0 = 6.1287e-04

 94%|█████████▍| 714/756 [00:20<00:01, 34.81it/s][A
 95%|█████████▍| 718/756 [00:20<00:01, 34.93it/s][ALoss = 1.1429e-01, PNorm = 72.4959, GNorm = 0.1088, lr_0 = 6.1221e-04

 96%|█████████▌| 722/756 [00:20<00:00, 34.77it/s][A
 96%|█████████▌| 726/756 [00:21<00:00, 35.04it/s][A
 97%|█████████▋| 730/756 [00:21<00:00, 34.93it/s][ALoss = 1.4173e-01, PNorm = 72.5181, GNorm = 0.1892, lr_0 = 6.1154e-04

 97%|█████████

 92%|█████████▏| 694/756 [00:20<00:01, 35.43it/s][ALoss = 1.6675e-01, PNorm = 75.0138, GNorm = 0.3430, lr_0 = 5.6535e-04

 92%|█████████▏| 698/756 [00:20<00:01, 35.32it/s][A
 93%|█████████▎| 702/756 [00:21<00:01, 35.71it/s][A
 93%|█████████▎| 706/756 [00:21<00:01, 32.76it/s][ALoss = 1.6524e-01, PNorm = 75.0439, GNorm = 0.1524, lr_0 = 5.6474e-04

 94%|█████████▍| 710/756 [00:21<00:01, 34.40it/s][A
 94%|█████████▍| 714/756 [00:21<00:01, 31.88it/s][ALoss = 1.6492e-01, PNorm = 75.0856, GNorm = 0.1456, lr_0 = 5.6412e-04

 95%|█████████▍| 718/756 [00:21<00:01, 30.34it/s][A
 96%|█████████▌| 722/756 [00:21<00:01, 26.80it/s][A
 96%|█████████▌| 726/756 [00:21<00:01, 29.28it/s][ALoss = 1.3168e-01, PNorm = 75.1248, GNorm = 0.1535, lr_0 = 5.6351e-04

 97%|█████████▋| 730/756 [00:21<00:00, 31.02it/s][A
 97%|█████████▋| 734/756 [00:22<00:00, 32.18it/s][ALoss = 1.2475e-01, PNorm = 75.1550, GNorm = 0.0954, lr_0 = 5.6289e-04

 98%|█████████▊| 738/756 [00:22<00:00, 33.26it/s][A
 98%|█████████


 40%|███▉      | 301/756 [00:08<00:13, 34.50it/s][A
 40%|████      | 305/756 [00:09<00:12, 35.25it/s][A
 41%|████      | 309/756 [00:09<00:12, 34.40it/s][ALoss = 1.2324e-01, PNorm = 76.2267, GNorm = 0.1796, lr_0 = 5.4302e-04

 41%|████▏     | 313/756 [00:09<00:12, 34.94it/s][A
 42%|████▏     | 317/756 [00:09<00:12, 34.19it/s][ALoss = 1.4756e-01, PNorm = 76.2381, GNorm = 0.1914, lr_0 = 5.4243e-04

 42%|████▏     | 321/756 [00:09<00:12, 34.89it/s][A
 43%|████▎     | 325/756 [00:09<00:12, 34.33it/s][A
 44%|████▎     | 329/756 [00:09<00:12, 35.13it/s][ALoss = 1.6200e-01, PNorm = 76.2660, GNorm = 0.1498, lr_0 = 5.4184e-04

 44%|████▍     | 333/756 [00:09<00:12, 34.38it/s][A
 45%|████▍     | 337/756 [00:09<00:11, 35.35it/s][ALoss = 1.6565e-01, PNorm = 76.3011, GNorm = 0.2177, lr_0 = 5.4125e-04

 45%|████▌     | 341/756 [00:10<00:12, 34.35it/s][A
 46%|████▌     | 345/756 [00:10<00:11, 34.97it/s][A
 46%|████▌     | 349/756 [00:10<00:11, 34.10it/s][ALoss = 1.4177e-01, PNorm = 76.3

 41%|████▏     | 313/756 [00:09<00:12, 36.18it/s][ALoss = 1.1157e-01, PNorm = 78.6027, GNorm = 0.1911, lr_0 = 4.9982e-04

 42%|████▏     | 317/756 [00:09<00:12, 35.39it/s][A
 42%|████▏     | 321/756 [00:09<00:12, 35.77it/s][A
 43%|████▎     | 325/756 [00:09<00:12, 34.92it/s][ALoss = 1.4302e-01, PNorm = 78.6305, GNorm = 0.2764, lr_0 = 4.9928e-04

 44%|████▎     | 329/756 [00:09<00:12, 35.29it/s][A
 44%|████▍     | 333/756 [00:10<00:12, 34.98it/s][ALoss = 1.3863e-01, PNorm = 78.6569, GNorm = 0.2268, lr_0 = 4.9874e-04

 45%|████▍     | 337/756 [00:10<00:12, 34.89it/s][A
 45%|████▌     | 341/756 [00:10<00:12, 34.24it/s][A
 46%|████▌     | 345/756 [00:10<00:11, 34.98it/s][ALoss = 1.2958e-01, PNorm = 78.6939, GNorm = 0.2493, lr_0 = 4.9819e-04

 46%|████▌     | 349/756 [00:10<00:11, 34.59it/s][A
 47%|████▋     | 353/756 [00:10<00:11, 35.40it/s][ALoss = 1.1859e-01, PNorm = 78.7422, GNorm = 0.1119, lr_0 = 4.9765e-04

 47%|████▋     | 357/756 [00:10<00:11, 35.55it/s][A
 48%|████▊    


 95%|█████████▍| 717/756 [00:21<00:01, 33.96it/s][A
 95%|█████████▌| 721/756 [00:21<00:01, 34.24it/s][A
 96%|█████████▌| 725/756 [00:21<00:00, 33.90it/s][ALoss = 1.4308e-01, PNorm = 79.8486, GNorm = 0.2524, lr_0 = 4.7799e-04

 96%|█████████▋| 729/756 [00:21<00:00, 34.06it/s][A
 97%|█████████▋| 733/756 [00:21<00:00, 33.80it/s][ALoss = 1.8580e-01, PNorm = 79.8827, GNorm = 0.2479, lr_0 = 4.7747e-04

 97%|█████████▋| 737/756 [00:22<00:00, 34.25it/s][A
 98%|█████████▊| 741/756 [00:22<00:00, 34.02it/s][A
 99%|█████████▊| 745/756 [00:22<00:00, 34.99it/s][ALoss = 1.4242e-01, PNorm = 79.9318, GNorm = 0.1983, lr_0 = 4.7695e-04

 99%|█████████▉| 749/756 [00:22<00:00, 34.80it/s][A
100%|█████████▉| 753/756 [00:22<00:00, 35.71it/s][ALoss = 1.1421e-01, PNorm = 79.9840, GNorm = 0.2022, lr_0 = 4.7644e-04

                                                 [A
  0%|          | 0/95 [00:00<?, ?it/s][A
  1%|          | 1/95 [00:00<00:33,  2.79it/s][A
  6%|▋         | 6/95 [00:00<00:05, 15.92it/


 96%|█████████▌| 723/756 [00:21<00:00, 34.64it/s][A
 96%|█████████▌| 727/756 [00:21<00:00, 34.16it/s][ALoss = 1.4630e-01, PNorm = 82.0591, GNorm = 0.1991, lr_0 = 4.4002e-04

 97%|█████████▋| 731/756 [00:21<00:00, 34.56it/s][A
 97%|█████████▋| 735/756 [00:21<00:00, 33.92it/s][A
 98%|█████████▊| 739/756 [00:21<00:00, 34.60it/s][ALoss = 1.3958e-01, PNorm = 82.0821, GNorm = 0.1752, lr_0 = 4.3954e-04

 98%|█████████▊| 743/756 [00:21<00:00, 33.78it/s][A
 99%|█████████▉| 747/756 [00:21<00:00, 34.69it/s][ALoss = 1.2410e-01, PNorm = 82.1122, GNorm = 0.2126, lr_0 = 4.3906e-04

 99%|█████████▉| 751/756 [00:22<00:00, 34.41it/s][A
100%|█████████▉| 755/756 [00:22<00:00, 35.37it/s][A
                                                 [A
  0%|          | 0/95 [00:00<?, ?it/s][A
  1%|          | 1/95 [00:00<00:31,  2.96it/s][A
  7%|▋         | 7/95 [00:00<00:04, 19.80it/s][A
 14%|█▎        | 13/95 [00:00<00:02, 31.39it/s][A
 21%|██        | 20/95 [00:00<00:01, 41.06it/s][A
 28%|██▊       


 43%|████▎     | 325/756 [00:10<00:14, 30.68it/s][A
 44%|████▎     | 329/756 [00:10<00:14, 28.93it/s][A
 44%|████▍     | 333/756 [00:10<00:13, 30.62it/s][ALoss = 1.3125e-01, PNorm = 83.0818, GNorm = 0.3480, lr_0 = 4.2305e-04

 45%|████▍     | 337/756 [00:10<00:14, 29.55it/s][A
 45%|████▌     | 341/756 [00:10<00:13, 31.22it/s][ALoss = 1.3238e-01, PNorm = 83.0981, GNorm = 0.1856, lr_0 = 4.2259e-04

 46%|████▌     | 345/756 [00:11<00:13, 30.80it/s][A
 46%|████▌     | 349/756 [00:11<00:12, 32.30it/s][A
 47%|████▋     | 353/756 [00:11<00:13, 30.97it/s][ALoss = 1.1485e-01, PNorm = 83.1218, GNorm = 0.2452, lr_0 = 4.2213e-04

 47%|████▋     | 357/756 [00:11<00:12, 32.28it/s][A
 48%|████▊     | 361/756 [00:11<00:12, 31.45it/s][ALoss = 1.7189e-01, PNorm = 83.1274, GNorm = 0.2602, lr_0 = 4.2167e-04

 48%|████▊     | 365/756 [00:11<00:11, 32.67it/s][A
 49%|████▉     | 369/756 [00:11<00:12, 31.19it/s][A
 49%|████▉     | 373/756 [00:11<00:11, 32.18it/s][ALoss = 1.1665e-01, PNorm = 83.1


 44%|████▎     | 330/756 [00:10<00:12, 34.16it/s][A
 44%|████▍     | 334/756 [00:10<00:12, 34.53it/s][A
 45%|████▍     | 338/756 [00:10<00:12, 33.53it/s][ALoss = 1.2080e-01, PNorm = 84.8568, GNorm = 0.1460, lr_0 = 3.8940e-04

 45%|████▌     | 342/756 [00:10<00:12, 34.27it/s][A
 46%|████▌     | 346/756 [00:10<00:12, 33.61it/s][ALoss = 1.1268e-01, PNorm = 84.8879, GNorm = 0.1377, lr_0 = 3.8898e-04

 46%|████▋     | 350/756 [00:10<00:11, 34.44it/s][A
 47%|████▋     | 354/756 [00:11<00:12, 33.33it/s][A
 47%|████▋     | 358/756 [00:11<00:11, 34.27it/s][ALoss = 1.9513e-01, PNorm = 84.8960, GNorm = 0.3034, lr_0 = 3.8855e-04

 48%|████▊     | 362/756 [00:11<00:11, 33.59it/s][A
 49%|████▊     | 367/756 [00:11<00:10, 35.91it/s][ALoss = 1.4037e-01, PNorm = 84.9283, GNorm = 0.3378, lr_0 = 3.8813e-04

 49%|████▉     | 371/756 [00:11<00:11, 34.82it/s][A
 50%|████▉     | 375/756 [00:11<00:10, 36.14it/s][A
 50%|█████     | 379/756 [00:11<00:10, 35.45it/s][ALoss = 1.4510e-01, PNorm = 84.9

 45%|████▍     | 337/756 [00:11<00:14, 29.09it/s][A
 45%|████▌     | 341/756 [00:11<00:13, 30.13it/s][ALoss = 1.3343e-01, PNorm = 86.4363, GNorm = 0.1667, lr_0 = 3.5846e-04

 46%|████▌     | 345/756 [00:11<00:13, 29.56it/s][A
 46%|████▌     | 349/756 [00:12<00:13, 30.37it/s][A
 47%|████▋     | 353/756 [00:12<00:14, 28.38it/s][ALoss = 1.6943e-01, PNorm = 86.4450, GNorm = 0.2624, lr_0 = 3.5807e-04

 47%|████▋     | 357/756 [00:12<00:13, 29.95it/s][A
 48%|████▊     | 361/756 [00:12<00:14, 28.15it/s][ALoss = 1.3101e-01, PNorm = 86.4685, GNorm = 0.1866, lr_0 = 3.5768e-04

 48%|████▊     | 365/756 [00:12<00:13, 29.61it/s][A
 49%|████▉     | 369/756 [00:12<00:13, 28.70it/s][A
 49%|████▉     | 373/756 [00:12<00:12, 29.95it/s][ALoss = 1.1042e-01, PNorm = 86.4936, GNorm = 0.2253, lr_0 = 3.5730e-04

 50%|████▉     | 377/756 [00:13<00:13, 28.83it/s][A
 50%|█████     | 381/756 [00:13<00:12, 30.27it/s][ALoss = 9.8935e-02, PNorm = 86.5138, GNorm = 0.2055, lr_0 = 3.5691e-04

 51%|█████    

 46%|████▌     | 344/756 [00:10<00:11, 35.70it/s][A
 46%|████▌     | 348/756 [00:10<00:11, 34.82it/s][ALoss = 1.4410e-01, PNorm = 87.8146, GNorm = 0.2874, lr_0 = 3.2995e-04

 47%|████▋     | 352/756 [00:10<00:11, 35.39it/s][A
 47%|████▋     | 356/756 [00:10<00:11, 34.86it/s][ALoss = 1.5292e-01, PNorm = 87.8344, GNorm = 0.2164, lr_0 = 3.2959e-04

 48%|████▊     | 360/756 [00:10<00:11, 35.65it/s][A
 48%|████▊     | 364/756 [00:10<00:11, 35.44it/s][A
 49%|████▊     | 368/756 [00:11<00:10, 36.41it/s][ALoss = 1.4112e-01, PNorm = 87.8615, GNorm = 0.2898, lr_0 = 3.2923e-04

 49%|████▉     | 372/756 [00:11<00:10, 35.78it/s][A
 50%|████▉     | 376/756 [00:11<00:10, 36.74it/s][ALoss = 1.2338e-01, PNorm = 87.8853, GNorm = 0.2976, lr_0 = 3.2887e-04

 50%|█████     | 380/756 [00:11<00:10, 35.14it/s][A
 51%|█████     | 384/756 [00:11<00:10, 35.63it/s][A
 51%|█████▏    | 388/756 [00:11<00:10, 34.78it/s][ALoss = 1.1911e-01, PNorm = 87.9085, GNorm = 0.2400, lr_0 = 3.2852e-04

 52%|█████▏   


 99%|█████████▉| 749/756 [00:21<00:00, 35.82it/s][A
100%|█████████▉| 754/756 [00:22<00:00, 37.03it/s][A
                                                 [A
  0%|          | 0/95 [00:00<?, ?it/s][A
  1%|          | 1/95 [00:00<00:40,  2.34it/s][A
  9%|▉         | 9/95 [00:00<00:04, 20.95it/s][A
 18%|█▊        | 17/95 [00:00<00:02, 34.72it/s][A
 26%|██▋       | 25/95 [00:00<00:01, 44.91it/s][A
 35%|███▍      | 33/95 [00:00<00:01, 52.38it/s][A
 43%|████▎     | 41/95 [00:01<00:00, 57.34it/s][A
 52%|█████▏    | 49/95 [00:01<00:00, 60.82it/s][A
 60%|██████    | 57/95 [00:01<00:00, 63.80it/s][A
 68%|██████▊   | 65/95 [00:01<00:00, 65.86it/s][A
 77%|███████▋  | 73/95 [00:01<00:00, 66.86it/s][A
 85%|████████▌ | 81/95 [00:01<00:00, 69.28it/s][A
 97%|█████████▋| 92/95 [00:01<00:00, 79.27it/s][A
                                               [AValidation auc = 0.778809
 53%|█████▎    | 16/30 [06:38<05:54, 25.30s/it]Epoch 16

  0%|          | 0/756 [00:00<?, ?it/s][A
  0%|       


 47%|████▋     | 356/756 [00:10<00:11, 34.40it/s][A
 48%|████▊     | 360/756 [00:10<00:12, 31.35it/s][ALoss = 1.2790e-01, PNorm = 89.1058, GNorm = 0.1811, lr_0 = 3.0341e-04

 48%|████▊     | 364/756 [00:10<00:14, 27.88it/s][A
 49%|████▊     | 368/756 [00:11<00:13, 29.46it/s][A
 49%|████▉     | 372/756 [00:11<00:12, 30.95it/s][ALoss = 1.8007e-01, PNorm = 89.1203, GNorm = 0.5493, lr_0 = 3.0308e-04

 50%|████▉     | 376/756 [00:11<00:12, 31.63it/s][A
 50%|█████     | 380/756 [00:11<00:11, 32.54it/s][ALoss = 1.2485e-01, PNorm = 89.1341, GNorm = 0.3004, lr_0 = 3.0275e-04

 51%|█████     | 384/756 [00:11<00:11, 32.88it/s][A
 51%|█████▏    | 388/756 [00:11<00:11, 33.32it/s][A
 52%|█████▏    | 392/756 [00:11<00:10, 33.52it/s][ALoss = 1.0553e-01, PNorm = 89.1572, GNorm = 0.3082, lr_0 = 3.0242e-04

 52%|█████▏    | 396/756 [00:11<00:10, 33.52it/s][A
 53%|█████▎    | 400/756 [00:12<00:10, 33.61it/s][ALoss = 1.7629e-01, PNorm = 89.1634, GNorm = 0.4635, lr_0 = 3.0209e-04

 53%|█████▎  

 48%|████▊     | 366/756 [00:10<00:11, 33.42it/s][ALoss = 9.2940e-02, PNorm = 90.1944, GNorm = 0.2663, lr_0 = 2.7927e-04

 49%|████▉     | 370/756 [00:11<00:11, 33.81it/s][A
 49%|████▉     | 374/756 [00:11<00:11, 33.55it/s][ALoss = 1.4080e-01, PNorm = 90.2017, GNorm = 0.1364, lr_0 = 2.7897e-04

 50%|█████     | 378/756 [00:11<00:12, 29.56it/s][A
 51%|█████     | 382/756 [00:11<00:12, 30.71it/s][A
 51%|█████     | 386/756 [00:11<00:11, 32.16it/s][ALoss = 1.0887e-01, PNorm = 90.2100, GNorm = 0.3455, lr_0 = 2.7866e-04

 52%|█████▏    | 390/756 [00:11<00:11, 32.42it/s][A
 52%|█████▏    | 394/756 [00:11<00:10, 33.60it/s][ALoss = 1.5045e-01, PNorm = 90.2181, GNorm = 0.2831, lr_0 = 2.7836e-04

 53%|█████▎    | 398/756 [00:11<00:10, 33.96it/s][A
 53%|█████▎    | 402/756 [00:12<00:10, 34.85it/s][A
 54%|█████▎    | 406/756 [00:12<00:10, 34.74it/s][ALoss = 1.4166e-01, PNorm = 90.2247, GNorm = 0.2348, lr_0 = 2.7806e-04

 54%|█████▍    | 410/756 [00:12<00:09, 35.50it/s][A
 55%|█████▍   

 48%|████▊     | 362/756 [00:12<00:13, 29.50it/s][A
 48%|████▊     | 366/756 [00:12<00:12, 30.85it/s][A
 49%|████▉     | 370/756 [00:12<00:13, 28.35it/s][ALoss = 9.5289e-02, PNorm = 91.1199, GNorm = 0.1194, lr_0 = 2.5708e-04

 49%|████▉     | 374/756 [00:12<00:12, 30.31it/s][A
 50%|█████     | 378/756 [00:12<00:13, 28.11it/s][ALoss = 1.3974e-01, PNorm = 91.1343, GNorm = 0.2059, lr_0 = 2.5680e-04

 51%|█████     | 382/756 [00:12<00:12, 30.13it/s][A
 51%|█████     | 386/756 [00:13<00:13, 27.99it/s][A
 52%|█████▏    | 390/756 [00:13<00:12, 30.48it/s][ALoss = 1.3151e-01, PNorm = 91.1406, GNorm = 0.4116, lr_0 = 2.5652e-04

 52%|█████▏    | 394/756 [00:13<00:12, 28.73it/s][A
 53%|█████▎    | 398/756 [00:13<00:11, 30.47it/s][ALoss = 1.1963e-01, PNorm = 91.1499, GNorm = 0.2721, lr_0 = 2.5625e-04

 53%|█████▎    | 402/756 [00:13<00:11, 30.32it/s][A
 54%|█████▎    | 406/756 [00:13<00:11, 31.62it/s][A
 54%|█████▍    | 410/756 [00:13<00:11, 31.40it/s][ALoss = 1.4465e-01, PNorm = 91.15

 49%|████▉     | 369/756 [00:10<00:11, 34.87it/s][A
 49%|████▉     | 373/756 [00:11<00:10, 35.03it/s][ALoss = 1.3529e-01, PNorm = 91.9420, GNorm = 0.4581, lr_0 = 2.3663e-04

 50%|████▉     | 377/756 [00:11<00:10, 35.33it/s][A
 50%|█████     | 381/756 [00:11<00:10, 35.67it/s][A
 51%|█████     | 385/756 [00:11<00:10, 35.67it/s][ALoss = 1.2967e-01, PNorm = 91.9540, GNorm = 0.4457, lr_0 = 2.3638e-04

 51%|█████▏    | 389/756 [00:11<00:10, 35.20it/s][A
 52%|█████▏    | 393/756 [00:11<00:10, 35.39it/s][ALoss = 9.4536e-02, PNorm = 91.9652, GNorm = 0.1935, lr_0 = 2.3612e-04

 53%|█████▎    | 397/756 [00:11<00:10, 35.22it/s][A
 53%|█████▎    | 401/756 [00:11<00:09, 35.54it/s][A
 54%|█████▎    | 405/756 [00:11<00:09, 35.64it/s][ALoss = 1.3950e-01, PNorm = 91.9739, GNorm = 0.2634, lr_0 = 2.3586e-04

 54%|█████▍    | 409/756 [00:12<00:09, 35.13it/s][A
 55%|█████▍    | 413/756 [00:12<00:09, 35.17it/s][ALoss = 1.0469e-01, PNorm = 91.9822, GNorm = 0.3142, lr_0 = 2.3561e-04

 55%|█████▌   

 50%|████▉     | 377/756 [00:12<00:12, 31.22it/s][ALoss = 9.8537e-02, PNorm = 92.7701, GNorm = 0.5033, lr_0 = 2.1783e-04

 50%|█████     | 381/756 [00:12<00:11, 31.74it/s][A
 51%|█████     | 385/756 [00:12<00:11, 31.30it/s][A
 51%|█████▏    | 389/756 [00:12<00:11, 32.58it/s][ALoss = 1.2186e-01, PNorm = 92.7783, GNorm = 0.4787, lr_0 = 2.1760e-04

 52%|█████▏    | 393/756 [00:12<00:11, 31.67it/s][A
 53%|█████▎    | 397/756 [00:12<00:11, 32.32it/s][ALoss = 1.2486e-01, PNorm = 92.7844, GNorm = 0.1959, lr_0 = 2.1736e-04

 53%|█████▎    | 401/756 [00:12<00:13, 26.12it/s][A
 54%|█████▎    | 405/756 [00:13<00:12, 28.28it/s][A
 54%|█████▍    | 409/756 [00:13<00:11, 29.11it/s][ALoss = 1.0682e-01, PNorm = 92.7948, GNorm = 0.1274, lr_0 = 2.1712e-04

 55%|█████▍    | 413/756 [00:13<00:11, 30.11it/s][A
 55%|█████▌    | 417/756 [00:13<00:11, 29.86it/s][ALoss = 1.2678e-01, PNorm = 92.8023, GNorm = 0.2057, lr_0 = 2.1689e-04

 56%|█████▌    | 421/756 [00:13<00:10, 31.16it/s][A
 56%|█████▌   


 51%|█████▏    | 389/756 [00:11<00:10, 35.71it/s][A
 52%|█████▏    | 393/756 [00:11<00:10, 35.47it/s][ALoss = 1.1344e-01, PNorm = 93.3857, GNorm = 0.2045, lr_0 = 2.0029e-04

 53%|█████▎    | 397/756 [00:11<00:10, 35.79it/s][A
 53%|█████▎    | 401/756 [00:11<00:09, 35.87it/s][A
 54%|█████▎    | 405/756 [00:11<00:09, 36.58it/s][ALoss = 1.4190e-01, PNorm = 93.3930, GNorm = 0.5343, lr_0 = 2.0007e-04

 54%|█████▍    | 409/756 [00:12<00:09, 36.19it/s][A
 55%|█████▍    | 413/756 [00:12<00:09, 36.67it/s][ALoss = 1.2153e-01, PNorm = 93.4027, GNorm = 0.1692, lr_0 = 1.9985e-04

 55%|█████▌    | 417/756 [00:12<00:09, 36.39it/s][A
 56%|█████▌    | 421/756 [00:12<00:09, 36.69it/s][A
 56%|█████▌    | 425/756 [00:12<00:09, 36.66it/s][ALoss = 1.2757e-01, PNorm = 93.4086, GNorm = 0.5794, lr_0 = 1.9963e-04

 57%|█████▋    | 429/756 [00:12<00:08, 36.84it/s][A
 57%|█████▋    | 433/756 [00:12<00:08, 36.44it/s][ALoss = 1.0501e-01, PNorm = 93.4178, GNorm = 0.2118, lr_0 = 1.9942e-04

 58%|█████▊  


 53%|█████▎    | 402/756 [00:12<00:10, 33.35it/s][A
 54%|█████▎    | 406/756 [00:12<00:10, 34.23it/s][ALoss = 8.4306e-02, PNorm = 93.9838, GNorm = 0.1184, lr_0 = 1.8418e-04

 54%|█████▍    | 410/756 [00:12<00:09, 34.68it/s][A
 55%|█████▍    | 414/756 [00:12<00:09, 35.25it/s][A
 55%|█████▌    | 418/756 [00:12<00:09, 35.71it/s][ALoss = 9.7247e-02, PNorm = 93.9911, GNorm = 0.1945, lr_0 = 1.8397e-04

 56%|█████▌    | 422/756 [00:12<00:09, 35.37it/s][A
 56%|█████▋    | 426/756 [00:12<00:09, 35.26it/s][ALoss = 1.1382e-01, PNorm = 93.9985, GNorm = 0.4221, lr_0 = 1.8377e-04

 57%|█████▋    | 430/756 [00:13<00:09, 35.06it/s][A
 57%|█████▋    | 434/756 [00:13<00:09, 34.92it/s][A
 58%|█████▊    | 438/756 [00:13<00:09, 34.94it/s][ALoss = 1.1289e-01, PNorm = 94.0026, GNorm = 0.5068, lr_0 = 1.8357e-04

 58%|█████▊    | 442/756 [00:13<00:09, 34.66it/s][A
 59%|█████▉    | 446/756 [00:13<00:09, 33.98it/s][ALoss = 1.3046e-01, PNorm = 94.0062, GNorm = 0.4814, lr_0 = 1.8337e-04

 60%|█████▉  


  1%|          | 7/756 [00:00<00:45, 16.62it/s][A
  1%|▏         | 10/756 [00:00<00:39, 18.76it/s][ALoss = 1.2155e-01, PNorm = 94.2089, GNorm = 0.4155, lr_0 = 1.7707e-04

  2%|▏         | 15/756 [00:00<00:28, 25.59it/s][A
  2%|▏         | 18/756 [00:00<00:28, 25.62it/s][A
  3%|▎         | 23/756 [00:01<00:24, 30.18it/s][ALoss = 1.4819e-01, PNorm = 94.2106, GNorm = 0.1963, lr_0 = 1.7688e-04

  4%|▎         | 27/756 [00:01<00:24, 30.28it/s][A
  4%|▍         | 32/756 [00:01<00:21, 33.34it/s][ALoss = 1.2280e-01, PNorm = 94.2206, GNorm = 0.3030, lr_0 = 1.7669e-04

  5%|▍         | 36/756 [00:01<00:22, 32.34it/s][A
  5%|▌         | 41/756 [00:01<00:20, 34.87it/s][ALoss = 1.2998e-01, PNorm = 94.2269, GNorm = 0.1816, lr_0 = 1.7650e-04

  6%|▌         | 45/756 [00:01<00:21, 33.01it/s][A
  7%|▋         | 50/756 [00:01<00:22, 31.44it/s][ALoss = 8.1502e-02, PNorm = 94.2377, GNorm = 0.2679, lr_0 = 1.7630e-04

  7%|▋         | 55/756 [00:02<00:20, 34.69it/s][A
  8%|▊         | 59/756 [0

  7%|▋         | 53/756 [00:01<00:20, 33.79it/s][A
  8%|▊         | 57/756 [00:02<00:20, 33.95it/s][ALoss = 1.1840e-01, PNorm = 94.6952, GNorm = 0.2113, lr_0 = 1.6230e-04

  8%|▊         | 61/756 [00:02<00:20, 33.68it/s][A
  9%|▊         | 65/756 [00:02<00:20, 33.75it/s][ALoss = 1.0201e-01, PNorm = 94.7025, GNorm = 0.1980, lr_0 = 1.6212e-04

  9%|▉         | 69/756 [00:02<00:20, 33.34it/s][A
 10%|▉         | 73/756 [00:02<00:20, 33.45it/s][A
 10%|█         | 77/756 [00:02<00:20, 33.77it/s][ALoss = 1.3284e-01, PNorm = 94.7127, GNorm = 0.2526, lr_0 = 1.6194e-04

 11%|█         | 81/756 [00:02<00:19, 33.96it/s][A
 11%|█         | 85/756 [00:02<00:19, 34.01it/s][ALoss = 1.1899e-01, PNorm = 94.7221, GNorm = 0.2037, lr_0 = 1.6177e-04

 12%|█▏        | 89/756 [00:03<00:19, 33.85it/s][A
 12%|█▏        | 93/756 [00:03<00:19, 34.23it/s][A
 13%|█▎        | 97/756 [00:03<00:19, 34.13it/s][ALoss = 1.3427e-01, PNorm = 94.7285, GNorm = 0.3215, lr_0 = 1.6159e-04

 13%|█▎        | 101/756 [

  8%|▊         | 62/756 [00:02<00:19, 34.91it/s][ALoss = 1.3453e-01, PNorm = 95.1481, GNorm = 0.4115, lr_0 = 1.4939e-04

  9%|▊         | 66/756 [00:02<00:20, 32.89it/s][A
  9%|▉         | 71/756 [00:02<00:19, 35.40it/s][ALoss = 1.0690e-01, PNorm = 95.1575, GNorm = 0.1811, lr_0 = 1.4922e-04

 10%|▉         | 75/756 [00:02<00:20, 33.17it/s][A
 11%|█         | 80/756 [00:02<00:19, 35.47it/s][ALoss = 1.0776e-01, PNorm = 95.1654, GNorm = 0.2844, lr_0 = 1.4906e-04

 11%|█         | 84/756 [00:03<00:20, 33.59it/s][A
 12%|█▏        | 88/756 [00:03<00:19, 34.98it/s][A
 12%|█▏        | 92/756 [00:03<00:19, 33.44it/s][ALoss = 1.3311e-01, PNorm = 95.1711, GNorm = 0.2820, lr_0 = 1.4890e-04

 13%|█▎        | 97/756 [00:03<00:20, 32.61it/s][A
 13%|█▎        | 102/756 [00:03<00:18, 34.82it/s][ALoss = 1.2564e-01, PNorm = 95.1747, GNorm = 0.1722, lr_0 = 1.4874e-04

 14%|█▍        | 106/756 [00:03<00:19, 32.69it/s][A
 15%|█▍        | 111/756 [00:03<00:18, 35.08it/s][ALoss = 1.0751e-01, PNorm


 64%|██████▍   | 486/756 [00:15<00:07, 33.84it/s][A
 65%|██████▍   | 490/756 [00:15<00:08, 32.24it/s][ALoss = 1.7049e-01, PNorm = 95.3816, GNorm = 1.0070, lr_0 = 1.4255e-04

 65%|██████▌   | 495/756 [00:15<00:07, 34.90it/s][A
 66%|██████▌   | 499/756 [00:15<00:07, 33.05it/s][ALoss = 1.1704e-01, PNorm = 95.3807, GNorm = 0.3882, lr_0 = 1.4240e-04

 67%|██████▋   | 504/756 [00:15<00:07, 35.20it/s][A
 67%|██████▋   | 508/756 [00:16<00:07, 32.92it/s][A
 68%|██████▊   | 513/756 [00:16<00:07, 32.33it/s][ALoss = 1.2185e-01, PNorm = 95.3879, GNorm = 0.4444, lr_0 = 1.4224e-04

 68%|██████▊   | 517/756 [00:16<00:07, 34.06it/s][A
 69%|██████▉   | 521/756 [00:16<00:07, 32.55it/s][ALoss = 1.1574e-01, PNorm = 95.3976, GNorm = 0.3334, lr_0 = 1.4209e-04

 70%|██████▉   | 526/756 [00:16<00:06, 34.84it/s][A
 70%|███████   | 530/756 [00:16<00:06, 33.06it/s][ALoss = 1.4933e-01, PNorm = 95.4038, GNorm = 0.4109, lr_0 = 1.4193e-04

 71%|███████   | 535/756 [00:16<00:06, 35.16it/s][A
 71%|███████▏

 67%|██████▋   | 505/756 [00:16<00:07, 31.55it/s][ALoss = 9.8140e-02, PNorm = 95.7438, GNorm = 0.4795, lr_0 = 1.3107e-04

 67%|██████▋   | 509/756 [00:16<00:07, 33.02it/s][A
 68%|██████▊   | 513/756 [00:16<00:07, 32.06it/s][A
 68%|██████▊   | 517/756 [00:16<00:07, 33.03it/s][ALoss = 9.1948e-02, PNorm = 95.7484, GNorm = 0.2992, lr_0 = 1.3093e-04

 69%|██████▉   | 521/756 [00:16<00:07, 31.56it/s][A
 69%|██████▉   | 525/756 [00:16<00:07, 32.22it/s][ALoss = 1.2385e-01, PNorm = 95.7499, GNorm = 0.3719, lr_0 = 1.3078e-04

 70%|██████▉   | 529/756 [00:16<00:07, 30.72it/s][A
 71%|███████   | 533/756 [00:17<00:07, 31.63it/s][A
 71%|███████   | 537/756 [00:17<00:07, 30.59it/s][ALoss = 1.1653e-01, PNorm = 95.7528, GNorm = 0.2726, lr_0 = 1.3064e-04

 72%|███████▏  | 541/756 [00:17<00:06, 31.69it/s][A
 72%|███████▏  | 545/756 [00:17<00:06, 30.66it/s][ALoss = 1.0669e-01, PNorm = 95.7571, GNorm = 0.2742, lr_0 = 1.3050e-04

 73%|███████▎  | 549/756 [00:17<00:06, 31.39it/s][A
 73%|███████▎ 


 68%|██████▊   | 515/756 [00:14<00:06, 35.52it/s][A
 69%|██████▊   | 519/756 [00:14<00:06, 35.78it/s][ALoss = 1.2986e-01, PNorm = 96.0351, GNorm = 0.3184, lr_0 = 1.2053e-04

 69%|██████▉   | 523/756 [00:15<00:06, 35.73it/s][A
 70%|██████▉   | 527/756 [00:15<00:06, 35.81it/s][A
 70%|███████   | 531/756 [00:15<00:06, 35.67it/s][ALoss = 1.7684e-01, PNorm = 96.0370, GNorm = 0.5842, lr_0 = 1.2039e-04

 71%|███████   | 535/756 [00:15<00:06, 35.69it/s][A
 71%|███████▏  | 539/756 [00:15<00:06, 35.18it/s][ALoss = 1.4673e-01, PNorm = 96.0422, GNorm = 0.2787, lr_0 = 1.2026e-04

 72%|███████▏  | 543/756 [00:15<00:06, 35.45it/s][A
 72%|███████▏  | 547/756 [00:15<00:05, 35.25it/s][A
 73%|███████▎  | 551/756 [00:15<00:05, 35.53it/s][ALoss = 1.0610e-01, PNorm = 96.0491, GNorm = 0.2249, lr_0 = 1.2013e-04

 73%|███████▎  | 555/756 [00:15<00:05, 35.03it/s][A
 74%|███████▍  | 559/756 [00:16<00:05, 35.04it/s][ALoss = 1.0487e-01, PNorm = 96.0574, GNorm = 0.5336, lr_0 = 1.2000e-04

 74%|███████▍


 16%|█▌        | 120/756 [00:03<00:19, 32.24it/s][A
 16%|█▋        | 124/756 [00:04<00:18, 33.67it/s][ALoss = 1.4846e-01, PNorm = 96.1751, GNorm = 0.4183, lr_0 = 1.1588e-04

 17%|█▋        | 128/756 [00:04<00:18, 33.91it/s][A
 17%|█▋        | 132/756 [00:04<00:17, 34.85it/s][A
 18%|█▊        | 136/756 [00:04<00:17, 35.03it/s][ALoss = 1.6244e-01, PNorm = 96.1757, GNorm = 0.4222, lr_0 = 1.1575e-04

 19%|█▊        | 140/756 [00:04<00:17, 35.43it/s][A
 19%|█▉        | 144/756 [00:04<00:17, 35.32it/s][ALoss = 1.4452e-01, PNorm = 96.1810, GNorm = 0.4981, lr_0 = 1.1563e-04

 20%|█▉        | 148/756 [00:04<00:17, 35.65it/s][A
 20%|██        | 152/756 [00:04<00:17, 35.31it/s][A
 21%|██        | 156/756 [00:04<00:16, 35.98it/s][ALoss = 1.1962e-01, PNorm = 96.1860, GNorm = 0.2385, lr_0 = 1.1550e-04

 21%|██        | 160/756 [00:05<00:16, 35.59it/s][A
 22%|██▏       | 164/756 [00:05<00:16, 36.26it/s][ALoss = 1.1735e-01, PNorm = 96.1897, GNorm = 0.3814, lr_0 = 1.1537e-04

 22%|██▏     

 17%|█▋        | 130/756 [00:04<00:20, 29.86it/s][ALoss = 1.1964e-01, PNorm = 96.4262, GNorm = 0.3048, lr_0 = 1.0667e-04

 18%|█▊        | 134/756 [00:04<00:21, 29.31it/s][A
 18%|█▊        | 138/756 [00:05<00:20, 30.67it/s][ALoss = 1.0725e-01, PNorm = 96.4296, GNorm = 0.3483, lr_0 = 1.0656e-04

 19%|█▉        | 142/756 [00:05<00:21, 28.54it/s][A
 19%|█▉        | 146/756 [00:05<00:20, 29.93it/s][A
 20%|█▉        | 150/756 [00:05<00:22, 27.15it/s][ALoss = 1.0120e-01, PNorm = 96.4327, GNorm = 0.3574, lr_0 = 1.0644e-04

 20%|██        | 154/756 [00:05<00:20, 28.85it/s][A
 21%|██        | 157/756 [00:05<00:22, 27.18it/s][A
 21%|██▏       | 161/756 [00:05<00:19, 29.98it/s][ALoss = 1.1364e-01, PNorm = 96.4364, GNorm = 0.2556, lr_0 = 1.0632e-04

 22%|██▏       | 165/756 [00:06<00:20, 28.19it/s][A
 22%|██▏       | 169/756 [00:06<00:19, 30.76it/s][ALoss = 1.0481e-01, PNorm = 96.4404, GNorm = 0.6229, lr_0 = 1.0621e-04

 23%|██▎       | 173/756 [00:06<00:20, 28.48it/s][A
 23%|██▎      


 71%|███████   | 533/756 [00:18<00:08, 26.84it/s][A
 71%|███████   | 537/756 [00:18<00:07, 29.63it/s][A
 72%|███████▏  | 541/756 [00:18<00:08, 26.30it/s][ALoss = 1.1733e-01, PNorm = 96.5509, GNorm = 1.0859, lr_0 = 1.0201e-04

 72%|███████▏  | 545/756 [00:18<00:07, 29.12it/s][A
 73%|███████▎  | 549/756 [00:18<00:07, 26.24it/s][ALoss = 1.1862e-01, PNorm = 96.5533, GNorm = 0.4071, lr_0 = 1.0190e-04

 73%|███████▎  | 553/756 [00:18<00:06, 29.26it/s][A
 74%|███████▎  | 557/756 [00:18<00:07, 28.28it/s][A
 74%|███████▍  | 561/756 [00:18<00:06, 30.76it/s][ALoss = 1.1694e-01, PNorm = 96.5561, GNorm = 0.4934, lr_0 = 1.0179e-04

 75%|███████▍  | 565/756 [00:19<00:06, 29.11it/s][A
 75%|███████▌  | 569/756 [00:19<00:05, 31.28it/s][ALoss = 1.2216e-01, PNorm = 96.5574, GNorm = 0.2832, lr_0 = 1.0168e-04

 76%|███████▌  | 573/756 [00:19<00:06, 29.91it/s][A
 76%|███████▋  | 578/756 [00:19<00:05, 32.19it/s][ALoss = 1.0474e-01, PNorm = 96.5607, GNorm = 0.3415, lr_0 = 1.0157e-04

 77%|███████▋

## Reporting and saving test scores

In [5]:
# Display the `model_scores` mapping (populated by an earlier cell) as this cell's output.
model_scores

{'auc': [0.8255631355386794,
  0.7938604681638737,
  0.8339526113660627,
  0.8218594121079144,
  0.8066843968627716,
  0.8250288579511217,
  0.7831200284121299,
  0.8304504041510574,
  0.8582291937058875,
  0.8375058180281298,
  0.7720368811074656,
  0.8395092770865064,
  0.8156454894867737]}

In [6]:
# Summarise the test scores held in `all_scores` (metric name -> scores indexed
# as [fold, task]), log them through `info`, and write one CSV row per task.
# `all_scores`, `info`, `init_seed`, `save_dir` and the training `args` are
# expected to exist from earlier cells.

# Convert scores to numpy arrays so they can be sliced by fold and by task
for metric, scores in all_scores.items():
    all_scores[metric] = np.array(scores)

# Report results
info(f'{args.num_folds}-fold cross validation')

# Report scores for each fold (task-averaged, optionally followed by per-task values)
for fold_num in range(args.num_folds):
    for metric, scores in all_scores.items():
        info(f'\tSeed {init_seed + fold_num} ==> test {metric} = {np.nanmean(scores[fold_num]):.6f}')

        if args.show_individual_scores:
            for task_name, score in zip(args.task_names, scores[fold_num]):
                info(f'\t\tSeed {init_seed + fold_num} ==> test {task_name} {metric} = {score:.6f}')

# Report scores across folds
for metric, scores in all_scores.items():
    avg_scores = np.nanmean(scores, axis=1)  # average score for each model across tasks
    mean_score, std_score = np.nanmean(avg_scores), np.nanstd(avg_scores)
    info(f'Overall test {metric} = {mean_score:.6f} +/- {std_score:.6f}')

    if args.show_individual_scores:
        for task_num, task_name in enumerate(args.task_names):
            info(f'\tOverall test {task_name} {metric} = '
                 f'{np.nanmean(scores[:, task_num]):.6f} +/- {np.nanstd(scores[:, task_num]):.6f}')

# Save scores
# newline='' lets the csv writer control line endings itself; without it,
# platforms that translate '\n' end up with a blank line after every row.
with open(os.path.join(save_dir, TEST_SCORES_FILE_NAME), 'w', newline='') as f:
    writer = csv.writer(f)

    # Header: task name, then mean / std / per-fold columns for every metric
    header = ['Task']
    for metric in args.metrics:
        header += [f'Mean {metric}', f'Standard deviation {metric}'] + \
                  [f'Fold {i} {metric}' for i in range(args.num_folds)]
    writer.writerow(header)

    # One row per task; NaN scores are ignored by the nan-aware aggregates
    for task_num, task_name in enumerate(args.task_names):
        row = [task_name]
        for metric, scores in all_scores.items():
            task_scores = scores[:, task_num]
            mean, std = np.nanmean(task_scores), np.nanstd(task_scores)
            row += [mean, std] + task_scores.tolist()
        writer.writerow(row)

# Determine mean and std score of main metric
avg_scores = np.nanmean(all_scores[args.metric], axis=1)
mean_score, std_score = np.nanmean(avg_scores), np.nanstd(avg_scores)
print(mean_score, std_score)

1-fold cross validation
	Seed 0 ==> test auc = 0.818727
Overall test auc = 0.818727 +/- 0.000000


0.8187266133821824 0.0


# Make predictions

In [7]:
# Prediction-time configuration. Note that this rebinds `args`, replacing the
# training arguments object used by the earlier cells.
args = PredictArgs()

# Hardware
args.gpu = 1

# Trained checkpoints to load; feature scaling stays disabled, matching the
# `no_features_scaling` setting used when the models were trained.
args.checkpoint_dir = "../../Results/Trained_model/DMPNN_RN_Ensemble_5/fold_0/"
args.no_features_scaling = True

# Molecules to score and their precomputed extra features
args.test_path = "../../Data/Mtb_inhibitors/Mtb_inhibitors.csv"
args.smiles_columns = ['SMILES']
args.features_path = ["../../Data/Mtb_inhibitors/Features/RN_Mtb_inhibitors.npz"]

# Destination CSV for the predictions
args.preds_path = "../../Results/Mtb_inhibitors_pred/Mtb_inhibitors_DMPNN_preds.csv"

# Finalise the arguments object before it is used by the prediction cell
args.process_args()

In [8]:
# chemprop make_predictions
#
# Loads every checkpoint listed in `args.checkpoint_paths`, predicts on the
# molecules in `args.test_path`, averages the predictions over the ensemble
# and writes them (one column per task) to `args.preds_path`.
# Rows whose SMILES could not be parsed are kept in the output and filled
# with the marker string 'Invalid SMILES'.

# These names are used below but are not part of the notebook's import cell,
# so they are imported here to keep the corresponding branches from raising
# NameError.
from collections import OrderedDict

from chemprop.features import set_extra_atom_fdim, set_extra_bond_fdim

print('Loading training args')
train_args = load_args(args.checkpoint_paths[0])

num_tasks, task_names = train_args.num_tasks, train_args.task_names

# NOTE(review): presumably copies training-time settings onto the prediction
# args so both agree -- confirm against chemprop.utils.update_prediction_args.
update_prediction_args(predict_args=args, train_args=train_args)
args: Union[PredictArgs, TrainArgs]

if args.atom_descriptors == 'feature':
    set_extra_atom_fdim(train_args.atom_features_size)

if args.bond_features_path is not None:
    set_extra_bond_fdim(train_args.bond_features_size)

print('Loading data')
full_data = get_data(path=args.test_path, smiles_columns=args.smiles_columns, target_columns=[], ignore_columns=[],
                     skip_invalid_smiles=False, args=args, store_row=not args.drop_extra_columns)

print('Validating SMILES')
# Map each row index of `full_data` that has only parsable molecules to its
# position in the compacted `test_data`.
full_to_valid_indices = {}
valid_index = 0
for full_index in range(len(full_data)):
    if all(mol is not None for mol in full_data[full_index].mol):
        full_to_valid_indices[full_index] = valid_index
        valid_index += 1

test_data = MoleculeDataset([full_data[i] for i in sorted(full_to_valid_indices.keys())])

print(f'Test size = {len(test_data):,}')

# Predict with each model individually and sum predictions
if args.dataset_type == 'multiclass':
    sum_preds = np.zeros((len(test_data), num_tasks, args.multiclass_num_classes))
else:
    sum_preds = np.zeros((len(test_data), num_tasks))

# Create data loader
test_data_loader = MoleculeDataLoader(
    dataset=test_data,
    batch_size=args.batch_size,
    num_workers=args.num_workers
)

# Partial results for variance robust calculation.
if args.ensemble_variance:
    all_preds = np.zeros((len(test_data), num_tasks, len(args.checkpoint_paths)))

print(f'Predicting with an ensemble of {len(args.checkpoint_paths)} models')
for index, checkpoint_path in enumerate(tqdm(args.checkpoint_paths, total=len(args.checkpoint_paths))):
    # Load model and scalers
    model = load_checkpoint(checkpoint_path, device=args.device)
    scaler, features_scaler, atom_descriptor_scaler, bond_feature_scaler = load_scalers(checkpoint_path)

    # Normalize features
    # The dataset is reset first so that each checkpoint's scalers are applied
    # to the raw features rather than to already-normalized ones.
    if args.features_scaling or train_args.atom_descriptor_scaling or train_args.bond_feature_scaling:
        test_data.reset_features_and_targets()
        if args.features_scaling:
            test_data.normalize_features(features_scaler)
        if train_args.atom_descriptor_scaling and args.atom_descriptors is not None:
            test_data.normalize_features(atom_descriptor_scaler, scale_atom_descriptors=True)
        if train_args.bond_feature_scaling and args.bond_features_size > 0:
            test_data.normalize_features(bond_feature_scaler, scale_bond_features=True)

    # Make predictions
    model_preds = predict(
        model=model,
        data_loader=test_data_loader,
        scaler=scaler
    )
    sum_preds += np.array(model_preds)
    if args.ensemble_variance:
        all_preds[:, :, index] = model_preds

# Ensemble predictions
avg_preds = sum_preds / len(args.checkpoint_paths)
avg_preds = avg_preds.tolist()

if args.ensemble_variance:
    # Variance across ensemble members, reported per molecule and task
    all_epi_uncs = np.var(all_preds, axis=2)
    all_epi_uncs = all_epi_uncs.tolist()

# Save predictions
print(f'Saving predictions to {args.preds_path}')
assert len(test_data) == len(avg_preds)
if args.ensemble_variance:
    assert len(test_data) == len(all_epi_uncs)
makedirs(args.preds_path, isfile=True)

# Get prediction column names (multiclass gets one column per task and class;
# otherwise the task names are used unchanged)
if args.dataset_type == 'multiclass':
    task_names = [f'{name}_class_{i}' for name in task_names for i in range(args.multiclass_num_classes)]

# Copy predictions over to full_data
for full_index, datapoint in enumerate(full_data):
    valid_index = full_to_valid_indices.get(full_index, None)
    preds = avg_preds[valid_index] if valid_index is not None else ['Invalid SMILES'] * len(task_names)
    if args.ensemble_variance:
        epi_uncs = all_epi_uncs[valid_index] if valid_index is not None else ['Invalid SMILES'] * len(task_names)

    # If extra columns have been dropped, add back in SMILES columns
    if args.drop_extra_columns:
        datapoint.row = OrderedDict()

        smiles_columns = args.smiles_columns

        for column, smiles in zip(smiles_columns, datapoint.smiles):
            datapoint.row[column] = smiles

    # Add predictions columns
    if args.ensemble_variance:
        for pred_name, pred, epi_unc in zip(task_names, preds, epi_uncs):
            datapoint.row[pred_name] = pred
            datapoint.row[pred_name+'_epi_unc'] = epi_unc
    else:
        for pred_name, pred in zip(task_names, preds):
            datapoint.row[pred_name] = pred

# Save
# newline='' lets the csv writer control line endings itself; without it,
# platforms that translate '\n' end up with a blank line after every row.
with open(args.preds_path, 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=full_data[0].row.keys())
    writer.writeheader()

    for datapoint in full_data:
        writer.writerow(datapoint.row)


Loading training args


44it [00:00, 163318.03it/s]
100%|██████████| 44/44 [00:00<00:00, 147994.69it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loading data
Validating SMILES
Test size = 44
Predicting with an ensemble of 5 models



  0%|          | 0/1 [00:00<?, ?it/s][A

Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Moving model to cuda



100%|██████████| 1/1 [00:00<00:00,  3.52it/s][A
 20%|██        | 1/5 [00:00<00:01,  2.35it/s][A
  0%|          | 0/1 [00:00<?, ?it/s][A

Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Moving model to cuda



100%|██████████| 1/1 [00:00<00:00,  3.69it/s][A
 40%|████      | 2/5 [00:00<00:01,  2.38it/s][A
  0%|          | 0/1 [00:00<?, ?it/s][A

Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Moving model to cuda



100%|██████████| 1/1 [00:00<00:00,  3.69it/s][A
 60%|██████    | 3/5 [00:01<00:00,  2.37it/s][A
  0%|          | 0/1 [00:00<?, ?it/s][A

Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Moving model to cuda



100%|██████████| 1/1 [00:00<00:00,  3.65it/s][A
 80%|████████  | 4/5 [00:01<00:00,  2.37it/s][A
  0%|          | 0/1 [00:00<?, ?it/s][A

Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Moving model to cuda



100%|██████████| 1/1 [00:00<00:00,  3.66it/s][A
100%|██████████| 5/5 [00:02<00:00,  2.37it/s][A

Saving predictions to ../../Results/Mtb_inhibitors_pred/Mtb_inhibitors_DMPNN_preds.csv



