# Imports

In [1]:
# Mount Google Drive into the runtime's filesystem at /content/drive using
# Colab's `drive` helper, so files stored on Drive are reachable by path.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
from __future__ import print_function
import torch
import json

from data_loader import load_cifar10, get_class_names
from training_utils import train_model, continue_training

# CUDA

In [3]:
# Report whether PyTorch can see a CUDA device, then print the version of the
# CUDA compiler (nvcc) installed on the runtime via an IPython shell escape.
print(f"Is CUDA available? {torch.cuda.is_available()}")
!nvcc --version

Is CUDA available? True
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Jun__6_02:18:23_PDT_2024
Cuda compilation tools, release 12.5, V12.5.82
Build cuda_12.5.r12.5/compiler.34385749_0


In [4]:
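# Optional (disabled): install the CUDA 12.4 (cu124) PyTorch wheels.
# Uncomment only if the runtime's preinstalled PyTorch needs to be replaced.
# %pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124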

# Initialization

In [5]:
# Pick the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [6]:
# Load CIFAR-10 with a batch size of 64 and a fixed seed of 42.
# load_cifar10 returns six values: the two loaders followed by the
# train/test X and Y objects, unpacked here in that order.
(
    train_loader,
    test_loader,
    X_train,
    X_test,
    Y_train,
    Y_test,
) = load_cifar10(batch_size=64, seed=42)

# Class names as provided by the project's data_loader module.
class_names = get_class_names()

Files already downloaded and verified
Files already downloaded and verified


# Train CNN

In [7]:
from model import BeterCNN

# Instantiate the network and move it to the selected device in one step;
# `model` ends up bound to whatever `.to(device)` returns, as before.
model = BeterCNN().to(device)

# Sanity check: show which device the model's first parameter lives on.
print(f"Model device: {next(model.parameters()).device}")

Model device: cuda:0


In [8]:
# Train the CNN for 300 epochs on the selected device, passing both loaders
# to train_model; whatever it returns is kept in `metrics`.
# 'cnn' is the model name handed to train_model
# (presumably used to label checkpoints/metrics -- confirm in training_utils).
metrics = train_model(
    model=model, train_loader=train_loader, test_loader=test_loader,
    model_name='cnn', num_epochs=300, device=device,
)



Epoch 3/300 | LR: 0.005000 | Train Acc: 57.1% | Test Acc: 54.1% | Gap: 3.0%
Epoch 6/300 | LR: 0.005000 | Train Acc: 63.7% | Test Acc: 61.2% | Gap: 2.5%
Epoch 9/300 | LR: 0.005000 | Train Acc: 67.7% | Test Acc: 64.8% | Gap: 3.0%
Epoch 12/300 | LR: 0.005000 | Train Acc: 70.4% | Test Acc: 65.5% | Gap: 4.9%
Epoch 15/300 | LR: 0.005000 | Train Acc: 72.7% | Test Acc: 66.0% | Gap: 6.7%
Epoch 18/300 | LR: 0.005000 | Train Acc: 74.5% | Test Acc: 69.2% | Gap: 5.4%
Epoch 21/300 | LR: 0.005000 | Train Acc: 76.0% | Test Acc: 70.9% | Gap: 5.1%
Epoch 24/300 | LR: 0.005000 | Train Acc: 77.1% | Test Acc: 69.6% | Gap: 7.5%
Epoch 27/300 | LR: 0.005000 | Train Acc: 78.4% | Test Acc: 68.6% | Gap: 9.8%
Epoch 30/300 | LR: 0.005000 | Train Acc: 78.9% | Test Acc: 69.8% | Gap: 9.1%
Epoch 33/300 | LR: 0.005000 | Train Acc: 80.2% | Test Acc: 67.3% | Gap: 12.8%
Epoch 36/300 | LR: 0.005000 | Train Acc: 80.7% | Test Acc: 69.7% | Gap: 11.0%
Epoch 39/300 | LR: 0.005000 | Train Acc: 81.6% | Test Acc: 71.9% | Gap: 9.7%


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded checkpoint and metrics for epoch 100
Epoch 102/300 | LR: 0.005000 | Train Acc: 89.9% | Test Acc: 71.2% | Gap: 18.7%
Epoch 105/300 | LR: 0.005000 | Train Acc: 90.0% | Test Acc: 69.2% | Gap: 20.7%
Epoch 108/300 | LR: 0.005000 | Train Acc: 90.1% | Test Acc: 73.9% | Gap: 16.2%
Epoch 111/300 | LR: 0.005000 | Train Acc: 90.5% | Test Acc: 73.4% | Gap: 17.2%
Epoch 114/300 | LR: 0.005000 | Train Acc: 90.6% | Test Acc: 71.9% | Gap: 18.7%
Epoch 117/300 | LR: 0.005000 | Train Acc: 90.9% | Test Acc: 71.3% | Gap: 19.6%
Epoch 120/300 | LR: 0.005000 | Train Acc: 91.1% | Test Acc: 72.4% | Gap: 18.7%
Epoch 123/300 | LR: 0.005000 | Train Acc: 91.3% | Test Acc: 72.3% | Gap: 18.9%
Epoch 126/300 | LR: 0.005000 | Train Acc: 91.5% | Test Acc: 73.1% | Gap: 18.3%
Epoch 129/300 | LR: 0.005000 | Train Acc: 91.6% | Test Acc: 72.1% | Gap: 19.5%
Epoch 132/300 | LR: 0.005000 | Train Acc: 91.8% | Test Acc: 73.1% | Gap: 18.7%
Epoch 135/300 | LR: 0.005000 | Train Acc: 92.3% | Test Acc: 73.1% | Gap: 19.2%
Epoc

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded checkpoint and metrics for epoch 200
Epoch 201/300 | LR: 0.005000 | Train Acc: 94.6% | Test Acc: 69.0% | Gap: 25.7%
Epoch 204/300 | LR: 0.005000 | Train Acc: 94.8% | Test Acc: 70.3% | Gap: 24.5%
Epoch 207/300 | LR: 0.005000 | Train Acc: 94.9% | Test Acc: 71.5% | Gap: 23.4%
Epoch 210/300 | LR: 0.005000 | Train Acc: 94.9% | Test Acc: 72.5% | Gap: 22.4%
Epoch 213/300 | LR: 0.005000 | Train Acc: 95.1% | Test Acc: 71.4% | Gap: 23.8%
Epoch 216/300 | LR: 0.005000 | Train Acc: 95.0% | Test Acc: 72.5% | Gap: 22.6%
Epoch 219/300 | LR: 0.005000 | Train Acc: 95.2% | Test Acc: 73.6% | Gap: 21.6%
Epoch 222/300 | LR: 0.005000 | Train Acc: 95.0% | Test Acc: 72.0% | Gap: 23.1%
Epoch 225/300 | LR: 0.005000 | Train Acc: 95.0% | Test Acc: 70.2% | Gap: 24.8%
Epoch 228/300 | LR: 0.005000 | Train Acc: 95.1% | Test Acc: 72.1% | Gap: 23.0%
Epoch 231/300 | LR: 0.005000 | Train Acc: 95.3% | Test Acc: 70.5% | Gap: 24.7%
Epoch 234/300 | LR: 0.005000 | Train Acc: 95.4% | Test Acc: 71.0% | Gap: 24.4%
Epoc

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded checkpoint and metrics for epoch 300


# Continue training

In [9]:
# Resume training from previously saved state.
# `json` comes from the notebook's import cell (the same cell that provides
# `continue_training`), so it is not re-imported here.

# Directory holding the saved training state; this is a path on the Colab
# runtime's local disk -- adjust it if the checkpoints live elsewhere.
# (Presumably this is where train_model saved its checkpoints -- confirm in training_utils.)
checkpoint_dir = '/content/cnn_checkpoints'

# Read the saved metrics only to report where the previous run stopped.
with open(f'{checkpoint_dir}/training_metrics.json', 'r') as f:
    metrics = json.load(f)

print(f"Last completed epoch: {metrics['current_epoch']}")

# Continue training towards target_epochs=700; the value returned by
# continue_training replaces the metrics dict loaded above.
metrics = continue_training(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    model_name='cnn',
    checkpoint_dir=checkpoint_dir,
    target_epochs=700,
    device=device
)

Last completed epoch: 300
Continuing training from epoch 300 to 700


Epoch [301/700]:   2%|▏         | 19/782 [00:00<00:04, 183.24it/s]



Epoch [301/700]:   5%|▍         | 39/782 [00:00<00:03, 189.74it/s]



Epoch [301/700]:   8%|▊         | 60/782 [00:00<00:03, 197.49it/s]



Epoch [301/700]:  10%|█         | 81/782 [00:00<00:03, 198.65it/s]



Epoch [301/700]:  13%|█▎        | 102/782 [00:00<00:03, 201.67it/s]



Epoch [301/700]:  16%|█▌        | 123/782 [00:00<00:03, 204.35it/s]



Epoch [301/700]:  19%|█▊        | 145/782 [00:00<00:03, 206.04it/s]



Epoch [301/700]:  21%|██        | 166/782 [00:00<00:02, 205.99it/s]



Epoch [301/700]:  27%|██▋       | 208/782 [00:01<00:02, 206.76it/s]



Epoch [301/700]:  32%|███▏      | 252/782 [00:01<00:02, 211.23it/s]



Epoch [301/700]:  38%|███▊      | 296/782 [00:01<00:02, 209.77it/s]



Epoch [301/700]:  43%|████▎     | 340/782 [00:01<00:02, 211.43it/s]



Epoch [301/700]:  49%|████▉     | 384/782 [00:01<00:01, 210.01it/s]



Epoch [301/700]:  55%|█████▍    | 428/782 [00:02<00:01, 211.30it/s]



Epoch [301/700]:  60%|██████    | 472/782 [00:02<00:01, 212.24it/s]



Epoch [301/700]:  66%|██████▌   | 516/782 [00:02<00:01, 205.39it/s]



Epoch [301/700]:  71%|███████▏  | 558/782 [00:02<00:01, 202.52it/s]



Epoch [301/700]:  77%|███████▋  | 601/782 [00:02<00:00, 206.57it/s]



Epoch [301/700]:  82%|████████▏ | 644/782 [00:03<00:00, 207.38it/s]



Epoch [301/700]:  88%|████████▊ | 687/782 [00:03<00:00, 209.58it/s]



Epoch [301/700]:  93%|█████████▎| 731/782 [00:03<00:00, 212.44it/s]



Epoch [301/700]:  99%|█████████▉| 775/782 [00:03<00:00, 211.88it/s]



Epoch [301/700]: 100%|██████████| 782/782 [00:03<00:00, 207.41it/s]






Learning Rate: 0.005000
Train Loss: 0.1013, Accuracy: 96.30%, Confidence: 0.9588
Test Loss: 1.6341, Accuracy: 72.87%, Confidence: 0.9233
Train-Test Accuracy Gap: 23.43%


Epoch [302/700]:   3%|▎         | 22/782 [00:00<00:03, 212.14it/s]



Epoch [302/700]:   6%|▌         | 44/782 [00:00<00:03, 212.98it/s]



Epoch [302/700]:   8%|▊         | 66/782 [00:00<00:03, 214.61it/s]



Epoch [302/700]:  11%|█▏        | 88/782 [00:00<00:03, 212.79it/s]



Epoch [302/700]:  14%|█▍        | 110/782 [00:00<00:03, 213.91it/s]



Epoch [302/700]:  17%|█▋        | 132/782 [00:00<00:03, 213.47it/s]



Epoch [302/700]:  20%|█▉        | 154/782 [00:00<00:02, 212.59it/s]



Epoch [302/700]:  23%|██▎       | 176/782 [00:00<00:02, 208.90it/s]



Epoch [302/700]:  25%|██▌       | 197/782 [00:00<00:02, 209.17it/s]



Epoch [302/700]:  28%|██▊       | 218/782 [00:01<00:02, 201.54it/s]



Epoch [302/700]:  31%|███       | 239/782 [00:01<00:02, 200.17it/s]



Epoch [302/700]:  33%|███▎      | 261/782 [00:01<00:02, 204.33it/s]



Epoch [302/700]:  36%|███▌      | 282/782 [00:01<00:02, 205.02it/s]



Epoch [302/700]:  39%|███▊      | 303/782 [00:01<00:02, 206.39it/s]



Epoch [302/700]:  42%|████▏     | 325/782 [00:01<00:02, 209.00it/s]



Epoch [302/700]:  44%|████▍     | 347/782 [00:01<00:02, 211.52it/s]



Epoch [302/700]:  47%|████▋     | 369/782 [00:01<00:01, 213.44it/s]



Epoch [302/700]:  50%|█████     | 391/782 [00:01<00:01, 215.35it/s]



Epoch [302/700]:  56%|█████▌    | 435/782 [00:02<00:01, 212.82it/s]



Epoch [302/700]:  58%|█████▊    | 457/782 [00:02<00:01, 211.17it/s]



Epoch [302/700]:  61%|██████▏   | 479/782 [00:02<00:01, 209.43it/s]



Epoch [302/700]:  64%|██████▍   | 501/782 [00:02<00:01, 210.83it/s]



Epoch [302/700]:  67%|██████▋   | 523/782 [00:02<00:01, 211.08it/s]



Epoch [302/700]:  70%|██████▉   | 545/782 [00:02<00:01, 212.72it/s]



Epoch [302/700]:  73%|███████▎  | 567/782 [00:02<00:01, 211.20it/s]



Epoch [302/700]:  75%|███████▌  | 589/782 [00:02<00:00, 210.07it/s]



Epoch [302/700]:  78%|███████▊  | 611/782 [00:02<00:00, 208.68it/s]



Epoch [302/700]:  81%|████████  | 633/782 [00:03<00:00, 209.55it/s]



Epoch [302/700]:  84%|████████▍ | 655/782 [00:03<00:00, 210.63it/s]



Epoch [302/700]:  87%|████████▋ | 677/782 [00:03<00:00, 208.28it/s]



Epoch [302/700]:  89%|████████▉ | 698/782 [00:03<00:00, 206.39it/s]



Epoch [302/700]:  92%|█████████▏| 719/782 [00:03<00:00, 207.15it/s]



Epoch [302/700]:  95%|█████████▍| 740/782 [00:03<00:00, 207.81it/s]



Epoch [302/700]:  97%|█████████▋| 761/782 [00:03<00:00, 208.16it/s]



Epoch [302/700]: 100%|██████████| 782/782 [00:03<00:00, 209.39it/s]






Learning Rate: 0.005000
Train Loss: 0.0997, Accuracy: 96.47%, Confidence: 0.9595
Test Loss: 2.2499, Accuracy: 68.27%, Confidence: 0.9218
Train-Test Accuracy Gap: 28.20%


Epoch [303/700]:   3%|▎         | 21/782 [00:00<00:03, 206.01it/s]



Epoch [303/700]:   5%|▌         | 43/782 [00:00<00:03, 210.21it/s]



Epoch [303/700]:   8%|▊         | 65/782 [00:00<00:03, 209.87it/s]



Epoch [303/700]:  11%|█         | 86/782 [00:00<00:03, 209.32it/s]



Epoch [303/700]:  14%|█▍        | 108/782 [00:00<00:03, 210.45it/s]



Epoch [303/700]:  17%|█▋        | 130/782 [00:00<00:03, 212.01it/s]



Epoch [303/700]:  19%|█▉        | 152/782 [00:00<00:03, 209.24it/s]



Epoch [303/700]:  22%|██▏       | 174/782 [00:00<00:02, 211.23it/s]



Epoch [303/700]:  25%|██▌       | 196/782 [00:00<00:02, 209.05it/s]



Epoch [303/700]:  28%|██▊       | 218/782 [00:01<00:02, 210.26it/s]



Epoch [303/700]:  31%|███       | 240/782 [00:01<00:02, 211.62it/s]



Epoch [303/700]:  34%|███▎      | 262/782 [00:01<00:02, 211.65it/s]



Epoch [303/700]:  36%|███▋      | 284/782 [00:01<00:02, 212.41it/s]



Epoch [303/700]:  39%|███▉      | 306/782 [00:01<00:02, 211.60it/s]



Epoch [303/700]:  42%|████▏     | 328/782 [00:01<00:02, 211.73it/s]



Epoch [303/700]:  45%|████▍     | 350/782 [00:01<00:02, 211.54it/s]



Epoch [303/700]:  48%|████▊     | 372/782 [00:01<00:01, 211.00it/s]



Epoch [303/700]:  50%|█████     | 394/782 [00:01<00:01, 211.85it/s]



Epoch [303/700]:  53%|█████▎    | 416/782 [00:01<00:01, 212.31it/s]



Epoch [303/700]:  56%|█████▌    | 438/782 [00:02<00:01, 212.25it/s]



Epoch [303/700]:  59%|█████▉    | 460/782 [00:02<00:01, 211.20it/s]



Epoch [303/700]:  62%|██████▏   | 482/782 [00:02<00:01, 212.16it/s]



Epoch [303/700]:  64%|██████▍   | 504/782 [00:02<00:01, 211.42it/s]



Epoch [303/700]:  67%|██████▋   | 526/782 [00:02<00:01, 212.02it/s]



Epoch [303/700]:  70%|███████   | 548/782 [00:02<00:01, 212.53it/s]



Epoch [303/700]:  73%|███████▎  | 570/782 [00:02<00:00, 212.28it/s]



Epoch [303/700]:  76%|███████▌  | 592/782 [00:02<00:00, 212.49it/s]



Epoch [303/700]:  79%|███████▊  | 614/782 [00:02<00:00, 212.37it/s]



Epoch [303/700]:  81%|████████▏ | 636/782 [00:03<00:00, 213.51it/s]



Epoch [303/700]:  84%|████████▍ | 658/782 [00:03<00:00, 212.39it/s]



Epoch [303/700]:  87%|████████▋ | 680/782 [00:03<00:00, 213.10it/s]



Epoch [303/700]:  90%|████████▉ | 702/782 [00:03<00:00, 212.70it/s]



Epoch [303/700]:  93%|█████████▎| 724/782 [00:03<00:00, 212.06it/s]



Epoch [303/700]:  95%|█████████▌| 746/782 [00:03<00:00, 211.32it/s]



Epoch [303/700]: 100%|██████████| 782/782 [00:03<00:00, 211.31it/s]






Learning Rate: 0.005000
Train Loss: 0.1041, Accuracy: 96.35%, Confidence: 0.9593
Test Loss: 1.7291, Accuracy: 72.30%, Confidence: 0.9268
Train-Test Accuracy Gap: 24.05%


Epoch [304/700]:   3%|▎         | 21/782 [00:00<00:03, 209.05it/s]



Epoch [304/700]:   5%|▌         | 43/782 [00:00<00:03, 210.45it/s]



Epoch [304/700]:   8%|▊         | 65/782 [00:00<00:03, 209.59it/s]



Epoch [304/700]:  14%|█▍        | 109/782 [00:00<00:03, 211.98it/s]



Epoch [304/700]:  20%|█▉        | 153/782 [00:00<00:03, 205.33it/s]



Epoch [304/700]:  25%|██▌       | 196/782 [00:00<00:02, 208.35it/s]



Epoch [304/700]:  34%|███▎      | 262/782 [00:01<00:02, 210.42it/s]



Epoch [304/700]:  39%|███▉      | 306/782 [00:01<00:02, 208.81it/s]



Epoch [304/700]:  45%|████▍     | 349/782 [00:01<00:02, 208.70it/s]



Epoch [304/700]:  50%|█████     | 391/782 [00:01<00:01, 205.47it/s]



Epoch [304/700]:  55%|█████▌    | 434/782 [00:02<00:01, 210.36it/s]



Epoch [304/700]:  61%|██████    | 478/782 [00:02<00:01, 209.39it/s]



Epoch [304/700]:  66%|██████▋   | 520/782 [00:02<00:01, 206.88it/s]



Epoch [304/700]:  72%|███████▏  | 563/782 [00:02<00:01, 209.50it/s]



Epoch [304/700]:  78%|███████▊  | 607/782 [00:02<00:00, 211.86it/s]



Epoch [304/700]:  83%|████████▎ | 651/782 [00:03<00:00, 211.03it/s]



Epoch [304/700]:  89%|████████▉ | 695/782 [00:03<00:00, 213.41it/s]



Epoch [304/700]:  95%|█████████▍| 739/782 [00:03<00:00, 212.62it/s]



Epoch [304/700]: 100%|██████████| 782/782 [00:03<00:00, 209.68it/s]






Learning Rate: 0.005000
Train Loss: 0.1002, Accuracy: 96.49%, Confidence: 0.9602
Test Loss: 1.7032, Accuracy: 72.98%, Confidence: 0.9273
Train-Test Accuracy Gap: 23.51%


Epoch [305/700]:   3%|▎         | 21/782 [00:00<00:03, 209.51it/s]



Epoch [305/700]:  11%|█▏        | 88/782 [00:00<00:03, 216.35it/s]



Epoch [305/700]:  17%|█▋        | 132/782 [00:00<00:03, 215.85it/s]



Epoch [305/700]:  23%|██▎       | 176/782 [00:00<00:02, 212.62it/s]



Epoch [305/700]:  28%|██▊       | 220/782 [00:01<00:02, 211.98it/s]



Epoch [305/700]:  34%|███▍      | 264/782 [00:01<00:02, 211.28it/s]



Epoch [305/700]:  39%|███▉      | 308/782 [00:01<00:02, 211.64it/s]



Epoch [305/700]:  45%|████▌     | 352/782 [00:01<00:02, 212.16it/s]



Epoch [305/700]:  51%|█████     | 396/782 [00:01<00:01, 211.57it/s]



Epoch [305/700]:  56%|█████▋    | 440/782 [00:02<00:01, 209.16it/s]



Epoch [305/700]:  62%|██████▏   | 484/782 [00:02<00:01, 210.37it/s]



Epoch [305/700]:  68%|██████▊   | 528/782 [00:02<00:01, 210.88it/s]



Epoch [305/700]:  73%|███████▎  | 572/782 [00:02<00:00, 212.39it/s]



Epoch [305/700]:  79%|███████▉  | 616/782 [00:02<00:00, 213.29it/s]



Epoch [305/700]:  84%|████████▍ | 660/782 [00:03<00:00, 214.38it/s]



Epoch [305/700]:  90%|█████████ | 704/782 [00:03<00:00, 211.48it/s]



Epoch [305/700]:  96%|█████████▌| 748/782 [00:03<00:00, 212.98it/s]



Epoch [305/700]: 100%|██████████| 782/782 [00:03<00:00, 212.11it/s]


Learning Rate: 0.005000
Train Loss: 0.0979, Accuracy: 96.56%, Confidence: 0.9602
Test Loss: 1.5873, Accuracy: 74.14%, Confidence: 0.9246
Train-Test Accuracy Gap: 22.42%


Epoch [306/700]:   3%|▎         | 21/782 [00:00<00:03, 208.31it/s]



Epoch [306/700]:   5%|▌         | 43/782 [00:00<00:03, 210.40it/s]



Epoch [306/700]:   8%|▊         | 65/782 [00:00<00:03, 211.46it/s]



Epoch [306/700]:  11%|█         | 87/782 [00:00<00:03, 209.52it/s]



Epoch [306/700]:  14%|█▍        | 109/782 [00:00<00:03, 212.67it/s]



Epoch [306/700]:  17%|█▋        | 131/782 [00:00<00:03, 214.46it/s]



Epoch [306/700]:  20%|█▉        | 153/782 [00:00<00:02, 211.75it/s]



Epoch [306/700]:  22%|██▏       | 175/782 [00:00<00:02, 212.29it/s]



Epoch [306/700]:  25%|██▌       | 197/782 [00:00<00:02, 213.17it/s]



Epoch [306/700]:  28%|██▊       | 219/782 [00:01<00:02, 211.99it/s]



Epoch [306/700]:  31%|███       | 241/782 [00:01<00:02, 211.67it/s]



Epoch [306/700]:  34%|███▎      | 263/782 [00:01<00:02, 210.17it/s]



Epoch [306/700]:  36%|███▋      | 285/782 [00:01<00:02, 210.51it/s]



Epoch [306/700]:  39%|███▉      | 307/782 [00:01<00:02, 211.90it/s]



Epoch [306/700]:  42%|████▏     | 329/782 [00:01<00:02, 212.65it/s]



Epoch [306/700]:  45%|████▍     | 351/782 [00:01<00:02, 212.91it/s]



Epoch [306/700]:  48%|████▊     | 373/782 [00:01<00:01, 209.77it/s]



Epoch [306/700]:  51%|█████     | 395/782 [00:01<00:01, 210.35it/s]



Epoch [306/700]:  53%|█████▎    | 417/782 [00:01<00:01, 208.84it/s]



Epoch [306/700]:  56%|█████▌    | 439/782 [00:02<00:01, 210.10it/s]



Epoch [306/700]:  59%|█████▉    | 461/782 [00:02<00:01, 210.50it/s]



Epoch [306/700]:  62%|██████▏   | 483/782 [00:02<00:01, 210.51it/s]



Epoch [306/700]:  65%|██████▍   | 505/782 [00:02<00:01, 211.53it/s]



Epoch [306/700]:  67%|██████▋   | 527/782 [00:02<00:01, 210.73it/s]



Epoch [306/700]:  70%|███████   | 549/782 [00:02<00:01, 210.12it/s]



Epoch [306/700]:  73%|███████▎  | 571/782 [00:02<00:01, 210.33it/s]



Epoch [306/700]:  76%|███████▌  | 593/782 [00:02<00:00, 210.51it/s]



Epoch [306/700]:  79%|███████▊  | 615/782 [00:02<00:00, 210.60it/s]



Epoch [306/700]:  81%|████████▏ | 637/782 [00:03<00:00, 210.56it/s]



Epoch [306/700]:  84%|████████▍ | 659/782 [00:03<00:00, 208.88it/s]



Epoch [306/700]:  87%|████████▋ | 681/782 [00:03<00:00, 209.32it/s]



Epoch [306/700]:  90%|████████▉ | 703/782 [00:03<00:00, 210.86it/s]



Epoch [306/700]:  93%|█████████▎| 725/782 [00:03<00:00, 211.32it/s]



Epoch [306/700]:  96%|█████████▌| 747/782 [00:03<00:00, 212.49it/s]



Epoch [306/700]: 100%|██████████| 782/782 [00:03<00:00, 211.05it/s]






Learning Rate: 0.005000
Train Loss: 0.0969, Accuracy: 96.63%, Confidence: 0.9606
Test Loss: 1.6601, Accuracy: 72.67%, Confidence: 0.9236
Train-Test Accuracy Gap: 23.96%


Epoch [307/700]:   3%|▎         | 21/782 [00:00<00:03, 209.25it/s]



Epoch [307/700]:   5%|▌         | 43/782 [00:00<00:03, 210.74it/s]



Epoch [307/700]:  11%|█         | 87/782 [00:00<00:03, 213.81it/s]



Epoch [307/700]:  17%|█▋        | 131/782 [00:00<00:03, 214.93it/s]



Epoch [307/700]:  22%|██▏       | 174/782 [00:00<00:02, 209.76it/s]



Epoch [307/700]:  28%|██▊       | 216/782 [00:01<00:02, 206.84it/s]



Epoch [307/700]:  33%|███▎      | 259/782 [00:01<00:02, 209.61it/s]



Epoch [307/700]:  39%|███▊      | 302/782 [00:01<00:02, 210.76it/s]



Epoch [307/700]:  44%|████▍     | 346/782 [00:01<00:02, 211.88it/s]



Epoch [307/700]:  50%|████▉     | 390/782 [00:01<00:01, 214.75it/s]



Epoch [307/700]:  55%|█████▌    | 434/782 [00:02<00:01, 212.88it/s]



Epoch [307/700]:  61%|██████    | 477/782 [00:02<00:01, 204.22it/s]



Epoch [307/700]:  66%|██████▋   | 519/782 [00:02<00:01, 204.33it/s]



Epoch [307/700]:  72%|███████▏  | 561/782 [00:02<00:01, 206.09it/s]



Epoch [307/700]:  77%|███████▋  | 604/782 [00:02<00:00, 208.34it/s]



Epoch [307/700]:  80%|███████▉  | 625/782 [00:02<00:00, 208.14it/s]



Epoch [307/700]:  83%|████████▎ | 646/782 [00:03<00:00, 206.92it/s]



Epoch [307/700]:  85%|████████▌ | 667/782 [00:03<00:00, 206.80it/s]



Epoch [307/700]:  88%|████████▊ | 689/782 [00:03<00:00, 208.97it/s]



Epoch [307/700]:  91%|█████████ | 711/782 [00:03<00:00, 209.87it/s]



Epoch [307/700]:  94%|█████████▎| 732/782 [00:03<00:00, 209.16it/s]



Epoch [307/700]:  96%|█████████▋| 753/782 [00:03<00:00, 208.97it/s]



Epoch [307/700]: 100%|██████████| 782/782 [00:03<00:00, 208.97it/s]






Learning Rate: 0.005000
Train Loss: 0.0982, Accuracy: 96.51%, Confidence: 0.9603
Test Loss: 1.7776, Accuracy: 71.73%, Confidence: 0.9233
Train-Test Accuracy Gap: 24.78%


Epoch [308/700]:   3%|▎         | 21/782 [00:00<00:03, 206.80it/s]



Epoch [308/700]:   5%|▌         | 43/782 [00:00<00:03, 209.40it/s]



Epoch [308/700]:   8%|▊         | 65/782 [00:00<00:03, 212.66it/s]



Epoch [308/700]:  11%|█         | 87/782 [00:00<00:03, 211.63it/s]



Epoch [308/700]:  14%|█▍        | 109/782 [00:00<00:03, 210.63it/s]



Epoch [308/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.75it/s]



Epoch [308/700]:  20%|█▉        | 153/782 [00:00<00:02, 212.99it/s]



Epoch [308/700]:  22%|██▏       | 175/782 [00:00<00:02, 212.82it/s]



Epoch [308/700]:  25%|██▌       | 197/782 [00:00<00:02, 211.93it/s]



Epoch [308/700]:  28%|██▊       | 219/782 [00:01<00:02, 211.78it/s]



Epoch [308/700]:  31%|███       | 241/782 [00:01<00:02, 211.71it/s]



Epoch [308/700]:  34%|███▎      | 263/782 [00:01<00:02, 211.38it/s]



Epoch [308/700]:  36%|███▋      | 285/782 [00:01<00:02, 211.76it/s]



Epoch [308/700]:  39%|███▉      | 307/782 [00:01<00:02, 212.03it/s]



Epoch [308/700]:  45%|████▍     | 351/782 [00:01<00:02, 213.82it/s]



Epoch [308/700]:  51%|█████     | 395/782 [00:01<00:01, 212.89it/s]



Epoch [308/700]:  56%|█████▌    | 439/782 [00:02<00:01, 212.54it/s]



Epoch [308/700]:  62%|██████▏   | 483/782 [00:02<00:01, 213.45it/s]



Epoch [308/700]:  67%|██████▋   | 527/782 [00:02<00:01, 213.50it/s]



Epoch [308/700]:  73%|███████▎  | 571/782 [00:02<00:00, 213.15it/s]



Epoch [308/700]:  79%|███████▊  | 615/782 [00:02<00:00, 211.40it/s]



Epoch [308/700]:  84%|████████▍ | 659/782 [00:03<00:00, 212.50it/s]



Epoch [308/700]:  90%|████████▉ | 703/782 [00:03<00:00, 213.26it/s]



Epoch [308/700]:  96%|█████████▌| 747/782 [00:03<00:00, 212.64it/s]



Epoch [308/700]: 100%|██████████| 782/782 [00:03<00:00, 212.18it/s]


Learning Rate: 0.005000
Train Loss: 0.0949, Accuracy: 96.65%, Confidence: 0.9604
Test Loss: 1.7342, Accuracy: 71.96%, Confidence: 0.9246
Train-Test Accuracy Gap: 24.69%


Epoch [309/700]:   3%|▎         | 21/782 [00:00<00:03, 206.81it/s]



Epoch [309/700]:   5%|▌         | 43/782 [00:00<00:03, 211.17it/s]



Epoch [309/700]:   8%|▊         | 65/782 [00:00<00:03, 211.75it/s]



Epoch [309/700]:  11%|█         | 87/782 [00:00<00:03, 211.82it/s]



Epoch [309/700]:  14%|█▍        | 109/782 [00:00<00:03, 213.96it/s]



Epoch [309/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.48it/s]



Epoch [309/700]:  20%|█▉        | 153/782 [00:00<00:02, 211.82it/s]



Epoch [309/700]:  28%|██▊       | 219/782 [00:01<00:02, 212.23it/s]



Epoch [309/700]:  34%|███▎      | 263/782 [00:01<00:02, 212.05it/s]



Epoch [309/700]:  39%|███▉      | 307/782 [00:01<00:02, 214.32it/s]



Epoch [309/700]:  45%|████▍     | 351/782 [00:01<00:02, 215.14it/s]



Epoch [309/700]:  51%|█████     | 395/782 [00:01<00:01, 210.46it/s]



Epoch [309/700]:  56%|█████▌    | 439/782 [00:02<00:01, 211.01it/s]



Epoch [309/700]:  62%|██████▏   | 483/782 [00:02<00:01, 212.26it/s]



Epoch [309/700]:  67%|██████▋   | 527/782 [00:02<00:01, 213.50it/s]



Epoch [309/700]:  73%|███████▎  | 571/782 [00:02<00:00, 213.75it/s]



Epoch [309/700]:  79%|███████▊  | 615/782 [00:02<00:00, 212.19it/s]



Epoch [309/700]:  84%|████████▍ | 659/782 [00:03<00:00, 212.81it/s]



Epoch [309/700]:  90%|████████▉ | 703/782 [00:03<00:00, 211.35it/s]



Epoch [309/700]:  96%|█████████▌| 747/782 [00:03<00:00, 211.08it/s]



Epoch [309/700]: 100%|██████████| 782/782 [00:03<00:00, 211.88it/s]


Learning Rate: 0.004500
Train Loss: 0.0837, Accuracy: 97.01%, Confidence: 0.9627
Test Loss: 1.6103, Accuracy: 73.77%, Confidence: 0.9274
Train-Test Accuracy Gap: 23.24%


Epoch [310/700]:   3%|▎         | 21/782 [00:00<00:03, 207.63it/s]



Epoch [310/700]:   5%|▌         | 43/782 [00:00<00:03, 211.19it/s]



Epoch [310/700]:   8%|▊         | 65/782 [00:00<00:03, 210.38it/s]



Epoch [310/700]:  11%|█         | 87/782 [00:00<00:03, 212.16it/s]



Epoch [310/700]:  14%|█▍        | 109/782 [00:00<00:03, 211.85it/s]



Epoch [310/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.33it/s]



Epoch [310/700]:  20%|█▉        | 153/782 [00:00<00:03, 206.80it/s]



Epoch [310/700]:  28%|██▊       | 217/782 [00:01<00:02, 208.46it/s]



Epoch [310/700]:  33%|███▎      | 260/782 [00:01<00:02, 211.53it/s]



Epoch [310/700]:  39%|███▉      | 304/782 [00:01<00:02, 210.22it/s]



Epoch [310/700]:  45%|████▍     | 348/782 [00:01<00:02, 213.08it/s]



Epoch [310/700]:  50%|█████     | 392/782 [00:01<00:01, 213.05it/s]



Epoch [310/700]:  56%|█████▌    | 436/782 [00:02<00:01, 210.84it/s]



Epoch [310/700]:  61%|██████▏   | 480/782 [00:02<00:01, 211.48it/s]



Epoch [310/700]:  67%|██████▋   | 523/782 [00:02<00:01, 207.98it/s]



Epoch [310/700]:  72%|███████▏  | 566/782 [00:02<00:01, 208.93it/s]



Epoch [310/700]:  78%|███████▊  | 609/782 [00:02<00:00, 210.99it/s]



Epoch [310/700]:  84%|████████▎ | 653/782 [00:03<00:00, 208.56it/s]



Epoch [310/700]:  89%|████████▉ | 696/782 [00:03<00:00, 211.07it/s]



Epoch [310/700]:  95%|█████████▍| 740/782 [00:03<00:00, 214.24it/s]



Epoch [310/700]: 100%|██████████| 782/782 [00:03<00:00, 210.74it/s]


Learning Rate: 0.004500
Train Loss: 0.0880, Accuracy: 96.95%, Confidence: 0.9623
Test Loss: 1.8010, Accuracy: 71.85%, Confidence: 0.9236
Train-Test Accuracy Gap: 25.10%


Epoch [311/700]:   3%|▎         | 21/782 [00:00<00:03, 205.16it/s]



Epoch [311/700]:   5%|▌         | 43/782 [00:00<00:03, 210.92it/s]



Epoch [311/700]:   8%|▊         | 65/782 [00:00<00:03, 211.09it/s]



Epoch [311/700]:  11%|█         | 87/782 [00:00<00:03, 212.36it/s]



Epoch [311/700]:  14%|█▍        | 109/782 [00:00<00:03, 212.21it/s]



Epoch [311/700]:  22%|██▏       | 175/782 [00:00<00:02, 214.35it/s]



Epoch [311/700]:  28%|██▊       | 219/782 [00:01<00:02, 212.28it/s]



Epoch [311/700]:  34%|███▎      | 263/782 [00:01<00:02, 214.01it/s]



Epoch [311/700]:  39%|███▉      | 307/782 [00:01<00:02, 215.01it/s]



Epoch [311/700]:  45%|████▍     | 351/782 [00:01<00:02, 214.29it/s]



Epoch [311/700]:  51%|█████     | 395/782 [00:01<00:01, 208.17it/s]



Epoch [311/700]:  56%|█████▌    | 438/782 [00:02<00:01, 209.03it/s]



Epoch [311/700]:  62%|██████▏   | 482/782 [00:02<00:01, 211.61it/s]



Epoch [311/700]:  67%|██████▋   | 526/782 [00:02<00:01, 213.72it/s]



Epoch [311/700]:  73%|███████▎  | 570/782 [00:02<00:00, 215.00it/s]



Epoch [311/700]:  79%|███████▊  | 614/782 [00:02<00:00, 215.68it/s]



Epoch [311/700]:  84%|████████▍ | 659/782 [00:03<00:00, 216.40it/s]



Epoch [311/700]:  90%|████████▉ | 703/782 [00:03<00:00, 215.72it/s]



Epoch [311/700]:  96%|█████████▌| 747/782 [00:03<00:00, 214.18it/s]



Epoch [311/700]: 100%|██████████| 782/782 [00:03<00:00, 213.11it/s]


Learning Rate: 0.004500
Train Loss: 0.0851, Accuracy: 97.14%, Confidence: 0.9630
Test Loss: 1.8981, Accuracy: 71.32%, Confidence: 0.9267
Train-Test Accuracy Gap: 25.82%


Epoch [312/700]:   3%|▎         | 21/782 [00:00<00:03, 208.75it/s]



Epoch [312/700]:  11%|█         | 87/782 [00:00<00:03, 215.38it/s]



Epoch [312/700]:  17%|█▋        | 131/782 [00:00<00:03, 212.09it/s]



Epoch [312/700]:  22%|██▏       | 175/782 [00:00<00:02, 213.77it/s]



Epoch [312/700]:  28%|██▊       | 219/782 [00:01<00:02, 215.57it/s]



Epoch [312/700]:  34%|███▎      | 263/782 [00:01<00:02, 215.28it/s]



Epoch [312/700]:  39%|███▉      | 308/782 [00:01<00:02, 216.51it/s]



Epoch [312/700]:  45%|████▌     | 352/782 [00:01<00:01, 215.92it/s]



Epoch [312/700]:  51%|█████     | 396/782 [00:01<00:01, 214.33it/s]



Epoch [312/700]:  56%|█████▋    | 440/782 [00:02<00:01, 211.38it/s]



Epoch [312/700]:  62%|██████▏   | 484/782 [00:02<00:01, 211.39it/s]



Epoch [312/700]:  68%|██████▊   | 528/782 [00:02<00:01, 211.86it/s]



Epoch [312/700]:  76%|███████▌  | 594/782 [00:02<00:00, 213.54it/s]



Epoch [312/700]:  82%|████████▏ | 638/782 [00:02<00:00, 211.21it/s]



Epoch [312/700]:  90%|█████████ | 704/782 [00:03<00:00, 209.77it/s]



Epoch [312/700]:  96%|█████████▌| 748/782 [00:03<00:00, 211.47it/s]



Epoch [312/700]: 100%|██████████| 782/782 [00:03<00:00, 212.62it/s]


Learning Rate: 0.004500
Train Loss: 0.0854, Accuracy: 97.06%, Confidence: 0.9629
Test Loss: 1.7075, Accuracy: 72.63%, Confidence: 0.9272
Train-Test Accuracy Gap: 24.43%


Epoch [313/700]:   3%|▎         | 21/782 [00:00<00:03, 206.35it/s]



Epoch [313/700]:   5%|▌         | 42/782 [00:00<00:03, 208.02it/s]



Epoch [313/700]:   8%|▊         | 63/782 [00:00<00:03, 207.76it/s]



Epoch [313/700]:  11%|█         | 85/782 [00:00<00:03, 209.05it/s]



Epoch [313/700]:  14%|█▎        | 106/782 [00:00<00:03, 209.26it/s]



Epoch [313/700]:  16%|█▋        | 128/782 [00:00<00:03, 210.14it/s]



Epoch [313/700]:  19%|█▉        | 150/782 [00:00<00:03, 208.75it/s]



Epoch [313/700]:  22%|██▏       | 171/782 [00:00<00:02, 208.69it/s]



Epoch [313/700]:  27%|██▋       | 213/782 [00:01<00:02, 207.88it/s]



Epoch [313/700]:  33%|███▎      | 255/782 [00:01<00:02, 205.17it/s]



Epoch [313/700]:  38%|███▊      | 298/782 [00:01<00:02, 208.06it/s]



Epoch [313/700]:  43%|████▎     | 340/782 [00:01<00:02, 206.30it/s]



Epoch [313/700]:  49%|████▉     | 383/782 [00:01<00:01, 208.32it/s]



Epoch [313/700]:  54%|█████▍    | 426/782 [00:02<00:01, 208.15it/s]



Epoch [313/700]:  60%|█████▉    | 469/782 [00:02<00:01, 210.72it/s]



Epoch [313/700]:  66%|██████▌   | 513/782 [00:02<00:01, 211.85it/s]



Epoch [313/700]:  71%|███████   | 557/782 [00:02<00:01, 210.81it/s]



Epoch [313/700]:  77%|███████▋  | 600/782 [00:02<00:00, 206.38it/s]



Epoch [313/700]:  82%|████████▏ | 643/782 [00:03<00:00, 208.04it/s]



Epoch [313/700]:  88%|████████▊ | 687/782 [00:03<00:00, 211.85it/s]



Epoch [313/700]:  93%|█████████▎| 731/782 [00:03<00:00, 213.82it/s]



Epoch [313/700]: 100%|██████████| 782/782 [00:03<00:00, 208.62it/s]






Learning Rate: 0.004500
Train Loss: 0.0898, Accuracy: 96.87%, Confidence: 0.9628
Test Loss: 1.8381, Accuracy: 71.46%, Confidence: 0.9228
Train-Test Accuracy Gap: 25.41%


Epoch [314/700]:   3%|▎         | 21/782 [00:00<00:03, 208.59it/s]



Epoch [314/700]:   5%|▌         | 42/782 [00:00<00:03, 208.77it/s]



Epoch [314/700]:   8%|▊         | 64/782 [00:00<00:03, 210.06it/s]



Epoch [314/700]:  11%|█         | 86/782 [00:00<00:03, 209.65it/s]



Epoch [314/700]:  14%|█▎        | 107/782 [00:00<00:03, 209.13it/s]



Epoch [314/700]:  16%|█▋        | 129/782 [00:00<00:03, 209.61it/s]



Epoch [314/700]:  19%|█▉        | 150/782 [00:00<00:03, 209.62it/s]



Epoch [314/700]:  28%|██▊       | 216/782 [00:01<00:02, 211.44it/s]



Epoch [314/700]:  33%|███▎      | 260/782 [00:01<00:02, 211.92it/s]



Epoch [314/700]:  39%|███▉      | 304/782 [00:01<00:02, 214.13it/s]



Epoch [314/700]:  45%|████▍     | 348/782 [00:01<00:02, 214.33it/s]



Epoch [314/700]:  50%|█████     | 392/782 [00:01<00:01, 214.47it/s]



Epoch [314/700]:  56%|█████▌    | 436/782 [00:02<00:01, 215.71it/s]



Epoch [314/700]:  61%|██████▏   | 480/782 [00:02<00:01, 214.94it/s]



Epoch [314/700]:  67%|██████▋   | 524/782 [00:02<00:01, 212.81it/s]



Epoch [314/700]:  73%|███████▎  | 568/782 [00:02<00:01, 212.81it/s]



Epoch [314/700]:  78%|███████▊  | 612/782 [00:02<00:00, 212.75it/s]



Epoch [314/700]:  84%|████████▍ | 656/782 [00:03<00:00, 209.42it/s]



Epoch [314/700]:  90%|████████▉ | 700/782 [00:03<00:00, 208.77it/s]



Epoch [314/700]:  95%|█████████▌| 744/782 [00:03<00:00, 212.36it/s]



Epoch [314/700]: 100%|██████████| 782/782 [00:03<00:00, 212.12it/s]


Learning Rate: 0.004500
Train Loss: 0.0865, Accuracy: 97.03%, Confidence: 0.9635
Test Loss: 1.7168, Accuracy: 72.21%, Confidence: 0.9256
Train-Test Accuracy Gap: 24.82%


Epoch [315/700]:   3%|▎         | 22/782 [00:00<00:03, 211.50it/s]



Epoch [315/700]:   6%|▌         | 44/782 [00:00<00:03, 211.55it/s]



Epoch [315/700]:   8%|▊         | 66/782 [00:00<00:03, 212.31it/s]



Epoch [315/700]:  17%|█▋        | 132/782 [00:00<00:03, 213.71it/s]



Epoch [315/700]:  23%|██▎       | 176/782 [00:00<00:02, 212.81it/s]



Epoch [315/700]:  28%|██▊       | 220/782 [00:01<00:02, 209.43it/s]



Epoch [315/700]:  34%|███▎      | 263/782 [00:01<00:02, 209.18it/s]



Epoch [315/700]:  39%|███▉      | 307/782 [00:01<00:02, 212.37it/s]



Epoch [315/700]:  45%|████▍     | 351/782 [00:01<00:02, 208.75it/s]



Epoch [315/700]:  50%|█████     | 394/782 [00:01<00:01, 211.15it/s]



Epoch [315/700]:  56%|█████▌    | 438/782 [00:02<00:01, 210.55it/s]



Epoch [315/700]:  62%|██████▏   | 482/782 [00:02<00:01, 212.23it/s]



Epoch [315/700]:  67%|██████▋   | 526/782 [00:02<00:01, 214.26it/s]



Epoch [315/700]:  73%|███████▎  | 570/782 [00:02<00:00, 215.11it/s]



Epoch [315/700]:  79%|███████▊  | 614/782 [00:02<00:00, 213.39it/s]



Epoch [315/700]:  84%|████████▍ | 658/782 [00:03<00:00, 208.72it/s]



Epoch [315/700]:  90%|████████▉ | 700/782 [00:03<00:00, 205.92it/s]



Epoch [315/700]:  95%|█████████▌| 743/782 [00:03<00:00, 206.13it/s]



Epoch [315/700]: 100%|██████████| 782/782 [00:03<00:00, 210.61it/s]


Learning Rate: 0.004500
Train Loss: 0.0870, Accuracy: 97.04%, Confidence: 0.9632
Test Loss: 1.6961, Accuracy: 72.83%, Confidence: 0.9254
Train-Test Accuracy Gap: 24.21%


Epoch [316/700]:   3%|▎         | 22/782 [00:00<00:03, 213.04it/s]



Epoch [316/700]:   6%|▌         | 44/782 [00:00<00:03, 213.22it/s]



Epoch [316/700]:   8%|▊         | 66/782 [00:00<00:03, 213.58it/s]



Epoch [316/700]:  11%|█▏        | 88/782 [00:00<00:03, 215.36it/s]



Epoch [316/700]:  17%|█▋        | 132/782 [00:00<00:03, 214.39it/s]



Epoch [316/700]:  23%|██▎       | 176/782 [00:00<00:02, 212.49it/s]



Epoch [316/700]:  28%|██▊       | 219/782 [00:01<00:02, 206.08it/s]



Epoch [316/700]:  34%|███▎      | 262/782 [00:01<00:02, 208.94it/s]



Epoch [316/700]:  39%|███▉      | 306/782 [00:01<00:02, 211.55it/s]



Epoch [316/700]:  45%|████▍     | 350/782 [00:01<00:02, 211.54it/s]



Epoch [316/700]:  50%|█████     | 394/782 [00:01<00:01, 213.78it/s]



Epoch [316/700]:  56%|█████▌    | 438/782 [00:02<00:01, 204.85it/s]



Epoch [316/700]:  62%|██████▏   | 482/782 [00:02<00:01, 210.16it/s]



Epoch [316/700]:  67%|██████▋   | 526/782 [00:02<00:01, 210.07it/s]



Epoch [316/700]:  73%|███████▎  | 570/782 [00:02<00:01, 209.98it/s]



Epoch [316/700]:  79%|███████▊  | 614/782 [00:02<00:00, 210.71it/s]



Epoch [316/700]:  84%|████████▍ | 658/782 [00:03<00:00, 210.58it/s]



Epoch [316/700]:  93%|█████████▎| 724/782 [00:03<00:00, 213.81it/s]



Epoch [316/700]:  98%|█████████▊| 768/782 [00:03<00:00, 214.29it/s]



Epoch [316/700]: 100%|██████████| 782/782 [00:03<00:00, 211.06it/s]


Learning Rate: 0.004500
Train Loss: 0.0810, Accuracy: 97.17%, Confidence: 0.9643
Test Loss: 1.6445, Accuracy: 73.08%, Confidence: 0.9256
Train-Test Accuracy Gap: 24.09%


Epoch [317/700]:   3%|▎         | 21/782 [00:00<00:03, 204.11it/s]



Epoch [317/700]:   5%|▌         | 43/782 [00:00<00:03, 208.80it/s]



Epoch [317/700]:  11%|█         | 85/782 [00:00<00:03, 209.36it/s]



Epoch [317/700]:  16%|█▋        | 128/782 [00:00<00:03, 210.83it/s]



Epoch [317/700]:  22%|██▏       | 172/782 [00:00<00:02, 213.39it/s]



Epoch [317/700]:  28%|██▊       | 216/782 [00:01<00:02, 211.00it/s]



Epoch [317/700]:  33%|███▎      | 260/782 [00:01<00:02, 208.30it/s]



Epoch [317/700]:  39%|███▊      | 303/782 [00:01<00:02, 206.14it/s]



Epoch [317/700]:  44%|████▍     | 347/782 [00:01<00:02, 210.53it/s]



Epoch [317/700]:  47%|████▋     | 369/782 [00:01<00:01, 211.84it/s]



Epoch [317/700]:  50%|█████     | 391/782 [00:01<00:01, 212.70it/s]



Epoch [317/700]:  53%|█████▎    | 413/782 [00:01<00:01, 213.96it/s]



Epoch [317/700]:  56%|█████▌    | 435/782 [00:02<00:01, 212.62it/s]



Epoch [317/700]:  58%|█████▊    | 457/782 [00:02<00:01, 213.69it/s]



Epoch [317/700]:  61%|██████▏   | 479/782 [00:02<00:01, 214.05it/s]



Epoch [317/700]:  64%|██████▍   | 501/782 [00:02<00:01, 214.17it/s]



Epoch [317/700]:  67%|██████▋   | 523/782 [00:02<00:01, 214.84it/s]



Epoch [317/700]:  70%|██████▉   | 545/782 [00:02<00:01, 212.85it/s]



Epoch [317/700]:  73%|███████▎  | 567/782 [00:02<00:01, 214.40it/s]



Epoch [317/700]:  75%|███████▌  | 589/782 [00:02<00:00, 214.31it/s]



Epoch [317/700]:  78%|███████▊  | 611/782 [00:02<00:00, 210.57it/s]



Epoch [317/700]:  81%|████████  | 633/782 [00:02<00:00, 212.24it/s]



Epoch [317/700]:  84%|████████▍ | 655/782 [00:03<00:00, 211.95it/s]



Epoch [317/700]:  87%|████████▋ | 677/782 [00:03<00:00, 211.43it/s]



Epoch [317/700]:  89%|████████▉ | 699/782 [00:03<00:00, 210.96it/s]



Epoch [317/700]:  92%|█████████▏| 721/782 [00:03<00:00, 211.44it/s]



Epoch [317/700]:  95%|█████████▌| 743/782 [00:03<00:00, 211.61it/s]



Epoch [317/700]:  98%|█████████▊| 765/782 [00:03<00:00, 211.74it/s]



Epoch [317/700]: 100%|██████████| 782/782 [00:03<00:00, 211.04it/s]


Learning Rate: 0.004500
Train Loss: 0.0828, Accuracy: 97.19%, Confidence: 0.9642
Test Loss: 1.6803, Accuracy: 72.75%, Confidence: 0.9261
Train-Test Accuracy Gap: 24.44%


Epoch [318/700]:   3%|▎         | 21/782 [00:00<00:03, 204.42it/s]



Epoch [318/700]:  11%|█▏        | 88/782 [00:00<00:03, 217.14it/s]



Epoch [318/700]:  17%|█▋        | 132/782 [00:00<00:03, 215.81it/s]



Epoch [318/700]:  23%|██▎       | 176/782 [00:00<00:02, 216.32it/s]



Epoch [318/700]:  28%|██▊       | 220/782 [00:01<00:02, 214.68it/s]



Epoch [318/700]:  34%|███▍      | 264/782 [00:01<00:02, 211.98it/s]



Epoch [318/700]:  42%|████▏     | 331/782 [00:01<00:02, 216.37it/s]



Epoch [318/700]:  48%|████▊     | 375/782 [00:01<00:01, 216.13it/s]



Epoch [318/700]:  54%|█████▎    | 419/782 [00:01<00:01, 215.99it/s]



Epoch [318/700]:  59%|█████▉    | 463/782 [00:02<00:01, 210.60it/s]



Epoch [318/700]:  65%|██████▍   | 506/782 [00:02<00:01, 209.54it/s]



Epoch [318/700]:  70%|███████   | 549/782 [00:02<00:01, 210.87it/s]



Epoch [318/700]:  76%|███████▌  | 593/782 [00:02<00:00, 212.45it/s]



Epoch [318/700]:  81%|████████▏ | 637/782 [00:02<00:00, 212.53it/s]



Epoch [318/700]:  87%|████████▋ | 681/782 [00:03<00:00, 211.44it/s]



Epoch [318/700]:  93%|█████████▎| 725/782 [00:03<00:00, 211.92it/s]



Epoch [318/700]:  98%|█████████▊| 769/782 [00:03<00:00, 211.68it/s]



Epoch [318/700]: 100%|██████████| 782/782 [00:03<00:00, 212.82it/s]


Learning Rate: 0.004500
Train Loss: 0.0860, Accuracy: 97.02%, Confidence: 0.9644
Test Loss: 2.5231, Accuracy: 66.21%, Confidence: 0.9229
Train-Test Accuracy Gap: 30.81%


Epoch [319/700]:   3%|▎         | 21/782 [00:00<00:03, 206.89it/s]



Epoch [319/700]:   5%|▌         | 43/782 [00:00<00:03, 210.51it/s]



Epoch [319/700]:   8%|▊         | 65/782 [00:00<00:03, 211.26it/s]



Epoch [319/700]:  11%|█         | 87/782 [00:00<00:03, 214.37it/s]



Epoch [319/700]:  17%|█▋        | 131/782 [00:00<00:03, 216.83it/s]



Epoch [319/700]:  23%|██▎       | 176/782 [00:00<00:02, 218.38it/s]



Epoch [319/700]:  28%|██▊       | 220/782 [00:01<00:02, 212.91it/s]



Epoch [319/700]:  34%|███▍      | 264/782 [00:01<00:02, 207.61it/s]



Epoch [319/700]:  39%|███▉      | 307/782 [00:01<00:02, 209.15it/s]



Epoch [319/700]:  45%|████▍     | 351/782 [00:01<00:02, 210.23it/s]



Epoch [319/700]:  51%|█████     | 395/782 [00:01<00:01, 211.29it/s]



Epoch [319/700]:  56%|█████▌    | 439/782 [00:02<00:01, 209.35it/s]



Epoch [319/700]:  62%|██████▏   | 482/782 [00:02<00:01, 210.50it/s]



Epoch [319/700]:  67%|██████▋   | 526/782 [00:02<00:01, 207.34it/s]



Epoch [319/700]:  73%|███████▎  | 568/782 [00:02<00:01, 207.34it/s]



Epoch [319/700]:  78%|███████▊  | 610/782 [00:02<00:00, 205.21it/s]



Epoch [319/700]:  84%|████████▎ | 653/782 [00:03<00:00, 208.12it/s]



Epoch [319/700]:  89%|████████▉ | 696/782 [00:03<00:00, 208.90it/s]



Epoch [319/700]:  95%|█████████▍| 740/782 [00:03<00:00, 211.79it/s]



Epoch [319/700]: 100%|██████████| 782/782 [00:03<00:00, 210.44it/s]


Learning Rate: 0.004500
Train Loss: 0.0857, Accuracy: 97.05%, Confidence: 0.9633
Test Loss: 2.2051, Accuracy: 69.01%, Confidence: 0.9268
Train-Test Accuracy Gap: 28.04%


Epoch [320/700]:   3%|▎         | 21/782 [00:00<00:03, 204.13it/s]



Epoch [320/700]:   5%|▌         | 42/782 [00:00<00:03, 207.29it/s]



Epoch [320/700]:   8%|▊         | 64/782 [00:00<00:03, 211.07it/s]



Epoch [320/700]:  17%|█▋        | 131/782 [00:00<00:02, 217.78it/s]



Epoch [320/700]:  22%|██▏       | 175/782 [00:00<00:02, 217.69it/s]



Epoch [320/700]:  28%|██▊       | 219/782 [00:01<00:02, 213.83it/s]



Epoch [320/700]:  34%|███▎      | 263/782 [00:01<00:02, 212.53it/s]



Epoch [320/700]:  39%|███▉      | 307/782 [00:01<00:02, 211.60it/s]



Epoch [320/700]:  45%|████▍     | 351/782 [00:01<00:02, 213.60it/s]



Epoch [320/700]:  51%|█████     | 395/782 [00:01<00:01, 213.30it/s]



Epoch [320/700]:  56%|█████▌    | 439/782 [00:02<00:01, 209.89it/s]



Epoch [320/700]:  62%|██████▏   | 483/782 [00:02<00:01, 209.48it/s]



Epoch [320/700]:  67%|██████▋   | 526/782 [00:02<00:01, 210.24it/s]



Epoch [320/700]:  73%|███████▎  | 570/782 [00:02<00:00, 212.08it/s]



Epoch [320/700]:  81%|████████▏ | 636/782 [00:02<00:00, 211.09it/s]



Epoch [320/700]:  87%|████████▋ | 680/782 [00:03<00:00, 205.48it/s]



Epoch [320/700]:  90%|████████▉ | 702/782 [00:03<00:00, 208.04it/s]



Epoch [320/700]:  98%|█████████▊| 767/782 [00:03<00:00, 209.97it/s]



Epoch [320/700]: 100%|██████████| 782/782 [00:03<00:00, 211.25it/s]


Learning Rate: 0.004500
Train Loss: 0.0859, Accuracy: 97.05%, Confidence: 0.9649
Test Loss: 1.6798, Accuracy: 72.76%, Confidence: 0.9258
Train-Test Accuracy Gap: 24.29%


Epoch [321/700]:   3%|▎         | 21/782 [00:00<00:03, 206.18it/s]



Epoch [321/700]:   5%|▌         | 42/782 [00:00<00:03, 208.17it/s]



Epoch [321/700]:   8%|▊         | 64/782 [00:00<00:03, 211.62it/s]



Epoch [321/700]:  11%|█         | 86/782 [00:00<00:03, 211.57it/s]



Epoch [321/700]:  14%|█▍        | 108/782 [00:00<00:03, 209.97it/s]



Epoch [321/700]:  22%|██▏       | 174/782 [00:00<00:02, 212.73it/s]



Epoch [321/700]:  28%|██▊       | 218/782 [00:01<00:02, 212.37it/s]



Epoch [321/700]:  34%|███▎      | 262/782 [00:01<00:02, 212.60it/s]



Epoch [321/700]:  39%|███▉      | 306/782 [00:01<00:02, 210.92it/s]



Epoch [321/700]:  45%|████▍     | 350/782 [00:01<00:02, 211.19it/s]



Epoch [321/700]:  50%|█████     | 394/782 [00:01<00:01, 214.33it/s]



Epoch [321/700]:  56%|█████▌    | 438/782 [00:02<00:01, 212.71it/s]



Epoch [321/700]:  62%|██████▏   | 482/782 [00:02<00:01, 210.13it/s]



Epoch [321/700]:  67%|██████▋   | 526/782 [00:02<00:01, 210.45it/s]



Epoch [321/700]:  73%|███████▎  | 570/782 [00:02<00:01, 210.38it/s]



Epoch [321/700]:  79%|███████▊  | 614/782 [00:02<00:00, 211.17it/s]



Epoch [321/700]:  84%|████████▍ | 658/782 [00:03<00:00, 212.25it/s]



Epoch [321/700]:  90%|████████▉ | 702/782 [00:03<00:00, 209.67it/s]



Epoch [321/700]:  95%|█████████▌| 745/782 [00:03<00:00, 211.45it/s]



Epoch [321/700]: 100%|██████████| 782/782 [00:03<00:00, 211.03it/s]


Learning Rate: 0.004500
Train Loss: 0.0877, Accuracy: 97.00%, Confidence: 0.9640
Test Loss: 1.5933, Accuracy: 73.82%, Confidence: 0.9277
Train-Test Accuracy Gap: 23.18%


Epoch [322/700]:   3%|▎         | 22/782 [00:00<00:03, 213.40it/s]



Epoch [322/700]:   8%|▊         | 66/782 [00:00<00:03, 212.56it/s]



Epoch [322/700]:  17%|█▋        | 132/782 [00:00<00:03, 216.62it/s]



Epoch [322/700]:  23%|██▎       | 176/782 [00:00<00:02, 213.54it/s]



Epoch [322/700]:  28%|██▊       | 220/782 [00:01<00:02, 212.31it/s]



Epoch [322/700]:  34%|███▍      | 264/782 [00:01<00:02, 211.57it/s]



Epoch [322/700]:  39%|███▉      | 308/782 [00:01<00:02, 210.77it/s]



Epoch [322/700]:  45%|████▌     | 352/782 [00:01<00:02, 212.52it/s]



Epoch [322/700]:  53%|█████▎    | 418/782 [00:01<00:01, 213.40it/s]



Epoch [322/700]:  59%|█████▉    | 462/782 [00:02<00:01, 213.24it/s]



Epoch [322/700]:  65%|██████▍   | 506/782 [00:02<00:01, 214.22it/s]



Epoch [322/700]:  70%|███████   | 550/782 [00:02<00:01, 212.63it/s]



Epoch [322/700]:  76%|███████▌  | 594/782 [00:02<00:00, 209.30it/s]



Epoch [322/700]:  84%|████████▍ | 660/782 [00:03<00:00, 213.13it/s]



Epoch [322/700]:  90%|█████████ | 704/782 [00:03<00:00, 210.94it/s]



Epoch [322/700]:  96%|█████████▌| 748/782 [00:03<00:00, 211.08it/s]



Epoch [322/700]: 100%|██████████| 782/782 [00:03<00:00, 211.95it/s]


Learning Rate: 0.004500
Train Loss: 0.0733, Accuracy: 97.53%, Confidence: 0.9661
Test Loss: 1.6342, Accuracy: 73.68%, Confidence: 0.9278
Train-Test Accuracy Gap: 23.85%


Epoch [323/700]:   3%|▎         | 22/782 [00:00<00:03, 211.15it/s]



Epoch [323/700]:   6%|▌         | 44/782 [00:00<00:03, 205.50it/s]



Epoch [323/700]:   8%|▊         | 66/782 [00:00<00:03, 210.21it/s]



Epoch [323/700]:  11%|█▏        | 88/782 [00:00<00:03, 210.18it/s]



Epoch [323/700]:  14%|█▍        | 110/782 [00:00<00:03, 211.34it/s]



Epoch [323/700]:  17%|█▋        | 132/782 [00:00<00:03, 213.01it/s]



Epoch [323/700]:  20%|█▉        | 154/782 [00:00<00:02, 210.79it/s]



Epoch [323/700]:  23%|██▎       | 176/782 [00:00<00:02, 212.10it/s]



Epoch [323/700]:  28%|██▊       | 220/782 [00:01<00:02, 214.64it/s]



Epoch [323/700]:  34%|███▍      | 264/782 [00:01<00:02, 213.38it/s]



Epoch [323/700]:  37%|███▋      | 286/782 [00:01<00:02, 211.32it/s]



Epoch [323/700]:  39%|███▉      | 308/782 [00:01<00:02, 212.09it/s]



Epoch [323/700]:  45%|████▌     | 352/782 [00:01<00:02, 213.73it/s]



Epoch [323/700]:  51%|█████     | 396/782 [00:01<00:01, 214.11it/s]



Epoch [323/700]:  56%|█████▋    | 440/782 [00:02<00:01, 214.21it/s]



Epoch [323/700]:  62%|██████▏   | 484/782 [00:02<00:01, 214.17it/s]



Epoch [323/700]:  68%|██████▊   | 528/782 [00:02<00:01, 213.79it/s]



Epoch [323/700]:  73%|███████▎  | 572/782 [00:02<00:00, 214.65it/s]



Epoch [323/700]:  79%|███████▉  | 616/782 [00:02<00:00, 214.58it/s]



Epoch [323/700]:  84%|████████▍ | 660/782 [00:03<00:00, 213.82it/s]



Epoch [323/700]:  90%|█████████ | 704/782 [00:03<00:00, 213.29it/s]



Epoch [323/700]:  96%|█████████▌| 748/782 [00:03<00:00, 212.24it/s]



Epoch [323/700]: 100%|██████████| 782/782 [00:03<00:00, 212.37it/s]


Learning Rate: 0.004500
Train Loss: 0.0873, Accuracy: 97.03%, Confidence: 0.9638
Test Loss: 1.9689, Accuracy: 70.06%, Confidence: 0.9259
Train-Test Accuracy Gap: 26.97%


Epoch [324/700]:   3%|▎         | 22/782 [00:00<00:03, 210.20it/s]



Epoch [324/700]:   6%|▌         | 44/782 [00:00<00:03, 212.11it/s]



Epoch [324/700]:   8%|▊         | 66/782 [00:00<00:03, 212.80it/s]



Epoch [324/700]:  11%|█▏        | 88/782 [00:00<00:03, 214.09it/s]



Epoch [324/700]:  14%|█▍        | 110/782 [00:00<00:03, 212.82it/s]



Epoch [324/700]:  17%|█▋        | 132/782 [00:00<00:03, 214.04it/s]



Epoch [324/700]:  20%|█▉        | 154/782 [00:00<00:02, 212.96it/s]



Epoch [324/700]:  23%|██▎       | 176/782 [00:00<00:02, 212.85it/s]



Epoch [324/700]:  25%|██▌       | 198/782 [00:00<00:02, 212.49it/s]



Epoch [324/700]:  28%|██▊       | 220/782 [00:01<00:02, 209.63it/s]



Epoch [324/700]:  34%|███▍      | 264/782 [00:01<00:02, 213.22it/s]



Epoch [324/700]:  39%|███▉      | 308/782 [00:01<00:02, 213.86it/s]



Epoch [324/700]:  45%|████▌     | 352/782 [00:01<00:02, 214.54it/s]



Epoch [324/700]:  51%|█████     | 396/782 [00:01<00:01, 213.37it/s]



Epoch [324/700]:  56%|█████▋    | 440/782 [00:02<00:01, 213.07it/s]



Epoch [324/700]:  62%|██████▏   | 484/782 [00:02<00:01, 212.31it/s]



Epoch [324/700]:  68%|██████▊   | 528/782 [00:02<00:01, 214.24it/s]



Epoch [324/700]:  73%|███████▎  | 572/782 [00:02<00:00, 214.74it/s]



Epoch [324/700]:  79%|███████▉  | 616/782 [00:02<00:00, 212.55it/s]



Epoch [324/700]:  84%|████████▍ | 660/782 [00:03<00:00, 212.84it/s]



Epoch [324/700]:  90%|█████████ | 704/782 [00:03<00:00, 211.54it/s]



Epoch [324/700]:  96%|█████████▌| 747/782 [00:03<00:00, 207.95it/s]



Epoch [324/700]: 100%|██████████| 782/782 [00:03<00:00, 211.66it/s]


Learning Rate: 0.004500
Train Loss: 0.0813, Accuracy: 97.05%, Confidence: 0.9648
Test Loss: 2.0619, Accuracy: 70.80%, Confidence: 0.9310
Train-Test Accuracy Gap: 26.25%


Epoch [325/700]:   3%|▎         | 21/782 [00:00<00:03, 205.65it/s]



Epoch [325/700]:  11%|█         | 85/782 [00:00<00:03, 208.30it/s]



Epoch [325/700]:  16%|█▌        | 127/782 [00:00<00:03, 204.71it/s]



Epoch [325/700]:  22%|██▏       | 169/782 [00:00<00:03, 201.06it/s]



Epoch [325/700]:  27%|██▋       | 211/782 [00:01<00:02, 202.29it/s]



Epoch [325/700]:  32%|███▏      | 253/782 [00:01<00:02, 203.91it/s]



Epoch [325/700]:  38%|███▊      | 296/782 [00:01<00:02, 206.85it/s]



Epoch [325/700]:  43%|████▎     | 339/782 [00:01<00:02, 206.88it/s]



Epoch [325/700]:  49%|████▊     | 381/782 [00:01<00:01, 206.98it/s]



Epoch [325/700]:  54%|█████▍    | 425/782 [00:02<00:01, 209.55it/s]



Epoch [325/700]:  60%|█████▉    | 467/782 [00:02<00:01, 207.89it/s]



Epoch [325/700]:  65%|██████▌   | 510/782 [00:02<00:01, 205.49it/s]



Epoch [325/700]:  71%|███████   | 554/782 [00:02<00:01, 209.30it/s]



Epoch [325/700]:  76%|███████▋  | 597/782 [00:02<00:00, 208.48it/s]



Epoch [325/700]:  82%|████████▏ | 640/782 [00:03<00:00, 209.75it/s]



Epoch [325/700]:  87%|████████▋ | 684/782 [00:03<00:00, 211.13it/s]



Epoch [325/700]:  96%|█████████▌| 750/782 [00:03<00:00, 214.85it/s]



Epoch [325/700]: 100%|██████████| 782/782 [00:03<00:00, 207.95it/s]


Learning Rate: 0.004500
Train Loss: 0.0852, Accuracy: 97.10%, Confidence: 0.9645
Test Loss: 1.7455, Accuracy: 73.17%, Confidence: 0.9289
Train-Test Accuracy Gap: 23.93%


Epoch [326/700]:   3%|▎         | 21/782 [00:00<00:03, 207.02it/s]



Epoch [326/700]:   5%|▌         | 43/782 [00:00<00:03, 211.49it/s]



Epoch [326/700]:  11%|█         | 87/782 [00:00<00:03, 213.21it/s]



Epoch [326/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.76it/s]



Epoch [326/700]:  22%|██▏       | 175/782 [00:00<00:02, 213.69it/s]



Epoch [326/700]:  28%|██▊       | 219/782 [00:01<00:02, 209.86it/s]



Epoch [326/700]:  34%|███▎      | 263/782 [00:01<00:02, 206.78it/s]



Epoch [326/700]:  39%|███▉      | 306/782 [00:01<00:02, 209.92it/s]



Epoch [326/700]:  45%|████▍     | 350/782 [00:01<00:02, 212.89it/s]



Epoch [326/700]:  50%|█████     | 394/782 [00:01<00:01, 214.38it/s]



Epoch [326/700]:  56%|█████▌    | 439/782 [00:02<00:01, 216.05it/s]



Epoch [326/700]:  62%|██████▏   | 484/782 [00:02<00:01, 217.13it/s]



Epoch [326/700]:  68%|██████▊   | 528/782 [00:02<00:01, 214.51it/s]



Epoch [326/700]:  73%|███████▎  | 572/782 [00:02<00:00, 213.82it/s]



Epoch [326/700]:  82%|████████▏ | 638/782 [00:02<00:00, 215.18it/s]



Epoch [326/700]:  87%|████████▋ | 682/782 [00:03<00:00, 212.58it/s]



Epoch [326/700]:  93%|█████████▎| 726/782 [00:03<00:00, 210.70it/s]



Epoch [326/700]:  98%|█████████▊| 770/782 [00:03<00:00, 211.36it/s]



Epoch [326/700]: 100%|██████████| 782/782 [00:03<00:00, 212.31it/s]


Learning Rate: 0.004500
Train Loss: 0.0840, Accuracy: 97.09%, Confidence: 0.9647
Test Loss: 1.8790, Accuracy: 71.62%, Confidence: 0.9295
Train-Test Accuracy Gap: 25.47%


Epoch [327/700]:   3%|▎         | 21/782 [00:00<00:03, 208.96it/s]



Epoch [327/700]:   5%|▌         | 43/782 [00:00<00:03, 214.03it/s]



Epoch [327/700]:   8%|▊         | 65/782 [00:00<00:03, 208.23it/s]



Epoch [327/700]:  17%|█▋        | 131/782 [00:00<00:03, 214.20it/s]



Epoch [327/700]:  23%|██▎       | 176/782 [00:00<00:02, 215.42it/s]



Epoch [327/700]:  28%|██▊       | 220/782 [00:01<00:02, 215.10it/s]



Epoch [327/700]:  34%|███▍      | 264/782 [00:01<00:02, 212.77it/s]



Epoch [327/700]:  39%|███▉      | 308/782 [00:01<00:02, 211.03it/s]



Epoch [327/700]:  45%|████▌     | 352/782 [00:01<00:02, 211.39it/s]



Epoch [327/700]:  51%|█████     | 396/782 [00:01<00:01, 212.75it/s]



Epoch [327/700]:  56%|█████▋    | 441/782 [00:02<00:01, 215.38it/s]



Epoch [327/700]:  62%|██████▏   | 485/782 [00:02<00:01, 213.60it/s]



Epoch [327/700]:  68%|██████▊   | 529/782 [00:02<00:01, 211.64it/s]



Epoch [327/700]:  73%|███████▎  | 573/782 [00:02<00:00, 212.61it/s]



Epoch [327/700]:  79%|███████▉  | 617/782 [00:02<00:00, 214.51it/s]



Epoch [327/700]:  85%|████████▍ | 662/782 [00:03<00:00, 216.98it/s]



Epoch [327/700]:  90%|█████████ | 706/782 [00:03<00:00, 214.24it/s]



Epoch [327/700]:  96%|█████████▌| 750/782 [00:03<00:00, 210.89it/s]



Epoch [327/700]: 100%|██████████| 782/782 [00:03<00:00, 212.47it/s]


Learning Rate: 0.004500
Train Loss: 0.0816, Accuracy: 97.18%, Confidence: 0.9658
Test Loss: 1.5991, Accuracy: 73.44%, Confidence: 0.9250
Train-Test Accuracy Gap: 23.74%


Epoch [328/700]:   3%|▎         | 22/782 [00:00<00:03, 216.84it/s]



Epoch [328/700]:  11%|█▏        | 88/782 [00:00<00:03, 214.96it/s]



Epoch [328/700]:  17%|█▋        | 132/782 [00:00<00:03, 216.20it/s]



Epoch [328/700]:  23%|██▎       | 176/782 [00:00<00:02, 212.66it/s]



Epoch [328/700]:  28%|██▊       | 220/782 [00:01<00:02, 212.83it/s]



Epoch [328/700]:  34%|███▍      | 264/782 [00:01<00:02, 208.20it/s]



Epoch [328/700]:  39%|███▉      | 307/782 [00:01<00:02, 209.66it/s]



Epoch [328/700]:  45%|████▍     | 350/782 [00:01<00:02, 211.21it/s]



Epoch [328/700]:  50%|█████     | 394/782 [00:01<00:01, 210.34it/s]



Epoch [328/700]:  56%|█████▌    | 438/782 [00:02<00:01, 210.22it/s]



Epoch [328/700]:  62%|██████▏   | 482/782 [00:02<00:01, 213.10it/s]



Epoch [328/700]:  67%|██████▋   | 526/782 [00:02<00:01, 213.73it/s]



Epoch [328/700]:  73%|███████▎  | 569/782 [00:02<00:01, 209.05it/s]



Epoch [328/700]:  78%|███████▊  | 611/782 [00:02<00:00, 207.64it/s]



Epoch [328/700]:  84%|████████▍ | 655/782 [00:03<00:00, 208.89it/s]



Epoch [328/700]:  89%|████████▉ | 698/782 [00:03<00:00, 208.71it/s]



Epoch [328/700]:  95%|█████████▍| 741/782 [00:03<00:00, 210.65it/s]



Epoch [328/700]: 100%|██████████| 782/782 [00:03<00:00, 210.94it/s]


Learning Rate: 0.004500
Train Loss: 0.0813, Accuracy: 97.18%, Confidence: 0.9648
Test Loss: 1.7477, Accuracy: 73.50%, Confidence: 0.9308
Train-Test Accuracy Gap: 23.68%


Epoch [329/700]:   3%|▎         | 21/782 [00:00<00:03, 209.33it/s]



Epoch [329/700]:  11%|█         | 87/782 [00:00<00:03, 215.47it/s]



Epoch [329/700]:  17%|█▋        | 131/782 [00:00<00:03, 213.62it/s]



Epoch [329/700]:  22%|██▏       | 175/782 [00:00<00:02, 215.83it/s]



Epoch [329/700]:  28%|██▊       | 220/782 [00:01<00:02, 216.92it/s]



Epoch [329/700]:  37%|███▋      | 286/782 [00:01<00:02, 215.45it/s]



Epoch [329/700]:  42%|████▏     | 330/782 [00:01<00:02, 215.41it/s]



Epoch [329/700]:  48%|████▊     | 374/782 [00:01<00:01, 216.50it/s]



Epoch [329/700]:  53%|█████▎    | 418/782 [00:01<00:01, 215.19it/s]



Epoch [329/700]:  59%|█████▉    | 462/782 [00:02<00:01, 211.98it/s]



Epoch [329/700]:  67%|██████▋   | 527/782 [00:02<00:01, 206.48it/s]



Epoch [329/700]:  73%|███████▎  | 569/782 [00:02<00:01, 204.23it/s]



Epoch [329/700]:  78%|███████▊  | 612/782 [00:02<00:00, 208.43it/s]



Epoch [329/700]:  84%|████████▍ | 655/782 [00:03<00:00, 208.87it/s]



Epoch [329/700]:  89%|████████▉ | 698/782 [00:03<00:00, 208.78it/s]



Epoch [329/700]:  95%|█████████▍| 741/782 [00:03<00:00, 208.20it/s]



Epoch [329/700]: 100%|██████████| 782/782 [00:03<00:00, 211.67it/s]


Learning Rate: 0.004500
Train Loss: 0.0828, Accuracy: 97.07%, Confidence: 0.9649
Test Loss: 1.7063, Accuracy: 73.10%, Confidence: 0.9283
Train-Test Accuracy Gap: 23.97%


Epoch [330/700]:   3%|▎         | 22/782 [00:00<00:03, 210.25it/s]



Epoch [330/700]:  11%|█▏        | 88/782 [00:00<00:03, 209.27it/s]



Epoch [330/700]:  17%|█▋        | 132/782 [00:00<00:03, 213.24it/s]



Epoch [330/700]:  25%|██▌       | 198/782 [00:00<00:02, 215.74it/s]



Epoch [330/700]:  31%|███       | 242/782 [00:01<00:02, 213.72it/s]



Epoch [330/700]:  37%|███▋      | 286/782 [00:01<00:02, 213.58it/s]



Epoch [330/700]:  42%|████▏     | 330/782 [00:01<00:02, 210.45it/s]



Epoch [330/700]:  48%|████▊     | 373/782 [00:01<00:01, 209.02it/s]



Epoch [330/700]:  53%|█████▎    | 415/782 [00:01<00:01, 208.50it/s]



Epoch [330/700]:  59%|█████▊    | 458/782 [00:02<00:01, 209.34it/s]



Epoch [330/700]:  64%|██████▍   | 502/782 [00:02<00:01, 210.01it/s]



Epoch [330/700]:  70%|██████▉   | 545/782 [00:02<00:01, 209.31it/s]



Epoch [330/700]:  75%|███████▌  | 588/782 [00:02<00:00, 209.83it/s]



Epoch [330/700]:  81%|████████  | 632/782 [00:02<00:00, 212.46it/s]



Epoch [330/700]:  86%|████████▋ | 676/782 [00:03<00:00, 213.54it/s]



Epoch [330/700]:  92%|█████████▏| 719/782 [00:03<00:00, 206.41it/s]



Epoch [330/700]:  97%|█████████▋| 762/782 [00:03<00:00, 207.82it/s]



Epoch [330/700]: 100%|██████████| 782/782 [00:03<00:00, 210.41it/s]


Learning Rate: 0.004500
Train Loss: 0.0791, Accuracy: 97.31%, Confidence: 0.9657
Test Loss: 1.7133, Accuracy: 73.71%, Confidence: 0.9293
Train-Test Accuracy Gap: 23.60%


Epoch [331/700]:   3%|▎         | 22/782 [00:00<00:03, 210.88it/s]



Epoch [331/700]:   6%|▌         | 44/782 [00:00<00:03, 214.31it/s]



Epoch [331/700]:   8%|▊         | 66/782 [00:00<00:03, 215.40it/s]



Epoch [331/700]:  11%|█▏        | 88/782 [00:00<00:03, 212.10it/s]



Epoch [331/700]:  17%|█▋        | 132/782 [00:00<00:03, 214.54it/s]



Epoch [331/700]:  23%|██▎       | 176/782 [00:00<00:02, 212.33it/s]



Epoch [331/700]:  28%|██▊       | 220/782 [00:01<00:02, 212.93it/s]



Epoch [331/700]:  34%|███▍      | 264/782 [00:01<00:02, 210.08it/s]



Epoch [331/700]:  39%|███▉      | 307/782 [00:01<00:02, 204.76it/s]



Epoch [331/700]:  42%|████▏     | 328/782 [00:01<00:02, 204.19it/s]



Epoch [331/700]:  45%|████▍     | 349/782 [00:01<00:02, 202.87it/s]



Epoch [331/700]:  50%|█████     | 392/782 [00:01<00:01, 206.32it/s]



Epoch [331/700]:  56%|█████▌    | 435/782 [00:02<00:01, 207.73it/s]



Epoch [331/700]:  61%|██████    | 478/782 [00:02<00:01, 210.48it/s]



Epoch [331/700]:  67%|██████▋   | 522/782 [00:02<00:01, 210.34it/s]



Epoch [331/700]:  72%|███████▏  | 566/782 [00:02<00:01, 211.62it/s]



Epoch [331/700]:  78%|███████▊  | 610/782 [00:02<00:00, 211.96it/s]



Epoch [331/700]:  84%|████████▎ | 654/782 [00:03<00:00, 208.35it/s]



Epoch [331/700]:  89%|████████▉ | 696/782 [00:03<00:00, 202.92it/s]



Epoch [331/700]:  95%|█████████▍| 740/782 [00:03<00:00, 208.91it/s]



Epoch [331/700]: 100%|██████████| 782/782 [00:03<00:00, 208.97it/s]


Learning Rate: 0.004500
Train Loss: 0.0754, Accuracy: 97.40%, Confidence: 0.9669
Test Loss: 1.9777, Accuracy: 70.04%, Confidence: 0.9230
Train-Test Accuracy Gap: 27.36%


Epoch [332/700]:   3%|▎         | 22/782 [00:00<00:03, 211.94it/s]



Epoch [332/700]:   6%|▌         | 44/782 [00:00<00:03, 214.90it/s]



Epoch [332/700]:   8%|▊         | 66/782 [00:00<00:03, 215.50it/s]



Epoch [332/700]:  11%|█▏        | 88/782 [00:00<00:03, 212.31it/s]



Epoch [332/700]:  14%|█▍        | 110/782 [00:00<00:03, 206.20it/s]



Epoch [332/700]:  17%|█▋        | 131/782 [00:00<00:03, 203.90it/s]



Epoch [332/700]:  19%|█▉        | 152/782 [00:00<00:03, 204.66it/s]



Epoch [332/700]:  22%|██▏       | 174/782 [00:00<00:02, 207.64it/s]



Epoch [332/700]:  28%|██▊       | 219/782 [00:01<00:02, 212.58it/s]



Epoch [332/700]:  34%|███▎      | 263/782 [00:01<00:02, 213.37it/s]



Epoch [332/700]:  39%|███▉      | 307/782 [00:01<00:02, 208.47it/s]



Epoch [332/700]:  45%|████▍     | 350/782 [00:01<00:02, 208.55it/s]



Epoch [332/700]:  47%|████▋     | 371/782 [00:01<00:01, 208.03it/s]



Epoch [332/700]:  50%|█████     | 392/782 [00:01<00:01, 206.93it/s]



Epoch [332/700]:  56%|█████▌    | 436/782 [00:02<00:01, 209.99it/s]



Epoch [332/700]:  64%|██████▍   | 502/782 [00:02<00:01, 210.84it/s]



Epoch [332/700]:  70%|██████▉   | 546/782 [00:02<00:01, 211.41it/s]



Epoch [332/700]:  75%|███████▌  | 590/782 [00:02<00:00, 212.44it/s]



Epoch [332/700]:  81%|████████  | 634/782 [00:03<00:00, 214.04it/s]



Epoch [332/700]:  87%|████████▋ | 678/782 [00:03<00:00, 214.28it/s]



Epoch [332/700]:  92%|█████████▏| 722/782 [00:03<00:00, 211.86it/s]



Epoch [332/700]:  98%|█████████▊| 766/782 [00:03<00:00, 212.37it/s]



Epoch [332/700]: 100%|██████████| 782/782 [00:03<00:00, 210.55it/s]


Learning Rate: 0.004500
Train Loss: 0.0791, Accuracy: 97.27%, Confidence: 0.9662
Test Loss: 1.6707, Accuracy: 73.69%, Confidence: 0.9303
Train-Test Accuracy Gap: 23.58%


Epoch [333/700]:   3%|▎         | 20/782 [00:00<00:03, 198.68it/s]



Epoch [333/700]:   5%|▌         | 42/782 [00:00<00:03, 209.94it/s]



Epoch [333/700]:  11%|█         | 86/782 [00:00<00:03, 209.73it/s]



Epoch [333/700]:  16%|█▋        | 129/782 [00:00<00:03, 210.02it/s]



Epoch [333/700]:  22%|██▏       | 173/782 [00:00<00:02, 211.02it/s]



Epoch [333/700]:  28%|██▊       | 217/782 [00:01<00:02, 212.16it/s]



Epoch [333/700]:  33%|███▎      | 261/782 [00:01<00:02, 213.48it/s]



Epoch [333/700]:  36%|███▌      | 283/782 [00:01<00:02, 211.34it/s]



Epoch [333/700]:  39%|███▉      | 305/782 [00:01<00:02, 211.44it/s]



Epoch [333/700]:  45%|████▍     | 349/782 [00:01<00:02, 212.24it/s]



Epoch [333/700]:  50%|█████     | 393/782 [00:01<00:01, 211.91it/s]



Epoch [333/700]:  56%|█████▌    | 437/782 [00:02<00:01, 214.90it/s]



Epoch [333/700]:  62%|██████▏   | 482/782 [00:02<00:01, 217.37it/s]



Epoch [333/700]:  67%|██████▋   | 527/782 [00:02<00:01, 218.11it/s]



Epoch [333/700]:  76%|███████▌  | 593/782 [00:02<00:00, 218.01it/s]



Epoch [333/700]:  81%|████████▏ | 637/782 [00:02<00:00, 216.55it/s]



Epoch [333/700]:  90%|████████▉ | 703/782 [00:03<00:00, 215.15it/s]



Epoch [333/700]:  93%|█████████▎| 725/782 [00:03<00:00, 213.19it/s]



Epoch [333/700]: 100%|██████████| 782/782 [00:03<00:00, 213.31it/s]


Learning Rate: 0.004500
Train Loss: 0.0804, Accuracy: 97.20%, Confidence: 0.9664
Test Loss: 2.1745, Accuracy: 70.47%, Confidence: 0.9291
Train-Test Accuracy Gap: 26.73%


Epoch [334/700]:   3%|▎         | 21/782 [00:00<00:03, 209.69it/s]



Epoch [334/700]:   5%|▌         | 43/782 [00:00<00:03, 210.38it/s]



Epoch [334/700]:   8%|▊         | 65/782 [00:00<00:03, 209.32it/s]



Epoch [334/700]:  11%|█         | 86/782 [00:00<00:03, 207.22it/s]



Epoch [334/700]:  14%|█▎        | 107/782 [00:00<00:03, 203.45it/s]



Epoch [334/700]:  16%|█▋        | 128/782 [00:00<00:03, 203.02it/s]



Epoch [334/700]:  19%|█▉        | 149/782 [00:00<00:03, 204.34it/s]



Epoch [334/700]:  22%|██▏       | 171/782 [00:00<00:02, 207.02it/s]



Epoch [334/700]:  25%|██▍       | 193/782 [00:00<00:02, 209.75it/s]



Epoch [334/700]:  27%|██▋       | 215/782 [00:01<00:02, 210.06it/s]



Epoch [334/700]:  30%|███       | 237/782 [00:01<00:02, 209.53it/s]



Epoch [334/700]:  33%|███▎      | 259/782 [00:01<00:02, 211.71it/s]



Epoch [334/700]:  39%|███▊      | 303/782 [00:01<00:02, 210.75it/s]



Epoch [334/700]:  44%|████▍     | 347/782 [00:01<00:02, 211.39it/s]



Epoch [334/700]:  50%|█████     | 392/782 [00:01<00:01, 214.21it/s]



Epoch [334/700]:  56%|█████▌    | 435/782 [00:02<00:01, 207.21it/s]



Epoch [334/700]:  61%|██████    | 478/782 [00:02<00:01, 209.00it/s]



Epoch [334/700]:  67%|██████▋   | 522/782 [00:02<00:01, 210.94it/s]



Epoch [334/700]:  72%|███████▏  | 566/782 [00:02<00:01, 210.85it/s]



Epoch [334/700]:  78%|███████▊  | 609/782 [00:02<00:00, 206.50it/s]



Epoch [334/700]:  86%|████████▌ | 672/782 [00:03<00:00, 205.37it/s]



Epoch [334/700]:  91%|█████████▏| 715/782 [00:03<00:00, 208.60it/s]



Epoch [334/700]:  97%|█████████▋| 759/782 [00:03<00:00, 211.43it/s]



Epoch [334/700]: 100%|██████████| 782/782 [00:03<00:00, 208.81it/s]


Learning Rate: 0.004500
Train Loss: 0.0856, Accuracy: 97.01%, Confidence: 0.9649
Test Loss: 1.7055, Accuracy: 73.11%, Confidence: 0.9286
Train-Test Accuracy Gap: 23.90%


Epoch [335/700]:   3%|▎         | 22/782 [00:00<00:03, 215.25it/s]



Epoch [335/700]:   6%|▌         | 44/782 [00:00<00:03, 215.93it/s]



Epoch [335/700]:   8%|▊         | 66/782 [00:00<00:03, 215.15it/s]



Epoch [335/700]:  11%|█▏        | 88/782 [00:00<00:03, 215.78it/s]



Epoch [335/700]:  17%|█▋        | 132/782 [00:00<00:03, 216.12it/s]



Epoch [335/700]:  23%|██▎       | 176/782 [00:00<00:02, 214.06it/s]



Epoch [335/700]:  28%|██▊       | 220/782 [00:01<00:02, 214.12it/s]



Epoch [335/700]:  31%|███       | 242/782 [00:01<00:02, 211.29it/s]



Epoch [335/700]:  34%|███▍      | 264/782 [00:01<00:02, 206.32it/s]



Epoch [335/700]:  36%|███▋      | 285/782 [00:01<00:02, 206.87it/s]



Epoch [335/700]:  39%|███▉      | 306/782 [00:01<00:02, 207.35it/s]



Epoch [335/700]:  42%|████▏     | 328/782 [00:01<00:02, 208.92it/s]



Epoch [335/700]:  45%|████▍     | 350/782 [00:01<00:02, 210.09it/s]



Epoch [335/700]:  48%|████▊     | 372/782 [00:01<00:01, 211.30it/s]



Epoch [335/700]:  50%|█████     | 394/782 [00:01<00:01, 212.59it/s]



Epoch [335/700]:  53%|█████▎    | 416/782 [00:01<00:01, 210.92it/s]



Epoch [335/700]:  56%|█████▌    | 438/782 [00:02<00:01, 212.53it/s]



Epoch [335/700]:  59%|█████▉    | 460/782 [00:02<00:01, 212.88it/s]



Epoch [335/700]:  62%|██████▏   | 482/782 [00:02<00:01, 212.95it/s]



Epoch [335/700]:  64%|██████▍   | 504/782 [00:02<00:01, 212.66it/s]



Epoch [335/700]:  67%|██████▋   | 526/782 [00:02<00:01, 210.69it/s]



Epoch [335/700]:  70%|███████   | 548/782 [00:02<00:01, 210.99it/s]



Epoch [335/700]:  73%|███████▎  | 570/782 [00:02<00:01, 210.78it/s]



Epoch [335/700]:  76%|███████▌  | 592/782 [00:02<00:00, 211.58it/s]



Epoch [335/700]:  81%|████████▏ | 636/782 [00:02<00:00, 213.33it/s]



Epoch [335/700]:  87%|████████▋ | 680/782 [00:03<00:00, 213.22it/s]



Epoch [335/700]:  93%|█████████▎| 724/782 [00:03<00:00, 212.22it/s]



Epoch [335/700]:  98%|█████████▊| 768/782 [00:03<00:00, 210.53it/s]



Epoch [335/700]: 100%|██████████| 782/782 [00:03<00:00, 211.41it/s]


Learning Rate: 0.004500
Train Loss: 0.0815, Accuracy: 97.13%, Confidence: 0.9656
Test Loss: 2.1476, Accuracy: 69.97%, Confidence: 0.9285
Train-Test Accuracy Gap: 27.16%


Epoch [336/700]:   3%|▎         | 21/782 [00:00<00:03, 207.63it/s]



Epoch [336/700]:   5%|▌         | 43/782 [00:00<00:03, 209.98it/s]



Epoch [336/700]:   8%|▊         | 65/782 [00:00<00:03, 213.01it/s]



Epoch [336/700]:  11%|█         | 87/782 [00:00<00:03, 212.37it/s]



Epoch [336/700]:  14%|█▍        | 109/782 [00:00<00:03, 212.52it/s]



Epoch [336/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.41it/s]



Epoch [336/700]:  20%|█▉        | 153/782 [00:00<00:02, 212.32it/s]



Epoch [336/700]:  22%|██▏       | 175/782 [00:00<00:02, 213.16it/s]



Epoch [336/700]:  25%|██▌       | 197/782 [00:00<00:02, 214.85it/s]



Epoch [336/700]:  28%|██▊       | 219/782 [00:01<00:02, 214.77it/s]



Epoch [336/700]:  34%|███▎      | 263/782 [00:01<00:02, 215.93it/s]



Epoch [336/700]:  39%|███▉      | 308/782 [00:01<00:02, 217.40it/s]



Epoch [336/700]:  45%|████▌     | 352/782 [00:01<00:02, 212.29it/s]



Epoch [336/700]:  51%|█████     | 396/782 [00:01<00:01, 212.64it/s]



Epoch [336/700]:  56%|█████▌    | 439/782 [00:02<00:01, 208.58it/s]



Epoch [336/700]:  62%|██████▏   | 481/782 [00:02<00:01, 206.56it/s]



Epoch [336/700]:  67%|██████▋   | 523/782 [00:02<00:01, 206.69it/s]



Epoch [336/700]:  72%|███████▏  | 566/782 [00:02<00:01, 209.59it/s]



Epoch [336/700]:  78%|███████▊  | 608/782 [00:02<00:00, 207.12it/s]



Epoch [336/700]:  80%|████████  | 629/782 [00:02<00:00, 205.75it/s]



Epoch [336/700]:  83%|████████▎ | 650/782 [00:03<00:00, 206.19it/s]



Epoch [336/700]:  86%|████████▌ | 671/782 [00:03<00:00, 206.30it/s]



Epoch [336/700]:  89%|████████▊ | 693/782 [00:03<00:00, 208.15it/s]



Epoch [336/700]:  91%|█████████▏| 714/782 [00:03<00:00, 205.32it/s]



Epoch [336/700]:  94%|█████████▍| 735/782 [00:03<00:00, 203.55it/s]



Epoch [336/700]:  97%|█████████▋| 756/782 [00:03<00:00, 205.39it/s]



Epoch [336/700]: 100%|██████████| 782/782 [00:03<00:00, 209.53it/s]






Learning Rate: 0.004500
Train Loss: 0.0743, Accuracy: 97.41%, Confidence: 0.9668
Test Loss: 1.7874, Accuracy: 72.11%, Confidence: 0.9292
Train-Test Accuracy Gap: 25.30%


Epoch [337/700]:   3%|▎         | 20/782 [00:00<00:03, 199.81it/s]



Epoch [337/700]:  11%|█         | 84/782 [00:00<00:03, 200.55it/s]



Epoch [337/700]:  16%|█▌        | 126/782 [00:00<00:03, 201.77it/s]



Epoch [337/700]:  22%|██▏       | 169/782 [00:00<00:02, 205.96it/s]



Epoch [337/700]:  27%|██▋       | 212/782 [00:01<00:02, 209.32it/s]



Epoch [337/700]:  33%|███▎      | 255/782 [00:01<00:02, 210.01it/s]



Epoch [337/700]:  38%|███▊      | 299/782 [00:01<00:02, 210.04it/s]



Epoch [337/700]:  44%|████▍     | 343/782 [00:01<00:02, 212.33it/s]



Epoch [337/700]:  49%|████▉     | 387/782 [00:01<00:01, 212.51it/s]



Epoch [337/700]:  55%|█████▌    | 431/782 [00:02<00:01, 211.90it/s]



Epoch [337/700]:  61%|██████    | 475/782 [00:02<00:01, 210.53it/s]



Epoch [337/700]:  66%|██████▋   | 519/782 [00:02<00:01, 212.15it/s]



Epoch [337/700]:  72%|███████▏  | 563/782 [00:02<00:01, 210.98it/s]



Epoch [337/700]:  78%|███████▊  | 607/782 [00:02<00:00, 211.23it/s]



Epoch [337/700]:  83%|████████▎ | 650/782 [00:03<00:00, 207.99it/s]



Epoch [337/700]:  88%|████████▊ | 692/782 [00:03<00:00, 207.13it/s]



Epoch [337/700]:  94%|█████████▍| 735/782 [00:03<00:00, 209.02it/s]



Epoch [337/700]: 100%|██████████| 782/782 [00:03<00:00, 209.04it/s]


Learning Rate: 0.004500
Train Loss: 0.0784, Accuracy: 97.34%, Confidence: 0.9661
Test Loss: 1.8475, Accuracy: 71.48%, Confidence: 0.9285
Train-Test Accuracy Gap: 25.86%


Epoch [338/700]:   3%|▎         | 21/782 [00:00<00:03, 208.60it/s]



Epoch [338/700]:  11%|█         | 87/782 [00:00<00:03, 210.49it/s]



Epoch [338/700]:  17%|█▋        | 131/782 [00:00<00:03, 210.82it/s]



Epoch [338/700]:  22%|██▏       | 175/782 [00:00<00:02, 211.24it/s]



Epoch [338/700]:  28%|██▊       | 219/782 [00:01<00:02, 213.75it/s]



Epoch [338/700]:  34%|███▎      | 263/782 [00:01<00:02, 212.97it/s]



Epoch [338/700]:  39%|███▉      | 307/782 [00:01<00:02, 210.99it/s]



Epoch [338/700]:  45%|████▍     | 350/782 [00:01<00:02, 206.71it/s]



Epoch [338/700]:  50%|█████     | 394/782 [00:01<00:01, 210.53it/s]



Epoch [338/700]:  56%|█████▌    | 438/782 [00:02<00:01, 209.46it/s]



Epoch [338/700]:  62%|██████▏   | 482/782 [00:02<00:01, 213.33it/s]



Epoch [338/700]:  67%|██████▋   | 526/782 [00:02<00:01, 210.28it/s]



Epoch [338/700]:  73%|███████▎  | 570/782 [00:02<00:01, 211.27it/s]



Epoch [338/700]:  79%|███████▊  | 614/782 [00:02<00:00, 210.60it/s]



Epoch [338/700]:  84%|████████▍ | 657/782 [00:03<00:00, 208.37it/s]



Epoch [338/700]:  90%|████████▉ | 700/782 [00:03<00:00, 209.50it/s]



Epoch [338/700]:  95%|█████████▌| 743/782 [00:03<00:00, 211.52it/s]



Epoch [338/700]: 100%|██████████| 782/782 [00:03<00:00, 210.08it/s]


Learning Rate: 0.004500
Train Loss: 0.0760, Accuracy: 97.33%, Confidence: 0.9667
Test Loss: 1.9652, Accuracy: 71.17%, Confidence: 0.9293
Train-Test Accuracy Gap: 26.16%


Epoch [339/700]:   3%|▎         | 21/782 [00:00<00:03, 207.63it/s]



Epoch [339/700]:  11%|█         | 87/782 [00:00<00:03, 215.31it/s]



Epoch [339/700]:  17%|█▋        | 131/782 [00:00<00:03, 216.68it/s]



Epoch [339/700]:  23%|██▎       | 176/782 [00:00<00:02, 218.91it/s]



Epoch [339/700]:  28%|██▊       | 220/782 [00:01<00:02, 217.49it/s]



Epoch [339/700]:  34%|███▍      | 264/782 [00:01<00:02, 213.50it/s]



Epoch [339/700]:  42%|████▏     | 330/782 [00:01<00:02, 212.45it/s]



Epoch [339/700]:  48%|████▊     | 374/782 [00:01<00:01, 211.20it/s]



Epoch [339/700]:  53%|█████▎    | 418/782 [00:01<00:01, 212.91it/s]



Epoch [339/700]:  59%|█████▉    | 462/782 [00:02<00:01, 212.43it/s]



Epoch [339/700]:  65%|██████▍   | 506/782 [00:02<00:01, 211.65it/s]



Epoch [339/700]:  70%|███████   | 550/782 [00:02<00:01, 212.19it/s]



Epoch [339/700]:  76%|███████▌  | 594/782 [00:02<00:00, 213.80it/s]



Epoch [339/700]:  82%|████████▏ | 638/782 [00:02<00:00, 211.63it/s]



Epoch [339/700]:  87%|████████▋ | 682/782 [00:03<00:00, 212.92it/s]



Epoch [339/700]:  93%|█████████▎| 726/782 [00:03<00:00, 211.60it/s]



Epoch [339/700]: 100%|██████████| 782/782 [00:03<00:00, 212.98it/s]






Learning Rate: 0.004500
Train Loss: 0.0762, Accuracy: 97.46%, Confidence: 0.9669
Test Loss: 2.1218, Accuracy: 69.22%, Confidence: 0.9277
Train-Test Accuracy Gap: 28.24%


Epoch [340/700]:   3%|▎         | 22/782 [00:00<00:03, 212.03it/s]



Epoch [340/700]:  11%|█▏        | 88/782 [00:00<00:03, 215.34it/s]



Epoch [340/700]:  17%|█▋        | 132/782 [00:00<00:03, 213.71it/s]



Epoch [340/700]:  23%|██▎       | 176/782 [00:00<00:02, 214.80it/s]



Epoch [340/700]:  28%|██▊       | 220/782 [00:01<00:02, 213.20it/s]



Epoch [340/700]:  34%|███▍      | 264/782 [00:01<00:02, 211.50it/s]



Epoch [340/700]:  39%|███▉      | 308/782 [00:01<00:02, 209.33it/s]



Epoch [340/700]:  48%|████▊     | 373/782 [00:01<00:01, 209.95it/s]



Epoch [340/700]:  53%|█████▎    | 417/782 [00:01<00:01, 212.95it/s]



Epoch [340/700]:  59%|█████▉    | 461/782 [00:02<00:01, 211.84it/s]



Epoch [340/700]:  64%|██████▍   | 504/782 [00:02<00:01, 209.22it/s]



Epoch [340/700]:  70%|███████   | 548/782 [00:02<00:01, 210.91it/s]



Epoch [340/700]:  76%|███████▌  | 592/782 [00:02<00:00, 211.66it/s]



Epoch [340/700]:  81%|████████▏ | 636/782 [00:03<00:00, 210.28it/s]



Epoch [340/700]:  87%|████████▋ | 680/782 [00:03<00:00, 210.29it/s]



Epoch [340/700]:  93%|█████████▎| 724/782 [00:03<00:00, 210.25it/s]



Epoch [340/700]:  98%|█████████▊| 768/782 [00:03<00:00, 212.84it/s]



Epoch [340/700]: 100%|██████████| 782/782 [00:03<00:00, 211.35it/s]


Learning Rate: 0.004500
Train Loss: 0.0764, Accuracy: 97.35%, Confidence: 0.9663
Test Loss: 1.7061, Accuracy: 73.22%, Confidence: 0.9308
Train-Test Accuracy Gap: 24.13%


Epoch [341/700]:   3%|▎         | 21/782 [00:00<00:03, 209.91it/s]



Epoch [341/700]:   5%|▌         | 42/782 [00:00<00:03, 209.66it/s]



Epoch [341/700]:  11%|█         | 85/782 [00:00<00:03, 209.98it/s]



Epoch [341/700]:  16%|█▋        | 128/782 [00:00<00:03, 209.33it/s]



Epoch [341/700]:  22%|██▏       | 172/782 [00:00<00:02, 210.38it/s]



Epoch [341/700]:  28%|██▊       | 216/782 [00:01<00:02, 210.61it/s]



Epoch [341/700]:  33%|███▎      | 260/782 [00:01<00:02, 211.42it/s]



Epoch [341/700]:  39%|███▉      | 304/782 [00:01<00:02, 211.26it/s]



Epoch [341/700]:  45%|████▍     | 348/782 [00:01<00:02, 210.00it/s]



Epoch [341/700]:  50%|█████     | 392/782 [00:01<00:01, 214.50it/s]



Epoch [341/700]:  56%|█████▌    | 436/782 [00:02<00:01, 214.94it/s]



Epoch [341/700]:  61%|██████▏   | 480/782 [00:02<00:01, 212.96it/s]



Epoch [341/700]:  67%|██████▋   | 524/782 [00:02<00:01, 212.70it/s]



Epoch [341/700]:  73%|███████▎  | 568/782 [00:02<00:01, 213.54it/s]



Epoch [341/700]:  78%|███████▊  | 612/782 [00:02<00:00, 214.41it/s]



Epoch [341/700]:  84%|████████▍ | 656/782 [00:03<00:00, 212.34it/s]



Epoch [341/700]:  90%|████████▉ | 700/782 [00:03<00:00, 214.29it/s]



Epoch [341/700]:  95%|█████████▌| 744/782 [00:03<00:00, 213.32it/s]



Epoch [341/700]: 100%|██████████| 782/782 [00:03<00:00, 211.72it/s]


Learning Rate: 0.004500
Train Loss: 0.0824, Accuracy: 97.17%, Confidence: 0.9665
Test Loss: 1.8020, Accuracy: 72.45%, Confidence: 0.9308
Train-Test Accuracy Gap: 24.72%


Epoch [342/700]:   3%|▎         | 21/782 [00:00<00:03, 205.03it/s]



Epoch [342/700]:  11%|█         | 87/782 [00:00<00:03, 213.05it/s]



Epoch [342/700]:  17%|█▋        | 131/782 [00:00<00:03, 208.65it/s]



Epoch [342/700]:  22%|██▏       | 175/782 [00:00<00:02, 210.06it/s]



Epoch [342/700]:  28%|██▊       | 219/782 [00:01<00:02, 212.64it/s]



Epoch [342/700]:  34%|███▎      | 263/782 [00:01<00:02, 213.27it/s]



Epoch [342/700]:  39%|███▉      | 307/782 [00:01<00:02, 211.68it/s]



Epoch [342/700]:  42%|████▏     | 329/782 [00:01<00:02, 203.96it/s]



Epoch [342/700]:  48%|████▊     | 372/782 [00:01<00:01, 205.01it/s]



Epoch [342/700]:  56%|█████▌    | 438/782 [00:02<00:01, 209.49it/s]



Epoch [342/700]:  62%|██████▏   | 482/782 [00:02<00:01, 212.59it/s]



Epoch [342/700]:  67%|██████▋   | 526/782 [00:02<00:01, 213.82it/s]



Epoch [342/700]:  73%|███████▎  | 570/782 [00:02<00:00, 212.09it/s]



Epoch [342/700]:  79%|███████▊  | 614/782 [00:02<00:00, 214.92it/s]



Epoch [342/700]:  84%|████████▍ | 658/782 [00:03<00:00, 212.14it/s]



Epoch [342/700]:  90%|████████▉ | 702/782 [00:03<00:00, 211.28it/s]



Epoch [342/700]:  95%|█████████▌| 746/782 [00:03<00:00, 210.45it/s]



Epoch [342/700]: 100%|██████████| 782/782 [00:03<00:00, 210.00it/s]


Learning Rate: 0.004500
Train Loss: 0.0766, Accuracy: 97.33%, Confidence: 0.9669
Test Loss: 1.8142, Accuracy: 72.42%, Confidence: 0.9305
Train-Test Accuracy Gap: 24.91%


Epoch [343/700]:   3%|▎         | 20/782 [00:00<00:03, 198.93it/s]



Epoch [343/700]:  11%|█         | 86/782 [00:00<00:03, 214.82it/s]



Epoch [343/700]:  17%|█▋        | 130/782 [00:00<00:03, 211.21it/s]



Epoch [343/700]:  22%|██▏       | 174/782 [00:00<00:02, 209.74it/s]



Epoch [343/700]:  25%|██▌       | 196/782 [00:00<00:02, 210.30it/s]



Epoch [343/700]:  34%|███▎      | 262/782 [00:01<00:02, 213.32it/s]



Epoch [343/700]:  39%|███▉      | 306/782 [00:01<00:02, 213.66it/s]



Epoch [343/700]:  45%|████▍     | 350/782 [00:01<00:02, 210.56it/s]



Epoch [343/700]:  50%|█████     | 394/782 [00:01<00:01, 210.91it/s]



Epoch [343/700]:  56%|█████▌    | 438/782 [00:02<00:01, 211.24it/s]



Epoch [343/700]:  62%|██████▏   | 482/782 [00:02<00:01, 211.21it/s]



Epoch [343/700]:  67%|██████▋   | 526/782 [00:02<00:01, 212.07it/s]



Epoch [343/700]:  73%|███████▎  | 569/782 [00:02<00:01, 206.85it/s]



Epoch [343/700]:  78%|███████▊  | 611/782 [00:02<00:00, 205.44it/s]



Epoch [343/700]:  84%|████████▍ | 655/782 [00:03<00:00, 209.58it/s]



Epoch [343/700]:  89%|████████▉ | 697/782 [00:03<00:00, 207.66it/s]



Epoch [343/700]:  95%|█████████▍| 739/782 [00:03<00:00, 205.52it/s]



Epoch [343/700]: 100%|██████████| 782/782 [00:03<00:00, 209.28it/s]


Learning Rate: 0.004500
Train Loss: 0.0775, Accuracy: 97.33%, Confidence: 0.9676
Test Loss: 1.8358, Accuracy: 72.42%, Confidence: 0.9300
Train-Test Accuracy Gap: 24.91%


Epoch [344/700]:   3%|▎         | 21/782 [00:00<00:03, 208.15it/s]



Epoch [344/700]:   5%|▌         | 43/782 [00:00<00:03, 209.42it/s]



Epoch [344/700]:   8%|▊         | 64/782 [00:00<00:03, 206.12it/s]



Epoch [344/700]:  11%|█         | 86/782 [00:00<00:03, 208.80it/s]



Epoch [344/700]:  14%|█▎        | 107/782 [00:00<00:03, 204.71it/s]



Epoch [344/700]:  16%|█▋        | 129/782 [00:00<00:03, 207.59it/s]



Epoch [344/700]:  19%|█▉        | 151/782 [00:00<00:03, 209.81it/s]



Epoch [344/700]:  22%|██▏       | 173/782 [00:00<00:02, 209.61it/s]



Epoch [344/700]:  28%|██▊       | 217/782 [00:01<00:02, 210.18it/s]



Epoch [344/700]:  33%|███▎      | 261/782 [00:01<00:02, 210.28it/s]



Epoch [344/700]:  39%|███▉      | 305/782 [00:01<00:02, 210.03it/s]



Epoch [344/700]:  45%|████▍     | 349/782 [00:01<00:02, 209.97it/s]



Epoch [344/700]:  50%|█████     | 393/782 [00:01<00:01, 211.47it/s]



Epoch [344/700]:  56%|█████▌    | 437/782 [00:02<00:01, 213.52it/s]



Epoch [344/700]:  62%|██████▏   | 481/782 [00:02<00:01, 212.18it/s]



Epoch [344/700]:  67%|██████▋   | 525/782 [00:02<00:01, 212.71it/s]



Epoch [344/700]:  73%|███████▎  | 569/782 [00:02<00:01, 208.40it/s]



Epoch [344/700]:  78%|███████▊  | 612/782 [00:02<00:00, 209.34it/s]



Epoch [344/700]:  84%|████████▍ | 655/782 [00:03<00:00, 209.61it/s]



Epoch [344/700]:  89%|████████▉ | 697/782 [00:03<00:00, 208.60it/s]



Epoch [344/700]:  98%|█████████▊| 763/782 [00:03<00:00, 211.86it/s]



Epoch [344/700]: 100%|██████████| 782/782 [00:03<00:00, 209.84it/s]


Learning Rate: 0.004500
Train Loss: 0.0750, Accuracy: 97.44%, Confidence: 0.9678
Test Loss: 1.8706, Accuracy: 72.35%, Confidence: 0.9311
Train-Test Accuracy Gap: 25.09%


Epoch [345/700]:   3%|▎         | 21/782 [00:00<00:03, 205.70it/s]



Epoch [345/700]:   5%|▌         | 43/782 [00:00<00:03, 209.17it/s]



Epoch [345/700]:   8%|▊         | 64/782 [00:00<00:03, 203.63it/s]



Epoch [345/700]:  11%|█         | 85/782 [00:00<00:03, 205.04it/s]



Epoch [345/700]:  14%|█▎        | 106/782 [00:00<00:03, 202.04it/s]



Epoch [345/700]:  16%|█▌        | 127/782 [00:00<00:03, 200.27it/s]



Epoch [345/700]:  19%|█▉        | 149/782 [00:00<00:03, 204.13it/s]



Epoch [345/700]:  22%|██▏       | 171/782 [00:00<00:02, 205.55it/s]



Epoch [345/700]:  25%|██▍       | 193/782 [00:00<00:02, 208.01it/s]



Epoch [345/700]:  33%|███▎      | 259/782 [00:01<00:02, 213.53it/s]



Epoch [345/700]:  39%|███▊      | 303/782 [00:01<00:02, 211.72it/s]



Epoch [345/700]:  44%|████▍     | 347/782 [00:01<00:02, 210.28it/s]



Epoch [345/700]:  50%|█████     | 391/782 [00:01<00:01, 210.82it/s]



Epoch [345/700]:  56%|█████▌    | 435/782 [00:02<00:01, 214.10it/s]



Epoch [345/700]:  61%|██████▏   | 479/782 [00:02<00:01, 212.52it/s]



Epoch [345/700]:  67%|██████▋   | 523/782 [00:02<00:01, 211.56it/s]



Epoch [345/700]:  73%|███████▎  | 567/782 [00:02<00:01, 212.39it/s]



Epoch [345/700]:  78%|███████▊  | 611/782 [00:02<00:00, 213.83it/s]



Epoch [345/700]:  84%|████████▍ | 655/782 [00:03<00:00, 214.06it/s]



Epoch [345/700]:  89%|████████▉ | 699/782 [00:03<00:00, 210.57it/s]



Epoch [345/700]:  95%|█████████▌| 743/782 [00:03<00:00, 210.74it/s]



Epoch [345/700]: 100%|██████████| 782/782 [00:03<00:00, 210.39it/s]


Learning Rate: 0.004500
Train Loss: 0.0772, Accuracy: 97.32%, Confidence: 0.9675
Test Loss: 2.0810, Accuracy: 70.15%, Confidence: 0.9297
Train-Test Accuracy Gap: 27.17%


Epoch [346/700]:   3%|▎         | 22/782 [00:00<00:03, 211.81it/s]



Epoch [346/700]:   8%|▊         | 66/782 [00:00<00:03, 215.62it/s]



Epoch [346/700]:  17%|█▋        | 131/782 [00:00<00:03, 206.50it/s]



Epoch [346/700]:  22%|██▏       | 174/782 [00:00<00:02, 209.49it/s]



Epoch [346/700]:  28%|██▊       | 217/782 [00:01<00:02, 208.64it/s]



Epoch [346/700]:  33%|███▎      | 261/782 [00:01<00:02, 211.90it/s]



Epoch [346/700]:  39%|███▉      | 305/782 [00:01<00:02, 213.26it/s]



Epoch [346/700]:  45%|████▍     | 348/782 [00:01<00:02, 206.43it/s]



Epoch [346/700]:  50%|█████     | 392/782 [00:01<00:01, 210.10it/s]



Epoch [346/700]:  56%|█████▌    | 436/782 [00:02<00:01, 210.71it/s]



Epoch [346/700]:  61%|██████▏   | 480/782 [00:02<00:01, 209.40it/s]



Epoch [346/700]:  67%|██████▋   | 522/782 [00:02<00:01, 207.81it/s]



Epoch [346/700]:  72%|███████▏  | 564/782 [00:02<00:01, 207.54it/s]



Epoch [346/700]:  77%|███████▋  | 606/782 [00:02<00:00, 207.43it/s]



Epoch [346/700]:  83%|████████▎ | 649/782 [00:03<00:00, 208.15it/s]



Epoch [346/700]:  88%|████████▊ | 691/782 [00:03<00:00, 205.90it/s]



Epoch [346/700]:  94%|█████████▎| 733/782 [00:03<00:00, 197.65it/s]



Epoch [346/700]:  99%|█████████▉| 776/782 [00:03<00:00, 204.24it/s]



Epoch [346/700]: 100%|██████████| 782/782 [00:03<00:00, 207.25it/s]


Learning Rate: 0.004500
Train Loss: 0.0760, Accuracy: 97.35%, Confidence: 0.9677
Test Loss: 1.9477, Accuracy: 72.39%, Confidence: 0.9320
Train-Test Accuracy Gap: 24.96%


Epoch [347/700]:   3%|▎         | 21/782 [00:00<00:03, 209.42it/s]



Epoch [347/700]:   5%|▌         | 43/782 [00:00<00:03, 210.27it/s]



Epoch [347/700]:   8%|▊         | 65/782 [00:00<00:03, 210.75it/s]



Epoch [347/700]:  14%|█▍        | 109/782 [00:00<00:03, 209.88it/s]



Epoch [347/700]:  17%|█▋        | 131/782 [00:00<00:03, 210.83it/s]



Epoch [347/700]:  22%|██▏       | 175/782 [00:00<00:02, 212.53it/s]



Epoch [347/700]:  28%|██▊       | 219/782 [00:01<00:02, 213.22it/s]



Epoch [347/700]:  34%|███▎      | 263/782 [00:01<00:02, 211.48it/s]



Epoch [347/700]:  39%|███▉      | 307/782 [00:01<00:02, 209.48it/s]



Epoch [347/700]:  45%|████▍     | 350/782 [00:01<00:02, 211.56it/s]



Epoch [347/700]:  50%|█████     | 394/782 [00:01<00:01, 212.06it/s]



Epoch [347/700]:  56%|█████▌    | 438/782 [00:02<00:01, 212.66it/s]



Epoch [347/700]:  62%|██████▏   | 482/782 [00:02<00:01, 212.68it/s]



Epoch [347/700]:  67%|██████▋   | 526/782 [00:02<00:01, 212.52it/s]



Epoch [347/700]:  73%|███████▎  | 570/782 [00:02<00:00, 213.94it/s]



Epoch [347/700]:  79%|███████▊  | 614/782 [00:02<00:00, 214.76it/s]



Epoch [347/700]:  84%|████████▍ | 658/782 [00:03<00:00, 213.09it/s]



Epoch [347/700]:  90%|████████▉ | 702/782 [00:03<00:00, 215.12it/s]



Epoch [347/700]:  95%|█████████▌| 746/782 [00:03<00:00, 210.64it/s]



Epoch [347/700]: 100%|██████████| 782/782 [00:03<00:00, 211.90it/s]


Learning Rate: 0.004500
Train Loss: 0.0800, Accuracy: 97.27%, Confidence: 0.9667
Test Loss: 2.0694, Accuracy: 70.40%, Confidence: 0.9312
Train-Test Accuracy Gap: 26.87%


Epoch [348/700]:   3%|▎         | 21/782 [00:00<00:03, 209.18it/s]



Epoch [348/700]:  11%|█         | 87/782 [00:00<00:03, 214.55it/s]



Epoch [348/700]:  20%|█▉        | 153/782 [00:00<00:02, 214.10it/s]



Epoch [348/700]:  25%|██▌       | 197/782 [00:00<00:02, 212.66it/s]



Epoch [348/700]:  31%|███       | 241/782 [00:01<00:02, 211.32it/s]



Epoch [348/700]:  36%|███▋      | 285/782 [00:01<00:02, 212.49it/s]



Epoch [348/700]:  42%|████▏     | 329/782 [00:01<00:02, 211.53it/s]



Epoch [348/700]:  48%|████▊     | 373/782 [00:01<00:01, 212.32it/s]



Epoch [348/700]:  56%|█████▌    | 439/782 [00:02<00:01, 212.38it/s]



Epoch [348/700]:  62%|██████▏   | 483/782 [00:02<00:01, 211.28it/s]



Epoch [348/700]:  68%|██████▊   | 528/782 [00:02<00:01, 216.09it/s]



Epoch [348/700]:  73%|███████▎  | 572/782 [00:02<00:01, 207.45it/s]



Epoch [348/700]:  79%|███████▉  | 617/782 [00:02<00:00, 213.34it/s]



Epoch [348/700]:  85%|████████▍ | 661/782 [00:03<00:00, 210.08it/s]



Epoch [348/700]:  90%|█████████ | 705/782 [00:03<00:00, 210.46it/s]



Epoch [348/700]:  96%|█████████▌| 749/782 [00:03<00:00, 210.74it/s]



Epoch [348/700]: 100%|██████████| 782/782 [00:03<00:00, 211.67it/s]


Learning Rate: 0.004500
Train Loss: 0.0722, Accuracy: 97.53%, Confidence: 0.9682
Test Loss: 2.0369, Accuracy: 70.48%, Confidence: 0.9287
Train-Test Accuracy Gap: 27.05%


Epoch [349/700]:   3%|▎         | 22/782 [00:00<00:03, 211.00it/s]



Epoch [349/700]:   6%|▌         | 44/782 [00:00<00:03, 211.76it/s]



Epoch [349/700]:   8%|▊         | 66/782 [00:00<00:03, 212.19it/s]



Epoch [349/700]:  17%|█▋        | 132/782 [00:00<00:03, 211.71it/s]



Epoch [349/700]:  23%|██▎       | 176/782 [00:00<00:02, 211.10it/s]



Epoch [349/700]:  28%|██▊       | 220/782 [00:01<00:02, 213.18it/s]



Epoch [349/700]:  34%|███▍      | 264/782 [00:01<00:02, 213.61it/s]



Epoch [349/700]:  42%|████▏     | 330/782 [00:01<00:02, 214.79it/s]



Epoch [349/700]:  48%|████▊     | 374/782 [00:01<00:01, 209.13it/s]



Epoch [349/700]:  53%|█████▎    | 416/782 [00:01<00:01, 206.00it/s]



Epoch [349/700]:  59%|█████▊    | 459/782 [00:02<00:01, 207.39it/s]



Epoch [349/700]:  64%|██████▍   | 501/782 [00:02<00:01, 205.17it/s]



Epoch [349/700]:  70%|██████▉   | 544/782 [00:02<00:01, 206.62it/s]



Epoch [349/700]:  75%|███████▌  | 587/782 [00:02<00:00, 209.87it/s]



Epoch [349/700]:  81%|████████  | 630/782 [00:02<00:00, 209.83it/s]



Epoch [349/700]:  86%|████████▌ | 673/782 [00:03<00:00, 209.59it/s]



Epoch [349/700]:  92%|█████████▏| 716/782 [00:03<00:00, 209.91it/s]



Epoch [349/700]:  97%|█████████▋| 758/782 [00:03<00:00, 207.68it/s]



Epoch [349/700]: 100%|██████████| 782/782 [00:03<00:00, 209.27it/s]


Learning Rate: 0.004500
Train Loss: 0.0752, Accuracy: 97.38%, Confidence: 0.9676
Test Loss: 2.4553, Accuracy: 67.30%, Confidence: 0.9269
Train-Test Accuracy Gap: 30.08%


Epoch [350/700]:   3%|▎         | 21/782 [00:00<00:03, 209.32it/s]



Epoch [350/700]:   5%|▌         | 43/782 [00:00<00:03, 212.98it/s]



Epoch [350/700]:   8%|▊         | 65/782 [00:00<00:03, 212.56it/s]



Epoch [350/700]:  11%|█▏        | 88/782 [00:00<00:03, 215.61it/s]



Epoch [350/700]:  14%|█▍        | 110/782 [00:00<00:03, 212.43it/s]



Epoch [350/700]:  17%|█▋        | 132/782 [00:00<00:03, 211.36it/s]



Epoch [350/700]:  20%|█▉        | 154/782 [00:00<00:02, 212.64it/s]



Epoch [350/700]:  23%|██▎       | 176/782 [00:00<00:02, 212.12it/s]



Epoch [350/700]:  25%|██▌       | 198/782 [00:00<00:02, 212.71it/s]



Epoch [350/700]:  28%|██▊       | 220/782 [00:01<00:02, 213.91it/s]



Epoch [350/700]:  31%|███       | 242/782 [00:01<00:02, 215.39it/s]



Epoch [350/700]:  34%|███▍      | 264/782 [00:01<00:02, 215.26it/s]



Epoch [350/700]:  37%|███▋      | 286/782 [00:01<00:02, 214.34it/s]



Epoch [350/700]:  39%|███▉      | 308/782 [00:01<00:02, 212.68it/s]



Epoch [350/700]:  42%|████▏     | 330/782 [00:01<00:02, 210.02it/s]



Epoch [350/700]:  45%|████▌     | 352/782 [00:01<00:02, 209.73it/s]



Epoch [350/700]:  51%|█████     | 396/782 [00:01<00:01, 211.25it/s]



Epoch [350/700]:  56%|█████▋    | 440/782 [00:02<00:01, 210.79it/s]



Epoch [350/700]:  62%|██████▏   | 484/782 [00:02<00:01, 211.73it/s]



Epoch [350/700]:  68%|██████▊   | 528/782 [00:02<00:01, 212.42it/s]



Epoch [350/700]:  70%|███████   | 550/782 [00:02<00:01, 212.82it/s]



Epoch [350/700]:  73%|███████▎  | 572/782 [00:02<00:00, 212.75it/s]



Epoch [350/700]:  76%|███████▌  | 594/782 [00:02<00:00, 213.14it/s]



Epoch [350/700]:  79%|███████▉  | 616/782 [00:02<00:00, 212.42it/s]



Epoch [350/700]:  82%|████████▏ | 638/782 [00:03<00:00, 213.19it/s]



Epoch [350/700]:  84%|████████▍ | 660/782 [00:03<00:00, 213.65it/s]



Epoch [350/700]:  87%|████████▋ | 682/782 [00:03<00:00, 213.64it/s]



Epoch [350/700]:  90%|█████████ | 704/782 [00:03<00:00, 212.76it/s]



Epoch [350/700]:  93%|█████████▎| 726/782 [00:03<00:00, 213.57it/s]



Epoch [350/700]:  96%|█████████▌| 748/782 [00:03<00:00, 207.83it/s]



Epoch [350/700]:  98%|█████████▊| 770/782 [00:03<00:00, 209.16it/s]



Epoch [350/700]: 100%|██████████| 782/782 [00:03<00:00, 211.84it/s]


Learning Rate: 0.004500
Train Loss: 0.0726, Accuracy: 97.48%, Confidence: 0.9684
Test Loss: 2.1500, Accuracy: 70.60%, Confidence: 0.9314
Train-Test Accuracy Gap: 26.88%


Epoch [351/700]:   3%|▎         | 22/782 [00:00<00:03, 210.82it/s]



Epoch [351/700]:   8%|▊         | 66/782 [00:00<00:03, 215.27it/s]



Epoch [351/700]:  11%|█▏        | 88/782 [00:00<00:03, 213.77it/s]



Epoch [351/700]:  14%|█▍        | 110/782 [00:00<00:03, 206.86it/s]



Epoch [351/700]:  17%|█▋        | 131/782 [00:00<00:03, 205.08it/s]



Epoch [351/700]:  22%|██▏       | 175/782 [00:00<00:02, 210.00it/s]



Epoch [351/700]:  28%|██▊       | 220/782 [00:01<00:02, 214.93it/s]



Epoch [351/700]:  34%|███▍      | 264/782 [00:01<00:02, 215.87it/s]



Epoch [351/700]:  37%|███▋      | 286/782 [00:01<00:02, 215.05it/s]



Epoch [351/700]:  39%|███▉      | 308/782 [00:01<00:02, 215.69it/s]



Epoch [351/700]:  42%|████▏     | 330/782 [00:01<00:02, 213.15it/s]



Epoch [351/700]:  45%|████▌     | 352/782 [00:01<00:02, 214.19it/s]



Epoch [351/700]:  48%|████▊     | 374/782 [00:01<00:01, 214.54it/s]



Epoch [351/700]:  51%|█████     | 396/782 [00:01<00:01, 213.72it/s]



Epoch [351/700]:  59%|█████▉    | 462/782 [00:02<00:01, 214.14it/s]



Epoch [351/700]:  65%|██████▍   | 506/782 [00:02<00:01, 214.37it/s]



Epoch [351/700]:  70%|███████   | 550/782 [00:02<00:01, 214.67it/s]



Epoch [351/700]:  76%|███████▌  | 594/782 [00:02<00:00, 213.07it/s]



Epoch [351/700]:  82%|████████▏ | 638/782 [00:02<00:00, 210.63it/s]



Epoch [351/700]:  87%|████████▋ | 682/782 [00:03<00:00, 212.18it/s]



Epoch [351/700]:  93%|█████████▎| 726/782 [00:03<00:00, 212.31it/s]



Epoch [351/700]:  98%|█████████▊| 770/782 [00:03<00:00, 211.73it/s]



Epoch [351/700]: 100%|██████████| 782/782 [00:03<00:00, 212.31it/s]


Learning Rate: 0.004500
Train Loss: 0.0727, Accuracy: 97.52%, Confidence: 0.9686
Test Loss: 2.1502, Accuracy: 69.95%, Confidence: 0.9326
Train-Test Accuracy Gap: 27.57%


Epoch [352/700]:   3%|▎         | 21/782 [00:00<00:03, 205.47it/s]



Epoch [352/700]:  11%|█         | 87/782 [00:00<00:03, 215.22it/s]



Epoch [352/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.33it/s]



Epoch [352/700]:  22%|██▏       | 175/782 [00:00<00:02, 210.85it/s]



Epoch [352/700]:  28%|██▊       | 219/782 [00:01<00:02, 212.01it/s]



Epoch [352/700]:  34%|███▎      | 263/782 [00:01<00:02, 212.44it/s]



Epoch [352/700]:  39%|███▉      | 307/782 [00:01<00:02, 213.16it/s]



Epoch [352/700]:  45%|████▍     | 351/782 [00:01<00:02, 209.97it/s]



Epoch [352/700]:  51%|█████     | 395/782 [00:01<00:01, 211.08it/s]



Epoch [352/700]:  56%|█████▌    | 439/782 [00:02<00:01, 213.54it/s]



Epoch [352/700]:  62%|██████▏   | 483/782 [00:02<00:01, 211.53it/s]



Epoch [352/700]:  67%|██████▋   | 526/782 [00:02<00:01, 201.06it/s]



Epoch [352/700]:  73%|███████▎  | 568/782 [00:02<00:01, 197.75it/s]



Epoch [352/700]:  78%|███████▊  | 608/782 [00:02<00:00, 193.65it/s]



Epoch [352/700]:  83%|████████▎ | 651/782 [00:03<00:00, 203.58it/s]



Epoch [352/700]:  89%|████████▊ | 694/782 [00:03<00:00, 205.75it/s]



Epoch [352/700]:  94%|█████████▍| 736/782 [00:03<00:00, 204.10it/s]



Epoch [352/700]:  99%|█████████▉| 777/782 [00:03<00:00, 196.93it/s]



Epoch [352/700]: 100%|██████████| 782/782 [00:03<00:00, 206.13it/s]


Learning Rate: 0.004500
Train Loss: 0.0770, Accuracy: 97.36%, Confidence: 0.9683
Test Loss: 1.9442, Accuracy: 71.74%, Confidence: 0.9326
Train-Test Accuracy Gap: 25.62%


Epoch [353/700]:   3%|▎         | 21/782 [00:00<00:03, 204.75it/s]



Epoch [353/700]:  11%|█         | 87/782 [00:00<00:03, 211.38it/s]



Epoch [353/700]:  17%|█▋        | 130/782 [00:00<00:03, 200.01it/s]



Epoch [353/700]:  22%|██▏       | 175/782 [00:00<00:02, 209.26it/s]



Epoch [353/700]:  28%|██▊       | 219/782 [00:01<00:02, 213.30it/s]



Epoch [353/700]:  34%|███▎      | 263/782 [00:01<00:02, 213.45it/s]



Epoch [353/700]:  39%|███▉      | 307/782 [00:01<00:02, 213.17it/s]



Epoch [353/700]:  45%|████▍     | 351/782 [00:01<00:02, 213.37it/s]



Epoch [353/700]:  51%|█████     | 395/782 [00:01<00:01, 212.70it/s]



Epoch [353/700]:  56%|█████▌    | 439/782 [00:02<00:01, 214.16it/s]



Epoch [353/700]:  62%|██████▏   | 483/782 [00:02<00:01, 214.62it/s]



Epoch [353/700]:  67%|██████▋   | 527/782 [00:02<00:01, 210.01it/s]



Epoch [353/700]:  73%|███████▎  | 571/782 [00:02<00:01, 208.25it/s]



Epoch [353/700]:  79%|███████▊  | 614/782 [00:02<00:00, 209.29it/s]



Epoch [353/700]:  84%|████████▍ | 657/782 [00:03<00:00, 211.59it/s]



Epoch [353/700]:  90%|████████▉ | 701/782 [00:03<00:00, 208.24it/s]



Epoch [353/700]:  95%|█████████▌| 744/782 [00:03<00:00, 205.30it/s]



Epoch [353/700]: 100%|██████████| 782/782 [00:03<00:00, 209.49it/s]


Learning Rate: 0.004500
Train Loss: 0.0763, Accuracy: 97.43%, Confidence: 0.9677
Test Loss: 2.1042, Accuracy: 71.24%, Confidence: 0.9345
Train-Test Accuracy Gap: 26.19%


Epoch [354/700]:   3%|▎         | 21/782 [00:00<00:03, 208.61it/s]



Epoch [354/700]:  11%|█         | 87/782 [00:00<00:03, 215.07it/s]



Epoch [354/700]:  17%|█▋        | 131/782 [00:00<00:03, 213.27it/s]



Epoch [354/700]:  22%|██▏       | 175/782 [00:00<00:02, 211.83it/s]



Epoch [354/700]:  28%|██▊       | 219/782 [00:01<00:02, 212.30it/s]



Epoch [354/700]:  31%|███       | 241/782 [00:01<00:02, 211.17it/s]



Epoch [354/700]:  36%|███▋      | 285/782 [00:01<00:02, 210.45it/s]



Epoch [354/700]:  42%|████▏     | 329/782 [00:01<00:02, 210.15it/s]



Epoch [354/700]:  51%|█████     | 395/782 [00:01<00:01, 211.39it/s]



Epoch [354/700]:  56%|█████▌    | 439/782 [00:02<00:01, 211.07it/s]



Epoch [354/700]:  62%|██████▏   | 483/782 [00:02<00:01, 209.14it/s]



Epoch [354/700]:  67%|██████▋   | 526/782 [00:02<00:01, 210.91it/s]



Epoch [354/700]:  73%|███████▎  | 569/782 [00:02<00:01, 207.43it/s]



Epoch [354/700]:  78%|███████▊  | 613/782 [00:02<00:00, 211.40it/s]



Epoch [354/700]:  84%|████████▍ | 657/782 [00:03<00:00, 214.08it/s]



Epoch [354/700]:  90%|████████▉ | 701/782 [00:03<00:00, 215.03it/s]



Epoch [354/700]:  95%|█████████▌| 745/782 [00:03<00:00, 212.99it/s]



Epoch [354/700]: 100%|██████████| 782/782 [00:03<00:00, 211.39it/s]


Learning Rate: 0.004500
Train Loss: 0.0766, Accuracy: 97.36%, Confidence: 0.9689
Test Loss: 1.8925, Accuracy: 72.13%, Confidence: 0.9335
Train-Test Accuracy Gap: 25.23%


Epoch [355/700]:   3%|▎         | 22/782 [00:00<00:03, 214.16it/s]



Epoch [355/700]:   6%|▌         | 44/782 [00:00<00:03, 213.12it/s]



Epoch [355/700]:   8%|▊         | 66/782 [00:00<00:03, 211.62it/s]



Epoch [355/700]:  11%|█▏        | 88/782 [00:00<00:03, 210.22it/s]



Epoch [355/700]:  14%|█▍        | 110/782 [00:00<00:03, 210.31it/s]



Epoch [355/700]:  20%|█▉        | 154/782 [00:00<00:02, 210.26it/s]



Epoch [355/700]:  25%|██▌       | 198/782 [00:00<00:02, 210.42it/s]



Epoch [355/700]:  34%|███▍      | 264/782 [00:01<00:02, 208.23it/s]



Epoch [355/700]:  39%|███▉      | 307/782 [00:01<00:02, 210.70it/s]



Epoch [355/700]:  45%|████▍     | 350/782 [00:01<00:02, 207.25it/s]



Epoch [355/700]:  50%|█████     | 393/782 [00:01<00:01, 207.14it/s]



Epoch [355/700]:  56%|█████▌    | 435/782 [00:02<00:01, 207.20it/s]



Epoch [355/700]:  61%|██████▏   | 479/782 [00:02<00:01, 211.93it/s]



Epoch [355/700]:  67%|██████▋   | 523/782 [00:02<00:01, 212.79it/s]



Epoch [355/700]:  73%|███████▎  | 567/782 [00:02<00:01, 212.54it/s]



Epoch [355/700]:  78%|███████▊  | 611/782 [00:02<00:00, 209.18it/s]



Epoch [355/700]:  84%|████████▍ | 655/782 [00:03<00:00, 210.42it/s]



Epoch [355/700]:  89%|████████▉ | 698/782 [00:03<00:00, 206.27it/s]



Epoch [355/700]:  95%|█████████▍| 740/782 [00:03<00:00, 202.76it/s]



Epoch [355/700]: 100%|██████████| 782/782 [00:03<00:00, 208.44it/s]






Learning Rate: 0.004500
Train Loss: 0.0706, Accuracy: 97.55%, Confidence: 0.9692
Test Loss: 2.0568, Accuracy: 70.54%, Confidence: 0.9316
Train-Test Accuracy Gap: 27.01%


Epoch [356/700]:   3%|▎         | 22/782 [00:00<00:03, 212.93it/s]



Epoch [356/700]:   6%|▌         | 44/782 [00:00<00:03, 215.69it/s]



Epoch [356/700]:   8%|▊         | 66/782 [00:00<00:03, 215.94it/s]



Epoch [356/700]:  11%|█▏        | 88/782 [00:00<00:03, 213.97it/s]



Epoch [356/700]:  14%|█▍        | 110/782 [00:00<00:03, 213.59it/s]



Epoch [356/700]:  17%|█▋        | 132/782 [00:00<00:03, 213.05it/s]



Epoch [356/700]:  20%|█▉        | 154/782 [00:00<00:02, 212.08it/s]



Epoch [356/700]:  23%|██▎       | 176/782 [00:00<00:02, 212.00it/s]



Epoch [356/700]:  25%|██▌       | 198/782 [00:00<00:02, 211.91it/s]



Epoch [356/700]:  28%|██▊       | 220/782 [00:01<00:02, 212.16it/s]



Epoch [356/700]:  31%|███       | 242/782 [00:01<00:02, 211.09it/s]



Epoch [356/700]:  34%|███▍      | 264/782 [00:01<00:02, 209.31it/s]



Epoch [356/700]:  37%|███▋      | 286/782 [00:01<00:02, 210.94it/s]



Epoch [356/700]:  42%|████▏     | 330/782 [00:01<00:02, 211.85it/s]



Epoch [356/700]:  48%|████▊     | 374/782 [00:01<00:01, 211.06it/s]



Epoch [356/700]:  53%|█████▎    | 418/782 [00:01<00:01, 210.59it/s]



Epoch [356/700]:  59%|█████▉    | 462/782 [00:02<00:01, 211.41it/s]



Epoch [356/700]:  65%|██████▍   | 506/782 [00:02<00:01, 211.12it/s]



Epoch [356/700]:  70%|███████   | 550/782 [00:02<00:01, 210.50it/s]



Epoch [356/700]:  76%|███████▌  | 594/782 [00:02<00:00, 212.62it/s]



Epoch [356/700]:  82%|████████▏ | 638/782 [00:03<00:00, 211.78it/s]



Epoch [356/700]:  87%|████████▋ | 682/782 [00:03<00:00, 210.72it/s]



Epoch [356/700]:  93%|█████████▎| 725/782 [00:03<00:00, 203.08it/s]



Epoch [356/700]:  98%|█████████▊| 768/782 [00:03<00:00, 205.57it/s]



Epoch [356/700]: 100%|██████████| 782/782 [00:03<00:00, 210.12it/s]






Learning Rate: 0.004500
Train Loss: 0.0813, Accuracy: 97.17%, Confidence: 0.9673
Test Loss: 2.0459, Accuracy: 70.47%, Confidence: 0.9287
Train-Test Accuracy Gap: 26.70%


Epoch [357/700]:   3%|▎         | 21/782 [00:00<00:03, 206.02it/s]



Epoch [357/700]:   5%|▌         | 43/782 [00:00<00:03, 211.89it/s]



Epoch [357/700]:   8%|▊         | 65/782 [00:00<00:03, 204.76it/s]



Epoch [357/700]:  11%|█         | 86/782 [00:00<00:03, 206.46it/s]



Epoch [357/700]:  16%|█▋        | 129/782 [00:00<00:03, 209.04it/s]



Epoch [357/700]:  22%|██▏       | 173/782 [00:00<00:02, 212.82it/s]



Epoch [357/700]:  28%|██▊       | 217/782 [00:01<00:02, 212.38it/s]



Epoch [357/700]:  33%|███▎      | 261/782 [00:01<00:02, 212.33it/s]



Epoch [357/700]:  36%|███▌      | 283/782 [00:01<00:02, 210.01it/s]



Epoch [357/700]:  42%|████▏     | 326/782 [00:01<00:02, 209.48it/s]



Epoch [357/700]:  45%|████▍     | 348/782 [00:01<00:02, 210.96it/s]



Epoch [357/700]:  47%|████▋     | 370/782 [00:01<00:01, 210.12it/s]



Epoch [357/700]:  50%|█████     | 392/782 [00:01<00:01, 208.88it/s]



Epoch [357/700]:  53%|█████▎    | 413/782 [00:01<00:01, 209.16it/s]



Epoch [357/700]:  55%|█████▌    | 434/782 [00:02<00:01, 207.40it/s]



Epoch [357/700]:  58%|█████▊    | 456/782 [00:02<00:01, 208.43it/s]



Epoch [357/700]:  61%|██████    | 478/782 [00:02<00:01, 209.98it/s]



Epoch [357/700]:  64%|██████▍   | 500/782 [00:02<00:01, 212.46it/s]



Epoch [357/700]:  67%|██████▋   | 522/782 [00:02<00:01, 208.56it/s]



Epoch [357/700]:  72%|███████▏  | 565/782 [00:02<00:01, 209.55it/s]



Epoch [357/700]:  78%|███████▊  | 608/782 [00:02<00:00, 210.37it/s]



Epoch [357/700]:  83%|████████▎ | 652/782 [00:03<00:00, 212.61it/s]



Epoch [357/700]:  89%|████████▉ | 696/782 [00:03<00:00, 211.97it/s]



Epoch [357/700]:  95%|█████████▍| 739/782 [00:03<00:00, 207.60it/s]



Epoch [357/700]: 100%|██████████| 782/782 [00:03<00:00, 209.41it/s]






Learning Rate: 0.004500
Train Loss: 0.0729, Accuracy: 97.42%, Confidence: 0.9686
Test Loss: 1.8580, Accuracy: 72.57%, Confidence: 0.9302
Train-Test Accuracy Gap: 24.85%


Epoch [358/700]:   3%|▎         | 21/782 [00:00<00:03, 208.45it/s]



Epoch [358/700]:   5%|▌         | 43/782 [00:00<00:03, 210.29it/s]



Epoch [358/700]:   8%|▊         | 65/782 [00:00<00:03, 212.02it/s]



Epoch [358/700]:  11%|█         | 87/782 [00:00<00:03, 212.15it/s]



Epoch [358/700]:  14%|█▍        | 109/782 [00:00<00:03, 211.13it/s]



Epoch [358/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.86it/s]



Epoch [358/700]:  22%|██▏       | 175/782 [00:00<00:02, 213.42it/s]



Epoch [358/700]:  28%|██▊       | 219/782 [00:01<00:02, 211.98it/s]



Epoch [358/700]:  34%|███▎      | 263/782 [00:01<00:02, 211.08it/s]



Epoch [358/700]:  36%|███▋      | 285/782 [00:01<00:02, 206.97it/s]



Epoch [358/700]:  39%|███▉      | 307/782 [00:01<00:02, 209.57it/s]



Epoch [358/700]:  42%|████▏     | 329/782 [00:01<00:02, 210.41it/s]



Epoch [358/700]:  45%|████▍     | 351/782 [00:01<00:02, 211.49it/s]



Epoch [358/700]:  51%|█████     | 395/782 [00:01<00:01, 209.56it/s]



Epoch [358/700]:  56%|█████▌    | 438/782 [00:02<00:01, 209.20it/s]



Epoch [358/700]:  61%|██████▏   | 480/782 [00:02<00:01, 207.91it/s]



Epoch [358/700]:  67%|██████▋   | 522/782 [00:02<00:01, 204.87it/s]



Epoch [358/700]:  72%|███████▏  | 565/782 [00:02<00:01, 206.42it/s]



Epoch [358/700]:  75%|███████▍  | 586/782 [00:02<00:00, 205.60it/s]



Epoch [358/700]:  78%|███████▊  | 608/782 [00:02<00:00, 207.21it/s]



Epoch [358/700]:  80%|████████  | 629/782 [00:03<00:00, 207.01it/s]



Epoch [358/700]:  83%|████████▎ | 651/782 [00:03<00:00, 208.97it/s]



Epoch [358/700]:  86%|████████▌ | 673/782 [00:03<00:00, 209.52it/s]



Epoch [358/700]:  89%|████████▊ | 694/782 [00:03<00:00, 208.66it/s]



Epoch [358/700]:  91%|█████████▏| 715/782 [00:03<00:00, 208.48it/s]



Epoch [358/700]:  94%|█████████▍| 736/782 [00:03<00:00, 207.89it/s]



Epoch [358/700]:  97%|█████████▋| 757/782 [00:03<00:00, 205.33it/s]



Epoch [358/700]: 100%|██████████| 782/782 [00:03<00:00, 208.26it/s]






Learning Rate: 0.004500
Train Loss: 0.0701, Accuracy: 97.61%, Confidence: 0.9695
Test Loss: 1.7183, Accuracy: 73.79%, Confidence: 0.9325
Train-Test Accuracy Gap: 23.82%


Epoch [359/700]:   3%|▎         | 22/782 [00:00<00:03, 214.63it/s]



Epoch [359/700]:   6%|▌         | 44/782 [00:00<00:03, 216.87it/s]



Epoch [359/700]:  11%|█▏        | 88/782 [00:00<00:03, 217.53it/s]



Epoch [359/700]:  14%|█▍        | 110/782 [00:00<00:03, 215.19it/s]



Epoch [359/700]:  17%|█▋        | 132/782 [00:00<00:03, 214.17it/s]



Epoch [359/700]:  20%|█▉        | 154/782 [00:00<00:02, 210.93it/s]



Epoch [359/700]:  23%|██▎       | 176/782 [00:00<00:02, 210.24it/s]



Epoch [359/700]:  25%|██▌       | 198/782 [00:00<00:02, 210.95it/s]



Epoch [359/700]:  28%|██▊       | 220/782 [00:01<00:02, 209.85it/s]



Epoch [359/700]:  31%|███       | 241/782 [00:01<00:02, 209.67it/s]



Epoch [359/700]:  34%|███▎      | 262/782 [00:01<00:02, 209.36it/s]



Epoch [359/700]:  36%|███▋      | 284/782 [00:01<00:02, 210.29it/s]



Epoch [359/700]:  39%|███▉      | 306/782 [00:01<00:02, 211.58it/s]



Epoch [359/700]:  42%|████▏     | 328/782 [00:01<00:02, 211.52it/s]



Epoch [359/700]:  45%|████▍     | 350/782 [00:01<00:02, 209.44it/s]



Epoch [359/700]:  48%|████▊     | 372/782 [00:01<00:01, 211.06it/s]



Epoch [359/700]:  50%|█████     | 394/782 [00:01<00:01, 209.29it/s]



Epoch [359/700]:  53%|█████▎    | 415/782 [00:01<00:01, 207.14it/s]



Epoch [359/700]:  56%|█████▌    | 436/782 [00:02<00:01, 205.25it/s]



Epoch [359/700]:  59%|█████▊    | 458/782 [00:02<00:01, 207.54it/s]



Epoch [359/700]:  61%|██████▏   | 480/782 [00:02<00:01, 208.89it/s]



Epoch [359/700]:  64%|██████▍   | 501/782 [00:02<00:01, 206.24it/s]



Epoch [359/700]:  67%|██████▋   | 522/782 [00:02<00:01, 207.01it/s]



Epoch [359/700]:  70%|██████▉   | 544/782 [00:02<00:01, 208.52it/s]



Epoch [359/700]:  72%|███████▏  | 566/782 [00:02<00:01, 210.78it/s]



Epoch [359/700]:  75%|███████▌  | 588/782 [00:02<00:00, 212.01it/s]



Epoch [359/700]:  78%|███████▊  | 610/782 [00:02<00:00, 212.42it/s]



Epoch [359/700]:  81%|████████  | 632/782 [00:03<00:00, 211.65it/s]



Epoch [359/700]:  86%|████████▋ | 676/782 [00:03<00:00, 213.07it/s]



Epoch [359/700]:  92%|█████████▏| 720/782 [00:03<00:00, 212.25it/s]



Epoch [359/700]:  98%|█████████▊| 764/782 [00:03<00:00, 213.18it/s]



Epoch [359/700]: 100%|██████████| 782/782 [00:03<00:00, 210.90it/s]


Learning Rate: 0.004500
Train Loss: 0.0708, Accuracy: 97.49%, Confidence: 0.9692
Test Loss: 1.8649, Accuracy: 72.38%, Confidence: 0.9318
Train-Test Accuracy Gap: 25.11%


Epoch [360/700]:   3%|▎         | 21/782 [00:00<00:03, 209.94it/s]



Epoch [360/700]:  11%|█         | 87/782 [00:00<00:03, 211.88it/s]



Epoch [360/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.52it/s]



Epoch [360/700]:  23%|██▎       | 176/782 [00:00<00:02, 214.03it/s]



Epoch [360/700]:  28%|██▊       | 220/782 [00:01<00:02, 213.90it/s]



Epoch [360/700]:  34%|███▍      | 264/782 [00:01<00:02, 210.23it/s]



Epoch [360/700]:  39%|███▉      | 308/782 [00:01<00:02, 211.99it/s]



Epoch [360/700]:  45%|████▌     | 352/782 [00:01<00:02, 212.03it/s]



Epoch [360/700]:  51%|█████     | 396/782 [00:01<00:01, 210.67it/s]



Epoch [360/700]:  56%|█████▋    | 440/782 [00:02<00:01, 210.71it/s]



Epoch [360/700]:  62%|██████▏   | 484/782 [00:02<00:01, 211.62it/s]



Epoch [360/700]:  67%|██████▋   | 527/782 [00:02<00:01, 207.62it/s]



Epoch [360/700]:  73%|███████▎  | 570/782 [00:02<00:01, 209.09it/s]



Epoch [360/700]:  79%|███████▊  | 614/782 [00:02<00:00, 210.88it/s]



Epoch [360/700]:  84%|████████▍ | 658/782 [00:03<00:00, 209.46it/s]



Epoch [360/700]:  90%|████████▉ | 701/782 [00:03<00:00, 209.00it/s]



Epoch [360/700]:  95%|█████████▌| 745/782 [00:03<00:00, 210.75it/s]



Epoch [360/700]: 100%|██████████| 782/782 [00:03<00:00, 210.74it/s]


Learning Rate: 0.004500
Train Loss: 0.0751, Accuracy: 97.43%, Confidence: 0.9692
Test Loss: 1.7131, Accuracy: 73.89%, Confidence: 0.9337
Train-Test Accuracy Gap: 23.54%


Epoch [361/700]:   3%|▎         | 21/782 [00:00<00:03, 209.82it/s]



Epoch [361/700]:  11%|█         | 87/782 [00:00<00:03, 210.38it/s]



Epoch [361/700]:  17%|█▋        | 131/782 [00:00<00:03, 210.08it/s]



Epoch [361/700]:  22%|██▏       | 175/782 [00:00<00:02, 208.78it/s]



Epoch [361/700]:  28%|██▊       | 217/782 [00:01<00:02, 207.57it/s]



Epoch [361/700]:  33%|███▎      | 261/782 [00:01<00:02, 209.23it/s]



Epoch [361/700]:  39%|███▉      | 304/782 [00:01<00:02, 209.94it/s]



Epoch [361/700]:  44%|████▍     | 346/782 [00:01<00:02, 207.61it/s]



Epoch [361/700]:  50%|████▉     | 388/782 [00:01<00:01, 205.58it/s]



Epoch [361/700]:  55%|█████▍    | 430/782 [00:02<00:01, 207.71it/s]



Epoch [361/700]:  60%|██████    | 472/782 [00:02<00:01, 207.78it/s]



Epoch [361/700]:  66%|██████▌   | 514/782 [00:02<00:01, 201.90it/s]



Epoch [361/700]:  71%|███████   | 556/782 [00:02<00:01, 204.74it/s]



Epoch [361/700]:  77%|███████▋  | 600/782 [00:02<00:00, 210.20it/s]



Epoch [361/700]:  82%|████████▏ | 644/782 [00:03<00:00, 209.59it/s]



Epoch [361/700]:  88%|████████▊ | 688/782 [00:03<00:00, 211.42it/s]



Epoch [361/700]:  94%|█████████▎| 732/782 [00:03<00:00, 209.46it/s]



Epoch [361/700]: 100%|██████████| 782/782 [00:03<00:00, 207.61it/s]






Learning Rate: 0.004500
Train Loss: 0.0751, Accuracy: 97.38%, Confidence: 0.9688
Test Loss: 2.0154, Accuracy: 70.99%, Confidence: 0.9301
Train-Test Accuracy Gap: 26.39%


Epoch [362/700]:   3%|▎         | 21/782 [00:00<00:03, 202.69it/s]



Epoch [362/700]:   5%|▌         | 43/782 [00:00<00:03, 209.45it/s]



Epoch [362/700]:   8%|▊         | 65/782 [00:00<00:03, 211.29it/s]



Epoch [362/700]:  17%|█▋        | 131/782 [00:00<00:03, 214.28it/s]



Epoch [362/700]:  22%|██▏       | 175/782 [00:00<00:02, 215.50it/s]



Epoch [362/700]:  28%|██▊       | 219/782 [00:01<00:02, 215.81it/s]



Epoch [362/700]:  34%|███▎      | 263/782 [00:01<00:02, 216.04it/s]



Epoch [362/700]:  39%|███▉      | 307/782 [00:01<00:02, 214.29it/s]



Epoch [362/700]:  45%|████▍     | 351/782 [00:01<00:02, 212.91it/s]



Epoch [362/700]:  51%|█████     | 395/782 [00:01<00:01, 214.42it/s]



Epoch [362/700]:  56%|█████▌    | 439/782 [00:02<00:01, 210.58it/s]



Epoch [362/700]:  62%|██████▏   | 483/782 [00:02<00:01, 212.26it/s]



Epoch [362/700]:  67%|██████▋   | 527/782 [00:02<00:01, 209.63it/s]



Epoch [362/700]:  73%|███████▎  | 571/782 [00:02<00:00, 213.50it/s]



Epoch [362/700]:  79%|███████▊  | 615/782 [00:02<00:00, 212.42it/s]



Epoch [362/700]:  84%|████████▍ | 659/782 [00:03<00:00, 209.82it/s]



Epoch [362/700]:  90%|████████▉ | 703/782 [00:03<00:00, 211.54it/s]



Epoch [362/700]:  96%|█████████▌| 747/782 [00:03<00:00, 212.80it/s]



Epoch [362/700]: 100%|██████████| 782/782 [00:03<00:00, 212.44it/s]


Learning Rate: 0.004500
Train Loss: 0.0790, Accuracy: 97.22%, Confidence: 0.9682
Test Loss: 1.8904, Accuracy: 72.90%, Confidence: 0.9345
Train-Test Accuracy Gap: 24.32%


Epoch [363/700]:   3%|▎         | 21/782 [00:00<00:03, 206.15it/s]



Epoch [363/700]:  11%|█         | 87/782 [00:00<00:03, 210.09it/s]



Epoch [363/700]:  17%|█▋        | 130/782 [00:00<00:03, 209.07it/s]



Epoch [363/700]:  22%|██▏       | 173/782 [00:00<00:02, 209.50it/s]



Epoch [363/700]:  27%|██▋       | 215/782 [00:01<00:02, 208.23it/s]



Epoch [363/700]:  33%|███▎      | 259/782 [00:01<00:02, 211.27it/s]



Epoch [363/700]:  39%|███▊      | 303/782 [00:01<00:02, 210.49it/s]



Epoch [363/700]:  44%|████▍     | 347/782 [00:01<00:02, 210.68it/s]



Epoch [363/700]:  50%|█████     | 391/782 [00:01<00:01, 209.71it/s]



Epoch [363/700]:  56%|█████▌    | 435/782 [00:02<00:01, 211.40it/s]



Epoch [363/700]:  61%|██████▏   | 479/782 [00:02<00:01, 208.44it/s]



Epoch [363/700]:  67%|██████▋   | 523/782 [00:02<00:01, 209.40it/s]



Epoch [363/700]:  72%|███████▏  | 565/782 [00:02<00:01, 207.15it/s]



Epoch [363/700]:  78%|███████▊  | 608/782 [00:02<00:00, 209.01it/s]



Epoch [363/700]:  83%|████████▎ | 652/782 [00:03<00:00, 210.53it/s]



Epoch [363/700]:  89%|████████▉ | 696/782 [00:03<00:00, 210.16it/s]



Epoch [363/700]:  97%|█████████▋| 761/782 [00:03<00:00, 208.22it/s]



Epoch [363/700]: 100%|██████████| 782/782 [00:03<00:00, 209.39it/s]


Learning Rate: 0.004500
Train Loss: 0.0780, Accuracy: 97.33%, Confidence: 0.9691
Test Loss: 1.8176, Accuracy: 73.15%, Confidence: 0.9351
Train-Test Accuracy Gap: 24.18%


Epoch [364/700]:   3%|▎         | 21/782 [00:00<00:03, 204.88it/s]



Epoch [364/700]:  11%|█         | 87/782 [00:00<00:03, 208.53it/s]



Epoch [364/700]:  16%|█▋        | 129/782 [00:00<00:03, 203.82it/s]



Epoch [364/700]:  22%|██▏       | 172/782 [00:00<00:02, 206.02it/s]



Epoch [364/700]:  27%|██▋       | 214/782 [00:01<00:02, 204.62it/s]



Epoch [364/700]:  33%|███▎      | 256/782 [00:01<00:02, 206.81it/s]



Epoch [364/700]:  38%|███▊      | 298/782 [00:01<00:02, 204.83it/s]



Epoch [364/700]:  43%|████▎     | 340/782 [00:01<00:02, 203.48it/s]



Epoch [364/700]:  49%|████▉     | 382/782 [00:01<00:01, 202.38it/s]



Epoch [364/700]:  54%|█████▍    | 424/782 [00:02<00:01, 202.04it/s]



Epoch [364/700]:  60%|█████▉    | 466/782 [00:02<00:01, 202.58it/s]



Epoch [364/700]:  65%|██████▍   | 508/782 [00:02<00:01, 200.75it/s]



Epoch [364/700]:  70%|███████   | 550/782 [00:02<00:01, 202.97it/s]



Epoch [364/700]:  76%|███████▌  | 592/782 [00:02<00:00, 205.21it/s]



Epoch [364/700]:  84%|████████▍ | 658/782 [00:03<00:00, 209.80it/s]



Epoch [364/700]:  90%|████████▉ | 702/782 [00:03<00:00, 209.74it/s]



Epoch [364/700]:  95%|█████████▌| 746/782 [00:03<00:00, 210.22it/s]



Epoch [364/700]: 100%|██████████| 782/782 [00:03<00:00, 205.37it/s]


Learning Rate: 0.004500
Train Loss: 0.0694, Accuracy: 97.60%, Confidence: 0.9696
Test Loss: 1.8133, Accuracy: 72.99%, Confidence: 0.9309
Train-Test Accuracy Gap: 24.61%


Epoch [365/700]:   3%|▎         | 22/782 [00:00<00:03, 210.05it/s]



Epoch [365/700]:   6%|▌         | 44/782 [00:00<00:03, 212.65it/s]



Epoch [365/700]:   8%|▊         | 66/782 [00:00<00:03, 208.64it/s]



Epoch [365/700]:  17%|█▋        | 131/782 [00:00<00:03, 210.16it/s]



Epoch [365/700]:  22%|██▏       | 175/782 [00:00<00:02, 213.69it/s]



Epoch [365/700]:  28%|██▊       | 219/782 [00:01<00:02, 211.31it/s]



Epoch [365/700]:  34%|███▎      | 262/782 [00:01<00:02, 207.82it/s]



Epoch [365/700]:  39%|███▉      | 305/782 [00:01<00:02, 209.26it/s]



Epoch [365/700]:  45%|████▍     | 349/782 [00:01<00:02, 208.68it/s]



Epoch [365/700]:  50%|█████     | 393/782 [00:01<00:01, 211.67it/s]



Epoch [365/700]:  56%|█████▌    | 437/782 [00:02<00:01, 209.21it/s]



Epoch [365/700]:  62%|██████▏   | 481/782 [00:02<00:01, 212.04it/s]



Epoch [365/700]:  67%|██████▋   | 525/782 [00:02<00:01, 210.75it/s]



Epoch [365/700]:  73%|███████▎  | 569/782 [00:02<00:00, 213.74it/s]



Epoch [365/700]:  81%|████████  | 635/782 [00:03<00:00, 214.39it/s]



Epoch [365/700]:  87%|████████▋ | 679/782 [00:03<00:00, 213.04it/s]



Epoch [365/700]:  92%|█████████▏| 723/782 [00:03<00:00, 213.16it/s]



Epoch [365/700]:  98%|█████████▊| 767/782 [00:03<00:00, 213.31it/s]



Epoch [365/700]: 100%|██████████| 782/782 [00:03<00:00, 211.18it/s]


Learning Rate: 0.004500
Train Loss: 0.0695, Accuracy: 97.65%, Confidence: 0.9703
Test Loss: 1.7241, Accuracy: 74.03%, Confidence: 0.9347
Train-Test Accuracy Gap: 23.62%


Epoch [366/700]:   3%|▎         | 22/782 [00:00<00:03, 211.70it/s]



Epoch [366/700]:  11%|█▏        | 88/782 [00:00<00:03, 215.83it/s]



Epoch [366/700]:  17%|█▋        | 132/782 [00:00<00:03, 215.23it/s]



Epoch [366/700]:  23%|██▎       | 177/782 [00:00<00:02, 216.56it/s]



Epoch [366/700]:  28%|██▊       | 221/782 [00:01<00:02, 214.53it/s]



Epoch [366/700]:  34%|███▍      | 265/782 [00:01<00:02, 214.39it/s]



Epoch [366/700]:  40%|███▉      | 309/782 [00:01<00:02, 210.51it/s]



Epoch [366/700]:  45%|████▌     | 353/782 [00:01<00:02, 210.75it/s]



Epoch [366/700]:  51%|█████     | 397/782 [00:01<00:01, 210.84it/s]



Epoch [366/700]:  59%|█████▉    | 463/782 [00:02<00:01, 209.63it/s]



Epoch [366/700]:  65%|██████▍   | 505/782 [00:02<00:01, 207.13it/s]



Epoch [366/700]:  70%|██████▉   | 547/782 [00:02<00:01, 203.47it/s]



Epoch [366/700]:  75%|███████▌  | 589/782 [00:02<00:00, 206.41it/s]



Epoch [366/700]:  81%|████████  | 632/782 [00:03<00:00, 206.37it/s]



Epoch [366/700]:  86%|████████▋ | 675/782 [00:03<00:00, 207.42it/s]



Epoch [366/700]:  92%|█████████▏| 718/782 [00:03<00:00, 206.05it/s]



Epoch [366/700]:  97%|█████████▋| 760/782 [00:03<00:00, 204.41it/s]



Epoch [366/700]: 100%|██████████| 782/782 [00:03<00:00, 208.60it/s]


Learning Rate: 0.004500
Train Loss: 0.0775, Accuracy: 97.34%, Confidence: 0.9690
Test Loss: 2.1770, Accuracy: 70.25%, Confidence: 0.9346
Train-Test Accuracy Gap: 27.09%


Epoch [367/700]:   3%|▎         | 20/782 [00:00<00:03, 194.44it/s]



Epoch [367/700]:   5%|▌         | 41/782 [00:00<00:03, 197.38it/s]



Epoch [367/700]:   8%|▊         | 61/782 [00:00<00:03, 195.47it/s]



Epoch [367/700]:  10%|█         | 82/782 [00:00<00:03, 198.41it/s]



Epoch [367/700]:  13%|█▎        | 102/782 [00:00<00:03, 194.28it/s]



Epoch [367/700]:  16%|█▌        | 123/782 [00:00<00:03, 199.23it/s]



Epoch [367/700]:  18%|█▊        | 143/782 [00:00<00:03, 197.58it/s]



Epoch [367/700]:  21%|██        | 164/782 [00:00<00:03, 198.86it/s]



Epoch [367/700]:  24%|██▎       | 185/782 [00:00<00:02, 200.55it/s]



Epoch [367/700]:  32%|███▏      | 251/782 [00:01<00:02, 208.98it/s]



Epoch [367/700]:  38%|███▊      | 294/782 [00:01<00:02, 209.60it/s]



Epoch [367/700]:  43%|████▎     | 338/782 [00:01<00:02, 211.29it/s]



Epoch [367/700]:  49%|████▉     | 382/782 [00:01<00:01, 207.53it/s]



Epoch [367/700]:  54%|█████▍    | 426/782 [00:02<00:01, 211.28it/s]



Epoch [367/700]:  60%|██████    | 470/782 [00:02<00:01, 212.47it/s]



Epoch [367/700]:  66%|██████▌   | 514/782 [00:02<00:01, 209.46it/s]



Epoch [367/700]:  71%|███████   | 557/782 [00:02<00:01, 209.50it/s]



Epoch [367/700]:  77%|███████▋  | 601/782 [00:02<00:00, 208.79it/s]



Epoch [367/700]:  82%|████████▏ | 643/782 [00:03<00:00, 207.28it/s]



Epoch [367/700]:  88%|████████▊ | 685/782 [00:03<00:00, 204.24it/s]



Epoch [367/700]:  93%|█████████▎| 727/782 [00:03<00:00, 203.62it/s]



Epoch [367/700]:  98%|█████████▊| 769/782 [00:03<00:00, 200.46it/s]



Epoch [367/700]: 100%|██████████| 782/782 [00:03<00:00, 205.15it/s]


Learning Rate: 0.004500
Train Loss: 0.0740, Accuracy: 97.43%, Confidence: 0.9690
Test Loss: 1.8328, Accuracy: 71.82%, Confidence: 0.9294
Train-Test Accuracy Gap: 25.61%


Epoch [368/700]:   3%|▎         | 21/782 [00:00<00:03, 205.63it/s]



Epoch [368/700]:  11%|█         | 86/782 [00:00<00:03, 208.63it/s]



Epoch [368/700]:  16%|█▋        | 128/782 [00:00<00:03, 206.09it/s]



Epoch [368/700]:  22%|██▏       | 171/782 [00:00<00:02, 209.85it/s]



Epoch [368/700]:  27%|██▋       | 215/782 [00:01<00:02, 212.75it/s]



Epoch [368/700]:  33%|███▎      | 259/782 [00:01<00:02, 213.33it/s]



Epoch [368/700]:  39%|███▊      | 303/782 [00:01<00:02, 211.37it/s]



Epoch [368/700]:  44%|████▍     | 347/782 [00:01<00:02, 210.58it/s]



Epoch [368/700]:  50%|█████     | 391/782 [00:01<00:01, 211.29it/s]



Epoch [368/700]:  56%|█████▌    | 435/782 [00:02<00:01, 212.62it/s]



Epoch [368/700]:  61%|██████▏   | 479/782 [00:02<00:01, 212.29it/s]



Epoch [368/700]:  67%|██████▋   | 523/782 [00:02<00:01, 213.39it/s]



Epoch [368/700]:  73%|███████▎  | 567/782 [00:02<00:01, 212.15it/s]



Epoch [368/700]:  78%|███████▊  | 611/782 [00:02<00:00, 212.48it/s]



Epoch [368/700]:  87%|████████▋ | 678/782 [00:03<00:00, 212.96it/s]



Epoch [368/700]:  92%|█████████▏| 722/782 [00:03<00:00, 212.74it/s]



Epoch [368/700]:  98%|█████████▊| 766/782 [00:03<00:00, 213.66it/s]



Epoch [368/700]: 100%|██████████| 782/782 [00:03<00:00, 210.97it/s]


Learning Rate: 0.004500
Train Loss: 0.0678, Accuracy: 97.73%, Confidence: 0.9703
Test Loss: 2.1713, Accuracy: 69.92%, Confidence: 0.9285
Train-Test Accuracy Gap: 27.81%


Epoch [369/700]:   3%|▎         | 21/782 [00:00<00:03, 202.66it/s]



Epoch [369/700]:   5%|▌         | 43/782 [00:00<00:03, 208.94it/s]



Epoch [369/700]:   8%|▊         | 65/782 [00:00<00:03, 209.29it/s]



Epoch [369/700]:  11%|█         | 86/782 [00:00<00:03, 204.90it/s]



Epoch [369/700]:  14%|█▍        | 108/782 [00:00<00:03, 207.05it/s]



Epoch [369/700]:  17%|█▋        | 130/782 [00:00<00:03, 210.32it/s]



Epoch [369/700]:  22%|██▏       | 174/782 [00:00<00:02, 212.57it/s]



Epoch [369/700]:  28%|██▊       | 218/782 [00:01<00:02, 212.85it/s]



Epoch [369/700]:  34%|███▎      | 262/782 [00:01<00:02, 214.28it/s]



Epoch [369/700]:  39%|███▉      | 306/782 [00:01<00:02, 212.69it/s]



Epoch [369/700]:  45%|████▍     | 350/782 [00:01<00:02, 208.84it/s]



Epoch [369/700]:  50%|█████     | 392/782 [00:01<00:01, 206.57it/s]



Epoch [369/700]:  56%|█████▌    | 436/782 [00:02<00:01, 210.93it/s]



Epoch [369/700]:  61%|██████▏   | 479/782 [00:02<00:01, 207.76it/s]



Epoch [369/700]:  67%|██████▋   | 522/782 [00:02<00:01, 209.72it/s]



Epoch [369/700]:  72%|███████▏  | 566/782 [00:02<00:01, 208.82it/s]



Epoch [369/700]:  78%|███████▊  | 609/782 [00:02<00:00, 209.31it/s]



Epoch [369/700]:  83%|████████▎ | 651/782 [00:03<00:00, 207.53it/s]



Epoch [369/700]:  89%|████████▉ | 695/782 [00:03<00:00, 211.69it/s]



Epoch [369/700]:  94%|█████████▍| 738/782 [00:03<00:00, 208.31it/s]



Epoch [369/700]: 100%|██████████| 782/782 [00:03<00:00, 209.38it/s]






Learning Rate: 0.004500
Train Loss: 0.0782, Accuracy: 97.32%, Confidence: 0.9686
Test Loss: 2.1743, Accuracy: 69.93%, Confidence: 0.9343
Train-Test Accuracy Gap: 27.39%


Epoch [370/700]:   3%|▎         | 21/782 [00:00<00:03, 209.25it/s]



Epoch [370/700]:  11%|█         | 87/782 [00:00<00:03, 211.08it/s]



Epoch [370/700]:  17%|█▋        | 130/782 [00:00<00:03, 207.26it/s]



Epoch [370/700]:  22%|██▏       | 172/782 [00:00<00:02, 205.17it/s]



Epoch [370/700]:  27%|██▋       | 214/782 [00:01<00:02, 206.89it/s]



Epoch [370/700]:  33%|███▎      | 256/782 [00:01<00:02, 206.82it/s]



Epoch [370/700]:  38%|███▊      | 299/782 [00:01<00:02, 208.30it/s]



Epoch [370/700]:  44%|████▎     | 341/782 [00:01<00:02, 205.58it/s]



Epoch [370/700]:  49%|████▉     | 384/782 [00:01<00:01, 207.76it/s]



Epoch [370/700]:  55%|█████▍    | 428/782 [00:02<00:01, 210.26it/s]



Epoch [370/700]:  60%|██████    | 472/782 [00:02<00:01, 212.90it/s]



Epoch [370/700]:  66%|██████▌   | 515/782 [00:02<00:01, 206.40it/s]



Epoch [370/700]:  71%|███████▏  | 558/782 [00:02<00:01, 207.27it/s]



Epoch [370/700]:  77%|███████▋  | 601/782 [00:02<00:00, 207.35it/s]



Epoch [370/700]:  82%|████████▏ | 645/782 [00:03<00:00, 209.60it/s]



Epoch [370/700]:  88%|████████▊ | 687/782 [00:03<00:00, 207.79it/s]



Epoch [370/700]:  93%|█████████▎| 729/782 [00:03<00:00, 203.73it/s]



Epoch [370/700]: 100%|██████████| 782/782 [00:03<00:00, 207.29it/s]


Learning Rate: 0.004500
Train Loss: 0.0747, Accuracy: 97.43%, Confidence: 0.9693
Test Loss: 1.8161, Accuracy: 73.01%, Confidence: 0.9315
Train-Test Accuracy Gap: 24.42%


Epoch [371/700]:   3%|▎         | 22/782 [00:00<00:03, 212.04it/s]



Epoch [371/700]:  11%|█▏        | 88/782 [00:00<00:03, 213.17it/s]



Epoch [371/700]:  20%|█▉        | 154/782 [00:00<00:02, 214.15it/s]



Epoch [371/700]:  25%|██▌       | 198/782 [00:00<00:02, 212.25it/s]



Epoch [371/700]:  31%|███       | 242/782 [00:01<00:02, 212.93it/s]



Epoch [371/700]:  37%|███▋      | 286/782 [00:01<00:02, 209.88it/s]



Epoch [371/700]:  42%|████▏     | 329/782 [00:01<00:02, 208.04it/s]



Epoch [371/700]:  48%|████▊     | 373/782 [00:01<00:01, 209.89it/s]



Epoch [371/700]:  53%|█████▎    | 416/782 [00:01<00:01, 212.38it/s]



Epoch [371/700]:  59%|█████▉    | 460/782 [00:02<00:01, 210.19it/s]



Epoch [371/700]:  64%|██████▍   | 504/782 [00:02<00:01, 210.64it/s]



Epoch [371/700]:  70%|███████   | 548/782 [00:02<00:01, 210.17it/s]



Epoch [371/700]:  76%|███████▌  | 592/782 [00:02<00:00, 210.39it/s]



Epoch [371/700]:  81%|████████  | 635/782 [00:03<00:00, 208.54it/s]



Epoch [371/700]:  90%|████████▉ | 701/782 [00:03<00:00, 213.05it/s]



Epoch [371/700]:  95%|█████████▌| 745/782 [00:03<00:00, 211.67it/s]



Epoch [371/700]: 100%|██████████| 782/782 [00:03<00:00, 210.89it/s]


Learning Rate: 0.004500
Train Loss: 0.0677, Accuracy: 97.68%, Confidence: 0.9712
Test Loss: 2.2082, Accuracy: 70.29%, Confidence: 0.9346
Train-Test Accuracy Gap: 27.39%


Epoch [372/700]:   3%|▎         | 21/782 [00:00<00:03, 207.62it/s]



Epoch [372/700]:   5%|▌         | 43/782 [00:00<00:03, 212.82it/s]



Epoch [372/700]:  11%|█         | 86/782 [00:00<00:03, 202.01it/s]



Epoch [372/700]:  14%|█▍        | 108/782 [00:00<00:03, 208.13it/s]



Epoch [372/700]:  17%|█▋        | 130/782 [00:00<00:03, 209.48it/s]



Epoch [372/700]:  19%|█▉        | 152/782 [00:00<00:02, 210.21it/s]



Epoch [372/700]:  22%|██▏       | 174/782 [00:00<00:02, 206.98it/s]



Epoch [372/700]:  25%|██▍       | 195/782 [00:00<00:02, 206.29it/s]



Epoch [372/700]:  28%|██▊       | 216/782 [00:01<00:02, 205.66it/s]



Epoch [372/700]:  30%|███       | 237/782 [00:01<00:02, 204.76it/s]



Epoch [372/700]:  33%|███▎      | 259/782 [00:01<00:02, 207.38it/s]



Epoch [372/700]:  36%|███▌      | 280/782 [00:01<00:02, 207.66it/s]



Epoch [372/700]:  39%|███▊      | 302/782 [00:01<00:02, 208.67it/s]



Epoch [372/700]:  41%|████▏     | 324/782 [00:01<00:02, 210.74it/s]



Epoch [372/700]:  44%|████▍     | 346/782 [00:01<00:02, 210.49it/s]



Epoch [372/700]:  47%|████▋     | 368/782 [00:01<00:01, 211.50it/s]



Epoch [372/700]:  50%|████▉     | 390/782 [00:01<00:01, 209.83it/s]



Epoch [372/700]:  53%|█████▎    | 411/782 [00:01<00:01, 207.33it/s]



Epoch [372/700]:  58%|█████▊    | 454/782 [00:02<00:01, 208.71it/s]



Epoch [372/700]:  63%|██████▎   | 496/782 [00:02<00:01, 203.61it/s]



Epoch [372/700]:  69%|██████▉   | 538/782 [00:02<00:01, 203.27it/s]



Epoch [372/700]:  74%|███████▍  | 580/782 [00:02<00:01, 200.79it/s]



Epoch [372/700]:  80%|███████▉  | 622/782 [00:03<00:00, 201.18it/s]



Epoch [372/700]:  85%|████████▍ | 664/782 [00:03<00:00, 200.58it/s]



Epoch [372/700]:  90%|█████████ | 706/782 [00:03<00:00, 201.91it/s]



Epoch [372/700]:  96%|█████████▌| 749/782 [00:03<00:00, 206.08it/s]



Epoch [372/700]: 100%|██████████| 782/782 [00:03<00:00, 205.73it/s]


Learning Rate: 0.004500
Train Loss: 0.0685, Accuracy: 97.63%, Confidence: 0.9706
Test Loss: 1.9131, Accuracy: 72.20%, Confidence: 0.9336
Train-Test Accuracy Gap: 25.43%


Epoch [373/700]:   3%|▎         | 21/782 [00:00<00:03, 204.34it/s]



Epoch [373/700]:   5%|▌         | 42/782 [00:00<00:03, 204.88it/s]



Epoch [373/700]:   8%|▊         | 63/782 [00:00<00:03, 204.49it/s]



Epoch [373/700]:  11%|█         | 84/782 [00:00<00:03, 201.24it/s]



Epoch [373/700]:  13%|█▎        | 105/782 [00:00<00:03, 203.61it/s]



Epoch [373/700]:  16%|█▌        | 127/782 [00:00<00:03, 207.09it/s]



Epoch [373/700]:  19%|█▉        | 148/782 [00:00<00:03, 205.84it/s]



Epoch [373/700]:  22%|██▏       | 169/782 [00:00<00:02, 206.16it/s]



Epoch [373/700]:  24%|██▍       | 190/782 [00:00<00:02, 205.77it/s]



Epoch [373/700]:  33%|███▎      | 256/782 [00:01<00:02, 209.66it/s]



Epoch [373/700]:  38%|███▊      | 299/782 [00:01<00:02, 211.22it/s]



Epoch [373/700]:  44%|████▍     | 343/782 [00:01<00:02, 204.44it/s]



Epoch [373/700]:  49%|████▉     | 386/782 [00:01<00:01, 207.65it/s]



Epoch [373/700]:  55%|█████▍    | 428/782 [00:02<00:01, 205.27it/s]



Epoch [373/700]:  60%|██████    | 470/782 [00:02<00:01, 201.02it/s]



Epoch [373/700]:  65%|██████▌   | 512/782 [00:02<00:01, 201.73it/s]



Epoch [373/700]:  71%|███████   | 555/782 [00:02<00:01, 205.67it/s]



Epoch [373/700]:  76%|███████▋  | 597/782 [00:02<00:00, 206.12it/s]



Epoch [373/700]:  84%|████████▍ | 660/782 [00:03<00:00, 202.16it/s]



Epoch [373/700]:  90%|████████▉ | 702/782 [00:03<00:00, 202.88it/s]



Epoch [373/700]:  95%|█████████▌| 744/782 [00:03<00:00, 200.33it/s]



Epoch [373/700]: 100%|██████████| 782/782 [00:03<00:00, 204.84it/s]


Learning Rate: 0.004500
Train Loss: 0.0771, Accuracy: 97.29%, Confidence: 0.9696
Test Loss: 1.9492, Accuracy: 71.93%, Confidence: 0.9346
Train-Test Accuracy Gap: 25.36%


Epoch [374/700]:   3%|▎         | 20/782 [00:00<00:03, 192.22it/s]



Epoch [374/700]:  11%|█         | 83/782 [00:00<00:03, 205.82it/s]



Epoch [374/700]:  16%|█▌        | 125/782 [00:00<00:03, 206.73it/s]



Epoch [374/700]:  21%|██▏       | 167/782 [00:00<00:02, 207.55it/s]



Epoch [374/700]:  27%|██▋       | 209/782 [00:01<00:02, 205.50it/s]



Epoch [374/700]:  32%|███▏      | 252/782 [00:01<00:02, 204.82it/s]



Epoch [374/700]:  38%|███▊      | 295/782 [00:01<00:02, 208.06it/s]



Epoch [374/700]:  46%|████▌     | 359/782 [00:01<00:02, 207.64it/s]



Epoch [374/700]:  51%|█████▏    | 401/782 [00:01<00:01, 207.57it/s]



Epoch [374/700]:  57%|█████▋    | 443/782 [00:02<00:01, 204.90it/s]



Epoch [374/700]:  62%|██████▏   | 486/782 [00:02<00:01, 207.89it/s]



Epoch [374/700]:  68%|██████▊   | 529/782 [00:02<00:01, 209.10it/s]



Epoch [374/700]:  73%|███████▎  | 572/782 [00:02<00:00, 210.26it/s]



Epoch [374/700]:  79%|███████▉  | 616/782 [00:02<00:00, 211.28it/s]



Epoch [374/700]:  84%|████████▍ | 659/782 [00:03<00:00, 208.66it/s]



Epoch [374/700]:  90%|████████▉ | 701/782 [00:03<00:00, 207.81it/s]



Epoch [374/700]:  95%|█████████▌| 743/782 [00:03<00:00, 206.41it/s]



Epoch [374/700]: 100%|██████████| 782/782 [00:03<00:00, 207.06it/s]


Learning Rate: 0.004500
Train Loss: 0.0700, Accuracy: 97.55%, Confidence: 0.9701
Test Loss: 1.8703, Accuracy: 73.18%, Confidence: 0.9349
Train-Test Accuracy Gap: 24.37%


Epoch [375/700]:   3%|▎         | 20/782 [00:00<00:03, 199.63it/s]



Epoch [375/700]:  11%|█         | 84/782 [00:00<00:03, 209.27it/s]



Epoch [375/700]:  16%|█▌        | 126/782 [00:00<00:03, 209.42it/s]



Epoch [375/700]:  21%|██▏       | 168/782 [00:00<00:02, 205.97it/s]



Epoch [375/700]:  27%|██▋       | 210/782 [00:01<00:02, 206.42it/s]



Epoch [375/700]:  32%|███▏      | 252/782 [00:01<00:02, 200.95it/s]



Epoch [375/700]:  38%|███▊      | 294/782 [00:01<00:02, 204.11it/s]



Epoch [375/700]:  43%|████▎     | 337/782 [00:01<00:02, 207.65it/s]



Epoch [375/700]:  52%|█████▏    | 403/782 [00:01<00:01, 211.56it/s]



Epoch [375/700]:  57%|█████▋    | 446/782 [00:02<00:01, 206.36it/s]



Epoch [375/700]:  62%|██████▏   | 488/782 [00:02<00:01, 202.68it/s]



Epoch [375/700]:  68%|██████▊   | 530/782 [00:02<00:01, 202.59it/s]



Epoch [375/700]:  73%|███████▎  | 573/782 [00:02<00:01, 206.31it/s]



Epoch [375/700]:  79%|███████▊  | 615/782 [00:02<00:00, 203.66it/s]



Epoch [375/700]:  84%|████████▍ | 658/782 [00:03<00:00, 206.46it/s]



Epoch [375/700]:  90%|████████▉ | 700/782 [00:03<00:00, 206.08it/s]



Epoch [375/700]:  95%|█████████▍| 742/782 [00:03<00:00, 199.22it/s]



Epoch [375/700]: 100%|██████████| 782/782 [00:03<00:00, 205.33it/s]


Learning Rate: 0.004500
Train Loss: 0.0751, Accuracy: 97.42%, Confidence: 0.9698
Test Loss: 2.1466, Accuracy: 70.93%, Confidence: 0.9337
Train-Test Accuracy Gap: 26.49%


Epoch [376/700]:   3%|▎         | 21/782 [00:00<00:03, 204.32it/s]



Epoch [376/700]:   5%|▌         | 43/782 [00:00<00:03, 208.29it/s]



Epoch [376/700]:  11%|█         | 86/782 [00:00<00:03, 209.70it/s]



Epoch [376/700]:  16%|█▋        | 129/782 [00:00<00:03, 209.17it/s]



Epoch [376/700]:  22%|██▏       | 172/782 [00:00<00:02, 209.82it/s]



Epoch [376/700]:  28%|██▊       | 216/782 [00:01<00:02, 206.53it/s]



Epoch [376/700]:  33%|███▎      | 259/782 [00:01<00:02, 208.24it/s]



Epoch [376/700]:  38%|███▊      | 301/782 [00:01<00:02, 204.60it/s]



Epoch [376/700]:  44%|████▍     | 344/782 [00:01<00:02, 206.39it/s]



Epoch [376/700]:  49%|████▉     | 386/782 [00:01<00:01, 201.35it/s]



Epoch [376/700]:  55%|█████▍    | 428/782 [00:02<00:01, 198.59it/s]



Epoch [376/700]:  60%|█████▉    | 469/782 [00:02<00:01, 199.66it/s]



Epoch [376/700]:  68%|██████▊   | 529/782 [00:02<00:01, 193.96it/s]



Epoch [376/700]:  73%|███████▎  | 572/782 [00:02<00:01, 202.01it/s]



Epoch [376/700]:  79%|███████▊  | 614/782 [00:03<00:00, 200.35it/s]



Epoch [376/700]:  84%|████████▍ | 656/782 [00:03<00:00, 200.92it/s]



Epoch [376/700]:  87%|████████▋ | 677/782 [00:03<00:00, 197.07it/s]



Epoch [376/700]:  95%|█████████▍| 742/782 [00:03<00:00, 205.30it/s]



Epoch [376/700]: 100%|██████████| 782/782 [00:03<00:00, 203.47it/s]


Learning Rate: 0.004500
Train Loss: 0.0717, Accuracy: 97.58%, Confidence: 0.9704
Test Loss: 2.3064, Accuracy: 68.87%, Confidence: 0.9326
Train-Test Accuracy Gap: 28.71%


Epoch [377/700]:   3%|▎         | 20/782 [00:00<00:03, 198.90it/s]



Epoch [377/700]:   5%|▌         | 41/782 [00:00<00:03, 205.14it/s]



Epoch [377/700]:   8%|▊         | 62/782 [00:00<00:03, 204.41it/s]



Epoch [377/700]:  16%|█▌        | 127/782 [00:00<00:03, 208.69it/s]



Epoch [377/700]:  22%|██▏       | 170/782 [00:00<00:02, 207.99it/s]



Epoch [377/700]:  27%|██▋       | 212/782 [00:01<00:02, 206.46it/s]



Epoch [377/700]:  32%|███▏      | 254/782 [00:01<00:02, 205.00it/s]



Epoch [377/700]:  38%|███▊      | 297/782 [00:01<00:02, 207.12it/s]



Epoch [377/700]:  43%|████▎     | 340/782 [00:01<00:02, 208.51it/s]



Epoch [377/700]:  49%|████▉     | 384/782 [00:01<00:01, 209.71it/s]



Epoch [377/700]:  54%|█████▍    | 426/782 [00:02<00:01, 206.82it/s]



Epoch [377/700]:  60%|██████    | 470/782 [00:02<00:01, 210.06it/s]



Epoch [377/700]:  66%|██████▌   | 514/782 [00:02<00:01, 209.25it/s]



Epoch [377/700]:  71%|███████   | 557/782 [00:02<00:01, 210.06it/s]



Epoch [377/700]:  77%|███████▋  | 601/782 [00:02<00:00, 208.96it/s]



Epoch [377/700]:  82%|████████▏ | 644/782 [00:03<00:00, 210.49it/s]



Epoch [377/700]:  88%|████████▊ | 688/782 [00:03<00:00, 209.25it/s]



Epoch [377/700]:  93%|█████████▎| 730/782 [00:03<00:00, 208.48it/s]



Epoch [377/700]: 100%|██████████| 782/782 [00:03<00:00, 208.12it/s]






Learning Rate: 0.004500
Train Loss: 0.0738, Accuracy: 97.48%, Confidence: 0.9699
Test Loss: 1.8434, Accuracy: 72.56%, Confidence: 0.9330
Train-Test Accuracy Gap: 24.92%


Epoch [378/700]:   3%|▎         | 22/782 [00:00<00:03, 214.74it/s]



Epoch [378/700]:   6%|▌         | 44/782 [00:00<00:03, 217.24it/s]



Epoch [378/700]:   8%|▊         | 66/782 [00:00<00:03, 214.88it/s]



Epoch [378/700]:  11%|█▏        | 88/782 [00:00<00:03, 214.62it/s]



Epoch [378/700]:  14%|█▍        | 110/782 [00:00<00:03, 213.52it/s]



Epoch [378/700]:  17%|█▋        | 132/782 [00:00<00:03, 211.34it/s]



Epoch [378/700]:  20%|█▉        | 154/782 [00:00<00:02, 210.20it/s]



Epoch [378/700]:  23%|██▎       | 176/782 [00:00<00:02, 206.59it/s]



Epoch [378/700]:  25%|██▌       | 197/782 [00:00<00:02, 201.38it/s]



Epoch [378/700]:  28%|██▊       | 219/782 [00:01<00:02, 204.61it/s]



Epoch [378/700]:  31%|███       | 240/782 [00:01<00:02, 206.06it/s]



Epoch [378/700]:  34%|███▎      | 262/782 [00:01<00:02, 207.79it/s]



Epoch [378/700]:  36%|███▌      | 283/782 [00:01<00:02, 208.09it/s]



Epoch [378/700]:  39%|███▉      | 304/782 [00:01<00:02, 207.99it/s]



Epoch [378/700]:  42%|████▏     | 326/782 [00:01<00:02, 208.96it/s]



Epoch [378/700]:  45%|████▍     | 348/782 [00:01<00:02, 209.80it/s]



Epoch [378/700]:  47%|████▋     | 370/782 [00:01<00:01, 212.10it/s]



Epoch [378/700]:  50%|█████     | 392/782 [00:01<00:01, 207.94it/s]



Epoch [378/700]:  53%|█████▎    | 413/782 [00:01<00:01, 207.51it/s]



Epoch [378/700]:  58%|█████▊    | 457/782 [00:02<00:01, 210.69it/s]



Epoch [378/700]:  64%|██████▍   | 501/782 [00:02<00:01, 214.46it/s]



Epoch [378/700]:  67%|██████▋   | 523/782 [00:02<00:01, 213.49it/s]



Epoch [378/700]:  70%|██████▉   | 545/782 [00:02<00:01, 212.81it/s]



Epoch [378/700]:  73%|███████▎  | 567/782 [00:02<00:01, 212.34it/s]



Epoch [378/700]:  75%|███████▌  | 589/782 [00:02<00:00, 212.01it/s]



Epoch [378/700]:  78%|███████▊  | 611/782 [00:02<00:00, 208.77it/s]



Epoch [378/700]:  81%|████████  | 632/782 [00:03<00:00, 205.41it/s]



Epoch [378/700]:  84%|████████▎ | 653/782 [00:03<00:00, 206.29it/s]



Epoch [378/700]:  86%|████████▋ | 675/782 [00:03<00:00, 209.52it/s]



Epoch [378/700]:  89%|████████▉ | 697/782 [00:03<00:00, 209.89it/s]



Epoch [378/700]:  92%|█████████▏| 719/782 [00:03<00:00, 211.34it/s]



Epoch [378/700]:  95%|█████████▍| 741/782 [00:03<00:00, 210.45it/s]



Epoch [378/700]:  98%|█████████▊| 763/782 [00:03<00:00, 209.39it/s]



Epoch [378/700]: 100%|██████████| 782/782 [00:03<00:00, 209.69it/s]


Learning Rate: 0.004500
Train Loss: 0.0642, Accuracy: 97.79%, Confidence: 0.9720
Test Loss: 1.8184, Accuracy: 72.04%, Confidence: 0.9324
Train-Test Accuracy Gap: 25.75%


Epoch [379/700]:   3%|▎         | 20/782 [00:00<00:03, 197.02it/s]



Epoch [379/700]:   5%|▌         | 41/782 [00:00<00:03, 203.79it/s]



Epoch [379/700]:   8%|▊         | 62/782 [00:00<00:03, 199.30it/s]



Epoch [379/700]:  11%|█         | 83/782 [00:00<00:03, 202.83it/s]



Epoch [379/700]:  13%|█▎        | 104/782 [00:00<00:03, 204.13it/s]



Epoch [379/700]:  16%|█▌        | 126/782 [00:00<00:03, 207.38it/s]



Epoch [379/700]:  19%|█▉        | 148/782 [00:00<00:03, 208.58it/s]



Epoch [379/700]:  27%|██▋       | 214/782 [00:01<00:02, 213.61it/s]



Epoch [379/700]:  33%|███▎      | 258/782 [00:01<00:02, 213.86it/s]



Epoch [379/700]:  39%|███▊      | 302/782 [00:01<00:02, 214.38it/s]



Epoch [379/700]:  44%|████▍     | 346/782 [00:01<00:02, 212.98it/s]



Epoch [379/700]:  50%|████▉     | 390/782 [00:01<00:01, 210.97it/s]



Epoch [379/700]:  58%|█████▊    | 455/782 [00:02<00:01, 209.72it/s]



Epoch [379/700]:  64%|██████▎   | 498/782 [00:02<00:01, 208.60it/s]



Epoch [379/700]:  69%|██████▉   | 542/782 [00:02<00:01, 210.65it/s]



Epoch [379/700]:  75%|███████▍  | 586/782 [00:02<00:00, 211.69it/s]



Epoch [379/700]:  80%|████████  | 629/782 [00:03<00:00, 208.34it/s]



Epoch [379/700]:  86%|████████▌ | 671/782 [00:03<00:00, 206.59it/s]



Epoch [379/700]:  91%|█████████▏| 714/782 [00:03<00:00, 208.58it/s]



Epoch [379/700]:  97%|█████████▋| 758/782 [00:03<00:00, 210.93it/s]



Epoch [379/700]: 100%|██████████| 782/782 [00:03<00:00, 209.29it/s]


Learning Rate: 0.004500
Train Loss: 0.0712, Accuracy: 97.53%, Confidence: 0.9703
Test Loss: 2.4566, Accuracy: 67.89%, Confidence: 0.9320
Train-Test Accuracy Gap: 29.64%


Epoch [380/700]:   3%|▎         | 20/782 [00:00<00:03, 198.82it/s]



Epoch [380/700]:   5%|▌         | 42/782 [00:00<00:03, 206.14it/s]



Epoch [380/700]:   8%|▊         | 64/782 [00:00<00:03, 209.59it/s]



Epoch [380/700]:  11%|█         | 86/782 [00:00<00:03, 211.71it/s]



Epoch [380/700]:  17%|█▋        | 130/782 [00:00<00:03, 213.57it/s]



Epoch [380/700]:  22%|██▏       | 174/782 [00:00<00:02, 211.00it/s]



Epoch [380/700]:  28%|██▊       | 218/782 [00:01<00:02, 213.26it/s]



Epoch [380/700]:  34%|███▎      | 262/782 [00:01<00:02, 213.77it/s]



Epoch [380/700]:  39%|███▉      | 306/782 [00:01<00:02, 213.61it/s]



Epoch [380/700]:  45%|████▍     | 350/782 [00:01<00:02, 211.30it/s]



Epoch [380/700]:  50%|█████     | 394/782 [00:01<00:01, 211.16it/s]



Epoch [380/700]:  56%|█████▌    | 438/782 [00:02<00:01, 213.23it/s]



Epoch [380/700]:  62%|██████▏   | 482/782 [00:02<00:01, 213.54it/s]



Epoch [380/700]:  70%|███████   | 548/782 [00:02<00:01, 213.27it/s]



Epoch [380/700]:  73%|███████▎  | 570/782 [00:02<00:01, 208.59it/s]



Epoch [380/700]:  78%|███████▊  | 613/782 [00:02<00:00, 205.21it/s]



Epoch [380/700]:  87%|████████▋ | 677/782 [00:03<00:00, 208.85it/s]



Epoch [380/700]:  92%|█████████▏| 721/782 [00:03<00:00, 211.55it/s]



Epoch [380/700]:  98%|█████████▊| 765/782 [00:03<00:00, 209.38it/s]



Epoch [380/700]: 100%|██████████| 782/782 [00:03<00:00, 210.58it/s]


Learning Rate: 0.004500
Train Loss: 0.0680, Accuracy: 97.76%, Confidence: 0.9713
Test Loss: 1.9392, Accuracy: 71.56%, Confidence: 0.9325
Train-Test Accuracy Gap: 26.20%


Epoch [381/700]:   3%|▎         | 21/782 [00:00<00:03, 206.64it/s]



Epoch [381/700]:   5%|▌         | 42/782 [00:00<00:03, 204.34it/s]



Epoch [381/700]:   8%|▊         | 64/782 [00:00<00:03, 207.65it/s]



Epoch [381/700]:  11%|█         | 86/782 [00:00<00:03, 208.65it/s]



Epoch [381/700]:  14%|█▎        | 107/782 [00:00<00:03, 209.12it/s]



Epoch [381/700]:  16%|█▋        | 129/782 [00:00<00:03, 211.13it/s]



Epoch [381/700]:  19%|█▉        | 151/782 [00:00<00:03, 210.07it/s]



Epoch [381/700]:  22%|██▏       | 173/782 [00:00<00:02, 209.37it/s]



Epoch [381/700]:  25%|██▍       | 195/782 [00:00<00:02, 210.44it/s]



Epoch [381/700]:  28%|██▊       | 217/782 [00:01<00:02, 210.51it/s]



Epoch [381/700]:  31%|███       | 239/782 [00:01<00:02, 208.53it/s]



Epoch [381/700]:  36%|███▌      | 281/782 [00:01<00:02, 205.47it/s]



Epoch [381/700]:  41%|████▏     | 324/782 [00:01<00:02, 206.17it/s]



Epoch [381/700]:  47%|████▋     | 367/782 [00:01<00:01, 207.80it/s]



Epoch [381/700]:  52%|█████▏    | 410/782 [00:01<00:01, 206.66it/s]



Epoch [381/700]:  58%|█████▊    | 452/782 [00:02<00:01, 206.59it/s]



Epoch [381/700]:  63%|██████▎   | 494/782 [00:02<00:01, 206.29it/s]



Epoch [381/700]:  66%|██████▌   | 516/782 [00:02<00:01, 208.21it/s]



Epoch [381/700]:  69%|██████▉   | 538/782 [00:02<00:01, 209.60it/s]



Epoch [381/700]:  71%|███████▏  | 559/782 [00:02<00:01, 209.50it/s]



Epoch [381/700]:  74%|███████▍  | 580/782 [00:02<00:00, 205.66it/s]



Epoch [381/700]:  77%|███████▋  | 601/782 [00:02<00:00, 204.62it/s]



Epoch [381/700]:  80%|███████▉  | 622/782 [00:03<00:00, 203.04it/s]



Epoch [381/700]:  82%|████████▏ | 643/782 [00:03<00:00, 203.84it/s]



Epoch [381/700]:  85%|████████▍ | 664/782 [00:03<00:00, 204.46it/s]



Epoch [381/700]:  88%|████████▊ | 686/782 [00:03<00:00, 206.34it/s]



Epoch [381/700]:  90%|█████████ | 707/782 [00:03<00:00, 204.66it/s]



Epoch [381/700]:  93%|█████████▎| 729/782 [00:03<00:00, 206.59it/s]



Epoch [381/700]:  96%|█████████▌| 750/782 [00:03<00:00, 202.36it/s]



Epoch [381/700]:  99%|█████████▊| 771/782 [00:03<00:00, 203.02it/s]



Epoch [381/700]: 100%|██████████| 782/782 [00:03<00:00, 206.33it/s]


Learning Rate: 0.004500
Train Loss: 0.0721, Accuracy: 97.51%, Confidence: 0.9706
Test Loss: 2.0104, Accuracy: 70.95%, Confidence: 0.9303
Train-Test Accuracy Gap: 26.56%


Epoch [382/700]:   3%|▎         | 21/782 [00:00<00:03, 206.82it/s]



Epoch [382/700]:   5%|▌         | 42/782 [00:00<00:03, 203.39it/s]



Epoch [382/700]:  11%|█         | 86/782 [00:00<00:03, 210.89it/s]



Epoch [382/700]:  17%|█▋        | 130/782 [00:00<00:03, 210.71it/s]



Epoch [382/700]:  22%|██▏       | 174/782 [00:00<00:02, 211.15it/s]



Epoch [382/700]:  28%|██▊       | 218/782 [00:01<00:02, 212.60it/s]



Epoch [382/700]:  31%|███       | 240/782 [00:01<00:02, 213.66it/s]



Epoch [382/700]:  34%|███▎      | 262/782 [00:01<00:02, 207.24it/s]



Epoch [382/700]:  39%|███▉      | 304/782 [00:01<00:02, 199.92it/s]



Epoch [382/700]:  42%|████▏     | 325/782 [00:01<00:02, 197.82it/s]



Epoch [382/700]:  44%|████▍     | 345/782 [00:01<00:02, 197.86it/s]



Epoch [382/700]:  47%|████▋     | 366/782 [00:01<00:02, 199.56it/s]



Epoch [382/700]:  49%|████▉     | 387/782 [00:01<00:01, 201.70it/s]



Epoch [382/700]:  52%|█████▏    | 408/782 [00:01<00:01, 198.07it/s]



Epoch [382/700]:  55%|█████▍    | 429/782 [00:02<00:01, 200.32it/s]



Epoch [382/700]:  58%|█████▊    | 450/782 [00:02<00:01, 200.30it/s]



Epoch [382/700]:  60%|██████    | 471/782 [00:02<00:01, 196.42it/s]



Epoch [382/700]:  63%|██████▎   | 492/782 [00:02<00:01, 200.10it/s]



Epoch [382/700]:  66%|██████▌   | 513/782 [00:02<00:01, 201.70it/s]



Epoch [382/700]:  68%|██████▊   | 534/782 [00:02<00:01, 203.94it/s]



Epoch [382/700]:  71%|███████   | 556/782 [00:02<00:01, 206.91it/s]



Epoch [382/700]:  74%|███████▍  | 577/782 [00:02<00:01, 204.82it/s]



Epoch [382/700]:  76%|███████▋  | 598/782 [00:02<00:00, 202.81it/s]



Epoch [382/700]:  79%|███████▉  | 619/782 [00:03<00:00, 200.88it/s]



Epoch [382/700]:  82%|████████▏ | 640/782 [00:03<00:00, 201.01it/s]



Epoch [382/700]:  85%|████████▍ | 661/782 [00:03<00:00, 199.45it/s]



Epoch [382/700]:  87%|████████▋ | 681/782 [00:03<00:00, 195.75it/s]



Epoch [382/700]:  90%|████████▉ | 702/782 [00:03<00:00, 197.80it/s]



Epoch [382/700]:  93%|█████████▎| 724/782 [00:03<00:00, 202.48it/s]



Epoch [382/700]:  98%|█████████▊| 767/782 [00:03<00:00, 202.85it/s]



Epoch [382/700]: 100%|██████████| 782/782 [00:03<00:00, 203.46it/s]


Learning Rate: 0.004500
Train Loss: 0.0659, Accuracy: 97.73%, Confidence: 0.9714
Test Loss: 2.1999, Accuracy: 70.78%, Confidence: 0.9370
Train-Test Accuracy Gap: 26.95%


Epoch [383/700]:   3%|▎         | 22/782 [00:00<00:03, 210.63it/s]



Epoch [383/700]:   6%|▌         | 44/782 [00:00<00:03, 213.68it/s]



Epoch [383/700]:   8%|▊         | 66/782 [00:00<00:03, 212.78it/s]



Epoch [383/700]:  11%|█▏        | 88/782 [00:00<00:03, 211.17it/s]



Epoch [383/700]:  14%|█▍        | 110/782 [00:00<00:03, 209.61it/s]



Epoch [383/700]:  17%|█▋        | 131/782 [00:00<00:03, 207.32it/s]



Epoch [383/700]:  20%|█▉        | 153/782 [00:00<00:03, 209.55it/s]



Epoch [383/700]:  28%|██▊       | 218/782 [00:01<00:02, 212.50it/s]



Epoch [383/700]:  34%|███▎      | 262/782 [00:01<00:02, 209.82it/s]



Epoch [383/700]:  39%|███▉      | 306/782 [00:01<00:02, 211.40it/s]



Epoch [383/700]:  45%|████▍     | 350/782 [00:01<00:02, 212.98it/s]



Epoch [383/700]:  50%|█████     | 394/782 [00:01<00:01, 211.51it/s]



Epoch [383/700]:  56%|█████▌    | 438/782 [00:02<00:01, 213.26it/s]



Epoch [383/700]:  62%|██████▏   | 482/782 [00:02<00:01, 212.53it/s]



Epoch [383/700]:  67%|██████▋   | 526/782 [00:02<00:01, 211.49it/s]



Epoch [383/700]:  73%|███████▎  | 570/782 [00:02<00:01, 211.87it/s]



Epoch [383/700]:  79%|███████▊  | 614/782 [00:02<00:00, 211.73it/s]



Epoch [383/700]:  84%|████████▍ | 658/782 [00:03<00:00, 211.18it/s]



Epoch [383/700]:  90%|████████▉ | 702/782 [00:03<00:00, 213.33it/s]



Epoch [383/700]:  96%|█████████▌| 747/782 [00:03<00:00, 211.53it/s]



Epoch [383/700]: 100%|██████████| 782/782 [00:03<00:00, 210.23it/s]


Learning Rate: 0.004500
Train Loss: 0.0649, Accuracy: 97.74%, Confidence: 0.9712
Test Loss: 1.8132, Accuracy: 73.52%, Confidence: 0.9350
Train-Test Accuracy Gap: 24.22%


Epoch [384/700]:   3%|▎         | 22/782 [00:00<00:03, 215.94it/s]



Epoch [384/700]:  11%|█▏        | 88/782 [00:00<00:03, 210.48it/s]



Epoch [384/700]:  17%|█▋        | 132/782 [00:00<00:03, 209.69it/s]



Epoch [384/700]:  22%|██▏       | 175/782 [00:00<00:02, 209.27it/s]



Epoch [384/700]:  28%|██▊       | 219/782 [00:01<00:02, 212.03it/s]



Epoch [384/700]:  34%|███▎      | 263/782 [00:01<00:02, 212.42it/s]



Epoch [384/700]:  39%|███▉      | 307/782 [00:01<00:02, 212.97it/s]



Epoch [384/700]:  45%|████▍     | 351/782 [00:01<00:02, 206.62it/s]



Epoch [384/700]:  50%|█████     | 394/782 [00:01<00:01, 207.13it/s]



Epoch [384/700]:  56%|█████▌    | 438/782 [00:02<00:01, 210.03it/s]



Epoch [384/700]:  62%|██████▏   | 481/782 [00:02<00:01, 207.73it/s]



Epoch [384/700]:  67%|██████▋   | 524/782 [00:02<00:01, 209.15it/s]



Epoch [384/700]:  73%|███████▎  | 567/782 [00:02<00:01, 211.59it/s]



Epoch [384/700]:  78%|███████▊  | 611/782 [00:02<00:00, 210.95it/s]



Epoch [384/700]:  84%|████████▍ | 655/782 [00:03<00:00, 210.80it/s]



Epoch [384/700]:  89%|████████▉ | 699/782 [00:03<00:00, 211.26it/s]



Epoch [384/700]:  95%|█████████▌| 743/782 [00:03<00:00, 207.43it/s]



Epoch [384/700]: 100%|██████████| 782/782 [00:03<00:00, 209.41it/s]


Learning Rate: 0.004500
Train Loss: 0.0776, Accuracy: 97.33%, Confidence: 0.9703
Test Loss: 2.0434, Accuracy: 70.38%, Confidence: 0.9310
Train-Test Accuracy Gap: 26.95%


Epoch [385/700]:   3%|▎         | 21/782 [00:00<00:03, 206.19it/s]



Epoch [385/700]:   5%|▌         | 43/782 [00:00<00:03, 209.80it/s]



Epoch [385/700]:  11%|█         | 86/782 [00:00<00:03, 209.23it/s]



Epoch [385/700]:  16%|█▋        | 129/782 [00:00<00:03, 207.85it/s]



Epoch [385/700]:  22%|██▏       | 172/782 [00:00<00:02, 209.54it/s]



Epoch [385/700]:  27%|██▋       | 215/782 [00:01<00:02, 210.66it/s]



Epoch [385/700]:  33%|███▎      | 258/782 [00:01<00:02, 204.07it/s]



Epoch [385/700]:  38%|███▊      | 300/782 [00:01<00:02, 199.46it/s]



Epoch [385/700]:  44%|████▎     | 342/782 [00:01<00:02, 204.58it/s]



Epoch [385/700]:  49%|████▉     | 385/782 [00:01<00:01, 206.46it/s]



Epoch [385/700]:  55%|█████▍    | 428/782 [00:02<00:01, 207.87it/s]



Epoch [385/700]:  60%|██████    | 470/782 [00:02<00:01, 200.82it/s]



Epoch [385/700]:  69%|██████▊   | 536/782 [00:02<00:01, 208.29it/s]



Epoch [385/700]:  74%|███████▍  | 578/782 [00:02<00:00, 206.60it/s]



Epoch [385/700]:  79%|███████▉  | 621/782 [00:03<00:00, 207.21it/s]



Epoch [385/700]:  85%|████████▍ | 663/782 [00:03<00:00, 206.40it/s]



Epoch [385/700]:  90%|█████████ | 706/782 [00:03<00:00, 208.22it/s]



Epoch [385/700]:  96%|█████████▌| 748/782 [00:03<00:00, 206.88it/s]



Epoch [385/700]: 100%|██████████| 782/782 [00:03<00:00, 206.16it/s]


Learning Rate: 0.004500
Train Loss: 0.0664, Accuracy: 97.67%, Confidence: 0.9717
Test Loss: 1.8923, Accuracy: 73.22%, Confidence: 0.9350
Train-Test Accuracy Gap: 24.45%


Epoch [386/700]:   3%|▎         | 22/782 [00:00<00:03, 210.94it/s]



Epoch [386/700]:   6%|▌         | 44/782 [00:00<00:03, 210.92it/s]



Epoch [386/700]:  11%|█▏        | 88/782 [00:00<00:03, 206.81it/s]



Epoch [386/700]:  14%|█▍        | 109/782 [00:00<00:03, 200.36it/s]



Epoch [386/700]:  17%|█▋        | 131/782 [00:00<00:03, 205.21it/s]



Epoch [386/700]:  22%|██▏       | 174/782 [00:00<00:02, 209.57it/s]



Epoch [386/700]:  28%|██▊       | 217/782 [00:01<00:02, 208.00it/s]



Epoch [386/700]:  33%|███▎      | 260/782 [00:01<00:02, 208.48it/s]



Epoch [386/700]:  39%|███▊      | 303/782 [00:01<00:02, 210.01it/s]



Epoch [386/700]:  44%|████▍     | 347/782 [00:01<00:02, 209.73it/s]



Epoch [386/700]:  50%|████▉     | 390/782 [00:01<00:01, 207.54it/s]



Epoch [386/700]:  55%|█████▌    | 433/782 [00:02<00:01, 208.68it/s]



Epoch [386/700]:  61%|██████    | 476/782 [00:02<00:01, 210.64it/s]



Epoch [386/700]:  66%|██████▋   | 519/782 [00:02<00:01, 207.47it/s]



Epoch [386/700]:  72%|███████▏  | 562/782 [00:02<00:01, 208.09it/s]



Epoch [386/700]:  77%|███████▋  | 605/782 [00:02<00:00, 209.72it/s]



Epoch [386/700]:  83%|████████▎ | 648/782 [00:03<00:00, 208.12it/s]



Epoch [386/700]:  88%|████████▊ | 690/782 [00:03<00:00, 206.50it/s]



Epoch [386/700]:  94%|█████████▎| 732/782 [00:03<00:00, 205.99it/s]



Epoch [386/700]: 100%|██████████| 782/782 [00:03<00:00, 207.07it/s]






Learning Rate: 0.004500
Train Loss: 0.0767, Accuracy: 97.29%, Confidence: 0.9700
Test Loss: 2.0278, Accuracy: 71.80%, Confidence: 0.9365
Train-Test Accuracy Gap: 25.49%


Epoch [387/700]:   3%|▎         | 21/782 [00:00<00:03, 207.76it/s]



Epoch [387/700]:  11%|█         | 87/782 [00:00<00:03, 214.00it/s]



Epoch [387/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.34it/s]



Epoch [387/700]:  22%|██▏       | 175/782 [00:00<00:02, 214.01it/s]



Epoch [387/700]:  31%|███       | 241/782 [00:01<00:02, 209.67it/s]



Epoch [387/700]:  34%|███▎      | 263/782 [00:01<00:02, 210.68it/s]



Epoch [387/700]:  39%|███▉      | 307/782 [00:01<00:02, 212.42it/s]



Epoch [387/700]:  48%|████▊     | 373/782 [00:01<00:01, 213.34it/s]



Epoch [387/700]:  53%|█████▎    | 417/782 [00:01<00:01, 212.46it/s]



Epoch [387/700]:  59%|█████▉    | 461/782 [00:02<00:01, 212.15it/s]



Epoch [387/700]:  65%|██████▍   | 505/782 [00:02<00:01, 212.81it/s]



Epoch [387/700]:  70%|███████   | 549/782 [00:02<00:01, 209.96it/s]



Epoch [387/700]:  76%|███████▌  | 593/782 [00:02<00:00, 209.49it/s]



Epoch [387/700]:  81%|████████▏ | 637/782 [00:03<00:00, 210.60it/s]



Epoch [387/700]:  87%|████████▋ | 681/782 [00:03<00:00, 209.99it/s]



Epoch [387/700]:  93%|█████████▎| 725/782 [00:03<00:00, 210.42it/s]



Epoch [387/700]:  98%|█████████▊| 768/782 [00:03<00:00, 206.94it/s]



Epoch [387/700]: 100%|██████████| 782/782 [00:03<00:00, 210.62it/s]


Learning Rate: 0.004500
Train Loss: 0.0608, Accuracy: 97.94%, Confidence: 0.9727
Test Loss: 1.9306, Accuracy: 73.07%, Confidence: 0.9365
Train-Test Accuracy Gap: 24.87%


Epoch [388/700]:   3%|▎         | 21/782 [00:00<00:03, 205.15it/s]



Epoch [388/700]:   8%|▊         | 64/782 [00:00<00:03, 206.67it/s]



Epoch [388/700]:  16%|█▋        | 129/782 [00:00<00:03, 211.40it/s]



Epoch [388/700]:  19%|█▉        | 151/782 [00:00<00:03, 207.88it/s]



Epoch [388/700]:  25%|██▍       | 194/782 [00:00<00:02, 208.71it/s]



Epoch [388/700]:  30%|███       | 238/782 [00:01<00:02, 212.58it/s]



Epoch [388/700]:  36%|███▌      | 282/782 [00:01<00:02, 209.51it/s]



Epoch [388/700]:  41%|████▏     | 324/782 [00:01<00:02, 206.73it/s]



Epoch [388/700]:  47%|████▋     | 366/782 [00:01<00:02, 206.66it/s]



Epoch [388/700]:  52%|█████▏    | 409/782 [00:01<00:01, 204.65it/s]



Epoch [388/700]:  58%|█████▊    | 452/782 [00:02<00:01, 207.93it/s]



Epoch [388/700]:  63%|██████▎   | 494/782 [00:02<00:01, 208.61it/s]



Epoch [388/700]:  69%|██████▊   | 537/782 [00:02<00:01, 208.79it/s]



Epoch [388/700]:  74%|███████▍  | 579/782 [00:02<00:01, 202.18it/s]



Epoch [388/700]:  79%|███████▉  | 621/782 [00:03<00:00, 202.88it/s]



Epoch [388/700]:  85%|████████▍ | 663/782 [00:03<00:00, 205.29it/s]



Epoch [388/700]:  90%|█████████ | 705/782 [00:03<00:00, 203.76it/s]



Epoch [388/700]:  96%|█████████▌| 747/782 [00:03<00:00, 204.79it/s]



Epoch [388/700]: 100%|██████████| 782/782 [00:03<00:00, 206.41it/s]


Learning Rate: 0.004500
Train Loss: 0.0773, Accuracy: 97.35%, Confidence: 0.9699
Test Loss: 2.2250, Accuracy: 70.79%, Confidence: 0.9336
Train-Test Accuracy Gap: 26.56%


Epoch [389/700]:   3%|▎         | 20/782 [00:00<00:03, 192.40it/s]



Epoch [389/700]:   5%|▌         | 41/782 [00:00<00:03, 198.57it/s]



Epoch [389/700]:   8%|▊         | 62/782 [00:00<00:03, 203.65it/s]



Epoch [389/700]:  11%|█         | 83/782 [00:00<00:03, 202.23it/s]



Epoch [389/700]:  13%|█▎        | 104/782 [00:00<00:03, 198.96it/s]



Epoch [389/700]:  16%|█▌        | 126/782 [00:00<00:03, 203.36it/s]



Epoch [389/700]:  22%|██▏       | 169/782 [00:00<00:02, 206.39it/s]



Epoch [389/700]:  24%|██▍       | 191/782 [00:00<00:02, 207.73it/s]



Epoch [389/700]:  27%|██▋       | 213/782 [00:01<00:02, 209.39it/s]



Epoch [389/700]:  33%|███▎      | 257/782 [00:01<00:02, 211.24it/s]



Epoch [389/700]:  38%|███▊      | 301/782 [00:01<00:02, 211.19it/s]



Epoch [389/700]:  44%|████▍     | 345/782 [00:01<00:02, 212.56it/s]



Epoch [389/700]:  50%|████▉     | 389/782 [00:01<00:01, 211.80it/s]



Epoch [389/700]:  55%|█████▌    | 433/782 [00:02<00:01, 209.29it/s]



Epoch [389/700]:  61%|██████    | 476/782 [00:02<00:01, 210.20it/s]



Epoch [389/700]:  66%|██████▋   | 519/782 [00:02<00:01, 201.20it/s]



Epoch [389/700]:  72%|███████▏  | 562/782 [00:02<00:01, 205.75it/s]



Epoch [389/700]:  77%|███████▋  | 604/782 [00:02<00:00, 206.11it/s]



Epoch [389/700]:  83%|████████▎ | 647/782 [00:03<00:00, 207.70it/s]



Epoch [389/700]:  88%|████████▊ | 690/782 [00:03<00:00, 203.83it/s]



Epoch [389/700]:  94%|█████████▎| 733/782 [00:03<00:00, 206.56it/s]



Epoch [389/700]:  99%|█████████▉| 775/782 [00:03<00:00, 203.33it/s]



Epoch [389/700]: 100%|██████████| 782/782 [00:03<00:00, 205.82it/s]


Learning Rate: 0.004500
Train Loss: 0.0767, Accuracy: 97.38%, Confidence: 0.9709
Test Loss: 1.9952, Accuracy: 71.80%, Confidence: 0.9329
Train-Test Accuracy Gap: 25.58%


Epoch [390/700]:   3%|▎         | 21/782 [00:00<00:03, 202.31it/s]



Epoch [390/700]:  11%|█         | 84/782 [00:00<00:03, 208.10it/s]



Epoch [390/700]:  16%|█▌        | 127/782 [00:00<00:03, 208.41it/s]



Epoch [390/700]:  22%|██▏       | 170/782 [00:00<00:02, 210.42it/s]



Epoch [390/700]:  27%|██▋       | 214/782 [00:01<00:02, 213.41it/s]



Epoch [390/700]:  36%|███▌      | 280/782 [00:01<00:02, 209.74it/s]



Epoch [390/700]:  44%|████▍     | 344/782 [00:01<00:02, 209.14it/s]



Epoch [390/700]:  49%|████▉     | 387/782 [00:01<00:01, 210.01it/s]



Epoch [390/700]:  55%|█████▌    | 432/782 [00:02<00:01, 213.90it/s]



Epoch [390/700]:  61%|██████    | 476/782 [00:02<00:01, 215.21it/s]



Epoch [390/700]:  66%|██████▋   | 520/782 [00:02<00:01, 210.11it/s]



Epoch [390/700]:  72%|███████▏  | 564/782 [00:02<00:01, 208.76it/s]



Epoch [390/700]:  78%|███████▊  | 607/782 [00:02<00:00, 206.85it/s]



Epoch [390/700]:  83%|████████▎ | 649/782 [00:03<00:00, 203.98it/s]



Epoch [390/700]:  88%|████████▊ | 692/782 [00:03<00:00, 204.84it/s]



Epoch [390/700]:  94%|█████████▍| 735/782 [00:03<00:00, 208.43it/s]



Epoch [390/700]: 100%|██████████| 782/782 [00:03<00:00, 209.21it/s]






Learning Rate: 0.004500
Train Loss: 0.0593, Accuracy: 98.03%, Confidence: 0.9734
Test Loss: 2.0683, Accuracy: 72.33%, Confidence: 0.9382
Train-Test Accuracy Gap: 25.70%


Epoch [391/700]:   3%|▎         | 22/782 [00:00<00:03, 211.09it/s]



Epoch [391/700]:   6%|▌         | 44/782 [00:00<00:03, 211.19it/s]



Epoch [391/700]:   8%|▊         | 66/782 [00:00<00:03, 208.02it/s]



Epoch [391/700]:  11%|█         | 87/782 [00:00<00:03, 208.51it/s]



Epoch [391/700]:  17%|█▋        | 130/782 [00:00<00:03, 208.70it/s]



Epoch [391/700]:  22%|██▏       | 172/782 [00:00<00:02, 206.39it/s]



Epoch [391/700]:  28%|██▊       | 216/782 [00:01<00:02, 210.59it/s]



Epoch [391/700]:  33%|███▎      | 260/782 [00:01<00:02, 208.18it/s]



Epoch [391/700]:  39%|███▊      | 303/782 [00:01<00:02, 207.09it/s]



Epoch [391/700]:  44%|████▍     | 345/782 [00:01<00:02, 205.34it/s]



Epoch [391/700]:  49%|████▉     | 387/782 [00:01<00:01, 204.51it/s]



Epoch [391/700]:  55%|█████▍    | 429/782 [00:02<00:01, 204.67it/s]



Epoch [391/700]:  60%|██████    | 472/782 [00:02<00:01, 206.61it/s]



Epoch [391/700]:  66%|██████▌   | 515/782 [00:02<00:01, 208.31it/s]



Epoch [391/700]:  71%|███████   | 557/782 [00:02<00:01, 206.00it/s]



Epoch [391/700]:  77%|███████▋  | 599/782 [00:02<00:00, 206.67it/s]



Epoch [391/700]:  82%|████████▏ | 642/782 [00:03<00:00, 208.43it/s]



Epoch [391/700]:  87%|████████▋ | 684/782 [00:03<00:00, 203.87it/s]



Epoch [391/700]:  93%|█████████▎| 727/782 [00:03<00:00, 206.26it/s]



Epoch [391/700]:  99%|█████████▊| 771/782 [00:03<00:00, 208.50it/s]



Epoch [391/700]: 100%|██████████| 782/782 [00:03<00:00, 206.37it/s]


Learning Rate: 0.004500
Train Loss: 0.0699, Accuracy: 97.66%, Confidence: 0.9716
Test Loss: 1.9724, Accuracy: 73.11%, Confidence: 0.9378
Train-Test Accuracy Gap: 24.55%


Epoch [392/700]:   3%|▎         | 21/782 [00:00<00:03, 204.71it/s]



Epoch [392/700]:   5%|▌         | 43/782 [00:00<00:03, 207.09it/s]



Epoch [392/700]:   8%|▊         | 64/782 [00:00<00:03, 204.53it/s]



Epoch [392/700]:  16%|█▌        | 127/782 [00:00<00:03, 207.20it/s]



Epoch [392/700]:  22%|██▏       | 170/782 [00:00<00:02, 210.31it/s]



Epoch [392/700]:  30%|███       | 236/782 [00:01<00:02, 212.51it/s]



Epoch [392/700]:  36%|███▌      | 280/782 [00:01<00:02, 211.65it/s]



Epoch [392/700]:  41%|████▏     | 324/782 [00:01<00:02, 213.67it/s]



Epoch [392/700]:  47%|████▋     | 368/782 [00:01<00:01, 207.68it/s]



Epoch [392/700]:  53%|█████▎    | 412/782 [00:01<00:01, 209.12it/s]



Epoch [392/700]:  58%|█████▊    | 456/782 [00:02<00:01, 210.33it/s]



Epoch [392/700]:  64%|██████▍   | 500/782 [00:02<00:01, 210.71it/s]



Epoch [392/700]:  70%|██████▉   | 544/782 [00:02<00:01, 210.26it/s]



Epoch [392/700]:  75%|███████▌  | 588/782 [00:02<00:00, 212.78it/s]



Epoch [392/700]:  81%|████████  | 632/782 [00:03<00:00, 211.02it/s]



Epoch [392/700]:  86%|████████▋ | 676/782 [00:03<00:00, 211.79it/s]



Epoch [392/700]:  92%|█████████▏| 720/782 [00:03<00:00, 212.77it/s]



Epoch [392/700]:  98%|█████████▊| 764/782 [00:03<00:00, 211.06it/s]



Epoch [392/700]: 100%|██████████| 782/782 [00:03<00:00, 210.37it/s]


Learning Rate: 0.004500
Train Loss: 0.0700, Accuracy: 97.57%, Confidence: 0.9718
Test Loss: 1.9389, Accuracy: 72.11%, Confidence: 0.9360
Train-Test Accuracy Gap: 25.46%


Epoch [393/700]:   3%|▎         | 21/782 [00:00<00:03, 209.87it/s]



Epoch [393/700]:   5%|▌         | 43/782 [00:00<00:03, 210.34it/s]



Epoch [393/700]:   8%|▊         | 65/782 [00:00<00:03, 208.68it/s]



Epoch [393/700]:  11%|█         | 87/782 [00:00<00:03, 210.80it/s]



Epoch [393/700]:  14%|█▍        | 109/782 [00:00<00:03, 206.31it/s]



Epoch [393/700]:  17%|█▋        | 131/782 [00:00<00:03, 209.45it/s]



Epoch [393/700]:  19%|█▉        | 152/782 [00:00<00:03, 206.78it/s]



Epoch [393/700]:  22%|██▏       | 174/782 [00:00<00:02, 209.47it/s]



Epoch [393/700]:  28%|██▊       | 217/782 [00:01<00:02, 211.14it/s]



Epoch [393/700]:  33%|███▎      | 260/782 [00:01<00:02, 207.12it/s]



Epoch [393/700]:  39%|███▊      | 303/782 [00:01<00:02, 208.75it/s]



Epoch [393/700]:  44%|████▍     | 346/782 [00:01<00:02, 208.52it/s]



Epoch [393/700]:  50%|████▉     | 389/782 [00:01<00:01, 208.21it/s]



Epoch [393/700]:  55%|█████▌    | 431/782 [00:02<00:01, 207.73it/s]



Epoch [393/700]:  61%|██████    | 474/782 [00:02<00:01, 207.33it/s]



Epoch [393/700]:  66%|██████▌   | 518/782 [00:02<00:01, 210.24it/s]



Epoch [393/700]:  69%|██████▉   | 540/782 [00:02<00:01, 209.71it/s]



Epoch [393/700]:  72%|███████▏  | 561/782 [00:02<00:01, 207.13it/s]



Epoch [393/700]:  77%|███████▋  | 603/782 [00:02<00:00, 204.36it/s]



Epoch [393/700]:  80%|███████▉  | 624/782 [00:03<00:00, 205.83it/s]



Epoch [393/700]:  83%|████████▎ | 646/782 [00:03<00:00, 207.88it/s]



Epoch [393/700]:  85%|████████▌ | 667/782 [00:03<00:00, 207.94it/s]



Epoch [393/700]:  88%|████████▊ | 689/782 [00:03<00:00, 209.79it/s]



Epoch [393/700]:  91%|█████████ | 710/782 [00:03<00:00, 209.76it/s]



Epoch [393/700]:  93%|█████████▎| 731/782 [00:03<00:00, 206.92it/s]



Epoch [393/700]:  96%|█████████▌| 752/782 [00:03<00:00, 207.19it/s]



Epoch [393/700]: 100%|██████████| 782/782 [00:03<00:00, 207.66it/s]






Learning Rate: 0.004500
Train Loss: 0.0691, Accuracy: 97.67%, Confidence: 0.9717
Test Loss: 1.8878, Accuracy: 72.64%, Confidence: 0.9323
Train-Test Accuracy Gap: 25.03%


Epoch [394/700]:   3%|▎         | 20/782 [00:00<00:03, 198.97it/s]



Epoch [394/700]:   5%|▌         | 42/782 [00:00<00:03, 206.03it/s]



Epoch [394/700]:  11%|█         | 86/782 [00:00<00:03, 213.15it/s]



Epoch [394/700]:  17%|█▋        | 130/782 [00:00<00:03, 214.68it/s]



Epoch [394/700]:  19%|█▉        | 152/782 [00:00<00:02, 214.93it/s]



Epoch [394/700]:  22%|██▏       | 174/782 [00:00<00:02, 212.86it/s]



Epoch [394/700]:  25%|██▌       | 196/782 [00:00<00:02, 212.15it/s]



Epoch [394/700]:  28%|██▊       | 218/782 [00:01<00:02, 210.76it/s]



Epoch [394/700]:  31%|███       | 240/782 [00:01<00:02, 208.45it/s]



Epoch [394/700]:  33%|███▎      | 261/782 [00:01<00:02, 207.52it/s]



Epoch [394/700]:  36%|███▌      | 282/782 [00:01<00:02, 203.75it/s]



Epoch [394/700]:  39%|███▉      | 304/782 [00:01<00:02, 206.86it/s]



Epoch [394/700]:  42%|████▏     | 325/782 [00:01<00:02, 206.50it/s]



Epoch [394/700]:  44%|████▍     | 346/782 [00:01<00:02, 205.93it/s]



Epoch [394/700]:  47%|████▋     | 367/782 [00:01<00:02, 202.90it/s]



Epoch [394/700]:  50%|████▉     | 388/782 [00:01<00:01, 204.04it/s]



Epoch [394/700]:  52%|█████▏    | 409/782 [00:01<00:01, 205.34it/s]



Epoch [394/700]:  55%|█████▍    | 430/782 [00:02<00:01, 205.88it/s]



Epoch [394/700]:  58%|█████▊    | 451/782 [00:02<00:01, 206.57it/s]



Epoch [394/700]:  60%|██████    | 472/782 [00:02<00:01, 205.78it/s]



Epoch [394/700]:  63%|██████▎   | 493/782 [00:02<00:01, 203.40it/s]



Epoch [394/700]:  66%|██████▌   | 514/782 [00:02<00:01, 203.45it/s]



Epoch [394/700]:  68%|██████▊   | 535/782 [00:02<00:01, 204.46it/s]



Epoch [394/700]:  71%|███████   | 556/782 [00:02<00:01, 205.42it/s]



Epoch [394/700]:  74%|███████▍  | 577/782 [00:02<00:01, 200.12it/s]



Epoch [394/700]:  76%|███████▋  | 598/782 [00:02<00:00, 202.06it/s]



Epoch [394/700]:  82%|████████▏ | 640/782 [00:03<00:00, 201.92it/s]



Epoch [394/700]:  87%|████████▋ | 682/782 [00:03<00:00, 202.94it/s]



Epoch [394/700]:  93%|█████████▎| 726/782 [00:03<00:00, 207.63it/s]



Epoch [394/700]:  96%|█████████▌| 748/782 [00:03<00:00, 209.26it/s]



Epoch [394/700]: 100%|██████████| 782/782 [00:03<00:00, 206.54it/s]






Learning Rate: 0.004500
Train Loss: 0.0653, Accuracy: 97.82%, Confidence: 0.9725
Test Loss: 1.8935, Accuracy: 72.66%, Confidence: 0.9363
Train-Test Accuracy Gap: 25.16%


Epoch [395/700]:   3%|▎         | 20/782 [00:00<00:03, 196.17it/s]



Epoch [395/700]:   5%|▌         | 42/782 [00:00<00:03, 204.64it/s]



Epoch [395/700]:   8%|▊         | 63/782 [00:00<00:03, 200.81it/s]



Epoch [395/700]:  11%|█         | 85/782 [00:00<00:03, 205.45it/s]



Epoch [395/700]:  14%|█▎        | 107/782 [00:00<00:03, 209.76it/s]



Epoch [395/700]:  22%|██▏       | 173/782 [00:00<00:02, 212.11it/s]



Epoch [395/700]:  28%|██▊       | 217/782 [00:01<00:02, 209.26it/s]



Epoch [395/700]:  34%|███▎      | 262/782 [00:01<00:02, 213.15it/s]



Epoch [395/700]:  39%|███▉      | 306/782 [00:01<00:02, 214.41it/s]



Epoch [395/700]:  45%|████▍     | 350/782 [00:01<00:01, 216.21it/s]



Epoch [395/700]:  50%|█████     | 394/782 [00:01<00:01, 214.32it/s]



Epoch [395/700]:  56%|█████▌    | 438/782 [00:02<00:01, 211.28it/s]



Epoch [395/700]:  62%|██████▏   | 482/782 [00:02<00:01, 211.90it/s]



Epoch [395/700]:  67%|██████▋   | 526/782 [00:02<00:01, 210.30it/s]



Epoch [395/700]:  73%|███████▎  | 570/782 [00:02<00:00, 213.33it/s]



Epoch [395/700]:  79%|███████▊  | 614/782 [00:02<00:00, 213.73it/s]



Epoch [395/700]:  84%|████████▍ | 658/782 [00:03<00:00, 212.30it/s]



Epoch [395/700]:  90%|████████▉ | 702/782 [00:03<00:00, 213.61it/s]



Epoch [395/700]:  95%|█████████▌| 746/782 [00:03<00:00, 214.13it/s]



Epoch [395/700]: 100%|██████████| 782/782 [00:03<00:00, 212.07it/s]


Learning Rate: 0.004500
Train Loss: 0.0646, Accuracy: 97.76%, Confidence: 0.9729
Test Loss: 1.9169, Accuracy: 72.73%, Confidence: 0.9376
Train-Test Accuracy Gap: 25.03%


Epoch [396/700]:   3%|▎         | 21/782 [00:00<00:03, 203.12it/s]



Epoch [396/700]:   5%|▌         | 42/782 [00:00<00:03, 204.75it/s]



Epoch [396/700]:   8%|▊         | 64/782 [00:00<00:03, 208.77it/s]



Epoch [396/700]:  17%|█▋        | 130/782 [00:00<00:03, 215.44it/s]



Epoch [396/700]:  22%|██▏       | 174/782 [00:00<00:02, 211.68it/s]



Epoch [396/700]:  31%|███       | 240/782 [00:01<00:02, 213.00it/s]



Epoch [396/700]:  36%|███▋      | 284/782 [00:01<00:02, 208.96it/s]



Epoch [396/700]:  42%|████▏     | 327/782 [00:01<00:02, 210.74it/s]



Epoch [396/700]:  47%|████▋     | 370/782 [00:01<00:02, 204.48it/s]



Epoch [396/700]:  53%|█████▎    | 413/782 [00:01<00:01, 205.64it/s]



Epoch [396/700]:  58%|█████▊    | 456/782 [00:02<00:01, 208.21it/s]



Epoch [396/700]:  64%|██████▍   | 499/782 [00:02<00:01, 210.06it/s]



Epoch [396/700]:  69%|██████▉   | 543/782 [00:02<00:01, 211.93it/s]



Epoch [396/700]:  75%|███████▌  | 587/782 [00:02<00:00, 211.75it/s]



Epoch [396/700]:  81%|████████  | 631/782 [00:03<00:00, 212.69it/s]



Epoch [396/700]:  86%|████████▋ | 675/782 [00:03<00:00, 210.88it/s]



Epoch [396/700]:  95%|█████████▍| 741/782 [00:03<00:00, 210.93it/s]



Epoch [396/700]: 100%|██████████| 782/782 [00:03<00:00, 209.95it/s]


Learning Rate: 0.004500
Train Loss: 0.0616, Accuracy: 97.92%, Confidence: 0.9737
Test Loss: 1.9562, Accuracy: 72.51%, Confidence: 0.9355
Train-Test Accuracy Gap: 25.41%


Epoch [397/700]:   3%|▎         | 21/782 [00:00<00:03, 208.39it/s]



Epoch [397/700]:   5%|▌         | 43/782 [00:00<00:03, 210.52it/s]



Epoch [397/700]:   8%|▊         | 65/782 [00:00<00:03, 206.62it/s]



Epoch [397/700]:  11%|█         | 87/782 [00:00<00:03, 210.52it/s]



Epoch [397/700]:  17%|█▋        | 130/782 [00:00<00:03, 209.41it/s]



Epoch [397/700]:  22%|██▏       | 173/782 [00:00<00:02, 211.28it/s]



Epoch [397/700]:  28%|██▊       | 217/782 [00:01<00:02, 211.30it/s]



Epoch [397/700]:  33%|███▎      | 260/782 [00:01<00:02, 204.65it/s]



Epoch [397/700]:  39%|███▊      | 302/782 [00:01<00:02, 201.31it/s]



Epoch [397/700]:  44%|████▍     | 345/782 [00:01<00:02, 205.52it/s]



Epoch [397/700]:  49%|████▉     | 387/782 [00:01<00:01, 204.68it/s]



Epoch [397/700]:  55%|█████▍    | 429/782 [00:02<00:01, 200.27it/s]



Epoch [397/700]:  58%|█████▊    | 450/782 [00:02<00:01, 197.71it/s]



Epoch [397/700]:  60%|██████    | 472/782 [00:02<00:01, 201.62it/s]



Epoch [397/700]:  63%|██████▎   | 493/782 [00:02<00:01, 202.44it/s]



Epoch [397/700]:  66%|██████▌   | 514/782 [00:02<00:01, 203.80it/s]



Epoch [397/700]:  68%|██████▊   | 535/782 [00:02<00:01, 203.34it/s]



Epoch [397/700]:  71%|███████   | 557/782 [00:02<00:01, 205.68it/s]



Epoch [397/700]:  74%|███████▍  | 578/782 [00:02<00:00, 206.02it/s]



Epoch [397/700]:  77%|███████▋  | 599/782 [00:02<00:00, 204.93it/s]



Epoch [397/700]:  79%|███████▉  | 620/782 [00:03<00:00, 203.91it/s]



Epoch [397/700]:  82%|████████▏ | 641/782 [00:03<00:00, 202.01it/s]



Epoch [397/700]:  87%|████████▋ | 683/782 [00:03<00:00, 202.71it/s]



Epoch [397/700]:  93%|█████████▎| 725/782 [00:03<00:00, 203.41it/s]



Epoch [397/700]:  98%|█████████▊| 768/782 [00:03<00:00, 205.52it/s]



Epoch [397/700]: 100%|██████████| 782/782 [00:03<00:00, 204.52it/s]


Learning Rate: 0.004500
Train Loss: 0.0724, Accuracy: 97.55%, Confidence: 0.9719
Test Loss: 1.9459, Accuracy: 72.49%, Confidence: 0.9360
Train-Test Accuracy Gap: 25.06%


Epoch [398/700]:   3%|▎         | 21/782 [00:00<00:03, 203.96it/s]



Epoch [398/700]:   5%|▌         | 43/782 [00:00<00:03, 208.60it/s]



Epoch [398/700]:   8%|▊         | 64/782 [00:00<00:03, 207.56it/s]



Epoch [398/700]:  11%|█         | 86/782 [00:00<00:03, 209.00it/s]



Epoch [398/700]:  14%|█▎        | 107/782 [00:00<00:03, 209.15it/s]



Epoch [398/700]:  16%|█▋        | 129/782 [00:00<00:03, 212.33it/s]



Epoch [398/700]:  19%|█▉        | 151/782 [00:00<00:03, 209.16it/s]



Epoch [398/700]:  22%|██▏       | 173/782 [00:00<00:02, 209.50it/s]



Epoch [398/700]:  25%|██▍       | 195/782 [00:00<00:02, 210.91it/s]



Epoch [398/700]:  33%|███▎      | 260/782 [00:01<00:02, 210.52it/s]



Epoch [398/700]:  39%|███▉      | 304/782 [00:01<00:02, 207.62it/s]



Epoch [398/700]:  44%|████▍     | 347/782 [00:01<00:02, 210.42it/s]



Epoch [398/700]:  50%|█████     | 391/782 [00:01<00:01, 210.11it/s]



Epoch [398/700]:  56%|█████▌    | 435/782 [00:02<00:01, 212.07it/s]



Epoch [398/700]:  61%|██████▏   | 479/782 [00:02<00:01, 211.75it/s]



Epoch [398/700]:  67%|██████▋   | 523/782 [00:02<00:01, 209.28it/s]



Epoch [398/700]:  72%|███████▏  | 566/782 [00:02<00:01, 209.89it/s]



Epoch [398/700]:  78%|███████▊  | 608/782 [00:02<00:00, 207.02it/s]



Epoch [398/700]:  83%|████████▎ | 650/782 [00:03<00:00, 203.12it/s]



Epoch [398/700]:  89%|████████▊ | 693/782 [00:03<00:00, 206.41it/s]



Epoch [398/700]:  97%|█████████▋| 759/782 [00:03<00:00, 210.31it/s]



Epoch [398/700]: 100%|██████████| 782/782 [00:03<00:00, 209.03it/s]


Learning Rate: 0.004500
Train Loss: 0.0690, Accuracy: 97.58%, Confidence: 0.9721
Test Loss: 1.9239, Accuracy: 72.75%, Confidence: 0.9352
Train-Test Accuracy Gap: 24.83%


Epoch [399/700]:   3%|▎         | 21/782 [00:00<00:03, 206.61it/s]



Epoch [399/700]:   5%|▌         | 42/782 [00:00<00:03, 208.49it/s]



Epoch [399/700]:   8%|▊         | 63/782 [00:00<00:03, 205.92it/s]



Epoch [399/700]:  11%|█         | 85/782 [00:00<00:03, 208.72it/s]



Epoch [399/700]:  14%|█▎        | 106/782 [00:00<00:03, 203.63it/s]



Epoch [399/700]:  16%|█▌        | 127/782 [00:00<00:03, 205.46it/s]



Epoch [399/700]:  19%|█▉        | 148/782 [00:00<00:03, 204.68it/s]



Epoch [399/700]:  22%|██▏       | 169/782 [00:00<00:02, 206.14it/s]



Epoch [399/700]:  24%|██▍       | 190/782 [00:00<00:02, 206.44it/s]



Epoch [399/700]:  27%|██▋       | 211/782 [00:01<00:02, 204.26it/s]



Epoch [399/700]:  30%|██▉       | 232/782 [00:01<00:02, 205.30it/s]



Epoch [399/700]:  32%|███▏      | 253/782 [00:01<00:02, 205.08it/s]



Epoch [399/700]:  35%|███▌      | 275/782 [00:01<00:02, 207.97it/s]



Epoch [399/700]:  38%|███▊      | 297/782 [00:01<00:02, 209.28it/s]



Epoch [399/700]:  41%|████      | 319/782 [00:01<00:02, 211.24it/s]



Epoch [399/700]:  44%|████▎     | 341/782 [00:01<00:02, 209.41it/s]



Epoch [399/700]:  46%|████▋     | 363/782 [00:01<00:01, 210.81it/s]



Epoch [399/700]:  49%|████▉     | 385/782 [00:01<00:01, 211.43it/s]



Epoch [399/700]:  52%|█████▏    | 407/782 [00:01<00:01, 210.66it/s]



Epoch [399/700]:  55%|█████▍    | 429/782 [00:02<00:01, 209.33it/s]



Epoch [399/700]:  58%|█████▊    | 450/782 [00:02<00:01, 208.71it/s]



Epoch [399/700]:  60%|██████    | 472/782 [00:02<00:01, 209.72it/s]



Epoch [399/700]:  66%|██████▌   | 515/782 [00:02<00:01, 211.04it/s]



Epoch [399/700]:  71%|███████▏  | 559/782 [00:02<00:01, 211.45it/s]



Epoch [399/700]:  77%|███████▋  | 603/782 [00:02<00:00, 207.48it/s]



Epoch [399/700]:  82%|████████▏ | 645/782 [00:03<00:00, 206.69it/s]



Epoch [399/700]:  88%|████████▊ | 688/782 [00:03<00:00, 209.34it/s]



Epoch [399/700]:  93%|█████████▎| 730/782 [00:03<00:00, 203.11it/s]



Epoch [399/700]:  99%|█████████▊| 772/782 [00:03<00:00, 205.03it/s]



Epoch [399/700]: 100%|██████████| 782/782 [00:03<00:00, 207.11it/s]


Learning Rate: 0.004500
Train Loss: 0.0741, Accuracy: 97.52%, Confidence: 0.9715
Test Loss: 1.7575, Accuracy: 74.35%, Confidence: 0.9357
Train-Test Accuracy Gap: 23.17%


Epoch [400/700]:   3%|▎         | 21/782 [00:00<00:03, 208.06it/s]



Epoch [400/700]:  11%|█         | 87/782 [00:00<00:03, 214.52it/s]



Epoch [400/700]:  17%|█▋        | 130/782 [00:00<00:03, 207.89it/s]



Epoch [400/700]:  22%|██▏       | 173/782 [00:00<00:02, 208.22it/s]



Epoch [400/700]:  28%|██▊       | 216/782 [00:01<00:02, 210.64it/s]



Epoch [400/700]:  33%|███▎      | 259/782 [00:01<00:02, 206.92it/s]



Epoch [400/700]:  39%|███▊      | 302/782 [00:01<00:02, 207.70it/s]



Epoch [400/700]:  44%|████▍     | 346/782 [00:01<00:02, 212.41it/s]



Epoch [400/700]:  50%|████▉     | 390/782 [00:01<00:01, 210.62it/s]



Epoch [400/700]:  55%|█████▌    | 434/782 [00:02<00:01, 206.21it/s]



Epoch [400/700]:  61%|██████    | 477/782 [00:02<00:01, 207.22it/s]



Epoch [400/700]:  66%|██████▋   | 519/782 [00:02<00:01, 204.42it/s]



Epoch [400/700]:  72%|███████▏  | 562/782 [00:02<00:01, 204.74it/s]



Epoch [400/700]:  77%|███████▋  | 604/782 [00:02<00:00, 206.00it/s]



Epoch [400/700]:  85%|████████▌ | 667/782 [00:03<00:00, 197.53it/s]



Epoch [400/700]:  91%|█████████ | 709/782 [00:03<00:00, 199.25it/s]



Epoch [400/700]:  96%|█████████▌| 752/782 [00:03<00:00, 206.89it/s]



Epoch [400/700]: 100%|██████████| 782/782 [00:03<00:00, 206.74it/s]


Learning Rate: 0.004500
Train Loss: 0.0672, Accuracy: 97.69%, Confidence: 0.9726
Test Loss: 1.8083, Accuracy: 74.00%, Confidence: 0.9349
Train-Test Accuracy Gap: 23.69%


Epoch [401/700]:   5%|▍         | 39/782 [00:00<00:03, 189.03it/s]



Epoch [401/700]:  10%|█         | 79/782 [00:00<00:03, 197.14it/s]



Epoch [401/700]:  15%|█▌        | 120/782 [00:00<00:03, 198.34it/s]



Epoch [401/700]:  21%|██        | 162/782 [00:00<00:03, 202.58it/s]



Epoch [401/700]:  26%|██▌       | 205/782 [00:01<00:02, 204.57it/s]



Epoch [401/700]:  32%|███▏      | 247/782 [00:01<00:02, 204.29it/s]



Epoch [401/700]:  37%|███▋      | 290/782 [00:01<00:02, 206.75it/s]



Epoch [401/700]:  42%|████▏     | 332/782 [00:01<00:02, 203.33it/s]



Epoch [401/700]:  48%|████▊     | 374/782 [00:01<00:01, 205.77it/s]



Epoch [401/700]:  53%|█████▎    | 417/782 [00:02<00:01, 206.18it/s]



Epoch [401/700]:  61%|██████▏   | 480/782 [00:02<00:01, 204.68it/s]



Epoch [401/700]:  67%|██████▋   | 523/782 [00:02<00:01, 207.53it/s]



Epoch [401/700]:  72%|███████▏  | 565/782 [00:02<00:01, 201.59it/s]



Epoch [401/700]:  78%|███████▊  | 607/782 [00:02<00:00, 202.70it/s]



Epoch [401/700]:  83%|████████▎ | 649/782 [00:03<00:00, 202.80it/s]



Epoch [401/700]:  88%|████████▊ | 691/782 [00:03<00:00, 203.36it/s]



Epoch [401/700]:  94%|█████████▍| 734/782 [00:03<00:00, 205.45it/s]



Epoch [401/700]: 100%|██████████| 782/782 [00:03<00:00, 202.64it/s]






Learning Rate: 0.004500
Train Loss: 0.0670, Accuracy: 97.71%, Confidence: 0.9728
Test Loss: 1.8425, Accuracy: 73.69%, Confidence: 0.9372
Train-Test Accuracy Gap: 24.02%


Epoch [402/700]:   3%|▎         | 20/782 [00:00<00:03, 193.98it/s]



Epoch [402/700]:   5%|▌         | 41/782 [00:00<00:03, 202.15it/s]



Epoch [402/700]:   8%|▊         | 63/782 [00:00<00:03, 207.07it/s]



Epoch [402/700]:  11%|█         | 85/782 [00:00<00:03, 209.18it/s]



Epoch [402/700]:  14%|█▎        | 107/782 [00:00<00:03, 210.41it/s]



Epoch [402/700]:  16%|█▋        | 129/782 [00:00<00:03, 211.04it/s]



Epoch [402/700]:  19%|█▉        | 151/782 [00:00<00:02, 211.89it/s]



Epoch [402/700]:  22%|██▏       | 173/782 [00:00<00:02, 212.70it/s]



Epoch [402/700]:  25%|██▍       | 195/782 [00:00<00:02, 213.69it/s]



Epoch [402/700]:  31%|███       | 239/782 [00:01<00:02, 211.01it/s]



Epoch [402/700]:  33%|███▎      | 261/782 [00:01<00:02, 211.14it/s]



Epoch [402/700]:  36%|███▌      | 283/782 [00:01<00:02, 208.05it/s]



Epoch [402/700]:  39%|███▉      | 304/782 [00:01<00:02, 204.02it/s]



Epoch [402/700]:  42%|████▏     | 325/782 [00:01<00:02, 202.91it/s]



Epoch [402/700]:  44%|████▍     | 346/782 [00:01<00:02, 203.53it/s]



Epoch [402/700]:  47%|████▋     | 367/782 [00:01<00:02, 202.19it/s]



Epoch [402/700]:  50%|████▉     | 389/782 [00:01<00:01, 205.90it/s]



Epoch [402/700]:  52%|█████▏    | 410/782 [00:01<00:01, 205.77it/s]



Epoch [402/700]:  55%|█████▌    | 431/782 [00:02<00:01, 205.31it/s]



Epoch [402/700]:  58%|█████▊    | 452/782 [00:02<00:01, 205.51it/s]



Epoch [402/700]:  60%|██████    | 473/782 [00:02<00:01, 206.67it/s]



Epoch [402/700]:  63%|██████▎   | 494/782 [00:02<00:01, 206.91it/s]



Epoch [402/700]:  66%|██████▌   | 515/782 [00:02<00:01, 207.04it/s]



Epoch [402/700]:  69%|██████▊   | 537/782 [00:02<00:01, 208.29it/s]



Epoch [402/700]:  71%|███████▏  | 559/782 [00:02<00:01, 208.57it/s]



Epoch [402/700]:  74%|███████▍  | 580/782 [00:02<00:00, 207.42it/s]



Epoch [402/700]:  77%|███████▋  | 601/782 [00:02<00:00, 206.39it/s]



Epoch [402/700]:  82%|████████▏ | 643/782 [00:03<00:00, 204.84it/s]



Epoch [402/700]:  88%|████████▊ | 686/782 [00:03<00:00, 207.78it/s]



Epoch [402/700]:  93%|█████████▎| 729/782 [00:03<00:00, 206.76it/s]



Epoch [402/700]:  99%|█████████▊| 771/782 [00:03<00:00, 201.10it/s]



Epoch [402/700]: 100%|██████████| 782/782 [00:03<00:00, 206.39it/s]


Learning Rate: 0.004500
Train Loss: 0.0669, Accuracy: 97.69%, Confidence: 0.9725
Test Loss: 1.9960, Accuracy: 71.58%, Confidence: 0.9348
Train-Test Accuracy Gap: 26.11%


Epoch [403/700]:   3%|▎         | 21/782 [00:00<00:03, 206.75it/s]



Epoch [403/700]:  11%|█         | 86/782 [00:00<00:03, 204.18it/s]



Epoch [403/700]:  16%|█▋        | 128/782 [00:00<00:03, 202.21it/s]



Epoch [403/700]:  22%|██▏       | 171/782 [00:00<00:02, 206.66it/s]



Epoch [403/700]:  27%|██▋       | 214/782 [00:01<00:02, 205.77it/s]



Epoch [403/700]:  33%|███▎      | 257/782 [00:01<00:02, 206.97it/s]



Epoch [403/700]:  38%|███▊      | 299/782 [00:01<00:02, 207.78it/s]



Epoch [403/700]:  44%|████▎     | 342/782 [00:01<00:02, 208.47it/s]



Epoch [403/700]:  49%|████▉     | 384/782 [00:01<00:01, 207.78it/s]



Epoch [403/700]:  54%|█████▍    | 426/782 [00:02<00:01, 207.70it/s]



Epoch [403/700]:  60%|██████    | 470/782 [00:02<00:01, 210.92it/s]



Epoch [403/700]:  66%|██████▌   | 514/782 [00:02<00:01, 210.83it/s]



Epoch [403/700]:  71%|███████   | 557/782 [00:02<00:01, 207.31it/s]



Epoch [403/700]:  79%|███████▉  | 621/782 [00:02<00:00, 208.64it/s]



Epoch [403/700]:  85%|████████▍ | 663/782 [00:03<00:00, 208.22it/s]



Epoch [403/700]:  90%|█████████ | 706/782 [00:03<00:00, 209.15it/s]



Epoch [403/700]:  96%|█████████▌| 749/782 [00:03<00:00, 210.80it/s]



Epoch [403/700]: 100%|██████████| 782/782 [00:03<00:00, 207.38it/s]


Learning Rate: 0.004500
Train Loss: 0.0742, Accuracy: 97.52%, Confidence: 0.9718
Test Loss: 2.0984, Accuracy: 71.86%, Confidence: 0.9374
Train-Test Accuracy Gap: 25.66%


Epoch [404/700]:   3%|▎         | 21/782 [00:00<00:03, 207.06it/s]



Epoch [404/700]:  11%|█         | 86/782 [00:00<00:03, 212.34it/s]



Epoch [404/700]:  17%|█▋        | 130/782 [00:00<00:03, 213.62it/s]



Epoch [404/700]:  22%|██▏       | 173/782 [00:00<00:02, 209.27it/s]



Epoch [404/700]:  28%|██▊       | 216/782 [00:01<00:02, 210.18it/s]



Epoch [404/700]:  33%|███▎      | 260/782 [00:01<00:02, 210.35it/s]



Epoch [404/700]:  39%|███▉      | 304/782 [00:01<00:02, 213.05it/s]



Epoch [404/700]:  45%|████▍     | 348/782 [00:01<00:02, 207.42it/s]



Epoch [404/700]:  50%|████▉     | 390/782 [00:01<00:01, 205.83it/s]



Epoch [404/700]:  58%|█████▊    | 454/782 [00:02<00:01, 201.32it/s]



Epoch [404/700]:  63%|██████▎   | 496/782 [00:02<00:01, 200.51it/s]



Epoch [404/700]:  69%|██████▉   | 539/782 [00:02<00:01, 206.24it/s]



Epoch [404/700]:  74%|███████▍  | 581/782 [00:02<00:00, 206.05it/s]



Epoch [404/700]:  80%|███████▉  | 623/782 [00:03<00:00, 203.32it/s]



Epoch [404/700]:  85%|████████▌ | 666/782 [00:03<00:00, 200.85it/s]



Epoch [404/700]:  91%|█████████ | 708/782 [00:03<00:00, 201.71it/s]



Epoch [404/700]:  96%|█████████▌| 750/782 [00:03<00:00, 201.68it/s]



Epoch [404/700]: 100%|██████████| 782/782 [00:03<00:00, 204.97it/s]


Learning Rate: 0.004500
Train Loss: 0.0649, Accuracy: 97.82%, Confidence: 0.9732
Test Loss: 2.1676, Accuracy: 70.91%, Confidence: 0.9347
Train-Test Accuracy Gap: 26.91%


Epoch [405/700]:   3%|▎         | 20/782 [00:00<00:03, 195.09it/s]



Epoch [405/700]:  11%|█         | 84/782 [00:00<00:03, 206.31it/s]



Epoch [405/700]:  16%|█▌        | 126/782 [00:00<00:03, 207.66it/s]



Epoch [405/700]:  21%|██▏       | 168/782 [00:00<00:03, 202.11it/s]



Epoch [405/700]:  27%|██▋       | 210/782 [00:01<00:02, 202.56it/s]



Epoch [405/700]:  32%|███▏      | 253/782 [00:01<00:02, 207.13it/s]



Epoch [405/700]:  41%|████      | 317/782 [00:01<00:02, 204.65it/s]



Epoch [405/700]:  46%|████▌     | 360/782 [00:01<00:02, 206.18it/s]



Epoch [405/700]:  51%|█████▏    | 402/782 [00:01<00:01, 205.94it/s]



Epoch [405/700]:  57%|█████▋    | 445/782 [00:02<00:01, 207.97it/s]



Epoch [405/700]:  62%|██████▏   | 488/782 [00:02<00:01, 208.53it/s]



Epoch [405/700]:  68%|██████▊   | 531/782 [00:02<00:01, 209.61it/s]



Epoch [405/700]:  73%|███████▎  | 573/782 [00:02<00:01, 208.44it/s]



Epoch [405/700]:  79%|███████▊  | 615/782 [00:02<00:00, 206.20it/s]



Epoch [405/700]:  84%|████████▍ | 657/782 [00:03<00:00, 206.38it/s]



Epoch [405/700]:  90%|████████▉ | 701/782 [00:03<00:00, 208.89it/s]



Epoch [405/700]:  95%|█████████▌| 745/782 [00:03<00:00, 210.02it/s]



Epoch [405/700]: 100%|██████████| 782/782 [00:03<00:00, 206.84it/s]


Learning Rate: 0.004500
Train Loss: 0.0677, Accuracy: 97.73%, Confidence: 0.9730
Test Loss: 1.9801, Accuracy: 72.58%, Confidence: 0.9376
Train-Test Accuracy Gap: 25.15%


Epoch [406/700]:   3%|▎         | 22/782 [00:00<00:03, 211.76it/s]



Epoch [406/700]:   8%|▊         | 66/782 [00:00<00:03, 205.19it/s]



Epoch [406/700]:  16%|█▋        | 129/782 [00:00<00:03, 203.76it/s]



Epoch [406/700]:  22%|██▏       | 172/782 [00:00<00:02, 205.04it/s]



Epoch [406/700]:  27%|██▋       | 214/782 [00:01<00:02, 205.10it/s]



Epoch [406/700]:  33%|███▎      | 257/782 [00:01<00:02, 206.58it/s]



Epoch [406/700]:  38%|███▊      | 300/782 [00:01<00:02, 205.40it/s]



Epoch [406/700]:  44%|████▍     | 343/782 [00:01<00:02, 208.15it/s]



Epoch [406/700]:  49%|████▉     | 385/782 [00:01<00:01, 204.75it/s]



Epoch [406/700]:  55%|█████▍    | 428/782 [00:02<00:01, 206.76it/s]



Epoch [406/700]:  60%|██████    | 472/782 [00:02<00:01, 209.58it/s]



Epoch [406/700]:  66%|██████▌   | 516/782 [00:02<00:01, 210.81it/s]



Epoch [406/700]:  72%|███████▏  | 560/782 [00:02<00:01, 212.48it/s]



Epoch [406/700]:  80%|████████  | 626/782 [00:03<00:00, 213.14it/s]



Epoch [406/700]:  86%|████████▌ | 670/782 [00:03<00:00, 214.17it/s]



Epoch [406/700]:  91%|█████████▏| 714/782 [00:03<00:00, 210.30it/s]



Epoch [406/700]:  97%|█████████▋| 757/782 [00:03<00:00, 209.42it/s]



Epoch [406/700]: 100%|██████████| 782/782 [00:03<00:00, 208.13it/s]


Learning Rate: 0.004500
Train Loss: 0.0664, Accuracy: 97.72%, Confidence: 0.9725
Test Loss: 1.9974, Accuracy: 72.35%, Confidence: 0.9392
Train-Test Accuracy Gap: 25.37%


Epoch [407/700]:   3%|▎         | 21/782 [00:00<00:03, 202.53it/s]



Epoch [407/700]:   5%|▌         | 42/782 [00:00<00:03, 206.83it/s]



Epoch [407/700]:   8%|▊         | 64/782 [00:00<00:03, 209.46it/s]



Epoch [407/700]:  17%|█▋        | 131/782 [00:00<00:03, 212.81it/s]



Epoch [407/700]:  22%|██▏       | 175/782 [00:00<00:02, 212.03it/s]



Epoch [407/700]:  25%|██▌       | 197/782 [00:00<00:02, 208.24it/s]



Epoch [407/700]:  34%|███▎      | 263/782 [00:01<00:02, 211.82it/s]



Epoch [407/700]:  39%|███▉      | 307/782 [00:01<00:02, 212.97it/s]



Epoch [407/700]:  45%|████▍     | 351/782 [00:01<00:02, 211.55it/s]



Epoch [407/700]:  51%|█████     | 395/782 [00:01<00:01, 212.01it/s]



Epoch [407/700]:  56%|█████▌    | 439/782 [00:02<00:01, 206.61it/s]



Epoch [407/700]:  64%|██████▍   | 504/782 [00:02<00:01, 208.83it/s]



Epoch [407/700]:  70%|██████▉   | 547/782 [00:02<00:01, 209.70it/s]



Epoch [407/700]:  75%|███████▌  | 589/782 [00:02<00:00, 203.69it/s]



Epoch [407/700]:  81%|████████  | 631/782 [00:03<00:00, 202.27it/s]



Epoch [407/700]:  86%|████████▌ | 674/782 [00:03<00:00, 203.92it/s]



Epoch [407/700]:  92%|█████████▏| 716/782 [00:03<00:00, 202.61it/s]



Epoch [407/700]:  94%|█████████▍| 738/782 [00:03<00:00, 205.15it/s]



Epoch [407/700]: 100%|██████████| 782/782 [00:03<00:00, 207.81it/s]


Learning Rate: 0.004500
Train Loss: 0.0660, Accuracy: 97.76%, Confidence: 0.9728
Test Loss: 2.1206, Accuracy: 72.36%, Confidence: 0.9397
Train-Test Accuracy Gap: 25.40%


Epoch [408/700]:   2%|▏         | 19/782 [00:00<00:04, 187.51it/s]



Epoch [408/700]:   5%|▌         | 40/782 [00:00<00:03, 198.82it/s]



Epoch [408/700]:   8%|▊         | 62/782 [00:00<00:03, 204.09it/s]



Epoch [408/700]:  16%|█▌        | 126/782 [00:00<00:03, 203.76it/s]



Epoch [408/700]:  22%|██▏       | 169/782 [00:00<00:02, 206.31it/s]



Epoch [408/700]:  27%|██▋       | 212/782 [00:01<00:02, 208.76it/s]



Epoch [408/700]:  32%|███▏      | 254/782 [00:01<00:02, 205.95it/s]



Epoch [408/700]:  38%|███▊      | 296/782 [00:01<00:02, 206.87it/s]



Epoch [408/700]:  43%|████▎     | 338/782 [00:01<00:02, 203.51it/s]



Epoch [408/700]:  49%|████▊     | 380/782 [00:01<00:01, 202.93it/s]



Epoch [408/700]:  54%|█████▍    | 424/782 [00:02<00:01, 207.05it/s]



Epoch [408/700]:  60%|█████▉    | 467/782 [00:02<00:01, 208.92it/s]



Epoch [408/700]:  65%|██████▌   | 511/782 [00:02<00:01, 210.52it/s]



Epoch [408/700]:  71%|███████   | 555/782 [00:02<00:01, 209.51it/s]



Epoch [408/700]:  76%|███████▋  | 598/782 [00:02<00:00, 210.07it/s]



Epoch [408/700]:  82%|████████▏ | 642/782 [00:03<00:00, 211.16it/s]



Epoch [408/700]:  88%|████████▊ | 686/782 [00:03<00:00, 211.93it/s]



Epoch [408/700]:  93%|█████████▎| 730/782 [00:03<00:00, 210.57it/s]



Epoch [408/700]: 100%|██████████| 782/782 [00:03<00:00, 207.62it/s]






Learning Rate: 0.004500
Train Loss: 0.0701, Accuracy: 97.63%, Confidence: 0.9727
Test Loss: 1.8654, Accuracy: 73.35%, Confidence: 0.9352
Train-Test Accuracy Gap: 24.28%


Epoch [409/700]:   3%|▎         | 21/782 [00:00<00:03, 207.49it/s]



Epoch [409/700]:   5%|▌         | 43/782 [00:00<00:03, 209.42it/s]



Epoch [409/700]:   8%|▊         | 65/782 [00:00<00:03, 210.74it/s]



Epoch [409/700]:  11%|█         | 87/782 [00:00<00:03, 203.43it/s]



Epoch [409/700]:  14%|█▍        | 108/782 [00:00<00:03, 199.62it/s]



Epoch [409/700]:  17%|█▋        | 130/782 [00:00<00:03, 203.27it/s]



Epoch [409/700]:  22%|██▏       | 173/782 [00:00<00:02, 207.27it/s]



Epoch [409/700]:  28%|██▊       | 216/782 [00:01<00:02, 207.90it/s]



Epoch [409/700]:  30%|███       | 238/782 [00:01<00:02, 208.89it/s]



Epoch [409/700]:  33%|███▎      | 260/782 [00:01<00:02, 209.41it/s]



Epoch [409/700]:  39%|███▊      | 302/782 [00:01<00:02, 208.42it/s]



Epoch [409/700]:  44%|████▍     | 344/782 [00:01<00:02, 206.98it/s]



Epoch [409/700]:  49%|████▉     | 387/782 [00:01<00:01, 206.25it/s]



Epoch [409/700]:  55%|█████▍    | 430/782 [00:02<00:01, 207.36it/s]



Epoch [409/700]:  61%|██████    | 474/782 [00:02<00:01, 210.64it/s]



Epoch [409/700]:  66%|██████▌   | 518/782 [00:02<00:01, 209.86it/s]



Epoch [409/700]:  75%|███████▍  | 583/782 [00:02<00:00, 211.03it/s]



Epoch [409/700]:  80%|████████  | 627/782 [00:03<00:00, 212.15it/s]



Epoch [409/700]:  86%|████████▌ | 671/782 [00:03<00:00, 211.41it/s]



Epoch [409/700]:  91%|█████████▏| 715/782 [00:03<00:00, 211.74it/s]



Epoch [409/700]:  97%|█████████▋| 759/782 [00:03<00:00, 210.42it/s]



Epoch [409/700]: 100%|██████████| 782/782 [00:03<00:00, 208.42it/s]


Learning Rate: 0.004500
Train Loss: 0.0636, Accuracy: 97.74%, Confidence: 0.9730
Test Loss: 1.9201, Accuracy: 72.60%, Confidence: 0.9329
Train-Test Accuracy Gap: 25.14%


Epoch [410/700]:   3%|▎         | 20/782 [00:00<00:03, 191.62it/s]



Epoch [410/700]:   5%|▌         | 42/782 [00:00<00:03, 203.19it/s]



Epoch [410/700]:   8%|▊         | 64/782 [00:00<00:03, 206.95it/s]



Epoch [410/700]:  11%|█         | 85/782 [00:00<00:03, 207.88it/s]



Epoch [410/700]:  14%|█▎        | 107/782 [00:00<00:03, 211.16it/s]



Epoch [410/700]:  22%|██▏       | 172/782 [00:00<00:02, 209.48it/s]



Epoch [410/700]:  28%|██▊       | 216/782 [00:01<00:02, 210.88it/s]



Epoch [410/700]:  30%|███       | 238/782 [00:01<00:02, 210.19it/s]



Epoch [410/700]:  36%|███▌      | 282/782 [00:01<00:02, 210.20it/s]



Epoch [410/700]:  45%|████▍     | 348/782 [00:01<00:02, 210.43it/s]



Epoch [410/700]:  50%|█████     | 392/782 [00:01<00:01, 209.79it/s]



Epoch [410/700]:  56%|█████▌    | 435/782 [00:02<00:01, 208.58it/s]



Epoch [410/700]:  61%|██████    | 477/782 [00:02<00:01, 204.66it/s]



Epoch [410/700]:  66%|██████▋   | 520/782 [00:02<00:01, 204.94it/s]



Epoch [410/700]:  72%|███████▏  | 563/782 [00:02<00:01, 207.51it/s]



Epoch [410/700]:  77%|███████▋  | 605/782 [00:02<00:00, 204.16it/s]



Epoch [410/700]:  83%|████████▎ | 649/782 [00:03<00:00, 208.60it/s]



Epoch [410/700]:  88%|████████▊ | 691/782 [00:03<00:00, 209.09it/s]



Epoch [410/700]:  94%|█████████▎| 733/782 [00:03<00:00, 206.26it/s]



Epoch [410/700]: 100%|██████████| 782/782 [00:03<00:00, 207.53it/s]


Learning Rate: 0.004500
Train Loss: 0.0623, Accuracy: 97.88%, Confidence: 0.9738
Test Loss: 2.4203, Accuracy: 70.00%, Confidence: 0.9319
Train-Test Accuracy Gap: 27.88%


Epoch [411/700]:   3%|▎         | 21/782 [00:00<00:03, 205.39it/s]



Epoch [411/700]:  14%|█▍        | 110/782 [00:00<00:03, 212.47it/s]



Epoch [411/700]:  20%|█▉        | 154/782 [00:00<00:03, 206.91it/s]



Epoch [411/700]:  25%|██▌       | 197/782 [00:00<00:02, 206.41it/s]



Epoch [411/700]:  31%|███       | 239/782 [00:01<00:02, 205.36it/s]



Epoch [411/700]:  36%|███▌      | 281/782 [00:01<00:02, 206.15it/s]



Epoch [411/700]:  41%|████▏     | 324/782 [00:01<00:02, 207.95it/s]



Epoch [411/700]:  47%|████▋     | 367/782 [00:01<00:01, 209.09it/s]



Epoch [411/700]:  52%|█████▏    | 410/782 [00:01<00:01, 206.42it/s]



Epoch [411/700]:  58%|█████▊    | 454/782 [00:02<00:01, 210.16it/s]



Epoch [411/700]:  64%|██████▎   | 497/782 [00:02<00:01, 209.21it/s]



Epoch [411/700]:  69%|██████▉   | 540/782 [00:02<00:01, 209.23it/s]



Epoch [411/700]:  75%|███████▍  | 583/782 [00:02<00:00, 209.30it/s]



Epoch [411/700]:  83%|████████▎ | 647/782 [00:03<00:00, 209.88it/s]



Epoch [411/700]:  88%|████████▊ | 690/782 [00:03<00:00, 207.13it/s]



Epoch [411/700]:  94%|█████████▎| 733/782 [00:03<00:00, 207.97it/s]



Epoch [411/700]: 100%|██████████| 782/782 [00:03<00:00, 208.04it/s]






Learning Rate: 0.004500
Train Loss: 0.0706, Accuracy: 97.61%, Confidence: 0.9726
Test Loss: 1.9365, Accuracy: 73.17%, Confidence: 0.9374
Train-Test Accuracy Gap: 24.44%


Epoch [412/700]:   3%|▎         | 21/782 [00:00<00:03, 205.71it/s]



Epoch [412/700]:   5%|▌         | 43/782 [00:00<00:03, 211.72it/s]



Epoch [412/700]:   8%|▊         | 65/782 [00:00<00:03, 211.00it/s]



Epoch [412/700]:  11%|█         | 87/782 [00:00<00:03, 208.44it/s]



Epoch [412/700]:  16%|█▋        | 129/782 [00:00<00:03, 205.55it/s]



Epoch [412/700]:  22%|██▏       | 172/782 [00:00<00:02, 207.94it/s]



Epoch [412/700]:  27%|██▋       | 214/782 [00:01<00:02, 205.70it/s]



Epoch [412/700]:  30%|███       | 236/782 [00:01<00:02, 208.52it/s]



Epoch [412/700]:  33%|███▎      | 258/782 [00:01<00:02, 210.37it/s]



Epoch [412/700]:  36%|███▌      | 280/782 [00:01<00:02, 207.36it/s]



Epoch [412/700]:  38%|███▊      | 301/782 [00:01<00:02, 205.39it/s]



Epoch [412/700]:  41%|████      | 322/782 [00:01<00:02, 206.54it/s]



Epoch [412/700]:  44%|████▍     | 344/782 [00:01<00:02, 208.31it/s]



Epoch [412/700]:  47%|████▋     | 366/782 [00:01<00:01, 209.49it/s]



Epoch [412/700]:  50%|████▉     | 388/782 [00:01<00:01, 210.83it/s]



Epoch [412/700]:  52%|█████▏    | 410/782 [00:01<00:01, 210.75it/s]



Epoch [412/700]:  55%|█████▌    | 432/782 [00:02<00:01, 210.98it/s]



Epoch [412/700]:  58%|█████▊    | 454/782 [00:02<00:01, 213.05it/s]



Epoch [412/700]:  61%|██████    | 476/782 [00:02<00:01, 211.32it/s]



Epoch [412/700]:  64%|██████▎   | 498/782 [00:02<00:01, 210.43it/s]



Epoch [412/700]:  66%|██████▋   | 520/782 [00:02<00:01, 211.00it/s]



Epoch [412/700]:  72%|███████▏  | 564/782 [00:02<00:01, 213.71it/s]



Epoch [412/700]:  75%|███████▍  | 586/782 [00:02<00:00, 213.07it/s]



Epoch [412/700]:  78%|███████▊  | 608/782 [00:02<00:00, 213.90it/s]



Epoch [412/700]:  81%|████████  | 630/782 [00:03<00:00, 212.94it/s]



Epoch [412/700]:  83%|████████▎ | 652/782 [00:03<00:00, 213.37it/s]



Epoch [412/700]:  86%|████████▌ | 674/782 [00:03<00:00, 213.10it/s]



Epoch [412/700]:  89%|████████▉ | 696/782 [00:03<00:00, 212.71it/s]



Epoch [412/700]:  92%|█████████▏| 718/782 [00:03<00:00, 211.78it/s]



Epoch [412/700]:  97%|█████████▋| 762/782 [00:03<00:00, 210.84it/s]



Epoch [412/700]: 100%|██████████| 782/782 [00:03<00:00, 209.85it/s]


Learning Rate: 0.004500
Train Loss: 0.0633, Accuracy: 97.89%, Confidence: 0.9737
Test Loss: 2.2442, Accuracy: 70.81%, Confidence: 0.9379
Train-Test Accuracy Gap: 27.08%


Epoch [413/700]:   3%|▎         | 21/782 [00:00<00:03, 201.59it/s]



Epoch [413/700]:   8%|▊         | 64/782 [00:00<00:03, 207.22it/s]



Epoch [413/700]:  16%|█▋        | 129/782 [00:00<00:03, 209.15it/s]



Epoch [413/700]:  22%|██▏       | 171/782 [00:00<00:02, 205.64it/s]



Epoch [413/700]:  27%|██▋       | 214/782 [00:01<00:02, 207.46it/s]



Epoch [413/700]:  33%|███▎      | 258/782 [00:01<00:02, 211.67it/s]



Epoch [413/700]:  38%|███▊      | 301/782 [00:01<00:02, 205.08it/s]



Epoch [413/700]:  44%|████▍     | 344/782 [00:01<00:02, 208.42it/s]



Epoch [413/700]:  49%|████▉     | 387/782 [00:01<00:01, 211.00it/s]



Epoch [413/700]:  55%|█████▍    | 430/782 [00:02<00:01, 208.42it/s]



Epoch [413/700]:  60%|██████    | 473/782 [00:02<00:01, 209.91it/s]



Epoch [413/700]:  66%|██████▌   | 516/782 [00:02<00:01, 208.08it/s]



Epoch [413/700]:  71%|███████▏  | 558/782 [00:02<00:01, 207.88it/s]



Epoch [413/700]:  77%|███████▋  | 600/782 [00:02<00:00, 205.94it/s]



Epoch [413/700]:  82%|████████▏ | 643/782 [00:03<00:00, 202.35it/s]



Epoch [413/700]:  88%|████████▊ | 687/782 [00:03<00:00, 207.46it/s]



Epoch [413/700]:  93%|█████████▎| 730/782 [00:03<00:00, 208.04it/s]



Epoch [413/700]: 100%|██████████| 782/782 [00:03<00:00, 206.89it/s]






Learning Rate: 0.004500
Train Loss: 0.0693, Accuracy: 97.56%, Confidence: 0.9728
Test Loss: 1.8598, Accuracy: 73.90%, Confidence: 0.9370
Train-Test Accuracy Gap: 23.66%


Epoch [414/700]:   3%|▎         | 20/782 [00:00<00:03, 195.32it/s]



Epoch [414/700]:   8%|▊         | 63/782 [00:00<00:03, 203.79it/s]



Epoch [414/700]:  11%|█         | 85/782 [00:00<00:03, 206.85it/s]



Epoch [414/700]:  14%|█▎        | 106/782 [00:00<00:03, 204.26it/s]



Epoch [414/700]:  16%|█▋        | 128/782 [00:00<00:03, 207.14it/s]



Epoch [414/700]:  22%|██▏       | 171/782 [00:00<00:02, 205.95it/s]



Epoch [414/700]:  27%|██▋       | 214/782 [00:01<00:02, 208.53it/s]



Epoch [414/700]:  33%|███▎      | 257/782 [00:01<00:02, 210.18it/s]



Epoch [414/700]:  36%|███▌      | 279/782 [00:01<00:02, 209.26it/s]



Epoch [414/700]:  38%|███▊      | 300/782 [00:01<00:02, 208.99it/s]



Epoch [414/700]:  41%|████      | 322/782 [00:01<00:02, 209.88it/s]



Epoch [414/700]:  44%|████▍     | 343/782 [00:01<00:02, 209.71it/s]



Epoch [414/700]:  47%|████▋     | 365/782 [00:01<00:01, 210.22it/s]



Epoch [414/700]:  49%|████▉     | 387/782 [00:01<00:01, 211.72it/s]



Epoch [414/700]:  52%|█████▏    | 409/782 [00:01<00:01, 212.94it/s]



Epoch [414/700]:  55%|█████▌    | 431/782 [00:02<00:01, 213.25it/s]



Epoch [414/700]:  58%|█████▊    | 453/782 [00:02<00:01, 207.62it/s]



Epoch [414/700]:  61%|██████    | 474/782 [00:02<00:01, 203.14it/s]



Epoch [414/700]:  63%|██████▎   | 495/782 [00:02<00:01, 200.92it/s]



Epoch [414/700]:  66%|██████▌   | 516/782 [00:02<00:01, 200.75it/s]



Epoch [414/700]:  69%|██████▊   | 537/782 [00:02<00:01, 199.97it/s]



Epoch [414/700]:  71%|███████▏  | 558/782 [00:02<00:01, 200.98it/s]



Epoch [414/700]:  74%|███████▍  | 579/782 [00:02<00:00, 203.07it/s]



Epoch [414/700]:  77%|███████▋  | 600/782 [00:02<00:00, 202.97it/s]



Epoch [414/700]:  79%|███████▉  | 621/782 [00:03<00:00, 201.54it/s]



Epoch [414/700]:  82%|████████▏ | 643/782 [00:03<00:00, 205.50it/s]



Epoch [414/700]:  88%|████████▊ | 685/782 [00:03<00:00, 207.74it/s]



Epoch [414/700]:  93%|█████████▎| 727/782 [00:03<00:00, 205.43it/s]



Epoch [414/700]:  98%|█████████▊| 770/782 [00:03<00:00, 207.18it/s]



Epoch [414/700]: 100%|██████████| 782/782 [00:03<00:00, 206.20it/s]


Learning Rate: 0.004500
Train Loss: 0.0644, Accuracy: 97.78%, Confidence: 0.9730
Test Loss: 1.8975, Accuracy: 73.33%, Confidence: 0.9346
Train-Test Accuracy Gap: 24.45%


Epoch [415/700]:   3%|▎         | 21/782 [00:00<00:03, 209.79it/s]



Epoch [415/700]:   5%|▌         | 43/782 [00:00<00:03, 213.76it/s]



Epoch [415/700]:   8%|▊         | 65/782 [00:00<00:03, 211.72it/s]



Epoch [415/700]:  11%|█         | 87/782 [00:00<00:03, 208.95it/s]



Epoch [415/700]:  14%|█▍        | 108/782 [00:00<00:03, 198.16it/s]



Epoch [415/700]:  16%|█▋        | 128/782 [00:00<00:03, 197.12it/s]



Epoch [415/700]:  19%|█▉        | 150/782 [00:00<00:03, 202.43it/s]



Epoch [415/700]:  22%|██▏       | 172/782 [00:00<00:02, 206.23it/s]



Epoch [415/700]:  27%|██▋       | 215/782 [00:01<00:02, 208.85it/s]



Epoch [415/700]:  33%|███▎      | 258/782 [00:01<00:02, 208.71it/s]



Epoch [415/700]:  38%|███▊      | 301/782 [00:01<00:02, 208.83it/s]



Epoch [415/700]:  44%|████▍     | 343/782 [00:01<00:02, 206.91it/s]



Epoch [415/700]:  49%|████▉     | 386/782 [00:01<00:01, 209.33it/s]



Epoch [415/700]:  55%|█████▍    | 429/782 [00:02<00:01, 206.08it/s]



Epoch [415/700]:  60%|██████    | 472/782 [00:02<00:01, 206.88it/s]



Epoch [415/700]:  66%|██████▌   | 515/782 [00:02<00:01, 209.29it/s]



Epoch [415/700]:  71%|███████▏  | 558/782 [00:02<00:01, 207.20it/s]



Epoch [415/700]:  77%|███████▋  | 600/782 [00:02<00:00, 207.81it/s]



Epoch [415/700]:  82%|████████▏ | 643/782 [00:03<00:00, 209.17it/s]



Epoch [415/700]:  88%|████████▊ | 686/782 [00:03<00:00, 209.74it/s]



Epoch [415/700]:  93%|█████████▎| 730/782 [00:03<00:00, 212.88it/s]



Epoch [415/700]: 100%|██████████| 782/782 [00:03<00:00, 207.80it/s]


Learning Rate: 0.004500
Train Loss: 0.0634, Accuracy: 97.82%, Confidence: 0.9735
Test Loss: 2.0631, Accuracy: 72.11%, Confidence: 0.9358
Train-Test Accuracy Gap: 25.71%


Epoch [416/700]:   3%|▎         | 20/782 [00:00<00:03, 197.91it/s]



Epoch [416/700]:   5%|▌         | 42/782 [00:00<00:03, 206.28it/s]



Epoch [416/700]:   8%|▊         | 63/782 [00:00<00:03, 203.14it/s]



Epoch [416/700]:  11%|█         | 85/782 [00:00<00:03, 206.72it/s]



Epoch [416/700]:  16%|█▌        | 127/782 [00:00<00:03, 206.06it/s]



Epoch [416/700]:  22%|██▏       | 169/782 [00:00<00:02, 204.39it/s]



Epoch [416/700]:  27%|██▋       | 212/782 [00:01<00:02, 201.41it/s]



Epoch [416/700]:  30%|██▉       | 233/782 [00:01<00:02, 201.53it/s]



Epoch [416/700]:  33%|███▎      | 255/782 [00:01<00:02, 204.78it/s]



Epoch [416/700]:  38%|███▊      | 299/782 [00:01<00:02, 208.16it/s]



Epoch [416/700]:  44%|████▎     | 342/782 [00:01<00:02, 208.53it/s]



Epoch [416/700]:  49%|████▉     | 384/782 [00:01<00:01, 206.56it/s]



Epoch [416/700]:  52%|█████▏    | 405/782 [00:01<00:01, 205.26it/s]



Epoch [416/700]:  54%|█████▍    | 426/782 [00:02<00:01, 199.63it/s]



Epoch [416/700]:  60%|█████▉    | 468/782 [00:02<00:01, 201.73it/s]



Epoch [416/700]:  65%|██████▌   | 510/782 [00:02<00:01, 204.49it/s]



Epoch [416/700]:  68%|██████▊   | 531/782 [00:02<00:01, 204.17it/s]



Epoch [416/700]:  71%|███████   | 552/782 [00:02<00:01, 201.51it/s]



Epoch [416/700]:  76%|███████▌  | 594/782 [00:02<00:00, 200.47it/s]



Epoch [416/700]:  81%|████████▏ | 636/782 [00:03<00:00, 201.02it/s]



Epoch [416/700]:  87%|████████▋ | 678/782 [00:03<00:00, 202.03it/s]



Epoch [416/700]:  92%|█████████▏| 720/782 [00:03<00:00, 203.02it/s]



Epoch [416/700]:  97%|█████████▋| 761/782 [00:03<00:00, 198.42it/s]



Epoch [416/700]: 100%|██████████| 782/782 [00:03<00:00, 202.45it/s]






Learning Rate: 0.004500
Train Loss: 0.0654, Accuracy: 97.82%, Confidence: 0.9740
Test Loss: 2.2335, Accuracy: 71.29%, Confidence: 0.9381
Train-Test Accuracy Gap: 26.53%


Epoch [417/700]:   3%|▎         | 20/782 [00:00<00:03, 194.17it/s]



Epoch [417/700]:   5%|▌         | 41/782 [00:00<00:03, 197.40it/s]



Epoch [417/700]:   8%|▊         | 61/782 [00:00<00:03, 197.97it/s]



Epoch [417/700]:  10%|█         | 82/782 [00:00<00:03, 199.23it/s]



Epoch [417/700]:  16%|█▌        | 124/782 [00:00<00:03, 201.75it/s]



Epoch [417/700]:  21%|██▏       | 167/782 [00:00<00:03, 201.77it/s]



Epoch [417/700]:  27%|██▋       | 209/782 [00:01<00:02, 201.59it/s]



Epoch [417/700]:  32%|███▏      | 251/782 [00:01<00:02, 203.50it/s]



Epoch [417/700]:  38%|███▊      | 294/782 [00:01<00:02, 206.04it/s]



Epoch [417/700]:  43%|████▎     | 337/782 [00:01<00:02, 207.46it/s]



Epoch [417/700]:  49%|████▊     | 381/782 [00:01<00:01, 210.75it/s]



Epoch [417/700]:  54%|█████▍    | 425/782 [00:02<00:01, 210.12it/s]



Epoch [417/700]:  60%|█████▉    | 468/782 [00:02<00:01, 209.18it/s]



Epoch [417/700]:  63%|██████▎   | 489/782 [00:02<00:01, 205.69it/s]



Epoch [417/700]:  65%|██████▌   | 510/782 [00:02<00:01, 206.38it/s]



Epoch [417/700]:  68%|██████▊   | 531/782 [00:02<00:01, 206.79it/s]



Epoch [417/700]:  71%|███████   | 553/782 [00:02<00:01, 209.20it/s]



Epoch [417/700]:  74%|███████▎  | 575/782 [00:02<00:00, 210.68it/s]



Epoch [417/700]:  76%|███████▋  | 597/782 [00:02<00:00, 210.42it/s]



Epoch [417/700]:  79%|███████▉  | 619/782 [00:03<00:00, 206.64it/s]



Epoch [417/700]:  82%|████████▏ | 640/782 [00:03<00:00, 206.07it/s]



Epoch [417/700]:  85%|████████▍ | 662/782 [00:03<00:00, 208.78it/s]



Epoch [417/700]:  87%|████████▋ | 683/782 [00:03<00:00, 208.81it/s]



Epoch [417/700]:  90%|█████████ | 705/782 [00:03<00:00, 209.63it/s]



Epoch [417/700]:  93%|█████████▎| 726/782 [00:03<00:00, 204.98it/s]



Epoch [417/700]:  96%|█████████▌| 747/782 [00:03<00:00, 202.06it/s]



Epoch [417/700]:  98%|█████████▊| 769/782 [00:03<00:00, 205.80it/s]



Epoch [417/700]: 100%|██████████| 782/782 [00:03<00:00, 205.29it/s]


Learning Rate: 0.004500
Train Loss: 0.0676, Accuracy: 97.69%, Confidence: 0.9734
Test Loss: 1.9915, Accuracy: 72.14%, Confidence: 0.9377
Train-Test Accuracy Gap: 25.55%


Epoch [418/700]:   3%|▎         | 21/782 [00:00<00:03, 201.37it/s]



Epoch [418/700]:   8%|▊         | 65/782 [00:00<00:03, 209.26it/s]



Epoch [418/700]:  17%|█▋        | 131/782 [00:00<00:03, 212.31it/s]



Epoch [418/700]:  22%|██▏       | 175/782 [00:00<00:02, 214.37it/s]



Epoch [418/700]:  28%|██▊       | 219/782 [00:01<00:02, 211.79it/s]



Epoch [418/700]:  34%|███▎      | 263/782 [00:01<00:02, 213.65it/s]



Epoch [418/700]:  39%|███▉      | 307/782 [00:01<00:02, 211.91it/s]



Epoch [418/700]:  45%|████▍     | 351/782 [00:01<00:02, 211.79it/s]



Epoch [418/700]:  51%|█████     | 395/782 [00:01<00:01, 210.49it/s]



Epoch [418/700]:  56%|█████▌    | 439/782 [00:02<00:01, 209.19it/s]



Epoch [418/700]:  62%|██████▏   | 481/782 [00:02<00:01, 208.96it/s]



Epoch [418/700]:  67%|██████▋   | 524/782 [00:02<00:01, 206.36it/s]



Epoch [418/700]:  73%|███████▎  | 567/782 [00:02<00:01, 209.61it/s]



Epoch [418/700]:  78%|███████▊  | 609/782 [00:02<00:00, 201.00it/s]



Epoch [418/700]:  83%|████████▎ | 651/782 [00:03<00:00, 202.76it/s]



Epoch [418/700]:  89%|████████▊ | 694/782 [00:03<00:00, 206.37it/s]



Epoch [418/700]:  94%|█████████▍| 736/782 [00:03<00:00, 202.28it/s]



Epoch [418/700]: 100%|██████████| 782/782 [00:03<00:00, 208.49it/s]






Learning Rate: 0.004500
Train Loss: 0.0621, Accuracy: 97.91%, Confidence: 0.9736
Test Loss: 2.0074, Accuracy: 72.67%, Confidence: 0.9372
Train-Test Accuracy Gap: 25.24%


Epoch [419/700]:   3%|▎         | 21/782 [00:00<00:03, 208.27it/s]



Epoch [419/700]:   5%|▌         | 43/782 [00:00<00:03, 209.58it/s]



Epoch [419/700]:   8%|▊         | 64/782 [00:00<00:03, 205.09it/s]



Epoch [419/700]:  11%|█         | 85/782 [00:00<00:03, 200.86it/s]



Epoch [419/700]:  14%|█▎        | 106/782 [00:00<00:03, 198.69it/s]



Epoch [419/700]:  16%|█▋        | 128/782 [00:00<00:03, 203.49it/s]



Epoch [419/700]:  19%|█▉        | 149/782 [00:00<00:03, 201.82it/s]



Epoch [419/700]:  22%|██▏       | 171/782 [00:00<00:02, 204.02it/s]



Epoch [419/700]:  25%|██▍       | 192/782 [00:00<00:02, 204.21it/s]



Epoch [419/700]:  27%|██▋       | 213/782 [00:01<00:02, 204.93it/s]



Epoch [419/700]:  30%|██▉       | 234/782 [00:01<00:02, 202.93it/s]



Epoch [419/700]:  33%|███▎      | 255/782 [00:01<00:02, 203.45it/s]



Epoch [419/700]:  38%|███▊      | 297/782 [00:01<00:02, 205.67it/s]



Epoch [419/700]:  43%|████▎     | 339/782 [00:01<00:02, 204.10it/s]



Epoch [419/700]:  49%|████▊     | 381/782 [00:01<00:01, 205.25it/s]



Epoch [419/700]:  51%|█████▏    | 402/782 [00:01<00:01, 202.60it/s]



Epoch [419/700]:  54%|█████▍    | 423/782 [00:02<00:01, 197.45it/s]



Epoch [419/700]:  59%|█████▉    | 464/782 [00:02<00:01, 196.97it/s]



Epoch [419/700]:  65%|██████▍   | 507/782 [00:02<00:01, 202.17it/s]



Epoch [419/700]:  68%|██████▊   | 528/782 [00:02<00:01, 200.02it/s]



Epoch [419/700]:  70%|███████   | 549/782 [00:02<00:01, 199.17it/s]



Epoch [419/700]:  75%|███████▌  | 590/782 [00:02<00:00, 199.51it/s]



Epoch [419/700]:  78%|███████▊  | 611/782 [00:03<00:00, 200.19it/s]



Epoch [419/700]:  81%|████████  | 633/782 [00:03<00:00, 203.99it/s]



Epoch [419/700]:  84%|████████▎ | 654/782 [00:03<00:00, 203.03it/s]



Epoch [419/700]:  86%|████████▋ | 675/782 [00:03<00:00, 201.32it/s]



Epoch [419/700]:  89%|████████▉ | 696/782 [00:03<00:00, 201.04it/s]



Epoch [419/700]:  92%|█████████▏| 717/782 [00:03<00:00, 202.25it/s]



Epoch [419/700]:  94%|█████████▍| 738/782 [00:03<00:00, 202.07it/s]



Epoch [419/700]:  97%|█████████▋| 759/782 [00:03<00:00, 201.02it/s]



Epoch [419/700]: 100%|██████████| 782/782 [00:03<00:00, 201.85it/s]






Learning Rate: 0.004500
Train Loss: 0.0651, Accuracy: 97.75%, Confidence: 0.9734
Test Loss: 1.9426, Accuracy: 73.47%, Confidence: 0.9387
Train-Test Accuracy Gap: 24.28%


Epoch [420/700]:   3%|▎         | 21/782 [00:00<00:03, 209.33it/s]



Epoch [420/700]:   5%|▌         | 42/782 [00:00<00:03, 206.43it/s]



Epoch [420/700]:   8%|▊         | 63/782 [00:00<00:03, 202.00it/s]



Epoch [420/700]:  11%|█         | 84/782 [00:00<00:03, 204.60it/s]



Epoch [420/700]:  13%|█▎        | 105/782 [00:00<00:03, 203.18it/s]



Epoch [420/700]:  16%|█▌        | 127/782 [00:00<00:03, 207.51it/s]



Epoch [420/700]:  22%|██▏       | 170/782 [00:00<00:02, 207.05it/s]



Epoch [420/700]:  27%|██▋       | 214/782 [00:01<00:02, 210.28it/s]



Epoch [420/700]:  30%|███       | 236/782 [00:01<00:02, 206.34it/s]



Epoch [420/700]:  33%|███▎      | 257/782 [00:01<00:02, 205.51it/s]



Epoch [420/700]:  36%|███▌      | 278/782 [00:01<00:02, 206.70it/s]



Epoch [420/700]:  38%|███▊      | 299/782 [00:01<00:02, 203.00it/s]



Epoch [420/700]:  41%|████      | 320/782 [00:01<00:02, 203.52it/s]



Epoch [420/700]:  44%|████▎     | 341/782 [00:01<00:02, 200.63it/s]



Epoch [420/700]:  46%|████▋     | 362/782 [00:01<00:02, 200.50it/s]



Epoch [420/700]:  49%|████▉     | 383/782 [00:01<00:01, 202.10it/s]



Epoch [420/700]:  52%|█████▏    | 404/782 [00:01<00:01, 201.06it/s]



Epoch [420/700]:  54%|█████▍    | 425/782 [00:02<00:01, 197.22it/s]



Epoch [420/700]:  57%|█████▋    | 446/782 [00:02<00:01, 199.24it/s]



Epoch [420/700]:  60%|█████▉    | 467/782 [00:02<00:01, 202.31it/s]



Epoch [420/700]:  62%|██████▏   | 488/782 [00:02<00:01, 204.09it/s]



Epoch [420/700]:  65%|██████▌   | 509/782 [00:02<00:01, 205.58it/s]



Epoch [420/700]:  68%|██████▊   | 530/782 [00:02<00:01, 206.78it/s]



Epoch [420/700]:  70%|███████   | 551/782 [00:02<00:01, 205.40it/s]



Epoch [420/700]:  73%|███████▎  | 572/782 [00:02<00:01, 203.26it/s]



Epoch [420/700]:  76%|███████▌  | 593/782 [00:02<00:00, 204.50it/s]



Epoch [420/700]:  79%|███████▊  | 614/782 [00:03<00:00, 206.01it/s]



Epoch [420/700]:  81%|████████▏ | 636/782 [00:03<00:00, 207.38it/s]



Epoch [420/700]:  84%|████████▍ | 657/782 [00:03<00:00, 205.44it/s]



Epoch [420/700]:  92%|█████████▏| 721/782 [00:03<00:00, 207.97it/s]



Epoch [420/700]:  98%|█████████▊| 764/782 [00:03<00:00, 207.78it/s]



Epoch [420/700]: 100%|██████████| 782/782 [00:03<00:00, 204.80it/s]


Learning Rate: 0.004500
Train Loss: 0.0684, Accuracy: 97.65%, Confidence: 0.9730
Test Loss: 2.0390, Accuracy: 72.57%, Confidence: 0.9378
Train-Test Accuracy Gap: 25.08%


Epoch [421/700]:   3%|▎         | 21/782 [00:00<00:03, 203.75it/s]



Epoch [421/700]:   5%|▌         | 43/782 [00:00<00:03, 207.87it/s]



Epoch [421/700]:   8%|▊         | 64/782 [00:00<00:03, 205.40it/s]



Epoch [421/700]:  16%|█▋        | 129/782 [00:00<00:03, 209.87it/s]



Epoch [421/700]:  22%|██▏       | 172/782 [00:00<00:02, 208.60it/s]



Epoch [421/700]:  27%|██▋       | 215/782 [00:01<00:02, 209.76it/s]



Epoch [421/700]:  33%|███▎      | 258/782 [00:01<00:02, 212.50it/s]



Epoch [421/700]:  39%|███▊      | 302/782 [00:01<00:02, 211.99it/s]



Epoch [421/700]:  44%|████▍     | 346/782 [00:01<00:02, 208.99it/s]



Epoch [421/700]:  50%|████▉     | 390/782 [00:01<00:01, 209.59it/s]



Epoch [421/700]:  58%|█████▊    | 456/782 [00:02<00:01, 213.85it/s]



Epoch [421/700]:  64%|██████▍   | 501/782 [00:02<00:01, 213.48it/s]



Epoch [421/700]:  70%|██████▉   | 545/782 [00:02<00:01, 213.60it/s]



Epoch [421/700]:  75%|███████▌  | 588/782 [00:02<00:00, 206.64it/s]



Epoch [421/700]:  81%|████████  | 632/782 [00:03<00:00, 211.38it/s]



Epoch [421/700]:  86%|████████▋ | 676/782 [00:03<00:00, 211.03it/s]



Epoch [421/700]:  89%|████████▉ | 698/782 [00:03<00:00, 210.21it/s]



Epoch [421/700]:  95%|█████████▍| 742/782 [00:03<00:00, 211.03it/s]



Epoch [421/700]: 100%|██████████| 782/782 [00:03<00:00, 209.73it/s]


Learning Rate: 0.004500
Train Loss: 0.0648, Accuracy: 97.81%, Confidence: 0.9741
Test Loss: 1.9205, Accuracy: 73.40%, Confidence: 0.9381
Train-Test Accuracy Gap: 24.41%


Epoch [422/700]:   3%|▎         | 21/782 [00:00<00:03, 207.35it/s]



Epoch [422/700]:   5%|▌         | 43/782 [00:00<00:03, 211.91it/s]



Epoch [422/700]:  11%|█         | 87/782 [00:00<00:03, 214.04it/s]



Epoch [422/700]:  17%|█▋        | 131/782 [00:00<00:03, 207.89it/s]



Epoch [422/700]:  22%|██▏       | 173/782 [00:00<00:02, 206.46it/s]



Epoch [422/700]:  28%|██▊       | 216/782 [00:01<00:02, 205.64it/s]



Epoch [422/700]:  33%|███▎      | 260/782 [00:01<00:02, 210.09it/s]



Epoch [422/700]:  39%|███▉      | 304/782 [00:01<00:02, 213.18it/s]



Epoch [422/700]:  45%|████▍     | 348/782 [00:01<00:02, 215.20it/s]



Epoch [422/700]:  50%|█████     | 392/782 [00:01<00:01, 210.49it/s]



Epoch [422/700]:  56%|█████▌    | 436/782 [00:02<00:01, 212.06it/s]



Epoch [422/700]:  61%|██████▏   | 480/782 [00:02<00:01, 210.77it/s]



Epoch [422/700]:  67%|██████▋   | 523/782 [00:02<00:01, 206.69it/s]



Epoch [422/700]:  72%|███████▏  | 565/782 [00:02<00:01, 203.66it/s]



Epoch [422/700]:  78%|███████▊  | 608/782 [00:02<00:00, 208.18it/s]



Epoch [422/700]:  83%|████████▎ | 650/782 [00:03<00:00, 205.84it/s]



Epoch [422/700]:  91%|█████████▏| 714/782 [00:03<00:00, 207.85it/s]



Epoch [422/700]:  97%|█████████▋| 757/782 [00:03<00:00, 206.23it/s]



Epoch [422/700]: 100%|██████████| 782/782 [00:03<00:00, 207.63it/s]


Learning Rate: 0.004500
Train Loss: 0.0608, Accuracy: 97.95%, Confidence: 0.9741
Test Loss: 2.0289, Accuracy: 72.84%, Confidence: 0.9398
Train-Test Accuracy Gap: 25.11%


Epoch [423/700]:   3%|▎         | 21/782 [00:00<00:03, 205.23it/s]



Epoch [423/700]:   5%|▌         | 42/782 [00:00<00:03, 206.27it/s]



Epoch [423/700]:  11%|█         | 85/782 [00:00<00:03, 205.94it/s]



Epoch [423/700]:  16%|█▌        | 127/782 [00:00<00:03, 207.46it/s]



Epoch [423/700]:  22%|██▏       | 169/782 [00:00<00:02, 205.25it/s]



Epoch [423/700]:  27%|██▋       | 211/782 [00:01<00:02, 203.21it/s]



Epoch [423/700]:  32%|███▏      | 254/782 [00:01<00:02, 203.00it/s]



Epoch [423/700]:  38%|███▊      | 296/782 [00:01<00:02, 204.18it/s]



Epoch [423/700]:  43%|████▎     | 338/782 [00:01<00:02, 203.08it/s]



Epoch [423/700]:  49%|████▊     | 381/782 [00:01<00:01, 204.48it/s]



Epoch [423/700]:  54%|█████▍    | 423/782 [00:02<00:01, 206.05it/s]



Epoch [423/700]:  60%|█████▉    | 466/782 [00:02<00:01, 206.58it/s]



Epoch [423/700]:  65%|██████▌   | 509/782 [00:02<00:01, 208.29it/s]



Epoch [423/700]:  70%|███████   | 551/782 [00:02<00:01, 207.07it/s]



Epoch [423/700]:  76%|███████▌  | 593/782 [00:02<00:00, 208.02it/s]



Epoch [423/700]:  81%|████████▏ | 636/782 [00:03<00:00, 204.24it/s]



Epoch [423/700]:  87%|████████▋ | 679/782 [00:03<00:00, 207.42it/s]



Epoch [423/700]:  92%|█████████▏| 723/782 [00:03<00:00, 210.26it/s]



Epoch [423/700]: 100%|██████████| 782/782 [00:03<00:00, 205.46it/s]






Learning Rate: 0.004500
Train Loss: 0.0667, Accuracy: 97.75%, Confidence: 0.9734
Test Loss: 1.9974, Accuracy: 72.73%, Confidence: 0.9406
Train-Test Accuracy Gap: 25.02%


Epoch [424/700]:   3%|▎         | 21/782 [00:00<00:03, 201.10it/s]



Epoch [424/700]:   5%|▌         | 42/782 [00:00<00:03, 205.27it/s]



Epoch [424/700]:  11%|█         | 84/782 [00:00<00:03, 202.86it/s]



Epoch [424/700]:  16%|█▋        | 128/782 [00:00<00:03, 211.36it/s]



Epoch [424/700]:  22%|██▏       | 172/782 [00:00<00:02, 214.28it/s]



Epoch [424/700]:  27%|██▋       | 215/782 [00:01<00:02, 208.46it/s]



Epoch [424/700]:  36%|███▌      | 280/782 [00:01<00:02, 209.49it/s]



Epoch [424/700]:  41%|████      | 322/782 [00:01<00:02, 206.74it/s]



Epoch [424/700]:  47%|████▋     | 364/782 [00:01<00:02, 202.78it/s]



Epoch [424/700]:  52%|█████▏    | 408/782 [00:01<00:01, 207.49it/s]



Epoch [424/700]:  58%|█████▊    | 452/782 [00:02<00:01, 211.58it/s]



Epoch [424/700]:  63%|██████▎   | 496/782 [00:02<00:01, 211.45it/s]



Epoch [424/700]:  69%|██████▉   | 540/782 [00:02<00:01, 209.34it/s]



Epoch [424/700]:  74%|███████▍  | 582/782 [00:02<00:00, 205.11it/s]



Epoch [424/700]:  80%|███████▉  | 624/782 [00:03<00:00, 203.53it/s]



Epoch [424/700]:  88%|████████▊ | 687/782 [00:03<00:00, 206.99it/s]



Epoch [424/700]:  93%|█████████▎| 730/782 [00:03<00:00, 207.97it/s]



Epoch [424/700]: 100%|██████████| 782/782 [00:03<00:00, 207.59it/s]






Learning Rate: 0.004500
Train Loss: 0.0658, Accuracy: 97.65%, Confidence: 0.9738
Test Loss: 2.0591, Accuracy: 72.32%, Confidence: 0.9374
Train-Test Accuracy Gap: 25.33%


Epoch [425/700]:   3%|▎         | 21/782 [00:00<00:03, 201.57it/s]



Epoch [425/700]:   5%|▌         | 43/782 [00:00<00:03, 207.13it/s]



Epoch [425/700]:   8%|▊         | 65/782 [00:00<00:03, 208.85it/s]



Epoch [425/700]:  11%|█         | 86/782 [00:00<00:03, 208.49it/s]



Epoch [425/700]:  14%|█▎        | 107/782 [00:00<00:03, 205.27it/s]



Epoch [425/700]:  16%|█▋        | 129/782 [00:00<00:03, 207.02it/s]



Epoch [425/700]:  22%|██▏       | 172/782 [00:00<00:02, 208.83it/s]



Epoch [425/700]:  27%|██▋       | 214/782 [00:01<00:02, 208.18it/s]



Epoch [425/700]:  33%|███▎      | 257/782 [00:01<00:02, 206.99it/s]



Epoch [425/700]:  38%|███▊      | 300/782 [00:01<00:02, 206.82it/s]



Epoch [425/700]:  44%|████▍     | 343/782 [00:01<00:02, 207.48it/s]



Epoch [425/700]:  49%|████▉     | 385/782 [00:01<00:01, 207.45it/s]



Epoch [425/700]:  55%|█████▍    | 429/782 [00:02<00:01, 209.85it/s]



Epoch [425/700]:  60%|██████    | 473/782 [00:02<00:01, 212.55it/s]



Epoch [425/700]:  66%|██████▌   | 517/782 [00:02<00:01, 205.95it/s]



Epoch [425/700]:  71%|███████▏  | 559/782 [00:02<00:01, 202.50it/s]



Epoch [425/700]:  77%|███████▋  | 602/782 [00:02<00:00, 205.95it/s]



Epoch [425/700]:  82%|████████▏ | 645/782 [00:03<00:00, 206.72it/s]



Epoch [425/700]:  88%|████████▊ | 687/782 [00:03<00:00, 207.53it/s]



Epoch [425/700]:  93%|█████████▎| 729/782 [00:03<00:00, 204.22it/s]



Epoch [425/700]:  99%|█████████▊| 771/782 [00:03<00:00, 204.68it/s]



Epoch [425/700]: 100%|██████████| 782/782 [00:03<00:00, 206.24it/s]


Learning Rate: 0.004500
Train Loss: 0.0636, Accuracy: 97.76%, Confidence: 0.9740
Test Loss: 2.3727, Accuracy: 70.47%, Confidence: 0.9392
Train-Test Accuracy Gap: 27.29%


Epoch [426/700]:   3%|▎         | 21/782 [00:00<00:03, 202.97it/s]



Epoch [426/700]:  11%|█         | 86/782 [00:00<00:03, 210.81it/s]



Epoch [426/700]:  19%|█▉        | 151/782 [00:00<00:03, 208.64it/s]



Epoch [426/700]:  25%|██▍       | 194/782 [00:00<00:02, 209.22it/s]



Epoch [426/700]:  30%|███       | 237/782 [00:01<00:02, 209.68it/s]



Epoch [426/700]:  36%|███▌      | 279/782 [00:01<00:02, 208.43it/s]



Epoch [426/700]:  41%|████      | 322/782 [00:01<00:02, 208.61it/s]



Epoch [426/700]:  47%|████▋     | 365/782 [00:01<00:01, 208.84it/s]



Epoch [426/700]:  52%|█████▏    | 408/782 [00:01<00:01, 210.11it/s]



Epoch [426/700]:  58%|█████▊    | 452/782 [00:02<00:01, 210.19it/s]



Epoch [426/700]:  63%|██████▎   | 496/782 [00:02<00:01, 210.70it/s]



Epoch [426/700]:  69%|██████▉   | 540/782 [00:02<00:01, 211.58it/s]



Epoch [426/700]:  75%|███████▍  | 584/782 [00:02<00:00, 209.16it/s]



Epoch [426/700]:  80%|████████  | 628/782 [00:03<00:00, 209.66it/s]



Epoch [426/700]:  86%|████████▌ | 670/782 [00:03<00:00, 208.15it/s]



Epoch [426/700]:  91%|█████████ | 712/782 [00:03<00:00, 202.10it/s]



Epoch [426/700]:  97%|█████████▋| 756/782 [00:03<00:00, 208.59it/s]



Epoch [426/700]: 100%|██████████| 782/782 [00:03<00:00, 208.62it/s]


Learning Rate: 0.004500
Train Loss: 0.0645, Accuracy: 97.77%, Confidence: 0.9747
Test Loss: 2.0987, Accuracy: 71.76%, Confidence: 0.9358
Train-Test Accuracy Gap: 26.01%


Epoch [427/700]:   3%|▎         | 22/782 [00:00<00:03, 211.92it/s]



Epoch [427/700]:   6%|▌         | 44/782 [00:00<00:03, 212.48it/s]



Epoch [427/700]:   8%|▊         | 66/782 [00:00<00:03, 213.08it/s]



Epoch [427/700]:  17%|█▋        | 132/782 [00:00<00:03, 211.33it/s]



Epoch [427/700]:  22%|██▏       | 175/782 [00:00<00:02, 208.66it/s]



Epoch [427/700]:  28%|██▊       | 219/782 [00:01<00:02, 212.66it/s]



Epoch [427/700]:  34%|███▎      | 263/782 [00:01<00:02, 212.75it/s]



Epoch [427/700]:  42%|████▏     | 329/782 [00:01<00:02, 207.30it/s]



Epoch [427/700]:  47%|████▋     | 371/782 [00:01<00:02, 202.72it/s]



Epoch [427/700]:  56%|█████▌    | 437/782 [00:02<00:01, 210.52it/s]



Epoch [427/700]:  62%|██████▏   | 481/782 [00:02<00:01, 212.79it/s]



Epoch [427/700]:  67%|██████▋   | 525/782 [00:02<00:01, 212.34it/s]



Epoch [427/700]:  73%|███████▎  | 569/782 [00:02<00:01, 208.25it/s]



Epoch [427/700]:  78%|███████▊  | 612/782 [00:02<00:00, 210.71it/s]



Epoch [427/700]:  81%|████████  | 634/782 [00:03<00:00, 207.62it/s]



Epoch [427/700]:  87%|████████▋ | 677/782 [00:03<00:00, 204.94it/s]



Epoch [427/700]:  92%|█████████▏| 719/782 [00:03<00:00, 203.89it/s]



Epoch [427/700]:  97%|█████████▋| 762/782 [00:03<00:00, 203.64it/s]



Epoch [427/700]: 100%|██████████| 782/782 [00:03<00:00, 208.29it/s]


Learning Rate: 0.004500
Train Loss: 0.0626, Accuracy: 97.89%, Confidence: 0.9746
Test Loss: 1.9698, Accuracy: 72.94%, Confidence: 0.9386
Train-Test Accuracy Gap: 24.95%


Epoch [428/700]:   2%|▏         | 19/782 [00:00<00:04, 189.60it/s]



Epoch [428/700]:   5%|▌         | 40/782 [00:00<00:03, 199.51it/s]



Epoch [428/700]:  11%|█         | 83/782 [00:00<00:03, 207.38it/s]



Epoch [428/700]:  16%|█▌        | 126/782 [00:00<00:03, 209.72it/s]



Epoch [428/700]:  22%|██▏       | 170/782 [00:00<00:02, 211.62it/s]



Epoch [428/700]:  27%|██▋       | 213/782 [00:01<00:02, 205.20it/s]



Epoch [428/700]:  33%|███▎      | 256/782 [00:01<00:02, 207.80it/s]



Epoch [428/700]:  38%|███▊      | 298/782 [00:01<00:02, 207.95it/s]



Epoch [428/700]:  44%|████▎     | 342/782 [00:01<00:02, 211.03it/s]



Epoch [428/700]:  52%|█████▏    | 407/782 [00:01<00:01, 210.05it/s]



Epoch [428/700]:  58%|█████▊    | 450/782 [00:02<00:01, 205.07it/s]



Epoch [428/700]:  63%|██████▎   | 492/782 [00:02<00:01, 201.48it/s]



Epoch [428/700]:  68%|██████▊   | 535/782 [00:02<00:01, 205.35it/s]



Epoch [428/700]:  74%|███████▍  | 578/782 [00:02<00:00, 206.44it/s]



Epoch [428/700]:  80%|███████▉  | 622/782 [00:03<00:00, 209.81it/s]



Epoch [428/700]:  85%|████████▌ | 665/782 [00:03<00:00, 209.68it/s]



Epoch [428/700]:  91%|█████████ | 708/782 [00:03<00:00, 209.85it/s]



Epoch [428/700]:  96%|█████████▌| 750/782 [00:03<00:00, 206.06it/s]



Epoch [428/700]: 100%|██████████| 782/782 [00:03<00:00, 206.44it/s]


Learning Rate: 0.004500
Train Loss: 0.0617, Accuracy: 97.88%, Confidence: 0.9747
Test Loss: 2.1846, Accuracy: 70.75%, Confidence: 0.9359
Train-Test Accuracy Gap: 27.13%


Epoch [429/700]:   3%|▎         | 21/782 [00:00<00:03, 209.66it/s]



Epoch [429/700]:   5%|▌         | 42/782 [00:00<00:03, 205.35it/s]



Epoch [429/700]:   8%|▊         | 63/782 [00:00<00:03, 202.83it/s]



Epoch [429/700]:  16%|█▌        | 127/782 [00:00<00:03, 207.40it/s]



Epoch [429/700]:  22%|██▏       | 170/782 [00:00<00:02, 209.51it/s]



Epoch [429/700]:  27%|██▋       | 213/782 [00:01<00:02, 209.77it/s]



Epoch [429/700]:  33%|███▎      | 256/782 [00:01<00:02, 210.53it/s]



Epoch [429/700]:  38%|███▊      | 300/782 [00:01<00:02, 209.14it/s]



Epoch [429/700]:  44%|████▍     | 343/782 [00:01<00:02, 206.79it/s]



Epoch [429/700]:  49%|████▉     | 387/782 [00:01<00:01, 210.59it/s]



Epoch [429/700]:  55%|█████▌    | 431/782 [00:02<00:01, 214.53it/s]



Epoch [429/700]:  61%|██████    | 475/782 [00:02<00:01, 213.76it/s]



Epoch [429/700]:  66%|██████▋   | 519/782 [00:02<00:01, 210.89it/s]



Epoch [429/700]:  75%|███████▍  | 585/782 [00:02<00:00, 211.91it/s]



Epoch [429/700]:  80%|████████  | 629/782 [00:03<00:00, 209.82it/s]



Epoch [429/700]:  86%|████████▌ | 672/782 [00:03<00:00, 207.78it/s]



Epoch [429/700]:  91%|█████████▏| 714/782 [00:03<00:00, 205.74it/s]



Epoch [429/700]:  97%|█████████▋| 756/782 [00:03<00:00, 200.11it/s]



Epoch [429/700]: 100%|██████████| 782/782 [00:03<00:00, 207.76it/s]


Learning Rate: 0.004500
Train Loss: 0.0692, Accuracy: 97.69%, Confidence: 0.9743
Test Loss: 2.2874, Accuracy: 70.73%, Confidence: 0.9388
Train-Test Accuracy Gap: 26.96%


Epoch [430/700]:   2%|▏         | 18/782 [00:00<00:04, 178.66it/s]



Epoch [430/700]:   5%|▍         | 39/782 [00:00<00:03, 192.11it/s]



Epoch [430/700]:   8%|▊         | 59/782 [00:00<00:03, 190.85it/s]



Epoch [430/700]:  16%|█▌        | 122/782 [00:00<00:03, 200.67it/s]



Epoch [430/700]:  21%|██        | 164/782 [00:00<00:03, 203.87it/s]



Epoch [430/700]:  26%|██▋       | 207/782 [00:01<00:02, 205.28it/s]



Epoch [430/700]:  32%|███▏      | 249/782 [00:01<00:02, 205.79it/s]



Epoch [430/700]:  37%|███▋      | 292/782 [00:01<00:02, 208.11it/s]



Epoch [430/700]:  43%|████▎     | 335/782 [00:01<00:02, 208.21it/s]



Epoch [430/700]:  48%|████▊     | 379/782 [00:01<00:01, 211.42it/s]



Epoch [430/700]:  54%|█████▍    | 422/782 [00:02<00:01, 207.08it/s]



Epoch [430/700]:  59%|█████▉    | 465/782 [00:02<00:01, 208.87it/s]



Epoch [430/700]:  65%|██████▍   | 508/782 [00:02<00:01, 210.07it/s]



Epoch [430/700]:  71%|███████   | 552/782 [00:02<00:01, 210.81it/s]



Epoch [430/700]:  76%|███████▌  | 595/782 [00:02<00:00, 207.29it/s]



Epoch [430/700]:  82%|████████▏ | 638/782 [00:03<00:00, 208.74it/s]



Epoch [430/700]:  87%|████████▋ | 680/782 [00:03<00:00, 205.22it/s]



Epoch [430/700]:  92%|█████████▏| 723/782 [00:03<00:00, 206.55it/s]



Epoch [430/700]: 100%|██████████| 782/782 [00:03<00:00, 206.19it/s]


Learning Rate: 0.004500
Train Loss: 0.0626, Accuracy: 97.83%, Confidence: 0.9742
Test Loss: 2.0517, Accuracy: 72.49%, Confidence: 0.9384
Train-Test Accuracy Gap: 25.34%


Epoch [431/700]:   3%|▎         | 21/782 [00:00<00:03, 205.41it/s]



Epoch [431/700]:   5%|▌         | 42/782 [00:00<00:03, 207.31it/s]



Epoch [431/700]:   8%|▊         | 63/782 [00:00<00:03, 202.00it/s]



Epoch [431/700]:  16%|█▋        | 129/782 [00:00<00:03, 209.49it/s]



Epoch [431/700]:  22%|██▏       | 171/782 [00:00<00:02, 208.22it/s]



Epoch [431/700]:  27%|██▋       | 214/782 [00:01<00:02, 208.86it/s]



Epoch [431/700]:  33%|███▎      | 256/782 [00:01<00:02, 205.01it/s]



Epoch [431/700]:  41%|████      | 320/782 [00:01<00:02, 208.87it/s]



Epoch [431/700]:  46%|████▋     | 362/782 [00:01<00:02, 208.87it/s]



Epoch [431/700]:  52%|█████▏    | 405/782 [00:01<00:01, 209.69it/s]



Epoch [431/700]:  57%|█████▋    | 447/782 [00:02<00:01, 206.65it/s]



Epoch [431/700]:  63%|██████▎   | 489/782 [00:02<00:01, 208.27it/s]



Epoch [431/700]:  68%|██████▊   | 531/782 [00:02<00:01, 208.37it/s]



Epoch [431/700]:  73%|███████▎  | 574/782 [00:02<00:00, 208.21it/s]



Epoch [431/700]:  79%|███████▉  | 618/782 [00:02<00:00, 211.37it/s]



Epoch [431/700]:  87%|████████▋ | 684/782 [00:03<00:00, 211.60it/s]



Epoch [431/700]:  93%|█████████▎| 727/782 [00:03<00:00, 207.52it/s]



Epoch [431/700]:  98%|█████████▊| 769/782 [00:03<00:00, 201.13it/s]



Epoch [431/700]: 100%|██████████| 782/782 [00:03<00:00, 207.18it/s]


Learning Rate: 0.004500
Train Loss: 0.0603, Accuracy: 97.96%, Confidence: 0.9746
Test Loss: 2.0476, Accuracy: 72.48%, Confidence: 0.9372
Train-Test Accuracy Gap: 25.48%


Epoch [432/700]:   3%|▎         | 22/782 [00:00<00:03, 212.28it/s]



Epoch [432/700]:   6%|▌         | 44/782 [00:00<00:03, 215.15it/s]



Epoch [432/700]:   8%|▊         | 66/782 [00:00<00:03, 214.53it/s]



Epoch [432/700]:  11%|█▏        | 88/782 [00:00<00:03, 212.72it/s]



Epoch [432/700]:  14%|█▍        | 110/782 [00:00<00:03, 208.01it/s]



Epoch [432/700]:  17%|█▋        | 131/782 [00:00<00:03, 206.32it/s]



Epoch [432/700]:  19%|█▉        | 152/782 [00:00<00:03, 206.29it/s]



Epoch [432/700]:  22%|██▏       | 173/782 [00:00<00:02, 206.14it/s]



Epoch [432/700]:  25%|██▍       | 195/782 [00:00<00:02, 209.08it/s]



Epoch [432/700]:  28%|██▊       | 217/782 [00:01<00:02, 210.31it/s]



Epoch [432/700]:  31%|███       | 239/782 [00:01<00:02, 209.63it/s]



Epoch [432/700]:  33%|███▎      | 260/782 [00:01<00:02, 209.56it/s]



Epoch [432/700]:  36%|███▌      | 282/782 [00:01<00:02, 209.95it/s]



Epoch [432/700]:  39%|███▊      | 303/782 [00:01<00:02, 209.22it/s]



Epoch [432/700]:  44%|████▍     | 346/782 [00:01<00:02, 210.30it/s]



Epoch [432/700]:  50%|████▉     | 390/782 [00:01<00:01, 210.59it/s]



Epoch [432/700]:  55%|█████▌    | 434/782 [00:02<00:01, 211.50it/s]



Epoch [432/700]:  61%|██████    | 478/782 [00:02<00:01, 210.62it/s]



Epoch [432/700]:  67%|██████▋   | 522/782 [00:02<00:01, 209.16it/s]



Epoch [432/700]:  72%|███████▏  | 565/782 [00:02<00:01, 210.18it/s]



Epoch [432/700]:  78%|███████▊  | 608/782 [00:02<00:00, 206.92it/s]



Epoch [432/700]:  80%|████████  | 629/782 [00:03<00:00, 207.56it/s]



Epoch [432/700]:  83%|████████▎ | 650/782 [00:03<00:00, 202.67it/s]



Epoch [432/700]:  86%|████████▌ | 671/782 [00:03<00:00, 200.85it/s]



Epoch [432/700]:  88%|████████▊ | 692/782 [00:03<00:00, 199.32it/s]



Epoch [432/700]:  91%|█████████ | 712/782 [00:03<00:00, 199.37it/s]



Epoch [432/700]:  94%|█████████▍| 734/782 [00:03<00:00, 204.07it/s]



Epoch [432/700]:  97%|█████████▋| 755/782 [00:03<00:00, 205.69it/s]



Epoch [432/700]: 100%|██████████| 782/782 [00:03<00:00, 207.87it/s]






Learning Rate: 0.004500
Train Loss: 0.0649, Accuracy: 97.74%, Confidence: 0.9745
Test Loss: 2.0653, Accuracy: 72.81%, Confidence: 0.9381
Train-Test Accuracy Gap: 24.93%


Epoch [433/700]:   3%|▎         | 21/782 [00:00<00:03, 203.27it/s]



Epoch [433/700]:  11%|█         | 87/782 [00:00<00:03, 211.65it/s]



Epoch [433/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.81it/s]



Epoch [433/700]:  22%|██▏       | 175/782 [00:00<00:02, 208.94it/s]



Epoch [433/700]:  28%|██▊       | 218/782 [00:01<00:02, 210.38it/s]



Epoch [433/700]:  31%|███       | 240/782 [00:01<00:02, 206.96it/s]



Epoch [433/700]:  36%|███▋      | 284/782 [00:01<00:02, 210.89it/s]



Epoch [433/700]:  42%|████▏     | 328/782 [00:01<00:02, 208.27it/s]



Epoch [433/700]:  48%|████▊     | 372/782 [00:01<00:01, 211.46it/s]



Epoch [433/700]:  53%|█████▎    | 415/782 [00:01<00:01, 206.31it/s]



Epoch [433/700]:  58%|█████▊    | 457/782 [00:02<00:01, 205.31it/s]



Epoch [433/700]:  67%|██████▋   | 522/782 [00:02<00:01, 209.44it/s]



Epoch [433/700]:  72%|███████▏  | 564/782 [00:02<00:01, 206.50it/s]



Epoch [433/700]:  78%|███████▊  | 607/782 [00:02<00:00, 207.43it/s]



Epoch [433/700]:  83%|████████▎ | 649/782 [00:03<00:00, 206.78it/s]



Epoch [433/700]:  88%|████████▊ | 692/782 [00:03<00:00, 207.50it/s]



Epoch [433/700]:  94%|█████████▍| 735/782 [00:03<00:00, 208.48it/s]



Epoch [433/700]: 100%|██████████| 782/782 [00:03<00:00, 207.90it/s]






Learning Rate: 0.004500
Train Loss: 0.0659, Accuracy: 97.75%, Confidence: 0.9745
Test Loss: 1.9626, Accuracy: 72.52%, Confidence: 0.9382
Train-Test Accuracy Gap: 25.23%


Epoch [434/700]:   3%|▎         | 21/782 [00:00<00:03, 204.54it/s]



Epoch [434/700]:   5%|▌         | 42/782 [00:00<00:03, 205.25it/s]



Epoch [434/700]:   8%|▊         | 63/782 [00:00<00:03, 204.45it/s]



Epoch [434/700]:  16%|█▌        | 127/782 [00:00<00:03, 208.23it/s]



Epoch [434/700]:  22%|██▏       | 169/782 [00:00<00:03, 203.69it/s]



Epoch [434/700]:  27%|██▋       | 211/782 [00:01<00:02, 203.16it/s]



Epoch [434/700]:  32%|███▏      | 254/782 [00:01<00:02, 207.05it/s]



Epoch [434/700]:  38%|███▊      | 296/782 [00:01<00:02, 204.12it/s]



Epoch [434/700]:  43%|████▎     | 339/782 [00:01<00:02, 207.99it/s]



Epoch [434/700]:  49%|████▉     | 382/782 [00:01<00:01, 207.14it/s]



Epoch [434/700]:  54%|█████▍    | 424/782 [00:02<00:01, 206.38it/s]



Epoch [434/700]:  63%|██████▎   | 490/782 [00:02<00:01, 209.31it/s]



Epoch [434/700]:  65%|██████▌   | 511/782 [00:02<00:01, 204.76it/s]



Epoch [434/700]:  74%|███████▎  | 576/782 [00:02<00:00, 206.97it/s]



Epoch [434/700]:  79%|███████▉  | 618/782 [00:03<00:00, 201.68it/s]



Epoch [434/700]:  84%|████████▍ | 660/782 [00:03<00:00, 202.13it/s]



Epoch [434/700]:  90%|████████▉ | 701/782 [00:03<00:00, 196.04it/s]



Epoch [434/700]:  95%|█████████▍| 742/782 [00:03<00:00, 198.03it/s]



Epoch [434/700]: 100%|██████████| 782/782 [00:03<00:00, 203.16it/s]






Learning Rate: 0.004500
Train Loss: 0.0612, Accuracy: 97.85%, Confidence: 0.9741
Test Loss: 2.0492, Accuracy: 73.07%, Confidence: 0.9414
Train-Test Accuracy Gap: 24.78%


Epoch [435/700]:   3%|▎         | 20/782 [00:00<00:03, 197.87it/s]



Epoch [435/700]:   5%|▌         | 41/782 [00:00<00:03, 203.98it/s]



Epoch [435/700]:   8%|▊         | 62/782 [00:00<00:03, 203.42it/s]



Epoch [435/700]:  11%|█         | 83/782 [00:00<00:03, 202.08it/s]



Epoch [435/700]:  13%|█▎        | 105/782 [00:00<00:03, 206.88it/s]



Epoch [435/700]:  16%|█▌        | 126/782 [00:00<00:03, 206.77it/s]



Epoch [435/700]:  19%|█▉        | 147/782 [00:00<00:03, 206.11it/s]



Epoch [435/700]:  21%|██▏       | 168/782 [00:00<00:02, 206.00it/s]



Epoch [435/700]:  24%|██▍       | 189/782 [00:00<00:02, 205.07it/s]



Epoch [435/700]:  27%|██▋       | 210/782 [00:01<00:02, 205.84it/s]



Epoch [435/700]:  30%|██▉       | 231/782 [00:01<00:02, 204.84it/s]



Epoch [435/700]:  32%|███▏      | 252/782 [00:01<00:02, 202.28it/s]



Epoch [435/700]:  35%|███▍      | 273/782 [00:01<00:02, 203.66it/s]



Epoch [435/700]:  38%|███▊      | 294/782 [00:01<00:02, 199.17it/s]



Epoch [435/700]:  43%|████▎     | 338/782 [00:01<00:02, 207.28it/s]



Epoch [435/700]:  49%|████▊     | 381/782 [00:01<00:01, 206.39it/s]



Epoch [435/700]:  54%|█████▍    | 424/782 [00:02<00:01, 208.21it/s]



Epoch [435/700]:  60%|█████▉    | 466/782 [00:02<00:01, 206.74it/s]



Epoch [435/700]:  65%|██████▍   | 508/782 [00:02<00:01, 205.45it/s]



Epoch [435/700]:  70%|███████   | 551/782 [00:02<00:01, 203.36it/s]



Epoch [435/700]:  76%|███████▌  | 594/782 [00:02<00:00, 205.99it/s]



Epoch [435/700]:  81%|████████▏ | 637/782 [00:03<00:00, 208.11it/s]



Epoch [435/700]:  87%|████████▋ | 679/782 [00:03<00:00, 206.44it/s]



Epoch [435/700]:  92%|█████████▏| 721/782 [00:03<00:00, 201.49it/s]



Epoch [435/700]:  95%|█████████▍| 742/782 [00:03<00:00, 201.99it/s]



Epoch [435/700]:  98%|█████████▊| 763/782 [00:03<00:00, 203.94it/s]



Epoch [435/700]: 100%|██████████| 782/782 [00:03<00:00, 204.32it/s]






Learning Rate: 0.004500
Train Loss: 0.0649, Accuracy: 97.75%, Confidence: 0.9746
Test Loss: 1.9712, Accuracy: 72.32%, Confidence: 0.9350
Train-Test Accuracy Gap: 25.43%


Epoch [436/700]:   2%|▏         | 19/782 [00:00<00:04, 185.26it/s]



Epoch [436/700]:   5%|▌         | 40/782 [00:00<00:03, 196.85it/s]



Epoch [436/700]:   8%|▊         | 61/782 [00:00<00:03, 199.66it/s]



Epoch [436/700]:  10%|█         | 82/782 [00:00<00:03, 202.75it/s]



Epoch [436/700]:  13%|█▎        | 103/782 [00:00<00:03, 196.95it/s]



Epoch [436/700]:  21%|██▏       | 167/782 [00:00<00:03, 202.66it/s]



Epoch [436/700]:  27%|██▋       | 209/782 [00:01<00:02, 204.73it/s]



Epoch [436/700]:  32%|███▏      | 251/782 [00:01<00:02, 203.40it/s]



Epoch [436/700]:  38%|███▊      | 294/782 [00:01<00:02, 207.81it/s]



Epoch [436/700]:  46%|████▌     | 360/782 [00:01<00:01, 213.39it/s]



Epoch [436/700]:  52%|█████▏    | 404/782 [00:01<00:01, 207.19it/s]



Epoch [436/700]:  57%|█████▋    | 448/782 [00:02<00:01, 211.46it/s]



Epoch [436/700]:  63%|██████▎   | 492/782 [00:02<00:01, 210.45it/s]



Epoch [436/700]:  69%|██████▊   | 536/782 [00:02<00:01, 210.32it/s]



Epoch [436/700]:  74%|███████▍  | 580/782 [00:02<00:00, 211.61it/s]



Epoch [436/700]:  80%|███████▉  | 624/782 [00:03<00:00, 209.48it/s]



Epoch [436/700]:  85%|████████▌ | 667/782 [00:03<00:00, 209.83it/s]



Epoch [436/700]:  91%|█████████ | 710/782 [00:03<00:00, 209.92it/s]



Epoch [436/700]:  96%|█████████▋| 753/782 [00:03<00:00, 210.52it/s]



Epoch [436/700]: 100%|██████████| 782/782 [00:03<00:00, 207.36it/s]


Learning Rate: 0.004500
Train Loss: 0.0601, Accuracy: 97.96%, Confidence: 0.9752
Test Loss: 2.1773, Accuracy: 71.44%, Confidence: 0.9402
Train-Test Accuracy Gap: 26.52%


Epoch [437/700]:   3%|▎         | 21/782 [00:00<00:03, 207.83it/s]



Epoch [437/700]:   5%|▌         | 42/782 [00:00<00:03, 207.97it/s]



Epoch [437/700]:   8%|▊         | 63/782 [00:00<00:03, 204.21it/s]



Epoch [437/700]:  16%|█▋        | 128/782 [00:00<00:03, 210.36it/s]



Epoch [437/700]:  22%|██▏       | 172/782 [00:00<00:02, 209.38it/s]



Epoch [437/700]:  27%|██▋       | 214/782 [00:01<00:02, 207.32it/s]



Epoch [437/700]:  36%|███▌      | 278/782 [00:01<00:02, 208.94it/s]



Epoch [437/700]:  41%|████      | 320/782 [00:01<00:02, 208.88it/s]



Epoch [437/700]:  46%|████▋     | 362/782 [00:01<00:02, 204.55it/s]



Epoch [437/700]:  52%|█████▏    | 405/782 [00:01<00:01, 207.08it/s]



Epoch [437/700]:  57%|█████▋    | 447/782 [00:02<00:01, 202.73it/s]



Epoch [437/700]:  63%|██████▎   | 490/782 [00:02<00:01, 206.90it/s]



Epoch [437/700]:  68%|██████▊   | 534/782 [00:02<00:01, 210.91it/s]



Epoch [437/700]:  74%|███████▍  | 578/782 [00:02<00:00, 211.44it/s]



Epoch [437/700]:  80%|███████▉  | 622/782 [00:02<00:00, 209.45it/s]



Epoch [437/700]:  88%|████████▊ | 686/782 [00:03<00:00, 207.69it/s]



Epoch [437/700]:  93%|█████████▎| 728/782 [00:03<00:00, 205.53it/s]



Epoch [437/700]:  98%|█████████▊| 770/782 [00:03<00:00, 203.91it/s]



Epoch [437/700]: 100%|██████████| 782/782 [00:03<00:00, 207.12it/s]


Learning Rate: 0.004500
Train Loss: 0.0597, Accuracy: 97.98%, Confidence: 0.9756
Test Loss: 2.2243, Accuracy: 71.09%, Confidence: 0.9409
Train-Test Accuracy Gap: 26.89%


Epoch [438/700]:   3%|▎         | 22/782 [00:00<00:03, 212.04it/s]



Epoch [438/700]:   6%|▌         | 44/782 [00:00<00:03, 211.95it/s]



Epoch [438/700]:   8%|▊         | 66/782 [00:00<00:03, 211.41it/s]



Epoch [438/700]:  14%|█▍        | 110/782 [00:00<00:03, 211.86it/s]



Epoch [438/700]:  20%|█▉        | 154/782 [00:00<00:02, 211.26it/s]



Epoch [438/700]:  28%|██▊       | 220/782 [00:01<00:02, 212.03it/s]



Epoch [438/700]:  34%|███▎      | 263/782 [00:01<00:02, 206.46it/s]



Epoch [438/700]:  39%|███▉      | 306/782 [00:01<00:02, 208.55it/s]



Epoch [438/700]:  45%|████▍     | 349/782 [00:01<00:02, 211.24it/s]



Epoch [438/700]:  50%|█████     | 393/782 [00:01<00:01, 214.03it/s]



Epoch [438/700]:  59%|█████▊    | 459/782 [00:02<00:01, 214.14it/s]



Epoch [438/700]:  64%|██████▍   | 502/782 [00:02<00:01, 208.22it/s]



Epoch [438/700]:  70%|██████▉   | 544/782 [00:02<00:01, 207.38it/s]



Epoch [438/700]:  75%|███████▌  | 587/782 [00:02<00:00, 207.73it/s]



Epoch [438/700]:  81%|████████  | 631/782 [00:03<00:00, 211.44it/s]



Epoch [438/700]:  89%|████████▉ | 697/782 [00:03<00:00, 213.17it/s]



Epoch [438/700]:  95%|█████████▍| 741/782 [00:03<00:00, 210.88it/s]



Epoch [438/700]: 100%|██████████| 782/782 [00:03<00:00, 210.20it/s]


Learning Rate: 0.004500
Train Loss: 0.0609, Accuracy: 97.84%, Confidence: 0.9753
Test Loss: 2.1310, Accuracy: 72.11%, Confidence: 0.9388
Train-Test Accuracy Gap: 25.73%


Epoch [439/700]:   3%|▎         | 21/782 [00:00<00:03, 206.31it/s]



Epoch [439/700]:   5%|▌         | 43/782 [00:00<00:03, 209.58it/s]



Epoch [439/700]:   8%|▊         | 65/782 [00:00<00:03, 210.09it/s]



Epoch [439/700]:  14%|█▍        | 108/782 [00:00<00:03, 204.05it/s]



Epoch [439/700]:  22%|██▏       | 171/782 [00:00<00:02, 204.22it/s]



Epoch [439/700]:  27%|██▋       | 215/782 [00:01<00:02, 208.92it/s]



Epoch [439/700]:  33%|███▎      | 257/782 [00:01<00:02, 208.42it/s]



Epoch [439/700]:  38%|███▊      | 301/782 [00:01<00:02, 210.68it/s]



Epoch [439/700]:  44%|████▍     | 345/782 [00:01<00:02, 207.72it/s]



Epoch [439/700]:  50%|████▉     | 389/782 [00:01<00:01, 209.65it/s]



Epoch [439/700]:  55%|█████▌    | 433/782 [00:02<00:01, 213.44it/s]



Epoch [439/700]:  61%|██████    | 477/782 [00:02<00:01, 214.08it/s]



Epoch [439/700]:  69%|██████▉   | 543/782 [00:02<00:01, 215.12it/s]



Epoch [439/700]:  75%|███████▌  | 587/782 [00:02<00:00, 213.67it/s]



Epoch [439/700]:  81%|████████  | 631/782 [00:03<00:00, 212.80it/s]



Epoch [439/700]:  86%|████████▋ | 675/782 [00:03<00:00, 212.82it/s]



Epoch [439/700]:  92%|█████████▏| 719/782 [00:03<00:00, 213.59it/s]



Epoch [439/700]:  98%|█████████▊| 763/782 [00:03<00:00, 208.37it/s]



Epoch [439/700]: 100%|██████████| 782/782 [00:03<00:00, 209.98it/s]


Learning Rate: 0.004500
Train Loss: 0.0613, Accuracy: 97.87%, Confidence: 0.9749
Test Loss: 2.1286, Accuracy: 71.62%, Confidence: 0.9382
Train-Test Accuracy Gap: 26.25%


Epoch [440/700]:   3%|▎         | 22/782 [00:00<00:03, 212.11it/s]



Epoch [440/700]:   6%|▌         | 44/782 [00:00<00:03, 210.91it/s]



Epoch [440/700]:   8%|▊         | 66/782 [00:00<00:03, 208.59it/s]



Epoch [440/700]:  11%|█▏        | 88/782 [00:00<00:03, 212.10it/s]



Epoch [440/700]:  14%|█▍        | 110/782 [00:00<00:03, 207.75it/s]



Epoch [440/700]:  17%|█▋        | 131/782 [00:00<00:03, 205.98it/s]



Epoch [440/700]:  19%|█▉        | 152/782 [00:00<00:03, 206.99it/s]



Epoch [440/700]:  22%|██▏       | 174/782 [00:00<00:02, 209.54it/s]



Epoch [440/700]:  28%|██▊       | 218/782 [00:01<00:02, 210.79it/s]



Epoch [440/700]:  34%|███▎      | 262/782 [00:01<00:02, 205.73it/s]



Epoch [440/700]:  36%|███▌      | 283/782 [00:01<00:02, 206.90it/s]



Epoch [440/700]:  39%|███▉      | 304/782 [00:01<00:02, 205.88it/s]



Epoch [440/700]:  42%|████▏     | 325/782 [00:01<00:02, 204.70it/s]



Epoch [440/700]:  44%|████▍     | 346/782 [00:01<00:02, 201.55it/s]



Epoch [440/700]:  47%|████▋     | 367/782 [00:01<00:02, 199.45it/s]



Epoch [440/700]:  50%|████▉     | 389/782 [00:01<00:01, 202.99it/s]



Epoch [440/700]:  52%|█████▏    | 410/782 [00:01<00:01, 203.29it/s]



Epoch [440/700]:  55%|█████▌    | 431/782 [00:02<00:01, 204.25it/s]



Epoch [440/700]:  58%|█████▊    | 453/782 [00:02<00:01, 206.76it/s]



Epoch [440/700]:  61%|██████    | 474/782 [00:02<00:01, 204.17it/s]



Epoch [440/700]:  63%|██████▎   | 495/782 [00:02<00:01, 204.95it/s]



Epoch [440/700]:  69%|██████▊   | 537/782 [00:02<00:01, 206.12it/s]



Epoch [440/700]:  74%|███████▍  | 579/782 [00:02<00:01, 200.77it/s]



Epoch [440/700]:  80%|███████▉  | 623/782 [00:03<00:00, 205.82it/s]



Epoch [440/700]:  85%|████████▌ | 665/782 [00:03<00:00, 199.77it/s]



Epoch [440/700]:  90%|█████████ | 707/782 [00:03<00:00, 199.94it/s]



Epoch [440/700]:  93%|█████████▎| 728/782 [00:03<00:00, 201.92it/s]



Epoch [440/700]:  96%|█████████▌| 749/782 [00:03<00:00, 202.49it/s]



Epoch [440/700]:  98%|█████████▊| 770/782 [00:03<00:00, 204.21it/s]



Epoch [440/700]: 100%|██████████| 782/782 [00:03<00:00, 204.90it/s]


Learning Rate: 0.004500
Train Loss: 0.0641, Accuracy: 97.86%, Confidence: 0.9753
Test Loss: 2.2190, Accuracy: 71.88%, Confidence: 0.9392
Train-Test Accuracy Gap: 25.98%


Epoch [441/700]:   3%|▎         | 21/782 [00:00<00:03, 201.22it/s]



Epoch [441/700]:   5%|▌         | 42/782 [00:00<00:03, 203.40it/s]



Epoch [441/700]:   8%|▊         | 63/782 [00:00<00:03, 206.19it/s]



Epoch [441/700]:  11%|█         | 84/782 [00:00<00:03, 203.99it/s]



Epoch [441/700]:  14%|█▎        | 106/782 [00:00<00:03, 207.03it/s]



Epoch [441/700]:  16%|█▌        | 127/782 [00:00<00:03, 207.05it/s]



Epoch [441/700]:  19%|█▉        | 148/782 [00:00<00:03, 205.63it/s]



Epoch [441/700]:  27%|██▋       | 214/782 [00:01<00:02, 212.13it/s]



Epoch [441/700]:  33%|███▎      | 258/782 [00:01<00:02, 211.22it/s]



Epoch [441/700]:  39%|███▊      | 302/782 [00:01<00:02, 210.73it/s]



Epoch [441/700]:  44%|████▍     | 345/782 [00:01<00:02, 204.27it/s]



Epoch [441/700]:  49%|████▉     | 387/782 [00:01<00:01, 206.16it/s]



Epoch [441/700]:  55%|█████▌    | 431/782 [00:02<00:01, 208.97it/s]



Epoch [441/700]:  61%|██████    | 474/782 [00:02<00:01, 210.66it/s]



Epoch [441/700]:  66%|██████▌   | 517/782 [00:02<00:01, 205.10it/s]



Epoch [441/700]:  71%|███████▏  | 559/782 [00:02<00:01, 204.42it/s]



Epoch [441/700]:  77%|███████▋  | 603/782 [00:02<00:00, 209.00it/s]



Epoch [441/700]:  82%|████████▏ | 645/782 [00:03<00:00, 209.22it/s]



Epoch [441/700]:  91%|█████████ | 711/782 [00:03<00:00, 212.36it/s]



Epoch [441/700]:  97%|█████████▋| 755/782 [00:03<00:00, 207.18it/s]



Epoch [441/700]: 100%|██████████| 782/782 [00:03<00:00, 207.36it/s]


Learning Rate: 0.004500
Train Loss: 0.0658, Accuracy: 97.79%, Confidence: 0.9745
Test Loss: 2.0094, Accuracy: 73.42%, Confidence: 0.9406
Train-Test Accuracy Gap: 24.37%


Epoch [442/700]:   3%|▎         | 20/782 [00:00<00:03, 194.17it/s]



Epoch [442/700]:   5%|▌         | 42/782 [00:00<00:03, 203.73it/s]



Epoch [442/700]:   8%|▊         | 64/782 [00:00<00:03, 207.23it/s]



Epoch [442/700]:  16%|█▋        | 128/782 [00:00<00:03, 208.90it/s]



Epoch [442/700]:  22%|██▏       | 171/782 [00:00<00:02, 210.71it/s]



Epoch [442/700]:  27%|██▋       | 215/782 [00:01<00:02, 212.21it/s]



Epoch [442/700]:  33%|███▎      | 259/782 [00:01<00:02, 215.02it/s]



Epoch [442/700]:  42%|████▏     | 325/782 [00:01<00:02, 211.88it/s]



Epoch [442/700]:  47%|████▋     | 369/782 [00:01<00:01, 212.42it/s]



Epoch [442/700]:  53%|█████▎    | 413/782 [00:01<00:01, 207.39it/s]



Epoch [442/700]:  61%|██████▏   | 479/782 [00:02<00:01, 211.31it/s]



Epoch [442/700]:  67%|██████▋   | 523/782 [00:02<00:01, 210.84it/s]



Epoch [442/700]:  73%|███████▎  | 567/782 [00:02<00:01, 210.65it/s]



Epoch [442/700]:  78%|███████▊  | 611/782 [00:02<00:00, 211.23it/s]



Epoch [442/700]:  84%|████████▍ | 655/782 [00:03<00:00, 212.62it/s]



Epoch [442/700]:  89%|████████▉ | 699/782 [00:03<00:00, 212.39it/s]



Epoch [442/700]:  95%|█████████▌| 743/782 [00:03<00:00, 211.98it/s]



Epoch [442/700]: 100%|██████████| 782/782 [00:03<00:00, 210.16it/s]


Learning Rate: 0.004500
Train Loss: 0.0603, Accuracy: 97.89%, Confidence: 0.9753
Test Loss: 2.2673, Accuracy: 70.82%, Confidence: 0.9397
Train-Test Accuracy Gap: 27.07%


Epoch [443/700]:   3%|▎         | 21/782 [00:00<00:03, 201.45it/s]



Epoch [443/700]:  11%|█         | 87/782 [00:00<00:03, 211.40it/s]



Epoch [443/700]:  17%|█▋        | 131/782 [00:00<00:03, 212.91it/s]



Epoch [443/700]:  22%|██▏       | 174/782 [00:00<00:02, 206.40it/s]



Epoch [443/700]:  28%|██▊       | 217/782 [00:01<00:02, 207.42it/s]



Epoch [443/700]:  36%|███▌      | 280/782 [00:01<00:02, 205.31it/s]



Epoch [443/700]:  44%|████▍     | 343/782 [00:01<00:02, 205.25it/s]



Epoch [443/700]:  52%|█████▏    | 408/782 [00:01<00:01, 209.64it/s]



Epoch [443/700]:  58%|█████▊    | 450/782 [00:02<00:01, 203.25it/s]



Epoch [443/700]:  63%|██████▎   | 492/782 [00:02<00:01, 200.58it/s]



Epoch [443/700]:  68%|██████▊   | 534/782 [00:02<00:01, 204.68it/s]



Epoch [443/700]:  74%|███████▎  | 576/782 [00:02<00:00, 206.31it/s]



Epoch [443/700]:  82%|████████▏ | 641/782 [00:03<00:00, 210.86it/s]



Epoch [443/700]:  87%|████████▋ | 684/782 [00:03<00:00, 203.60it/s]



Epoch [443/700]:  93%|█████████▎| 726/782 [00:03<00:00, 204.48it/s]



Epoch [443/700]:  98%|█████████▊| 768/782 [00:03<00:00, 200.59it/s]



Epoch [443/700]: 100%|██████████| 782/782 [00:03<00:00, 205.29it/s]


Learning Rate: 0.004500
Train Loss: 0.0628, Accuracy: 97.84%, Confidence: 0.9756
Test Loss: 2.1006, Accuracy: 72.44%, Confidence: 0.9367
Train-Test Accuracy Gap: 25.40%


Epoch [444/700]:   3%|▎         | 22/782 [00:00<00:03, 212.33it/s]



Epoch [444/700]:  11%|█▏        | 89/782 [00:00<00:03, 219.42it/s]



Epoch [444/700]:  17%|█▋        | 133/782 [00:00<00:03, 212.68it/s]



Epoch [444/700]:  23%|██▎       | 177/782 [00:00<00:02, 212.24it/s]



Epoch [444/700]:  28%|██▊       | 221/782 [00:01<00:02, 214.48it/s]



Epoch [444/700]:  34%|███▍      | 265/782 [00:01<00:02, 212.90it/s]



Epoch [444/700]:  39%|███▉      | 308/782 [00:01<00:02, 207.02it/s]



Epoch [444/700]:  45%|████▌     | 352/782 [00:01<00:02, 211.69it/s]



Epoch [444/700]:  51%|█████     | 396/782 [00:01<00:01, 212.18it/s]



Epoch [444/700]:  56%|█████▋    | 440/782 [00:02<00:01, 212.65it/s]



Epoch [444/700]:  62%|██████▏   | 484/782 [00:02<00:01, 211.87it/s]



Epoch [444/700]:  68%|██████▊   | 528/782 [00:02<00:01, 210.65it/s]



Epoch [444/700]:  73%|███████▎  | 572/782 [00:02<00:00, 212.78it/s]



Epoch [444/700]:  79%|███████▉  | 616/782 [00:02<00:00, 210.11it/s]



Epoch [444/700]:  84%|████████▍ | 659/782 [00:03<00:00, 209.19it/s]



Epoch [444/700]:  90%|████████▉ | 703/782 [00:03<00:00, 208.42it/s]



Epoch [444/700]:  95%|█████████▌| 746/782 [00:03<00:00, 209.38it/s]



Epoch [444/700]: 100%|██████████| 782/782 [00:03<00:00, 210.92it/s]


Learning Rate: 0.004500
Train Loss: 0.0579, Accuracy: 98.01%, Confidence: 0.9759
Test Loss: 2.1177, Accuracy: 71.69%, Confidence: 0.9351
Train-Test Accuracy Gap: 26.32%


Epoch [445/700]:   3%|▎         | 21/782 [00:00<00:03, 206.83it/s]



Epoch [445/700]:   5%|▌         | 43/782 [00:00<00:03, 209.25it/s]



Epoch [445/700]:  11%|█         | 86/782 [00:00<00:03, 211.37it/s]



Epoch [445/700]:  17%|█▋        | 130/782 [00:00<00:03, 209.93it/s]



Epoch [445/700]:  22%|██▏       | 174/782 [00:00<00:02, 210.11it/s]



Epoch [445/700]:  28%|██▊       | 218/782 [00:01<00:02, 205.57it/s]



Epoch [445/700]:  33%|███▎      | 261/782 [00:01<00:02, 207.98it/s]



Epoch [445/700]:  39%|███▊      | 303/782 [00:01<00:02, 206.23it/s]



Epoch [445/700]:  44%|████▍     | 345/782 [00:01<00:02, 207.29it/s]



Epoch [445/700]:  49%|████▉     | 387/782 [00:01<00:01, 204.04it/s]



Epoch [445/700]:  55%|█████▍    | 430/782 [00:02<00:01, 206.92it/s]



Epoch [445/700]:  60%|██████    | 473/782 [00:02<00:01, 206.42it/s]



Epoch [445/700]:  66%|██████▌   | 515/782 [00:02<00:01, 203.22it/s]



Epoch [445/700]:  71%|███████▏  | 558/782 [00:02<00:01, 208.72it/s]



Epoch [445/700]:  77%|███████▋  | 600/782 [00:02<00:00, 204.01it/s]



Epoch [445/700]:  82%|████████▏ | 642/782 [00:03<00:00, 205.45it/s]



Epoch [445/700]:  87%|████████▋ | 684/782 [00:03<00:00, 205.53it/s]



Epoch [445/700]:  93%|█████████▎| 726/782 [00:03<00:00, 205.82it/s]



Epoch [445/700]: 100%|██████████| 782/782 [00:03<00:00, 206.34it/s]






Learning Rate: 0.004500
Train Loss: 0.0700, Accuracy: 97.57%, Confidence: 0.9744
Test Loss: 1.9775, Accuracy: 73.38%, Confidence: 0.9398
Train-Test Accuracy Gap: 24.19%


Epoch [446/700]:   3%|▎         | 21/782 [00:00<00:03, 204.75it/s]



Epoch [446/700]:   5%|▌         | 42/782 [00:00<00:03, 206.60it/s]



Epoch [446/700]:  11%|█         | 84/782 [00:00<00:03, 207.52it/s]



Epoch [446/700]:  16%|█▌        | 126/782 [00:00<00:03, 207.47it/s]



Epoch [446/700]:  21%|██▏       | 168/782 [00:00<00:02, 208.57it/s]



Epoch [446/700]:  27%|██▋       | 210/782 [00:01<00:02, 204.25it/s]



Epoch [446/700]:  32%|███▏      | 254/782 [00:01<00:02, 208.57it/s]



Epoch [446/700]:  38%|███▊      | 297/782 [00:01<00:02, 209.81it/s]



Epoch [446/700]:  43%|████▎     | 339/782 [00:01<00:02, 205.28it/s]



Epoch [446/700]:  49%|████▊     | 381/782 [00:01<00:02, 199.89it/s]



Epoch [446/700]:  54%|█████▍    | 423/782 [00:02<00:01, 204.21it/s]



Epoch [446/700]:  59%|█████▉    | 465/782 [00:02<00:01, 205.64it/s]



Epoch [446/700]:  65%|██████▍   | 507/782 [00:02<00:01, 202.78it/s]



Epoch [446/700]:  70%|███████   | 550/782 [00:02<00:01, 202.21it/s]



Epoch [446/700]:  79%|███████▊  | 615/782 [00:02<00:00, 208.14it/s]



Epoch [446/700]:  84%|████████▍ | 657/782 [00:03<00:00, 208.30it/s]



Epoch [446/700]:  89%|████████▉ | 699/782 [00:03<00:00, 204.48it/s]



Epoch [446/700]:  95%|█████████▍| 742/782 [00:03<00:00, 205.95it/s]



Epoch [446/700]: 100%|██████████| 782/782 [00:03<00:00, 205.57it/s]


Learning Rate: 0.004500
Train Loss: 0.0649, Accuracy: 97.78%, Confidence: 0.9748
Test Loss: 2.4074, Accuracy: 69.42%, Confidence: 0.9365
Train-Test Accuracy Gap: 28.36%


Epoch [447/700]:   3%|▎         | 21/782 [00:00<00:03, 204.41it/s]



Epoch [447/700]:   5%|▌         | 43/782 [00:00<00:03, 207.80it/s]



Epoch [447/700]:   8%|▊         | 64/782 [00:00<00:03, 206.67it/s]



Epoch [447/700]:  11%|█         | 86/782 [00:00<00:03, 210.34it/s]



Epoch [447/700]:  14%|█▍        | 108/782 [00:00<00:03, 210.26it/s]



Epoch [447/700]:  17%|█▋        | 130/782 [00:00<00:03, 206.41it/s]



Epoch [447/700]:  19%|█▉        | 151/782 [00:00<00:03, 204.88it/s]



Epoch [447/700]:  22%|██▏       | 172/782 [00:00<00:02, 205.79it/s]



Epoch [447/700]:  28%|██▊       | 216/782 [00:01<00:02, 210.39it/s]



Epoch [447/700]:  33%|███▎      | 260/782 [00:01<00:02, 205.09it/s]



Epoch [447/700]:  39%|███▊      | 303/782 [00:01<00:02, 203.36it/s]



Epoch [447/700]:  44%|████▍     | 346/782 [00:01<00:02, 205.88it/s]



Epoch [447/700]:  47%|████▋     | 367/782 [00:01<00:02, 204.20it/s]



Epoch [447/700]:  50%|████▉     | 389/782 [00:01<00:01, 206.89it/s]



Epoch [447/700]:  52%|█████▏    | 410/782 [00:01<00:01, 206.57it/s]



Epoch [447/700]:  55%|█████▌    | 432/782 [00:02<00:01, 209.89it/s]



Epoch [447/700]:  58%|█████▊    | 454/782 [00:02<00:01, 212.01it/s]



Epoch [447/700]:  61%|██████    | 476/782 [00:02<00:01, 212.79it/s]



Epoch [447/700]:  64%|██████▎   | 498/782 [00:02<00:01, 211.29it/s]



Epoch [447/700]:  66%|██████▋   | 520/782 [00:02<00:01, 212.59it/s]



Epoch [447/700]:  69%|██████▉   | 542/782 [00:02<00:01, 213.04it/s]



Epoch [447/700]:  75%|███████▍  | 586/782 [00:02<00:00, 212.19it/s]



Epoch [447/700]:  81%|████████  | 630/782 [00:03<00:00, 210.03it/s]



Epoch [447/700]:  86%|████████▌ | 674/782 [00:03<00:00, 206.39it/s]



Epoch [447/700]:  92%|█████████▏| 717/782 [00:03<00:00, 207.73it/s]



Epoch [447/700]:  97%|█████████▋| 760/782 [00:03<00:00, 207.61it/s]



Epoch [447/700]: 100%|██████████| 782/782 [00:03<00:00, 207.50it/s]


Learning Rate: 0.004500
Train Loss: 0.0596, Accuracy: 98.01%, Confidence: 0.9761
Test Loss: 1.9579, Accuracy: 73.62%, Confidence: 0.9403
Train-Test Accuracy Gap: 24.39%


Epoch [448/700]:   3%|▎         | 21/782 [00:00<00:03, 205.64it/s]



Epoch [448/700]:   5%|▌         | 43/782 [00:00<00:03, 210.21it/s]



Epoch [448/700]:   8%|▊         | 65/782 [00:00<00:03, 201.32it/s]



Epoch [448/700]:  11%|█         | 86/782 [00:00<00:03, 204.30it/s]



Epoch [448/700]:  14%|█▎        | 107/782 [00:00<00:03, 200.75it/s]



Epoch [448/700]:  22%|██▏       | 172/782 [00:00<00:02, 208.89it/s]



Epoch [448/700]:  27%|██▋       | 215/782 [00:01<00:02, 209.21it/s]



Epoch [448/700]:  33%|███▎      | 258/782 [00:01<00:02, 209.58it/s]



Epoch [448/700]:  41%|████▏     | 324/782 [00:01<00:02, 210.16it/s]



Epoch [448/700]:  47%|████▋     | 368/782 [00:01<00:01, 210.80it/s]



Epoch [448/700]:  53%|█████▎    | 412/782 [00:01<00:01, 210.67it/s]



Epoch [448/700]:  58%|█████▊    | 456/782 [00:02<00:01, 209.88it/s]



Epoch [448/700]:  64%|██████▍   | 500/782 [00:02<00:01, 210.92it/s]



Epoch [448/700]:  72%|███████▏  | 566/782 [00:02<00:01, 214.78it/s]



Epoch [448/700]:  78%|███████▊  | 609/782 [00:02<00:00, 209.32it/s]



Epoch [448/700]:  83%|████████▎ | 652/782 [00:03<00:00, 209.21it/s]



Epoch [448/700]:  89%|████████▉ | 695/782 [00:03<00:00, 208.87it/s]



Epoch [448/700]:  94%|█████████▍| 738/782 [00:03<00:00, 207.26it/s]



Epoch [448/700]: 100%|██████████| 782/782 [00:03<00:00, 208.44it/s]






Learning Rate: 0.004500
Train Loss: 0.0596, Accuracy: 98.03%, Confidence: 0.9761
Test Loss: 2.2468, Accuracy: 71.60%, Confidence: 0.9393
Train-Test Accuracy Gap: 26.43%


Epoch [449/700]:   3%|▎         | 20/782 [00:00<00:03, 193.75it/s]



Epoch [449/700]:  11%|█         | 86/782 [00:00<00:03, 210.06it/s]



Epoch [449/700]:  16%|█▋        | 129/782 [00:00<00:03, 206.21it/s]



Epoch [449/700]:  22%|██▏       | 172/782 [00:00<00:02, 209.76it/s]



Epoch [449/700]:  27%|██▋       | 215/782 [00:01<00:02, 209.17it/s]



Epoch [449/700]:  33%|███▎      | 257/782 [00:01<00:02, 209.02it/s]



Epoch [449/700]:  38%|███▊      | 301/782 [00:01<00:02, 212.96it/s]



Epoch [449/700]:  44%|████▍     | 345/782 [00:01<00:02, 209.67it/s]



Epoch [449/700]:  49%|████▉     | 387/782 [00:01<00:01, 209.48it/s]



Epoch [449/700]:  58%|█████▊    | 451/782 [00:02<00:01, 207.66it/s]



Epoch [449/700]:  63%|██████▎   | 494/782 [00:02<00:01, 208.10it/s]



Epoch [449/700]:  69%|██████▊   | 536/782 [00:02<00:01, 208.27it/s]



Epoch [449/700]:  74%|███████▍  | 579/782 [00:02<00:00, 209.85it/s]



Epoch [449/700]:  80%|███████▉  | 623/782 [00:02<00:00, 211.66it/s]



Epoch [449/700]:  85%|████████▌ | 666/782 [00:03<00:00, 206.80it/s]



Epoch [449/700]:  91%|█████████ | 708/782 [00:03<00:00, 206.71it/s]



Epoch [449/700]:  96%|█████████▌| 750/782 [00:03<00:00, 205.79it/s]



Epoch [449/700]: 100%|██████████| 782/782 [00:03<00:00, 208.09it/s]


Learning Rate: 0.004500
Train Loss: 0.0588, Accuracy: 98.04%, Confidence: 0.9762
Test Loss: 1.9477, Accuracy: 73.05%, Confidence: 0.9378
Train-Test Accuracy Gap: 24.99%


Epoch [450/700]:   3%|▎         | 20/782 [00:00<00:03, 199.94it/s]



Epoch [450/700]:  11%|█         | 85/782 [00:00<00:03, 212.01it/s]



Epoch [450/700]:  16%|█▋        | 129/782 [00:00<00:03, 213.11it/s]



Epoch [450/700]:  22%|██▏       | 173/782 [00:00<00:02, 209.46it/s]



Epoch [450/700]:  30%|███       | 237/782 [00:01<00:02, 210.48it/s]



Epoch [450/700]:  36%|███▌      | 281/782 [00:01<00:02, 209.90it/s]



Epoch [450/700]:  42%|████▏     | 325/782 [00:01<00:02, 211.76it/s]



Epoch [450/700]:  47%|████▋     | 369/782 [00:01<00:01, 211.96it/s]



Epoch [450/700]:  53%|█████▎    | 412/782 [00:01<00:01, 208.19it/s]



Epoch [450/700]:  58%|█████▊    | 454/782 [00:02<00:01, 206.39it/s]



Epoch [450/700]:  64%|██████▎   | 497/782 [00:02<00:01, 207.41it/s]



Epoch [450/700]:  69%|██████▉   | 540/782 [00:02<00:01, 209.43it/s]



Epoch [450/700]:  75%|███████▍  | 583/782 [00:02<00:00, 207.54it/s]



Epoch [450/700]:  80%|████████  | 626/782 [00:02<00:00, 208.66it/s]



Epoch [450/700]:  86%|████████▌ | 669/782 [00:03<00:00, 205.59it/s]



Epoch [450/700]:  91%|█████████ | 712/782 [00:03<00:00, 208.28it/s]



Epoch [450/700]:  97%|█████████▋| 755/782 [00:03<00:00, 211.36it/s]



Epoch [450/700]: 100%|██████████| 782/782 [00:03<00:00, 208.98it/s]


Learning Rate: 0.004500
Train Loss: 0.0650, Accuracy: 97.77%, Confidence: 0.9747
Test Loss: 2.5933, Accuracy: 69.85%, Confidence: 0.9397
Train-Test Accuracy Gap: 27.92%


Epoch [451/700]:   3%|▎         | 21/782 [00:00<00:03, 204.93it/s]



Epoch [451/700]:  11%|█         | 86/782 [00:00<00:03, 209.26it/s]



Epoch [451/700]:  17%|█▋        | 132/782 [00:00<00:03, 216.08it/s]



Epoch [451/700]:  23%|██▎       | 176/782 [00:00<00:02, 215.47it/s]



Epoch [451/700]:  28%|██▊       | 220/782 [00:01<00:02, 215.47it/s]



Epoch [451/700]:  34%|███▍      | 264/782 [00:01<00:02, 212.79it/s]



Epoch [451/700]:  39%|███▉      | 308/782 [00:01<00:02, 212.60it/s]



Epoch [451/700]:  45%|████▌     | 352/782 [00:01<00:02, 208.63it/s]



Epoch [451/700]:  51%|█████     | 396/782 [00:01<00:01, 212.46it/s]



Epoch [451/700]:  56%|█████▋    | 440/782 [00:02<00:01, 211.17it/s]



Epoch [451/700]:  62%|██████▏   | 484/782 [00:02<00:01, 208.64it/s]



Epoch [451/700]:  68%|██████▊   | 528/782 [00:02<00:01, 211.33it/s]



Epoch [451/700]:  73%|███████▎  | 572/782 [00:02<00:01, 208.26it/s]



Epoch [451/700]:  79%|███████▊  | 614/782 [00:02<00:00, 207.94it/s]



Epoch [451/700]:  84%|████████▍ | 656/782 [00:03<00:00, 203.02it/s]



Epoch [451/700]:  87%|████████▋ | 677/782 [00:03<00:00, 201.20it/s]



Epoch [451/700]:  95%|█████████▍| 740/782 [00:03<00:00, 203.25it/s]



Epoch [451/700]: 100%|██████████| 782/782 [00:03<00:00, 208.10it/s]






Learning Rate: 0.004500
Train Loss: 0.0632, Accuracy: 97.87%, Confidence: 0.9756
Test Loss: 2.0099, Accuracy: 73.06%, Confidence: 0.9400
Train-Test Accuracy Gap: 24.81%


Epoch [452/700]:   3%|▎         | 21/782 [00:00<00:03, 200.73it/s]



Epoch [452/700]:   5%|▌         | 42/782 [00:00<00:03, 200.77it/s]



Epoch [452/700]:   8%|▊         | 63/782 [00:00<00:03, 199.43it/s]



Epoch [452/700]:  16%|█▋        | 129/782 [00:00<00:03, 211.29it/s]



Epoch [452/700]:  22%|██▏       | 173/782 [00:00<00:02, 208.46it/s]



Epoch [452/700]:  28%|██▊       | 216/782 [00:01<00:02, 208.51it/s]



Epoch [452/700]:  33%|███▎      | 258/782 [00:01<00:02, 205.83it/s]



Epoch [452/700]:  38%|███▊      | 301/782 [00:01<00:02, 207.36it/s]



Epoch [452/700]:  44%|████▍     | 343/782 [00:01<00:02, 206.49it/s]



Epoch [452/700]:  49%|████▉     | 386/782 [00:01<00:01, 209.13it/s]



Epoch [452/700]:  55%|█████▍    | 429/782 [00:02<00:01, 206.73it/s]



Epoch [452/700]:  60%|██████    | 471/782 [00:02<00:01, 199.62it/s]



Epoch [452/700]:  65%|██████▌   | 512/782 [00:02<00:01, 202.22it/s]



Epoch [452/700]:  71%|███████   | 556/782 [00:02<00:01, 206.96it/s]



Epoch [452/700]:  77%|███████▋  | 599/782 [00:02<00:00, 208.35it/s]



Epoch [452/700]:  82%|████████▏ | 642/782 [00:03<00:00, 207.32it/s]



Epoch [452/700]:  87%|████████▋ | 684/782 [00:03<00:00, 201.26it/s]



Epoch [452/700]:  93%|█████████▎| 726/782 [00:03<00:00, 197.13it/s]



Epoch [452/700]: 100%|██████████| 782/782 [00:03<00:00, 204.73it/s]






Learning Rate: 0.004500
Train Loss: 0.0613, Accuracy: 97.85%, Confidence: 0.9756
Test Loss: 1.9824, Accuracy: 73.18%, Confidence: 0.9402
Train-Test Accuracy Gap: 24.67%


Epoch [453/700]:   3%|▎         | 20/782 [00:00<00:03, 198.16it/s]



Epoch [453/700]:   5%|▌         | 40/782 [00:00<00:03, 199.03it/s]



Epoch [453/700]:  10%|█         | 82/782 [00:00<00:03, 203.98it/s]



Epoch [453/700]:  16%|█▌        | 126/782 [00:00<00:03, 209.07it/s]



Epoch [453/700]:  19%|█▉        | 147/782 [00:00<00:03, 208.88it/s]



Epoch [453/700]:  22%|██▏       | 169/782 [00:00<00:02, 210.63it/s]



Epoch [453/700]:  24%|██▍       | 191/782 [00:00<00:02, 208.43it/s]



Epoch [453/700]:  27%|██▋       | 213/782 [00:01<00:02, 210.03it/s]



Epoch [453/700]:  30%|███       | 235/782 [00:01<00:02, 209.88it/s]



Epoch [453/700]:  33%|███▎      | 256/782 [00:01<00:02, 208.66it/s]



Epoch [453/700]:  36%|███▌      | 278/782 [00:01<00:02, 210.17it/s]



Epoch [453/700]:  38%|███▊      | 300/782 [00:01<00:02, 210.61it/s]



Epoch [453/700]:  41%|████      | 322/782 [00:01<00:02, 203.53it/s]



Epoch [453/700]:  44%|████▍     | 343/782 [00:01<00:02, 200.68it/s]



Epoch [453/700]:  47%|████▋     | 364/782 [00:01<00:02, 202.65it/s]



Epoch [453/700]:  49%|████▉     | 385/782 [00:01<00:01, 204.70it/s]



Epoch [453/700]:  52%|█████▏    | 406/782 [00:01<00:01, 203.90it/s]



Epoch [453/700]:  55%|█████▍    | 427/782 [00:02<00:01, 204.67it/s]



Epoch [453/700]:  57%|█████▋    | 448/782 [00:02<00:01, 205.13it/s]



Epoch [453/700]:  60%|█████▉    | 469/782 [00:02<00:01, 203.88it/s]



Epoch [453/700]:  63%|██████▎   | 491/782 [00:02<00:01, 206.20it/s]



Epoch [453/700]:  68%|██████▊   | 533/782 [00:02<00:01, 204.67it/s]



Epoch [453/700]:  74%|███████▎  | 576/782 [00:02<00:00, 207.82it/s]



Epoch [453/700]:  76%|███████▋  | 597/782 [00:02<00:00, 207.70it/s]



Epoch [453/700]:  79%|███████▉  | 618/782 [00:02<00:00, 206.36it/s]



Epoch [453/700]:  82%|████████▏ | 639/782 [00:03<00:00, 205.58it/s]



Epoch [453/700]:  85%|████████▍ | 661/782 [00:03<00:00, 207.35it/s]



Epoch [453/700]:  87%|████████▋ | 683/782 [00:03<00:00, 208.73it/s]



Epoch [453/700]:  90%|█████████ | 704/782 [00:03<00:00, 201.98it/s]



Epoch [453/700]:  96%|█████████▌| 747/782 [00:03<00:00, 206.10it/s]



Epoch [453/700]:  98%|█████████▊| 768/782 [00:03<00:00, 204.22it/s]



Epoch [453/700]: 100%|██████████| 782/782 [00:03<00:00, 205.69it/s]


Learning Rate: 0.004500
Train Loss: 0.0592, Accuracy: 97.92%, Confidence: 0.9758
Test Loss: 1.9279, Accuracy: 73.81%, Confidence: 0.9406
Train-Test Accuracy Gap: 24.11%


Epoch [454/700]:   3%|▎         | 21/782 [00:00<00:03, 206.69it/s]



Epoch [454/700]:   5%|▌         | 43/782 [00:00<00:03, 212.44it/s]



Epoch [454/700]:  11%|█         | 87/782 [00:00<00:03, 214.86it/s]



Epoch [454/700]:  17%|█▋        | 131/782 [00:00<00:03, 213.91it/s]



Epoch [454/700]:  22%|██▏       | 175/782 [00:00<00:02, 212.47it/s]



Epoch [454/700]:  28%|██▊       | 219/782 [00:01<00:02, 205.96it/s]



Epoch [454/700]:  33%|███▎      | 261/782 [00:01<00:02, 207.10it/s]



Epoch [454/700]:  39%|███▉      | 304/782 [00:01<00:02, 209.58it/s]



Epoch [454/700]:  44%|████▍     | 347/782 [00:01<00:02, 206.78it/s]



Epoch [454/700]:  50%|████▉     | 390/782 [00:01<00:01, 210.00it/s]



Epoch [454/700]:  55%|█████▌    | 434/782 [00:02<00:01, 208.90it/s]



Epoch [454/700]:  58%|█████▊    | 455/782 [00:02<00:01, 209.16it/s]



Epoch [454/700]:  61%|██████    | 476/782 [00:02<00:01, 209.32it/s]



Epoch [454/700]:  64%|██████▎   | 498/782 [00:02<00:01, 209.63it/s]



Epoch [454/700]:  66%|██████▋   | 520/782 [00:02<00:01, 210.06it/s]



Epoch [454/700]:  69%|██████▉   | 542/782 [00:02<00:01, 209.88it/s]



Epoch [454/700]:  72%|███████▏  | 563/782 [00:02<00:01, 204.98it/s]



Epoch [454/700]:  75%|███████▍  | 584/782 [00:02<00:00, 205.99it/s]



Epoch [454/700]:  77%|███████▋  | 605/782 [00:02<00:00, 202.68it/s]



Epoch [454/700]:  80%|████████  | 627/782 [00:03<00:00, 205.13it/s]



Epoch [454/700]:  86%|████████▌ | 670/782 [00:03<00:00, 207.17it/s]



Epoch [454/700]:  91%|█████████ | 713/782 [00:03<00:00, 208.92it/s]



Epoch [454/700]:  97%|█████████▋| 756/782 [00:03<00:00, 206.40it/s]



Epoch [454/700]: 100%|██████████| 782/782 [00:03<00:00, 207.47it/s]


Learning Rate: 0.004500
Train Loss: 0.0598, Accuracy: 97.95%, Confidence: 0.9765
Test Loss: 2.0969, Accuracy: 72.82%, Confidence: 0.9393
Train-Test Accuracy Gap: 25.13%


Epoch [455/700]:   3%|▎         | 22/782 [00:00<00:03, 212.49it/s]



Epoch [455/700]:   6%|▌         | 44/782 [00:00<00:03, 209.47it/s]



Epoch [455/700]:   8%|▊         | 65/782 [00:00<00:03, 203.75it/s]



Epoch [455/700]:  11%|█         | 86/782 [00:00<00:03, 205.93it/s]



Epoch [455/700]:  14%|█▍        | 108/782 [00:00<00:03, 209.84it/s]



Epoch [455/700]:  17%|█▋        | 130/782 [00:00<00:03, 211.48it/s]



Epoch [455/700]:  22%|██▏       | 174/782 [00:00<00:02, 213.05it/s]



Epoch [455/700]:  28%|██▊       | 218/782 [00:01<00:02, 212.23it/s]



Epoch [455/700]:  31%|███       | 240/782 [00:01<00:02, 211.70it/s]



Epoch [455/700]:  34%|███▎      | 262/782 [00:01<00:02, 203.20it/s]



Epoch [455/700]:  36%|███▌      | 283/782 [00:01<00:02, 197.95it/s]



Epoch [455/700]:  39%|███▉      | 304/782 [00:01<00:02, 199.84it/s]



Epoch [455/700]:  42%|████▏     | 325/782 [00:01<00:02, 199.49it/s]



Epoch [455/700]:  44%|████▍     | 347/782 [00:01<00:02, 202.83it/s]



Epoch [455/700]:  47%|████▋     | 368/782 [00:01<00:02, 204.24it/s]



Epoch [455/700]:  50%|████▉     | 389/782 [00:01<00:01, 205.29it/s]



Epoch [455/700]:  52%|█████▏    | 410/782 [00:01<00:01, 205.20it/s]



Epoch [455/700]:  55%|█████▌    | 431/782 [00:02<00:01, 204.29it/s]



Epoch [455/700]:  58%|█████▊    | 452/782 [00:02<00:01, 198.39it/s]



Epoch [455/700]:  60%|██████    | 473/782 [00:02<00:01, 201.51it/s]



Epoch [455/700]:  63%|██████▎   | 494/782 [00:02<00:01, 202.34it/s]



Epoch [455/700]:  66%|██████▌   | 515/782 [00:02<00:01, 202.38it/s]



Epoch [455/700]:  69%|██████▊   | 537/782 [00:02<00:01, 205.04it/s]



Epoch [455/700]:  74%|███████▍  | 579/782 [00:02<00:00, 206.59it/s]



Epoch [455/700]:  79%|███████▉  | 621/782 [00:03<00:00, 205.06it/s]



Epoch [455/700]:  82%|████████▏ | 642/782 [00:03<00:00, 204.41it/s]



Epoch [455/700]:  87%|████████▋ | 684/782 [00:03<00:00, 201.98it/s]



Epoch [455/700]:  90%|█████████ | 705/782 [00:03<00:00, 202.22it/s]



Epoch [455/700]:  93%|█████████▎| 727/782 [00:03<00:00, 205.08it/s]



Epoch [455/700]:  96%|█████████▌| 748/782 [00:03<00:00, 204.76it/s]



Epoch [455/700]:  98%|█████████▊| 770/782 [00:03<00:00, 206.88it/s]



Epoch [455/700]: 100%|██████████| 782/782 [00:03<00:00, 205.15it/s]


Learning Rate: 0.004500
Train Loss: 0.0575, Accuracy: 98.09%, Confidence: 0.9765
Test Loss: 2.1467, Accuracy: 72.32%, Confidence: 0.9411
Train-Test Accuracy Gap: 25.77%


Epoch [456/700]:   3%|▎         | 21/782 [00:00<00:03, 207.37it/s]



Epoch [456/700]:  11%|█         | 87/782 [00:00<00:03, 211.70it/s]



Epoch [456/700]:  14%|█▍        | 109/782 [00:00<00:03, 205.17it/s]



Epoch [456/700]:  22%|██▏       | 173/782 [00:00<00:02, 207.52it/s]



Epoch [456/700]:  28%|██▊       | 216/782 [00:01<00:02, 206.40it/s]



Epoch [456/700]:  33%|███▎      | 258/782 [00:01<00:02, 206.58it/s]



Epoch [456/700]:  38%|███▊      | 301/782 [00:01<00:02, 208.99it/s]



Epoch [456/700]:  44%|████▍     | 344/782 [00:01<00:02, 207.26it/s]



Epoch [456/700]:  52%|█████▏    | 409/782 [00:01<00:01, 207.45it/s]



Epoch [456/700]:  58%|█████▊    | 452/782 [00:02<00:01, 210.48it/s]



Epoch [456/700]:  63%|██████▎   | 496/782 [00:02<00:01, 213.39it/s]



Epoch [456/700]:  69%|██████▉   | 540/782 [00:02<00:01, 213.94it/s]



Epoch [456/700]:  75%|███████▍  | 584/782 [00:02<00:00, 211.54it/s]



Epoch [456/700]:  80%|████████  | 627/782 [00:03<00:00, 207.37it/s]



Epoch [456/700]:  86%|████████▌ | 669/782 [00:03<00:00, 207.53it/s]



Epoch [456/700]:  91%|█████████ | 712/782 [00:03<00:00, 208.46it/s]



Epoch [456/700]:  96%|█████████▋| 754/782 [00:03<00:00, 208.22it/s]



Epoch [456/700]: 100%|██████████| 782/782 [00:03<00:00, 208.27it/s]


Learning Rate: 0.004500
Train Loss: 0.0616, Accuracy: 97.87%, Confidence: 0.9756
Test Loss: 2.1446, Accuracy: 71.99%, Confidence: 0.9390
Train-Test Accuracy Gap: 25.88%


Epoch [457/700]:   3%|▎         | 21/782 [00:00<00:03, 205.45it/s]



Epoch [457/700]:  11%|█         | 87/782 [00:00<00:03, 210.36it/s]



Epoch [457/700]:  17%|█▋        | 131/782 [00:00<00:03, 212.83it/s]



Epoch [457/700]:  22%|██▏       | 175/782 [00:00<00:02, 210.66it/s]



Epoch [457/700]:  25%|██▌       | 197/782 [00:00<00:02, 206.96it/s]



Epoch [457/700]:  31%|███       | 240/782 [00:01<00:02, 207.60it/s]



Epoch [457/700]:  36%|███▋      | 284/782 [00:01<00:02, 210.33it/s]



Epoch [457/700]:  42%|████▏     | 329/782 [00:01<00:02, 207.89it/s]



Epoch [457/700]:  48%|████▊     | 374/782 [00:01<00:01, 211.49it/s]



Epoch [457/700]:  53%|█████▎    | 418/782 [00:01<00:01, 210.56it/s]



Epoch [457/700]:  62%|██████▏   | 483/782 [00:02<00:01, 208.61it/s]



Epoch [457/700]:  67%|██████▋   | 526/782 [00:02<00:01, 211.12it/s]



Epoch [457/700]:  73%|███████▎  | 570/782 [00:02<00:01, 210.21it/s]



Epoch [457/700]:  79%|███████▊  | 614/782 [00:02<00:00, 211.79it/s]



Epoch [457/700]:  84%|████████▍ | 658/782 [00:03<00:00, 211.14it/s]



Epoch [457/700]:  90%|████████▉ | 702/782 [00:03<00:00, 211.12it/s]



Epoch [457/700]:  93%|█████████▎| 724/782 [00:03<00:00, 208.63it/s]



Epoch [457/700]: 100%|██████████| 782/782 [00:03<00:00, 209.20it/s]


Learning Rate: 0.004500
Train Loss: 0.0630, Accuracy: 97.87%, Confidence: 0.9761
Test Loss: 2.0995, Accuracy: 72.97%, Confidence: 0.9401
Train-Test Accuracy Gap: 24.90%


Epoch [458/700]:   3%|▎         | 21/782 [00:00<00:03, 205.13it/s]



Epoch [458/700]:   5%|▌         | 43/782 [00:00<00:03, 209.07it/s]



Epoch [458/700]:   8%|▊         | 65/782 [00:00<00:03, 210.37it/s]



Epoch [458/700]:  11%|█         | 87/782 [00:00<00:03, 211.92it/s]



Epoch [458/700]:  14%|█▍        | 109/782 [00:00<00:03, 209.67it/s]



Epoch [458/700]:  17%|█▋        | 130/782 [00:00<00:03, 206.56it/s]



Epoch [458/700]:  19%|█▉        | 151/782 [00:00<00:03, 201.60it/s]



Epoch [458/700]:  22%|██▏       | 172/782 [00:00<00:03, 202.69it/s]



Epoch [458/700]:  25%|██▍       | 194/782 [00:00<00:02, 205.97it/s]



Epoch [458/700]:  27%|██▋       | 215/782 [00:01<00:02, 205.18it/s]



Epoch [458/700]:  30%|███       | 236/782 [00:01<00:02, 204.18it/s]



Epoch [458/700]:  42%|████▏     | 325/782 [00:01<00:02, 213.89it/s]



Epoch [458/700]:  47%|████▋     | 369/782 [00:01<00:01, 210.33it/s]



Epoch [458/700]:  53%|█████▎    | 412/782 [00:01<00:01, 206.06it/s]



Epoch [458/700]:  58%|█████▊    | 454/782 [00:02<00:01, 204.33it/s]



Epoch [458/700]:  64%|██████▎   | 498/782 [00:02<00:01, 208.13it/s]



Epoch [458/700]:  69%|██████▉   | 540/782 [00:02<00:01, 205.64it/s]



Epoch [458/700]:  75%|███████▍  | 583/782 [00:02<00:00, 207.99it/s]



Epoch [458/700]:  80%|████████  | 626/782 [00:03<00:00, 209.02it/s]



Epoch [458/700]:  85%|████████▌ | 668/782 [00:03<00:00, 204.90it/s]



Epoch [458/700]:  91%|█████████ | 710/782 [00:03<00:00, 204.54it/s]



Epoch [458/700]:  96%|█████████▋| 754/782 [00:03<00:00, 208.44it/s]



Epoch [458/700]: 100%|██████████| 782/782 [00:03<00:00, 206.91it/s]


Learning Rate: 0.004500
Train Loss: 0.0558, Accuracy: 98.14%, Confidence: 0.9771
Test Loss: 2.3959, Accuracy: 70.28%, Confidence: 0.9425
Train-Test Accuracy Gap: 27.86%


Epoch [459/700]:   2%|▏         | 19/782 [00:00<00:04, 185.86it/s]



Epoch [459/700]:  11%|█         | 83/782 [00:00<00:03, 209.64it/s]



Epoch [459/700]:  16%|█▌        | 127/782 [00:00<00:03, 212.10it/s]



Epoch [459/700]:  22%|██▏       | 170/782 [00:00<00:02, 207.18it/s]



Epoch [459/700]:  27%|██▋       | 212/782 [00:01<00:02, 202.43it/s]



Epoch [459/700]:  32%|███▏      | 254/782 [00:01<00:02, 202.62it/s]



Epoch [459/700]:  38%|███▊      | 296/782 [00:01<00:02, 205.07it/s]



Epoch [459/700]:  43%|████▎     | 339/782 [00:01<00:02, 206.46it/s]



Epoch [459/700]:  52%|█████▏    | 403/782 [00:01<00:01, 208.55it/s]



Epoch [459/700]:  57%|█████▋    | 445/782 [00:02<00:01, 206.22it/s]



Epoch [459/700]:  62%|██████▏   | 488/782 [00:02<00:01, 207.90it/s]



Epoch [459/700]:  68%|██████▊   | 531/782 [00:02<00:01, 207.45it/s]



Epoch [459/700]:  73%|███████▎  | 573/782 [00:02<00:01, 208.46it/s]



Epoch [459/700]:  81%|████████▏ | 637/782 [00:03<00:00, 207.80it/s]



Epoch [459/700]:  87%|████████▋ | 679/782 [00:03<00:00, 207.92it/s]



Epoch [459/700]:  92%|█████████▏| 721/782 [00:03<00:00, 206.80it/s]



Epoch [459/700]:  98%|█████████▊| 763/782 [00:03<00:00, 205.71it/s]



Epoch [459/700]: 100%|██████████| 782/782 [00:03<00:00, 206.10it/s]


Learning Rate: 0.004500
Train Loss: 0.0689, Accuracy: 97.79%, Confidence: 0.9758
Test Loss: 2.2935, Accuracy: 70.80%, Confidence: 0.9379
Train-Test Accuracy Gap: 26.99%


Epoch [460/700]:   3%|▎         | 20/782 [00:00<00:03, 194.83it/s]



Epoch [460/700]:   5%|▌         | 42/782 [00:00<00:03, 204.05it/s]



Epoch [460/700]:   8%|▊         | 63/782 [00:00<00:03, 206.36it/s]



Epoch [460/700]:  11%|█         | 84/782 [00:00<00:03, 201.84it/s]



Epoch [460/700]:  13%|█▎        | 105/782 [00:00<00:03, 201.72it/s]



Epoch [460/700]:  16%|█▌        | 126/782 [00:00<00:03, 202.01it/s]



Epoch [460/700]:  19%|█▉        | 147/782 [00:00<00:03, 202.50it/s]



Epoch [460/700]:  21%|██▏       | 168/782 [00:00<00:03, 202.93it/s]



Epoch [460/700]:  24%|██▍       | 190/782 [00:00<00:02, 206.25it/s]



Epoch [460/700]:  27%|██▋       | 212/782 [00:01<00:02, 208.57it/s]



Epoch [460/700]:  30%|██▉       | 234/782 [00:01<00:02, 210.00it/s]



Epoch [460/700]:  33%|███▎      | 256/782 [00:01<00:02, 206.97it/s]



Epoch [460/700]:  35%|███▌      | 277/782 [00:01<00:02, 205.01it/s]



Epoch [460/700]:  38%|███▊      | 298/782 [00:01<00:02, 205.63it/s]



Epoch [460/700]:  41%|████      | 320/782 [00:01<00:02, 207.19it/s]



Epoch [460/700]:  44%|████▎     | 341/782 [00:01<00:02, 207.81it/s]



Epoch [460/700]:  49%|████▉     | 384/782 [00:01<00:01, 203.76it/s]



Epoch [460/700]:  55%|█████▍    | 427/782 [00:02<00:01, 207.07it/s]



Epoch [460/700]:  60%|██████    | 470/782 [00:02<00:01, 206.87it/s]



Epoch [460/700]:  66%|██████▌   | 513/782 [00:02<00:01, 203.54it/s]



Epoch [460/700]:  74%|███████▍  | 579/782 [00:02<00:00, 209.89it/s]



Epoch [460/700]:  80%|███████▉  | 624/782 [00:03<00:00, 213.47it/s]



Epoch [460/700]:  85%|████████▌ | 668/782 [00:03<00:00, 213.69it/s]



Epoch [460/700]:  91%|█████████ | 712/782 [00:03<00:00, 211.37it/s]



Epoch [460/700]:  97%|█████████▋| 755/782 [00:03<00:00, 208.17it/s]



Epoch [460/700]: 100%|██████████| 782/782 [00:03<00:00, 206.89it/s]


Learning Rate: 0.004500
Train Loss: 0.0575, Accuracy: 98.03%, Confidence: 0.9766
Test Loss: 2.2245, Accuracy: 71.81%, Confidence: 0.9428
Train-Test Accuracy Gap: 26.22%


Epoch [461/700]:   3%|▎         | 20/782 [00:00<00:03, 195.55it/s]



Epoch [461/700]:   5%|▌         | 42/782 [00:00<00:03, 208.02it/s]



Epoch [461/700]:   8%|▊         | 64/782 [00:00<00:03, 211.12it/s]



Epoch [461/700]:  11%|█         | 86/782 [00:00<00:03, 211.41it/s]



Epoch [461/700]:  14%|█▍        | 108/782 [00:00<00:03, 206.45it/s]



Epoch [461/700]:  16%|█▋        | 129/782 [00:00<00:03, 205.68it/s]



Epoch [461/700]:  19%|█▉        | 151/782 [00:00<00:03, 207.81it/s]



Epoch [461/700]:  28%|██▊       | 217/782 [00:01<00:02, 213.68it/s]



Epoch [461/700]:  36%|███▌      | 283/782 [00:01<00:02, 210.51it/s]



Epoch [461/700]:  42%|████▏     | 327/782 [00:01<00:02, 208.45it/s]



Epoch [461/700]:  47%|████▋     | 369/782 [00:01<00:01, 206.56it/s]



Epoch [461/700]:  53%|█████▎    | 412/782 [00:01<00:01, 204.93it/s]



Epoch [461/700]:  58%|█████▊    | 454/782 [00:02<00:01, 202.32it/s]



Epoch [461/700]:  63%|██████▎   | 496/782 [00:02<00:01, 201.80it/s]



Epoch [461/700]:  69%|██████▉   | 538/782 [00:02<00:01, 202.49it/s]



Epoch [461/700]:  74%|███████▍  | 580/782 [00:02<00:00, 204.09it/s]



Epoch [461/700]:  80%|███████▉  | 622/782 [00:03<00:00, 206.54it/s]



Epoch [461/700]:  85%|████████▍ | 664/782 [00:03<00:00, 199.90it/s]



Epoch [461/700]:  90%|█████████ | 707/782 [00:03<00:00, 204.93it/s]



Epoch [461/700]:  96%|█████████▌| 749/782 [00:03<00:00, 205.09it/s]



Epoch [461/700]: 100%|██████████| 782/782 [00:03<00:00, 205.57it/s]


Learning Rate: 0.004500
Train Loss: 0.0603, Accuracy: 97.92%, Confidence: 0.9762
Test Loss: 2.3521, Accuracy: 70.04%, Confidence: 0.9372
Train-Test Accuracy Gap: 27.88%


Epoch [462/700]:   3%|▎         | 21/782 [00:00<00:03, 205.90it/s]



Epoch [462/700]:   5%|▌         | 42/782 [00:00<00:03, 205.77it/s]



Epoch [462/700]:  11%|█         | 85/782 [00:00<00:03, 206.63it/s]



Epoch [462/700]:  16%|█▋        | 128/782 [00:00<00:03, 207.95it/s]



Epoch [462/700]:  19%|█▉        | 150/782 [00:00<00:03, 208.84it/s]



Epoch [462/700]:  22%|██▏       | 171/782 [00:00<00:02, 204.90it/s]



Epoch [462/700]:  25%|██▍       | 192/782 [00:00<00:02, 206.33it/s]



Epoch [462/700]:  27%|██▋       | 214/782 [00:01<00:02, 209.14it/s]



Epoch [462/700]:  30%|███       | 236/782 [00:01<00:02, 210.37it/s]



Epoch [462/700]:  33%|███▎      | 258/782 [00:01<00:02, 208.48it/s]



Epoch [462/700]:  38%|███▊      | 301/782 [00:01<00:02, 209.06it/s]



Epoch [462/700]:  44%|████▍     | 345/782 [00:01<00:02, 212.31it/s]



Epoch [462/700]:  50%|████▉     | 389/782 [00:01<00:01, 212.07it/s]



Epoch [462/700]:  55%|█████▌    | 433/782 [00:02<00:01, 212.80it/s]



Epoch [462/700]:  61%|██████    | 477/782 [00:02<00:01, 215.22it/s]



Epoch [462/700]:  67%|██████▋   | 521/782 [00:02<00:01, 216.64it/s]



Epoch [462/700]:  72%|███████▏  | 565/782 [00:02<00:01, 215.27it/s]



Epoch [462/700]:  78%|███████▊  | 609/782 [00:02<00:00, 210.38it/s]



Epoch [462/700]:  83%|████████▎ | 652/782 [00:03<00:00, 206.39it/s]



Epoch [462/700]:  89%|████████▉ | 695/782 [00:03<00:00, 205.58it/s]



Epoch [462/700]:  94%|█████████▍| 737/782 [00:03<00:00, 196.63it/s]



Epoch [462/700]: 100%|██████████| 782/782 [00:03<00:00, 208.62it/s]






Learning Rate: 0.004500
Train Loss: 0.0585, Accuracy: 97.99%, Confidence: 0.9764
Test Loss: 2.0096, Accuracy: 73.49%, Confidence: 0.9425
Train-Test Accuracy Gap: 24.50%


Epoch [463/700]:   3%|▎         | 20/782 [00:00<00:03, 193.45it/s]



Epoch [463/700]:   8%|▊         | 63/782 [00:00<00:03, 204.63it/s]



Epoch [463/700]:  14%|█▎        | 106/782 [00:00<00:03, 204.68it/s]



Epoch [463/700]:  16%|█▋        | 128/782 [00:00<00:03, 208.19it/s]



Epoch [463/700]:  19%|█▉        | 149/782 [00:00<00:03, 207.96it/s]



Epoch [463/700]:  22%|██▏       | 171/782 [00:00<00:02, 208.76it/s]



Epoch [463/700]:  27%|██▋       | 214/782 [00:01<00:02, 206.94it/s]



Epoch [463/700]:  33%|███▎      | 257/782 [00:01<00:02, 208.66it/s]



Epoch [463/700]:  38%|███▊      | 299/782 [00:01<00:02, 206.84it/s]



Epoch [463/700]:  44%|████▎     | 342/782 [00:01<00:02, 205.31it/s]



Epoch [463/700]:  49%|████▉     | 385/782 [00:01<00:01, 207.61it/s]



Epoch [463/700]:  55%|█████▍    | 427/782 [00:02<00:01, 204.53it/s]



Epoch [463/700]:  60%|██████    | 470/782 [00:02<00:01, 206.48it/s]



Epoch [463/700]:  66%|██████▌   | 513/782 [00:02<00:01, 206.65it/s]



Epoch [463/700]:  71%|███████   | 556/782 [00:02<00:01, 208.81it/s]



Epoch [463/700]:  77%|███████▋  | 599/782 [00:02<00:00, 209.35it/s]



Epoch [463/700]:  82%|████████▏ | 641/782 [00:03<00:00, 206.69it/s]



Epoch [463/700]:  88%|████████▊ | 685/782 [00:03<00:00, 210.74it/s]



Epoch [463/700]:  93%|█████████▎| 729/782 [00:03<00:00, 207.18it/s]



Epoch [463/700]:  99%|█████████▊| 772/782 [00:03<00:00, 209.78it/s]



Epoch [463/700]: 100%|██████████| 782/782 [00:03<00:00, 206.67it/s]


Learning Rate: 0.004500
Train Loss: 0.0654, Accuracy: 97.75%, Confidence: 0.9757
Test Loss: 2.1561, Accuracy: 72.52%, Confidence: 0.9401
Train-Test Accuracy Gap: 25.23%


Epoch [464/700]:   3%|▎         | 21/782 [00:00<00:03, 207.04it/s]



Epoch [464/700]:   5%|▌         | 43/782 [00:00<00:03, 210.20it/s]



Epoch [464/700]:  11%|█         | 87/782 [00:00<00:03, 208.76it/s]



Epoch [464/700]:  17%|█▋        | 130/782 [00:00<00:03, 209.98it/s]



Epoch [464/700]:  22%|██▏       | 174/782 [00:00<00:02, 208.62it/s]



Epoch [464/700]:  28%|██▊       | 216/782 [00:01<00:02, 205.57it/s]



Epoch [464/700]:  33%|███▎      | 258/782 [00:01<00:02, 200.99it/s]



Epoch [464/700]:  38%|███▊      | 300/782 [00:01<00:02, 202.09it/s]



Epoch [464/700]:  44%|████▍     | 343/782 [00:01<00:02, 205.83it/s]



Epoch [464/700]:  49%|████▉     | 386/782 [00:01<00:01, 206.72it/s]



Epoch [464/700]:  55%|█████▍    | 429/782 [00:02<00:01, 206.42it/s]



Epoch [464/700]:  60%|██████    | 471/782 [00:02<00:01, 203.02it/s]



Epoch [464/700]:  66%|██████▌   | 514/782 [00:02<00:01, 204.76it/s]



Epoch [464/700]:  71%|███████   | 556/782 [00:02<00:01, 206.15it/s]



Epoch [464/700]:  76%|███████▋  | 598/782 [00:02<00:00, 202.55it/s]



Epoch [464/700]:  84%|████████▍ | 659/782 [00:03<00:00, 196.43it/s]



Epoch [464/700]:  90%|████████▉ | 702/782 [00:03<00:00, 202.14it/s]



Epoch [464/700]:  95%|█████████▌| 745/782 [00:03<00:00, 203.14it/s]



Epoch [464/700]: 100%|██████████| 782/782 [00:03<00:00, 203.97it/s]


Learning Rate: 0.004500
Train Loss: 0.0609, Accuracy: 97.94%, Confidence: 0.9759
Test Loss: 2.5122, Accuracy: 69.89%, Confidence: 0.9385
Train-Test Accuracy Gap: 28.05%


Epoch [465/700]:   3%|▎         | 20/782 [00:00<00:03, 197.56it/s]



Epoch [465/700]:   5%|▌         | 42/782 [00:00<00:03, 204.13it/s]



Epoch [465/700]:   8%|▊         | 63/782 [00:00<00:03, 200.79it/s]



Epoch [465/700]:  11%|█         | 84/782 [00:00<00:03, 201.31it/s]



Epoch [465/700]:  13%|█▎        | 105/782 [00:00<00:03, 200.40it/s]



Epoch [465/700]:  22%|██▏       | 170/782 [00:00<00:02, 207.78it/s]



Epoch [465/700]:  27%|██▋       | 213/782 [00:01<00:02, 206.48it/s]



Epoch [465/700]:  33%|███▎      | 255/782 [00:01<00:02, 207.51it/s]



Epoch [465/700]:  38%|███▊      | 298/782 [00:01<00:02, 208.49it/s]



Epoch [465/700]:  43%|████▎     | 340/782 [00:01<00:02, 204.36it/s]



Epoch [465/700]:  49%|████▉     | 382/782 [00:01<00:01, 202.56it/s]



Epoch [465/700]:  54%|█████▍    | 424/782 [00:02<00:01, 204.98it/s]



Epoch [465/700]:  60%|█████▉    | 466/782 [00:02<00:01, 200.26it/s]



Epoch [465/700]:  65%|██████▌   | 509/782 [00:02<00:01, 204.67it/s]



Epoch [465/700]:  71%|███████   | 552/782 [00:02<00:01, 207.28it/s]



Epoch [465/700]:  76%|███████▌  | 594/782 [00:02<00:00, 205.89it/s]



Epoch [465/700]:  81%|████████▏ | 637/782 [00:03<00:00, 208.10it/s]



Epoch [465/700]:  87%|████████▋ | 680/782 [00:03<00:00, 208.24it/s]



Epoch [465/700]:  92%|█████████▏| 723/782 [00:03<00:00, 209.41it/s]



Epoch [465/700]:  98%|█████████▊| 766/782 [00:03<00:00, 208.39it/s]



Epoch [465/700]: 100%|██████████| 782/782 [00:03<00:00, 205.06it/s]


Learning Rate: 0.004500
Train Loss: 0.0647, Accuracy: 97.69%, Confidence: 0.9758
Test Loss: 2.4197, Accuracy: 69.81%, Confidence: 0.9388
Train-Test Accuracy Gap: 27.88%


Epoch [466/700]:   3%|▎         | 20/782 [00:00<00:03, 196.51it/s]



Epoch [466/700]:  11%|█         | 83/782 [00:00<00:03, 207.95it/s]



Epoch [466/700]:  16%|█▌        | 126/782 [00:00<00:03, 209.73it/s]



Epoch [466/700]:  22%|██▏       | 169/782 [00:00<00:02, 208.58it/s]



Epoch [466/700]:  27%|██▋       | 212/782 [00:01<00:02, 210.49it/s]



Epoch [466/700]:  33%|███▎      | 256/782 [00:01<00:02, 205.31it/s]



Epoch [466/700]:  38%|███▊      | 299/782 [00:01<00:02, 206.26it/s]



Epoch [466/700]:  44%|████▎     | 341/782 [00:01<00:02, 206.95it/s]



Epoch [466/700]:  49%|████▉     | 383/782 [00:01<00:01, 206.44it/s]



Epoch [466/700]:  54%|█████▍    | 426/782 [00:02<00:01, 206.80it/s]



Epoch [466/700]:  60%|█████▉    | 469/782 [00:02<00:01, 208.49it/s]



Epoch [466/700]:  66%|██████▌   | 513/782 [00:02<00:01, 210.21it/s]



Epoch [466/700]:  71%|███████   | 557/782 [00:02<00:01, 210.70it/s]



Epoch [466/700]:  77%|███████▋  | 601/782 [00:02<00:00, 209.37it/s]



Epoch [466/700]:  82%|████████▏ | 644/782 [00:03<00:00, 208.72it/s]



Epoch [466/700]:  88%|████████▊ | 688/782 [00:03<00:00, 209.31it/s]



Epoch [466/700]:  93%|█████████▎| 730/782 [00:03<00:00, 207.56it/s]



Epoch [466/700]: 100%|██████████| 782/782 [00:03<00:00, 207.43it/s]






Learning Rate: 0.004500
Train Loss: 0.0622, Accuracy: 97.82%, Confidence: 0.9763
Test Loss: 1.9955, Accuracy: 74.09%, Confidence: 0.9420
Train-Test Accuracy Gap: 23.73%


Epoch [467/700]:   3%|▎         | 20/782 [00:00<00:03, 199.33it/s]



Epoch [467/700]:   5%|▌         | 42/782 [00:00<00:03, 208.85it/s]



Epoch [467/700]:   8%|▊         | 63/782 [00:00<00:03, 196.05it/s]



Epoch [467/700]:  11%|█         | 83/782 [00:00<00:03, 196.53it/s]



Epoch [467/700]:  16%|█▌        | 126/782 [00:00<00:03, 204.98it/s]



Epoch [467/700]:  22%|██▏       | 170/782 [00:00<00:02, 208.33it/s]



Epoch [467/700]:  25%|██▍       | 192/782 [00:00<00:02, 209.42it/s]



Epoch [467/700]:  27%|██▋       | 213/782 [00:01<00:02, 205.81it/s]



Epoch [467/700]:  30%|██▉       | 234/782 [00:01<00:02, 205.77it/s]



Epoch [467/700]:  33%|███▎      | 255/782 [00:01<00:02, 203.41it/s]



Epoch [467/700]:  35%|███▌      | 276/782 [00:01<00:02, 205.33it/s]



Epoch [467/700]:  38%|███▊      | 297/782 [00:01<00:02, 204.40it/s]



Epoch [467/700]:  41%|████      | 318/782 [00:01<00:02, 204.94it/s]



Epoch [467/700]:  43%|████▎     | 339/782 [00:01<00:02, 203.95it/s]



Epoch [467/700]:  46%|████▌     | 360/782 [00:01<00:02, 204.98it/s]



Epoch [467/700]:  49%|████▊     | 381/782 [00:01<00:01, 203.74it/s]



Epoch [467/700]:  52%|█████▏    | 403/782 [00:01<00:01, 206.66it/s]



Epoch [467/700]:  54%|█████▍    | 424/782 [00:02<00:01, 205.67it/s]



Epoch [467/700]:  57%|█████▋    | 445/782 [00:02<00:01, 205.87it/s]



Epoch [467/700]:  60%|█████▉    | 466/782 [00:02<00:01, 201.61it/s]



Epoch [467/700]:  62%|██████▏   | 487/782 [00:02<00:01, 203.59it/s]



Epoch [467/700]:  65%|██████▍   | 508/782 [00:02<00:01, 204.36it/s]



Epoch [467/700]:  68%|██████▊   | 529/782 [00:02<00:01, 205.08it/s]



Epoch [467/700]:  73%|███████▎  | 571/782 [00:02<00:01, 205.42it/s]



Epoch [467/700]:  76%|███████▌  | 592/782 [00:02<00:00, 202.95it/s]



Epoch [467/700]:  78%|███████▊  | 613/782 [00:03<00:00, 198.85it/s]



Epoch [467/700]:  81%|████████  | 633/782 [00:03<00:00, 193.75it/s]



Epoch [467/700]:  84%|████████▎ | 653/782 [00:03<00:00, 192.95it/s]



Epoch [467/700]:  86%|████████▌ | 673/782 [00:03<00:00, 191.66it/s]



Epoch [467/700]:  89%|████████▊ | 694/782 [00:03<00:00, 195.96it/s]



Epoch [467/700]:  91%|█████████▏| 715/782 [00:03<00:00, 199.83it/s]



Epoch [467/700]:  94%|█████████▍| 737/782 [00:03<00:00, 202.82it/s]



Epoch [467/700]:  97%|█████████▋| 759/782 [00:03<00:00, 206.22it/s]



Epoch [467/700]: 100%|██████████| 782/782 [00:03<00:00, 203.24it/s]






Learning Rate: 0.004500
Train Loss: 0.0625, Accuracy: 97.88%, Confidence: 0.9765
Test Loss: 2.6747, Accuracy: 67.91%, Confidence: 0.9388
Train-Test Accuracy Gap: 29.97%


Epoch [468/700]:   3%|▎         | 21/782 [00:00<00:03, 203.03it/s]



Epoch [468/700]:  11%|█         | 86/782 [00:00<00:03, 209.47it/s]



Epoch [468/700]:  16%|█▋        | 128/782 [00:00<00:03, 204.67it/s]



Epoch [468/700]:  22%|██▏       | 171/782 [00:00<00:02, 208.16it/s]



Epoch [468/700]:  27%|██▋       | 214/782 [00:01<00:02, 208.09it/s]



Epoch [468/700]:  33%|███▎      | 256/782 [00:01<00:02, 206.55it/s]



Epoch [468/700]:  38%|███▊      | 298/782 [00:01<00:02, 205.75it/s]



Epoch [468/700]:  44%|████▎     | 341/782 [00:01<00:02, 209.30it/s]



Epoch [468/700]:  49%|████▉     | 385/782 [00:01<00:01, 210.60it/s]



Epoch [468/700]:  55%|█████▍    | 429/782 [00:02<00:01, 208.05it/s]



Epoch [468/700]:  60%|██████    | 472/782 [00:02<00:01, 207.38it/s]



Epoch [468/700]:  66%|██████▌   | 516/782 [00:02<00:01, 210.87it/s]



Epoch [468/700]:  72%|███████▏  | 560/782 [00:02<00:01, 207.96it/s]



Epoch [468/700]:  80%|████████  | 626/782 [00:03<00:00, 212.15it/s]



Epoch [468/700]:  86%|████████▌ | 670/782 [00:03<00:00, 211.97it/s]



Epoch [468/700]:  91%|█████████▏| 714/782 [00:03<00:00, 211.25it/s]



Epoch [468/700]:  97%|█████████▋| 758/782 [00:03<00:00, 209.27it/s]



Epoch [468/700]: 100%|██████████| 782/782 [00:03<00:00, 208.18it/s]


Learning Rate: 0.004500
Train Loss: 0.0540, Accuracy: 98.18%, Confidence: 0.9776
Test Loss: 2.1054, Accuracy: 72.59%, Confidence: 0.9395
Train-Test Accuracy Gap: 25.59%


Epoch [469/700]:   3%|▎         | 21/782 [00:00<00:03, 205.37it/s]



Epoch [469/700]:   5%|▌         | 42/782 [00:00<00:03, 207.20it/s]



Epoch [469/700]:  11%|█         | 84/782 [00:00<00:03, 202.37it/s]



Epoch [469/700]:  16%|█▌        | 126/782 [00:00<00:03, 204.15it/s]



Epoch [469/700]:  21%|██▏       | 168/782 [00:00<00:02, 204.78it/s]



Epoch [469/700]:  27%|██▋       | 210/782 [00:01<00:02, 203.75it/s]



Epoch [469/700]:  30%|██▉       | 231/782 [00:01<00:02, 201.11it/s]



Epoch [469/700]:  32%|███▏      | 252/782 [00:01<00:02, 200.48it/s]



Epoch [469/700]:  35%|███▍      | 273/782 [00:01<00:02, 200.08it/s]



Epoch [469/700]:  43%|████▎     | 336/782 [00:01<00:02, 202.26it/s]



Epoch [469/700]:  48%|████▊     | 379/782 [00:01<00:01, 206.06it/s]



Epoch [469/700]:  54%|█████▍    | 422/782 [00:02<00:01, 206.62it/s]



Epoch [469/700]:  59%|█████▉    | 465/782 [00:02<00:01, 208.99it/s]



Epoch [469/700]:  65%|██████▌   | 509/782 [00:02<00:01, 209.56it/s]



Epoch [469/700]:  71%|███████   | 553/782 [00:02<00:01, 211.71it/s]



Epoch [469/700]:  76%|███████▌  | 596/782 [00:02<00:00, 206.55it/s]



Epoch [469/700]:  82%|████████▏ | 638/782 [00:03<00:00, 207.11it/s]



Epoch [469/700]:  90%|█████████ | 704/782 [00:03<00:00, 205.87it/s]



Epoch [469/700]:  96%|█████████▌| 748/782 [00:03<00:00, 208.93it/s]



Epoch [469/700]: 100%|██████████| 782/782 [00:03<00:00, 205.73it/s]


Learning Rate: 0.004500
Train Loss: 0.0616, Accuracy: 97.87%, Confidence: 0.9768
Test Loss: 2.9217, Accuracy: 67.30%, Confidence: 0.9411
Train-Test Accuracy Gap: 30.57%


Epoch [470/700]:   3%|▎         | 20/782 [00:00<00:03, 193.41it/s]



Epoch [470/700]:   5%|▌         | 41/782 [00:00<00:03, 201.89it/s]



Epoch [470/700]:  11%|█         | 85/782 [00:00<00:03, 208.97it/s]



Epoch [470/700]:  16%|█▋        | 129/782 [00:00<00:03, 212.43it/s]



Epoch [470/700]:  22%|██▏       | 173/782 [00:00<00:02, 213.32it/s]



Epoch [470/700]:  28%|██▊       | 217/782 [00:01<00:02, 210.09it/s]



Epoch [470/700]:  33%|███▎      | 261/782 [00:01<00:02, 210.00it/s]



Epoch [470/700]:  39%|███▉      | 304/782 [00:01<00:02, 200.85it/s]



Epoch [470/700]:  44%|████▍     | 346/782 [00:01<00:02, 199.99it/s]



Epoch [470/700]:  49%|████▉     | 387/782 [00:01<00:01, 198.26it/s]



Epoch [470/700]:  55%|█████▍    | 430/782 [00:02<00:01, 204.63it/s]



Epoch [470/700]:  60%|██████    | 472/782 [00:02<00:01, 202.75it/s]



Epoch [470/700]:  66%|██████▌   | 514/782 [00:02<00:01, 205.45it/s]



Epoch [470/700]:  68%|██████▊   | 535/782 [00:02<00:01, 204.52it/s]



Epoch [470/700]:  71%|███████   | 556/782 [00:02<00:01, 199.74it/s]



Epoch [470/700]:  74%|███████▍  | 577/782 [00:02<00:01, 199.09it/s]



Epoch [470/700]:  76%|███████▋  | 597/782 [00:02<00:00, 198.09it/s]



Epoch [470/700]:  79%|███████▉  | 617/782 [00:03<00:00, 195.25it/s]



Epoch [470/700]:  82%|████████▏ | 639/782 [00:03<00:00, 199.60it/s]



Epoch [470/700]:  84%|████████▍ | 660/782 [00:03<00:00, 202.37it/s]



Epoch [470/700]:  87%|████████▋ | 681/782 [00:03<00:00, 204.18it/s]



Epoch [470/700]:  90%|████████▉ | 702/782 [00:03<00:00, 205.16it/s]



Epoch [470/700]:  92%|█████████▏| 723/782 [00:03<00:00, 204.27it/s]



Epoch [470/700]:  95%|█████████▌| 744/782 [00:03<00:00, 203.20it/s]



Epoch [470/700]:  98%|█████████▊| 765/782 [00:03<00:00, 203.74it/s]



Epoch [470/700]: 100%|██████████| 782/782 [00:03<00:00, 203.91it/s]


Learning Rate: 0.004500
Train Loss: 0.0564, Accuracy: 98.00%, Confidence: 0.9773
Test Loss: 2.2655, Accuracy: 70.71%, Confidence: 0.9380
Train-Test Accuracy Gap: 27.29%


Epoch [471/700]:   3%|▎         | 22/782 [00:00<00:03, 211.29it/s]



Epoch [471/700]:   6%|▌         | 44/782 [00:00<00:03, 211.74it/s]



Epoch [471/700]:   8%|▊         | 66/782 [00:00<00:03, 211.59it/s]



Epoch [471/700]:  17%|█▋        | 132/782 [00:00<00:03, 211.28it/s]



Epoch [471/700]:  23%|██▎       | 176/782 [00:00<00:02, 208.89it/s]



Epoch [471/700]:  28%|██▊       | 219/782 [00:01<00:02, 210.49it/s]



Epoch [471/700]:  34%|███▎      | 262/782 [00:01<00:02, 209.78it/s]



Epoch [471/700]:  39%|███▉      | 305/782 [00:01<00:02, 206.55it/s]



Epoch [471/700]:  45%|████▍     | 348/782 [00:01<00:02, 208.28it/s]



Epoch [471/700]:  50%|████▉     | 390/782 [00:01<00:01, 205.79it/s]



Epoch [471/700]:  55%|█████▌    | 433/782 [00:02<00:01, 209.46it/s]



Epoch [471/700]:  61%|██████    | 477/782 [00:02<00:01, 210.94it/s]



Epoch [471/700]:  66%|██████▋   | 520/782 [00:02<00:01, 209.48it/s]



Epoch [471/700]:  72%|███████▏  | 562/782 [00:02<00:01, 207.45it/s]



Epoch [471/700]:  77%|███████▋  | 605/782 [00:02<00:00, 208.21it/s]



Epoch [471/700]:  83%|████████▎ | 648/782 [00:03<00:00, 210.42it/s]



Epoch [471/700]:  88%|████████▊ | 692/782 [00:03<00:00, 207.68it/s]



Epoch [471/700]:  94%|█████████▍| 735/782 [00:03<00:00, 208.56it/s]



Epoch [471/700]: 100%|██████████| 782/782 [00:03<00:00, 208.27it/s]






Learning Rate: 0.004500
Train Loss: 0.0574, Accuracy: 98.06%, Confidence: 0.9769
Test Loss: 2.4514, Accuracy: 70.48%, Confidence: 0.9403
Train-Test Accuracy Gap: 27.58%


Epoch [472/700]:   3%|▎         | 21/782 [00:00<00:03, 201.08it/s]



Epoch [472/700]:   5%|▌         | 43/782 [00:00<00:03, 206.45it/s]



Epoch [472/700]:   8%|▊         | 65/782 [00:00<00:03, 209.48it/s]



Epoch [472/700]:  11%|█         | 86/782 [00:00<00:03, 207.82it/s]



Epoch [472/700]:  14%|█▎        | 107/782 [00:00<00:03, 205.18it/s]



Epoch [472/700]:  16%|█▋        | 129/782 [00:00<00:03, 207.82it/s]



Epoch [472/700]:  22%|██▏       | 171/782 [00:00<00:02, 205.71it/s]



Epoch [472/700]:  27%|██▋       | 214/782 [00:01<00:02, 207.99it/s]



Epoch [472/700]:  33%|███▎      | 256/782 [00:01<00:02, 204.05it/s]



Epoch [472/700]:  38%|███▊      | 300/782 [00:01<00:02, 208.32it/s]



Epoch [472/700]:  44%|████▍     | 344/782 [00:01<00:02, 210.51it/s]



Epoch [472/700]:  50%|████▉     | 388/782 [00:01<00:01, 210.20it/s]



Epoch [472/700]:  55%|█████▌    | 432/782 [00:02<00:01, 210.03it/s]



Epoch [472/700]:  61%|██████    | 476/782 [00:02<00:01, 210.62it/s]



Epoch [472/700]:  66%|██████▋   | 520/782 [00:02<00:01, 210.44it/s]



Epoch [472/700]:  72%|███████▏  | 563/782 [00:02<00:01, 206.00it/s]



Epoch [472/700]:  77%|███████▋  | 605/782 [00:02<00:00, 207.32it/s]



Epoch [472/700]:  83%|████████▎ | 647/782 [00:03<00:00, 207.43it/s]



Epoch [472/700]:  88%|████████▊ | 691/782 [00:03<00:00, 209.46it/s]



Epoch [472/700]:  94%|█████████▍| 735/782 [00:03<00:00, 210.74it/s]



Epoch [472/700]: 100%|██████████| 782/782 [00:03<00:00, 207.86it/s]






Learning Rate: 0.004500
Train Loss: 0.0613, Accuracy: 97.97%, Confidence: 0.9767
Test Loss: 2.1907, Accuracy: 72.04%, Confidence: 0.9407
Train-Test Accuracy Gap: 25.93%


Epoch [473/700]:   3%|▎         | 21/782 [00:00<00:03, 206.71it/s]



Epoch [473/700]:   5%|▌         | 43/782 [00:00<00:03, 208.89it/s]



Epoch [473/700]:   8%|▊         | 65/782 [00:00<00:03, 211.32it/s]



Epoch [473/700]:  11%|█         | 87/782 [00:00<00:03, 201.24it/s]



Epoch [473/700]:  16%|█▋        | 128/782 [00:00<00:03, 198.67it/s]



Epoch [473/700]:  22%|██▏       | 171/782 [00:00<00:02, 204.46it/s]



Epoch [473/700]:  27%|██▋       | 213/782 [00:01<00:02, 203.22it/s]



Epoch [473/700]:  32%|███▏      | 254/782 [00:01<00:02, 198.43it/s]



Epoch [473/700]:  38%|███▊      | 297/782 [00:01<00:02, 204.59it/s]



Epoch [473/700]:  43%|████▎     | 339/782 [00:01<00:02, 202.93it/s]



Epoch [473/700]:  46%|████▌     | 360/782 [00:01<00:02, 202.62it/s]



Epoch [473/700]:  49%|████▉     | 382/782 [00:01<00:01, 205.75it/s]



Epoch [473/700]:  52%|█████▏    | 403/782 [00:01<00:01, 205.17it/s]



Epoch [473/700]:  54%|█████▍    | 424/782 [00:02<00:01, 205.45it/s]



Epoch [473/700]:  57%|█████▋    | 446/782 [00:02<00:01, 207.38it/s]



Epoch [473/700]:  62%|██████▏   | 488/782 [00:02<00:01, 200.72it/s]



Epoch [473/700]:  68%|██████▊   | 529/782 [00:02<00:01, 197.14it/s]



Epoch [473/700]:  73%|███████▎  | 570/782 [00:02<00:01, 198.53it/s]



Epoch [473/700]:  78%|███████▊  | 610/782 [00:03<00:00, 197.45it/s]



Epoch [473/700]:  83%|████████▎ | 652/782 [00:03<00:00, 202.76it/s]



Epoch [473/700]:  86%|████████▌ | 673/782 [00:03<00:00, 204.70it/s]



Epoch [473/700]:  89%|████████▊ | 694/782 [00:03<00:00, 206.17it/s]



Epoch [473/700]:  91%|█████████▏| 715/782 [00:03<00:00, 206.89it/s]



Epoch [473/700]:  94%|█████████▍| 737/782 [00:03<00:00, 208.96it/s]



Epoch [473/700]:  97%|█████████▋| 758/782 [00:03<00:00, 208.75it/s]



Epoch [473/700]: 100%|██████████| 782/782 [00:03<00:00, 202.94it/s]






Learning Rate: 0.004500
Train Loss: 0.0584, Accuracy: 97.93%, Confidence: 0.9765
Test Loss: 2.0167, Accuracy: 73.72%, Confidence: 0.9435
Train-Test Accuracy Gap: 24.21%


Epoch [474/700]:   3%|▎         | 22/782 [00:00<00:03, 211.88it/s]



Epoch [474/700]:   6%|▌         | 44/782 [00:00<00:03, 214.04it/s]



Epoch [474/700]:   8%|▊         | 66/782 [00:00<00:03, 212.85it/s]



Epoch [474/700]:  11%|█▏        | 88/782 [00:00<00:03, 212.08it/s]



Epoch [474/700]:  17%|█▋        | 132/782 [00:00<00:03, 211.30it/s]



Epoch [474/700]:  20%|█▉        | 154/782 [00:00<00:02, 211.34it/s]



Epoch [474/700]:  23%|██▎       | 176/782 [00:00<00:02, 211.55it/s]



Epoch [474/700]:  28%|██▊       | 220/782 [00:01<00:02, 212.54it/s]



Epoch [474/700]:  31%|███       | 242/782 [00:01<00:02, 210.62it/s]



Epoch [474/700]:  34%|███▍      | 264/782 [00:01<00:02, 210.51it/s]



Epoch [474/700]:  37%|███▋      | 286/782 [00:01<00:02, 210.44it/s]



Epoch [474/700]:  39%|███▉      | 308/782 [00:01<00:02, 204.46it/s]



Epoch [474/700]:  42%|████▏     | 329/782 [00:01<00:02, 199.51it/s]



Epoch [474/700]:  45%|████▍     | 350/782 [00:01<00:02, 200.63it/s]



Epoch [474/700]:  47%|████▋     | 371/782 [00:01<00:02, 201.73it/s]



Epoch [474/700]:  50%|█████     | 392/782 [00:01<00:01, 202.82it/s]



Epoch [474/700]:  53%|█████▎    | 413/782 [00:01<00:01, 204.58it/s]



Epoch [474/700]:  55%|█████▌    | 434/782 [00:02<00:01, 205.42it/s]



Epoch [474/700]:  61%|██████    | 478/782 [00:02<00:01, 210.42it/s]



Epoch [474/700]:  67%|██████▋   | 521/782 [00:02<00:01, 209.23it/s]



Epoch [474/700]:  69%|██████▉   | 542/782 [00:02<00:01, 208.96it/s]



Epoch [474/700]:  72%|███████▏  | 563/782 [00:02<00:01, 206.82it/s]



Epoch [474/700]:  75%|███████▍  | 584/782 [00:02<00:00, 201.19it/s]



Epoch [474/700]:  80%|████████  | 627/782 [00:03<00:00, 205.09it/s]



Epoch [474/700]:  86%|████████▌ | 671/782 [00:03<00:00, 208.44it/s]



Epoch [474/700]:  88%|████████▊ | 692/782 [00:03<00:00, 208.23it/s]



Epoch [474/700]:  91%|█████████▏| 714/782 [00:03<00:00, 210.13it/s]



Epoch [474/700]:  94%|█████████▍| 736/782 [00:03<00:00, 210.69it/s]



Epoch [474/700]:  97%|█████████▋| 758/782 [00:03<00:00, 205.74it/s]



Epoch [474/700]: 100%|██████████| 782/782 [00:03<00:00, 207.48it/s]






Learning Rate: 0.004500
Train Loss: 0.0565, Accuracy: 98.07%, Confidence: 0.9771
Test Loss: 2.1625, Accuracy: 72.66%, Confidence: 0.9403
Train-Test Accuracy Gap: 25.41%


Epoch [475/700]:   3%|▎         | 22/782 [00:00<00:03, 210.75it/s]



Epoch [475/700]:   6%|▌         | 44/782 [00:00<00:03, 206.85it/s]



Epoch [475/700]:   8%|▊         | 66/782 [00:00<00:03, 210.24it/s]



Epoch [475/700]:  11%|█▏        | 88/782 [00:00<00:03, 211.32it/s]



Epoch [475/700]:  14%|█▍        | 110/782 [00:00<00:03, 209.83it/s]



Epoch [475/700]:  17%|█▋        | 132/782 [00:00<00:03, 210.14it/s]



Epoch [475/700]:  23%|██▎       | 176/782 [00:00<00:02, 212.02it/s]



Epoch [475/700]:  28%|██▊       | 220/782 [00:01<00:02, 210.27it/s]



Epoch [475/700]:  34%|███▍      | 264/782 [00:01<00:02, 210.37it/s]



Epoch [475/700]:  39%|███▉      | 308/782 [00:01<00:02, 211.26it/s]



Epoch [475/700]:  45%|████▌     | 352/782 [00:01<00:02, 210.46it/s]



Epoch [475/700]:  51%|█████     | 396/782 [00:01<00:01, 210.77it/s]



Epoch [475/700]:  53%|█████▎    | 418/782 [00:01<00:01, 204.11it/s]



Epoch [475/700]:  56%|█████▋    | 440/782 [00:02<00:01, 206.32it/s]



Epoch [475/700]:  59%|█████▉    | 461/782 [00:02<00:01, 204.04it/s]



Epoch [475/700]:  62%|██████▏   | 483/782 [00:02<00:01, 206.23it/s]



Epoch [475/700]:  65%|██████▍   | 505/782 [00:02<00:01, 207.59it/s]



Epoch [475/700]:  70%|███████   | 548/782 [00:02<00:01, 209.96it/s]



Epoch [475/700]:  76%|███████▌  | 592/782 [00:02<00:00, 211.62it/s]



Epoch [475/700]:  81%|████████▏ | 636/782 [00:03<00:00, 211.00it/s]



Epoch [475/700]:  87%|████████▋ | 679/782 [00:03<00:00, 207.85it/s]



Epoch [475/700]:  92%|█████████▏| 722/782 [00:03<00:00, 208.94it/s]



Epoch [475/700]:  98%|█████████▊| 764/782 [00:03<00:00, 203.10it/s]



Epoch [475/700]: 100%|██████████| 782/782 [00:03<00:00, 207.97it/s]


Learning Rate: 0.004500
Train Loss: 0.0582, Accuracy: 98.05%, Confidence: 0.9778
Test Loss: 2.2863, Accuracy: 71.04%, Confidence: 0.9436
Train-Test Accuracy Gap: 27.01%


Epoch [476/700]:   3%|▎         | 21/782 [00:00<00:03, 205.31it/s]



Epoch [476/700]:   5%|▌         | 43/782 [00:00<00:03, 211.67it/s]



Epoch [476/700]:   8%|▊         | 65/782 [00:00<00:03, 210.73it/s]



Epoch [476/700]:  17%|█▋        | 131/782 [00:00<00:03, 207.22it/s]



Epoch [476/700]:  22%|██▏       | 174/782 [00:00<00:02, 203.54it/s]



Epoch [476/700]:  28%|██▊       | 216/782 [00:01<00:02, 203.58it/s]



Epoch [476/700]:  33%|███▎      | 259/782 [00:01<00:02, 200.18it/s]



Epoch [476/700]:  38%|███▊      | 301/782 [00:01<00:02, 198.95it/s]



Epoch [476/700]:  44%|████▍     | 343/782 [00:01<00:02, 201.08it/s]



Epoch [476/700]:  49%|████▉     | 387/782 [00:01<00:01, 207.56it/s]



Epoch [476/700]:  55%|█████▍    | 430/782 [00:02<00:01, 208.49it/s]



Epoch [476/700]:  60%|██████    | 473/782 [00:02<00:01, 208.00it/s]



Epoch [476/700]:  66%|██████▌   | 515/782 [00:02<00:01, 206.10it/s]



Epoch [476/700]:  71%|███████   | 557/782 [00:02<00:01, 204.06it/s]



Epoch [476/700]:  77%|███████▋  | 600/782 [00:02<00:00, 204.85it/s]



Epoch [476/700]:  82%|████████▏ | 643/782 [00:03<00:00, 208.22it/s]



Epoch [476/700]:  88%|████████▊ | 686/782 [00:03<00:00, 206.95it/s]



Epoch [476/700]:  93%|█████████▎| 728/782 [00:03<00:00, 203.86it/s]



Epoch [476/700]: 100%|██████████| 782/782 [00:03<00:00, 205.54it/s]






Learning Rate: 0.004500
Train Loss: 0.0529, Accuracy: 98.20%, Confidence: 0.9772
Test Loss: 2.0786, Accuracy: 73.37%, Confidence: 0.9420
Train-Test Accuracy Gap: 24.83%


Epoch [477/700]:   3%|▎         | 21/782 [00:00<00:03, 207.93it/s]



Epoch [477/700]:   5%|▌         | 42/782 [00:00<00:03, 202.31it/s]



Epoch [477/700]:   8%|▊         | 63/782 [00:00<00:03, 204.77it/s]



Epoch [477/700]:  11%|█         | 84/782 [00:00<00:03, 204.68it/s]



Epoch [477/700]:  13%|█▎        | 105/782 [00:00<00:03, 204.16it/s]



Epoch [477/700]:  16%|█▌        | 126/782 [00:00<00:03, 205.86it/s]



Epoch [477/700]:  19%|█▉        | 147/782 [00:00<00:03, 206.64it/s]



Epoch [477/700]:  21%|██▏       | 168/782 [00:00<00:02, 206.37it/s]



Epoch [477/700]:  27%|██▋       | 210/782 [00:01<00:02, 206.66it/s]



Epoch [477/700]:  32%|███▏      | 253/782 [00:01<00:02, 208.48it/s]



Epoch [477/700]:  38%|███▊      | 297/782 [00:01<00:02, 210.86it/s]



Epoch [477/700]:  44%|████▎     | 341/782 [00:01<00:02, 208.14it/s]



Epoch [477/700]:  49%|████▉     | 383/782 [00:01<00:01, 203.00it/s]



Epoch [477/700]:  55%|█████▍    | 427/782 [00:02<00:01, 208.98it/s]



Epoch [477/700]:  60%|██████    | 471/782 [00:02<00:01, 207.88it/s]



Epoch [477/700]:  66%|██████▌   | 513/782 [00:02<00:01, 204.50it/s]



Epoch [477/700]:  71%|███████   | 555/782 [00:02<00:01, 200.38it/s]



Epoch [477/700]:  76%|███████▋  | 598/782 [00:02<00:00, 205.94it/s]



Epoch [477/700]:  82%|████████▏ | 641/782 [00:03<00:00, 207.36it/s]



Epoch [477/700]:  87%|████████▋ | 683/782 [00:03<00:00, 206.70it/s]



Epoch [477/700]:  93%|█████████▎| 725/782 [00:03<00:00, 206.60it/s]



Epoch [477/700]: 100%|██████████| 782/782 [00:03<00:00, 206.16it/s]






Learning Rate: 0.004500
Train Loss: 0.0570, Accuracy: 98.10%, Confidence: 0.9770
Test Loss: 2.4304, Accuracy: 70.23%, Confidence: 0.9420
Train-Test Accuracy Gap: 27.87%


Epoch [478/700]:   3%|▎         | 20/782 [00:00<00:03, 198.90it/s]



Epoch [478/700]:  11%|█         | 83/782 [00:00<00:03, 205.04it/s]



Epoch [478/700]:  16%|█▌        | 126/782 [00:00<00:03, 208.22it/s]



Epoch [478/700]:  22%|██▏       | 170/782 [00:00<00:02, 212.46it/s]



Epoch [478/700]:  27%|██▋       | 214/782 [00:01<00:02, 214.39it/s]



Epoch [478/700]:  33%|███▎      | 258/782 [00:01<00:02, 216.14it/s]



Epoch [478/700]:  39%|███▊      | 302/782 [00:01<00:02, 213.97it/s]



Epoch [478/700]:  47%|████▋     | 368/782 [00:01<00:01, 211.73it/s]



Epoch [478/700]:  50%|████▉     | 390/782 [00:01<00:01, 210.63it/s]



Epoch [478/700]:  55%|█████▌    | 433/782 [00:02<00:01, 207.89it/s]



Epoch [478/700]:  61%|██████    | 476/782 [00:02<00:01, 207.25it/s]



Epoch [478/700]:  66%|██████▋   | 519/782 [00:02<00:01, 206.48it/s]



Epoch [478/700]:  75%|███████▍  | 585/782 [00:02<00:00, 210.16it/s]



Epoch [478/700]:  80%|████████  | 629/782 [00:02<00:00, 211.09it/s]



Epoch [478/700]:  86%|████████▌ | 673/782 [00:03<00:00, 210.46it/s]



Epoch [478/700]:  92%|█████████▏| 717/782 [00:03<00:00, 207.90it/s]



Epoch [478/700]:  97%|█████████▋| 760/782 [00:03<00:00, 210.02it/s]



Epoch [478/700]: 100%|██████████| 782/782 [00:03<00:00, 209.68it/s]


Learning Rate: 0.004500
Train Loss: 0.0546, Accuracy: 98.14%, Confidence: 0.9778
Test Loss: 2.2417, Accuracy: 71.19%, Confidence: 0.9395
Train-Test Accuracy Gap: 26.95%


Epoch [479/700]:   3%|▎         | 20/782 [00:00<00:03, 194.85it/s]



Epoch [479/700]:  11%|█         | 85/782 [00:00<00:03, 210.25it/s]



Epoch [479/700]:  16%|█▋        | 129/782 [00:00<00:03, 210.77it/s]



Epoch [479/700]:  22%|██▏       | 172/782 [00:00<00:02, 208.02it/s]



Epoch [479/700]:  28%|██▊       | 216/782 [00:01<00:02, 207.82it/s]



Epoch [479/700]:  33%|███▎      | 260/782 [00:01<00:02, 211.85it/s]



Epoch [479/700]:  39%|███▉      | 304/782 [00:01<00:02, 212.35it/s]



Epoch [479/700]:  44%|████▍     | 347/782 [00:01<00:02, 206.95it/s]



Epoch [479/700]:  50%|████▉     | 390/782 [00:01<00:01, 207.63it/s]



Epoch [479/700]:  55%|█████▌    | 433/782 [00:02<00:01, 208.29it/s]



Epoch [479/700]:  61%|██████    | 476/782 [00:02<00:01, 207.82it/s]



Epoch [479/700]:  66%|██████▋   | 519/782 [00:02<00:01, 208.16it/s]



Epoch [479/700]:  72%|███████▏  | 561/782 [00:02<00:01, 206.72it/s]



Epoch [479/700]:  77%|███████▋  | 604/782 [00:02<00:00, 204.86it/s]



Epoch [479/700]:  83%|████████▎ | 646/782 [00:03<00:00, 207.06it/s]



Epoch [479/700]:  88%|████████▊ | 688/782 [00:03<00:00, 202.88it/s]



Epoch [479/700]:  94%|█████████▎| 732/782 [00:03<00:00, 208.81it/s]



Epoch [479/700]:  99%|█████████▉| 775/782 [00:03<00:00, 211.29it/s]



Epoch [479/700]: 100%|██████████| 782/782 [00:03<00:00, 207.66it/s]


Learning Rate: 0.004500
Train Loss: 0.0572, Accuracy: 98.05%, Confidence: 0.9773
Test Loss: 2.1245, Accuracy: 72.56%, Confidence: 0.9426
Train-Test Accuracy Gap: 25.49%


Epoch [480/700]:   3%|▎         | 21/782 [00:00<00:03, 207.30it/s]



Epoch [480/700]:   5%|▌         | 42/782 [00:00<00:03, 206.67it/s]



Epoch [480/700]:   8%|▊         | 63/782 [00:00<00:03, 204.91it/s]



Epoch [480/700]:  11%|█         | 84/782 [00:00<00:03, 205.49it/s]



Epoch [480/700]:  16%|█▌        | 127/782 [00:00<00:03, 208.24it/s]



Epoch [480/700]:  22%|██▏       | 171/782 [00:00<00:02, 211.51it/s]



Epoch [480/700]:  27%|██▋       | 215/782 [00:01<00:02, 209.60it/s]



Epoch [480/700]:  33%|███▎      | 258/782 [00:01<00:02, 209.09it/s]



Epoch [480/700]:  38%|███▊      | 301/782 [00:01<00:02, 209.02it/s]



Epoch [480/700]:  44%|████▍     | 343/782 [00:01<00:02, 208.49it/s]



Epoch [480/700]:  49%|████▉     | 386/782 [00:01<00:01, 209.58it/s]



Epoch [480/700]:  55%|█████▍    | 428/782 [00:02<00:01, 207.43it/s]



Epoch [480/700]:  60%|██████    | 470/782 [00:02<00:01, 205.53it/s]



Epoch [480/700]:  63%|██████▎   | 491/782 [00:02<00:01, 205.72it/s]



Epoch [480/700]:  66%|██████▌   | 513/782 [00:02<00:01, 207.40it/s]



Epoch [480/700]:  68%|██████▊   | 535/782 [00:02<00:01, 208.64it/s]



Epoch [480/700]:  71%|███████   | 556/782 [00:02<00:01, 207.93it/s]



Epoch [480/700]:  74%|███████▍  | 577/782 [00:02<00:00, 208.50it/s]



Epoch [480/700]:  77%|███████▋  | 599/782 [00:02<00:00, 210.26it/s]



Epoch [480/700]:  79%|███████▉  | 621/782 [00:02<00:00, 210.70it/s]



Epoch [480/700]:  82%|████████▏ | 643/782 [00:03<00:00, 211.11it/s]



Epoch [480/700]:  85%|████████▌ | 665/782 [00:03<00:00, 206.00it/s]



Epoch [480/700]:  88%|████████▊ | 686/782 [00:03<00:00, 205.49it/s]



Epoch [480/700]:  90%|█████████ | 707/782 [00:03<00:00, 206.07it/s]



Epoch [480/700]:  93%|█████████▎| 728/782 [00:03<00:00, 207.05it/s]



Epoch [480/700]:  96%|█████████▌| 750/782 [00:03<00:00, 208.85it/s]



Epoch [480/700]: 100%|██████████| 782/782 [00:03<00:00, 208.05it/s]


Learning Rate: 0.004500
Train Loss: 0.0588, Accuracy: 97.98%, Confidence: 0.9769
Test Loss: 2.1485, Accuracy: 71.70%, Confidence: 0.9384
Train-Test Accuracy Gap: 26.28%


Epoch [481/700]:   3%|▎         | 22/782 [00:00<00:03, 210.97it/s]



Epoch [481/700]:   8%|▊         | 66/782 [00:00<00:03, 204.21it/s]



Epoch [481/700]:  17%|█▋        | 131/782 [00:00<00:03, 210.90it/s]



Epoch [481/700]:  22%|██▏       | 175/782 [00:00<00:02, 213.41it/s]



Epoch [481/700]:  28%|██▊       | 219/782 [00:01<00:02, 215.41it/s]



Epoch [481/700]:  34%|███▎      | 263/782 [00:01<00:02, 211.66it/s]



Epoch [481/700]:  39%|███▉      | 307/782 [00:01<00:02, 210.53it/s]



Epoch [481/700]:  45%|████▍     | 351/782 [00:01<00:02, 213.00it/s]



Epoch [481/700]:  50%|█████     | 394/782 [00:01<00:01, 208.83it/s]



Epoch [481/700]:  56%|█████▌    | 438/782 [00:02<00:01, 211.14it/s]



Epoch [481/700]:  62%|██████▏   | 482/782 [00:02<00:01, 212.30it/s]



Epoch [481/700]:  70%|███████   | 548/782 [00:02<00:01, 211.18it/s]



Epoch [481/700]:  73%|███████▎  | 570/782 [00:02<00:01, 207.76it/s]



Epoch [481/700]:  81%|████████▏ | 636/782 [00:03<00:00, 211.08it/s]



Epoch [481/700]:  87%|████████▋ | 680/782 [00:03<00:00, 211.74it/s]



Epoch [481/700]:  93%|█████████▎| 724/782 [00:03<00:00, 212.25it/s]



Epoch [481/700]:  98%|█████████▊| 768/782 [00:03<00:00, 213.48it/s]



Epoch [481/700]: 100%|██████████| 782/782 [00:03<00:00, 210.35it/s]


Learning Rate: 0.004500
Train Loss: 0.0544, Accuracy: 98.13%, Confidence: 0.9778
Test Loss: 2.1439, Accuracy: 72.53%, Confidence: 0.9426
Train-Test Accuracy Gap: 25.60%


Epoch [482/700]:   3%|▎         | 21/782 [00:00<00:03, 200.61it/s]



Epoch [482/700]:   5%|▌         | 42/782 [00:00<00:03, 201.84it/s]



Epoch [482/700]:   8%|▊         | 64/782 [00:00<00:03, 206.22it/s]



Epoch [482/700]:  11%|█         | 85/782 [00:00<00:03, 206.08it/s]



Epoch [482/700]:  14%|█▎        | 107/782 [00:00<00:03, 210.01it/s]



Epoch [482/700]:  16%|█▋        | 129/782 [00:00<00:03, 210.45it/s]



Epoch [482/700]:  22%|██▏       | 172/782 [00:00<00:02, 207.75it/s]



Epoch [482/700]:  27%|██▋       | 214/782 [00:01<00:02, 207.75it/s]



Epoch [482/700]:  33%|███▎      | 256/782 [00:01<00:02, 205.55it/s]



Epoch [482/700]:  38%|███▊      | 298/782 [00:01<00:02, 205.99it/s]



Epoch [482/700]:  43%|████▎     | 340/782 [00:01<00:02, 206.57it/s]



Epoch [482/700]:  46%|████▌     | 361/782 [00:01<00:02, 206.57it/s]



Epoch [482/700]:  49%|████▉     | 383/782 [00:01<00:01, 208.87it/s]



Epoch [482/700]:  52%|█████▏    | 404/782 [00:01<00:01, 208.40it/s]



Epoch [482/700]:  54%|█████▍    | 425/782 [00:02<00:01, 208.27it/s]



Epoch [482/700]:  57%|█████▋    | 446/782 [00:02<00:01, 204.97it/s]



Epoch [482/700]:  60%|█████▉    | 467/782 [00:02<00:01, 204.62it/s]



Epoch [482/700]:  62%|██████▏   | 488/782 [00:02<00:01, 204.17it/s]



Epoch [482/700]:  65%|██████▌   | 509/782 [00:02<00:01, 204.96it/s]



Epoch [482/700]:  68%|██████▊   | 530/782 [00:02<00:01, 204.31it/s]



Epoch [482/700]:  70%|███████   | 551/782 [00:02<00:01, 203.26it/s]



Epoch [482/700]:  73%|███████▎  | 572/782 [00:02<00:01, 203.49it/s]



Epoch [482/700]:  76%|███████▌  | 593/782 [00:02<00:00, 203.58it/s]



Epoch [482/700]:  79%|███████▊  | 615/782 [00:02<00:00, 205.57it/s]



Epoch [482/700]:  81%|████████▏ | 636/782 [00:03<00:00, 205.34it/s]



Epoch [482/700]:  84%|████████▍ | 657/782 [00:03<00:00, 202.22it/s]



Epoch [482/700]:  87%|████████▋ | 678/782 [00:03<00:00, 202.22it/s]



Epoch [482/700]:  89%|████████▉ | 699/782 [00:03<00:00, 200.42it/s]



Epoch [482/700]:  95%|█████████▌| 743/782 [00:03<00:00, 206.63it/s]



Epoch [482/700]: 100%|██████████| 782/782 [00:03<00:00, 205.37it/s]


Learning Rate: 0.004500
Train Loss: 0.0576, Accuracy: 98.07%, Confidence: 0.9771
Test Loss: 2.3760, Accuracy: 70.17%, Confidence: 0.9380
Train-Test Accuracy Gap: 27.90%


Epoch [483/700]:   3%|▎         | 21/782 [00:00<00:03, 203.35it/s]



Epoch [483/700]:  11%|█         | 84/782 [00:00<00:03, 206.93it/s]



Epoch [483/700]:  16%|█▌        | 126/782 [00:00<00:03, 203.49it/s]



Epoch [483/700]:  22%|██▏       | 169/782 [00:00<00:02, 207.78it/s]



Epoch [483/700]:  27%|██▋       | 211/782 [00:01<00:02, 203.71it/s]



Epoch [483/700]:  33%|███▎      | 255/782 [00:01<00:02, 209.52it/s]



Epoch [483/700]:  38%|███▊      | 297/782 [00:01<00:02, 206.79it/s]



Epoch [483/700]:  43%|████▎     | 339/782 [00:01<00:02, 205.47it/s]



Epoch [483/700]:  49%|████▉     | 382/782 [00:01<00:01, 207.91it/s]



Epoch [483/700]:  54%|█████▍    | 424/782 [00:02<00:01, 203.06it/s]



Epoch [483/700]:  60%|█████▉    | 466/782 [00:02<00:01, 201.16it/s]



Epoch [483/700]:  65%|██████▌   | 509/782 [00:02<00:01, 206.37it/s]



Epoch [483/700]:  70%|███████   | 551/782 [00:02<00:01, 201.14it/s]



Epoch [483/700]:  76%|███████▌  | 593/782 [00:02<00:00, 203.29it/s]



Epoch [483/700]:  81%|████████  | 635/782 [00:03<00:00, 205.15it/s]



Epoch [483/700]:  87%|████████▋ | 678/782 [00:03<00:00, 204.08it/s]



Epoch [483/700]:  92%|█████████▏| 721/782 [00:03<00:00, 203.68it/s]



Epoch [483/700]: 100%|██████████| 782/782 [00:03<00:00, 205.28it/s]


Learning Rate: 0.004500
Train Loss: 0.0649, Accuracy: 97.82%, Confidence: 0.9766
Test Loss: 2.2453, Accuracy: 71.98%, Confidence: 0.9393
Train-Test Accuracy Gap: 25.84%


Epoch [484/700]:   3%|▎         | 20/782 [00:00<00:03, 198.55it/s]



Epoch [484/700]:   5%|▌         | 41/782 [00:00<00:03, 203.39it/s]



Epoch [484/700]:   8%|▊         | 62/782 [00:00<00:03, 201.98it/s]



Epoch [484/700]:  11%|█         | 83/782 [00:00<00:03, 203.91it/s]



Epoch [484/700]:  16%|█▌        | 126/782 [00:00<00:03, 209.34it/s]



Epoch [484/700]:  22%|██▏       | 170/782 [00:00<00:02, 211.79it/s]



Epoch [484/700]:  27%|██▋       | 214/782 [00:01<00:02, 211.44it/s]



Epoch [484/700]:  30%|███       | 236/782 [00:01<00:02, 211.94it/s]



Epoch [484/700]:  39%|███▊      | 302/782 [00:01<00:02, 208.46it/s]



Epoch [484/700]:  44%|████▍     | 345/782 [00:01<00:02, 209.15it/s]



Epoch [484/700]:  50%|████▉     | 388/782 [00:01<00:01, 209.28it/s]



Epoch [484/700]:  55%|█████▌    | 431/782 [00:02<00:01, 208.41it/s]



Epoch [484/700]:  61%|██████    | 474/782 [00:02<00:01, 207.16it/s]



Epoch [484/700]:  66%|██████▌   | 517/782 [00:02<00:01, 207.35it/s]



Epoch [484/700]:  72%|███████▏  | 561/782 [00:02<00:01, 212.19it/s]



Epoch [484/700]:  77%|███████▋  | 605/782 [00:02<00:00, 203.41it/s]



Epoch [484/700]:  83%|████████▎ | 648/782 [00:03<00:00, 206.71it/s]



Epoch [484/700]:  88%|████████▊ | 690/782 [00:03<00:00, 206.38it/s]



Epoch [484/700]:  94%|█████████▍| 734/782 [00:03<00:00, 209.75it/s]



Epoch [484/700]: 100%|██████████| 782/782 [00:03<00:00, 208.12it/s]


Learning Rate: 0.004500
Train Loss: 0.0570, Accuracy: 98.07%, Confidence: 0.9775
Test Loss: 2.3257, Accuracy: 72.01%, Confidence: 0.9419
Train-Test Accuracy Gap: 26.06%


Epoch [485/700]:   3%|▎         | 21/782 [00:00<00:03, 208.55it/s]



Epoch [485/700]:   5%|▌         | 42/782 [00:00<00:03, 208.50it/s]



Epoch [485/700]:   8%|▊         | 63/782 [00:00<00:03, 203.86it/s]



Epoch [485/700]:  11%|█         | 84/782 [00:00<00:03, 201.43it/s]



Epoch [485/700]:  14%|█▎        | 106/782 [00:00<00:03, 206.58it/s]



Epoch [485/700]:  16%|█▌        | 127/782 [00:00<00:03, 204.20it/s]



Epoch [485/700]:  19%|█▉        | 148/782 [00:00<00:03, 205.69it/s]



Epoch [485/700]:  27%|██▋       | 214/782 [00:01<00:02, 211.20it/s]



Epoch [485/700]:  33%|███▎      | 258/782 [00:01<00:02, 206.56it/s]



Epoch [485/700]:  38%|███▊      | 301/782 [00:01<00:02, 208.39it/s]



Epoch [485/700]:  44%|████▍     | 343/782 [00:01<00:02, 203.89it/s]



Epoch [485/700]:  49%|████▉     | 386/782 [00:01<00:01, 207.11it/s]



Epoch [485/700]:  55%|█████▍    | 428/782 [00:02<00:01, 204.35it/s]



Epoch [485/700]:  60%|██████    | 471/782 [00:02<00:01, 205.71it/s]



Epoch [485/700]:  68%|██████▊   | 535/782 [00:02<00:01, 206.15it/s]



Epoch [485/700]:  74%|███████▍  | 578/782 [00:02<00:00, 207.97it/s]



Epoch [485/700]:  80%|███████▉  | 622/782 [00:03<00:00, 210.04it/s]



Epoch [485/700]:  85%|████████▌ | 666/782 [00:03<00:00, 209.76it/s]



Epoch [485/700]:  91%|█████████ | 708/782 [00:03<00:00, 208.10it/s]



Epoch [485/700]:  96%|█████████▌| 752/782 [00:03<00:00, 209.98it/s]



Epoch [485/700]: 100%|██████████| 782/782 [00:03<00:00, 207.39it/s]


Learning Rate: 0.004500
Train Loss: 0.0539, Accuracy: 98.15%, Confidence: 0.9786
Test Loss: 1.9325, Accuracy: 73.76%, Confidence: 0.9426
Train-Test Accuracy Gap: 24.39%


Epoch [486/700]:   3%|▎         | 21/782 [00:00<00:03, 206.61it/s]



Epoch [486/700]:   5%|▌         | 42/782 [00:00<00:03, 206.45it/s]



Epoch [486/700]:  11%|█         | 85/782 [00:00<00:03, 209.00it/s]



Epoch [486/700]:  16%|█▋        | 128/782 [00:00<00:03, 209.37it/s]



Epoch [486/700]:  22%|██▏       | 171/782 [00:00<00:02, 210.40it/s]



Epoch [486/700]:  27%|██▋       | 215/782 [00:01<00:02, 209.96it/s]



Epoch [486/700]:  33%|███▎      | 259/782 [00:01<00:02, 210.57it/s]



Epoch [486/700]:  39%|███▊      | 303/782 [00:01<00:02, 211.48it/s]



Epoch [486/700]:  44%|████▍     | 347/782 [00:01<00:02, 211.73it/s]



Epoch [486/700]:  50%|█████     | 391/782 [00:01<00:01, 209.69it/s]



Epoch [486/700]:  55%|█████▌    | 434/782 [00:02<00:01, 209.88it/s]



Epoch [486/700]:  61%|██████    | 478/782 [00:02<00:01, 211.64it/s]



Epoch [486/700]:  67%|██████▋   | 522/782 [00:02<00:01, 210.28it/s]



Epoch [486/700]:  72%|███████▏  | 566/782 [00:02<00:01, 213.09it/s]



Epoch [486/700]:  78%|███████▊  | 610/782 [00:02<00:00, 212.08it/s]



Epoch [486/700]:  86%|████████▋ | 676/782 [00:03<00:00, 210.59it/s]



Epoch [486/700]:  89%|████████▉ | 698/782 [00:03<00:00, 206.32it/s]



Epoch [486/700]:  95%|█████████▍| 742/782 [00:03<00:00, 210.80it/s]



Epoch [486/700]: 100%|██████████| 782/782 [00:03<00:00, 210.33it/s]


Learning Rate: 0.004500
Train Loss: 0.0577, Accuracy: 97.98%, Confidence: 0.9777
Test Loss: 2.4595, Accuracy: 70.14%, Confidence: 0.9417
Train-Test Accuracy Gap: 27.84%


Epoch [487/700]:   3%|▎         | 20/782 [00:00<00:03, 199.42it/s]



Epoch [487/700]:   5%|▌         | 41/782 [00:00<00:03, 203.98it/s]



Epoch [487/700]:   8%|▊         | 63/782 [00:00<00:03, 208.52it/s]



Epoch [487/700]:  11%|█         | 85/782 [00:00<00:03, 210.54it/s]



Epoch [487/700]:  16%|█▋        | 129/782 [00:00<00:03, 212.51it/s]



Epoch [487/700]:  19%|█▉        | 151/782 [00:00<00:03, 205.93it/s]



Epoch [487/700]:  22%|██▏       | 173/782 [00:00<00:02, 207.97it/s]



Epoch [487/700]:  28%|██▊       | 216/782 [00:01<00:02, 210.02it/s]



Epoch [487/700]:  30%|███       | 238/782 [00:01<00:02, 210.54it/s]



Epoch [487/700]:  33%|███▎      | 260/782 [00:01<00:02, 209.63it/s]



Epoch [487/700]:  36%|███▌      | 282/782 [00:01<00:02, 210.25it/s]



Epoch [487/700]:  39%|███▉      | 304/782 [00:01<00:02, 212.22it/s]



Epoch [487/700]:  42%|████▏     | 326/782 [00:01<00:02, 212.80it/s]



Epoch [487/700]:  45%|████▍     | 348/782 [00:01<00:02, 208.56it/s]



Epoch [487/700]:  47%|████▋     | 369/782 [00:01<00:01, 207.54it/s]



Epoch [487/700]:  50%|████▉     | 390/782 [00:01<00:01, 204.66it/s]



Epoch [487/700]:  53%|█████▎    | 411/782 [00:01<00:01, 205.99it/s]



Epoch [487/700]:  55%|█████▌    | 433/782 [00:02<00:01, 207.44it/s]



Epoch [487/700]:  58%|█████▊    | 454/782 [00:02<00:01, 206.35it/s]



Epoch [487/700]:  61%|██████    | 476/782 [00:02<00:01, 207.71it/s]



Epoch [487/700]:  64%|██████▎   | 498/782 [00:02<00:01, 209.48it/s]



Epoch [487/700]:  66%|██████▋   | 520/782 [00:02<00:01, 210.07it/s]



Epoch [487/700]:  69%|██████▉   | 542/782 [00:02<00:01, 209.92it/s]



Epoch [487/700]:  72%|███████▏  | 563/782 [00:02<00:01, 208.48it/s]



Epoch [487/700]:  75%|███████▍  | 584/782 [00:02<00:00, 208.81it/s]



Epoch [487/700]:  77%|███████▋  | 605/782 [00:02<00:00, 203.05it/s]



Epoch [487/700]:  80%|████████  | 626/782 [00:03<00:00, 204.27it/s]



Epoch [487/700]:  83%|████████▎ | 647/782 [00:03<00:00, 205.81it/s]



Epoch [487/700]:  85%|████████▌ | 668/782 [00:03<00:00, 206.75it/s]



Epoch [487/700]:  88%|████████▊ | 689/782 [00:03<00:00, 207.53it/s]



Epoch [487/700]:  91%|█████████ | 710/782 [00:03<00:00, 206.72it/s]



Epoch [487/700]:  94%|█████████▎| 732/782 [00:03<00:00, 208.26it/s]



Epoch [487/700]:  96%|█████████▋| 753/782 [00:03<00:00, 204.86it/s]



Epoch [487/700]: 100%|██████████| 782/782 [00:03<00:00, 207.68it/s]






Learning Rate: 0.004500
Train Loss: 0.0597, Accuracy: 97.92%, Confidence: 0.9769
Test Loss: 2.5242, Accuracy: 69.68%, Confidence: 0.9398
Train-Test Accuracy Gap: 28.24%


Epoch [488/700]:   3%|▎         | 21/782 [00:00<00:03, 209.16it/s]



Epoch [488/700]:   5%|▌         | 43/782 [00:00<00:03, 213.55it/s]



Epoch [488/700]:   8%|▊         | 65/782 [00:00<00:03, 215.05it/s]



Epoch [488/700]:  17%|█▋        | 132/782 [00:00<00:03, 215.16it/s]



Epoch [488/700]:  23%|██▎       | 176/782 [00:00<00:02, 206.49it/s]



Epoch [488/700]:  28%|██▊       | 219/782 [00:01<00:02, 207.03it/s]



Epoch [488/700]:  34%|███▎      | 263/782 [00:01<00:02, 211.09it/s]



Epoch [488/700]:  39%|███▉      | 307/782 [00:01<00:02, 206.00it/s]



Epoch [488/700]:  45%|████▍     | 349/782 [00:01<00:02, 201.55it/s]



Epoch [488/700]:  50%|█████     | 391/782 [00:01<00:01, 200.83it/s]



Epoch [488/700]:  55%|█████▌    | 433/782 [00:02<00:01, 201.54it/s]



Epoch [488/700]:  61%|██████    | 477/782 [00:02<00:01, 208.14it/s]



Epoch [488/700]:  66%|██████▋   | 520/782 [00:02<00:01, 208.04it/s]



Epoch [488/700]:  72%|███████▏  | 562/782 [00:02<00:01, 203.26it/s]



Epoch [488/700]:  77%|███████▋  | 604/782 [00:02<00:00, 201.99it/s]



Epoch [488/700]:  83%|████████▎ | 647/782 [00:03<00:00, 206.34it/s]



Epoch [488/700]:  88%|████████▊ | 691/782 [00:03<00:00, 210.91it/s]



Epoch [488/700]:  97%|█████████▋| 757/782 [00:03<00:00, 211.02it/s]



Epoch [488/700]: 100%|██████████| 782/782 [00:03<00:00, 207.28it/s]


Learning Rate: 0.004500
Train Loss: 0.0534, Accuracy: 98.25%, Confidence: 0.9782
Test Loss: 2.4574, Accuracy: 69.60%, Confidence: 0.9420
Train-Test Accuracy Gap: 28.65%


Epoch [489/700]:   3%|▎         | 21/782 [00:00<00:03, 205.17it/s]



Epoch [489/700]:   5%|▌         | 42/782 [00:00<00:03, 207.86it/s]



Epoch [489/700]:   8%|▊         | 63/782 [00:00<00:03, 199.59it/s]



Epoch [489/700]:  11%|█         | 84/782 [00:00<00:03, 199.89it/s]



Epoch [489/700]:  13%|█▎        | 105/782 [00:00<00:03, 198.43it/s]



Epoch [489/700]:  19%|█▉        | 148/782 [00:00<00:03, 201.84it/s]



Epoch [489/700]:  24%|██▍       | 191/782 [00:00<00:02, 203.50it/s]



Epoch [489/700]:  33%|███▎      | 256/782 [00:01<00:02, 209.23it/s]



Epoch [489/700]:  38%|███▊      | 298/782 [00:01<00:02, 208.33it/s]



Epoch [489/700]:  44%|████▎     | 341/782 [00:01<00:02, 209.78it/s]



Epoch [489/700]:  52%|█████▏    | 406/782 [00:01<00:01, 211.43it/s]



Epoch [489/700]:  58%|█████▊    | 450/782 [00:02<00:01, 211.63it/s]



Epoch [489/700]:  63%|██████▎   | 494/782 [00:02<00:01, 211.59it/s]



Epoch [489/700]:  69%|██████▉   | 538/782 [00:02<00:01, 213.40it/s]



Epoch [489/700]:  74%|███████▍  | 582/782 [00:02<00:00, 213.17it/s]



Epoch [489/700]:  80%|████████  | 626/782 [00:02<00:00, 211.86it/s]



Epoch [489/700]:  86%|████████▌ | 670/782 [00:03<00:00, 211.43it/s]



Epoch [489/700]:  91%|█████████▏| 714/782 [00:03<00:00, 212.93it/s]



Epoch [489/700]: 100%|██████████| 782/782 [00:03<00:00, 209.25it/s]






Learning Rate: 0.004500
Train Loss: 0.0557, Accuracy: 98.08%, Confidence: 0.9780
Test Loss: 2.3443, Accuracy: 71.75%, Confidence: 0.9420
Train-Test Accuracy Gap: 26.33%


Epoch [490/700]:   3%|▎         | 20/782 [00:00<00:03, 195.87it/s]



Epoch [490/700]:   5%|▌         | 42/782 [00:00<00:03, 207.49it/s]



Epoch [490/700]:  11%|█         | 86/782 [00:00<00:03, 213.84it/s]



Epoch [490/700]:  17%|█▋        | 130/782 [00:00<00:03, 213.55it/s]



Epoch [490/700]:  22%|██▏       | 174/782 [00:00<00:02, 213.82it/s]



Epoch [490/700]:  28%|██▊       | 218/782 [00:01<00:02, 213.03it/s]



Epoch [490/700]:  34%|███▎      | 262/782 [00:01<00:02, 212.14it/s]



Epoch [490/700]:  39%|███▉      | 306/782 [00:01<00:02, 213.71it/s]



Epoch [490/700]:  45%|████▍     | 350/782 [00:01<00:02, 213.46it/s]



Epoch [490/700]:  53%|█████▎    | 416/782 [00:01<00:01, 211.84it/s]



Epoch [490/700]:  59%|█████▉    | 460/782 [00:02<00:01, 208.86it/s]



Epoch [490/700]:  64%|██████▍   | 504/782 [00:02<00:01, 211.32it/s]



Epoch [490/700]:  70%|███████   | 548/782 [00:02<00:01, 209.79it/s]



Epoch [490/700]:  76%|███████▌  | 591/782 [00:02<00:00, 209.51it/s]



Epoch [490/700]:  81%|████████  | 633/782 [00:02<00:00, 208.21it/s]



Epoch [490/700]:  86%|████████▋ | 675/782 [00:03<00:00, 205.22it/s]



Epoch [490/700]:  92%|█████████▏| 718/782 [00:03<00:00, 209.74it/s]



Epoch [490/700]:  97%|█████████▋| 762/782 [00:03<00:00, 209.24it/s]



Epoch [490/700]: 100%|██████████| 782/782 [00:03<00:00, 210.49it/s]


Learning Rate: 0.004500
Train Loss: 0.0537, Accuracy: 98.10%, Confidence: 0.9782
Test Loss: 2.2696, Accuracy: 72.30%, Confidence: 0.9452
Train-Test Accuracy Gap: 25.80%


Epoch [491/700]:   3%|▎         | 21/782 [00:00<00:03, 209.64it/s]



Epoch [491/700]:   5%|▌         | 43/782 [00:00<00:03, 214.04it/s]



Epoch [491/700]:  11%|█         | 87/782 [00:00<00:03, 209.74it/s]



Epoch [491/700]:  17%|█▋        | 130/782 [00:00<00:03, 210.81it/s]



Epoch [491/700]:  22%|██▏       | 174/782 [00:00<00:02, 205.85it/s]



Epoch [491/700]:  25%|██▍       | 195/782 [00:00<00:02, 203.98it/s]



Epoch [491/700]:  28%|██▊       | 216/782 [00:01<00:02, 200.11it/s]



Epoch [491/700]:  33%|███▎      | 259/782 [00:01<00:02, 204.18it/s]



Epoch [491/700]:  39%|███▊      | 302/782 [00:01<00:02, 204.68it/s]



Epoch [491/700]:  44%|████▍     | 345/782 [00:01<00:02, 206.46it/s]



Epoch [491/700]:  50%|████▉     | 388/782 [00:01<00:01, 209.90it/s]



Epoch [491/700]:  55%|█████▍    | 430/782 [00:02<00:01, 203.38it/s]



Epoch [491/700]:  60%|██████    | 472/782 [00:02<00:01, 204.04it/s]



Epoch [491/700]:  66%|██████▌   | 514/782 [00:02<00:01, 204.43it/s]



Epoch [491/700]:  68%|██████▊   | 535/782 [00:02<00:01, 201.67it/s]



Epoch [491/700]:  71%|███████   | 556/782 [00:02<00:01, 203.02it/s]



Epoch [491/700]:  74%|███████▍  | 577/782 [00:02<00:01, 201.74it/s]



Epoch [491/700]:  79%|███████▉  | 619/782 [00:03<00:00, 203.50it/s]



Epoch [491/700]:  85%|████████▍ | 662/782 [00:03<00:00, 204.42it/s]



Epoch [491/700]:  90%|█████████ | 704/782 [00:03<00:00, 202.55it/s]



Epoch [491/700]:  95%|█████████▌| 746/782 [00:03<00:00, 203.16it/s]



Epoch [491/700]: 100%|██████████| 782/782 [00:03<00:00, 204.41it/s]


Learning Rate: 0.004500
Train Loss: 0.0563, Accuracy: 98.05%, Confidence: 0.9778
Test Loss: 1.9678, Accuracy: 73.53%, Confidence: 0.9435
Train-Test Accuracy Gap: 24.52%


Epoch [492/700]:   3%|▎         | 21/782 [00:00<00:03, 205.07it/s]



Epoch [492/700]:   5%|▌         | 43/782 [00:00<00:03, 207.00it/s]



Epoch [492/700]:   8%|▊         | 64/782 [00:00<00:03, 206.99it/s]



Epoch [492/700]:  11%|█         | 86/782 [00:00<00:03, 208.96it/s]



Epoch [492/700]:  16%|█▋        | 128/782 [00:00<00:03, 207.44it/s]



Epoch [492/700]:  22%|██▏       | 171/782 [00:00<00:02, 206.54it/s]



Epoch [492/700]:  27%|██▋       | 213/782 [00:01<00:02, 203.07it/s]



Epoch [492/700]:  33%|███▎      | 256/782 [00:01<00:02, 205.82it/s]



Epoch [492/700]:  38%|███▊      | 298/782 [00:01<00:02, 206.82it/s]



Epoch [492/700]:  44%|████▎     | 341/782 [00:01<00:02, 207.33it/s]



Epoch [492/700]:  49%|████▉     | 384/782 [00:01<00:01, 208.94it/s]



Epoch [492/700]:  55%|█████▍    | 427/782 [00:02<00:01, 210.11it/s]



Epoch [492/700]:  57%|█████▋    | 449/782 [00:02<00:01, 206.90it/s]



Epoch [492/700]:  60%|██████    | 470/782 [00:02<00:01, 206.73it/s]



Epoch [492/700]:  63%|██████▎   | 492/782 [00:02<00:01, 207.48it/s]



Epoch [492/700]:  66%|██████▌   | 514/782 [00:02<00:01, 209.96it/s]



Epoch [492/700]:  69%|██████▊   | 536/782 [00:02<00:01, 210.20it/s]



Epoch [492/700]:  71%|███████▏  | 558/782 [00:02<00:01, 209.00it/s]



Epoch [492/700]:  74%|███████▍  | 579/782 [00:02<00:00, 204.30it/s]



Epoch [492/700]:  77%|███████▋  | 601/782 [00:02<00:00, 206.56it/s]



Epoch [492/700]:  80%|███████▉  | 622/782 [00:03<00:00, 207.44it/s]



Epoch [492/700]:  82%|████████▏ | 644/782 [00:03<00:00, 208.16it/s]



Epoch [492/700]:  88%|████████▊ | 686/782 [00:03<00:00, 207.26it/s]



Epoch [492/700]:  93%|█████████▎| 729/782 [00:03<00:00, 208.30it/s]



Epoch [492/700]:  99%|█████████▊| 771/782 [00:03<00:00, 207.11it/s]



Epoch [492/700]: 100%|██████████| 782/782 [00:03<00:00, 206.95it/s]






Learning Rate: 0.004500
Train Loss: 0.0571, Accuracy: 98.02%, Confidence: 0.9778
Test Loss: 2.2111, Accuracy: 72.14%, Confidence: 0.9423
Train-Test Accuracy Gap: 25.88%


Epoch [493/700]:   3%|▎         | 21/782 [00:00<00:03, 202.90it/s]



Epoch [493/700]:   5%|▌         | 42/782 [00:00<00:03, 200.37it/s]



Epoch [493/700]:  11%|█         | 85/782 [00:00<00:03, 205.52it/s]



Epoch [493/700]:  16%|█▌        | 127/782 [00:00<00:03, 206.49it/s]



Epoch [493/700]:  22%|██▏       | 169/782 [00:00<00:02, 206.69it/s]



Epoch [493/700]:  27%|██▋       | 212/782 [00:01<00:02, 209.74it/s]



Epoch [493/700]:  33%|███▎      | 255/782 [00:01<00:02, 211.21it/s]



Epoch [493/700]:  38%|███▊      | 299/782 [00:01<00:02, 212.19it/s]



Epoch [493/700]:  44%|████▎     | 342/782 [00:01<00:02, 207.41it/s]



Epoch [493/700]:  49%|████▉     | 385/782 [00:01<00:01, 208.65it/s]



Epoch [493/700]:  52%|█████▏    | 406/782 [00:01<00:01, 208.33it/s]



Epoch [493/700]:  55%|█████▍    | 428/782 [00:02<00:01, 210.24it/s]



Epoch [493/700]:  58%|█████▊    | 450/782 [00:02<00:01, 208.29it/s]



Epoch [493/700]:  60%|██████    | 471/782 [00:02<00:01, 206.67it/s]



Epoch [493/700]:  63%|██████▎   | 493/782 [00:02<00:01, 208.84it/s]



Epoch [493/700]:  68%|██████▊   | 535/782 [00:02<00:01, 204.68it/s]



Epoch [493/700]:  74%|███████▍  | 579/782 [00:02<00:00, 209.05it/s]



Epoch [493/700]:  77%|███████▋  | 600/782 [00:02<00:00, 206.52it/s]



Epoch [493/700]:  80%|███████▉  | 622/782 [00:02<00:00, 209.28it/s]



Epoch [493/700]:  82%|████████▏ | 643/782 [00:03<00:00, 206.99it/s]



Epoch [493/700]:  85%|████████▌ | 665/782 [00:03<00:00, 209.39it/s]



Epoch [493/700]:  88%|████████▊ | 686/782 [00:03<00:00, 203.96it/s]



Epoch [493/700]:  93%|█████████▎| 728/782 [00:03<00:00, 201.28it/s]



Epoch [493/700]:  96%|█████████▌| 749/782 [00:03<00:00, 197.45it/s]



Epoch [493/700]:  99%|█████████▊| 771/782 [00:03<00:00, 202.34it/s]



Epoch [493/700]: 100%|██████████| 782/782 [00:03<00:00, 206.41it/s]


Learning Rate: 0.004500
Train Loss: 0.0594, Accuracy: 98.01%, Confidence: 0.9775
Test Loss: 2.3258, Accuracy: 71.10%, Confidence: 0.9418
Train-Test Accuracy Gap: 26.91%


Epoch [494/700]:   3%|▎         | 20/782 [00:00<00:03, 191.79it/s]



Epoch [494/700]:  11%|█         | 86/782 [00:00<00:03, 209.52it/s]



Epoch [494/700]:  17%|█▋        | 130/782 [00:00<00:03, 210.32it/s]



Epoch [494/700]:  22%|██▏       | 174/782 [00:00<00:02, 207.93it/s]



Epoch [494/700]:  28%|██▊       | 216/782 [00:01<00:02, 203.54it/s]



Epoch [494/700]:  33%|███▎      | 259/782 [00:01<00:02, 206.20it/s]



Epoch [494/700]:  38%|███▊      | 301/782 [00:01<00:02, 199.09it/s]



Epoch [494/700]:  44%|████▎     | 341/782 [00:01<00:02, 198.97it/s]



Epoch [494/700]:  49%|████▉     | 383/782 [00:01<00:01, 200.82it/s]



Epoch [494/700]:  54%|█████▍    | 426/782 [00:02<00:01, 206.14it/s]



Epoch [494/700]:  60%|█████▉    | 469/782 [00:02<00:01, 206.88it/s]



Epoch [494/700]:  65%|██████▌   | 511/782 [00:02<00:01, 203.54it/s]



Epoch [494/700]:  71%|███████   | 553/782 [00:02<00:01, 201.10it/s]



Epoch [494/700]:  79%|███████▉  | 616/782 [00:03<00:00, 205.70it/s]



Epoch [494/700]:  84%|████████▍ | 659/782 [00:03<00:00, 207.99it/s]



Epoch [494/700]:  90%|████████▉ | 701/782 [00:03<00:00, 208.61it/s]



Epoch [494/700]:  95%|█████████▌| 743/782 [00:03<00:00, 208.05it/s]



Epoch [494/700]: 100%|██████████| 782/782 [00:03<00:00, 204.94it/s]


Learning Rate: 0.004500
Train Loss: 0.0564, Accuracy: 98.14%, Confidence: 0.9785
Test Loss: 2.0734, Accuracy: 72.97%, Confidence: 0.9419
Train-Test Accuracy Gap: 25.17%


Epoch [495/700]:   3%|▎         | 21/782 [00:00<00:03, 206.63it/s]



Epoch [495/700]:   5%|▌         | 43/782 [00:00<00:03, 211.27it/s]



Epoch [495/700]:   8%|▊         | 65/782 [00:00<00:03, 211.83it/s]



Epoch [495/700]:  11%|█         | 87/782 [00:00<00:03, 210.93it/s]



Epoch [495/700]:  14%|█▍        | 109/782 [00:00<00:03, 211.97it/s]



Epoch [495/700]:  17%|█▋        | 131/782 [00:00<00:03, 208.34it/s]



Epoch [495/700]:  19%|█▉        | 152/782 [00:00<00:03, 203.53it/s]



Epoch [495/700]:  22%|██▏       | 173/782 [00:00<00:03, 202.45it/s]



Epoch [495/700]:  25%|██▍       | 194/782 [00:00<00:02, 204.62it/s]



Epoch [495/700]:  30%|███       | 237/782 [00:01<00:02, 207.36it/s]



Epoch [495/700]:  36%|███▌      | 281/782 [00:01<00:02, 210.84it/s]



Epoch [495/700]:  39%|███▊      | 303/782 [00:01<00:02, 209.07it/s]



Epoch [495/700]:  42%|████▏     | 325/782 [00:01<00:02, 209.64it/s]



Epoch [495/700]:  47%|████▋     | 369/782 [00:01<00:01, 211.75it/s]



Epoch [495/700]:  53%|█████▎    | 413/782 [00:01<00:01, 211.59it/s]



Epoch [495/700]:  56%|█████▌    | 435/782 [00:02<00:01, 213.37it/s]



Epoch [495/700]:  58%|█████▊    | 457/782 [00:02<00:01, 211.40it/s]



Epoch [495/700]:  61%|██████▏   | 479/782 [00:02<00:01, 210.49it/s]



Epoch [495/700]:  64%|██████▍   | 501/782 [00:02<00:01, 206.94it/s]



Epoch [495/700]:  67%|██████▋   | 523/782 [00:02<00:01, 207.45it/s]



Epoch [495/700]:  70%|██████▉   | 545/782 [00:02<00:01, 209.39it/s]



Epoch [495/700]:  72%|███████▏  | 566/782 [00:02<00:01, 207.62it/s]



Epoch [495/700]:  78%|███████▊  | 610/782 [00:02<00:00, 212.34it/s]



Epoch [495/700]:  84%|████████▎ | 654/782 [00:03<00:00, 213.52it/s]



Epoch [495/700]:  89%|████████▉ | 698/782 [00:03<00:00, 212.24it/s]



Epoch [495/700]:  95%|█████████▍| 742/782 [00:03<00:00, 210.67it/s]



Epoch [495/700]: 100%|██████████| 782/782 [00:03<00:00, 209.77it/s]


Learning Rate: 0.004500
Train Loss: 0.0489, Accuracy: 98.39%, Confidence: 0.9791
Test Loss: 2.2239, Accuracy: 72.17%, Confidence: 0.9434
Train-Test Accuracy Gap: 26.22%


Epoch [496/700]:   3%|▎         | 22/782 [00:00<00:03, 210.80it/s]



Epoch [496/700]:  11%|█▏        | 88/782 [00:00<00:03, 213.88it/s]



Epoch [496/700]:  17%|█▋        | 132/782 [00:00<00:03, 213.31it/s]



Epoch [496/700]:  23%|██▎       | 176/782 [00:00<00:02, 210.90it/s]



Epoch [496/700]:  28%|██▊       | 220/782 [00:01<00:02, 208.53it/s]



Epoch [496/700]:  34%|███▎      | 262/782 [00:01<00:02, 206.72it/s]



Epoch [496/700]:  39%|███▉      | 304/782 [00:01<00:02, 206.36it/s]



Epoch [496/700]:  44%|████▍     | 346/782 [00:01<00:02, 207.76it/s]



Epoch [496/700]:  50%|████▉     | 389/782 [00:01<00:01, 209.27it/s]



Epoch [496/700]:  55%|█████▌    | 433/782 [00:02<00:01, 210.03it/s]



Epoch [496/700]:  61%|██████    | 477/782 [00:02<00:01, 211.52it/s]



Epoch [496/700]:  67%|██████▋   | 521/782 [00:02<00:01, 206.35it/s]



Epoch [496/700]:  75%|███████▍  | 586/782 [00:02<00:00, 212.10it/s]



Epoch [496/700]:  81%|████████  | 630/782 [00:03<00:00, 212.39it/s]



Epoch [496/700]:  86%|████████▌ | 674/782 [00:03<00:00, 213.05it/s]



Epoch [496/700]:  92%|█████████▏| 718/782 [00:03<00:00, 212.90it/s]



Epoch [496/700]:  97%|█████████▋| 762/782 [00:03<00:00, 212.14it/s]



Epoch [496/700]: 100%|██████████| 782/782 [00:03<00:00, 210.02it/s]


Learning Rate: 0.004500
Train Loss: 0.0556, Accuracy: 98.03%, Confidence: 0.9777
Test Loss: 2.2527, Accuracy: 72.05%, Confidence: 0.9417
Train-Test Accuracy Gap: 25.98%


Epoch [497/700]:   3%|▎         | 21/782 [00:00<00:03, 204.77it/s]



Epoch [497/700]:   5%|▌         | 42/782 [00:00<00:03, 207.18it/s]



Epoch [497/700]:  11%|█         | 85/782 [00:00<00:03, 209.03it/s]



Epoch [497/700]:  16%|█▌        | 127/782 [00:00<00:03, 201.73it/s]



Epoch [497/700]:  22%|██▏       | 171/782 [00:00<00:02, 209.25it/s]



Epoch [497/700]:  27%|██▋       | 213/782 [00:01<00:02, 202.53it/s]



Epoch [497/700]:  33%|███▎      | 257/782 [00:01<00:02, 207.88it/s]



Epoch [497/700]:  38%|███▊      | 299/782 [00:01<00:02, 207.13it/s]



Epoch [497/700]:  44%|████▎     | 341/782 [00:01<00:02, 204.19it/s]



Epoch [497/700]:  49%|████▉     | 384/782 [00:01<00:01, 204.47it/s]



Epoch [497/700]:  54%|█████▍    | 426/782 [00:02<00:01, 203.76it/s]



Epoch [497/700]:  60%|█████▉    | 469/782 [00:02<00:01, 207.41it/s]



Epoch [497/700]:  65%|██████▌   | 512/782 [00:02<00:01, 206.36it/s]



Epoch [497/700]:  71%|███████   | 554/782 [00:02<00:01, 204.41it/s]



Epoch [497/700]:  76%|███████▌  | 596/782 [00:02<00:00, 205.53it/s]



Epoch [497/700]:  85%|████████▍ | 662/782 [00:03<00:00, 212.65it/s]



Epoch [497/700]:  90%|█████████ | 706/782 [00:03<00:00, 210.50it/s]



Epoch [497/700]:  96%|█████████▌| 750/782 [00:03<00:00, 213.30it/s]



Epoch [497/700]: 100%|██████████| 782/782 [00:03<00:00, 207.21it/s]


Learning Rate: 0.004500
Train Loss: 0.0570, Accuracy: 98.02%, Confidence: 0.9781
Test Loss: 2.0300, Accuracy: 72.96%, Confidence: 0.9416
Train-Test Accuracy Gap: 25.06%


Epoch [498/700]:   3%|▎         | 22/782 [00:00<00:03, 214.15it/s]



Epoch [498/700]:  11%|█         | 87/782 [00:00<00:03, 204.00it/s]



Epoch [498/700]:  17%|█▋        | 130/782 [00:00<00:03, 207.89it/s]



Epoch [498/700]:  22%|██▏       | 173/782 [00:00<00:02, 205.19it/s]



Epoch [498/700]:  28%|██▊       | 216/782 [00:01<00:02, 205.53it/s]



Epoch [498/700]:  36%|███▌      | 282/782 [00:01<00:02, 209.14it/s]



Epoch [498/700]:  42%|████▏     | 325/782 [00:01<00:02, 207.86it/s]



Epoch [498/700]:  47%|████▋     | 369/782 [00:01<00:01, 212.35it/s]



Epoch [498/700]:  53%|█████▎    | 413/782 [00:01<00:01, 213.39it/s]



Epoch [498/700]:  58%|█████▊    | 457/782 [00:02<00:01, 211.27it/s]



Epoch [498/700]:  64%|██████▍   | 500/782 [00:02<00:01, 207.70it/s]



Epoch [498/700]:  70%|██████▉   | 544/782 [00:02<00:01, 209.43it/s]



Epoch [498/700]:  75%|███████▌  | 587/782 [00:02<00:00, 208.76it/s]



Epoch [498/700]:  81%|████████  | 630/782 [00:03<00:00, 211.28it/s]



Epoch [498/700]:  86%|████████▌ | 674/782 [00:03<00:00, 212.78it/s]



Epoch [498/700]:  92%|█████████▏| 718/782 [00:03<00:00, 210.65it/s]



Epoch [498/700]: 100%|██████████| 782/782 [00:03<00:00, 209.36it/s]






Learning Rate: 0.004500
Train Loss: 0.0570, Accuracy: 98.06%, Confidence: 0.9783
Test Loss: 2.2185, Accuracy: 72.22%, Confidence: 0.9438
Train-Test Accuracy Gap: 25.84%


Epoch [499/700]:   3%|▎         | 21/782 [00:00<00:03, 203.28it/s]



Epoch [499/700]:   5%|▌         | 43/782 [00:00<00:03, 208.61it/s]



Epoch [499/700]:   8%|▊         | 64/782 [00:00<00:03, 208.82it/s]



Epoch [499/700]:  11%|█         | 85/782 [00:00<00:03, 206.09it/s]



Epoch [499/700]:  16%|█▌        | 127/782 [00:00<00:03, 204.40it/s]



Epoch [499/700]:  19%|█▉        | 149/782 [00:00<00:03, 206.66it/s]



Epoch [499/700]:  22%|██▏       | 171/782 [00:00<00:02, 208.52it/s]



Epoch [499/700]:  27%|██▋       | 214/782 [00:01<00:02, 210.78it/s]



Epoch [499/700]:  33%|███▎      | 257/782 [00:01<00:02, 205.76it/s]



Epoch [499/700]:  38%|███▊      | 299/782 [00:01<00:02, 204.00it/s]



Epoch [499/700]:  44%|████▎     | 341/782 [00:01<00:02, 203.87it/s]



Epoch [499/700]:  49%|████▉     | 383/782 [00:01<00:01, 203.31it/s]



Epoch [499/700]:  54%|█████▍    | 426/782 [00:02<00:01, 206.66it/s]



Epoch [499/700]:  60%|██████    | 470/782 [00:02<00:01, 208.11it/s]



Epoch [499/700]:  66%|██████▌   | 513/782 [00:02<00:01, 209.08it/s]



Epoch [499/700]:  71%|███████   | 557/782 [00:02<00:01, 211.54it/s]



Epoch [499/700]:  80%|███████▉  | 623/782 [00:03<00:00, 212.23it/s]



Epoch [499/700]:  85%|████████▌ | 667/782 [00:03<00:00, 211.74it/s]



Epoch [499/700]:  91%|█████████ | 710/782 [00:03<00:00, 207.43it/s]



Epoch [499/700]:  96%|█████████▌| 752/782 [00:03<00:00, 203.82it/s]



Epoch [499/700]: 100%|██████████| 782/782 [00:03<00:00, 207.18it/s]


Learning Rate: 0.004500
Train Loss: 0.0599, Accuracy: 98.03%, Confidence: 0.9776
Test Loss: 2.0234, Accuracy: 73.51%, Confidence: 0.9421
Train-Test Accuracy Gap: 24.52%


Epoch [500/700]:   3%|▎         | 21/782 [00:00<00:03, 204.21it/s]



Epoch [500/700]:   5%|▌         | 43/782 [00:00<00:03, 211.81it/s]



Epoch [500/700]:   8%|▊         | 65/782 [00:00<00:03, 210.56it/s]



Epoch [500/700]:  11%|█         | 87/782 [00:00<00:03, 208.44it/s]



Epoch [500/700]:  14%|█▍        | 108/782 [00:00<00:03, 208.51it/s]



Epoch [500/700]:  22%|██▏       | 174/782 [00:00<00:02, 211.85it/s]



Epoch [500/700]:  31%|███       | 240/782 [00:01<00:02, 210.73it/s]



Epoch [500/700]:  36%|███▌      | 283/782 [00:01<00:02, 202.70it/s]



Epoch [500/700]:  42%|████▏     | 326/782 [00:01<00:02, 199.25it/s]



Epoch [500/700]:  47%|████▋     | 368/782 [00:01<00:02, 202.02it/s]



Epoch [500/700]:  52%|█████▏    | 410/782 [00:01<00:01, 205.47it/s]



Epoch [500/700]:  58%|█████▊    | 453/782 [00:02<00:01, 208.51it/s]



Epoch [500/700]:  63%|██████▎   | 496/782 [00:02<00:01, 206.69it/s]



Epoch [500/700]:  71%|███████▏  | 559/782 [00:02<00:01, 205.41it/s]



Epoch [500/700]:  77%|███████▋  | 601/782 [00:02<00:00, 205.19it/s]



Epoch [500/700]:  80%|███████▉  | 622/782 [00:03<00:00, 203.16it/s]



Epoch [500/700]:  88%|████████▊ | 687/782 [00:03<00:00, 206.35it/s]



Epoch [500/700]:  93%|█████████▎| 730/782 [00:03<00:00, 206.52it/s]



Epoch [500/700]:  99%|█████████▊| 772/782 [00:03<00:00, 207.82it/s]



Epoch [500/700]: 100%|██████████| 782/782 [00:03<00:00, 206.58it/s]


Learning Rate: 0.004500
Train Loss: 0.0542, Accuracy: 98.18%, Confidence: 0.9786
Test Loss: 2.2866, Accuracy: 72.23%, Confidence: 0.9408
Train-Test Accuracy Gap: 25.95%


Epoch [501/700]:   5%|▌         | 40/782 [00:00<00:03, 195.27it/s]



Epoch [501/700]:  10%|█         | 82/782 [00:00<00:03, 201.69it/s]



Epoch [501/700]:  16%|█▌        | 124/782 [00:00<00:03, 199.88it/s]



Epoch [501/700]:  21%|██        | 166/782 [00:00<00:03, 203.05it/s]



Epoch [501/700]:  27%|██▋       | 208/782 [00:01<00:02, 204.41it/s]



Epoch [501/700]:  32%|███▏      | 250/782 [00:01<00:02, 205.80it/s]



Epoch [501/700]:  37%|███▋      | 293/782 [00:01<00:02, 208.54it/s]



Epoch [501/700]:  46%|████▌     | 357/782 [00:01<00:02, 206.92it/s]



Epoch [501/700]:  51%|█████     | 400/782 [00:01<00:01, 208.03it/s]



Epoch [501/700]:  57%|█████▋    | 443/782 [00:02<00:01, 207.70it/s]



Epoch [501/700]:  62%|██████▏   | 485/782 [00:02<00:01, 208.11it/s]



Epoch [501/700]:  68%|██████▊   | 529/782 [00:02<00:01, 209.69it/s]



Epoch [501/700]:  73%|███████▎  | 571/782 [00:02<00:01, 205.90it/s]



Epoch [501/700]:  78%|███████▊  | 613/782 [00:02<00:00, 205.59it/s]



Epoch [501/700]:  84%|████████▎ | 654/782 [00:03<00:00, 196.51it/s]



Epoch [501/700]:  89%|████████▊ | 694/782 [00:03<00:00, 195.55it/s]



Epoch [501/700]:  94%|█████████▍| 737/782 [00:03<00:00, 201.94it/s]



Epoch [501/700]: 100%|██████████| 782/782 [00:03<00:00, 203.67it/s]


Learning Rate: 0.004500
Train Loss: 0.0563, Accuracy: 98.07%, Confidence: 0.9780
Test Loss: 2.2926, Accuracy: 70.78%, Confidence: 0.9404
Train-Test Accuracy Gap: 27.29%


Epoch [502/700]:   3%|▎         | 21/782 [00:00<00:03, 200.52it/s]



Epoch [502/700]:  11%|█         | 84/782 [00:00<00:03, 202.79it/s]



Epoch [502/700]:  16%|█▌        | 125/782 [00:00<00:03, 197.03it/s]



Epoch [502/700]:  22%|██▏       | 169/782 [00:00<00:02, 207.24it/s]



Epoch [502/700]:  27%|██▋       | 211/782 [00:01<00:02, 205.62it/s]



Epoch [502/700]:  33%|███▎      | 255/782 [00:01<00:02, 209.48it/s]



Epoch [502/700]:  38%|███▊      | 298/782 [00:01<00:02, 209.10it/s]



Epoch [502/700]:  43%|████▎     | 340/782 [00:01<00:02, 208.40it/s]



Epoch [502/700]:  49%|████▉     | 384/782 [00:01<00:01, 211.93it/s]



Epoch [502/700]:  55%|█████▍    | 428/782 [00:02<00:01, 210.11it/s]



Epoch [502/700]:  60%|██████    | 472/782 [00:02<00:01, 210.44it/s]



Epoch [502/700]:  66%|██████▌   | 516/782 [00:02<00:01, 213.95it/s]



Epoch [502/700]:  72%|███████▏  | 560/782 [00:02<00:01, 210.92it/s]



Epoch [502/700]:  77%|███████▋  | 604/782 [00:02<00:00, 206.31it/s]



Epoch [502/700]:  85%|████████▌ | 668/782 [00:03<00:00, 209.78it/s]



Epoch [502/700]:  91%|█████████ | 712/782 [00:03<00:00, 211.76it/s]



Epoch [502/700]:  97%|█████████▋| 756/782 [00:03<00:00, 210.53it/s]



Epoch [502/700]: 100%|██████████| 782/782 [00:03<00:00, 208.31it/s]


Learning Rate: 0.004500
Train Loss: 0.0576, Accuracy: 98.00%, Confidence: 0.9783
Test Loss: 2.0708, Accuracy: 73.22%, Confidence: 0.9435
Train-Test Accuracy Gap: 24.78%


Epoch [503/700]:   3%|▎         | 20/782 [00:00<00:03, 199.31it/s]



Epoch [503/700]:  11%|█         | 85/782 [00:00<00:03, 210.04it/s]



Epoch [503/700]:  16%|█▋        | 128/782 [00:00<00:03, 203.75it/s]



Epoch [503/700]:  22%|██▏       | 172/782 [00:00<00:02, 208.76it/s]



Epoch [503/700]:  28%|██▊       | 216/782 [00:01<00:02, 211.79it/s]



Epoch [503/700]:  33%|███▎      | 260/782 [00:01<00:02, 210.19it/s]



Epoch [503/700]:  39%|███▉      | 304/782 [00:01<00:02, 211.46it/s]



Epoch [503/700]:  45%|████▍     | 348/782 [00:01<00:02, 211.78it/s]



Epoch [503/700]:  50%|█████     | 391/782 [00:01<00:01, 207.06it/s]



Epoch [503/700]:  55%|█████▌    | 434/782 [00:02<00:01, 208.70it/s]



Epoch [503/700]:  64%|██████▍   | 500/782 [00:02<00:01, 213.22it/s]



Epoch [503/700]:  70%|██████▉   | 544/782 [00:02<00:01, 211.30it/s]



Epoch [503/700]:  75%|███████▌  | 588/782 [00:02<00:00, 211.00it/s]



Epoch [503/700]:  81%|████████  | 632/782 [00:03<00:00, 208.44it/s]



Epoch [503/700]:  86%|████████▌ | 674/782 [00:03<00:00, 206.61it/s]



Epoch [503/700]:  92%|█████████▏| 716/782 [00:03<00:00, 205.37it/s]



Epoch [503/700]:  97%|█████████▋| 760/782 [00:03<00:00, 209.76it/s]



Epoch [503/700]: 100%|██████████| 782/782 [00:03<00:00, 208.63it/s]


Learning Rate: 0.004500
Train Loss: 0.0553, Accuracy: 98.12%, Confidence: 0.9782
Test Loss: 2.0058, Accuracy: 73.02%, Confidence: 0.9419
Train-Test Accuracy Gap: 25.10%


Epoch [504/700]:   3%|▎         | 21/782 [00:00<00:03, 205.83it/s]



Epoch [504/700]:  11%|█         | 87/782 [00:00<00:03, 211.86it/s]



Epoch [504/700]:  17%|█▋        | 131/782 [00:00<00:03, 210.63it/s]



Epoch [504/700]:  22%|██▏       | 175/782 [00:00<00:02, 210.68it/s]



Epoch [504/700]:  28%|██▊       | 219/782 [00:01<00:02, 212.77it/s]



Epoch [504/700]:  34%|███▎      | 263/782 [00:01<00:02, 212.69it/s]



Epoch [504/700]:  39%|███▉      | 307/782 [00:01<00:02, 208.85it/s]



Epoch [504/700]:  47%|████▋     | 370/782 [00:01<00:01, 206.52it/s]



Epoch [504/700]:  53%|█████▎    | 412/782 [00:01<00:01, 205.59it/s]



Epoch [504/700]:  58%|█████▊    | 454/782 [00:02<00:01, 205.19it/s]



Epoch [504/700]:  63%|██████▎   | 496/782 [00:02<00:01, 202.82it/s]



Epoch [504/700]:  69%|██████▉   | 538/782 [00:02<00:01, 202.80it/s]



Epoch [504/700]:  74%|███████▍  | 581/782 [00:02<00:00, 203.38it/s]



Epoch [504/700]:  80%|███████▉  | 624/782 [00:03<00:00, 207.00it/s]



Epoch [504/700]:  85%|████████▌ | 666/782 [00:03<00:00, 207.74it/s]



Epoch [504/700]:  91%|█████████ | 709/782 [00:03<00:00, 209.15it/s]



Epoch [504/700]:  96%|█████████▌| 752/782 [00:03<00:00, 208.60it/s]



Epoch [504/700]: 100%|██████████| 782/782 [00:03<00:00, 207.27it/s]


Learning Rate: 0.004500
Train Loss: 0.0537, Accuracy: 98.17%, Confidence: 0.9789
Test Loss: 2.3132, Accuracy: 71.05%, Confidence: 0.9438
Train-Test Accuracy Gap: 27.12%


Epoch [505/700]:   3%|▎         | 20/782 [00:00<00:03, 191.30it/s]



Epoch [505/700]:  10%|█         | 82/782 [00:00<00:03, 202.14it/s]



Epoch [505/700]:  16%|█▌        | 124/782 [00:00<00:03, 196.66it/s]



Epoch [505/700]:  19%|█▊        | 146/782 [00:00<00:03, 201.48it/s]



Epoch [505/700]:  27%|██▋       | 210/782 [00:01<00:02, 206.13it/s]



Epoch [505/700]:  32%|███▏      | 252/782 [00:01<00:02, 205.06it/s]



Epoch [505/700]:  38%|███▊      | 295/782 [00:01<00:02, 207.17it/s]



Epoch [505/700]:  43%|████▎     | 338/782 [00:01<00:02, 207.91it/s]



Epoch [505/700]:  49%|████▊     | 380/782 [00:01<00:01, 203.85it/s]



Epoch [505/700]:  54%|█████▍    | 423/782 [00:02<00:01, 207.74it/s]



Epoch [505/700]:  60%|█████▉    | 467/782 [00:02<00:01, 211.54it/s]



Epoch [505/700]:  68%|██████▊   | 533/782 [00:02<00:01, 214.16it/s]



Epoch [505/700]:  74%|███████▍  | 577/782 [00:02<00:00, 209.60it/s]



Epoch [505/700]:  79%|███████▉  | 620/782 [00:02<00:00, 209.80it/s]



Epoch [505/700]:  85%|████████▍ | 663/782 [00:03<00:00, 207.81it/s]



Epoch [505/700]:  90%|█████████ | 706/782 [00:03<00:00, 209.10it/s]



Epoch [505/700]:  96%|█████████▌| 749/782 [00:03<00:00, 209.33it/s]



Epoch [505/700]: 100%|██████████| 782/782 [00:03<00:00, 206.73it/s]


Learning Rate: 0.004500
Train Loss: 0.0586, Accuracy: 98.03%, Confidence: 0.9781
Test Loss: 2.1108, Accuracy: 73.12%, Confidence: 0.9432
Train-Test Accuracy Gap: 24.91%


Epoch [506/700]:   3%|▎         | 22/782 [00:00<00:03, 211.24it/s]



Epoch [506/700]:   6%|▌         | 44/782 [00:00<00:03, 213.35it/s]



Epoch [506/700]:   8%|▊         | 66/782 [00:00<00:03, 210.98it/s]



Epoch [506/700]:  11%|█▏        | 88/782 [00:00<00:03, 210.94it/s]



Epoch [506/700]:  14%|█▍        | 110/782 [00:00<00:03, 212.08it/s]



Epoch [506/700]:  17%|█▋        | 132/782 [00:00<00:03, 208.37it/s]



Epoch [506/700]:  22%|██▏       | 174/782 [00:00<00:02, 206.09it/s]



Epoch [506/700]:  28%|██▊       | 218/782 [00:01<00:02, 209.14it/s]



Epoch [506/700]:  33%|███▎      | 260/782 [00:01<00:02, 209.05it/s]



Epoch [506/700]:  39%|███▉      | 304/782 [00:01<00:02, 211.18it/s]



Epoch [506/700]:  45%|████▍     | 348/782 [00:01<00:02, 207.21it/s]



Epoch [506/700]:  50%|█████     | 391/782 [00:01<00:01, 204.81it/s]



Epoch [506/700]:  55%|█████▌    | 433/782 [00:02<00:01, 200.12it/s]



Epoch [506/700]:  61%|██████    | 475/782 [00:02<00:01, 203.70it/s]



Epoch [506/700]:  66%|██████▋   | 519/782 [00:02<00:01, 208.91it/s]



Epoch [506/700]:  75%|███████▍  | 583/782 [00:02<00:00, 206.06it/s]



Epoch [506/700]:  80%|███████▉  | 625/782 [00:03<00:00, 207.50it/s]



Epoch [506/700]:  85%|████████▌ | 668/782 [00:03<00:00, 204.94it/s]



Epoch [506/700]:  91%|█████████ | 710/782 [00:03<00:00, 204.40it/s]



Epoch [506/700]:  96%|█████████▌| 752/782 [00:03<00:00, 202.12it/s]



Epoch [506/700]: 100%|██████████| 782/782 [00:03<00:00, 206.27it/s]


Learning Rate: 0.004500
Train Loss: 0.0560, Accuracy: 98.16%, Confidence: 0.9793
Test Loss: 1.9965, Accuracy: 73.42%, Confidence: 0.9431
Train-Test Accuracy Gap: 24.74%


Epoch [507/700]:   3%|▎         | 22/782 [00:00<00:03, 212.39it/s]



Epoch [507/700]:   6%|▌         | 44/782 [00:00<00:03, 204.94it/s]



Epoch [507/700]:  11%|█         | 87/782 [00:00<00:03, 208.11it/s]



Epoch [507/700]:  16%|█▋        | 129/782 [00:00<00:03, 205.75it/s]



Epoch [507/700]:  22%|██▏       | 172/782 [00:00<00:02, 207.18it/s]



Epoch [507/700]:  27%|██▋       | 215/782 [00:01<00:02, 207.99it/s]



Epoch [507/700]:  33%|███▎      | 257/782 [00:01<00:02, 203.22it/s]



Epoch [507/700]:  38%|███▊      | 299/782 [00:01<00:02, 202.99it/s]



Epoch [507/700]:  44%|████▎     | 341/782 [00:01<00:02, 198.50it/s]



Epoch [507/700]:  49%|████▉     | 382/782 [00:01<00:02, 199.07it/s]



Epoch [507/700]:  54%|█████▍    | 424/782 [00:02<00:01, 203.20it/s]



Epoch [507/700]:  60%|█████▉    | 467/782 [00:02<00:01, 205.10it/s]



Epoch [507/700]:  65%|██████▌   | 509/782 [00:02<00:01, 200.87it/s]



Epoch [507/700]:  70%|███████   | 551/782 [00:02<00:01, 198.80it/s]



Epoch [507/700]:  76%|███████▌  | 593/782 [00:02<00:00, 203.18it/s]



Epoch [507/700]:  81%|████████  | 635/782 [00:03<00:00, 198.50it/s]



Epoch [507/700]:  86%|████████▋ | 675/782 [00:03<00:00, 190.79it/s]



Epoch [507/700]:  91%|█████████▏| 715/782 [00:03<00:00, 192.47it/s]



Epoch [507/700]:  97%|█████████▋| 755/782 [00:03<00:00, 191.04it/s]



Epoch [507/700]: 100%|██████████| 782/782 [00:03<00:00, 200.17it/s]


Learning Rate: 0.004500
Train Loss: 0.0641, Accuracy: 97.73%, Confidence: 0.9772
Test Loss: 2.0784, Accuracy: 72.86%, Confidence: 0.9427
Train-Test Accuracy Gap: 24.87%


Epoch [508/700]:   2%|▏         | 18/782 [00:00<00:04, 177.94it/s]



Epoch [508/700]:  10%|▉         | 75/782 [00:00<00:03, 186.02it/s]



Epoch [508/700]:  15%|█▍        | 114/782 [00:00<00:03, 188.97it/s]



Epoch [508/700]:  20%|█▉        | 155/782 [00:00<00:03, 194.68it/s]



Epoch [508/700]:  25%|██▌       | 196/782 [00:01<00:02, 198.16it/s]



Epoch [508/700]:  30%|███       | 237/782 [00:01<00:02, 201.27it/s]



Epoch [508/700]:  36%|███▌      | 280/782 [00:01<00:02, 203.87it/s]



Epoch [508/700]:  44%|████▍     | 344/782 [00:01<00:02, 206.52it/s]



Epoch [508/700]:  49%|████▉     | 386/782 [00:01<00:01, 205.96it/s]



Epoch [508/700]:  55%|█████▍    | 429/782 [00:02<00:01, 206.24it/s]



Epoch [508/700]:  60%|██████    | 473/782 [00:02<00:01, 208.48it/s]



Epoch [508/700]:  66%|██████▌   | 516/782 [00:02<00:01, 209.81it/s]



Epoch [508/700]:  71%|███████▏  | 558/782 [00:02<00:01, 207.64it/s]



Epoch [508/700]:  77%|███████▋  | 602/782 [00:02<00:00, 209.93it/s]



Epoch [508/700]:  82%|████████▏ | 645/782 [00:03<00:00, 208.51it/s]



Epoch [508/700]:  91%|█████████ | 708/782 [00:03<00:00, 207.43it/s]



Epoch [508/700]:  96%|█████████▌| 750/782 [00:03<00:00, 207.95it/s]



Epoch [508/700]: 100%|██████████| 782/782 [00:03<00:00, 202.64it/s]


Learning Rate: 0.004500
Train Loss: 0.0523, Accuracy: 98.26%, Confidence: 0.9788
Test Loss: 2.2623, Accuracy: 72.68%, Confidence: 0.9442
Train-Test Accuracy Gap: 25.58%


Epoch [509/700]:   3%|▎         | 21/782 [00:00<00:03, 202.26it/s]



Epoch [509/700]:   5%|▌         | 42/782 [00:00<00:03, 202.14it/s]



Epoch [509/700]:   8%|▊         | 63/782 [00:00<00:03, 198.29it/s]



Epoch [509/700]:  13%|█▎        | 104/782 [00:00<00:03, 200.55it/s]



Epoch [509/700]:  16%|█▌        | 125/782 [00:00<00:03, 203.52it/s]



Epoch [509/700]:  21%|██▏       | 168/782 [00:00<00:02, 207.20it/s]



Epoch [509/700]:  27%|██▋       | 211/782 [00:01<00:02, 208.84it/s]



Epoch [509/700]:  32%|███▏      | 253/782 [00:01<00:02, 208.18it/s]



Epoch [509/700]:  38%|███▊      | 295/782 [00:01<00:02, 207.82it/s]



Epoch [509/700]:  43%|████▎     | 338/782 [00:01<00:02, 208.45it/s]



Epoch [509/700]:  49%|████▊     | 380/782 [00:01<00:01, 207.78it/s]



Epoch [509/700]:  54%|█████▍    | 424/782 [00:02<00:01, 210.92it/s]



Epoch [509/700]:  63%|██████▎   | 490/782 [00:02<00:01, 210.05it/s]



Epoch [509/700]:  68%|██████▊   | 534/782 [00:02<00:01, 210.81it/s]



Epoch [509/700]:  74%|███████▍  | 578/782 [00:02<00:00, 209.84it/s]



Epoch [509/700]:  79%|███████▉  | 620/782 [00:02<00:00, 208.71it/s]



Epoch [509/700]:  85%|████████▍ | 662/782 [00:03<00:00, 206.91it/s]



Epoch [509/700]:  90%|█████████ | 704/782 [00:03<00:00, 200.26it/s]



Epoch [509/700]:  95%|█████████▌| 746/782 [00:03<00:00, 203.88it/s]



Epoch [509/700]: 100%|██████████| 782/782 [00:03<00:00, 206.18it/s]


Learning Rate: 0.004500
Train Loss: 0.0566, Accuracy: 98.10%, Confidence: 0.9783
Test Loss: 2.1129, Accuracy: 73.27%, Confidence: 0.9430
Train-Test Accuracy Gap: 24.83%


Epoch [510/700]:   3%|▎         | 21/782 [00:00<00:03, 208.59it/s]



Epoch [510/700]:   5%|▌         | 42/782 [00:00<00:03, 205.73it/s]



Epoch [510/700]:  11%|█         | 84/782 [00:00<00:03, 205.29it/s]



Epoch [510/700]:  16%|█▌        | 127/782 [00:00<00:03, 203.84it/s]



Epoch [510/700]:  22%|██▏       | 169/782 [00:00<00:03, 204.33it/s]



Epoch [510/700]:  27%|██▋       | 211/782 [00:01<00:02, 202.75it/s]



Epoch [510/700]:  32%|███▏      | 253/782 [00:01<00:02, 202.42it/s]



Epoch [510/700]:  41%|████      | 317/782 [00:01<00:02, 205.46it/s]



Epoch [510/700]:  46%|████▌     | 359/782 [00:01<00:02, 203.69it/s]



Epoch [510/700]:  51%|█████▏    | 402/782 [00:01<00:01, 206.63it/s]



Epoch [510/700]:  57%|█████▋    | 446/782 [00:02<00:01, 210.20it/s]



Epoch [510/700]:  63%|██████▎   | 489/782 [00:02<00:01, 194.75it/s]



Epoch [510/700]:  68%|██████▊   | 531/782 [00:02<00:01, 198.39it/s]



Epoch [510/700]:  76%|███████▌  | 595/782 [00:02<00:00, 206.12it/s]



Epoch [510/700]:  82%|████████▏ | 638/782 [00:03<00:00, 207.09it/s]



Epoch [510/700]:  87%|████████▋ | 681/782 [00:03<00:00, 206.42it/s]



Epoch [510/700]:  95%|█████████▌| 745/782 [00:03<00:00, 205.99it/s]



Epoch [510/700]: 100%|██████████| 782/782 [00:03<00:00, 204.33it/s]


Learning Rate: 0.004050
Train Loss: 0.0424, Accuracy: 98.60%, Confidence: 0.9813
Test Loss: 2.0065, Accuracy: 74.00%, Confidence: 0.9434
Train-Test Accuracy Gap: 24.60%


Epoch [511/700]:   2%|▏         | 19/782 [00:00<00:04, 182.65it/s]



Epoch [511/700]:  10%|▉         | 78/782 [00:00<00:03, 190.09it/s]



Epoch [511/700]:  15%|█▌        | 120/782 [00:00<00:03, 198.92it/s]



Epoch [511/700]:  21%|██        | 162/782 [00:00<00:03, 202.84it/s]



Epoch [511/700]:  26%|██▋       | 206/782 [00:01<00:02, 208.65it/s]



Epoch [511/700]:  32%|███▏      | 248/782 [00:01<00:02, 207.40it/s]



Epoch [511/700]:  37%|███▋      | 290/782 [00:01<00:02, 207.17it/s]



Epoch [511/700]:  43%|████▎     | 333/782 [00:01<00:02, 205.87it/s]



Epoch [511/700]:  51%|█████     | 396/782 [00:01<00:01, 206.02it/s]



Epoch [511/700]:  56%|█████▌    | 438/782 [00:02<00:01, 204.74it/s]



Epoch [511/700]:  62%|██████▏   | 481/782 [00:02<00:01, 206.86it/s]



Epoch [511/700]:  67%|██████▋   | 523/782 [00:02<00:01, 203.88it/s]



Epoch [511/700]:  73%|███████▎  | 567/782 [00:02<00:01, 208.31it/s]



Epoch [511/700]:  78%|███████▊  | 610/782 [00:02<00:00, 210.75it/s]



Epoch [511/700]:  84%|████████▎ | 654/782 [00:03<00:00, 212.17it/s]



Epoch [511/700]:  89%|████████▉ | 697/782 [00:03<00:00, 208.99it/s]



Epoch [511/700]:  95%|█████████▍| 739/782 [00:03<00:00, 208.26it/s]



Epoch [511/700]: 100%|██████████| 782/782 [00:03<00:00, 204.89it/s]






Learning Rate: 0.004050
Train Loss: 0.0480, Accuracy: 98.38%, Confidence: 0.9804
Test Loss: 2.1568, Accuracy: 73.12%, Confidence: 0.9450
Train-Test Accuracy Gap: 25.26%


Epoch [512/700]:   3%|▎         | 21/782 [00:00<00:03, 202.84it/s]



Epoch [512/700]:  11%|█         | 87/782 [00:00<00:03, 209.73it/s]



Epoch [512/700]:  16%|█▋        | 129/782 [00:00<00:03, 205.89it/s]



Epoch [512/700]:  22%|██▏       | 173/782 [00:00<00:02, 210.01it/s]



Epoch [512/700]:  28%|██▊       | 216/782 [00:01<00:02, 205.85it/s]



Epoch [512/700]:  36%|███▌      | 282/782 [00:01<00:02, 211.91it/s]



Epoch [512/700]:  45%|████▍     | 348/782 [00:01<00:02, 210.89it/s]



Epoch [512/700]:  50%|█████     | 392/782 [00:01<00:01, 209.96it/s]



Epoch [512/700]:  56%|█████▌    | 435/782 [00:02<00:01, 208.93it/s]



Epoch [512/700]:  61%|██████▏   | 479/782 [00:02<00:01, 207.80it/s]



Epoch [512/700]:  67%|██████▋   | 521/782 [00:02<00:01, 204.48it/s]



Epoch [512/700]:  72%|███████▏  | 564/782 [00:02<00:01, 204.55it/s]



Epoch [512/700]:  77%|███████▋  | 606/782 [00:02<00:00, 206.07it/s]



Epoch [512/700]:  83%|████████▎ | 648/782 [00:03<00:00, 199.36it/s]



Epoch [512/700]:  88%|████████▊ | 690/782 [00:03<00:00, 201.27it/s]



Epoch [512/700]:  94%|█████████▎| 733/782 [00:03<00:00, 205.34it/s]



Epoch [512/700]: 100%|██████████| 782/782 [00:03<00:00, 206.24it/s]






Learning Rate: 0.004050
Train Loss: 0.0492, Accuracy: 98.34%, Confidence: 0.9798
Test Loss: 2.1345, Accuracy: 73.83%, Confidence: 0.9470
Train-Test Accuracy Gap: 24.51%


Epoch [513/700]:   3%|▎         | 20/782 [00:00<00:03, 191.51it/s]



Epoch [513/700]:   5%|▌         | 41/782 [00:00<00:03, 198.94it/s]



Epoch [513/700]:   8%|▊         | 62/782 [00:00<00:03, 200.05it/s]



Epoch [513/700]:  11%|█         | 83/782 [00:00<00:03, 202.05it/s]



Epoch [513/700]:  13%|█▎        | 104/782 [00:00<00:03, 201.81it/s]



Epoch [513/700]:  16%|█▌        | 125/782 [00:00<00:03, 203.42it/s]



Epoch [513/700]:  19%|█▊        | 146/782 [00:00<00:03, 205.12it/s]



Epoch [513/700]:  21%|██▏       | 167/782 [00:00<00:03, 204.39it/s]



Epoch [513/700]:  24%|██▍       | 189/782 [00:00<00:02, 207.12it/s]



Epoch [513/700]:  27%|██▋       | 210/782 [00:01<00:02, 206.97it/s]



Epoch [513/700]:  30%|██▉       | 232/782 [00:01<00:02, 208.81it/s]



Epoch [513/700]:  35%|███▌      | 275/782 [00:01<00:02, 209.15it/s]



Epoch [513/700]:  38%|███▊      | 296/782 [00:01<00:02, 207.46it/s]



Epoch [513/700]:  43%|████▎     | 338/782 [00:01<00:02, 205.03it/s]



Epoch [513/700]:  46%|████▌     | 360/782 [00:01<00:02, 207.63it/s]



Epoch [513/700]:  49%|████▊     | 381/782 [00:01<00:01, 207.67it/s]



Epoch [513/700]:  51%|█████▏    | 402/782 [00:01<00:01, 207.85it/s]



Epoch [513/700]:  54%|█████▍    | 423/782 [00:02<00:01, 206.03it/s]



Epoch [513/700]:  57%|█████▋    | 444/782 [00:02<00:01, 205.32it/s]



Epoch [513/700]:  65%|██████▍   | 507/782 [00:02<00:01, 203.25it/s]



Epoch [513/700]:  68%|██████▊   | 528/782 [00:02<00:01, 196.73it/s]



Epoch [513/700]:  75%|███████▌  | 589/782 [00:02<00:00, 199.73it/s]



Epoch [513/700]:  81%|████████  | 631/782 [00:03<00:00, 198.95it/s]



Epoch [513/700]:  86%|████████▌ | 673/782 [00:03<00:00, 204.12it/s]



Epoch [513/700]:  92%|█████████▏| 716/782 [00:03<00:00, 207.62it/s]



Epoch [513/700]:  97%|█████████▋| 759/782 [00:03<00:00, 207.60it/s]



Epoch [513/700]: 100%|██████████| 782/782 [00:03<00:00, 203.90it/s]


Learning Rate: 0.004050
Train Loss: 0.0460, Accuracy: 98.47%, Confidence: 0.9808
Test Loss: 2.0885, Accuracy: 72.21%, Confidence: 0.9399
Train-Test Accuracy Gap: 26.26%


Epoch [514/700]:   3%|▎         | 20/782 [00:00<00:03, 191.99it/s]



Epoch [514/700]:  11%|█         | 85/782 [00:00<00:03, 207.45it/s]



Epoch [514/700]:  19%|█▉        | 151/782 [00:00<00:03, 209.92it/s]



Epoch [514/700]:  25%|██▍       | 193/782 [00:00<00:02, 207.61it/s]



Epoch [514/700]:  30%|███       | 236/782 [00:01<00:02, 207.87it/s]



Epoch [514/700]:  36%|███▌      | 278/782 [00:01<00:02, 204.54it/s]



Epoch [514/700]:  41%|████      | 322/782 [00:01<00:02, 210.01it/s]



Epoch [514/700]:  47%|████▋     | 366/782 [00:01<00:01, 211.43it/s]



Epoch [514/700]:  52%|█████▏    | 410/782 [00:01<00:01, 207.55it/s]



Epoch [514/700]:  58%|█████▊    | 453/782 [00:02<00:01, 202.71it/s]



Epoch [514/700]:  63%|██████▎   | 496/782 [00:02<00:01, 205.37it/s]



Epoch [514/700]:  69%|██████▉   | 539/782 [00:02<00:01, 206.51it/s]



Epoch [514/700]:  74%|███████▍  | 582/782 [00:02<00:00, 206.90it/s]



Epoch [514/700]:  80%|███████▉  | 625/782 [00:03<00:00, 208.29it/s]



Epoch [514/700]:  85%|████████▌ | 668/782 [00:03<00:00, 208.83it/s]



Epoch [514/700]:  91%|█████████ | 710/782 [00:03<00:00, 207.96it/s]



Epoch [514/700]:  99%|█████████▉| 773/782 [00:03<00:00, 207.38it/s]



Epoch [514/700]: 100%|██████████| 782/782 [00:03<00:00, 207.07it/s]


Learning Rate: 0.004050
Train Loss: 0.0451, Accuracy: 98.47%, Confidence: 0.9809
Test Loss: 2.1437, Accuracy: 72.59%, Confidence: 0.9418
Train-Test Accuracy Gap: 25.88%


Epoch [515/700]:   2%|▏         | 19/782 [00:00<00:04, 189.99it/s]



Epoch [515/700]:   5%|▌         | 41/782 [00:00<00:03, 203.06it/s]



Epoch [515/700]:   8%|▊         | 62/782 [00:00<00:03, 205.93it/s]



Epoch [515/700]:  11%|█         | 83/782 [00:00<00:03, 200.90it/s]



Epoch [515/700]:  13%|█▎        | 105/782 [00:00<00:03, 205.34it/s]



Epoch [515/700]:  16%|█▌        | 127/782 [00:00<00:03, 207.99it/s]



Epoch [515/700]:  19%|█▉        | 148/782 [00:00<00:03, 208.19it/s]



Epoch [515/700]:  22%|██▏       | 169/782 [00:00<00:02, 207.73it/s]



Epoch [515/700]:  24%|██▍       | 190/782 [00:00<00:02, 208.23it/s]



Epoch [515/700]:  27%|██▋       | 211/782 [00:01<00:02, 206.00it/s]



Epoch [515/700]:  30%|██▉       | 232/782 [00:01<00:02, 201.24it/s]



Epoch [515/700]:  35%|███▌      | 275/782 [00:01<00:02, 206.34it/s]



Epoch [515/700]:  41%|████      | 319/782 [00:01<00:02, 210.32it/s]



Epoch [515/700]:  44%|████▎     | 341/782 [00:01<00:02, 211.32it/s]



Epoch [515/700]:  46%|████▋     | 363/782 [00:01<00:01, 212.40it/s]



Epoch [515/700]:  49%|████▉     | 385/782 [00:01<00:01, 210.25it/s]



Epoch [515/700]:  52%|█████▏    | 407/782 [00:01<00:01, 209.34it/s]



Epoch [515/700]:  55%|█████▍    | 428/782 [00:02<00:01, 209.08it/s]



Epoch [515/700]:  57%|█████▋    | 449/782 [00:02<00:01, 207.07it/s]



Epoch [515/700]:  60%|██████    | 470/782 [00:02<00:01, 203.15it/s]



Epoch [515/700]:  63%|██████▎   | 492/782 [00:02<00:01, 205.57it/s]



Epoch [515/700]:  66%|██████▌   | 514/782 [00:02<00:01, 208.31it/s]



Epoch [515/700]:  68%|██████▊   | 535/782 [00:02<00:01, 208.43it/s]



Epoch [515/700]:  71%|███████   | 556/782 [00:02<00:01, 204.20it/s]



Epoch [515/700]:  74%|███████▍  | 577/782 [00:02<00:01, 203.42it/s]



Epoch [515/700]:  76%|███████▋  | 598/782 [00:02<00:00, 199.69it/s]



Epoch [515/700]:  79%|███████▉  | 619/782 [00:03<00:00, 200.14it/s]



Epoch [515/700]:  85%|████████▍ | 663/782 [00:03<00:00, 207.07it/s]



Epoch [515/700]:  90%|█████████ | 707/782 [00:03<00:00, 210.01it/s]



Epoch [515/700]:  96%|█████████▌| 751/782 [00:03<00:00, 211.40it/s]



Epoch [515/700]: 100%|██████████| 782/782 [00:03<00:00, 207.12it/s]


Learning Rate: 0.004050
Train Loss: 0.0468, Accuracy: 98.47%, Confidence: 0.9807
Test Loss: 2.0452, Accuracy: 73.90%, Confidence: 0.9453
Train-Test Accuracy Gap: 24.57%


Epoch [516/700]:   3%|▎         | 21/782 [00:00<00:03, 208.43it/s]



Epoch [516/700]:   8%|▊         | 65/782 [00:00<00:03, 213.86it/s]



Epoch [516/700]:  11%|█         | 87/782 [00:00<00:03, 212.46it/s]



Epoch [516/700]:  14%|█▍        | 109/782 [00:00<00:03, 206.54it/s]



Epoch [516/700]:  17%|█▋        | 130/782 [00:00<00:03, 206.65it/s]



Epoch [516/700]:  19%|█▉        | 152/782 [00:00<00:03, 208.02it/s]



Epoch [516/700]:  22%|██▏       | 174/782 [00:00<00:02, 209.88it/s]



Epoch [516/700]:  25%|██▌       | 196/782 [00:00<00:02, 212.07it/s]



Epoch [516/700]:  28%|██▊       | 218/782 [00:01<00:02, 209.43it/s]



Epoch [516/700]:  31%|███       | 240/782 [00:01<00:02, 210.07it/s]



Epoch [516/700]:  34%|███▎      | 262/782 [00:01<00:02, 209.53it/s]



Epoch [516/700]:  36%|███▋      | 284/782 [00:01<00:02, 210.90it/s]



Epoch [516/700]:  39%|███▉      | 306/782 [00:01<00:02, 212.77it/s]



Epoch [516/700]:  42%|████▏     | 328/782 [00:01<00:02, 210.56it/s]



Epoch [516/700]:  45%|████▍     | 350/782 [00:01<00:02, 209.80it/s]



Epoch [516/700]:  47%|████▋     | 371/782 [00:01<00:01, 206.75it/s]



Epoch [516/700]:  50%|█████     | 392/782 [00:01<00:01, 207.53it/s]



Epoch [516/700]:  56%|█████▌    | 435/782 [00:02<00:01, 207.92it/s]



Epoch [516/700]:  61%|██████    | 477/782 [00:02<00:01, 205.77it/s]



Epoch [516/700]:  66%|██████▋   | 519/782 [00:02<00:01, 207.02it/s]



Epoch [516/700]:  69%|██████▉   | 541/782 [00:02<00:01, 208.04it/s]



Epoch [516/700]:  72%|███████▏  | 562/782 [00:02<00:01, 208.18it/s]



Epoch [516/700]:  75%|███████▍  | 583/782 [00:02<00:00, 207.41it/s]



Epoch [516/700]:  77%|███████▋  | 604/782 [00:02<00:00, 208.13it/s]



Epoch [516/700]:  80%|███████▉  | 625/782 [00:02<00:00, 208.23it/s]



Epoch [516/700]:  85%|████████▌ | 668/782 [00:03<00:00, 210.24it/s]



Epoch [516/700]:  91%|█████████ | 712/782 [00:03<00:00, 209.56it/s]



Epoch [516/700]:  96%|█████████▋| 754/782 [00:03<00:00, 204.42it/s]



Epoch [516/700]: 100%|██████████| 782/782 [00:03<00:00, 207.60it/s]


Learning Rate: 0.004050
Train Loss: 0.0498, Accuracy: 98.34%, Confidence: 0.9801
Test Loss: 2.2949, Accuracy: 71.63%, Confidence: 0.9445
Train-Test Accuracy Gap: 26.71%


Epoch [517/700]:   3%|▎         | 21/782 [00:00<00:03, 204.29it/s]



Epoch [517/700]:  11%|█         | 87/782 [00:00<00:03, 206.78it/s]



Epoch [517/700]:  17%|█▋        | 130/782 [00:00<00:03, 210.16it/s]



Epoch [517/700]:  22%|██▏       | 174/782 [00:00<00:02, 208.55it/s]



Epoch [517/700]:  31%|███       | 240/782 [00:01<00:02, 214.38it/s]



Epoch [517/700]:  39%|███▉      | 307/782 [00:01<00:02, 217.30it/s]



Epoch [517/700]:  45%|████▍     | 351/782 [00:01<00:02, 210.67it/s]



Epoch [517/700]:  50%|█████     | 394/782 [00:01<00:01, 208.21it/s]



Epoch [517/700]:  56%|█████▌    | 438/782 [00:02<00:01, 213.28it/s]



Epoch [517/700]:  64%|██████▍   | 504/782 [00:02<00:01, 212.01it/s]



Epoch [517/700]:  70%|███████   | 548/782 [00:02<00:01, 210.34it/s]



Epoch [517/700]:  76%|███████▌  | 592/782 [00:02<00:00, 208.95it/s]



Epoch [517/700]:  81%|████████  | 634/782 [00:03<00:00, 207.47it/s]



Epoch [517/700]:  87%|████████▋ | 677/782 [00:03<00:00, 208.96it/s]



Epoch [517/700]:  95%|█████████▍| 742/782 [00:03<00:00, 212.31it/s]



Epoch [517/700]: 100%|██████████| 782/782 [00:03<00:00, 210.57it/s]


Learning Rate: 0.004050
Train Loss: 0.0404, Accuracy: 98.66%, Confidence: 0.9816
Test Loss: 2.2755, Accuracy: 73.06%, Confidence: 0.9450
Train-Test Accuracy Gap: 25.60%


Epoch [518/700]:   3%|▎         | 21/782 [00:00<00:03, 203.06it/s]



Epoch [518/700]:  17%|█▋        | 130/782 [00:00<00:03, 202.47it/s]



Epoch [518/700]:  22%|██▏       | 173/782 [00:00<00:02, 204.73it/s]



Epoch [518/700]:  28%|██▊       | 216/782 [00:01<00:02, 207.88it/s]



Epoch [518/700]:  36%|███▌      | 281/782 [00:01<00:02, 209.35it/s]



Epoch [518/700]:  41%|████▏     | 324/782 [00:01<00:02, 209.79it/s]



Epoch [518/700]:  47%|████▋     | 366/782 [00:01<00:02, 205.58it/s]



Epoch [518/700]:  52%|█████▏    | 408/782 [00:01<00:01, 204.31it/s]



Epoch [518/700]:  58%|█████▊    | 450/782 [00:02<00:01, 204.42it/s]



Epoch [518/700]:  66%|██████▌   | 513/782 [00:02<00:01, 201.85it/s]



Epoch [518/700]:  71%|███████   | 555/782 [00:02<00:01, 204.83it/s]



Epoch [518/700]:  76%|███████▋  | 597/782 [00:02<00:00, 199.74it/s]



Epoch [518/700]:  82%|████████▏ | 641/782 [00:03<00:00, 207.74it/s]



Epoch [518/700]:  88%|████████▊ | 685/782 [00:03<00:00, 212.11it/s]



Epoch [518/700]:  93%|█████████▎| 729/782 [00:03<00:00, 211.68it/s]



Epoch [518/700]: 100%|██████████| 782/782 [00:03<00:00, 206.25it/s]






Learning Rate: 0.004050
Train Loss: 0.0492, Accuracy: 98.33%, Confidence: 0.9802
Test Loss: 2.1415, Accuracy: 72.70%, Confidence: 0.9401
Train-Test Accuracy Gap: 25.63%


Epoch [519/700]:   3%|▎         | 22/782 [00:00<00:03, 209.98it/s]



Epoch [519/700]:   6%|▌         | 44/782 [00:00<00:03, 211.83it/s]



Epoch [519/700]:   8%|▊         | 66/782 [00:00<00:03, 208.38it/s]



Epoch [519/700]:  11%|█         | 87/782 [00:00<00:03, 206.62it/s]



Epoch [519/700]:  14%|█▍        | 109/782 [00:00<00:03, 209.67it/s]



Epoch [519/700]:  17%|█▋        | 131/782 [00:00<00:03, 210.91it/s]



Epoch [519/700]:  20%|█▉        | 153/782 [00:00<00:02, 211.29it/s]



Epoch [519/700]:  22%|██▏       | 175/782 [00:00<00:02, 208.14it/s]



Epoch [519/700]:  25%|██▌       | 196/782 [00:00<00:02, 202.09it/s]



Epoch [519/700]:  28%|██▊       | 218/782 [00:01<00:02, 205.12it/s]



Epoch [519/700]:  31%|███       | 240/782 [00:01<00:02, 206.85it/s]



Epoch [519/700]:  33%|███▎      | 261/782 [00:01<00:02, 204.33it/s]



Epoch [519/700]:  36%|███▌      | 282/782 [00:01<00:02, 203.15it/s]



Epoch [519/700]:  39%|███▊      | 303/782 [00:01<00:02, 202.37it/s]



Epoch [519/700]:  42%|████▏     | 325/782 [00:01<00:02, 205.85it/s]



Epoch [519/700]:  44%|████▍     | 346/782 [00:01<00:02, 205.33it/s]



Epoch [519/700]:  47%|████▋     | 367/782 [00:01<00:02, 204.10it/s]



Epoch [519/700]:  55%|█████▌    | 432/782 [00:02<00:01, 204.93it/s]



Epoch [519/700]:  61%|██████    | 474/782 [00:02<00:01, 198.08it/s]



Epoch [519/700]:  66%|██████▌   | 518/782 [00:02<00:01, 205.06it/s]



Epoch [519/700]:  72%|███████▏  | 560/782 [00:02<00:01, 203.07it/s]



Epoch [519/700]:  77%|███████▋  | 602/782 [00:02<00:00, 198.38it/s]



Epoch [519/700]:  82%|████████▏ | 644/782 [00:03<00:00, 203.41it/s]



Epoch [519/700]:  88%|████████▊ | 687/782 [00:03<00:00, 207.73it/s]



Epoch [519/700]:  93%|█████████▎| 730/782 [00:03<00:00, 207.59it/s]



Epoch [519/700]: 100%|██████████| 782/782 [00:03<00:00, 204.68it/s]






Learning Rate: 0.004050
Train Loss: 0.0505, Accuracy: 98.24%, Confidence: 0.9796
Test Loss: 2.3012, Accuracy: 71.92%, Confidence: 0.9438
Train-Test Accuracy Gap: 26.32%


Epoch [520/700]:   3%|▎         | 21/782 [00:00<00:03, 201.34it/s]



Epoch [520/700]:  11%|█         | 85/782 [00:00<00:03, 204.45it/s]



Epoch [520/700]:  16%|█▋        | 128/782 [00:00<00:03, 210.14it/s]



Epoch [520/700]:  22%|██▏       | 172/782 [00:00<00:02, 209.15it/s]



Epoch [520/700]:  28%|██▊       | 216/782 [00:01<00:02, 208.27it/s]



Epoch [520/700]:  33%|███▎      | 258/782 [00:01<00:02, 208.11it/s]



Epoch [520/700]:  38%|███▊      | 301/782 [00:01<00:02, 209.68it/s]



Epoch [520/700]:  44%|████▍     | 344/782 [00:01<00:02, 207.65it/s]



Epoch [520/700]:  52%|█████▏    | 410/782 [00:01<00:01, 211.74it/s]



Epoch [520/700]:  58%|█████▊    | 454/782 [00:02<00:01, 208.52it/s]



Epoch [520/700]:  64%|██████▎   | 497/782 [00:02<00:01, 208.10it/s]



Epoch [520/700]:  69%|██████▉   | 540/782 [00:02<00:01, 208.98it/s]



Epoch [520/700]:  74%|███████▍  | 582/782 [00:02<00:00, 204.77it/s]



Epoch [520/700]:  80%|███████▉  | 625/782 [00:03<00:00, 207.40it/s]



Epoch [520/700]:  86%|████████▌ | 669/782 [00:03<00:00, 209.22it/s]



Epoch [520/700]:  91%|█████████ | 713/782 [00:03<00:00, 208.68it/s]



Epoch [520/700]:  97%|█████████▋| 757/782 [00:03<00:00, 212.33it/s]



Epoch [520/700]: 100%|██████████| 782/782 [00:03<00:00, 208.07it/s]


Learning Rate: 0.004050
Train Loss: 0.0467, Accuracy: 98.42%, Confidence: 0.9809
Test Loss: 2.2756, Accuracy: 71.56%, Confidence: 0.9426
Train-Test Accuracy Gap: 26.86%


Epoch [521/700]:   3%|▎         | 21/782 [00:00<00:03, 207.66it/s]



Epoch [521/700]:   5%|▌         | 43/782 [00:00<00:03, 211.25it/s]



Epoch [521/700]:  11%|█         | 87/782 [00:00<00:03, 213.81it/s]



Epoch [521/700]:  14%|█▍        | 109/782 [00:00<00:03, 210.10it/s]



Epoch [521/700]:  17%|█▋        | 131/782 [00:00<00:03, 210.21it/s]



Epoch [521/700]:  22%|██▏       | 175/782 [00:00<00:02, 212.04it/s]



Epoch [521/700]:  28%|██▊       | 219/782 [00:01<00:02, 212.63it/s]



Epoch [521/700]:  34%|███▎      | 263/782 [00:01<00:02, 210.37it/s]



Epoch [521/700]:  39%|███▉      | 307/782 [00:01<00:02, 211.09it/s]



Epoch [521/700]:  45%|████▍     | 351/782 [00:01<00:02, 208.95it/s]



Epoch [521/700]:  48%|████▊     | 372/782 [00:01<00:01, 207.12it/s]



Epoch [521/700]:  50%|█████     | 393/782 [00:01<00:01, 206.60it/s]



Epoch [521/700]:  53%|█████▎    | 414/782 [00:01<00:01, 204.72it/s]



Epoch [521/700]:  56%|█████▌    | 435/782 [00:02<00:01, 205.11it/s]



Epoch [521/700]:  58%|█████▊    | 456/782 [00:02<00:01, 205.54it/s]



Epoch [521/700]:  61%|██████    | 478/782 [00:02<00:01, 207.15it/s]



Epoch [521/700]:  64%|██████▍   | 500/782 [00:02<00:01, 208.87it/s]



Epoch [521/700]:  67%|██████▋   | 521/782 [00:02<00:01, 207.23it/s]



Epoch [521/700]:  69%|██████▉   | 543/782 [00:02<00:01, 209.57it/s]



Epoch [521/700]:  75%|███████▍  | 586/782 [00:02<00:00, 210.36it/s]



Epoch [521/700]:  81%|████████  | 630/782 [00:03<00:00, 211.17it/s]



Epoch [521/700]:  86%|████████▌ | 674/782 [00:03<00:00, 212.18it/s]



Epoch [521/700]:  97%|█████████▋| 762/782 [00:03<00:00, 211.00it/s]



Epoch [521/700]: 100%|██████████| 782/782 [00:03<00:00, 209.72it/s]


Learning Rate: 0.004050
Train Loss: 0.0430, Accuracy: 98.53%, Confidence: 0.9812
Test Loss: 2.1684, Accuracy: 72.80%, Confidence: 0.9425
Train-Test Accuracy Gap: 25.73%


Epoch [522/700]:   3%|▎         | 21/782 [00:00<00:03, 209.86it/s]



Epoch [522/700]:  11%|█         | 87/782 [00:00<00:03, 215.12it/s]



Epoch [522/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.52it/s]



Epoch [522/700]:  22%|██▏       | 174/782 [00:00<00:02, 203.94it/s]



Epoch [522/700]:  28%|██▊       | 216/782 [00:01<00:02, 201.67it/s]



Epoch [522/700]:  33%|███▎      | 258/782 [00:01<00:02, 203.86it/s]



Epoch [522/700]:  38%|███▊      | 301/782 [00:01<00:02, 205.85it/s]



Epoch [522/700]:  47%|████▋     | 365/782 [00:01<00:02, 207.84it/s]



Epoch [522/700]:  52%|█████▏    | 409/782 [00:01<00:01, 212.76it/s]



Epoch [522/700]:  58%|█████▊    | 453/782 [00:02<00:01, 206.96it/s]



Epoch [522/700]:  63%|██████▎   | 496/782 [00:02<00:01, 206.77it/s]



Epoch [522/700]:  69%|██████▉   | 539/782 [00:02<00:01, 202.59it/s]



Epoch [522/700]:  74%|███████▍  | 582/782 [00:02<00:00, 203.55it/s]



Epoch [522/700]:  80%|███████▉  | 624/782 [00:03<00:00, 201.21it/s]



Epoch [522/700]:  85%|████████▌ | 667/782 [00:03<00:00, 207.11it/s]



Epoch [522/700]:  91%|█████████ | 709/782 [00:03<00:00, 207.51it/s]



Epoch [522/700]:  99%|█████████▉| 773/782 [00:03<00:00, 209.01it/s]



Epoch [522/700]: 100%|██████████| 782/782 [00:03<00:00, 206.48it/s]


Learning Rate: 0.004050
Train Loss: 0.0429, Accuracy: 98.52%, Confidence: 0.9817
Test Loss: 2.3212, Accuracy: 71.47%, Confidence: 0.9450
Train-Test Accuracy Gap: 27.05%


Epoch [523/700]:   2%|▏         | 19/782 [00:00<00:04, 188.88it/s]



Epoch [523/700]:   5%|▌         | 40/782 [00:00<00:03, 196.50it/s]



Epoch [523/700]:   8%|▊         | 60/782 [00:00<00:03, 197.26it/s]



Epoch [523/700]:  10%|█         | 80/782 [00:00<00:03, 197.70it/s]



Epoch [523/700]:  13%|█▎        | 101/782 [00:00<00:03, 200.35it/s]



Epoch [523/700]:  21%|██        | 166/782 [00:00<00:02, 207.25it/s]



Epoch [523/700]:  27%|██▋       | 209/782 [00:01<00:02, 206.70it/s]



Epoch [523/700]:  32%|███▏      | 251/782 [00:01<00:02, 208.13it/s]



Epoch [523/700]:  38%|███▊      | 295/782 [00:01<00:02, 212.81it/s]



Epoch [523/700]:  46%|████▌     | 361/782 [00:01<00:01, 212.66it/s]



Epoch [523/700]:  52%|█████▏    | 404/782 [00:01<00:01, 207.03it/s]



Epoch [523/700]:  57%|█████▋    | 448/782 [00:02<00:01, 210.76it/s]



Epoch [523/700]:  63%|██████▎   | 492/782 [00:02<00:01, 211.28it/s]



Epoch [523/700]:  69%|██████▊   | 536/782 [00:02<00:01, 212.60it/s]



Epoch [523/700]:  74%|███████▍  | 579/782 [00:02<00:00, 208.57it/s]



Epoch [523/700]:  80%|███████▉  | 622/782 [00:02<00:00, 208.96it/s]



Epoch [523/700]:  85%|████████▌ | 666/782 [00:03<00:00, 211.78it/s]



Epoch [523/700]:  91%|█████████ | 710/782 [00:03<00:00, 210.39it/s]



Epoch [523/700]:  96%|█████████▋| 754/782 [00:03<00:00, 209.79it/s]



Epoch [523/700]: 100%|██████████| 782/782 [00:03<00:00, 208.63it/s]


Learning Rate: 0.004050
Train Loss: 0.0495, Accuracy: 98.29%, Confidence: 0.9799
Test Loss: 2.2802, Accuracy: 72.49%, Confidence: 0.9433
Train-Test Accuracy Gap: 25.80%


Epoch [524/700]:   3%|▎         | 22/782 [00:00<00:03, 211.11it/s]



Epoch [524/700]:   6%|▌         | 44/782 [00:00<00:03, 209.68it/s]



Epoch [524/700]:   8%|▊         | 65/782 [00:00<00:03, 205.49it/s]



Epoch [524/700]:  17%|█▋        | 130/782 [00:00<00:03, 212.34it/s]



Epoch [524/700]:  22%|██▏       | 174/782 [00:00<00:02, 209.92it/s]



Epoch [524/700]:  28%|██▊       | 217/782 [00:01<00:02, 210.47it/s]



Epoch [524/700]:  33%|███▎      | 261/782 [00:01<00:02, 208.29it/s]



Epoch [524/700]:  42%|████▏     | 326/782 [00:01<00:02, 210.53it/s]



Epoch [524/700]:  47%|████▋     | 370/782 [00:01<00:01, 207.05it/s]



Epoch [524/700]:  53%|█████▎    | 414/782 [00:01<00:01, 210.96it/s]



Epoch [524/700]:  59%|█████▊    | 458/782 [00:02<00:01, 207.71it/s]



Epoch [524/700]:  64%|██████▍   | 500/782 [00:02<00:01, 208.70it/s]



Epoch [524/700]:  70%|██████▉   | 544/782 [00:02<00:01, 212.07it/s]



Epoch [524/700]:  75%|███████▌  | 587/782 [00:02<00:00, 207.27it/s]



Epoch [524/700]:  81%|████████  | 630/782 [00:03<00:00, 209.38it/s]



Epoch [524/700]:  86%|████████▌ | 674/782 [00:03<00:00, 209.03it/s]



Epoch [524/700]:  92%|█████████▏| 717/782 [00:03<00:00, 208.26it/s]



Epoch [524/700]: 100%|██████████| 782/782 [00:03<00:00, 209.08it/s]






Learning Rate: 0.004050
Train Loss: 0.0485, Accuracy: 98.39%, Confidence: 0.9807
Test Loss: 2.0319, Accuracy: 73.23%, Confidence: 0.9420
Train-Test Accuracy Gap: 25.16%


Epoch [525/700]:   3%|▎         | 21/782 [00:00<00:03, 202.76it/s]



Epoch [525/700]:  11%|█         | 85/782 [00:00<00:03, 206.01it/s]



Epoch [525/700]:  16%|█▌        | 127/782 [00:00<00:03, 206.21it/s]



Epoch [525/700]:  22%|██▏       | 171/782 [00:00<00:02, 208.70it/s]



Epoch [525/700]:  30%|███       | 237/782 [00:01<00:02, 211.64it/s]



Epoch [525/700]:  36%|███▌      | 281/782 [00:01<00:02, 209.95it/s]



Epoch [525/700]:  42%|████▏     | 325/782 [00:01<00:02, 209.45it/s]



Epoch [525/700]:  47%|████▋     | 369/782 [00:01<00:01, 209.72it/s]



Epoch [525/700]:  53%|█████▎    | 413/782 [00:01<00:01, 210.48it/s]



Epoch [525/700]:  58%|█████▊    | 456/782 [00:02<00:01, 205.68it/s]



Epoch [525/700]:  64%|██████▍   | 499/782 [00:02<00:01, 206.97it/s]



Epoch [525/700]:  72%|███████▏  | 564/782 [00:02<00:01, 210.22it/s]



Epoch [525/700]:  78%|███████▊  | 607/782 [00:02<00:00, 203.10it/s]



Epoch [525/700]:  83%|████████▎ | 650/782 [00:03<00:00, 206.25it/s]



Epoch [525/700]:  88%|████████▊ | 692/782 [00:03<00:00, 206.30it/s]



Epoch [525/700]:  94%|█████████▍| 735/782 [00:03<00:00, 208.56it/s]



Epoch [525/700]: 100%|██████████| 782/782 [00:03<00:00, 207.25it/s]






Learning Rate: 0.004050
Train Loss: 0.0452, Accuracy: 98.54%, Confidence: 0.9815
Test Loss: 2.5045, Accuracy: 70.12%, Confidence: 0.9422
Train-Test Accuracy Gap: 28.42%


Epoch [526/700]:   2%|▏         | 18/782 [00:00<00:04, 178.99it/s]



Epoch [526/700]:   5%|▍         | 37/782 [00:00<00:04, 184.32it/s]



Epoch [526/700]:   7%|▋         | 58/782 [00:00<00:03, 194.20it/s]



Epoch [526/700]:  16%|█▌        | 124/782 [00:00<00:03, 207.23it/s]



Epoch [526/700]:  21%|██▏       | 167/782 [00:00<00:02, 206.78it/s]



Epoch [526/700]:  30%|██▉       | 233/782 [00:01<00:02, 213.16it/s]



Epoch [526/700]:  35%|███▌      | 277/782 [00:01<00:02, 212.50it/s]



Epoch [526/700]:  41%|████      | 321/782 [00:01<00:02, 211.80it/s]



Epoch [526/700]:  47%|████▋     | 365/782 [00:01<00:01, 210.72it/s]



Epoch [526/700]:  52%|█████▏    | 409/782 [00:01<00:01, 212.81it/s]



Epoch [526/700]:  58%|█████▊    | 453/782 [00:02<00:01, 213.05it/s]



Epoch [526/700]:  64%|██████▎   | 497/782 [00:02<00:01, 212.15it/s]



Epoch [526/700]:  72%|███████▏  | 562/782 [00:02<00:01, 206.92it/s]



Epoch [526/700]:  77%|███████▋  | 605/782 [00:02<00:00, 210.18it/s]



Epoch [526/700]:  83%|████████▎ | 649/782 [00:03<00:00, 209.21it/s]



Epoch [526/700]:  88%|████████▊ | 692/782 [00:03<00:00, 210.15it/s]



Epoch [526/700]:  94%|█████████▍| 736/782 [00:03<00:00, 205.84it/s]



Epoch [526/700]: 100%|██████████| 782/782 [00:03<00:00, 208.45it/s]






Learning Rate: 0.004050
Train Loss: 0.0479, Accuracy: 98.36%, Confidence: 0.9803
Test Loss: 2.1290, Accuracy: 73.86%, Confidence: 0.9466
Train-Test Accuracy Gap: 24.50%


Epoch [527/700]:   3%|▎         | 21/782 [00:00<00:03, 202.09it/s]



Epoch [527/700]:   5%|▌         | 42/782 [00:00<00:03, 205.27it/s]



Epoch [527/700]:  11%|█         | 85/782 [00:00<00:03, 208.40it/s]



Epoch [527/700]:  16%|█▋        | 128/782 [00:00<00:03, 209.82it/s]



Epoch [527/700]:  22%|██▏       | 171/782 [00:00<00:02, 211.31it/s]



Epoch [527/700]:  27%|██▋       | 215/782 [00:01<00:02, 208.63it/s]



Epoch [527/700]:  33%|███▎      | 259/782 [00:01<00:02, 211.53it/s]



Epoch [527/700]:  42%|████▏     | 325/782 [00:01<00:02, 213.99it/s]



Epoch [527/700]:  47%|████▋     | 369/782 [00:01<00:01, 215.64it/s]



Epoch [527/700]:  53%|█████▎    | 413/782 [00:01<00:01, 215.00it/s]



Epoch [527/700]:  58%|█████▊    | 457/782 [00:02<00:01, 210.83it/s]



Epoch [527/700]:  64%|██████▍   | 501/782 [00:02<00:01, 210.31it/s]



Epoch [527/700]:  70%|██████▉   | 545/782 [00:02<00:01, 213.38it/s]



Epoch [527/700]:  75%|███████▌  | 589/782 [00:02<00:00, 211.25it/s]



Epoch [527/700]:  81%|████████  | 634/782 [00:02<00:00, 215.78it/s]



Epoch [527/700]:  87%|████████▋ | 678/782 [00:03<00:00, 215.31it/s]



Epoch [527/700]:  92%|█████████▏| 722/782 [00:03<00:00, 212.07it/s]



Epoch [527/700]:  98%|█████████▊| 766/782 [00:03<00:00, 212.05it/s]



Epoch [527/700]: 100%|██████████| 782/782 [00:03<00:00, 211.54it/s]


Learning Rate: 0.004050
Train Loss: 0.0445, Accuracy: 98.47%, Confidence: 0.9810
Test Loss: 2.0612, Accuracy: 73.78%, Confidence: 0.9453
Train-Test Accuracy Gap: 24.69%


Epoch [528/700]:   3%|▎         | 21/782 [00:00<00:03, 203.57it/s]



Epoch [528/700]:  11%|█         | 85/782 [00:00<00:03, 208.24it/s]



Epoch [528/700]:  16%|█▋        | 128/782 [00:00<00:03, 209.71it/s]



Epoch [528/700]:  22%|██▏       | 170/782 [00:00<00:02, 204.80it/s]



Epoch [528/700]:  27%|██▋       | 214/782 [00:01<00:02, 209.98it/s]



Epoch [528/700]:  33%|███▎      | 258/782 [00:01<00:02, 213.21it/s]



Epoch [528/700]:  39%|███▊      | 302/782 [00:01<00:02, 214.91it/s]



Epoch [528/700]:  44%|████▍     | 346/782 [00:01<00:02, 215.66it/s]



Epoch [528/700]:  50%|████▉     | 390/782 [00:01<00:01, 215.75it/s]



Epoch [528/700]:  58%|█████▊    | 456/782 [00:02<00:01, 214.47it/s]



Epoch [528/700]:  64%|██████▍   | 500/782 [00:02<00:01, 209.32it/s]



Epoch [528/700]:  69%|██████▉   | 543/782 [00:02<00:01, 211.03it/s]



Epoch [528/700]:  75%|███████▌  | 587/782 [00:02<00:00, 211.11it/s]



Epoch [528/700]:  81%|████████  | 630/782 [00:02<00:00, 206.14it/s]



Epoch [528/700]:  86%|████████▌ | 672/782 [00:03<00:00, 202.54it/s]



Epoch [528/700]:  91%|█████████▏| 714/782 [00:03<00:00, 204.27it/s]



Epoch [528/700]:  97%|█████████▋| 756/782 [00:03<00:00, 206.04it/s]



Epoch [528/700]: 100%|██████████| 782/782 [00:03<00:00, 209.34it/s]


Learning Rate: 0.004050
Train Loss: 0.0470, Accuracy: 98.42%, Confidence: 0.9809
Test Loss: 2.1326, Accuracy: 73.20%, Confidence: 0.9448
Train-Test Accuracy Gap: 25.22%


Epoch [529/700]:   3%|▎         | 21/782 [00:00<00:03, 205.49it/s]



Epoch [529/700]:   5%|▌         | 42/782 [00:00<00:03, 199.11it/s]



Epoch [529/700]:   8%|▊         | 63/782 [00:00<00:03, 200.78it/s]



Epoch [529/700]:  11%|█         | 85/782 [00:00<00:03, 206.18it/s]



Epoch [529/700]:  14%|█▎        | 107/782 [00:00<00:03, 208.70it/s]



Epoch [529/700]:  16%|█▋        | 129/782 [00:00<00:03, 209.77it/s]



Epoch [529/700]:  19%|█▉        | 151/782 [00:00<00:02, 211.93it/s]



Epoch [529/700]:  22%|██▏       | 173/782 [00:00<00:02, 213.24it/s]



Epoch [529/700]:  25%|██▍       | 195/782 [00:00<00:02, 215.24it/s]



Epoch [529/700]:  28%|██▊       | 217/782 [00:01<00:02, 213.04it/s]



Epoch [529/700]:  31%|███       | 239/782 [00:01<00:02, 212.18it/s]



Epoch [529/700]:  33%|███▎      | 261/782 [00:01<00:02, 213.20it/s]



Epoch [529/700]:  39%|███▉      | 305/782 [00:01<00:02, 211.11it/s]



Epoch [529/700]:  45%|████▍     | 349/782 [00:01<00:02, 212.00it/s]



Epoch [529/700]:  47%|████▋     | 371/782 [00:01<00:01, 212.13it/s]



Epoch [529/700]:  50%|█████     | 393/782 [00:01<00:01, 212.88it/s]



Epoch [529/700]:  53%|█████▎    | 415/782 [00:01<00:01, 214.03it/s]



Epoch [529/700]:  56%|█████▌    | 437/782 [00:02<00:01, 213.98it/s]



Epoch [529/700]:  59%|█████▊    | 459/782 [00:02<00:01, 214.04it/s]



Epoch [529/700]:  62%|██████▏   | 481/782 [00:02<00:01, 212.80it/s]



Epoch [529/700]:  64%|██████▍   | 503/782 [00:02<00:01, 207.39it/s]



Epoch [529/700]:  67%|██████▋   | 524/782 [00:02<00:01, 207.48it/s]



Epoch [529/700]:  70%|██████▉   | 545/782 [00:02<00:01, 206.53it/s]



Epoch [529/700]:  72%|███████▏  | 566/782 [00:02<00:01, 206.75it/s]



Epoch [529/700]:  75%|███████▌  | 588/782 [00:02<00:00, 209.36it/s]



Epoch [529/700]:  81%|████████  | 632/782 [00:02<00:00, 212.52it/s]



Epoch [529/700]:  86%|████████▋ | 676/782 [00:03<00:00, 212.44it/s]



Epoch [529/700]:  92%|█████████▏| 720/782 [00:03<00:00, 208.54it/s]



Epoch [529/700]: 100%|██████████| 782/782 [00:03<00:00, 210.44it/s]


Learning Rate: 0.004050
Train Loss: 0.0407, Accuracy: 98.71%, Confidence: 0.9825
Test Loss: 2.1701, Accuracy: 72.67%, Confidence: 0.9445
Train-Test Accuracy Gap: 26.04%


Epoch [530/700]:   3%|▎         | 22/782 [00:00<00:03, 215.08it/s]



Epoch [530/700]:   6%|▌         | 44/782 [00:00<00:03, 215.67it/s]



Epoch [530/700]:   8%|▊         | 66/782 [00:00<00:03, 209.37it/s]



Epoch [530/700]:  11%|█         | 87/782 [00:00<00:03, 208.72it/s]



Epoch [530/700]:  14%|█▍        | 109/782 [00:00<00:03, 210.99it/s]



Epoch [530/700]:  23%|██▎       | 176/782 [00:00<00:02, 216.35it/s]



Epoch [530/700]:  28%|██▊       | 220/782 [00:01<00:02, 213.50it/s]



Epoch [530/700]:  34%|███▍      | 264/782 [00:01<00:02, 209.78it/s]



Epoch [530/700]:  39%|███▉      | 308/782 [00:01<00:02, 210.21it/s]



Epoch [530/700]:  45%|████▌     | 352/782 [00:01<00:02, 210.80it/s]



Epoch [530/700]:  51%|█████     | 396/782 [00:01<00:01, 212.21it/s]



Epoch [530/700]:  56%|█████▋    | 440/782 [00:02<00:01, 210.55it/s]



Epoch [530/700]:  65%|██████▍   | 506/782 [00:02<00:01, 210.49it/s]



Epoch [530/700]:  70%|███████   | 550/782 [00:02<00:01, 206.65it/s]



Epoch [530/700]:  76%|███████▌  | 593/782 [00:02<00:00, 208.25it/s]



Epoch [530/700]:  82%|████████▏ | 638/782 [00:03<00:00, 213.49it/s]



Epoch [530/700]:  87%|████████▋ | 682/782 [00:03<00:00, 212.74it/s]



Epoch [530/700]:  93%|█████████▎| 726/782 [00:03<00:00, 211.29it/s]



Epoch [530/700]: 100%|██████████| 782/782 [00:03<00:00, 211.29it/s]






Learning Rate: 0.004050
Train Loss: 0.0468, Accuracy: 98.43%, Confidence: 0.9810
Test Loss: 2.0574, Accuracy: 73.89%, Confidence: 0.9448
Train-Test Accuracy Gap: 24.54%


Epoch [531/700]:   3%|▎         | 22/782 [00:00<00:03, 210.73it/s]



Epoch [531/700]:   6%|▌         | 44/782 [00:00<00:03, 205.39it/s]



Epoch [531/700]:   8%|▊         | 65/782 [00:00<00:03, 205.34it/s]



Epoch [531/700]:  11%|█         | 86/782 [00:00<00:03, 201.56it/s]



Epoch [531/700]:  14%|█▍        | 108/782 [00:00<00:03, 207.14it/s]



Epoch [531/700]:  16%|█▋        | 129/782 [00:00<00:03, 207.74it/s]



Epoch [531/700]:  19%|█▉        | 151/782 [00:00<00:03, 209.15it/s]



Epoch [531/700]:  22%|██▏       | 172/782 [00:00<00:02, 209.13it/s]



Epoch [531/700]:  25%|██▍       | 193/782 [00:00<00:02, 209.03it/s]



Epoch [531/700]:  27%|██▋       | 214/782 [00:01<00:02, 208.65it/s]



Epoch [531/700]:  30%|███       | 235/782 [00:01<00:02, 208.94it/s]



Epoch [531/700]:  33%|███▎      | 257/782 [00:01<00:02, 209.76it/s]



Epoch [531/700]:  38%|███▊      | 301/782 [00:01<00:02, 211.64it/s]



Epoch [531/700]:  44%|████▍     | 345/782 [00:01<00:02, 207.25it/s]



Epoch [531/700]:  49%|████▉     | 387/782 [00:01<00:01, 206.58it/s]



Epoch [531/700]:  55%|█████▌    | 431/782 [00:02<00:01, 209.18it/s]



Epoch [531/700]:  61%|██████    | 475/782 [00:02<00:01, 211.27it/s]



Epoch [531/700]:  66%|██████▌   | 518/782 [00:02<00:01, 204.71it/s]



Epoch [531/700]:  72%|███████▏  | 560/782 [00:02<00:01, 204.52it/s]



Epoch [531/700]:  77%|███████▋  | 603/782 [00:02<00:00, 207.32it/s]



Epoch [531/700]:  82%|████████▏ | 645/782 [00:03<00:00, 206.34it/s]



Epoch [531/700]:  88%|████████▊ | 687/782 [00:03<00:00, 205.90it/s]



Epoch [531/700]:  94%|█████████▎| 732/782 [00:03<00:00, 212.29it/s]



Epoch [531/700]: 100%|██████████| 782/782 [00:03<00:00, 207.82it/s]


Learning Rate: 0.004050
Train Loss: 0.0509, Accuracy: 98.29%, Confidence: 0.9810
Test Loss: 2.5231, Accuracy: 70.28%, Confidence: 0.9427
Train-Test Accuracy Gap: 28.01%


Epoch [532/700]:   3%|▎         | 21/782 [00:00<00:03, 202.76it/s]



Epoch [532/700]:   5%|▌         | 42/782 [00:00<00:03, 199.03it/s]



Epoch [532/700]:   8%|▊         | 62/782 [00:00<00:03, 195.99it/s]



Epoch [532/700]:  11%|█         | 83/782 [00:00<00:03, 198.53it/s]



Epoch [532/700]:  13%|█▎        | 104/782 [00:00<00:03, 202.28it/s]



Epoch [532/700]:  21%|██▏       | 167/782 [00:00<00:02, 206.64it/s]



Epoch [532/700]:  27%|██▋       | 210/782 [00:01<00:02, 209.38it/s]



Epoch [532/700]:  32%|███▏      | 253/782 [00:01<00:02, 211.15it/s]



Epoch [532/700]:  38%|███▊      | 297/782 [00:01<00:02, 207.97it/s]



Epoch [532/700]:  44%|████▎     | 341/782 [00:01<00:02, 209.83it/s]



Epoch [532/700]:  49%|████▉     | 385/782 [00:01<00:01, 210.35it/s]



Epoch [532/700]:  55%|█████▍    | 429/782 [00:02<00:01, 210.58it/s]



Epoch [532/700]:  63%|██████▎   | 495/782 [00:02<00:01, 205.02it/s]



Epoch [532/700]:  69%|██████▊   | 537/782 [00:02<00:01, 202.73it/s]



Epoch [532/700]:  74%|███████▍  | 580/782 [00:02<00:00, 206.06it/s]



Epoch [532/700]:  80%|███████▉  | 623/782 [00:03<00:00, 207.98it/s]



Epoch [532/700]:  85%|████████▌ | 666/782 [00:03<00:00, 208.39it/s]



Epoch [532/700]:  91%|█████████ | 710/782 [00:03<00:00, 210.29it/s]



Epoch [532/700]:  96%|█████████▋| 754/782 [00:03<00:00, 212.79it/s]



Epoch [532/700]: 100%|██████████| 782/782 [00:03<00:00, 207.91it/s]


Learning Rate: 0.004050
Train Loss: 0.0509, Accuracy: 98.30%, Confidence: 0.9807
Test Loss: 2.2268, Accuracy: 72.46%, Confidence: 0.9435
Train-Test Accuracy Gap: 25.84%


Epoch [533/700]:   3%|▎         | 21/782 [00:00<00:03, 209.58it/s]



Epoch [533/700]:   5%|▌         | 42/782 [00:00<00:03, 199.82it/s]



Epoch [533/700]:   8%|▊         | 63/782 [00:00<00:03, 204.11it/s]



Epoch [533/700]:  11%|█         | 84/782 [00:00<00:03, 204.65it/s]



Epoch [533/700]:  13%|█▎        | 105/782 [00:00<00:03, 202.16it/s]



Epoch [533/700]:  19%|█▉        | 149/782 [00:00<00:03, 208.42it/s]



Epoch [533/700]:  22%|██▏       | 171/782 [00:00<00:02, 211.20it/s]



Epoch [533/700]:  25%|██▍       | 193/782 [00:00<00:02, 208.89it/s]



Epoch [533/700]:  27%|██▋       | 215/782 [00:01<00:02, 210.48it/s]



Epoch [533/700]:  30%|███       | 237/782 [00:01<00:02, 209.10it/s]



Epoch [533/700]:  39%|███▊      | 303/782 [00:01<00:02, 214.92it/s]



Epoch [533/700]:  47%|████▋     | 369/782 [00:01<00:01, 214.68it/s]



Epoch [533/700]:  53%|█████▎    | 413/782 [00:01<00:01, 216.03it/s]



Epoch [533/700]:  58%|█████▊    | 457/782 [00:02<00:01, 211.37it/s]



Epoch [533/700]:  64%|██████▍   | 501/782 [00:02<00:01, 210.40it/s]



Epoch [533/700]:  70%|██████▉   | 545/782 [00:02<00:01, 210.17it/s]



Epoch [533/700]:  75%|███████▌  | 589/782 [00:02<00:00, 208.94it/s]



Epoch [533/700]:  81%|████████  | 631/782 [00:03<00:00, 205.37it/s]



Epoch [533/700]:  89%|████████▉ | 696/782 [00:03<00:00, 211.73it/s]



Epoch [533/700]:  95%|█████████▍| 740/782 [00:03<00:00, 212.88it/s]



Epoch [533/700]: 100%|██████████| 782/782 [00:03<00:00, 210.61it/s]


Learning Rate: 0.004050
Train Loss: 0.0426, Accuracy: 98.58%, Confidence: 0.9816
Test Loss: 2.3641, Accuracy: 71.30%, Confidence: 0.9442
Train-Test Accuracy Gap: 27.28%


Epoch [534/700]:   3%|▎         | 21/782 [00:00<00:03, 209.01it/s]



Epoch [534/700]:   8%|▊         | 63/782 [00:00<00:03, 204.01it/s]



Epoch [534/700]:  16%|█▋        | 128/782 [00:00<00:03, 205.99it/s]



Epoch [534/700]:  22%|██▏       | 172/782 [00:00<00:02, 208.74it/s]



Epoch [534/700]:  28%|██▊       | 216/782 [00:01<00:02, 211.27it/s]



Epoch [534/700]:  33%|███▎      | 259/782 [00:01<00:02, 207.97it/s]



Epoch [534/700]:  39%|███▊      | 302/782 [00:01<00:02, 206.98it/s]



Epoch [534/700]:  44%|████▍     | 344/782 [00:01<00:02, 205.97it/s]



Epoch [534/700]:  49%|████▉     | 387/782 [00:01<00:01, 207.83it/s]



Epoch [534/700]:  58%|█████▊    | 453/782 [00:02<00:01, 213.52it/s]



Epoch [534/700]:  66%|██████▋   | 519/782 [00:02<00:01, 209.96it/s]



Epoch [534/700]:  72%|███████▏  | 562/782 [00:02<00:01, 206.79it/s]



Epoch [534/700]:  77%|███████▋  | 605/782 [00:02<00:00, 208.86it/s]



Epoch [534/700]:  83%|████████▎ | 647/782 [00:03<00:00, 202.33it/s]



Epoch [534/700]:  88%|████████▊ | 690/782 [00:03<00:00, 204.24it/s]



Epoch [534/700]:  94%|█████████▎| 733/782 [00:03<00:00, 205.05it/s]



Epoch [534/700]: 100%|██████████| 782/782 [00:03<00:00, 207.46it/s]






Learning Rate: 0.004050
Train Loss: 0.0464, Accuracy: 98.53%, Confidence: 0.9811
Test Loss: 2.4760, Accuracy: 69.88%, Confidence: 0.9414
Train-Test Accuracy Gap: 28.65%


Epoch [535/700]:   3%|▎         | 20/782 [00:00<00:03, 190.57it/s]



Epoch [535/700]:  11%|█         | 83/782 [00:00<00:03, 202.07it/s]



Epoch [535/700]:  16%|█▌        | 126/782 [00:00<00:03, 207.26it/s]



Epoch [535/700]:  22%|██▏       | 170/782 [00:00<00:02, 209.88it/s]



Epoch [535/700]:  27%|██▋       | 212/782 [00:01<00:02, 209.11it/s]



Epoch [535/700]:  32%|███▏      | 254/782 [00:01<00:02, 209.05it/s]



Epoch [535/700]:  38%|███▊      | 298/782 [00:01<00:02, 208.56it/s]



Epoch [535/700]:  47%|████▋     | 364/782 [00:01<00:01, 211.96it/s]



Epoch [535/700]:  52%|█████▏    | 407/782 [00:01<00:01, 208.66it/s]



Epoch [535/700]:  58%|█████▊    | 451/782 [00:02<00:01, 201.98it/s]



Epoch [535/700]:  63%|██████▎   | 495/782 [00:02<00:01, 207.22it/s]



Epoch [535/700]:  69%|██████▉   | 539/782 [00:02<00:01, 210.52it/s]



Epoch [535/700]:  75%|███████▍  | 583/782 [00:02<00:00, 209.48it/s]



Epoch [535/700]:  80%|████████  | 627/782 [00:03<00:00, 208.98it/s]



Epoch [535/700]:  86%|████████▌ | 669/782 [00:03<00:00, 203.83it/s]



Epoch [535/700]:  91%|█████████ | 712/782 [00:03<00:00, 203.98it/s]



Epoch [535/700]:  97%|█████████▋| 755/782 [00:03<00:00, 208.67it/s]



Epoch [535/700]: 100%|██████████| 782/782 [00:03<00:00, 207.58it/s]


Learning Rate: 0.004050
Train Loss: 0.0484, Accuracy: 98.32%, Confidence: 0.9808
Test Loss: 2.1421, Accuracy: 73.40%, Confidence: 0.9452
Train-Test Accuracy Gap: 24.92%


Epoch [536/700]:   3%|▎         | 21/782 [00:00<00:03, 207.79it/s]



Epoch [536/700]:  14%|█▎        | 107/782 [00:00<00:03, 205.92it/s]



Epoch [536/700]:  19%|█▉        | 151/782 [00:00<00:03, 210.01it/s]



Epoch [536/700]:  28%|██▊       | 217/782 [00:01<00:02, 208.11it/s]



Epoch [536/700]:  33%|███▎      | 260/782 [00:01<00:02, 206.00it/s]



Epoch [536/700]:  39%|███▉      | 304/782 [00:01<00:02, 209.64it/s]



Epoch [536/700]:  45%|████▍     | 348/782 [00:01<00:02, 210.64it/s]



Epoch [536/700]:  50%|█████     | 392/782 [00:01<00:01, 213.32it/s]



Epoch [536/700]:  56%|█████▌    | 436/782 [00:02<00:01, 211.31it/s]



Epoch [536/700]:  61%|██████▏   | 480/782 [00:02<00:01, 211.46it/s]



Epoch [536/700]:  67%|██████▋   | 524/782 [00:02<00:01, 210.60it/s]



Epoch [536/700]:  73%|███████▎  | 568/782 [00:02<00:01, 212.56it/s]



Epoch [536/700]:  78%|███████▊  | 612/782 [00:02<00:00, 213.41it/s]



Epoch [536/700]:  87%|████████▋ | 678/782 [00:03<00:00, 213.06it/s]



Epoch [536/700]:  92%|█████████▏| 722/782 [00:03<00:00, 213.23it/s]



Epoch [536/700]:  98%|█████████▊| 766/782 [00:03<00:00, 215.29it/s]



Epoch [536/700]: 100%|██████████| 782/782 [00:03<00:00, 210.96it/s]


Learning Rate: 0.004050
Train Loss: 0.0430, Accuracy: 98.58%, Confidence: 0.9816
Test Loss: 2.1921, Accuracy: 73.09%, Confidence: 0.9456
Train-Test Accuracy Gap: 25.49%


Epoch [537/700]:   3%|▎         | 21/782 [00:00<00:03, 208.61it/s]



Epoch [537/700]:  11%|█         | 87/782 [00:00<00:03, 214.30it/s]



Epoch [537/700]:  17%|█▋        | 131/782 [00:00<00:03, 214.00it/s]



Epoch [537/700]:  22%|██▏       | 175/782 [00:00<00:02, 210.50it/s]



Epoch [537/700]:  28%|██▊       | 219/782 [00:01<00:02, 207.82it/s]



Epoch [537/700]:  34%|███▎      | 263/782 [00:01<00:02, 211.67it/s]



Epoch [537/700]:  39%|███▉      | 307/782 [00:01<00:02, 211.41it/s]



Epoch [537/700]:  45%|████▍     | 351/782 [00:01<00:02, 211.80it/s]



Epoch [537/700]:  51%|█████     | 395/782 [00:01<00:01, 209.38it/s]



Epoch [537/700]:  56%|█████▌    | 439/782 [00:02<00:01, 206.96it/s]



Epoch [537/700]:  62%|██████▏   | 482/782 [00:02<00:01, 209.62it/s]



Epoch [537/700]:  67%|██████▋   | 526/782 [00:02<00:01, 207.91it/s]



Epoch [537/700]:  75%|███████▌  | 590/782 [00:02<00:00, 209.35it/s]



Epoch [537/700]:  81%|████████  | 634/782 [00:03<00:00, 210.07it/s]



Epoch [537/700]:  87%|████████▋ | 677/782 [00:03<00:00, 205.53it/s]



Epoch [537/700]:  92%|█████████▏| 720/782 [00:03<00:00, 207.31it/s]



Epoch [537/700]:  98%|█████████▊| 764/782 [00:03<00:00, 209.14it/s]



Epoch [537/700]: 100%|██████████| 782/782 [00:03<00:00, 209.36it/s]


Learning Rate: 0.004050
Train Loss: 0.0426, Accuracy: 98.55%, Confidence: 0.9819
Test Loss: 2.3058, Accuracy: 72.66%, Confidence: 0.9451
Train-Test Accuracy Gap: 25.89%


Epoch [538/700]:   3%|▎         | 21/782 [00:00<00:03, 200.11it/s]



Epoch [538/700]:  11%|█         | 84/782 [00:00<00:03, 202.62it/s]



Epoch [538/700]:  16%|█▌        | 127/782 [00:00<00:03, 207.56it/s]



Epoch [538/700]:  22%|██▏       | 170/782 [00:00<00:02, 208.52it/s]



Epoch [538/700]:  27%|██▋       | 214/782 [00:01<00:02, 209.78it/s]



Epoch [538/700]:  33%|███▎      | 257/782 [00:01<00:02, 210.94it/s]



Epoch [538/700]:  38%|███▊      | 301/782 [00:01<00:02, 206.32it/s]



Epoch [538/700]:  44%|████▍     | 344/782 [00:01<00:02, 208.28it/s]



Epoch [538/700]:  49%|████▉     | 386/782 [00:01<00:01, 208.77it/s]



Epoch [538/700]:  58%|█████▊    | 451/782 [00:02<00:01, 208.30it/s]



Epoch [538/700]:  63%|██████▎   | 494/782 [00:02<00:01, 209.67it/s]



Epoch [538/700]:  69%|██████▊   | 537/782 [00:02<00:01, 208.40it/s]



Epoch [538/700]:  74%|███████▍  | 580/782 [00:02<00:00, 208.04it/s]



Epoch [538/700]:  80%|███████▉  | 623/782 [00:03<00:00, 206.16it/s]



Epoch [538/700]:  85%|████████▌ | 666/782 [00:03<00:00, 206.55it/s]



Epoch [538/700]:  91%|█████████ | 709/782 [00:03<00:00, 208.12it/s]



Epoch [538/700]:  96%|█████████▋| 753/782 [00:03<00:00, 211.10it/s]



Epoch [538/700]: 100%|██████████| 782/782 [00:03<00:00, 207.48it/s]


Learning Rate: 0.004050
Train Loss: 0.0480, Accuracy: 98.37%, Confidence: 0.9810
Test Loss: 2.1484, Accuracy: 73.04%, Confidence: 0.9443
Train-Test Accuracy Gap: 25.33%


Epoch [539/700]:   3%|▎         | 21/782 [00:00<00:03, 206.37it/s]



Epoch [539/700]:   5%|▌         | 42/782 [00:00<00:03, 206.28it/s]



Epoch [539/700]:  11%|█         | 86/782 [00:00<00:03, 210.95it/s]



Epoch [539/700]:  14%|█▍        | 108/782 [00:00<00:03, 211.81it/s]



Epoch [539/700]:  22%|██▏       | 174/782 [00:00<00:02, 212.98it/s]



Epoch [539/700]:  28%|██▊       | 218/782 [00:01<00:02, 212.38it/s]



Epoch [539/700]:  34%|███▎      | 262/782 [00:01<00:02, 205.51it/s]



Epoch [539/700]:  42%|████▏     | 328/782 [00:01<00:02, 210.72it/s]



Epoch [539/700]:  48%|████▊     | 372/782 [00:01<00:01, 212.12it/s]



Epoch [539/700]:  53%|█████▎    | 416/782 [00:01<00:01, 208.38it/s]



Epoch [539/700]:  59%|█████▉    | 461/782 [00:02<00:01, 212.55it/s]



Epoch [539/700]:  65%|██████▍   | 505/782 [00:02<00:01, 213.16it/s]



Epoch [539/700]:  70%|███████   | 549/782 [00:02<00:01, 213.02it/s]



Epoch [539/700]:  76%|███████▌  | 593/782 [00:02<00:00, 210.82it/s]



Epoch [539/700]:  81%|████████▏ | 636/782 [00:03<00:00, 206.62it/s]



Epoch [539/700]:  87%|████████▋ | 680/782 [00:03<00:00, 210.59it/s]



Epoch [539/700]:  93%|█████████▎| 724/782 [00:03<00:00, 212.81it/s]



Epoch [539/700]: 100%|██████████| 782/782 [00:03<00:00, 210.31it/s]






Learning Rate: 0.004050
Train Loss: 0.0447, Accuracy: 98.39%, Confidence: 0.9821
Test Loss: 2.2554, Accuracy: 72.24%, Confidence: 0.9444
Train-Test Accuracy Gap: 26.15%


Epoch [540/700]:   3%|▎         | 22/782 [00:00<00:03, 213.84it/s]



Epoch [540/700]:  11%|█▏        | 88/782 [00:00<00:03, 215.47it/s]



Epoch [540/700]:  17%|█▋        | 132/782 [00:00<00:03, 209.99it/s]



Epoch [540/700]:  23%|██▎       | 176/782 [00:00<00:02, 213.41it/s]



Epoch [540/700]:  31%|███       | 243/782 [00:01<00:02, 216.51it/s]



Epoch [540/700]:  37%|███▋      | 287/782 [00:01<00:02, 214.17it/s]



Epoch [540/700]:  42%|████▏     | 331/782 [00:01<00:02, 213.67it/s]



Epoch [540/700]:  48%|████▊     | 375/782 [00:01<00:01, 214.66it/s]



Epoch [540/700]:  56%|█████▋    | 441/782 [00:02<00:01, 213.08it/s]



Epoch [540/700]:  62%|██████▏   | 485/782 [00:02<00:01, 212.83it/s]



Epoch [540/700]:  68%|██████▊   | 529/782 [00:02<00:01, 211.04it/s]



Epoch [540/700]:  73%|███████▎  | 573/782 [00:02<00:00, 210.69it/s]



Epoch [540/700]:  79%|███████▉  | 616/782 [00:02<00:00, 207.95it/s]



Epoch [540/700]:  84%|████████▍ | 658/782 [00:03<00:00, 206.27it/s]



Epoch [540/700]:  90%|████████▉ | 700/782 [00:03<00:00, 204.12it/s]



Epoch [540/700]:  95%|█████████▍| 742/782 [00:03<00:00, 202.57it/s]



Epoch [540/700]: 100%|██████████| 782/782 [00:03<00:00, 210.26it/s]


Learning Rate: 0.004050
Train Loss: 0.0441, Accuracy: 98.52%, Confidence: 0.9817
Test Loss: 2.1160, Accuracy: 73.33%, Confidence: 0.9440
Train-Test Accuracy Gap: 25.19%


Epoch [541/700]:   3%|▎         | 20/782 [00:00<00:03, 193.77it/s]



Epoch [541/700]:   5%|▌         | 40/782 [00:00<00:03, 195.96it/s]



Epoch [541/700]:  10%|█         | 80/782 [00:00<00:03, 196.96it/s]



Epoch [541/700]:  16%|█▌        | 123/782 [00:00<00:03, 202.60it/s]



Epoch [541/700]:  24%|██▍       | 188/782 [00:00<00:02, 208.76it/s]



Epoch [541/700]:  30%|██▉       | 232/782 [00:01<00:02, 211.85it/s]



Epoch [541/700]:  35%|███▌      | 276/782 [00:01<00:02, 211.59it/s]



Epoch [541/700]:  44%|████▎     | 341/782 [00:01<00:02, 209.22it/s]



Epoch [541/700]:  49%|████▉     | 385/782 [00:01<00:01, 210.98it/s]



Epoch [541/700]:  55%|█████▍    | 429/782 [00:02<00:01, 210.90it/s]



Epoch [541/700]:  60%|██████    | 473/782 [00:02<00:01, 212.89it/s]



Epoch [541/700]:  66%|██████▌   | 517/782 [00:02<00:01, 206.45it/s]



Epoch [541/700]:  74%|███████▍  | 582/782 [00:02<00:00, 211.29it/s]



Epoch [541/700]:  80%|████████  | 626/782 [00:03<00:00, 206.45it/s]



Epoch [541/700]:  86%|████████▌ | 670/782 [00:03<00:00, 209.53it/s]



Epoch [541/700]:  91%|█████████▏| 714/782 [00:03<00:00, 212.10it/s]



Epoch [541/700]:  97%|█████████▋| 758/782 [00:03<00:00, 214.87it/s]



Epoch [541/700]: 100%|██████████| 782/782 [00:03<00:00, 209.18it/s]


Learning Rate: 0.004050
Train Loss: 0.0434, Accuracy: 98.52%, Confidence: 0.9818
Test Loss: 2.4386, Accuracy: 70.66%, Confidence: 0.9446
Train-Test Accuracy Gap: 27.86%


Epoch [542/700]:   3%|▎         | 21/782 [00:00<00:03, 202.74it/s]



Epoch [542/700]:  11%|█         | 85/782 [00:00<00:03, 209.12it/s]



Epoch [542/700]:  16%|█▋        | 128/782 [00:00<00:03, 209.67it/s]



Epoch [542/700]:  22%|██▏       | 171/782 [00:00<00:02, 206.23it/s]



Epoch [542/700]:  27%|██▋       | 214/782 [00:01<00:02, 205.12it/s]



Epoch [542/700]:  33%|███▎      | 257/782 [00:01<00:02, 206.89it/s]



Epoch [542/700]:  38%|███▊      | 300/782 [00:01<00:02, 208.64it/s]



Epoch [542/700]:  44%|████▎     | 342/782 [00:01<00:02, 207.14it/s]



Epoch [542/700]:  49%|████▉     | 385/782 [00:01<00:01, 208.34it/s]



Epoch [542/700]:  55%|█████▍    | 427/782 [00:02<00:01, 207.23it/s]



Epoch [542/700]:  63%|██████▎   | 491/782 [00:02<00:01, 206.47it/s]



Epoch [542/700]:  68%|██████▊   | 535/782 [00:02<00:01, 209.57it/s]



Epoch [542/700]:  74%|███████▍  | 578/782 [00:02<00:00, 210.20it/s]



Epoch [542/700]:  82%|████████▏ | 644/782 [00:03<00:00, 213.78it/s]



Epoch [542/700]:  88%|████████▊ | 688/782 [00:03<00:00, 210.83it/s]



Epoch [542/700]:  94%|█████████▎| 732/782 [00:03<00:00, 212.50it/s]



Epoch [542/700]: 100%|██████████| 782/782 [00:03<00:00, 208.53it/s]






Learning Rate: 0.004050
Train Loss: 0.0456, Accuracy: 98.45%, Confidence: 0.9813
Test Loss: 2.1443, Accuracy: 73.22%, Confidence: 0.9456
Train-Test Accuracy Gap: 25.23%


Epoch [543/700]:   3%|▎         | 21/782 [00:00<00:03, 206.39it/s]



Epoch [543/700]:  11%|█         | 86/782 [00:00<00:03, 211.53it/s]



Epoch [543/700]:  17%|█▋        | 130/782 [00:00<00:03, 210.30it/s]



Epoch [543/700]:  19%|█▉        | 152/782 [00:00<00:02, 211.24it/s]



Epoch [543/700]:  28%|██▊       | 218/782 [00:01<00:02, 207.64it/s]



Epoch [543/700]:  34%|███▎      | 263/782 [00:01<00:02, 213.18it/s]



Epoch [543/700]:  39%|███▉      | 307/782 [00:01<00:02, 210.04it/s]



Epoch [543/700]:  45%|████▍     | 351/782 [00:01<00:02, 211.57it/s]



Epoch [543/700]:  51%|█████     | 395/782 [00:01<00:01, 212.03it/s]



Epoch [543/700]:  56%|█████▌    | 439/782 [00:02<00:01, 212.38it/s]



Epoch [543/700]:  62%|██████▏   | 483/782 [00:02<00:01, 204.36it/s]



Epoch [543/700]:  67%|██████▋   | 527/782 [00:02<00:01, 207.47it/s]



Epoch [543/700]:  76%|███████▌  | 591/782 [00:02<00:00, 204.79it/s]



Epoch [543/700]:  84%|████████▍ | 655/782 [00:03<00:00, 207.55it/s]



Epoch [543/700]:  89%|████████▉ | 699/782 [00:03<00:00, 209.46it/s]



Epoch [543/700]:  95%|█████████▍| 741/782 [00:03<00:00, 205.57it/s]



Epoch [543/700]: 100%|██████████| 782/782 [00:03<00:00, 208.80it/s]


Learning Rate: 0.004050
Train Loss: 0.0448, Accuracy: 98.53%, Confidence: 0.9822
Test Loss: 2.5235, Accuracy: 70.99%, Confidence: 0.9460
Train-Test Accuracy Gap: 27.54%


Epoch [544/700]:   3%|▎         | 21/782 [00:00<00:03, 209.53it/s]



Epoch [544/700]:   5%|▌         | 42/782 [00:00<00:03, 206.48it/s]



Epoch [544/700]:   8%|▊         | 63/782 [00:00<00:03, 206.51it/s]



Epoch [544/700]:  16%|█▌        | 127/782 [00:00<00:03, 206.32it/s]



Epoch [544/700]:  22%|██▏       | 169/782 [00:00<00:03, 202.99it/s]



Epoch [544/700]:  27%|██▋       | 213/782 [00:01<00:02, 210.28it/s]



Epoch [544/700]:  33%|███▎      | 257/782 [00:01<00:02, 209.95it/s]



Epoch [544/700]:  38%|███▊      | 301/782 [00:01<00:02, 212.76it/s]



Epoch [544/700]:  44%|████▍     | 345/782 [00:01<00:02, 214.35it/s]



Epoch [544/700]:  50%|████▉     | 389/782 [00:01<00:01, 211.97it/s]



Epoch [544/700]:  55%|█████▌    | 432/782 [00:02<00:01, 200.28it/s]



Epoch [544/700]:  61%|██████    | 475/782 [00:02<00:01, 204.26it/s]



Epoch [544/700]:  66%|██████▌   | 518/782 [00:02<00:01, 207.25it/s]



Epoch [544/700]:  72%|███████▏  | 563/782 [00:02<00:01, 209.79it/s]



Epoch [544/700]:  77%|███████▋  | 605/782 [00:02<00:00, 205.71it/s]



Epoch [544/700]:  83%|████████▎ | 648/782 [00:03<00:00, 206.47it/s]



Epoch [544/700]:  91%|█████████ | 713/782 [00:03<00:00, 210.57it/s]



Epoch [544/700]:  97%|█████████▋| 757/782 [00:03<00:00, 209.05it/s]



Epoch [544/700]: 100%|██████████| 782/782 [00:03<00:00, 208.34it/s]


Learning Rate: 0.004050
Train Loss: 0.0449, Accuracy: 98.51%, Confidence: 0.9815
Test Loss: 2.4201, Accuracy: 70.93%, Confidence: 0.9442
Train-Test Accuracy Gap: 27.58%


Epoch [545/700]:   3%|▎         | 22/782 [00:00<00:03, 214.86it/s]



Epoch [545/700]:   6%|▌         | 44/782 [00:00<00:03, 212.57it/s]



Epoch [545/700]:  11%|█▏        | 88/782 [00:00<00:03, 214.55it/s]



Epoch [545/700]:  17%|█▋        | 132/782 [00:00<00:03, 215.81it/s]



Epoch [545/700]:  23%|██▎       | 176/782 [00:00<00:02, 209.16it/s]



Epoch [545/700]:  28%|██▊       | 220/782 [00:01<00:02, 211.59it/s]



Epoch [545/700]:  34%|███▍      | 264/782 [00:01<00:02, 213.59it/s]



Epoch [545/700]:  39%|███▉      | 308/782 [00:01<00:02, 215.76it/s]



Epoch [545/700]:  45%|████▌     | 352/782 [00:01<00:01, 216.34it/s]



Epoch [545/700]:  51%|█████     | 396/782 [00:01<00:01, 211.42it/s]



Epoch [545/700]:  59%|█████▉    | 462/782 [00:02<00:01, 210.58it/s]



Epoch [545/700]:  65%|██████▍   | 506/782 [00:02<00:01, 210.69it/s]



Epoch [545/700]:  70%|███████   | 550/782 [00:02<00:01, 211.10it/s]



Epoch [545/700]:  76%|███████▌  | 594/782 [00:02<00:00, 213.12it/s]



Epoch [545/700]:  82%|████████▏ | 638/782 [00:02<00:00, 213.81it/s]



Epoch [545/700]:  87%|████████▋ | 682/782 [00:03<00:00, 211.72it/s]



Epoch [545/700]:  93%|█████████▎| 726/782 [00:03<00:00, 211.46it/s]



Epoch [545/700]: 100%|██████████| 782/782 [00:03<00:00, 212.04it/s]






Learning Rate: 0.004050
Train Loss: 0.0425, Accuracy: 98.55%, Confidence: 0.9818
Test Loss: 2.4539, Accuracy: 71.52%, Confidence: 0.9441
Train-Test Accuracy Gap: 27.03%


Epoch [546/700]:   3%|▎         | 22/782 [00:00<00:03, 210.90it/s]



Epoch [546/700]:  11%|█▏        | 88/782 [00:00<00:03, 212.29it/s]



Epoch [546/700]:  20%|█▉        | 154/782 [00:00<00:02, 213.54it/s]



Epoch [546/700]:  25%|██▌       | 198/782 [00:00<00:02, 207.99it/s]



Epoch [546/700]:  34%|███▎      | 263/782 [00:01<00:02, 210.89it/s]



Epoch [546/700]:  39%|███▉      | 307/782 [00:01<00:02, 209.76it/s]



Epoch [546/700]:  45%|████▍     | 351/782 [00:01<00:02, 211.77it/s]



Epoch [546/700]:  51%|█████     | 395/782 [00:01<00:01, 211.34it/s]



Epoch [546/700]:  56%|█████▌    | 439/782 [00:02<00:01, 208.85it/s]



Epoch [546/700]:  62%|██████▏   | 483/782 [00:02<00:01, 208.17it/s]



Epoch [546/700]:  67%|██████▋   | 526/782 [00:02<00:01, 209.88it/s]



Epoch [546/700]:  75%|███████▌  | 589/782 [00:02<00:00, 206.38it/s]



Epoch [546/700]:  81%|████████  | 631/782 [00:03<00:00, 206.84it/s]



Epoch [546/700]:  86%|████████▌ | 674/782 [00:03<00:00, 207.63it/s]



Epoch [546/700]:  92%|█████████▏| 716/782 [00:03<00:00, 206.32it/s]



Epoch [546/700]:  97%|█████████▋| 759/782 [00:03<00:00, 205.89it/s]



Epoch [546/700]: 100%|██████████| 782/782 [00:03<00:00, 208.96it/s]


Learning Rate: 0.004050
Train Loss: 0.0468, Accuracy: 98.39%, Confidence: 0.9819
Test Loss: 2.2515, Accuracy: 72.96%, Confidence: 0.9453
Train-Test Accuracy Gap: 25.43%


Epoch [547/700]:   3%|▎         | 22/782 [00:00<00:03, 211.60it/s]



Epoch [547/700]:   6%|▌         | 44/782 [00:00<00:03, 213.21it/s]



Epoch [547/700]:  11%|█▏        | 88/782 [00:00<00:03, 209.50it/s]



Epoch [547/700]:  19%|█▉        | 151/782 [00:00<00:03, 208.63it/s]



Epoch [547/700]:  25%|██▍       | 194/782 [00:00<00:02, 210.36it/s]



Epoch [547/700]:  30%|███       | 238/782 [00:01<00:02, 210.98it/s]



Epoch [547/700]:  36%|███▌      | 282/782 [00:01<00:02, 210.71it/s]



Epoch [547/700]:  42%|████▏     | 326/782 [00:01<00:02, 212.46it/s]



Epoch [547/700]:  50%|█████     | 391/782 [00:01<00:01, 206.52it/s]



Epoch [547/700]:  55%|█████▌    | 434/782 [00:02<00:01, 207.55it/s]



Epoch [547/700]:  61%|██████    | 478/782 [00:02<00:01, 210.67it/s]



Epoch [547/700]:  67%|██████▋   | 521/782 [00:02<00:01, 209.28it/s]



Epoch [547/700]:  72%|███████▏  | 565/782 [00:02<00:01, 211.75it/s]



Epoch [547/700]:  78%|███████▊  | 609/782 [00:02<00:00, 206.67it/s]



Epoch [547/700]:  83%|████████▎ | 651/782 [00:03<00:00, 200.70it/s]



Epoch [547/700]:  89%|████████▊ | 694/782 [00:03<00:00, 203.40it/s]



Epoch [547/700]:  94%|█████████▍| 737/782 [00:03<00:00, 208.27it/s]



Epoch [547/700]: 100%|██████████| 782/782 [00:03<00:00, 208.46it/s]






Learning Rate: 0.004050
Train Loss: 0.0498, Accuracy: 98.30%, Confidence: 0.9806
Test Loss: 2.3564, Accuracy: 71.42%, Confidence: 0.9457
Train-Test Accuracy Gap: 26.88%


Epoch [548/700]:   3%|▎         | 22/782 [00:00<00:03, 212.32it/s]



Epoch [548/700]:   8%|▊         | 66/782 [00:00<00:03, 209.39it/s]



Epoch [548/700]:  17%|█▋        | 131/782 [00:00<00:03, 208.82it/s]



Epoch [548/700]:  22%|██▏       | 174/782 [00:00<00:02, 209.60it/s]



Epoch [548/700]:  28%|██▊       | 218/782 [00:01<00:02, 212.93it/s]



Epoch [548/700]:  34%|███▎      | 262/782 [00:01<00:02, 215.57it/s]



Epoch [548/700]:  39%|███▉      | 306/782 [00:01<00:02, 215.68it/s]



Epoch [548/700]:  48%|████▊     | 372/782 [00:01<00:01, 216.41it/s]



Epoch [548/700]:  53%|█████▎    | 416/782 [00:01<00:01, 214.97it/s]



Epoch [548/700]:  59%|█████▉    | 460/782 [00:02<00:01, 210.73it/s]



Epoch [548/700]:  64%|██████▍   | 503/782 [00:02<00:01, 209.61it/s]



Epoch [548/700]:  70%|██████▉   | 545/782 [00:02<00:01, 207.81it/s]



Epoch [548/700]:  75%|███████▌  | 588/782 [00:02<00:00, 207.04it/s]



Epoch [548/700]:  81%|████████  | 631/782 [00:02<00:00, 206.41it/s]



Epoch [548/700]:  86%|████████▌ | 673/782 [00:03<00:00, 206.31it/s]



Epoch [548/700]:  92%|█████████▏| 716/782 [00:03<00:00, 209.00it/s]



Epoch [548/700]: 100%|██████████| 782/782 [00:03<00:00, 210.20it/s]






Learning Rate: 0.004050
Train Loss: 0.0398, Accuracy: 98.67%, Confidence: 0.9828
Test Loss: 2.3089, Accuracy: 72.20%, Confidence: 0.9446
Train-Test Accuracy Gap: 26.47%


Epoch [549/700]:   3%|▎         | 21/782 [00:00<00:03, 208.37it/s]



Epoch [549/700]:  11%|█         | 87/782 [00:00<00:03, 213.23it/s]



Epoch [549/700]:  17%|█▋        | 130/782 [00:00<00:03, 204.55it/s]



Epoch [549/700]:  22%|██▏       | 173/782 [00:00<00:02, 208.57it/s]



Epoch [549/700]:  28%|██▊       | 217/782 [00:01<00:02, 212.10it/s]



Epoch [549/700]:  33%|███▎      | 261/782 [00:01<00:02, 213.17it/s]



Epoch [549/700]:  39%|███▉      | 305/782 [00:01<00:02, 209.87it/s]



Epoch [549/700]:  48%|████▊     | 372/782 [00:01<00:01, 215.92it/s]



Epoch [549/700]:  53%|█████▎    | 416/782 [00:01<00:01, 211.32it/s]



Epoch [549/700]:  59%|█████▊    | 459/782 [00:02<00:01, 204.90it/s]



Epoch [549/700]:  64%|██████▍   | 502/782 [00:02<00:01, 208.37it/s]



Epoch [549/700]:  70%|██████▉   | 546/782 [00:02<00:01, 212.01it/s]



Epoch [549/700]:  78%|███████▊  | 612/782 [00:02<00:00, 209.74it/s]



Epoch [549/700]:  84%|████████▎ | 654/782 [00:03<00:00, 206.56it/s]



Epoch [549/700]:  89%|████████▉ | 698/782 [00:03<00:00, 208.51it/s]



Epoch [549/700]:  97%|█████████▋| 762/782 [00:03<00:00, 206.60it/s]



Epoch [549/700]: 100%|██████████| 782/782 [00:03<00:00, 209.01it/s]


Learning Rate: 0.004050
Train Loss: 0.0465, Accuracy: 98.47%, Confidence: 0.9815
Test Loss: 2.2048, Accuracy: 72.95%, Confidence: 0.9451
Train-Test Accuracy Gap: 25.52%


Epoch [550/700]:   3%|▎         | 21/782 [00:00<00:03, 201.72it/s]



Epoch [550/700]:   5%|▌         | 42/782 [00:00<00:03, 194.32it/s]



Epoch [550/700]:   8%|▊         | 62/782 [00:00<00:03, 193.97it/s]



Epoch [550/700]:  11%|█         | 84/782 [00:00<00:03, 201.75it/s]



Epoch [550/700]:  13%|█▎        | 105/782 [00:00<00:03, 204.53it/s]



Epoch [550/700]:  16%|█▌        | 126/782 [00:00<00:03, 202.19it/s]



Epoch [550/700]:  21%|██▏       | 168/782 [00:00<00:03, 203.01it/s]



Epoch [550/700]:  27%|██▋       | 212/782 [00:01<00:02, 207.20it/s]



Epoch [550/700]:  33%|███▎      | 256/782 [00:01<00:02, 210.02it/s]



Epoch [550/700]:  36%|███▌      | 278/782 [00:01<00:02, 210.86it/s]



Epoch [550/700]:  38%|███▊      | 300/782 [00:01<00:02, 212.43it/s]



Epoch [550/700]:  41%|████      | 322/782 [00:01<00:02, 213.12it/s]



Epoch [550/700]:  44%|████▍     | 344/782 [00:01<00:02, 211.47it/s]



Epoch [550/700]:  47%|████▋     | 366/782 [00:01<00:02, 207.54it/s]



Epoch [550/700]:  50%|████▉     | 388/782 [00:01<00:01, 209.25it/s]



Epoch [550/700]:  52%|█████▏    | 410/782 [00:01<00:01, 210.52it/s]



Epoch [550/700]:  55%|█████▌    | 432/782 [00:02<00:01, 211.05it/s]



Epoch [550/700]:  58%|█████▊    | 454/782 [00:02<00:01, 212.62it/s]



Epoch [550/700]:  64%|██████▎   | 498/782 [00:02<00:01, 213.40it/s]



Epoch [550/700]:  69%|██████▉   | 543/782 [00:02<00:01, 215.84it/s]



Epoch [550/700]:  75%|███████▌  | 587/782 [00:02<00:00, 211.53it/s]



Epoch [550/700]:  84%|████████▎ | 653/782 [00:03<00:00, 211.00it/s]



Epoch [550/700]:  89%|████████▉ | 697/782 [00:03<00:00, 213.26it/s]



Epoch [550/700]:  95%|█████████▍| 741/782 [00:03<00:00, 213.01it/s]



Epoch [550/700]: 100%|██████████| 782/782 [00:03<00:00, 209.49it/s]


Learning Rate: 0.004050
Train Loss: 0.0418, Accuracy: 98.57%, Confidence: 0.9823
Test Loss: 2.2237, Accuracy: 72.87%, Confidence: 0.9452
Train-Test Accuracy Gap: 25.70%


Epoch [551/700]:   3%|▎         | 21/782 [00:00<00:03, 206.46it/s]



Epoch [551/700]:   5%|▌         | 43/782 [00:00<00:03, 211.06it/s]



Epoch [551/700]:   8%|▊         | 65/782 [00:00<00:03, 209.83it/s]



Epoch [551/700]:  11%|█         | 87/782 [00:00<00:03, 211.54it/s]



Epoch [551/700]:  14%|█▍        | 109/782 [00:00<00:03, 210.64it/s]



Epoch [551/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.04it/s]



Epoch [551/700]:  20%|█▉        | 153/782 [00:00<00:03, 204.67it/s]



Epoch [551/700]:  22%|██▏       | 174/782 [00:00<00:02, 202.84it/s]



Epoch [551/700]:  25%|██▌       | 196/782 [00:00<00:02, 206.10it/s]



Epoch [551/700]:  28%|██▊       | 218/782 [00:01<00:02, 208.74it/s]



Epoch [551/700]:  34%|███▎      | 262/782 [00:01<00:02, 213.13it/s]



Epoch [551/700]:  39%|███▉      | 306/782 [00:01<00:02, 214.38it/s]



Epoch [551/700]:  45%|████▍     | 350/782 [00:01<00:02, 212.37it/s]



Epoch [551/700]:  53%|█████▎    | 415/782 [00:01<00:01, 209.98it/s]



Epoch [551/700]:  59%|█████▊    | 459/782 [00:02<00:01, 212.18it/s]



Epoch [551/700]:  64%|██████▍   | 503/782 [00:02<00:01, 212.10it/s]



Epoch [551/700]:  70%|██████▉   | 547/782 [00:02<00:01, 209.58it/s]



Epoch [551/700]:  76%|███████▌  | 591/782 [00:02<00:00, 206.70it/s]



Epoch [551/700]:  84%|████████▍ | 658/782 [00:03<00:00, 214.27it/s]



Epoch [551/700]:  90%|████████▉ | 702/782 [00:03<00:00, 214.98it/s]



Epoch [551/700]:  95%|█████████▌| 746/782 [00:03<00:00, 212.99it/s]



Epoch [551/700]: 100%|██████████| 782/782 [00:03<00:00, 210.18it/s]


Learning Rate: 0.004050
Train Loss: 0.0456, Accuracy: 98.46%, Confidence: 0.9817
Test Loss: 2.4926, Accuracy: 71.18%, Confidence: 0.9469
Train-Test Accuracy Gap: 27.28%


Epoch [552/700]:   3%|▎         | 22/782 [00:00<00:03, 211.19it/s]



Epoch [552/700]:  11%|█         | 86/782 [00:00<00:03, 206.45it/s]



Epoch [552/700]:  19%|█▉        | 150/782 [00:00<00:03, 210.05it/s]



Epoch [552/700]:  25%|██▍       | 194/782 [00:00<00:02, 207.66it/s]



Epoch [552/700]:  30%|███       | 238/782 [00:01<00:02, 212.06it/s]



Epoch [552/700]:  36%|███▌      | 282/782 [00:01<00:02, 206.42it/s]



Epoch [552/700]:  42%|████▏     | 326/782 [00:01<00:02, 210.23it/s]



Epoch [552/700]:  47%|████▋     | 370/782 [00:01<00:01, 211.81it/s]



Epoch [552/700]:  53%|█████▎    | 414/782 [00:01<00:01, 212.08it/s]



Epoch [552/700]:  59%|█████▊    | 458/782 [00:02<00:01, 210.36it/s]



Epoch [552/700]:  64%|██████▍   | 502/782 [00:02<00:01, 210.02it/s]



Epoch [552/700]:  70%|██████▉   | 546/782 [00:02<00:01, 211.41it/s]



Epoch [552/700]:  78%|███████▊  | 611/782 [00:02<00:00, 207.48it/s]



Epoch [552/700]:  84%|████████▍ | 655/782 [00:03<00:00, 210.05it/s]



Epoch [552/700]:  89%|████████▉ | 699/782 [00:03<00:00, 209.30it/s]



Epoch [552/700]:  95%|█████████▍| 742/782 [00:03<00:00, 209.00it/s]



Epoch [552/700]: 100%|██████████| 782/782 [00:03<00:00, 209.10it/s]


Learning Rate: 0.004050
Train Loss: 0.0417, Accuracy: 98.59%, Confidence: 0.9826
Test Loss: 2.2437, Accuracy: 72.75%, Confidence: 0.9464
Train-Test Accuracy Gap: 25.84%


Epoch [553/700]:   3%|▎         | 21/782 [00:00<00:03, 207.80it/s]



Epoch [553/700]:   8%|▊         | 65/782 [00:00<00:03, 201.22it/s]



Epoch [553/700]:  14%|█▍        | 108/782 [00:00<00:03, 206.12it/s]



Epoch [553/700]:  22%|██▏       | 170/782 [00:00<00:03, 196.48it/s]



Epoch [553/700]:  27%|██▋       | 213/782 [00:01<00:02, 203.18it/s]



Epoch [553/700]:  33%|███▎      | 255/782 [00:01<00:02, 205.56it/s]



Epoch [553/700]:  38%|███▊      | 298/782 [00:01<00:02, 206.35it/s]



Epoch [553/700]:  44%|████▎     | 342/782 [00:01<00:02, 211.06it/s]



Epoch [553/700]:  49%|████▉     | 386/782 [00:01<00:01, 212.91it/s]



Epoch [553/700]:  55%|█████▍    | 430/782 [00:02<00:01, 210.27it/s]



Epoch [553/700]:  61%|██████    | 474/782 [00:02<00:01, 210.56it/s]



Epoch [553/700]:  66%|██████▌   | 518/782 [00:02<00:01, 204.67it/s]



Epoch [553/700]:  72%|███████▏  | 561/782 [00:02<00:01, 207.12it/s]



Epoch [553/700]:  80%|████████  | 626/782 [00:03<00:00, 211.21it/s]



Epoch [553/700]:  88%|████████▊ | 690/782 [00:03<00:00, 206.82it/s]



Epoch [553/700]:  94%|█████████▎| 732/782 [00:03<00:00, 206.40it/s]



Epoch [553/700]:  99%|█████████▉| 775/782 [00:03<00:00, 208.11it/s]



Epoch [553/700]: 100%|██████████| 782/782 [00:03<00:00, 206.71it/s]


Learning Rate: 0.004050
Train Loss: 0.0436, Accuracy: 98.58%, Confidence: 0.9822
Test Loss: 2.6671, Accuracy: 69.55%, Confidence: 0.9457
Train-Test Accuracy Gap: 29.03%


Epoch [554/700]:   3%|▎         | 21/782 [00:00<00:03, 206.68it/s]



Epoch [554/700]:  11%|█         | 85/782 [00:00<00:03, 210.99it/s]



Epoch [554/700]:  16%|█▋        | 129/782 [00:00<00:03, 215.29it/s]



Epoch [554/700]:  25%|██▍       | 195/782 [00:00<00:02, 213.75it/s]



Epoch [554/700]:  31%|███       | 239/782 [00:01<00:02, 207.98it/s]



Epoch [554/700]:  36%|███▌      | 283/782 [00:01<00:02, 210.62it/s]



Epoch [554/700]:  42%|████▏     | 327/782 [00:01<00:02, 209.35it/s]



Epoch [554/700]:  47%|████▋     | 371/782 [00:01<00:01, 211.40it/s]



Epoch [554/700]:  53%|█████▎    | 414/782 [00:01<00:01, 205.37it/s]



Epoch [554/700]:  58%|█████▊    | 457/782 [00:02<00:01, 204.69it/s]



Epoch [554/700]:  64%|██████▍   | 499/782 [00:02<00:01, 202.68it/s]



Epoch [554/700]:  69%|██████▉   | 543/782 [00:02<00:01, 207.80it/s]



Epoch [554/700]:  75%|███████▍  | 586/782 [00:02<00:00, 207.81it/s]



Epoch [554/700]:  80%|████████  | 629/782 [00:03<00:00, 208.97it/s]



Epoch [554/700]:  86%|████████▌ | 672/782 [00:03<00:00, 210.93it/s]



Epoch [554/700]:  92%|█████████▏| 716/782 [00:03<00:00, 212.00it/s]



Epoch [554/700]:  97%|█████████▋| 760/782 [00:03<00:00, 213.44it/s]



Epoch [554/700]: 100%|██████████| 782/782 [00:03<00:00, 209.51it/s]


Learning Rate: 0.004050
Train Loss: 0.0449, Accuracy: 98.51%, Confidence: 0.9821
Test Loss: 2.3667, Accuracy: 72.43%, Confidence: 0.9482
Train-Test Accuracy Gap: 26.08%


Epoch [555/700]:   3%|▎         | 21/782 [00:00<00:03, 203.53it/s]



Epoch [555/700]:  11%|█         | 84/782 [00:00<00:03, 203.86it/s]



Epoch [555/700]:  16%|█▌        | 127/782 [00:00<00:03, 207.77it/s]



Epoch [555/700]:  22%|██▏       | 170/782 [00:00<00:02, 208.10it/s]



Epoch [555/700]:  27%|██▋       | 214/782 [00:01<00:02, 210.90it/s]



Epoch [555/700]:  33%|███▎      | 258/782 [00:01<00:02, 212.95it/s]



Epoch [555/700]:  41%|████▏     | 324/782 [00:01<00:02, 214.70it/s]



Epoch [555/700]:  47%|████▋     | 368/782 [00:01<00:01, 212.40it/s]



Epoch [555/700]:  53%|█████▎    | 412/782 [00:01<00:01, 212.05it/s]



Epoch [555/700]:  58%|█████▊    | 456/782 [00:02<00:01, 210.17it/s]



Epoch [555/700]:  64%|██████▍   | 500/782 [00:02<00:01, 211.70it/s]



Epoch [555/700]:  70%|██████▉   | 544/782 [00:02<00:01, 213.29it/s]



Epoch [555/700]:  75%|███████▌  | 588/782 [00:02<00:00, 215.58it/s]



Epoch [555/700]:  81%|████████  | 632/782 [00:03<00:00, 208.00it/s]



Epoch [555/700]:  86%|████████▌ | 674/782 [00:03<00:00, 204.93it/s]



Epoch [555/700]:  92%|█████████▏| 717/782 [00:03<00:00, 207.32it/s]



Epoch [555/700]: 100%|██████████| 782/782 [00:03<00:00, 209.05it/s]






Learning Rate: 0.004050
Train Loss: 0.0446, Accuracy: 98.47%, Confidence: 0.9821
Test Loss: 2.1949, Accuracy: 73.18%, Confidence: 0.9454
Train-Test Accuracy Gap: 25.29%


Epoch [556/700]:   3%|▎         | 20/782 [00:00<00:03, 199.91it/s]



Epoch [556/700]:   5%|▌         | 42/782 [00:00<00:03, 205.80it/s]



Epoch [556/700]:  11%|█         | 85/782 [00:00<00:03, 207.35it/s]



Epoch [556/700]:  16%|█▌        | 127/782 [00:00<00:03, 203.73it/s]



Epoch [556/700]:  22%|██▏       | 169/782 [00:00<00:03, 200.76it/s]



Epoch [556/700]:  27%|██▋       | 212/782 [00:01<00:02, 198.88it/s]



Epoch [556/700]:  33%|███▎      | 255/782 [00:01<00:02, 203.51it/s]



Epoch [556/700]:  38%|███▊      | 299/782 [00:01<00:02, 207.14it/s]



Epoch [556/700]:  44%|████▎     | 342/782 [00:01<00:02, 208.11it/s]



Epoch [556/700]:  49%|████▉     | 384/782 [00:01<00:01, 207.90it/s]



Epoch [556/700]:  55%|█████▍    | 427/782 [00:02<00:01, 206.42it/s]



Epoch [556/700]:  60%|██████    | 471/782 [00:02<00:01, 209.58it/s]



Epoch [556/700]:  66%|██████▌   | 514/782 [00:02<00:01, 210.50it/s]



Epoch [556/700]:  74%|███████▍  | 580/782 [00:02<00:00, 210.04it/s]



Epoch [556/700]:  80%|███████▉  | 624/782 [00:03<00:00, 211.42it/s]



Epoch [556/700]:  85%|████████▌ | 668/782 [00:03<00:00, 210.29it/s]



Epoch [556/700]:  91%|█████████ | 712/782 [00:03<00:00, 209.45it/s]



Epoch [556/700]:  97%|█████████▋| 755/782 [00:03<00:00, 210.02it/s]



Epoch [556/700]: 100%|██████████| 782/782 [00:03<00:00, 206.51it/s]


Learning Rate: 0.004050
Train Loss: 0.0503, Accuracy: 98.25%, Confidence: 0.9813
Test Loss: 2.4486, Accuracy: 70.83%, Confidence: 0.9435
Train-Test Accuracy Gap: 27.42%


Epoch [557/700]:   3%|▎         | 21/782 [00:00<00:03, 204.51it/s]



Epoch [557/700]:   5%|▌         | 42/782 [00:00<00:03, 200.11it/s]



Epoch [557/700]:   8%|▊         | 64/782 [00:00<00:03, 207.39it/s]



Epoch [557/700]:  11%|█         | 86/782 [00:00<00:03, 209.72it/s]



Epoch [557/700]:  16%|█▋        | 129/782 [00:00<00:03, 210.90it/s]



Epoch [557/700]:  22%|██▏       | 173/782 [00:00<00:02, 212.05it/s]



Epoch [557/700]:  28%|██▊       | 217/782 [00:01<00:02, 210.37it/s]



Epoch [557/700]:  36%|███▌      | 283/782 [00:01<00:02, 213.40it/s]



Epoch [557/700]:  42%|████▏     | 327/782 [00:01<00:02, 208.76it/s]



Epoch [557/700]:  47%|████▋     | 369/782 [00:01<00:02, 205.26it/s]



Epoch [557/700]:  53%|█████▎    | 411/782 [00:01<00:01, 204.15it/s]



Epoch [557/700]:  58%|█████▊    | 454/782 [00:02<00:01, 208.50it/s]



Epoch [557/700]:  64%|██████▎   | 498/782 [00:02<00:01, 211.53it/s]



Epoch [557/700]:  69%|██████▉   | 542/782 [00:02<00:01, 206.93it/s]



Epoch [557/700]:  75%|███████▍  | 586/782 [00:02<00:00, 211.37it/s]



Epoch [557/700]:  83%|████████▎ | 652/782 [00:03<00:00, 212.79it/s]



Epoch [557/700]:  89%|████████▉ | 696/782 [00:03<00:00, 212.09it/s]



Epoch [557/700]:  95%|█████████▍| 740/782 [00:03<00:00, 209.64it/s]



Epoch [557/700]: 100%|██████████| 782/782 [00:03<00:00, 208.87it/s]






Learning Rate: 0.004050
Train Loss: 0.0496, Accuracy: 98.35%, Confidence: 0.9813
Test Loss: 2.8070, Accuracy: 69.25%, Confidence: 0.9444
Train-Test Accuracy Gap: 29.10%


Epoch [558/700]:   3%|▎         | 21/782 [00:00<00:03, 206.98it/s]



Epoch [558/700]:  11%|█         | 85/782 [00:00<00:03, 209.64it/s]



Epoch [558/700]:  16%|█▋        | 129/782 [00:00<00:03, 210.83it/s]



Epoch [558/700]:  22%|██▏       | 173/782 [00:00<00:02, 208.68it/s]



Epoch [558/700]:  28%|██▊       | 217/782 [00:01<00:02, 212.28it/s]



Epoch [558/700]:  36%|███▌      | 283/782 [00:01<00:02, 212.60it/s]



Epoch [558/700]:  42%|████▏     | 327/782 [00:01<00:02, 212.70it/s]



Epoch [558/700]:  47%|████▋     | 371/782 [00:01<00:01, 211.30it/s]



Epoch [558/700]:  53%|█████▎    | 415/782 [00:01<00:01, 213.19it/s]



Epoch [558/700]:  62%|██████▏   | 481/782 [00:02<00:01, 211.94it/s]



Epoch [558/700]:  67%|██████▋   | 525/782 [00:02<00:01, 208.80it/s]



Epoch [558/700]:  73%|███████▎  | 568/782 [00:02<00:01, 207.99it/s]



Epoch [558/700]:  78%|███████▊  | 610/782 [00:02<00:00, 205.07it/s]



Epoch [558/700]:  83%|████████▎ | 652/782 [00:03<00:00, 206.00it/s]



Epoch [558/700]:  89%|████████▊ | 694/782 [00:03<00:00, 205.46it/s]



Epoch [558/700]:  94%|█████████▍| 736/782 [00:03<00:00, 198.35it/s]



Epoch [558/700]: 100%|██████████| 782/782 [00:03<00:00, 207.34it/s]






Learning Rate: 0.004050
Train Loss: 0.0435, Accuracy: 98.54%, Confidence: 0.9828
Test Loss: 2.2289, Accuracy: 73.15%, Confidence: 0.9465
Train-Test Accuracy Gap: 25.39%


Epoch [559/700]:   2%|▏         | 19/782 [00:00<00:04, 189.85it/s]



Epoch [559/700]:   5%|▌         | 40/782 [00:00<00:03, 197.13it/s]



Epoch [559/700]:   8%|▊         | 61/782 [00:00<00:03, 199.14it/s]



Epoch [559/700]:  16%|█▌        | 124/782 [00:00<00:03, 199.29it/s]



Epoch [559/700]:  21%|██        | 166/782 [00:00<00:03, 200.14it/s]



Epoch [559/700]:  27%|██▋       | 208/782 [00:01<00:02, 201.60it/s]



Epoch [559/700]:  35%|███▍      | 272/782 [00:01<00:02, 206.88it/s]



Epoch [559/700]:  40%|████      | 314/782 [00:01<00:02, 202.44it/s]



Epoch [559/700]:  46%|████▌     | 358/782 [00:01<00:02, 206.98it/s]



Epoch [559/700]:  51%|█████▏    | 401/782 [00:01<00:01, 209.78it/s]



Epoch [559/700]:  57%|█████▋    | 443/782 [00:02<00:01, 208.59it/s]



Epoch [559/700]:  62%|██████▏   | 487/782 [00:02<00:01, 210.40it/s]



Epoch [559/700]:  71%|███████   | 553/782 [00:02<00:01, 211.11it/s]



Epoch [559/700]:  76%|███████▋  | 597/782 [00:02<00:00, 212.41it/s]



Epoch [559/700]:  82%|████████▏ | 641/782 [00:03<00:00, 209.87it/s]



Epoch [559/700]:  87%|████████▋ | 684/782 [00:03<00:00, 211.12it/s]



Epoch [559/700]:  93%|█████████▎| 728/782 [00:03<00:00, 213.78it/s]



Epoch [559/700]: 100%|██████████| 782/782 [00:03<00:00, 207.58it/s]






Learning Rate: 0.004050
Train Loss: 0.0409, Accuracy: 98.55%, Confidence: 0.9823
Test Loss: 2.1864, Accuracy: 73.40%, Confidence: 0.9463
Train-Test Accuracy Gap: 25.15%


Epoch [560/700]:   3%|▎         | 21/782 [00:00<00:03, 209.31it/s]



Epoch [560/700]:   5%|▌         | 42/782 [00:00<00:03, 209.32it/s]



Epoch [560/700]:  11%|█         | 86/782 [00:00<00:03, 212.56it/s]



Epoch [560/700]:  17%|█▋        | 130/782 [00:00<00:03, 213.49it/s]



Epoch [560/700]:  25%|██▌       | 196/782 [00:00<00:02, 214.63it/s]



Epoch [560/700]:  31%|███       | 240/782 [00:01<00:02, 209.28it/s]



Epoch [560/700]:  36%|███▋      | 284/782 [00:01<00:02, 209.43it/s]



Epoch [560/700]:  42%|████▏     | 328/782 [00:01<00:02, 210.98it/s]



Epoch [560/700]:  48%|████▊     | 372/782 [00:01<00:01, 211.43it/s]



Epoch [560/700]:  53%|█████▎    | 416/782 [00:01<00:01, 211.84it/s]



Epoch [560/700]:  59%|█████▉    | 460/782 [00:02<00:01, 210.62it/s]



Epoch [560/700]:  64%|██████▍   | 504/782 [00:02<00:01, 211.69it/s]



Epoch [560/700]:  70%|███████   | 548/782 [00:02<00:01, 211.20it/s]



Epoch [560/700]:  76%|███████▌  | 592/782 [00:02<00:00, 212.49it/s]



Epoch [560/700]:  84%|████████▍ | 658/782 [00:03<00:00, 216.31it/s]



Epoch [560/700]:  90%|████████▉ | 702/782 [00:03<00:00, 203.89it/s]



Epoch [560/700]:  95%|█████████▌| 746/782 [00:03<00:00, 209.88it/s]



Epoch [560/700]: 100%|██████████| 782/782 [00:03<00:00, 211.13it/s]


Learning Rate: 0.004050
Train Loss: 0.0441, Accuracy: 98.54%, Confidence: 0.9824
Test Loss: 2.3033, Accuracy: 71.95%, Confidence: 0.9444
Train-Test Accuracy Gap: 26.59%


Epoch [561/700]:   3%|▎         | 21/782 [00:00<00:03, 202.91it/s]



Epoch [561/700]:  11%|█▏        | 88/782 [00:00<00:03, 216.12it/s]



Epoch [561/700]:  17%|█▋        | 132/782 [00:00<00:03, 204.58it/s]



Epoch [561/700]:  25%|██▌       | 196/782 [00:00<00:02, 207.78it/s]



Epoch [561/700]:  31%|███       | 239/782 [00:01<00:02, 211.17it/s]



Epoch [561/700]:  36%|███▌      | 282/782 [00:01<00:02, 209.16it/s]



Epoch [561/700]:  42%|████▏     | 326/782 [00:01<00:02, 208.96it/s]



Epoch [561/700]:  47%|████▋     | 368/782 [00:01<00:02, 202.50it/s]



Epoch [561/700]:  53%|█████▎    | 411/782 [00:01<00:01, 204.66it/s]



Epoch [561/700]:  58%|█████▊    | 455/782 [00:02<00:01, 210.17it/s]



Epoch [561/700]:  64%|██████▍   | 499/782 [00:02<00:01, 213.20it/s]



Epoch [561/700]:  69%|██████▉   | 543/782 [00:02<00:01, 211.84it/s]



Epoch [561/700]:  75%|███████▌  | 587/782 [00:02<00:00, 210.10it/s]



Epoch [561/700]:  81%|████████  | 630/782 [00:03<00:00, 207.49it/s]



Epoch [561/700]:  86%|████████▌ | 673/782 [00:03<00:00, 207.75it/s]



Epoch [561/700]:  94%|█████████▍| 737/782 [00:03<00:00, 208.65it/s]



Epoch [561/700]: 100%|██████████| 782/782 [00:03<00:00, 207.81it/s]






Learning Rate: 0.004050
Train Loss: 0.0486, Accuracy: 98.36%, Confidence: 0.9817
Test Loss: 2.1271, Accuracy: 73.84%, Confidence: 0.9472
Train-Test Accuracy Gap: 24.52%


Epoch [562/700]:   3%|▎         | 21/782 [00:00<00:03, 206.90it/s]



Epoch [562/700]:   5%|▌         | 43/782 [00:00<00:03, 211.88it/s]



Epoch [562/700]:   8%|▊         | 65/782 [00:00<00:03, 207.31it/s]



Epoch [562/700]:  11%|█         | 86/782 [00:00<00:03, 196.80it/s]



Epoch [562/700]:  16%|█▋        | 128/782 [00:00<00:03, 199.18it/s]



Epoch [562/700]:  21%|██▏       | 168/782 [00:00<00:03, 197.36it/s]



Epoch [562/700]:  27%|██▋       | 211/782 [00:01<00:02, 203.47it/s]



Epoch [562/700]:  33%|███▎      | 255/782 [00:01<00:02, 208.64it/s]



Epoch [562/700]:  38%|███▊      | 299/782 [00:01<00:02, 210.75it/s]



Epoch [562/700]:  44%|████▍     | 343/782 [00:01<00:02, 209.67it/s]



Epoch [562/700]:  52%|█████▏    | 409/782 [00:01<00:01, 213.49it/s]



Epoch [562/700]:  58%|█████▊    | 453/782 [00:02<00:01, 213.93it/s]



Epoch [562/700]:  64%|██████▎   | 497/782 [00:02<00:01, 213.19it/s]



Epoch [562/700]:  69%|██████▉   | 541/782 [00:02<00:01, 210.94it/s]



Epoch [562/700]:  75%|███████▍  | 585/782 [00:02<00:00, 208.16it/s]



Epoch [562/700]:  83%|████████▎ | 651/782 [00:03<00:00, 212.00it/s]



Epoch [562/700]:  89%|████████▉ | 695/782 [00:03<00:00, 209.11it/s]



Epoch [562/700]:  94%|█████████▍| 738/782 [00:03<00:00, 209.06it/s]



Epoch [562/700]: 100%|██████████| 782/782 [00:03<00:00, 208.24it/s]






Learning Rate: 0.004050
Train Loss: 0.0435, Accuracy: 98.52%, Confidence: 0.9823
Test Loss: 2.1503, Accuracy: 73.06%, Confidence: 0.9457
Train-Test Accuracy Gap: 25.46%


Epoch [563/700]:   3%|▎         | 22/782 [00:00<00:03, 212.98it/s]



Epoch [563/700]:  11%|█▏        | 88/782 [00:00<00:03, 210.51it/s]



Epoch [563/700]:  17%|█▋        | 132/782 [00:00<00:03, 211.29it/s]



Epoch [563/700]:  23%|██▎       | 176/782 [00:00<00:02, 212.41it/s]



Epoch [563/700]:  28%|██▊       | 221/782 [00:01<00:02, 215.42it/s]



Epoch [563/700]:  34%|███▍      | 265/782 [00:01<00:02, 212.86it/s]



Epoch [563/700]:  40%|███▉      | 309/782 [00:01<00:02, 208.85it/s]



Epoch [563/700]:  45%|████▍     | 351/782 [00:01<00:02, 208.53it/s]



Epoch [563/700]:  50%|█████     | 394/782 [00:01<00:01, 209.57it/s]



Epoch [563/700]:  56%|█████▌    | 438/782 [00:02<00:01, 211.94it/s]



Epoch [563/700]:  62%|██████▏   | 481/782 [00:02<00:01, 209.79it/s]



Epoch [563/700]:  67%|██████▋   | 523/782 [00:02<00:01, 203.61it/s]



Epoch [563/700]:  73%|███████▎  | 567/782 [00:02<00:01, 208.66it/s]



Epoch [563/700]:  78%|███████▊  | 611/782 [00:02<00:00, 211.32it/s]



Epoch [563/700]:  84%|████████▍ | 655/782 [00:03<00:00, 209.83it/s]



Epoch [563/700]:  89%|████████▉ | 698/782 [00:03<00:00, 209.84it/s]



Epoch [563/700]:  97%|█████████▋| 761/782 [00:03<00:00, 205.01it/s]



Epoch [563/700]: 100%|██████████| 782/782 [00:03<00:00, 209.25it/s]


Learning Rate: 0.004050
Train Loss: 0.0466, Accuracy: 98.39%, Confidence: 0.9819
Test Loss: 2.2462, Accuracy: 72.98%, Confidence: 0.9471
Train-Test Accuracy Gap: 25.41%


Epoch [564/700]:   3%|▎         | 22/782 [00:00<00:03, 212.53it/s]



Epoch [564/700]:   6%|▌         | 44/782 [00:00<00:03, 211.13it/s]



Epoch [564/700]:   8%|▊         | 66/782 [00:00<00:03, 209.79it/s]



Epoch [564/700]:  11%|█         | 87/782 [00:00<00:03, 207.59it/s]



Epoch [564/700]:  14%|█▍        | 108/782 [00:00<00:03, 205.62it/s]



Epoch [564/700]:  16%|█▋        | 129/782 [00:00<00:03, 206.01it/s]



Epoch [564/700]:  22%|██▏       | 173/782 [00:00<00:02, 209.91it/s]



Epoch [564/700]:  28%|██▊       | 217/782 [00:01<00:02, 214.12it/s]



Epoch [564/700]:  31%|███       | 239/782 [00:01<00:02, 212.18it/s]



Epoch [564/700]:  33%|███▎      | 261/782 [00:01<00:02, 208.69it/s]



Epoch [564/700]:  36%|███▌      | 282/782 [00:01<00:02, 208.64it/s]



Epoch [564/700]:  39%|███▉      | 304/782 [00:01<00:02, 209.64it/s]



Epoch [564/700]:  42%|████▏     | 325/782 [00:01<00:02, 209.45it/s]



Epoch [564/700]:  47%|████▋     | 369/782 [00:01<00:01, 210.85it/s]



Epoch [564/700]:  53%|█████▎    | 413/782 [00:01<00:01, 214.01it/s]



Epoch [564/700]:  58%|█████▊    | 457/782 [00:02<00:01, 209.56it/s]



Epoch [564/700]:  67%|██████▋   | 523/782 [00:02<00:01, 213.50it/s]



Epoch [564/700]:  73%|███████▎  | 567/782 [00:02<00:01, 212.31it/s]



Epoch [564/700]:  78%|███████▊  | 611/782 [00:02<00:00, 214.01it/s]



Epoch [564/700]:  84%|████████▍ | 655/782 [00:03<00:00, 206.84it/s]



Epoch [564/700]:  89%|████████▉ | 698/782 [00:03<00:00, 207.07it/s]



Epoch [564/700]:  95%|█████████▍| 740/782 [00:03<00:00, 202.13it/s]



Epoch [564/700]: 100%|██████████| 782/782 [00:03<00:00, 208.89it/s]


Learning Rate: 0.004050
Train Loss: 0.0389, Accuracy: 98.68%, Confidence: 0.9831
Test Loss: 2.3407, Accuracy: 73.19%, Confidence: 0.9491
Train-Test Accuracy Gap: 25.49%


Epoch [565/700]:   3%|▎         | 21/782 [00:00<00:03, 201.98it/s]



Epoch [565/700]:  11%|█         | 87/782 [00:00<00:03, 212.29it/s]



Epoch [565/700]:  20%|█▉        | 153/782 [00:00<00:03, 207.02it/s]



Epoch [565/700]:  25%|██▍       | 195/782 [00:00<00:02, 203.73it/s]



Epoch [565/700]:  30%|███       | 237/782 [00:01<00:02, 204.50it/s]



Epoch [565/700]:  36%|███▌      | 279/782 [00:01<00:02, 206.64it/s]



Epoch [565/700]:  41%|████      | 322/782 [00:01<00:02, 206.99it/s]



Epoch [565/700]:  47%|████▋     | 365/782 [00:01<00:02, 208.24it/s]



Epoch [565/700]:  52%|█████▏    | 409/782 [00:01<00:01, 211.03it/s]



Epoch [565/700]:  58%|█████▊    | 452/782 [00:02<00:01, 207.17it/s]



Epoch [565/700]:  63%|██████▎   | 495/782 [00:02<00:01, 207.01it/s]



Epoch [565/700]:  69%|██████▉   | 538/782 [00:02<00:01, 207.18it/s]



Epoch [565/700]:  74%|███████▍  | 582/782 [00:02<00:00, 209.76it/s]



Epoch [565/700]:  80%|████████  | 626/782 [00:03<00:00, 209.42it/s]



Epoch [565/700]:  86%|████████▌ | 670/782 [00:03<00:00, 211.96it/s]



Epoch [565/700]:  97%|█████████▋| 758/782 [00:03<00:00, 211.67it/s]



Epoch [565/700]: 100%|██████████| 782/782 [00:03<00:00, 208.50it/s]


Learning Rate: 0.004050
Train Loss: 0.0416, Accuracy: 98.61%, Confidence: 0.9829
Test Loss: 2.1295, Accuracy: 74.03%, Confidence: 0.9482
Train-Test Accuracy Gap: 24.58%


Epoch [566/700]:   3%|▎         | 20/782 [00:00<00:03, 191.99it/s]



Epoch [566/700]:  11%|█         | 84/782 [00:00<00:03, 206.49it/s]



Epoch [566/700]:  16%|█▌        | 127/782 [00:00<00:03, 209.37it/s]



Epoch [566/700]:  22%|██▏       | 171/782 [00:00<00:02, 213.91it/s]



Epoch [566/700]:  27%|██▋       | 215/782 [00:01<00:02, 211.41it/s]



Epoch [566/700]:  33%|███▎      | 258/782 [00:01<00:02, 206.79it/s]



Epoch [566/700]:  38%|███▊      | 301/782 [00:01<00:02, 206.19it/s]



Epoch [566/700]:  44%|████▍     | 344/782 [00:01<00:02, 208.29it/s]



Epoch [566/700]:  52%|█████▏    | 410/782 [00:01<00:01, 212.72it/s]



Epoch [566/700]:  58%|█████▊    | 454/782 [00:02<00:01, 211.56it/s]



Epoch [566/700]:  61%|██████    | 476/782 [00:02<00:01, 200.90it/s]



Epoch [566/700]:  66%|██████▋   | 519/782 [00:02<00:01, 205.41it/s]



Epoch [566/700]:  75%|███████▍  | 584/782 [00:02<00:00, 210.33it/s]



Epoch [566/700]:  80%|████████  | 628/782 [00:03<00:00, 210.89it/s]



Epoch [566/700]:  86%|████████▌ | 672/782 [00:03<00:00, 211.59it/s]



Epoch [566/700]:  92%|█████████▏| 716/782 [00:03<00:00, 208.68it/s]



Epoch [566/700]:  97%|█████████▋| 758/782 [00:03<00:00, 207.17it/s]



Epoch [566/700]: 100%|██████████| 782/782 [00:03<00:00, 208.06it/s]


Learning Rate: 0.004050
Train Loss: 0.0458, Accuracy: 98.46%, Confidence: 0.9821
Test Loss: 2.1329, Accuracy: 73.71%, Confidence: 0.9482
Train-Test Accuracy Gap: 24.75%


Epoch [567/700]:   3%|▎         | 21/782 [00:00<00:03, 209.66it/s]



Epoch [567/700]:   5%|▌         | 42/782 [00:00<00:03, 205.31it/s]



Epoch [567/700]:   8%|▊         | 64/782 [00:00<00:03, 208.24it/s]



Epoch [567/700]:  11%|█         | 85/782 [00:00<00:03, 207.23it/s]



Epoch [567/700]:  14%|█▎        | 106/782 [00:00<00:03, 207.41it/s]



Epoch [567/700]:  16%|█▋        | 128/782 [00:00<00:03, 208.57it/s]



Epoch [567/700]:  19%|█▉        | 150/782 [00:00<00:03, 209.46it/s]



Epoch [567/700]:  22%|██▏       | 172/782 [00:00<00:02, 210.47it/s]



Epoch [567/700]:  25%|██▍       | 194/782 [00:00<00:02, 209.08it/s]



Epoch [567/700]:  27%|██▋       | 215/782 [00:01<00:02, 207.58it/s]



Epoch [567/700]:  30%|███       | 237/782 [00:01<00:02, 209.77it/s]



Epoch [567/700]:  36%|███▌      | 281/782 [00:01<00:02, 209.88it/s]



Epoch [567/700]:  41%|████▏     | 323/782 [00:01<00:02, 202.52it/s]



Epoch [567/700]:  47%|████▋     | 366/782 [00:01<00:02, 205.21it/s]



Epoch [567/700]:  52%|█████▏    | 409/782 [00:01<00:01, 208.21it/s]



Epoch [567/700]:  55%|█████▌    | 431/782 [00:02<00:01, 210.24it/s]



Epoch [567/700]:  58%|█████▊    | 453/782 [00:02<00:01, 211.49it/s]



Epoch [567/700]:  61%|██████    | 475/782 [00:02<00:01, 211.96it/s]



Epoch [567/700]:  64%|██████▎   | 497/782 [00:02<00:01, 210.68it/s]



Epoch [567/700]:  72%|███████▏  | 563/782 [00:02<00:01, 213.32it/s]



Epoch [567/700]:  80%|████████  | 629/782 [00:02<00:00, 213.47it/s]



Epoch [567/700]:  86%|████████▌ | 673/782 [00:03<00:00, 211.56it/s]



Epoch [567/700]:  92%|█████████▏| 717/782 [00:03<00:00, 211.35it/s]



Epoch [567/700]:  97%|█████████▋| 761/782 [00:03<00:00, 210.98it/s]



Epoch [567/700]: 100%|██████████| 782/782 [00:03<00:00, 209.60it/s]


Learning Rate: 0.004050
Train Loss: 0.0408, Accuracy: 98.55%, Confidence: 0.9827
Test Loss: 2.4739, Accuracy: 71.34%, Confidence: 0.9466
Train-Test Accuracy Gap: 27.21%


Epoch [568/700]:   3%|▎         | 21/782 [00:00<00:03, 205.61it/s]



Epoch [568/700]:   5%|▌         | 42/782 [00:00<00:03, 201.28it/s]



Epoch [568/700]:   8%|▊         | 63/782 [00:00<00:03, 204.86it/s]



Epoch [568/700]:  13%|█▎        | 105/782 [00:00<00:03, 204.04it/s]



Epoch [568/700]:  16%|█▌        | 126/782 [00:00<00:03, 197.80it/s]



Epoch [568/700]:  19%|█▉        | 147/782 [00:00<00:03, 201.00it/s]



Epoch [568/700]:  24%|██▍       | 189/782 [00:00<00:02, 199.70it/s]



Epoch [568/700]:  27%|██▋       | 210/782 [00:01<00:02, 200.41it/s]



Epoch [568/700]:  30%|██▉       | 231/782 [00:01<00:02, 201.78it/s]



Epoch [568/700]:  32%|███▏      | 252/782 [00:01<00:02, 204.19it/s]



Epoch [568/700]:  35%|███▍      | 273/782 [00:01<00:02, 199.94it/s]



Epoch [568/700]:  38%|███▊      | 295/782 [00:01<00:02, 203.44it/s]



Epoch [568/700]:  41%|████      | 317/782 [00:01<00:02, 205.84it/s]



Epoch [568/700]:  43%|████▎     | 338/782 [00:01<00:02, 205.95it/s]



Epoch [568/700]:  46%|████▌     | 360/782 [00:01<00:02, 207.80it/s]



Epoch [568/700]:  49%|████▊     | 381/782 [00:01<00:01, 208.13it/s]



Epoch [568/700]:  52%|█████▏    | 403/782 [00:01<00:01, 209.17it/s]



Epoch [568/700]:  57%|█████▋    | 446/782 [00:02<00:01, 209.56it/s]



Epoch [568/700]:  62%|██████▏   | 488/782 [00:02<00:01, 205.49it/s]



Epoch [568/700]:  68%|██████▊   | 531/782 [00:02<00:01, 207.28it/s]



Epoch [568/700]:  73%|███████▎  | 574/782 [00:02<00:00, 209.33it/s]



Epoch [568/700]:  76%|███████▌  | 596/782 [00:02<00:00, 210.59it/s]



Epoch [568/700]:  79%|███████▉  | 618/782 [00:03<00:00, 212.82it/s]



Epoch [568/700]:  82%|████████▏ | 640/782 [00:03<00:00, 213.02it/s]



Epoch [568/700]:  85%|████████▍ | 662/782 [00:03<00:00, 213.52it/s]



Epoch [568/700]:  87%|████████▋ | 684/782 [00:03<00:00, 211.73it/s]



Epoch [568/700]:  96%|█████████▌| 750/782 [00:03<00:00, 210.91it/s]



Epoch [568/700]: 100%|██████████| 782/782 [00:03<00:00, 206.65it/s]


Learning Rate: 0.004050
Train Loss: 0.0448, Accuracy: 98.52%, Confidence: 0.9820
Test Loss: 2.8875, Accuracy: 68.41%, Confidence: 0.9459
Train-Test Accuracy Gap: 30.11%


Epoch [569/700]:   3%|▎         | 20/782 [00:00<00:03, 193.73it/s]



Epoch [569/700]:   5%|▌         | 41/782 [00:00<00:03, 199.33it/s]



Epoch [569/700]:   8%|▊         | 62/782 [00:00<00:03, 202.60it/s]



Epoch [569/700]:  11%|█         | 84/782 [00:00<00:03, 206.42it/s]



Epoch [569/700]:  16%|█▌        | 126/782 [00:00<00:03, 207.91it/s]



Epoch [569/700]:  21%|██▏       | 168/782 [00:00<00:02, 205.91it/s]



Epoch [569/700]:  27%|██▋       | 211/782 [00:01<00:02, 209.93it/s]



Epoch [569/700]:  35%|███▌      | 277/782 [00:01<00:02, 212.81it/s]



Epoch [569/700]:  41%|████      | 321/782 [00:01<00:02, 212.89it/s]



Epoch [569/700]:  49%|████▉     | 387/782 [00:01<00:01, 214.45it/s]



Epoch [569/700]:  55%|█████▌    | 431/782 [00:02<00:01, 212.73it/s]



Epoch [569/700]:  61%|██████    | 475/782 [00:02<00:01, 214.18it/s]



Epoch [569/700]:  66%|██████▋   | 519/782 [00:02<00:01, 215.37it/s]



Epoch [569/700]:  72%|███████▏  | 563/782 [00:02<00:01, 212.63it/s]



Epoch [569/700]:  78%|███████▊  | 607/782 [00:02<00:00, 210.40it/s]



Epoch [569/700]:  86%|████████▌ | 673/782 [00:03<00:00, 213.50it/s]



Epoch [569/700]:  92%|█████████▏| 717/782 [00:03<00:00, 212.68it/s]



Epoch [569/700]: 100%|██████████| 782/782 [00:03<00:00, 211.26it/s]






Learning Rate: 0.004050
Train Loss: 0.0385, Accuracy: 98.74%, Confidence: 0.9831
Test Loss: 2.1720, Accuracy: 73.51%, Confidence: 0.9452
Train-Test Accuracy Gap: 25.23%


Epoch [570/700]:   3%|▎         | 21/782 [00:00<00:03, 204.72it/s]



Epoch [570/700]:  11%|█         | 86/782 [00:00<00:03, 209.61it/s]



Epoch [570/700]:  16%|█▋        | 129/782 [00:00<00:03, 210.52it/s]



Epoch [570/700]:  22%|██▏       | 173/782 [00:00<00:02, 210.27it/s]



Epoch [570/700]:  28%|██▊       | 217/782 [00:01<00:02, 211.43it/s]



Epoch [570/700]:  33%|███▎      | 261/782 [00:01<00:02, 209.87it/s]



Epoch [570/700]:  39%|███▊      | 303/782 [00:01<00:02, 206.16it/s]



Epoch [570/700]:  47%|████▋     | 369/782 [00:01<00:01, 211.41it/s]



Epoch [570/700]:  53%|█████▎    | 414/782 [00:01<00:01, 214.88it/s]



Epoch [570/700]:  59%|█████▊    | 458/782 [00:02<00:01, 214.10it/s]



Epoch [570/700]:  64%|██████▍   | 501/782 [00:02<00:01, 206.89it/s]



Epoch [570/700]:  70%|██████▉   | 545/782 [00:02<00:01, 209.92it/s]



Epoch [570/700]:  75%|███████▌  | 588/782 [00:02<00:00, 203.76it/s]



Epoch [570/700]:  81%|████████  | 632/782 [00:03<00:00, 209.79it/s]



Epoch [570/700]:  89%|████████▉ | 697/782 [00:03<00:00, 208.59it/s]



Epoch [570/700]:  95%|█████████▍| 739/782 [00:03<00:00, 205.06it/s]



Epoch [570/700]: 100%|██████████| 782/782 [00:03<00:00, 207.54it/s]






Learning Rate: 0.004050
Train Loss: 0.0450, Accuracy: 98.52%, Confidence: 0.9823
Test Loss: 2.3789, Accuracy: 72.39%, Confidence: 0.9454
Train-Test Accuracy Gap: 26.13%


Epoch [571/700]:   3%|▎         | 22/782 [00:00<00:03, 210.61it/s]



Epoch [571/700]:  11%|█▏        | 88/782 [00:00<00:03, 210.04it/s]



Epoch [571/700]:  17%|█▋        | 132/782 [00:00<00:03, 210.62it/s]



Epoch [571/700]:  22%|██▏       | 175/782 [00:00<00:02, 207.55it/s]



Epoch [571/700]:  28%|██▊       | 217/782 [00:01<00:02, 202.20it/s]



Epoch [571/700]:  33%|███▎      | 259/782 [00:01<00:02, 201.48it/s]



Epoch [571/700]:  39%|███▊      | 302/782 [00:01<00:02, 205.09it/s]



Epoch [571/700]:  44%|████▍     | 344/782 [00:01<00:02, 203.84it/s]



Epoch [571/700]:  49%|████▉     | 386/782 [00:01<00:01, 204.36it/s]



Epoch [571/700]:  55%|█████▍    | 429/782 [00:02<00:01, 208.08it/s]



Epoch [571/700]:  60%|██████    | 473/782 [00:02<00:01, 212.16it/s]



Epoch [571/700]:  66%|██████▌   | 517/782 [00:02<00:01, 211.13it/s]



Epoch [571/700]:  72%|███████▏  | 560/782 [00:02<00:01, 204.90it/s]



Epoch [571/700]:  77%|███████▋  | 604/782 [00:02<00:00, 210.25it/s]



Epoch [571/700]:  83%|████████▎ | 648/782 [00:03<00:00, 210.97it/s]



Epoch [571/700]:  91%|█████████ | 712/782 [00:03<00:00, 209.23it/s]



Epoch [571/700]:  97%|█████████▋| 755/782 [00:03<00:00, 211.40it/s]



Epoch [571/700]: 100%|██████████| 782/782 [00:03<00:00, 207.64it/s]


Learning Rate: 0.004050
Train Loss: 0.0452, Accuracy: 98.44%, Confidence: 0.9824
Test Loss: 2.4969, Accuracy: 71.33%, Confidence: 0.9472
Train-Test Accuracy Gap: 27.11%


Epoch [572/700]:   3%|▎         | 22/782 [00:00<00:03, 213.26it/s]



Epoch [572/700]:  11%|█▏        | 88/782 [00:00<00:03, 215.13it/s]



Epoch [572/700]:  17%|█▋        | 132/782 [00:00<00:03, 215.16it/s]



Epoch [572/700]:  23%|██▎       | 176/782 [00:00<00:02, 212.24it/s]



Epoch [572/700]:  28%|██▊       | 220/782 [00:01<00:02, 213.59it/s]



Epoch [572/700]:  34%|███▍      | 264/782 [00:01<00:02, 211.52it/s]



Epoch [572/700]:  42%|████▏     | 328/782 [00:01<00:02, 204.05it/s]



Epoch [572/700]:  47%|████▋     | 371/782 [00:01<00:02, 204.55it/s]



Epoch [572/700]:  53%|█████▎    | 414/782 [00:01<00:01, 207.08it/s]



Epoch [572/700]:  59%|█████▊    | 458/782 [00:02<00:01, 209.73it/s]



Epoch [572/700]:  67%|██████▋   | 524/782 [00:02<00:01, 213.34it/s]



Epoch [572/700]:  76%|███████▌  | 591/782 [00:02<00:00, 212.25it/s]



Epoch [572/700]:  84%|████████▍ | 657/782 [00:03<00:00, 213.58it/s]



Epoch [572/700]:  87%|████████▋ | 679/782 [00:03<00:00, 211.79it/s]



Epoch [572/700]:  95%|█████████▌| 745/782 [00:03<00:00, 212.08it/s]



Epoch [572/700]: 100%|██████████| 782/782 [00:03<00:00, 210.57it/s]


Learning Rate: 0.004050
Train Loss: 0.0402, Accuracy: 98.61%, Confidence: 0.9827
Test Loss: 2.5539, Accuracy: 71.01%, Confidence: 0.9473
Train-Test Accuracy Gap: 27.60%


Epoch [573/700]:   3%|▎         | 21/782 [00:00<00:03, 209.81it/s]



Epoch [573/700]:   5%|▌         | 42/782 [00:00<00:03, 200.71it/s]



Epoch [573/700]:  11%|█         | 84/782 [00:00<00:03, 203.39it/s]



Epoch [573/700]:  16%|█▌        | 127/782 [00:00<00:03, 209.17it/s]



Epoch [573/700]:  22%|██▏       | 170/782 [00:00<00:02, 209.98it/s]



Epoch [573/700]:  27%|██▋       | 213/782 [00:01<00:02, 205.08it/s]



Epoch [573/700]:  33%|███▎      | 256/782 [00:01<00:02, 207.41it/s]



Epoch [573/700]:  38%|███▊      | 298/782 [00:01<00:02, 205.48it/s]



Epoch [573/700]:  44%|████▍     | 343/782 [00:01<00:02, 212.01it/s]



Epoch [573/700]:  49%|████▉     | 387/782 [00:01<00:01, 213.10it/s]



Epoch [573/700]:  55%|█████▌    | 431/782 [00:02<00:01, 208.66it/s]



Epoch [573/700]:  61%|██████    | 474/782 [00:02<00:01, 206.41it/s]



Epoch [573/700]:  66%|██████▌   | 517/782 [00:02<00:01, 208.71it/s]



Epoch [573/700]:  72%|███████▏  | 560/782 [00:02<00:01, 209.71it/s]



Epoch [573/700]:  77%|███████▋  | 604/782 [00:02<00:00, 210.51it/s]



Epoch [573/700]:  85%|████████▌ | 668/782 [00:03<00:00, 203.58it/s]



Epoch [573/700]:  91%|█████████ | 710/782 [00:03<00:00, 204.48it/s]



Epoch [573/700]:  96%|█████████▌| 752/782 [00:03<00:00, 202.44it/s]



Epoch [573/700]: 100%|██████████| 782/782 [00:03<00:00, 206.42it/s]


Learning Rate: 0.004050
Train Loss: 0.0484, Accuracy: 98.33%, Confidence: 0.9820
Test Loss: 2.2503, Accuracy: 72.55%, Confidence: 0.9457
Train-Test Accuracy Gap: 25.78%


Epoch [574/700]:   3%|▎         | 20/782 [00:00<00:03, 195.65it/s]



Epoch [574/700]:  11%|█         | 85/782 [00:00<00:03, 206.73it/s]



Epoch [574/700]:  16%|█▌        | 127/782 [00:00<00:03, 206.93it/s]



Epoch [574/700]:  22%|██▏       | 169/782 [00:00<00:03, 200.99it/s]



Epoch [574/700]:  27%|██▋       | 211/782 [00:01<00:02, 202.48it/s]



Epoch [574/700]:  32%|███▏      | 253/782 [00:01<00:02, 203.39it/s]



Epoch [574/700]:  41%|████      | 319/782 [00:01<00:02, 211.35it/s]



Epoch [574/700]:  46%|████▋     | 363/782 [00:01<00:01, 212.60it/s]



Epoch [574/700]:  52%|█████▏    | 407/782 [00:01<00:01, 212.74it/s]



Epoch [574/700]:  58%|█████▊    | 451/782 [00:02<00:01, 211.12it/s]



Epoch [574/700]:  63%|██████▎   | 495/782 [00:02<00:01, 209.11it/s]



Epoch [574/700]:  69%|██████▉   | 539/782 [00:02<00:01, 212.17it/s]



Epoch [574/700]:  75%|███████▍  | 583/782 [00:02<00:00, 212.02it/s]



Epoch [574/700]:  80%|████████  | 627/782 [00:03<00:00, 213.01it/s]



Epoch [574/700]:  86%|████████▌ | 670/782 [00:03<00:00, 204.70it/s]



Epoch [574/700]:  91%|█████████ | 713/782 [00:03<00:00, 206.87it/s]



Epoch [574/700]:  99%|█████████▉| 777/782 [00:03<00:00, 205.61it/s]



Epoch [574/700]: 100%|██████████| 782/782 [00:03<00:00, 207.52it/s]


Learning Rate: 0.004050
Train Loss: 0.0445, Accuracy: 98.47%, Confidence: 0.9822
Test Loss: 2.3033, Accuracy: 72.52%, Confidence: 0.9454
Train-Test Accuracy Gap: 25.95%


Epoch [575/700]:   3%|▎         | 20/782 [00:00<00:03, 196.91it/s]



Epoch [575/700]:   5%|▌         | 42/782 [00:00<00:03, 206.46it/s]



Epoch [575/700]:   8%|▊         | 64/782 [00:00<00:03, 211.61it/s]



Epoch [575/700]:  11%|█         | 86/782 [00:00<00:03, 214.17it/s]



Epoch [575/700]:  14%|█▍        | 108/782 [00:00<00:03, 214.66it/s]



Epoch [575/700]:  17%|█▋        | 130/782 [00:00<00:03, 215.00it/s]



Epoch [575/700]:  22%|██▏       | 174/782 [00:00<00:02, 215.18it/s]



Epoch [575/700]:  28%|██▊       | 218/782 [00:01<00:02, 214.86it/s]



Epoch [575/700]:  34%|███▎      | 262/782 [00:01<00:02, 211.13it/s]



Epoch [575/700]:  39%|███▉      | 305/782 [00:01<00:02, 207.47it/s]



Epoch [575/700]:  45%|████▍     | 349/782 [00:01<00:02, 212.40it/s]



Epoch [575/700]:  47%|████▋     | 371/782 [00:01<00:01, 212.48it/s]



Epoch [575/700]:  50%|█████     | 393/782 [00:01<00:01, 213.22it/s]



Epoch [575/700]:  53%|█████▎    | 415/782 [00:01<00:01, 210.42it/s]



Epoch [575/700]:  56%|█████▌    | 437/782 [00:02<00:01, 209.47it/s]



Epoch [575/700]:  59%|█████▊    | 459/782 [00:02<00:01, 210.78it/s]



Epoch [575/700]:  64%|██████▍   | 503/782 [00:02<00:01, 210.52it/s]



Epoch [575/700]:  70%|██████▉   | 547/782 [00:02<00:01, 212.48it/s]



Epoch [575/700]:  78%|███████▊  | 613/782 [00:02<00:00, 214.74it/s]



Epoch [575/700]:  87%|████████▋ | 679/782 [00:03<00:00, 211.92it/s]



Epoch [575/700]:  92%|█████████▏| 723/782 [00:03<00:00, 209.24it/s]



Epoch [575/700]:  98%|█████████▊| 767/782 [00:03<00:00, 211.69it/s]



Epoch [575/700]: 100%|██████████| 782/782 [00:03<00:00, 211.52it/s]


Learning Rate: 0.004050
Train Loss: 0.0406, Accuracy: 98.66%, Confidence: 0.9830
Test Loss: 2.1935, Accuracy: 73.20%, Confidence: 0.9471
Train-Test Accuracy Gap: 25.46%


Epoch [576/700]:   3%|▎         | 20/782 [00:00<00:03, 193.00it/s]



Epoch [576/700]:   5%|▌         | 41/782 [00:00<00:03, 202.75it/s]



Epoch [576/700]:  11%|█         | 83/782 [00:00<00:03, 206.74it/s]



Epoch [576/700]:  16%|█▌        | 126/782 [00:00<00:03, 207.03it/s]



Epoch [576/700]:  22%|██▏       | 170/782 [00:00<00:02, 209.39it/s]



Epoch [576/700]:  27%|██▋       | 213/782 [00:01<00:02, 211.52it/s]



Epoch [576/700]:  33%|███▎      | 257/782 [00:01<00:02, 206.29it/s]



Epoch [576/700]:  41%|████      | 322/782 [00:01<00:02, 208.85it/s]



Epoch [576/700]:  47%|████▋     | 366/782 [00:01<00:01, 211.84it/s]



Epoch [576/700]:  52%|█████▏    | 410/782 [00:01<00:01, 210.39it/s]



Epoch [576/700]:  61%|██████    | 476/782 [00:02<00:01, 212.45it/s]



Epoch [576/700]:  66%|██████▋   | 520/782 [00:02<00:01, 213.45it/s]



Epoch [576/700]:  72%|███████▏  | 564/782 [00:02<00:01, 213.84it/s]



Epoch [576/700]:  78%|███████▊  | 608/782 [00:02<00:00, 215.48it/s]



Epoch [576/700]:  83%|████████▎ | 652/782 [00:03<00:00, 209.52it/s]



Epoch [576/700]:  89%|████████▉ | 695/782 [00:03<00:00, 210.70it/s]



Epoch [576/700]:  95%|█████████▍| 739/782 [00:03<00:00, 204.42it/s]



Epoch [576/700]: 100%|██████████| 782/782 [00:03<00:00, 208.75it/s]






Learning Rate: 0.004050
Train Loss: 0.0452, Accuracy: 98.46%, Confidence: 0.9824
Test Loss: 2.2879, Accuracy: 72.61%, Confidence: 0.9470
Train-Test Accuracy Gap: 25.85%


Epoch [577/700]:   3%|▎         | 21/782 [00:00<00:03, 205.03it/s]



Epoch [577/700]:   5%|▌         | 42/782 [00:00<00:03, 205.50it/s]



Epoch [577/700]:  11%|█         | 87/782 [00:00<00:03, 214.18it/s]



Epoch [577/700]:  20%|█▉        | 153/782 [00:00<00:02, 211.88it/s]



Epoch [577/700]:  25%|██▌       | 197/782 [00:00<00:02, 210.86it/s]



Epoch [577/700]:  31%|███       | 240/782 [00:01<00:02, 204.14it/s]



Epoch [577/700]:  39%|███▉      | 306/782 [00:01<00:02, 211.17it/s]



Epoch [577/700]:  45%|████▍     | 350/782 [00:01<00:02, 213.94it/s]



Epoch [577/700]:  53%|█████▎    | 417/782 [00:01<00:01, 215.02it/s]



Epoch [577/700]:  62%|██████▏   | 483/782 [00:02<00:01, 210.43it/s]



Epoch [577/700]:  67%|██████▋   | 527/782 [00:02<00:01, 212.22it/s]



Epoch [577/700]:  73%|███████▎  | 572/782 [00:02<00:00, 215.18it/s]



Epoch [577/700]:  79%|███████▉  | 616/782 [00:02<00:00, 213.62it/s]



Epoch [577/700]:  84%|████████▍ | 660/782 [00:03<00:00, 209.96it/s]



Epoch [577/700]:  90%|█████████ | 704/782 [00:03<00:00, 209.89it/s]



Epoch [577/700]:  96%|█████████▌| 748/782 [00:03<00:00, 212.07it/s]



Epoch [577/700]: 100%|██████████| 782/782 [00:03<00:00, 211.08it/s]


Learning Rate: 0.004050
Train Loss: 0.0373, Accuracy: 98.79%, Confidence: 0.9836
Test Loss: 2.6719, Accuracy: 70.69%, Confidence: 0.9483
Train-Test Accuracy Gap: 28.10%


Epoch [578/700]:   3%|▎         | 22/782 [00:00<00:03, 209.98it/s]



Epoch [578/700]:  11%|█         | 87/782 [00:00<00:03, 213.56it/s]



Epoch [578/700]:  17%|█▋        | 131/782 [00:00<00:03, 215.02it/s]



Epoch [578/700]:  22%|██▏       | 175/782 [00:00<00:02, 214.74it/s]



Epoch [578/700]:  28%|██▊       | 219/782 [00:01<00:02, 207.12it/s]



Epoch [578/700]:  34%|███▎      | 262/782 [00:01<00:02, 208.86it/s]



Epoch [578/700]:  39%|███▉      | 305/782 [00:01<00:02, 209.78it/s]



Epoch [578/700]:  47%|████▋     | 371/782 [00:01<00:01, 213.07it/s]



Epoch [578/700]:  53%|█████▎    | 415/782 [00:01<00:01, 215.00it/s]



Epoch [578/700]:  59%|█████▊    | 459/782 [00:02<00:01, 212.58it/s]



Epoch [578/700]:  64%|██████▍   | 502/782 [00:02<00:01, 206.16it/s]



Epoch [578/700]:  70%|██████▉   | 545/782 [00:02<00:01, 208.05it/s]



Epoch [578/700]:  75%|███████▌  | 589/782 [00:02<00:00, 211.58it/s]



Epoch [578/700]:  81%|████████  | 633/782 [00:02<00:00, 211.51it/s]



Epoch [578/700]:  87%|████████▋ | 677/782 [00:03<00:00, 212.71it/s]



Epoch [578/700]:  92%|█████████▏| 720/782 [00:03<00:00, 208.20it/s]



Epoch [578/700]: 100%|██████████| 782/782 [00:03<00:00, 210.90it/s]


Learning Rate: 0.004050
Train Loss: 0.0434, Accuracy: 98.58%, Confidence: 0.9831
Test Loss: 2.1711, Accuracy: 73.18%, Confidence: 0.9463
Train-Test Accuracy Gap: 25.40%


Epoch [579/700]:   3%|▎         | 21/782 [00:00<00:03, 207.41it/s]



Epoch [579/700]:   8%|▊         | 64/782 [00:00<00:03, 208.68it/s]



Epoch [579/700]:  14%|█▍        | 108/782 [00:00<00:03, 212.63it/s]



Epoch [579/700]:  19%|█▉        | 152/782 [00:00<00:02, 211.92it/s]



Epoch [579/700]:  25%|██▌       | 196/782 [00:00<00:02, 209.61it/s]



Epoch [579/700]:  31%|███       | 240/782 [00:01<00:02, 209.93it/s]



Epoch [579/700]:  36%|███▌      | 282/782 [00:01<00:02, 206.73it/s]



Epoch [579/700]:  41%|████▏     | 324/782 [00:01<00:02, 205.41it/s]



Epoch [579/700]:  53%|█████▎    | 411/782 [00:01<00:01, 208.63it/s]



Epoch [579/700]:  58%|█████▊    | 455/782 [00:02<00:01, 211.07it/s]



Epoch [579/700]:  64%|██████▍   | 499/782 [00:02<00:01, 210.98it/s]



Epoch [579/700]:  67%|██████▋   | 521/782 [00:02<00:01, 211.49it/s]



Epoch [579/700]:  75%|███████▌  | 587/782 [00:02<00:00, 212.68it/s]



Epoch [579/700]:  78%|███████▊  | 609/782 [00:02<00:00, 212.00it/s]



Epoch [579/700]:  86%|████████▌ | 673/782 [00:03<00:00, 206.81it/s]



Epoch [579/700]:  92%|█████████▏| 716/782 [00:03<00:00, 209.89it/s]



Epoch [579/700]:  97%|█████████▋| 760/782 [00:03<00:00, 209.72it/s]



Epoch [579/700]: 100%|██████████| 782/782 [00:03<00:00, 209.09it/s]


Learning Rate: 0.004050
Train Loss: 0.0443, Accuracy: 98.49%, Confidence: 0.9832
Test Loss: 2.7233, Accuracy: 70.41%, Confidence: 0.9487
Train-Test Accuracy Gap: 28.08%


Epoch [580/700]:   3%|▎         | 21/782 [00:00<00:03, 208.37it/s]



Epoch [580/700]:   5%|▌         | 42/782 [00:00<00:03, 209.27it/s]



Epoch [580/700]:   8%|▊         | 63/782 [00:00<00:03, 201.47it/s]



Epoch [580/700]:  16%|█▌        | 127/782 [00:00<00:03, 207.15it/s]



Epoch [580/700]:  22%|██▏       | 170/782 [00:00<00:03, 202.41it/s]



Epoch [580/700]:  27%|██▋       | 212/782 [00:01<00:02, 200.70it/s]



Epoch [580/700]:  33%|███▎      | 255/782 [00:01<00:02, 205.76it/s]



Epoch [580/700]:  38%|███▊      | 299/782 [00:01<00:02, 210.31it/s]



Epoch [580/700]:  44%|████▍     | 344/782 [00:01<00:02, 214.48it/s]



Epoch [580/700]:  50%|████▉     | 388/782 [00:01<00:01, 214.91it/s]



Epoch [580/700]:  55%|█████▌    | 432/782 [00:02<00:01, 209.42it/s]



Epoch [580/700]:  61%|██████    | 476/782 [00:02<00:01, 209.42it/s]



Epoch [580/700]:  69%|██████▉   | 542/782 [00:02<00:01, 213.46it/s]



Epoch [580/700]:  75%|███████▍  | 586/782 [00:02<00:00, 207.54it/s]



Epoch [580/700]:  81%|████████  | 630/782 [00:03<00:00, 210.44it/s]



Epoch [580/700]:  89%|████████▉ | 696/782 [00:03<00:00, 211.11it/s]



Epoch [580/700]:  95%|█████████▍| 740/782 [00:03<00:00, 212.86it/s]



Epoch [580/700]: 100%|██████████| 782/782 [00:03<00:00, 209.26it/s]


Learning Rate: 0.004050
Train Loss: 0.0431, Accuracy: 98.61%, Confidence: 0.9832
Test Loss: 2.3636, Accuracy: 71.98%, Confidence: 0.9453
Train-Test Accuracy Gap: 26.63%


Epoch [581/700]:   3%|▎         | 21/782 [00:00<00:03, 208.79it/s]



Epoch [581/700]:   5%|▌         | 43/782 [00:00<00:03, 210.41it/s]



Epoch [581/700]:  11%|█         | 87/782 [00:00<00:03, 214.60it/s]



Epoch [581/700]:  17%|█▋        | 131/782 [00:00<00:03, 214.49it/s]



Epoch [581/700]:  25%|██▌       | 197/782 [00:00<00:02, 213.39it/s]



Epoch [581/700]:  31%|███       | 241/782 [00:01<00:02, 213.80it/s]



Epoch [581/700]:  36%|███▋      | 285/782 [00:01<00:02, 211.55it/s]



Epoch [581/700]:  42%|████▏     | 329/782 [00:01<00:02, 212.75it/s]



Epoch [581/700]:  48%|████▊     | 373/782 [00:01<00:01, 212.55it/s]



Epoch [581/700]:  53%|█████▎    | 417/782 [00:01<00:01, 213.94it/s]



Epoch [581/700]:  62%|██████▏   | 483/782 [00:02<00:01, 214.89it/s]



Epoch [581/700]:  67%|██████▋   | 527/782 [00:02<00:01, 216.22it/s]



Epoch [581/700]:  73%|███████▎  | 571/782 [00:02<00:00, 215.61it/s]



Epoch [581/700]:  79%|███████▉  | 616/782 [00:02<00:00, 217.00it/s]



Epoch [581/700]:  84%|████████▍ | 660/782 [00:03<00:00, 216.43it/s]



Epoch [581/700]:  90%|█████████ | 704/782 [00:03<00:00, 215.32it/s]



Epoch [581/700]:  96%|█████████▌| 748/782 [00:03<00:00, 212.02it/s]



Epoch [581/700]: 100%|██████████| 782/782 [00:03<00:00, 213.65it/s]


Learning Rate: 0.004050
Train Loss: 0.0403, Accuracy: 98.65%, Confidence: 0.9833
Test Loss: 2.6606, Accuracy: 70.25%, Confidence: 0.9457
Train-Test Accuracy Gap: 28.40%


Epoch [582/700]:   3%|▎         | 21/782 [00:00<00:03, 206.66it/s]



Epoch [582/700]:   5%|▌         | 43/782 [00:00<00:03, 212.49it/s]



Epoch [582/700]:   8%|▊         | 65/782 [00:00<00:03, 214.32it/s]



Epoch [582/700]:  17%|█▋        | 133/782 [00:00<00:02, 218.95it/s]



Epoch [582/700]:  23%|██▎       | 177/782 [00:00<00:02, 209.99it/s]



Epoch [582/700]:  28%|██▊       | 221/782 [00:01<00:02, 210.74it/s]



Epoch [582/700]:  37%|███▋      | 287/782 [00:01<00:02, 213.31it/s]



Epoch [582/700]:  42%|████▏     | 331/782 [00:01<00:02, 210.80it/s]



Epoch [582/700]:  48%|████▊     | 376/782 [00:01<00:01, 214.90it/s]



Epoch [582/700]:  54%|█████▎    | 420/782 [00:01<00:01, 210.48it/s]



Epoch [582/700]:  62%|██████▏   | 486/782 [00:02<00:01, 211.03it/s]



Epoch [582/700]:  68%|██████▊   | 530/782 [00:02<00:01, 214.80it/s]



Epoch [582/700]:  73%|███████▎  | 574/782 [00:02<00:00, 214.69it/s]



Epoch [582/700]:  79%|███████▉  | 618/782 [00:02<00:00, 214.13it/s]



Epoch [582/700]:  85%|████████▍ | 662/782 [00:03<00:00, 208.35it/s]



Epoch [582/700]:  90%|█████████ | 704/782 [00:03<00:00, 207.49it/s]



Epoch [582/700]:  98%|█████████▊| 770/782 [00:03<00:00, 213.30it/s]



Epoch [582/700]: 100%|██████████| 782/782 [00:03<00:00, 211.97it/s]


Learning Rate: 0.004050
Train Loss: 0.0366, Accuracy: 98.76%, Confidence: 0.9838
Test Loss: 2.3039, Accuracy: 72.89%, Confidence: 0.9490
Train-Test Accuracy Gap: 25.87%


Epoch [583/700]:   3%|▎         | 21/782 [00:00<00:03, 201.67it/s]



Epoch [583/700]:   5%|▌         | 42/782 [00:00<00:03, 205.13it/s]



Epoch [583/700]:   8%|▊         | 64/782 [00:00<00:03, 206.85it/s]



Epoch [583/700]:  11%|█         | 85/782 [00:00<00:03, 207.46it/s]



Epoch [583/700]:  14%|█▎        | 106/782 [00:00<00:03, 208.11it/s]



Epoch [583/700]:  16%|█▌        | 127/782 [00:00<00:03, 208.62it/s]



Epoch [583/700]:  22%|██▏       | 171/782 [00:00<00:02, 212.70it/s]



Epoch [583/700]:  27%|██▋       | 214/782 [00:01<00:02, 207.23it/s]



Epoch [583/700]:  30%|███       | 235/782 [00:01<00:02, 206.87it/s]



Epoch [583/700]:  33%|███▎      | 256/782 [00:01<00:02, 203.79it/s]



Epoch [583/700]:  35%|███▌      | 277/782 [00:01<00:02, 205.17it/s]



Epoch [583/700]:  38%|███▊      | 298/782 [00:01<00:02, 206.43it/s]



Epoch [583/700]:  41%|████      | 319/782 [00:01<00:02, 207.15it/s]



Epoch [583/700]:  43%|████▎     | 340/782 [00:01<00:02, 207.07it/s]



Epoch [583/700]:  46%|████▌     | 361/782 [00:01<00:02, 204.06it/s]



Epoch [583/700]:  49%|████▉     | 383/782 [00:01<00:01, 206.45it/s]



Epoch [583/700]:  52%|█████▏    | 404/782 [00:01<00:01, 204.35it/s]



Epoch [583/700]:  54%|█████▍    | 426/782 [00:02<00:01, 208.05it/s]



Epoch [583/700]:  57%|█████▋    | 447/782 [00:02<00:01, 208.10it/s]



Epoch [583/700]:  63%|██████▎   | 491/782 [00:02<00:01, 210.59it/s]



Epoch [583/700]:  68%|██████▊   | 535/782 [00:02<00:01, 210.93it/s]



Epoch [583/700]:  71%|███████   | 557/782 [00:02<00:01, 211.21it/s]



Epoch [583/700]:  74%|███████▍  | 579/782 [00:02<00:00, 212.15it/s]



Epoch [583/700]:  77%|███████▋  | 601/782 [00:02<00:00, 213.53it/s]



Epoch [583/700]:  80%|███████▉  | 623/782 [00:02<00:00, 211.75it/s]



Epoch [583/700]:  82%|████████▏ | 645/782 [00:03<00:00, 212.04it/s]



Epoch [583/700]:  85%|████████▌ | 667/782 [00:03<00:00, 212.71it/s]



Epoch [583/700]:  88%|████████▊ | 689/782 [00:03<00:00, 213.16it/s]



Epoch [583/700]:  91%|█████████ | 711/782 [00:03<00:00, 213.05it/s]



Epoch [583/700]:  97%|█████████▋| 755/782 [00:03<00:00, 210.68it/s]



Epoch [583/700]: 100%|██████████| 782/782 [00:03<00:00, 208.77it/s]


Learning Rate: 0.004050
Train Loss: 0.0499, Accuracy: 98.34%, Confidence: 0.9826
Test Loss: 2.1472, Accuracy: 73.49%, Confidence: 0.9473
Train-Test Accuracy Gap: 24.85%


Epoch [584/700]:   3%|▎         | 21/782 [00:00<00:03, 204.76it/s]



Epoch [584/700]:   5%|▌         | 42/782 [00:00<00:03, 205.86it/s]



Epoch [584/700]:  11%|█         | 85/782 [00:00<00:03, 210.26it/s]



Epoch [584/700]:  16%|█▋        | 129/782 [00:00<00:03, 213.15it/s]



Epoch [584/700]:  22%|██▏       | 173/782 [00:00<00:02, 212.91it/s]



Epoch [584/700]:  28%|██▊       | 217/782 [00:01<00:02, 212.99it/s]



Epoch [584/700]:  33%|███▎      | 261/782 [00:01<00:02, 211.48it/s]



Epoch [584/700]:  39%|███▉      | 306/782 [00:01<00:02, 215.46it/s]



Epoch [584/700]:  45%|████▍     | 350/782 [00:01<00:02, 215.17it/s]



Epoch [584/700]:  50%|█████     | 394/782 [00:01<00:01, 210.34it/s]



Epoch [584/700]:  56%|█████▌    | 438/782 [00:02<00:01, 208.40it/s]



Epoch [584/700]:  64%|██████▍   | 504/782 [00:02<00:01, 212.59it/s]



Epoch [584/700]:  70%|██████▉   | 547/782 [00:02<00:01, 206.78it/s]



Epoch [584/700]:  76%|███████▌  | 591/782 [00:02<00:00, 210.66it/s]



Epoch [584/700]:  81%|████████  | 635/782 [00:03<00:00, 212.38it/s]



Epoch [584/700]:  87%|████████▋ | 679/782 [00:03<00:00, 209.83it/s]



Epoch [584/700]:  92%|█████████▏| 723/782 [00:03<00:00, 212.78it/s]



Epoch [584/700]:  98%|█████████▊| 767/782 [00:03<00:00, 210.84it/s]



Epoch [584/700]: 100%|██████████| 782/782 [00:03<00:00, 211.11it/s]


Learning Rate: 0.004050
Train Loss: 0.0459, Accuracy: 98.41%, Confidence: 0.9822
Test Loss: 2.6223, Accuracy: 70.83%, Confidence: 0.9480
Train-Test Accuracy Gap: 27.58%


Epoch [585/700]:   3%|▎         | 20/782 [00:00<00:03, 198.01it/s]



Epoch [585/700]:   5%|▌         | 42/782 [00:00<00:03, 207.83it/s]



Epoch [585/700]:   8%|▊         | 64/782 [00:00<00:03, 209.10it/s]



Epoch [585/700]:  17%|█▋        | 131/782 [00:00<00:03, 215.92it/s]



Epoch [585/700]:  25%|██▌       | 197/782 [00:00<00:02, 214.02it/s]



Epoch [585/700]:  31%|███       | 241/782 [00:01<00:02, 213.85it/s]



Epoch [585/700]:  36%|███▋      | 285/782 [00:01<00:02, 210.06it/s]



Epoch [585/700]:  42%|████▏     | 328/782 [00:01<00:02, 206.41it/s]



Epoch [585/700]:  47%|████▋     | 371/782 [00:01<00:01, 208.14it/s]



Epoch [585/700]:  56%|█████▌    | 435/782 [00:02<00:01, 208.34it/s]



Epoch [585/700]:  61%|██████    | 477/782 [00:02<00:01, 205.84it/s]



Epoch [585/700]:  67%|██████▋   | 521/782 [00:02<00:01, 211.13it/s]



Epoch [585/700]:  72%|███████▏  | 565/782 [00:02<00:01, 212.22it/s]



Epoch [585/700]:  81%|████████  | 631/782 [00:03<00:00, 207.56it/s]



Epoch [585/700]:  86%|████████▌ | 674/782 [00:03<00:00, 208.48it/s]



Epoch [585/700]:  92%|█████████▏| 717/782 [00:03<00:00, 205.40it/s]



Epoch [585/700]:  97%|█████████▋| 760/782 [00:03<00:00, 204.84it/s]



Epoch [585/700]: 100%|██████████| 782/782 [00:03<00:00, 209.18it/s]


Learning Rate: 0.004050
Train Loss: 0.0405, Accuracy: 98.66%, Confidence: 0.9833
Test Loss: 2.3561, Accuracy: 72.39%, Confidence: 0.9470
Train-Test Accuracy Gap: 26.27%


Epoch [586/700]:   3%|▎         | 21/782 [00:00<00:03, 204.93it/s]



Epoch [586/700]:   5%|▌         | 42/782 [00:00<00:03, 202.84it/s]



Epoch [586/700]:   8%|▊         | 63/782 [00:00<00:03, 198.08it/s]



Epoch [586/700]:  16%|█▋        | 128/782 [00:00<00:03, 209.39it/s]



Epoch [586/700]:  22%|██▏       | 172/782 [00:00<00:02, 211.12it/s]



Epoch [586/700]:  27%|██▋       | 215/782 [00:01<00:02, 202.21it/s]



Epoch [586/700]:  33%|███▎      | 257/782 [00:01<00:02, 205.15it/s]



Epoch [586/700]:  38%|███▊      | 300/782 [00:01<00:02, 210.01it/s]



Epoch [586/700]:  44%|████▍     | 344/782 [00:01<00:02, 212.97it/s]



Epoch [586/700]:  50%|████▉     | 388/782 [00:01<00:01, 207.89it/s]



Epoch [586/700]:  58%|█████▊    | 452/782 [00:02<00:01, 210.43it/s]



Epoch [586/700]:  63%|██████▎   | 496/782 [00:02<00:01, 207.56it/s]



Epoch [586/700]:  69%|██████▉   | 539/782 [00:02<00:01, 209.64it/s]



Epoch [586/700]:  74%|███████▍  | 582/782 [00:02<00:00, 209.80it/s]



Epoch [586/700]:  80%|███████▉  | 624/782 [00:02<00:00, 208.71it/s]



Epoch [586/700]:  85%|████████▌ | 667/782 [00:03<00:00, 205.98it/s]



Epoch [586/700]:  91%|█████████ | 710/782 [00:03<00:00, 205.72it/s]



Epoch [586/700]:  99%|█████████▉| 777/782 [00:03<00:00, 213.88it/s]



Epoch [586/700]: 100%|██████████| 782/782 [00:03<00:00, 208.11it/s]


Learning Rate: 0.004050
Train Loss: 0.0411, Accuracy: 98.64%, Confidence: 0.9835
Test Loss: 2.1372, Accuracy: 73.77%, Confidence: 0.9472
Train-Test Accuracy Gap: 24.87%


Epoch [587/700]:   3%|▎         | 22/782 [00:00<00:03, 216.75it/s]



Epoch [587/700]:   8%|▊         | 66/782 [00:00<00:03, 214.70it/s]



Epoch [587/700]:  14%|█▍        | 110/782 [00:00<00:03, 216.86it/s]



Epoch [587/700]:  17%|█▋        | 132/782 [00:00<00:02, 217.57it/s]



Epoch [587/700]:  20%|█▉        | 154/782 [00:00<00:02, 216.17it/s]



Epoch [587/700]:  23%|██▎       | 176/782 [00:00<00:02, 214.40it/s]



Epoch [587/700]:  28%|██▊       | 220/782 [00:01<00:02, 213.49it/s]



Epoch [587/700]:  34%|███▍      | 264/782 [00:01<00:02, 211.73it/s]



Epoch [587/700]:  39%|███▉      | 308/782 [00:01<00:02, 211.49it/s]



Epoch [587/700]:  45%|████▌     | 352/782 [00:01<00:02, 208.87it/s]



Epoch [587/700]:  51%|█████     | 395/782 [00:01<00:01, 210.64it/s]



Epoch [587/700]:  56%|█████▌    | 439/782 [00:02<00:01, 212.79it/s]



Epoch [587/700]:  62%|██████▏   | 483/782 [00:02<00:01, 211.67it/s]



Epoch [587/700]:  67%|██████▋   | 527/782 [00:02<00:01, 214.58it/s]



Epoch [587/700]:  73%|███████▎  | 571/782 [00:02<00:00, 212.59it/s]



Epoch [587/700]:  81%|████████▏ | 637/782 [00:03<00:00, 207.64it/s]



Epoch [587/700]:  87%|████████▋ | 679/782 [00:03<00:00, 200.25it/s]



Epoch [587/700]:  90%|████████▉ | 701/782 [00:03<00:00, 203.68it/s]



Epoch [587/700]:  95%|█████████▌| 745/782 [00:03<00:00, 208.21it/s]



Epoch [587/700]: 100%|██████████| 782/782 [00:03<00:00, 210.75it/s]


Learning Rate: 0.004050
Train Loss: 0.0412, Accuracy: 98.66%, Confidence: 0.9835
Test Loss: 2.2065, Accuracy: 72.97%, Confidence: 0.9465
Train-Test Accuracy Gap: 25.69%


Epoch [588/700]:   3%|▎         | 22/782 [00:00<00:03, 212.16it/s]



Epoch [588/700]:   6%|▌         | 44/782 [00:00<00:03, 209.06it/s]



Epoch [588/700]:  11%|█         | 87/782 [00:00<00:03, 209.41it/s]



Epoch [588/700]:  17%|█▋        | 130/782 [00:00<00:03, 211.26it/s]



Epoch [588/700]:  22%|██▏       | 174/782 [00:00<00:02, 210.44it/s]



Epoch [588/700]:  25%|██▌       | 196/782 [00:00<00:02, 207.31it/s]



Epoch [588/700]:  30%|███       | 238/782 [00:01<00:02, 206.04it/s]



Epoch [588/700]:  33%|███▎      | 259/782 [00:01<00:02, 204.91it/s]



Epoch [588/700]:  36%|███▌      | 281/782 [00:01<00:02, 207.88it/s]



Epoch [588/700]:  42%|████▏     | 325/782 [00:01<00:02, 210.85it/s]



Epoch [588/700]:  47%|████▋     | 369/782 [00:01<00:01, 212.25it/s]



Epoch [588/700]:  50%|█████     | 391/782 [00:01<00:01, 212.03it/s]



Epoch [588/700]:  53%|█████▎    | 413/782 [00:01<00:01, 211.41it/s]



Epoch [588/700]:  56%|█████▌    | 435/782 [00:02<00:01, 210.92it/s]



Epoch [588/700]:  58%|█████▊    | 457/782 [00:02<00:01, 211.32it/s]



Epoch [588/700]:  61%|██████▏   | 479/782 [00:02<00:01, 211.60it/s]



Epoch [588/700]:  64%|██████▍   | 501/782 [00:02<00:01, 211.23it/s]



Epoch [588/700]:  67%|██████▋   | 523/782 [00:02<00:01, 207.47it/s]



Epoch [588/700]:  70%|██████▉   | 545/782 [00:02<00:01, 209.50it/s]



Epoch [588/700]:  73%|███████▎  | 567/782 [00:02<00:01, 211.45it/s]



Epoch [588/700]:  75%|███████▌  | 589/782 [00:02<00:00, 211.35it/s]



Epoch [588/700]:  78%|███████▊  | 611/782 [00:02<00:00, 211.47it/s]



Epoch [588/700]:  81%|████████  | 633/782 [00:03<00:00, 209.22it/s]



Epoch [588/700]:  87%|████████▋ | 677/782 [00:03<00:00, 210.13it/s]



Epoch [588/700]:  92%|█████████▏| 721/782 [00:03<00:00, 212.47it/s]



Epoch [588/700]: 100%|██████████| 782/782 [00:03<00:00, 209.24it/s]






Learning Rate: 0.004050
Train Loss: 0.0461, Accuracy: 98.50%, Confidence: 0.9830
Test Loss: 2.4698, Accuracy: 71.20%, Confidence: 0.9466
Train-Test Accuracy Gap: 27.30%


Epoch [589/700]:   2%|▏         | 18/782 [00:00<00:04, 179.35it/s]



Epoch [589/700]:  10%|█         | 79/782 [00:00<00:03, 198.53it/s]



Epoch [589/700]:  16%|█▌        | 123/782 [00:00<00:03, 207.10it/s]



Epoch [589/700]:  24%|██▍       | 186/782 [00:00<00:02, 205.12it/s]



Epoch [589/700]:  26%|██▋       | 207/782 [00:01<00:02, 204.00it/s]



Epoch [589/700]:  32%|███▏      | 250/782 [00:01<00:02, 205.17it/s]



Epoch [589/700]:  40%|████      | 315/782 [00:01<00:02, 208.51it/s]



Epoch [589/700]:  43%|████▎     | 336/782 [00:01<00:02, 206.87it/s]



Epoch [589/700]:  52%|█████▏    | 403/782 [00:01<00:01, 212.92it/s]



Epoch [589/700]:  54%|█████▍    | 425/782 [00:02<00:01, 210.63it/s]



Epoch [589/700]:  63%|██████▎   | 491/782 [00:02<00:01, 212.37it/s]



Epoch [589/700]:  68%|██████▊   | 535/782 [00:02<00:01, 213.79it/s]



Epoch [589/700]:  74%|███████▍  | 579/782 [00:02<00:00, 214.53it/s]



Epoch [589/700]:  83%|████████▎ | 646/782 [00:03<00:00, 215.92it/s]



Epoch [589/700]:  91%|█████████ | 712/782 [00:03<00:00, 215.52it/s]



Epoch [589/700]:  97%|█████████▋| 756/782 [00:03<00:00, 216.00it/s]



Epoch [589/700]: 100%|██████████| 782/782 [00:03<00:00, 209.66it/s]


Learning Rate: 0.004050
Train Loss: 0.0410, Accuracy: 98.62%, Confidence: 0.9834
Test Loss: 2.5136, Accuracy: 70.95%, Confidence: 0.9459
Train-Test Accuracy Gap: 27.67%


Epoch [590/700]:   3%|▎         | 21/782 [00:00<00:03, 201.51it/s]



Epoch [590/700]:  11%|█         | 86/782 [00:00<00:03, 208.82it/s]



Epoch [590/700]:  17%|█▋        | 130/782 [00:00<00:03, 213.60it/s]



Epoch [590/700]:  22%|██▏       | 174/782 [00:00<00:02, 211.50it/s]



Epoch [590/700]:  28%|██▊       | 218/782 [00:01<00:02, 209.96it/s]



Epoch [590/700]:  36%|███▌      | 283/782 [00:01<00:02, 211.61it/s]



Epoch [590/700]:  42%|████▏     | 327/782 [00:01<00:02, 213.20it/s]



Epoch [590/700]:  48%|████▊     | 372/782 [00:01<00:01, 215.53it/s]



Epoch [590/700]:  53%|█████▎    | 416/782 [00:01<00:01, 207.11it/s]



Epoch [590/700]:  59%|█████▉    | 460/782 [00:02<00:01, 211.88it/s]



Epoch [590/700]:  67%|██████▋   | 526/782 [00:02<00:01, 213.19it/s]



Epoch [590/700]:  73%|███████▎  | 571/782 [00:02<00:00, 215.54it/s]



Epoch [590/700]:  79%|███████▊  | 615/782 [00:02<00:00, 215.32it/s]



Epoch [590/700]:  84%|████████▍ | 659/782 [00:03<00:00, 211.19it/s]



Epoch [590/700]:  90%|████████▉ | 703/782 [00:03<00:00, 213.58it/s]



Epoch [590/700]:  96%|█████████▌| 747/782 [00:03<00:00, 213.40it/s]



Epoch [590/700]: 100%|██████████| 782/782 [00:03<00:00, 211.28it/s]


Learning Rate: 0.004050
Train Loss: 0.0391, Accuracy: 98.69%, Confidence: 0.9836
Test Loss: 2.1870, Accuracy: 73.99%, Confidence: 0.9491
Train-Test Accuracy Gap: 24.70%


Epoch [591/700]:   3%|▎         | 22/782 [00:00<00:03, 212.34it/s]



Epoch [591/700]:   6%|▌         | 44/782 [00:00<00:03, 210.21it/s]



Epoch [591/700]:   8%|▊         | 66/782 [00:00<00:03, 211.13it/s]



Epoch [591/700]:  11%|█▏        | 88/782 [00:00<00:03, 212.94it/s]



Epoch [591/700]:  14%|█▍        | 110/782 [00:00<00:03, 213.49it/s]



Epoch [591/700]:  17%|█▋        | 132/782 [00:00<00:03, 212.72it/s]



Epoch [591/700]:  23%|██▎       | 176/782 [00:00<00:02, 208.45it/s]



Epoch [591/700]:  28%|██▊       | 220/782 [00:01<00:02, 198.33it/s]



Epoch [591/700]:  34%|███▎      | 262/782 [00:01<00:02, 202.72it/s]



Epoch [591/700]:  39%|███▉      | 306/782 [00:01<00:02, 207.65it/s]



Epoch [591/700]:  45%|████▍     | 349/782 [00:01<00:02, 210.65it/s]



Epoch [591/700]:  47%|████▋     | 371/782 [00:01<00:01, 211.32it/s]



Epoch [591/700]:  50%|█████     | 393/782 [00:01<00:01, 210.71it/s]



Epoch [591/700]:  53%|█████▎    | 415/782 [00:01<00:01, 210.44it/s]



Epoch [591/700]:  64%|██████▍   | 501/782 [00:02<00:01, 209.87it/s]



Epoch [591/700]:  70%|██████▉   | 545/782 [00:02<00:01, 213.36it/s]



Epoch [591/700]:  75%|███████▌  | 589/782 [00:02<00:00, 212.40it/s]



Epoch [591/700]:  81%|████████  | 633/782 [00:03<00:00, 209.79it/s]



Epoch [591/700]:  87%|████████▋ | 677/782 [00:03<00:00, 212.75it/s]



Epoch [591/700]:  92%|█████████▏| 721/782 [00:03<00:00, 211.58it/s]



Epoch [591/700]:  98%|█████████▊| 765/782 [00:03<00:00, 211.92it/s]



Epoch [591/700]: 100%|██████████| 782/782 [00:03<00:00, 209.82it/s]


Learning Rate: 0.004050
Train Loss: 0.0373, Accuracy: 98.77%, Confidence: 0.9837
Test Loss: 2.3212, Accuracy: 72.69%, Confidence: 0.9466
Train-Test Accuracy Gap: 26.08%


Epoch [592/700]:   3%|▎         | 21/782 [00:00<00:03, 202.63it/s]



Epoch [592/700]:   5%|▌         | 43/782 [00:00<00:03, 208.14it/s]



Epoch [592/700]:  11%|█         | 87/782 [00:00<00:03, 212.39it/s]



Epoch [592/700]:  17%|█▋        | 131/782 [00:00<00:03, 214.27it/s]



Epoch [592/700]:  22%|██▏       | 175/782 [00:00<00:02, 208.88it/s]



Epoch [592/700]:  28%|██▊       | 217/782 [00:01<00:02, 208.60it/s]



Epoch [592/700]:  33%|███▎      | 259/782 [00:01<00:02, 204.87it/s]



Epoch [592/700]:  39%|███▊      | 302/782 [00:01<00:02, 208.03it/s]



Epoch [592/700]:  44%|████▍     | 345/782 [00:01<00:02, 209.32it/s]



Epoch [592/700]:  47%|████▋     | 367/782 [00:01<00:01, 211.02it/s]



Epoch [592/700]:  50%|████▉     | 389/782 [00:01<00:01, 212.29it/s]



Epoch [592/700]:  58%|█████▊    | 455/782 [00:02<00:01, 213.02it/s]



Epoch [592/700]:  64%|██████▍   | 499/782 [00:02<00:01, 213.64it/s]



Epoch [592/700]:  69%|██████▉   | 543/782 [00:02<00:01, 212.46it/s]



Epoch [592/700]:  75%|███████▌  | 587/782 [00:02<00:00, 212.66it/s]



Epoch [592/700]:  81%|████████  | 631/782 [00:02<00:00, 211.41it/s]



Epoch [592/700]:  86%|████████▋ | 675/782 [00:03<00:00, 211.25it/s]



Epoch [592/700]:  92%|█████████▏| 719/782 [00:03<00:00, 212.61it/s]



Epoch [592/700]:  98%|█████████▊| 763/782 [00:03<00:00, 213.30it/s]



Epoch [592/700]: 100%|██████████| 782/782 [00:03<00:00, 210.62it/s]


Learning Rate: 0.004050
Train Loss: 0.0459, Accuracy: 98.42%, Confidence: 0.9826
Test Loss: 2.6249, Accuracy: 70.49%, Confidence: 0.9456
Train-Test Accuracy Gap: 27.93%


Epoch [593/700]:   3%|▎         | 21/782 [00:00<00:03, 208.19it/s]



Epoch [593/700]:   5%|▌         | 42/782 [00:00<00:03, 205.59it/s]



Epoch [593/700]:   8%|▊         | 64/782 [00:00<00:03, 207.43it/s]



Epoch [593/700]:  11%|█         | 85/782 [00:00<00:03, 208.12it/s]



Epoch [593/700]:  14%|█▎        | 107/782 [00:00<00:03, 209.89it/s]



Epoch [593/700]:  22%|██▏       | 172/782 [00:00<00:02, 210.51it/s]



Epoch [593/700]:  28%|██▊       | 216/782 [00:01<00:02, 211.27it/s]



Epoch [593/700]:  33%|███▎      | 260/782 [00:01<00:02, 212.01it/s]



Epoch [593/700]:  42%|████▏     | 326/782 [00:01<00:02, 212.98it/s]



Epoch [593/700]:  47%|████▋     | 370/782 [00:01<00:01, 212.27it/s]



Epoch [593/700]:  53%|█████▎    | 414/782 [00:01<00:01, 213.36it/s]



Epoch [593/700]:  59%|█████▊    | 458/782 [00:02<00:01, 213.23it/s]



Epoch [593/700]:  64%|██████▍   | 502/782 [00:02<00:01, 213.53it/s]



Epoch [593/700]:  70%|██████▉   | 546/782 [00:02<00:01, 214.23it/s]



Epoch [593/700]:  75%|███████▌  | 590/782 [00:02<00:00, 213.57it/s]



Epoch [593/700]:  81%|████████  | 634/782 [00:02<00:00, 211.83it/s]



Epoch [593/700]:  90%|████████▉ | 700/782 [00:03<00:00, 213.99it/s]



Epoch [593/700]:  95%|█████████▌| 744/782 [00:03<00:00, 214.55it/s]



Epoch [593/700]: 100%|██████████| 782/782 [00:03<00:00, 212.28it/s]


Learning Rate: 0.004050
Train Loss: 0.0435, Accuracy: 98.59%, Confidence: 0.9831
Test Loss: 2.3064, Accuracy: 72.95%, Confidence: 0.9478
Train-Test Accuracy Gap: 25.64%


Epoch [594/700]:   2%|▏         | 19/782 [00:00<00:04, 183.69it/s]



Epoch [594/700]:  11%|█         | 84/782 [00:00<00:03, 208.38it/s]



Epoch [594/700]:  16%|█▋        | 128/782 [00:00<00:03, 212.80it/s]



Epoch [594/700]:  22%|██▏       | 172/782 [00:00<00:02, 212.96it/s]



Epoch [594/700]:  27%|██▋       | 215/782 [00:01<00:02, 204.44it/s]



Epoch [594/700]:  36%|███▌      | 280/782 [00:01<00:02, 209.47it/s]



Epoch [594/700]:  41%|████▏     | 324/782 [00:01<00:02, 212.50it/s]



Epoch [594/700]:  47%|████▋     | 368/782 [00:01<00:01, 211.41it/s]



Epoch [594/700]:  53%|█████▎    | 412/782 [00:01<00:01, 206.03it/s]



Epoch [594/700]:  58%|█████▊    | 455/782 [00:02<00:01, 207.72it/s]



Epoch [594/700]:  64%|██████▎   | 497/782 [00:02<00:01, 205.16it/s]



Epoch [594/700]:  69%|██████▉   | 541/782 [00:02<00:01, 208.39it/s]




Epoch [594/700]:  75%|███████▍  | 584/782 [00:02<00:00, 211.28it/s]



Epoch [594/700]:  80%|████████  | 628/782 [00:03<00:00, 213.55it/s]



Epoch [594/700]:  89%|████████▊ | 694/782 [00:03<00:00, 202.84it/s]



Epoch [594/700]:  94%|█████████▍| 736/782 [00:03<00:00, 204.57it/s]



Epoch [594/700]: 100%|██████████| 782/782 [00:03<00:00, 208.20it/s]






Learning Rate: 0.004050
Train Loss: 0.0392, Accuracy: 98.69%, Confidence: 0.9838
Test Loss: 2.1893, Accuracy: 73.71%, Confidence: 0.9492
Train-Test Accuracy Gap: 24.98%


Epoch [595/700]:   3%|▎         | 21/782 [00:00<00:03, 203.18it/s]



Epoch [595/700]:  11%|█         | 87/782 [00:00<00:03, 211.86it/s]



Epoch [595/700]:  17%|█▋        | 131/782 [00:00<00:03, 212.22it/s]



Epoch [595/700]:  22%|██▏       | 175/782 [00:00<00:02, 211.52it/s]



Epoch [595/700]:  31%|███       | 240/782 [00:01<00:02, 208.38it/s]



Epoch [595/700]:  36%|███▌      | 283/782 [00:01<00:02, 207.87it/s]



Epoch [595/700]:  42%|████▏     | 326/782 [00:01<00:02, 207.45it/s]



Epoch [595/700]:  47%|████▋     | 369/782 [00:01<00:01, 207.63it/s]



Epoch [595/700]:  53%|█████▎    | 411/782 [00:01<00:01, 201.89it/s]



Epoch [595/700]:  58%|█████▊    | 453/782 [00:02<00:01, 203.36it/s]



Epoch [595/700]:  64%|██████▎   | 497/782 [00:02<00:01, 208.96it/s]



Epoch [595/700]:  69%|██████▉   | 540/782 [00:02<00:01, 210.86it/s]



Epoch [595/700]:  75%|███████▍  | 584/782 [00:02<00:00, 210.26it/s]



Epoch [595/700]:  80%|████████  | 628/782 [00:03<00:00, 212.17it/s]



Epoch [595/700]:  86%|████████▌ | 671/782 [00:03<00:00, 208.05it/s]



Epoch [595/700]:  94%|█████████▍| 736/782 [00:03<00:00, 210.27it/s]



Epoch [595/700]: 100%|██████████| 782/782 [00:03<00:00, 208.34it/s]






Learning Rate: 0.004050
Train Loss: 0.0441, Accuracy: 98.51%, Confidence: 0.9833
Test Loss: 2.3761, Accuracy: 73.11%, Confidence: 0.9485
Train-Test Accuracy Gap: 25.40%


Epoch [596/700]:   3%|▎         | 21/782 [00:00<00:03, 200.57it/s]



Epoch [596/700]:   5%|▌         | 42/782 [00:00<00:03, 198.26it/s]



Epoch [596/700]:   8%|▊         | 63/782 [00:00<00:03, 201.10it/s]



Epoch [596/700]:  13%|█▎        | 105/782 [00:00<00:03, 202.41it/s]



Epoch [596/700]:  16%|█▌        | 127/782 [00:00<00:03, 205.32it/s]



Epoch [596/700]:  19%|█▉        | 148/782 [00:00<00:03, 204.93it/s]



Epoch [596/700]:  27%|██▋       | 212/782 [00:01<00:02, 208.20it/s]



Epoch [596/700]:  32%|███▏      | 254/782 [00:01<00:02, 206.81it/s]



Epoch [596/700]:  38%|███▊      | 297/782 [00:01<00:02, 207.40it/s]



Epoch [596/700]:  43%|████▎     | 339/782 [00:01<00:02, 204.35it/s]



Epoch [596/700]:  49%|████▉     | 383/782 [00:01<00:01, 209.34it/s]



Epoch [596/700]:  55%|█████▍    | 427/782 [00:02<00:01, 209.29it/s]



Epoch [596/700]:  60%|██████    | 471/782 [00:02<00:01, 211.54it/s]



Epoch [596/700]:  69%|██████▊   | 537/782 [00:02<00:01, 210.81it/s]



Epoch [596/700]:  74%|███████▍  | 581/782 [00:02<00:00, 212.63it/s]



Epoch [596/700]:  80%|███████▉  | 625/782 [00:03<00:00, 209.84it/s]



Epoch [596/700]:  86%|████████▌ | 669/782 [00:03<00:00, 209.61it/s]



Epoch [596/700]:  91%|█████████ | 711/782 [00:03<00:00, 204.03it/s]



Epoch [596/700]:  96%|█████████▋| 754/782 [00:03<00:00, 207.22it/s]



Epoch [596/700]: 100%|██████████| 782/782 [00:03<00:00, 207.38it/s]


Learning Rate: 0.004050
Train Loss: 0.0471, Accuracy: 98.39%, Confidence: 0.9826
Test Loss: 2.3648, Accuracy: 72.72%, Confidence: 0.9477
Train-Test Accuracy Gap: 25.67%


Epoch [597/700]:   3%|▎         | 21/782 [00:00<00:03, 206.69it/s]



Epoch [597/700]:  11%|█         | 87/782 [00:00<00:03, 209.11it/s]



Epoch [597/700]:  14%|█▍        | 108/782 [00:00<00:03, 201.52it/s]



Epoch [597/700]:  19%|█▉        | 151/782 [00:00<00:03, 205.05it/s]



Epoch [597/700]:  25%|██▍       | 195/782 [00:00<00:02, 208.49it/s]



Epoch [597/700]:  31%|███       | 239/782 [00:01<00:02, 212.37it/s]



Epoch [597/700]:  36%|███▌      | 283/782 [00:01<00:02, 214.31it/s]



Epoch [597/700]:  42%|████▏     | 327/782 [00:01<00:02, 212.73it/s]



Epoch [597/700]:  50%|█████     | 393/782 [00:01<00:01, 212.17it/s]



Epoch [597/700]:  56%|█████▌    | 437/782 [00:02<00:01, 211.45it/s]



Epoch [597/700]:  62%|██████▏   | 481/782 [00:02<00:01, 212.72it/s]



Epoch [597/700]:  67%|██████▋   | 526/782 [00:02<00:01, 214.80it/s]



Epoch [597/700]:  73%|███████▎  | 570/782 [00:02<00:00, 216.18it/s]



Epoch [597/700]:  79%|███████▊  | 614/782 [00:02<00:00, 212.80it/s]



Epoch [597/700]:  87%|████████▋ | 680/782 [00:03<00:00, 209.41it/s]



Epoch [597/700]:  92%|█████████▏| 723/782 [00:03<00:00, 205.73it/s]



Epoch [597/700]:  98%|█████████▊| 766/782 [00:03<00:00, 207.51it/s]



Epoch [597/700]: 100%|██████████| 782/782 [00:03<00:00, 209.85it/s]


Learning Rate: 0.004050
Train Loss: 0.0423, Accuracy: 98.59%, Confidence: 0.9834
Test Loss: 2.3432, Accuracy: 72.59%, Confidence: 0.9487
Train-Test Accuracy Gap: 26.00%


Epoch [598/700]:   2%|▏         | 18/782 [00:00<00:04, 179.24it/s]



Epoch [598/700]:  11%|█         | 84/782 [00:00<00:03, 208.22it/s]



Epoch [598/700]:  19%|█▉        | 149/782 [00:00<00:03, 210.09it/s]



Epoch [598/700]:  22%|██▏       | 171/782 [00:00<00:03, 200.75it/s]



Epoch [598/700]:  27%|██▋       | 212/782 [00:01<00:02, 195.05it/s]



Epoch [598/700]:  32%|███▏      | 253/782 [00:01<00:02, 196.31it/s]



Epoch [598/700]:  38%|███▊      | 297/782 [00:01<00:02, 204.68it/s]



Epoch [598/700]:  43%|████▎     | 340/782 [00:01<00:02, 205.89it/s]



Epoch [598/700]:  49%|████▉     | 382/782 [00:01<00:01, 203.97it/s]



Epoch [598/700]:  54%|█████▍    | 424/782 [00:02<00:01, 200.92it/s]



Epoch [598/700]:  60%|█████▉    | 467/782 [00:02<00:01, 206.84it/s]



Epoch [598/700]:  68%|██████▊   | 532/782 [00:02<00:01, 206.36it/s]



Epoch [598/700]:  74%|███████▎  | 575/782 [00:02<00:00, 208.29it/s]



Epoch [598/700]:  79%|███████▉  | 617/782 [00:03<00:00, 207.16it/s]



Epoch [598/700]:  85%|████████▍ | 661/782 [00:03<00:00, 211.76it/s]



Epoch [598/700]:  90%|█████████ | 705/782 [00:03<00:00, 209.21it/s]



Epoch [598/700]:  96%|█████████▌| 748/782 [00:03<00:00, 210.05it/s]



Epoch [598/700]: 100%|██████████| 782/782 [00:03<00:00, 204.49it/s]


Learning Rate: 0.004050
Train Loss: 0.0460, Accuracy: 98.50%, Confidence: 0.9833
Test Loss: 2.4788, Accuracy: 71.85%, Confidence: 0.9465
Train-Test Accuracy Gap: 26.65%


Epoch [599/700]:   3%|▎         | 21/782 [00:00<00:03, 203.04it/s]



Epoch [599/700]:  11%|█         | 86/782 [00:00<00:03, 212.08it/s]



Epoch [599/700]:  17%|█▋        | 130/782 [00:00<00:03, 210.26it/s]



Epoch [599/700]:  22%|██▏       | 173/782 [00:00<00:03, 202.35it/s]



Epoch [599/700]:  28%|██▊       | 216/782 [00:01<00:02, 205.62it/s]



Epoch [599/700]:  36%|███▌      | 282/782 [00:01<00:02, 212.34it/s]



Epoch [599/700]:  42%|████▏     | 326/782 [00:01<00:02, 213.51it/s]



Epoch [599/700]:  47%|████▋     | 370/782 [00:01<00:01, 212.77it/s]



Epoch [599/700]:  53%|█████▎    | 414/782 [00:01<00:01, 213.58it/s]



Epoch [599/700]:  59%|█████▊    | 458/782 [00:02<00:01, 212.20it/s]



Epoch [599/700]:  64%|██████▍   | 502/782 [00:02<00:01, 211.46it/s]



Epoch [599/700]:  73%|███████▎  | 568/782 [00:02<00:01, 213.42it/s]



Epoch [599/700]:  78%|███████▊  | 612/782 [00:02<00:00, 211.53it/s]



Epoch [599/700]:  84%|████████▍ | 656/782 [00:03<00:00, 210.92it/s]



Epoch [599/700]:  90%|████████▉ | 700/782 [00:03<00:00, 212.51it/s]



Epoch [599/700]:  98%|█████████▊| 766/782 [00:03<00:00, 214.38it/s]



Epoch [599/700]: 100%|██████████| 782/782 [00:03<00:00, 210.66it/s]


Learning Rate: 0.004050
Train Loss: 0.0399, Accuracy: 98.65%, Confidence: 0.9832
Test Loss: 2.3300, Accuracy: 72.73%, Confidence: 0.9479
Train-Test Accuracy Gap: 25.92%


Epoch [600/700]:   3%|▎         | 21/782 [00:00<00:03, 205.18it/s]



Epoch [600/700]:  11%|█         | 85/782 [00:00<00:03, 208.31it/s]



Epoch [600/700]:  16%|█▋        | 128/782 [00:00<00:03, 209.10it/s]



Epoch [600/700]:  22%|██▏       | 172/782 [00:00<00:02, 209.43it/s]



Epoch [600/700]:  30%|███       | 238/782 [00:01<00:02, 209.89it/s]



Epoch [600/700]:  36%|███▌      | 282/782 [00:01<00:02, 206.99it/s]



Epoch [600/700]:  42%|████▏     | 326/782 [00:01<00:02, 211.40it/s]



Epoch [600/700]:  47%|████▋     | 370/782 [00:01<00:01, 210.73it/s]



Epoch [600/700]:  53%|█████▎    | 414/782 [00:01<00:01, 211.34it/s]



Epoch [600/700]:  59%|█████▊    | 458/782 [00:02<00:01, 210.72it/s]



Epoch [600/700]:  64%|██████▍   | 502/782 [00:02<00:01, 210.80it/s]



Epoch [600/700]:  70%|██████▉   | 545/782 [00:02<00:01, 206.88it/s]



Epoch [600/700]:  75%|███████▌  | 587/782 [00:02<00:00, 204.25it/s]



Epoch [600/700]:  80%|████████  | 629/782 [00:03<00:00, 200.60it/s]



Epoch [600/700]:  86%|████████▌ | 671/782 [00:03<00:00, 201.83it/s]



Epoch [600/700]:  91%|█████████ | 713/782 [00:03<00:00, 203.68it/s]



Epoch [600/700]:  97%|█████████▋| 756/782 [00:03<00:00, 207.97it/s]



Epoch [600/700]: 100%|██████████| 782/782 [00:03<00:00, 207.41it/s]


Learning Rate: 0.004050
Train Loss: 0.0436, Accuracy: 98.50%, Confidence: 0.9832
Test Loss: 2.2618, Accuracy: 73.55%, Confidence: 0.9503
Train-Test Accuracy Gap: 24.95%


Epoch [601/700]:   5%|▍         | 38/782 [00:00<00:03, 187.68it/s]



Epoch [601/700]:  10%|▉         | 78/782 [00:00<00:03, 194.51it/s]



Epoch [601/700]:  15%|█▌        | 119/782 [00:00<00:03, 197.90it/s]



Epoch [601/700]:  21%|██        | 162/782 [00:00<00:03, 205.07it/s]



Epoch [601/700]:  26%|██▌       | 205/782 [00:01<00:02, 207.54it/s]



Epoch [601/700]:  32%|███▏      | 248/782 [00:01<00:02, 208.89it/s]



Epoch [601/700]:  40%|████      | 314/782 [00:01<00:02, 212.62it/s]



Epoch [601/700]:  46%|████▌     | 358/782 [00:01<00:01, 212.81it/s]



Epoch [601/700]:  51%|█████▏    | 402/782 [00:01<00:01, 214.83it/s]



Epoch [601/700]:  57%|█████▋    | 446/782 [00:02<00:01, 214.87it/s]



Epoch [601/700]:  63%|██████▎   | 490/782 [00:02<00:01, 213.70it/s]



Epoch [601/700]:  71%|███████   | 556/782 [00:02<00:01, 214.15it/s]



Epoch [601/700]:  77%|███████▋  | 600/782 [00:02<00:00, 212.27it/s]



Epoch [601/700]:  82%|████████▏ | 644/782 [00:03<00:00, 208.47it/s]



Epoch [601/700]:  88%|████████▊ | 687/782 [00:03<00:00, 207.66it/s]



Epoch [601/700]:  93%|█████████▎| 731/782 [00:03<00:00, 211.12it/s]



Epoch [601/700]: 100%|██████████| 782/782 [00:03<00:00, 208.99it/s]






Learning Rate: 0.004050
Train Loss: 0.0379, Accuracy: 98.71%, Confidence: 0.9840
Test Loss: 2.3597, Accuracy: 72.24%, Confidence: 0.9475
Train-Test Accuracy Gap: 26.47%


Epoch [602/700]:   3%|▎         | 22/782 [00:00<00:03, 212.41it/s]



Epoch [602/700]:   6%|▌         | 44/782 [00:00<00:03, 206.48it/s]



Epoch [602/700]:   8%|▊         | 66/782 [00:00<00:03, 209.22it/s]



Epoch [602/700]:  11%|█▏        | 88/782 [00:00<00:03, 211.88it/s]



Epoch [602/700]:  14%|█▍        | 110/782 [00:00<00:03, 212.23it/s]



Epoch [602/700]:  17%|█▋        | 132/782 [00:00<00:03, 211.34it/s]



Epoch [602/700]:  20%|█▉        | 154/782 [00:00<00:02, 212.69it/s]



Epoch [602/700]:  23%|██▎       | 176/782 [00:00<00:02, 213.80it/s]



Epoch [602/700]:  34%|███▍      | 264/782 [00:01<00:02, 212.47it/s]



Epoch [602/700]:  39%|███▉      | 308/782 [00:01<00:02, 213.87it/s]



Epoch [602/700]:  45%|████▌     | 352/782 [00:01<00:02, 209.51it/s]



Epoch [602/700]:  51%|█████     | 395/782 [00:01<00:01, 206.96it/s]



Epoch [602/700]:  56%|█████▌    | 439/782 [00:02<00:01, 210.28it/s]



Epoch [602/700]:  62%|██████▏   | 483/782 [00:02<00:01, 211.97it/s]



Epoch [602/700]:  67%|██████▋   | 527/782 [00:02<00:01, 208.32it/s]



Epoch [602/700]:  73%|███████▎  | 570/782 [00:02<00:01, 208.11it/s]



Epoch [602/700]:  78%|███████▊  | 612/782 [00:02<00:00, 206.79it/s]



Epoch [602/700]:  84%|████████▍ | 655/782 [00:03<00:00, 208.54it/s]



Epoch [602/700]:  92%|█████████▏| 718/782 [00:03<00:00, 208.45it/s]



Epoch [602/700]:  97%|█████████▋| 761/782 [00:03<00:00, 210.08it/s]



Epoch [602/700]: 100%|██████████| 782/782 [00:03<00:00, 209.76it/s]


Learning Rate: 0.004050
Train Loss: 0.0441, Accuracy: 98.50%, Confidence: 0.9837
Test Loss: 2.4121, Accuracy: 71.78%, Confidence: 0.9471
Train-Test Accuracy Gap: 26.72%


Epoch [603/700]:   3%|▎         | 22/782 [00:00<00:03, 211.03it/s]



Epoch [603/700]:   6%|▌         | 44/782 [00:00<00:03, 215.72it/s]



Epoch [603/700]:  11%|█▏        | 88/782 [00:00<00:03, 213.89it/s]



Epoch [603/700]:  14%|█▍        | 110/782 [00:00<00:03, 212.15it/s]



Epoch [603/700]:  17%|█▋        | 132/782 [00:00<00:03, 211.62it/s]



Epoch [603/700]:  23%|██▎       | 176/782 [00:00<00:02, 211.20it/s]



Epoch [603/700]:  28%|██▊       | 219/782 [00:01<00:02, 207.83it/s]



Epoch [603/700]:  33%|███▎      | 261/782 [00:01<00:02, 206.73it/s]



Epoch [603/700]:  36%|███▌      | 282/782 [00:01<00:02, 204.29it/s]



Epoch [603/700]:  39%|███▉      | 304/782 [00:01<00:02, 207.17it/s]



Epoch [603/700]:  42%|████▏     | 325/782 [00:01<00:02, 207.07it/s]



Epoch [603/700]:  44%|████▍     | 346/782 [00:01<00:02, 207.01it/s]



Epoch [603/700]:  47%|████▋     | 367/782 [00:01<00:02, 207.01it/s]



Epoch [603/700]:  50%|████▉     | 388/782 [00:01<00:01, 206.77it/s]



Epoch [603/700]:  52%|█████▏    | 410/782 [00:01<00:01, 208.53it/s]



Epoch [603/700]:  55%|█████▌    | 431/782 [00:02<00:01, 208.83it/s]



Epoch [603/700]:  58%|█████▊    | 453/782 [00:02<00:01, 210.12it/s]



Epoch [603/700]:  61%|██████    | 475/782 [00:02<00:01, 208.22it/s]



Epoch [603/700]:  64%|██████▎   | 497/782 [00:02<00:01, 209.54it/s]



Epoch [603/700]:  66%|██████▋   | 519/782 [00:02<00:01, 209.77it/s]



Epoch [603/700]:  72%|███████▏  | 561/782 [00:02<00:01, 206.56it/s]



Epoch [603/700]:  77%|███████▋  | 603/782 [00:02<00:00, 204.65it/s]



Epoch [603/700]:  83%|████████▎ | 646/782 [00:03<00:00, 206.77it/s]



Epoch [603/700]:  88%|████████▊ | 689/782 [00:03<00:00, 208.82it/s]



Epoch [603/700]:  94%|█████████▎| 732/782 [00:03<00:00, 208.60it/s]



Epoch [603/700]: 100%|██████████| 782/782 [00:03<00:00, 208.17it/s]






Learning Rate: 0.004050
Train Loss: 0.0427, Accuracy: 98.50%, Confidence: 0.9837
Test Loss: 2.5024, Accuracy: 72.39%, Confidence: 0.9487
Train-Test Accuracy Gap: 26.11%


Epoch [604/700]:   3%|▎         | 21/782 [00:00<00:03, 200.80it/s]



Epoch [604/700]:  11%|█         | 84/782 [00:00<00:03, 205.71it/s]



Epoch [604/700]:  19%|█▉        | 147/782 [00:00<00:03, 205.30it/s]



Epoch [604/700]:  24%|██▍       | 191/782 [00:00<00:02, 209.11it/s]



Epoch [604/700]:  30%|██▉       | 233/782 [00:01<00:02, 208.60it/s]



Epoch [604/700]:  35%|███▌      | 277/782 [00:01<00:02, 209.79it/s]



Epoch [604/700]:  41%|████      | 320/782 [00:01<00:02, 207.58it/s]



Epoch [604/700]:  49%|████▉     | 385/782 [00:01<00:01, 210.75it/s]



Epoch [604/700]:  55%|█████▍    | 429/782 [00:02<00:01, 211.47it/s]



Epoch [604/700]:  60%|██████    | 473/782 [00:02<00:01, 210.32it/s]



Epoch [604/700]:  66%|██████▌   | 517/782 [00:02<00:01, 209.36it/s]



Epoch [604/700]:  72%|███████▏  | 561/782 [00:02<00:01, 211.97it/s]



Epoch [604/700]:  77%|███████▋  | 606/782 [00:02<00:00, 215.23it/s]



Epoch [604/700]:  83%|████████▎ | 650/782 [00:03<00:00, 215.92it/s]



Epoch [604/700]:  89%|████████▊ | 694/782 [00:03<00:00, 208.74it/s]



Epoch [604/700]:  94%|█████████▍| 738/782 [00:03<00:00, 213.06it/s]



Epoch [604/700]: 100%|██████████| 782/782 [00:03<00:00, 209.53it/s]






Learning Rate: 0.004050
Train Loss: 0.0400, Accuracy: 98.64%, Confidence: 0.9834
Test Loss: 2.3869, Accuracy: 72.56%, Confidence: 0.9489
Train-Test Accuracy Gap: 26.08%


Epoch [605/700]:   3%|▎         | 22/782 [00:00<00:03, 213.40it/s]



Epoch [605/700]:  11%|█▏        | 88/782 [00:00<00:03, 214.08it/s]



Epoch [605/700]:  17%|█▋        | 132/782 [00:00<00:03, 210.33it/s]



Epoch [605/700]:  22%|██▏       | 175/782 [00:00<00:02, 204.40it/s]



Epoch [605/700]:  28%|██▊       | 219/782 [00:01<00:02, 210.64it/s]



Epoch [605/700]:  34%|███▎      | 263/782 [00:01<00:02, 210.89it/s]



Epoch [605/700]:  39%|███▉      | 307/782 [00:01<00:02, 212.88it/s]



Epoch [605/700]:  45%|████▍     | 351/782 [00:01<00:02, 212.60it/s]



Epoch [605/700]:  53%|█████▎    | 417/782 [00:01<00:01, 212.13it/s]



Epoch [605/700]:  59%|█████▉    | 461/782 [00:02<00:01, 209.96it/s]



Epoch [605/700]:  65%|██████▍   | 505/782 [00:02<00:01, 213.24it/s]



Epoch [605/700]:  73%|███████▎  | 571/782 [00:02<00:00, 212.58it/s]



Epoch [605/700]:  79%|███████▊  | 615/782 [00:02<00:00, 210.46it/s]



Epoch [605/700]:  84%|████████▍ | 659/782 [00:03<00:00, 213.17it/s]



Epoch [605/700]:  90%|████████▉ | 703/782 [00:03<00:00, 211.99it/s]



Epoch [605/700]:  96%|█████████▌| 747/782 [00:03<00:00, 207.83it/s]



Epoch [605/700]: 100%|██████████| 782/782 [00:03<00:00, 210.87it/s]


Learning Rate: 0.004050
Train Loss: 0.0406, Accuracy: 98.66%, Confidence: 0.9841
Test Loss: 2.4644, Accuracy: 72.23%, Confidence: 0.9478
Train-Test Accuracy Gap: 26.43%


Epoch [606/700]:   3%|▎         | 22/782 [00:00<00:03, 217.58it/s]



Epoch [606/700]:  11%|█▏        | 88/782 [00:00<00:03, 204.02it/s]



Epoch [606/700]:  19%|█▉        | 151/782 [00:00<00:03, 207.15it/s]



Epoch [606/700]:  22%|██▏       | 173/782 [00:00<00:02, 208.25it/s]



Epoch [606/700]:  28%|██▊       | 216/782 [00:01<00:02, 207.61it/s]



Epoch [606/700]:  33%|███▎      | 259/782 [00:01<00:02, 205.72it/s]



Epoch [606/700]:  41%|████▏     | 324/782 [00:01<00:02, 208.67it/s]



Epoch [606/700]:  47%|████▋     | 366/782 [00:01<00:02, 207.24it/s]



Epoch [606/700]:  52%|█████▏    | 408/782 [00:01<00:01, 205.45it/s]



Epoch [606/700]:  58%|█████▊    | 450/782 [00:02<00:01, 205.61it/s]



Epoch [606/700]:  63%|██████▎   | 494/782 [00:02<00:01, 208.42it/s]



Epoch [606/700]:  77%|███████▋  | 602/782 [00:02<00:00, 209.46it/s]



Epoch [606/700]:  83%|████████▎ | 646/782 [00:03<00:00, 201.86it/s]



Epoch [606/700]:  88%|████████▊ | 689/782 [00:03<00:00, 206.10it/s]



Epoch [606/700]:  94%|█████████▎| 732/782 [00:03<00:00, 206.80it/s]



Epoch [606/700]: 100%|██████████| 782/782 [00:03<00:00, 207.10it/s]


Learning Rate: 0.004050
Train Loss: 0.0388, Accuracy: 98.73%, Confidence: 0.9845
Test Loss: 2.5041, Accuracy: 71.20%, Confidence: 0.9439
Train-Test Accuracy Gap: 27.53%


Epoch [607/700]:   3%|▎         | 21/782 [00:00<00:03, 205.14it/s]



Epoch [607/700]:   5%|▌         | 42/782 [00:00<00:03, 206.90it/s]



Epoch [607/700]:   8%|▊         | 63/782 [00:00<00:03, 204.34it/s]



Epoch [607/700]:  11%|█         | 85/782 [00:00<00:03, 208.20it/s]



Epoch [607/700]:  14%|█▎        | 106/782 [00:00<00:03, 205.78it/s]



Epoch [607/700]:  16%|█▋        | 128/782 [00:00<00:03, 208.18it/s]



Epoch [607/700]:  19%|█▉        | 149/782 [00:00<00:03, 208.69it/s]



Epoch [607/700]:  22%|██▏       | 170/782 [00:00<00:02, 207.94it/s]



Epoch [607/700]:  25%|██▍       | 192/782 [00:00<00:02, 208.91it/s]



Epoch [607/700]:  27%|██▋       | 214/782 [00:01<00:02, 209.49it/s]



Epoch [607/700]:  33%|███▎      | 257/782 [00:01<00:02, 210.78it/s]



Epoch [607/700]:  38%|███▊      | 301/782 [00:01<00:02, 214.15it/s]



Epoch [607/700]:  41%|████▏     | 323/782 [00:01<00:02, 214.95it/s]



Epoch [607/700]:  44%|████▍     | 345/782 [00:01<00:02, 213.83it/s]



Epoch [607/700]:  47%|████▋     | 367/782 [00:01<00:01, 213.60it/s]



Epoch [607/700]:  53%|█████▎    | 411/782 [00:01<00:01, 213.30it/s]



Epoch [607/700]:  58%|█████▊    | 455/782 [00:02<00:01, 209.41it/s]



Epoch [607/700]:  64%|██████▎   | 497/782 [00:02<00:01, 205.97it/s]



Epoch [607/700]:  66%|██████▌   | 518/782 [00:02<00:01, 205.88it/s]



Epoch [607/700]:  69%|██████▉   | 540/782 [00:02<00:01, 208.57it/s]



Epoch [607/700]:  72%|███████▏  | 561/782 [00:02<00:01, 207.53it/s]



Epoch [607/700]:  75%|███████▍  | 583/782 [00:02<00:00, 209.00it/s]



Epoch [607/700]:  77%|███████▋  | 605/782 [00:02<00:00, 209.94it/s]



Epoch [607/700]:  80%|████████  | 626/782 [00:02<00:00, 209.43it/s]



Epoch [607/700]:  83%|████████▎ | 647/782 [00:03<00:00, 209.29it/s]



Epoch [607/700]:  86%|████████▌ | 669/782 [00:03<00:00, 210.80it/s]



Epoch [607/700]:  88%|████████▊ | 691/782 [00:03<00:00, 206.42it/s]



Epoch [607/700]:  91%|█████████ | 712/782 [00:03<00:00, 207.10it/s]



Epoch [607/700]:  97%|█████████▋| 755/782 [00:03<00:00, 208.85it/s]



Epoch [607/700]: 100%|██████████| 782/782 [00:03<00:00, 209.38it/s]


Learning Rate: 0.004050
Train Loss: 0.0448, Accuracy: 98.49%, Confidence: 0.9828
Test Loss: 2.2425, Accuracy: 73.11%, Confidence: 0.9490
Train-Test Accuracy Gap: 25.38%


Epoch [608/700]:   3%|▎         | 22/782 [00:00<00:03, 213.97it/s]



Epoch [608/700]:   6%|▌         | 44/782 [00:00<00:03, 206.86it/s]



Epoch [608/700]:   8%|▊         | 66/782 [00:00<00:03, 209.36it/s]



Epoch [608/700]:  11%|█         | 87/782 [00:00<00:03, 206.67it/s]



Epoch [608/700]:  17%|█▋        | 130/782 [00:00<00:03, 207.66it/s]



Epoch [608/700]:  22%|██▏       | 173/782 [00:00<00:02, 209.76it/s]



Epoch [608/700]:  31%|███       | 239/782 [00:01<00:02, 214.34it/s]



Epoch [608/700]:  36%|███▌      | 283/782 [00:01<00:02, 206.87it/s]



Epoch [608/700]:  42%|████▏     | 327/782 [00:01<00:02, 209.10it/s]



Epoch [608/700]:  47%|████▋     | 370/782 [00:01<00:01, 209.99it/s]



Epoch [608/700]:  53%|█████▎    | 414/782 [00:01<00:01, 210.46it/s]



Epoch [608/700]:  59%|█████▊    | 458/782 [00:02<00:01, 210.20it/s]



Epoch [608/700]:  64%|██████▍   | 502/782 [00:02<00:01, 212.21it/s]



Epoch [608/700]:  70%|██████▉   | 546/782 [00:02<00:01, 213.22it/s]



Epoch [608/700]:  75%|███████▌  | 590/782 [00:02<00:00, 208.71it/s]



Epoch [608/700]:  81%|████████  | 634/782 [00:03<00:00, 210.84it/s]



Epoch [608/700]:  87%|████████▋ | 678/782 [00:03<00:00, 211.33it/s]



Epoch [608/700]:  92%|█████████▏| 722/782 [00:03<00:00, 213.18it/s]



Epoch [608/700]: 100%|██████████| 782/782 [00:03<00:00, 210.56it/s]






Learning Rate: 0.004050
Train Loss: 0.0434, Accuracy: 98.55%, Confidence: 0.9828
Test Loss: 2.3806, Accuracy: 72.41%, Confidence: 0.9485
Train-Test Accuracy Gap: 26.14%


Epoch [609/700]:   3%|▎         | 22/782 [00:00<00:03, 212.33it/s]



Epoch [609/700]:   6%|▌         | 44/782 [00:00<00:03, 212.70it/s]



Epoch [609/700]:  11%|█▏        | 88/782 [00:00<00:03, 213.03it/s]



Epoch [609/700]:  17%|█▋        | 132/782 [00:00<00:03, 207.95it/s]



Epoch [609/700]:  22%|██▏       | 174/782 [00:00<00:02, 205.80it/s]



Epoch [609/700]:  25%|██▍       | 195/782 [00:00<00:02, 201.16it/s]



Epoch [609/700]:  28%|██▊       | 216/782 [00:01<00:02, 203.32it/s]



Epoch [609/700]:  30%|███       | 237/782 [00:01<00:02, 203.99it/s]



Epoch [609/700]:  33%|███▎      | 259/782 [00:01<00:02, 206.04it/s]



Epoch [609/700]:  36%|███▌      | 280/782 [00:01<00:02, 206.84it/s]



Epoch [609/700]:  39%|███▊      | 302/782 [00:01<00:02, 208.58it/s]



Epoch [609/700]:  41%|████▏     | 323/782 [00:01<00:02, 207.39it/s]



Epoch [609/700]:  44%|████▍     | 345/782 [00:01<00:02, 208.69it/s]



Epoch [609/700]:  50%|████▉     | 388/782 [00:01<00:01, 207.63it/s]



Epoch [609/700]:  55%|█████▍    | 430/782 [00:02<00:01, 206.59it/s]



Epoch [609/700]:  58%|█████▊    | 451/782 [00:02<00:01, 202.64it/s]



Epoch [609/700]:  60%|██████    | 473/782 [00:02<00:01, 206.29it/s]



Epoch [609/700]:  63%|██████▎   | 494/782 [00:02<00:01, 206.08it/s]



Epoch [609/700]:  66%|██████▌   | 515/782 [00:02<00:01, 207.22it/s]



Epoch [609/700]:  69%|██████▊   | 536/782 [00:02<00:01, 206.22it/s]



Epoch [609/700]:  71%|███████   | 557/782 [00:02<00:01, 205.93it/s]



Epoch [609/700]:  74%|███████▍  | 578/782 [00:02<00:00, 205.86it/s]



Epoch [609/700]:  77%|███████▋  | 599/782 [00:02<00:00, 198.37it/s]



Epoch [609/700]:  79%|███████▉  | 620/782 [00:03<00:00, 199.51it/s]



Epoch [609/700]:  82%|████████▏ | 642/782 [00:03<00:00, 202.77it/s]



Epoch [609/700]:  85%|████████▍ | 664/782 [00:03<00:00, 205.86it/s]



Epoch [609/700]:  88%|████████▊ | 685/782 [00:03<00:00, 205.11it/s]



Epoch [609/700]:  90%|█████████ | 706/782 [00:03<00:00, 205.40it/s]



Epoch [609/700]:  93%|█████████▎| 727/782 [00:03<00:00, 205.68it/s]



Epoch [609/700]:  96%|█████████▌| 748/782 [00:03<00:00, 205.98it/s]



Epoch [609/700]: 100%|██████████| 782/782 [00:03<00:00, 206.38it/s]






Learning Rate: 0.004050
Train Loss: 0.0411, Accuracy: 98.57%, Confidence: 0.9835
Test Loss: 2.1756, Accuracy: 73.59%, Confidence: 0.9486
Train-Test Accuracy Gap: 24.98%


Epoch [610/700]:   3%|▎         | 20/782 [00:00<00:03, 197.20it/s]



Epoch [610/700]:   5%|▌         | 42/782 [00:00<00:03, 205.71it/s]



Epoch [610/700]:  11%|█         | 85/782 [00:00<00:03, 208.38it/s]



Epoch [610/700]:  16%|█▋        | 129/782 [00:00<00:03, 209.64it/s]



Epoch [610/700]:  22%|██▏       | 173/782 [00:00<00:02, 213.12it/s]



Epoch [610/700]:  28%|██▊       | 217/782 [00:01<00:02, 214.39it/s]



Epoch [610/700]:  33%|███▎      | 261/782 [00:01<00:02, 211.56it/s]



Epoch [610/700]:  36%|███▌      | 283/782 [00:01<00:02, 209.55it/s]



Epoch [610/700]:  39%|███▉      | 305/782 [00:01<00:02, 211.77it/s]



Epoch [610/700]:  42%|████▏     | 327/782 [00:01<00:02, 212.15it/s]



Epoch [610/700]:  45%|████▍     | 349/782 [00:01<00:02, 212.21it/s]



Epoch [610/700]:  47%|████▋     | 371/782 [00:01<00:01, 213.09it/s]



Epoch [610/700]:  50%|█████     | 393/782 [00:01<00:01, 212.60it/s]



Epoch [610/700]:  53%|█████▎    | 415/782 [00:01<00:01, 210.83it/s]



Epoch [610/700]:  56%|█████▌    | 437/782 [00:02<00:01, 211.70it/s]



Epoch [610/700]:  59%|█████▊    | 459/782 [00:02<00:01, 211.35it/s]



Epoch [610/700]:  62%|██████▏   | 481/782 [00:02<00:01, 211.91it/s]



Epoch [610/700]:  64%|██████▍   | 503/782 [00:02<00:01, 212.46it/s]



Epoch [610/700]:  67%|██████▋   | 525/782 [00:02<00:01, 212.56it/s]



Epoch [610/700]:  76%|███████▌  | 592/782 [00:02<00:00, 215.42it/s]



Epoch [610/700]:  81%|████████▏ | 636/782 [00:02<00:00, 215.46it/s]



Epoch [610/700]:  90%|████████▉ | 701/782 [00:03<00:00, 206.30it/s]



Epoch [610/700]:  95%|█████████▌| 743/782 [00:03<00:00, 206.05it/s]



Epoch [610/700]: 100%|██████████| 782/782 [00:03<00:00, 210.63it/s]


Learning Rate: 0.004050
Train Loss: 0.0391, Accuracy: 98.70%, Confidence: 0.9840
Test Loss: 2.8165, Accuracy: 69.99%, Confidence: 0.9454
Train-Test Accuracy Gap: 28.71%


Epoch [611/700]:   3%|▎         | 21/782 [00:00<00:03, 204.44it/s]



Epoch [611/700]:  11%|█▏        | 88/782 [00:00<00:03, 215.18it/s]



Epoch [611/700]:  17%|█▋        | 132/782 [00:00<00:03, 215.14it/s]



Epoch [611/700]:  23%|██▎       | 176/782 [00:00<00:02, 211.13it/s]



Epoch [611/700]:  31%|███       | 242/782 [00:01<00:02, 213.57it/s]



Epoch [611/700]:  37%|███▋      | 286/782 [00:01<00:02, 210.07it/s]



Epoch [611/700]:  42%|████▏     | 329/782 [00:01<00:02, 207.44it/s]



Epoch [611/700]:  48%|████▊     | 373/782 [00:01<00:01, 209.76it/s]



Epoch [611/700]:  53%|█████▎    | 415/782 [00:01<00:01, 206.35it/s]



Epoch [611/700]:  59%|█████▊    | 459/782 [00:02<00:01, 209.85it/s]



Epoch [611/700]:  67%|██████▋   | 524/782 [00:02<00:01, 210.63it/s]



Epoch [611/700]:  73%|███████▎  | 568/782 [00:02<00:01, 210.40it/s]



Epoch [611/700]:  78%|███████▊  | 612/782 [00:02<00:00, 212.27it/s]



Epoch [611/700]:  84%|████████▍ | 657/782 [00:03<00:00, 214.45it/s]



Epoch [611/700]:  90%|████████▉ | 701/782 [00:03<00:00, 214.28it/s]



Epoch [611/700]:  95%|█████████▌| 745/782 [00:03<00:00, 215.39it/s]



Epoch [611/700]: 100%|██████████| 782/782 [00:03<00:00, 211.36it/s]


Learning Rate: 0.004050
Train Loss: 0.0411, Accuracy: 98.59%, Confidence: 0.9834
Test Loss: 2.2836, Accuracy: 73.22%, Confidence: 0.9488
Train-Test Accuracy Gap: 25.37%


Epoch [612/700]:   3%|▎         | 21/782 [00:00<00:03, 204.89it/s]



Epoch [612/700]:  14%|█▍        | 108/782 [00:00<00:03, 211.75it/s]



Epoch [612/700]:  19%|█▉        | 152/782 [00:00<00:03, 206.74it/s]



Epoch [612/700]:  31%|███       | 239/782 [00:01<00:02, 204.40it/s]



Epoch [612/700]:  36%|███▌      | 281/782 [00:01<00:02, 204.77it/s]



Epoch [612/700]:  41%|████▏     | 323/782 [00:01<00:02, 201.14it/s]



Epoch [612/700]:  47%|████▋     | 365/782 [00:01<00:02, 203.31it/s]



Epoch [612/700]:  52%|█████▏    | 408/782 [00:01<00:01, 207.77it/s]



Epoch [612/700]:  60%|██████    | 472/782 [00:02<00:01, 208.74it/s]



Epoch [612/700]:  66%|██████▌   | 516/782 [00:02<00:01, 209.67it/s]



Epoch [612/700]:  71%|███████▏  | 558/782 [00:02<00:01, 206.07it/s]



Epoch [612/700]:  79%|███████▉  | 621/782 [00:03<00:00, 199.81it/s]



Epoch [612/700]:  85%|████████▌ | 665/782 [00:03<00:00, 205.54it/s]



Epoch [612/700]:  90%|█████████ | 707/782 [00:03<00:00, 205.46it/s]



Epoch [612/700]:  96%|█████████▌| 751/782 [00:03<00:00, 210.50it/s]



Epoch [612/700]: 100%|██████████| 782/782 [00:03<00:00, 206.40it/s]


Learning Rate: 0.004050
Train Loss: 0.0378, Accuracy: 98.74%, Confidence: 0.9845
Test Loss: 2.4100, Accuracy: 72.21%, Confidence: 0.9478
Train-Test Accuracy Gap: 26.53%


Epoch [613/700]:   3%|▎         | 21/782 [00:00<00:03, 202.80it/s]



Epoch [613/700]:   5%|▌         | 43/782 [00:00<00:03, 208.90it/s]



Epoch [613/700]:  11%|█         | 87/782 [00:00<00:03, 213.09it/s]



Epoch [613/700]:  14%|█▍        | 109/782 [00:00<00:03, 212.05it/s]



Epoch [613/700]:  20%|█▉        | 153/782 [00:00<00:02, 210.55it/s]



Epoch [613/700]:  25%|██▌       | 197/782 [00:00<00:02, 209.87it/s]



Epoch [613/700]:  31%|███       | 239/782 [00:01<00:02, 208.17it/s]



Epoch [613/700]:  33%|███▎      | 261/782 [00:01<00:02, 210.07it/s]



Epoch [613/700]:  36%|███▌      | 283/782 [00:01<00:02, 210.78it/s]



Epoch [613/700]:  39%|███▉      | 305/782 [00:01<00:02, 211.34it/s]



Epoch [613/700]:  42%|████▏     | 327/782 [00:01<00:02, 211.90it/s]



Epoch [613/700]:  45%|████▍     | 349/782 [00:01<00:02, 211.42it/s]



Epoch [613/700]:  47%|████▋     | 371/782 [00:01<00:01, 211.48it/s]



Epoch [613/700]:  50%|█████     | 393/782 [00:01<00:01, 206.91it/s]



Epoch [613/700]:  53%|█████▎    | 415/782 [00:01<00:01, 210.24it/s]



Epoch [613/700]:  56%|█████▌    | 437/782 [00:02<00:01, 209.95it/s]



Epoch [613/700]:  59%|█████▊    | 459/782 [00:02<00:01, 210.96it/s]



Epoch [613/700]:  62%|██████▏   | 481/782 [00:02<00:01, 212.66it/s]



Epoch [613/700]:  64%|██████▍   | 503/782 [00:02<00:01, 212.68it/s]



Epoch [613/700]:  67%|██████▋   | 525/782 [00:02<00:01, 214.04it/s]



Epoch [613/700]:  70%|██████▉   | 547/782 [00:02<00:01, 212.33it/s]



Epoch [613/700]:  73%|███████▎  | 569/782 [00:02<00:01, 211.12it/s]



Epoch [613/700]:  84%|████████▍ | 657/782 [00:03<00:00, 213.19it/s]



Epoch [613/700]:  90%|████████▉ | 701/782 [00:03<00:00, 211.04it/s]



Epoch [613/700]:  95%|█████████▌| 745/782 [00:03<00:00, 206.52it/s]



Epoch [613/700]: 100%|██████████| 782/782 [00:03<00:00, 210.12it/s]


Learning Rate: 0.004050
Train Loss: 0.0392, Accuracy: 98.78%, Confidence: 0.9846
Test Loss: 2.2928, Accuracy: 73.25%, Confidence: 0.9466
Train-Test Accuracy Gap: 25.53%


Epoch [614/700]:   3%|▎         | 20/782 [00:00<00:03, 194.99it/s]



Epoch [614/700]:  11%|█         | 85/782 [00:00<00:03, 209.10it/s]



Epoch [614/700]:  16%|█▋        | 128/782 [00:00<00:03, 202.10it/s]



Epoch [614/700]:  22%|██▏       | 172/782 [00:00<00:02, 207.62it/s]



Epoch [614/700]:  28%|██▊       | 216/782 [00:01<00:02, 210.82it/s]



Epoch [614/700]:  33%|███▎      | 259/782 [00:01<00:02, 208.28it/s]



Epoch [614/700]:  39%|███▊      | 302/782 [00:01<00:02, 207.95it/s]



Epoch [614/700]:  44%|████▍     | 344/782 [00:01<00:02, 204.83it/s]



Epoch [614/700]:  49%|████▉     | 387/782 [00:01<00:01, 208.20it/s]



Epoch [614/700]:  55%|█████▍    | 430/782 [00:02<00:01, 208.63it/s]



Epoch [614/700]:  63%|██████▎   | 496/782 [00:02<00:01, 212.24it/s]



Epoch [614/700]:  69%|██████▉   | 540/782 [00:02<00:01, 212.40it/s]



Epoch [614/700]:  75%|███████▍  | 585/782 [00:02<00:00, 215.66it/s]



Epoch [614/700]:  80%|████████  | 629/782 [00:03<00:00, 215.63it/s]



Epoch [614/700]:  86%|████████▌ | 673/782 [00:03<00:00, 213.27it/s]



Epoch [614/700]:  92%|█████████▏| 717/782 [00:03<00:00, 212.22it/s]



Epoch [614/700]:  97%|█████████▋| 760/782 [00:03<00:00, 209.25it/s]



Epoch [614/700]: 100%|██████████| 782/782 [00:03<00:00, 209.03it/s]


Learning Rate: 0.004050
Train Loss: 0.0435, Accuracy: 98.50%, Confidence: 0.9832
Test Loss: 3.3650, Accuracy: 66.91%, Confidence: 0.9485
Train-Test Accuracy Gap: 31.59%


Epoch [615/700]:   3%|▎         | 21/782 [00:00<00:03, 207.58it/s]



Epoch [615/700]:   8%|▊         | 63/782 [00:00<00:03, 202.26it/s]



Epoch [615/700]:  16%|█▋        | 128/782 [00:00<00:03, 205.46it/s]



Epoch [615/700]:  22%|██▏       | 172/782 [00:00<00:02, 211.14it/s]



Epoch [615/700]:  27%|██▋       | 215/782 [00:01<00:02, 205.53it/s]



Epoch [615/700]:  36%|███▌      | 280/782 [00:01<00:02, 210.29it/s]



Epoch [615/700]:  41%|████▏     | 324/782 [00:01<00:02, 209.89it/s]



Epoch [615/700]:  50%|████▉     | 389/782 [00:01<00:01, 206.28it/s]



Epoch [615/700]:  55%|█████▌    | 431/782 [00:02<00:01, 206.06it/s]



Epoch [615/700]:  60%|██████    | 473/782 [00:02<00:01, 204.43it/s]



Epoch [615/700]:  69%|██████▊   | 537/782 [00:02<00:01, 206.61it/s]



Epoch [615/700]:  74%|███████▍  | 579/782 [00:02<00:00, 204.02it/s]



Epoch [615/700]:  80%|███████▉  | 622/782 [00:03<00:00, 208.65it/s]



Epoch [615/700]:  85%|████████▌ | 666/782 [00:03<00:00, 209.68it/s]



Epoch [615/700]:  93%|█████████▎| 731/782 [00:03<00:00, 210.42it/s]



Epoch [615/700]:  99%|█████████▉| 775/782 [00:03<00:00, 211.08it/s]



Epoch [615/700]: 100%|██████████| 782/782 [00:03<00:00, 207.15it/s]


Learning Rate: 0.004050
Train Loss: 0.0454, Accuracy: 98.46%, Confidence: 0.9829
Test Loss: 2.3342, Accuracy: 73.01%, Confidence: 0.9504
Train-Test Accuracy Gap: 25.45%


Epoch [616/700]:   3%|▎         | 20/782 [00:00<00:03, 195.45it/s]



Epoch [616/700]:  11%|█         | 85/782 [00:00<00:03, 203.35it/s]



Epoch [616/700]:  16%|█▌        | 126/782 [00:00<00:03, 198.41it/s]



Epoch [616/700]:  22%|██▏       | 170/782 [00:00<00:02, 206.89it/s]



Epoch [616/700]:  27%|██▋       | 213/782 [00:01<00:02, 207.64it/s]



Epoch [616/700]:  36%|███▌      | 280/782 [00:01<00:02, 212.07it/s]



Epoch [616/700]:  44%|████▍     | 346/782 [00:01<00:02, 215.42it/s]



Epoch [616/700]:  50%|████▉     | 390/782 [00:01<00:01, 208.54it/s]



Epoch [616/700]:  55%|█████▌    | 434/782 [00:02<00:01, 211.79it/s]



Epoch [616/700]:  61%|██████    | 478/782 [00:02<00:01, 208.86it/s]



Epoch [616/700]:  67%|██████▋   | 522/782 [00:02<00:01, 210.65it/s]



Epoch [616/700]:  72%|███████▏  | 566/782 [00:02<00:01, 208.22it/s]



Epoch [616/700]:  81%|████████  | 630/782 [00:03<00:00, 208.24it/s]



Epoch [616/700]:  86%|████████▌ | 673/782 [00:03<00:00, 207.59it/s]



Epoch [616/700]:  92%|█████████▏| 717/782 [00:03<00:00, 211.90it/s]



Epoch [616/700]:  97%|█████████▋| 761/782 [00:03<00:00, 212.20it/s]



Epoch [616/700]: 100%|██████████| 782/782 [00:03<00:00, 208.96it/s]


Learning Rate: 0.004050
Train Loss: 0.0410, Accuracy: 98.64%, Confidence: 0.9845
Test Loss: 2.3555, Accuracy: 72.74%, Confidence: 0.9491
Train-Test Accuracy Gap: 25.90%


Epoch [617/700]:   3%|▎         | 20/782 [00:00<00:03, 192.92it/s]



Epoch [617/700]:   5%|▌         | 42/782 [00:00<00:03, 204.18it/s]



Epoch [617/700]:   8%|▊         | 63/782 [00:00<00:03, 205.20it/s]



Epoch [617/700]:  11%|█         | 85/782 [00:00<00:03, 208.73it/s]



Epoch [617/700]:  16%|█▋        | 129/782 [00:00<00:03, 212.06it/s]



Epoch [617/700]:  22%|██▏       | 173/782 [00:00<00:02, 210.01it/s]



Epoch [617/700]:  28%|██▊       | 217/782 [00:01<00:02, 209.73it/s]



Epoch [617/700]:  33%|███▎      | 261/782 [00:01<00:02, 212.25it/s]



Epoch [617/700]:  36%|███▌      | 283/782 [00:01<00:02, 214.03it/s]



Epoch [617/700]:  42%|████▏     | 327/782 [00:01<00:02, 214.53it/s]



Epoch [617/700]:  47%|████▋     | 371/782 [00:01<00:01, 210.95it/s]



Epoch [617/700]:  53%|█████▎    | 414/782 [00:01<00:01, 209.62it/s]



Epoch [617/700]:  59%|█████▊    | 458/782 [00:02<00:01, 212.18it/s]



Epoch [617/700]:  64%|██████▍   | 502/782 [00:02<00:01, 211.67it/s]



Epoch [617/700]:  70%|██████▉   | 546/782 [00:02<00:01, 211.09it/s]



Epoch [617/700]:  75%|███████▌  | 589/782 [00:02<00:00, 208.60it/s]



Epoch [617/700]:  81%|████████  | 633/782 [00:03<00:00, 207.08it/s]



Epoch [617/700]:  89%|████████▉ | 699/782 [00:03<00:00, 208.98it/s]



Epoch [617/700]:  95%|█████████▍| 742/782 [00:03<00:00, 208.80it/s]



Epoch [617/700]: 100%|██████████| 782/782 [00:03<00:00, 210.01it/s]


Learning Rate: 0.004050
Train Loss: 0.0391, Accuracy: 98.72%, Confidence: 0.9845
Test Loss: 2.3040, Accuracy: 72.54%, Confidence: 0.9479
Train-Test Accuracy Gap: 26.18%


Epoch [618/700]:   3%|▎         | 21/782 [00:00<00:03, 208.01it/s]



Epoch [618/700]:   5%|▌         | 42/782 [00:00<00:03, 208.42it/s]



Epoch [618/700]:   8%|▊         | 63/782 [00:00<00:03, 207.67it/s]



Epoch [618/700]:  11%|█         | 85/782 [00:00<00:03, 209.51it/s]



Epoch [618/700]:  16%|█▌        | 127/782 [00:00<00:03, 207.14it/s]



Epoch [618/700]:  22%|██▏       | 170/782 [00:00<00:02, 208.07it/s]



Epoch [618/700]:  27%|██▋       | 212/782 [00:01<00:02, 199.99it/s]



Epoch [618/700]:  36%|███▌      | 279/782 [00:01<00:02, 210.83it/s]



Epoch [618/700]:  41%|████▏     | 323/782 [00:01<00:02, 206.71it/s]



Epoch [618/700]:  47%|████▋     | 365/782 [00:01<00:02, 202.11it/s]



Epoch [618/700]:  52%|█████▏    | 407/782 [00:01<00:01, 205.55it/s]



Epoch [618/700]:  58%|█████▊    | 451/782 [00:02<00:01, 209.40it/s]



Epoch [618/700]:  63%|██████▎   | 495/782 [00:02<00:01, 208.98it/s]



Epoch [618/700]:  69%|██████▊   | 537/782 [00:02<00:01, 200.76it/s]



Epoch [618/700]:  74%|███████▍  | 579/782 [00:02<00:01, 202.46it/s]



Epoch [618/700]:  80%|███████▉  | 622/782 [00:03<00:00, 205.29it/s]



Epoch [618/700]:  85%|████████▍ | 664/782 [00:03<00:00, 206.54it/s]



Epoch [618/700]:  93%|█████████▎| 729/782 [00:03<00:00, 209.52it/s]



Epoch [618/700]:  99%|█████████▉| 773/782 [00:03<00:00, 208.17it/s]



Epoch [618/700]: 100%|██████████| 782/782 [00:03<00:00, 206.44it/s]


Learning Rate: 0.004050
Train Loss: 0.0424, Accuracy: 98.57%, Confidence: 0.9839
Test Loss: 2.4499, Accuracy: 72.20%, Confidence: 0.9490
Train-Test Accuracy Gap: 26.37%


Epoch [619/700]:   3%|▎         | 21/782 [00:00<00:03, 209.95it/s]



Epoch [619/700]:  11%|█         | 83/782 [00:00<00:03, 194.98it/s]



Epoch [619/700]:  18%|█▊        | 143/782 [00:00<00:03, 193.37it/s]



Epoch [619/700]:  23%|██▎       | 183/782 [00:00<00:03, 190.13it/s]



Epoch [619/700]:  26%|██▌       | 203/782 [00:01<00:03, 186.55it/s]



Epoch [619/700]:  31%|███       | 243/782 [00:01<00:02, 190.77it/s]



Epoch [619/700]:  39%|███▊      | 303/782 [00:01<00:02, 193.03it/s]



Epoch [619/700]:  44%|████▍     | 343/782 [00:01<00:02, 190.22it/s]



Epoch [619/700]:  49%|████▉     | 383/782 [00:02<00:02, 188.24it/s]



Epoch [619/700]:  54%|█████▍    | 422/782 [00:02<00:01, 190.29it/s]



Epoch [619/700]:  59%|█████▉    | 462/782 [00:02<00:01, 190.61it/s]



Epoch [619/700]:  66%|██████▋   | 520/782 [00:02<00:01, 188.89it/s]



Epoch [619/700]:  71%|███████▏  | 559/782 [00:02<00:01, 188.70it/s]



Epoch [619/700]:  77%|███████▋  | 599/782 [00:03<00:00, 189.60it/s]



Epoch [619/700]:  82%|████████▏ | 639/782 [00:03<00:00, 190.08it/s]



Epoch [619/700]:  87%|████████▋ | 678/782 [00:03<00:00, 188.11it/s]



Epoch [619/700]:  94%|█████████▍| 736/782 [00:03<00:00, 185.58it/s]



Epoch [619/700]:  99%|█████████▉| 774/782 [00:04<00:00, 182.12it/s]



Epoch [619/700]: 100%|██████████| 782/782 [00:04<00:00, 189.24it/s]


Learning Rate: 0.004050
Train Loss: 0.0410, Accuracy: 98.60%, Confidence: 0.9839
Test Loss: 2.4052, Accuracy: 72.77%, Confidence: 0.9490
Train-Test Accuracy Gap: 25.83%


Epoch [620/700]:   2%|▏         | 17/782 [00:00<00:04, 166.55it/s]



Epoch [620/700]:   4%|▍         | 34/782 [00:00<00:04, 165.31it/s]



Epoch [620/700]:   7%|▋         | 52/782 [00:00<00:04, 170.92it/s]



Epoch [620/700]:   9%|▉         | 70/782 [00:00<00:04, 174.30it/s]



Epoch [620/700]:  11%|█▏        | 88/782 [00:00<00:03, 173.72it/s]



Epoch [620/700]:  14%|█▎        | 107/782 [00:00<00:03, 177.31it/s]



Epoch [620/700]:  19%|█▊        | 145/782 [00:00<00:03, 181.94it/s]



Epoch [620/700]:  23%|██▎       | 183/782 [00:01<00:03, 184.05it/s]



Epoch [620/700]:  28%|██▊       | 221/782 [00:01<00:03, 183.46it/s]



Epoch [620/700]:  33%|███▎      | 259/782 [00:01<00:02, 183.49it/s]



Epoch [620/700]:  40%|████      | 316/782 [00:01<00:02, 185.73it/s]



Epoch [620/700]:  45%|████▌     | 354/782 [00:01<00:02, 186.85it/s]



Epoch [620/700]:  50%|█████     | 392/782 [00:02<00:02, 186.79it/s]



Epoch [620/700]:  58%|█████▊    | 456/782 [00:02<00:01, 200.94it/s]



Epoch [620/700]:  64%|██████▍   | 499/782 [00:02<00:01, 203.06it/s]



Epoch [620/700]:  69%|██████▉   | 541/782 [00:02<00:01, 205.59it/s]



Epoch [620/700]:  75%|███████▍  | 583/782 [00:03<00:00, 206.04it/s]



Epoch [620/700]:  80%|████████  | 626/782 [00:03<00:00, 209.31it/s]



Epoch [620/700]:  88%|████████▊ | 692/782 [00:03<00:00, 212.63it/s]



Epoch [620/700]:  94%|█████████▍| 736/782 [00:03<00:00, 207.92it/s]



Epoch [620/700]: 100%|██████████| 782/782 [00:04<00:00, 194.53it/s]






Learning Rate: 0.004050
Train Loss: 0.0378, Accuracy: 98.73%, Confidence: 0.9844
Test Loss: 2.3353, Accuracy: 72.35%, Confidence: 0.9497
Train-Test Accuracy Gap: 26.38%


Epoch [621/700]:   3%|▎         | 21/782 [00:00<00:03, 200.00it/s]



Epoch [621/700]:  11%|█         | 84/782 [00:00<00:03, 204.94it/s]



Epoch [621/700]:  16%|█▌        | 126/782 [00:00<00:03, 203.64it/s]



Epoch [621/700]:  22%|██▏       | 169/782 [00:00<00:02, 204.98it/s]



Epoch [621/700]:  27%|██▋       | 211/782 [00:01<00:02, 200.86it/s]



Epoch [621/700]:  32%|███▏      | 253/782 [00:01<00:02, 202.66it/s]



Epoch [621/700]:  38%|███▊      | 295/782 [00:01<00:02, 204.63it/s]



Epoch [621/700]:  43%|████▎     | 337/782 [00:01<00:02, 202.89it/s]



Epoch [621/700]:  48%|████▊     | 379/782 [00:01<00:02, 201.45it/s]



Epoch [621/700]:  54%|█████▍    | 422/782 [00:02<00:01, 200.96it/s]



Epoch [621/700]:  59%|█████▉    | 465/782 [00:02<00:01, 204.86it/s]



Epoch [621/700]:  65%|██████▍   | 508/782 [00:02<00:01, 207.61it/s]



Epoch [621/700]:  70%|███████   | 550/782 [00:02<00:01, 207.93it/s]



Epoch [621/700]:  76%|███████▌  | 594/782 [00:02<00:00, 209.62it/s]



Epoch [621/700]:  82%|████████▏ | 638/782 [00:03<00:00, 210.83it/s]



Epoch [621/700]:  87%|████████▋ | 682/782 [00:03<00:00, 209.70it/s]



Epoch [621/700]:  96%|█████████▌| 748/782 [00:03<00:00, 208.51it/s]



Epoch [621/700]: 100%|██████████| 782/782 [00:03<00:00, 205.01it/s]


Learning Rate: 0.004050
Train Loss: 0.0405, Accuracy: 98.63%, Confidence: 0.9842
Test Loss: 2.2975, Accuracy: 73.30%, Confidence: 0.9492
Train-Test Accuracy Gap: 25.33%


Epoch [622/700]:   3%|▎         | 20/782 [00:00<00:03, 198.39it/s]



Epoch [622/700]:   5%|▌         | 41/782 [00:00<00:03, 203.24it/s]



Epoch [622/700]:  11%|█         | 85/782 [00:00<00:03, 211.53it/s]



Epoch [622/700]:  16%|█▋        | 129/782 [00:00<00:03, 208.74it/s]



Epoch [622/700]:  22%|██▏       | 171/782 [00:00<00:02, 208.61it/s]



Epoch [622/700]:  27%|██▋       | 215/782 [00:01<00:02, 206.93it/s]



Epoch [622/700]:  36%|███▌      | 280/782 [00:01<00:02, 211.06it/s]



Epoch [622/700]:  41%|████▏     | 324/782 [00:01<00:02, 210.15it/s]



Epoch [622/700]:  47%|████▋     | 368/782 [00:01<00:01, 207.55it/s]



Epoch [622/700]:  53%|█████▎    | 411/782 [00:01<00:01, 208.99it/s]



Epoch [622/700]:  58%|█████▊    | 453/782 [00:02<00:01, 208.41it/s]



Epoch [622/700]:  63%|██████▎   | 496/782 [00:02<00:01, 208.05it/s]



Epoch [622/700]:  69%|██████▉   | 540/782 [00:02<00:01, 208.96it/s]



Epoch [622/700]:  75%|███████▍  | 583/782 [00:02<00:00, 209.01it/s]



Epoch [622/700]:  80%|███████▉  | 625/782 [00:03<00:00, 205.20it/s]



Epoch [622/700]:  85%|████████▌ | 667/782 [00:03<00:00, 204.26it/s]



Epoch [622/700]:  91%|█████████ | 709/782 [00:03<00:00, 203.80it/s]



Epoch [622/700]:  99%|█████████▉| 774/782 [00:03<00:00, 209.39it/s]



Epoch [622/700]: 100%|██████████| 782/782 [00:03<00:00, 207.52it/s]


Learning Rate: 0.004050
Train Loss: 0.0462, Accuracy: 98.41%, Confidence: 0.9835
Test Loss: 2.9883, Accuracy: 69.74%, Confidence: 0.9506
Train-Test Accuracy Gap: 28.67%


Epoch [623/700]:   3%|▎         | 21/782 [00:00<00:03, 201.39it/s]



Epoch [623/700]:   8%|▊         | 63/782 [00:00<00:03, 202.32it/s]



Epoch [623/700]:  13%|█▎        | 105/782 [00:00<00:03, 204.12it/s]



Epoch [623/700]:  19%|█▉        | 148/782 [00:00<00:03, 204.01it/s]



Epoch [623/700]:  27%|██▋       | 213/782 [00:01<00:02, 206.43it/s]



Epoch [623/700]:  33%|███▎      | 257/782 [00:01<00:02, 209.35it/s]



Epoch [623/700]:  38%|███▊      | 301/782 [00:01<00:02, 211.29it/s]



Epoch [623/700]:  47%|████▋     | 367/782 [00:01<00:01, 210.13it/s]



Epoch [623/700]:  53%|█████▎    | 411/782 [00:01<00:01, 213.34it/s]



Epoch [623/700]:  61%|██████    | 477/782 [00:02<00:01, 204.96it/s]



Epoch [623/700]:  66%|██████▋   | 520/782 [00:02<00:01, 208.17it/s]



Epoch [623/700]:  72%|███████▏  | 563/782 [00:02<00:01, 211.01it/s]



Epoch [623/700]:  78%|███████▊  | 607/782 [00:02<00:00, 212.59it/s]



Epoch [623/700]:  83%|████████▎ | 651/782 [00:03<00:00, 208.62it/s]



Epoch [623/700]:  89%|████████▊ | 694/782 [00:03<00:00, 209.28it/s]



Epoch [623/700]:  94%|█████████▍| 738/782 [00:03<00:00, 207.94it/s]



Epoch [623/700]: 100%|██████████| 782/782 [00:03<00:00, 208.43it/s]






Learning Rate: 0.004050
Train Loss: 0.0414, Accuracy: 98.63%, Confidence: 0.9843
Test Loss: 2.7710, Accuracy: 70.20%, Confidence: 0.9458
Train-Test Accuracy Gap: 28.43%


Epoch [624/700]:   2%|▏         | 19/782 [00:00<00:04, 183.32it/s]



Epoch [624/700]:   5%|▍         | 39/782 [00:00<00:03, 192.96it/s]



Epoch [624/700]:   8%|▊         | 59/782 [00:00<00:03, 195.92it/s]



Epoch [624/700]:  15%|█▌        | 120/782 [00:00<00:03, 198.37it/s]



Epoch [624/700]:  23%|██▎       | 180/782 [00:00<00:03, 192.56it/s]



Epoch [624/700]:  28%|██▊       | 221/782 [00:01<00:02, 194.43it/s]



Epoch [624/700]:  33%|███▎      | 261/782 [00:01<00:02, 194.27it/s]



Epoch [624/700]:  38%|███▊      | 301/782 [00:01<00:02, 192.89it/s]



Epoch [624/700]:  44%|████▎     | 341/782 [00:01<00:02, 189.18it/s]



Epoch [624/700]:  51%|█████     | 399/782 [00:02<00:02, 189.97it/s]



Epoch [624/700]:  59%|█████▉    | 461/782 [00:02<00:01, 199.01it/s]



Epoch [624/700]:  67%|██████▋   | 525/782 [00:02<00:01, 204.80it/s]



Epoch [624/700]:  73%|███████▎  | 567/782 [00:02<00:01, 200.88it/s]



Epoch [624/700]:  80%|████████  | 629/782 [00:03<00:00, 197.69it/s]



Epoch [624/700]:  86%|████████▌ | 671/782 [00:03<00:00, 201.33it/s]



Epoch [624/700]:  91%|█████████ | 713/782 [00:03<00:00, 204.14it/s]



Epoch [624/700]:  97%|█████████▋| 755/782 [00:03<00:00, 203.97it/s]



Epoch [624/700]: 100%|██████████| 782/782 [00:03<00:00, 196.80it/s]


Learning Rate: 0.004050
Train Loss: 0.0358, Accuracy: 98.83%, Confidence: 0.9850
Test Loss: 2.6676, Accuracy: 70.78%, Confidence: 0.9505
Train-Test Accuracy Gap: 28.05%


Epoch [625/700]:   3%|▎         | 21/782 [00:00<00:03, 207.62it/s]



Epoch [625/700]:   5%|▌         | 42/782 [00:00<00:03, 207.44it/s]



Epoch [625/700]:   8%|▊         | 63/782 [00:00<00:03, 208.49it/s]



Epoch [625/700]:  11%|█         | 85/782 [00:00<00:03, 210.29it/s]



Epoch [625/700]:  14%|█▎        | 107/782 [00:00<00:03, 210.27it/s]



Epoch [625/700]:  16%|█▋        | 129/782 [00:00<00:03, 206.53it/s]



Epoch [625/700]:  19%|█▉        | 150/782 [00:00<00:03, 204.00it/s]



Epoch [625/700]:  27%|██▋       | 213/782 [00:01<00:02, 205.07it/s]



Epoch [625/700]:  33%|███▎      | 256/782 [00:01<00:02, 207.73it/s]



Epoch [625/700]:  38%|███▊      | 298/782 [00:01<00:02, 206.97it/s]



Epoch [625/700]:  44%|████▎     | 341/782 [00:01<00:02, 207.62it/s]



Epoch [625/700]:  49%|████▉     | 385/782 [00:01<00:01, 208.84it/s]



Epoch [625/700]:  57%|█████▋    | 449/782 [00:02<00:01, 208.92it/s]



Epoch [625/700]:  66%|██████▌   | 515/782 [00:02<00:01, 213.32it/s]



Epoch [625/700]:  71%|███████▏  | 559/782 [00:02<00:01, 213.94it/s]



Epoch [625/700]:  77%|███████▋  | 603/782 [00:02<00:00, 215.08it/s]



Epoch [625/700]:  86%|████████▌ | 669/782 [00:03<00:00, 216.50it/s]



Epoch [625/700]:  94%|█████████▍| 735/782 [00:03<00:00, 211.59it/s]



Epoch [625/700]: 100%|█████████▉| 779/782 [00:03<00:00, 210.50it/s]



Epoch [625/700]: 100%|██████████| 782/782 [00:03<00:00, 209.74it/s]


Learning Rate: 0.004050
Train Loss: 0.0409, Accuracy: 98.64%, Confidence: 0.9841
Test Loss: 2.7218, Accuracy: 70.30%, Confidence: 0.9478
Train-Test Accuracy Gap: 28.34%


Epoch [626/700]:   3%|▎         | 21/782 [00:00<00:03, 209.07it/s]



Epoch [626/700]:   5%|▌         | 42/782 [00:00<00:03, 208.39it/s]



Epoch [626/700]:  11%|█         | 86/782 [00:00<00:03, 211.80it/s]



Epoch [626/700]:  17%|█▋        | 130/782 [00:00<00:03, 213.37it/s]



Epoch [626/700]:  22%|██▏       | 174/782 [00:00<00:02, 210.63it/s]



Epoch [626/700]:  28%|██▊       | 217/782 [00:01<00:02, 208.30it/s]



Epoch [626/700]:  33%|███▎      | 259/782 [00:01<00:02, 207.43it/s]



Epoch [626/700]:  39%|███▊      | 303/782 [00:01<00:02, 211.68it/s]



Epoch [626/700]:  44%|████▍     | 347/782 [00:01<00:02, 212.60it/s]



Epoch [626/700]:  50%|█████     | 391/782 [00:01<00:01, 211.34it/s]



Epoch [626/700]:  58%|█████▊    | 457/782 [00:02<00:01, 212.33it/s]



Epoch [626/700]:  61%|██████▏   | 479/782 [00:02<00:01, 210.59it/s]



Epoch [626/700]:  67%|██████▋   | 522/782 [00:02<00:01, 209.38it/s]



Epoch [626/700]:  75%|███████▍  | 586/782 [00:02<00:00, 206.95it/s]



Epoch [626/700]:  80%|████████  | 629/782 [00:02<00:00, 208.85it/s]



Epoch [626/700]:  86%|████████▌ | 672/782 [00:03<00:00, 205.61it/s]



Epoch [626/700]:  91%|█████████▏| 714/782 [00:03<00:00, 203.70it/s]



Epoch [626/700]:  97%|█████████▋| 758/782 [00:03<00:00, 208.34it/s]



Epoch [626/700]: 100%|██████████| 782/782 [00:03<00:00, 208.68it/s]


Learning Rate: 0.004050
Train Loss: 0.0455, Accuracy: 98.45%, Confidence: 0.9833
Test Loss: 3.0675, Accuracy: 67.97%, Confidence: 0.9475
Train-Test Accuracy Gap: 30.48%


Epoch [627/700]:   3%|▎         | 20/782 [00:00<00:03, 193.56it/s]



Epoch [627/700]:   5%|▌         | 41/782 [00:00<00:03, 198.69it/s]



Epoch [627/700]:   8%|▊         | 63/782 [00:00<00:03, 204.39it/s]



Epoch [627/700]:  16%|█▋        | 128/782 [00:00<00:03, 208.11it/s]



Epoch [627/700]:  25%|██▍       | 192/782 [00:00<00:02, 205.19it/s]



Epoch [627/700]:  30%|███       | 235/782 [00:01<00:02, 207.25it/s]



Epoch [627/700]:  36%|███▌      | 278/782 [00:01<00:02, 207.17it/s]



Epoch [627/700]:  41%|████      | 320/782 [00:01<00:02, 207.37it/s]



Epoch [627/700]:  46%|████▋     | 362/782 [00:01<00:02, 204.51it/s]



Epoch [627/700]:  52%|█████▏    | 405/782 [00:01<00:01, 204.41it/s]



Epoch [627/700]:  63%|██████▎   | 492/782 [00:02<00:01, 209.61it/s]



Epoch [627/700]:  68%|██████▊   | 535/782 [00:02<00:01, 212.38it/s]



Epoch [627/700]:  77%|███████▋  | 601/782 [00:02<00:00, 212.07it/s]



Epoch [627/700]:  82%|████████▏ | 645/782 [00:03<00:00, 212.80it/s]



Epoch [627/700]:  88%|████████▊ | 689/782 [00:03<00:00, 211.00it/s]



Epoch [627/700]:  94%|█████████▎| 733/782 [00:03<00:00, 212.25it/s]



Epoch [627/700]: 100%|██████████| 782/782 [00:03<00:00, 208.08it/s]






Learning Rate: 0.004050
Train Loss: 0.0390, Accuracy: 98.73%, Confidence: 0.9844
Test Loss: 2.3411, Accuracy: 73.19%, Confidence: 0.9498
Train-Test Accuracy Gap: 25.54%


Epoch [628/700]:   3%|▎         | 21/782 [00:00<00:03, 208.81it/s]



Epoch [628/700]:   5%|▌         | 42/782 [00:00<00:03, 209.12it/s]



Epoch [628/700]:   8%|▊         | 63/782 [00:00<00:03, 209.48it/s]



Epoch [628/700]:  11%|█         | 84/782 [00:00<00:03, 204.10it/s]



Epoch [628/700]:  14%|█▎        | 106/782 [00:00<00:03, 207.62it/s]



Epoch [628/700]:  16%|█▌        | 127/782 [00:00<00:03, 208.39it/s]



Epoch [628/700]:  22%|██▏       | 171/782 [00:00<00:02, 209.92it/s]



Epoch [628/700]:  27%|██▋       | 213/782 [00:01<00:02, 206.46it/s]



Epoch [628/700]:  35%|███▌      | 277/782 [00:01<00:02, 208.00it/s]



Epoch [628/700]:  41%|████      | 320/782 [00:01<00:02, 208.86it/s]



Epoch [628/700]:  47%|████▋     | 364/782 [00:01<00:01, 210.81it/s]



Epoch [628/700]:  55%|█████▍    | 430/782 [00:02<00:01, 213.80it/s]



Epoch [628/700]:  61%|██████    | 474/782 [00:02<00:01, 209.74it/s]



Epoch [628/700]:  66%|██████▌   | 517/782 [00:02<00:01, 209.25it/s]



Epoch [628/700]:  72%|███████▏  | 560/782 [00:02<00:01, 209.54it/s]



Epoch [628/700]:  77%|███████▋  | 603/782 [00:02<00:00, 209.53it/s]



Epoch [628/700]:  83%|████████▎ | 647/782 [00:03<00:00, 213.47it/s]



Epoch [628/700]:  88%|████████▊ | 691/782 [00:03<00:00, 210.51it/s]



Epoch [628/700]:  97%|█████████▋| 757/782 [00:03<00:00, 209.44it/s]



Epoch [628/700]: 100%|██████████| 782/782 [00:03<00:00, 209.37it/s]


Learning Rate: 0.004050
Train Loss: 0.0404, Accuracy: 98.61%, Confidence: 0.9848
Test Loss: 2.9446, Accuracy: 68.31%, Confidence: 0.9454
Train-Test Accuracy Gap: 30.30%


Epoch [629/700]:   3%|▎         | 21/782 [00:00<00:03, 207.30it/s]



Epoch [629/700]:  11%|█         | 84/782 [00:00<00:03, 208.85it/s]



Epoch [629/700]:  16%|█▋        | 128/782 [00:00<00:03, 212.85it/s]



Epoch [629/700]:  22%|██▏       | 172/782 [00:00<00:02, 214.34it/s]



Epoch [629/700]:  28%|██▊       | 216/782 [00:01<00:02, 212.92it/s]



Epoch [629/700]:  33%|███▎      | 260/782 [00:01<00:02, 210.51it/s]



Epoch [629/700]:  42%|████▏     | 325/782 [00:01<00:02, 208.23it/s]



Epoch [629/700]:  47%|████▋     | 368/782 [00:01<00:01, 208.73it/s]



Epoch [629/700]:  52%|█████▏    | 410/782 [00:01<00:01, 207.19it/s]



Epoch [629/700]:  61%|██████    | 476/782 [00:02<00:01, 211.33it/s]



Epoch [629/700]:  66%|██████▋   | 520/782 [00:02<00:01, 211.66it/s]



Epoch [629/700]:  72%|███████▏  | 564/782 [00:02<00:01, 212.39it/s]



Epoch [629/700]:  78%|███████▊  | 608/782 [00:02<00:00, 212.02it/s]



Epoch [629/700]:  83%|████████▎ | 652/782 [00:03<00:00, 210.18it/s]



Epoch [629/700]:  92%|█████████▏| 718/782 [00:03<00:00, 212.20it/s]



Epoch [629/700]:  95%|█████████▍| 740/782 [00:03<00:00, 208.46it/s]



Epoch [629/700]: 100%|██████████| 782/782 [00:03<00:00, 209.91it/s]


Learning Rate: 0.004050
Train Loss: 0.0460, Accuracy: 98.43%, Confidence: 0.9836
Test Loss: 2.3208, Accuracy: 72.96%, Confidence: 0.9490
Train-Test Accuracy Gap: 25.47%


Epoch [630/700]:   2%|▏         | 19/782 [00:00<00:04, 188.80it/s]



Epoch [630/700]:   7%|▋         | 56/782 [00:00<00:04, 174.49it/s]



Epoch [630/700]:  12%|█▏        | 91/782 [00:00<00:04, 162.28it/s]



Epoch [630/700]:  16%|█▌        | 126/782 [00:00<00:03, 166.15it/s]



Epoch [630/700]:  21%|██        | 163/782 [00:00<00:03, 173.76it/s]



Epoch [630/700]:  26%|██▌       | 203/782 [00:01<00:03, 186.31it/s]



Epoch [630/700]:  34%|███▍      | 265/782 [00:01<00:02, 199.05it/s]



Epoch [630/700]:  42%|████▏     | 328/782 [00:01<00:02, 202.84it/s]



Epoch [630/700]:  47%|████▋     | 371/782 [00:01<00:01, 208.81it/s]



Epoch [630/700]:  53%|█████▎    | 415/782 [00:02<00:01, 210.78it/s]



Epoch [630/700]:  59%|█████▊    | 459/782 [00:02<00:01, 211.52it/s]



Epoch [630/700]:  64%|██████▍   | 503/782 [00:02<00:01, 210.98it/s]



Epoch [630/700]:  70%|██████▉   | 547/782 [00:02<00:01, 209.47it/s]



Epoch [630/700]:  75%|███████▌  | 589/782 [00:03<00:00, 208.15it/s]



Epoch [630/700]:  84%|████████▎ | 654/782 [00:03<00:00, 212.11it/s]



Epoch [630/700]:  89%|████████▉ | 697/782 [00:03<00:00, 207.45it/s]



Epoch [630/700]:  95%|█████████▍| 739/782 [00:03<00:00, 203.65it/s]



Epoch [630/700]: 100%|██████████| 782/782 [00:03<00:00, 197.94it/s]






Learning Rate: 0.004050
Train Loss: 0.0405, Accuracy: 98.65%, Confidence: 0.9846
Test Loss: 2.3089, Accuracy: 73.05%, Confidence: 0.9483
Train-Test Accuracy Gap: 25.60%


Epoch [631/700]:   3%|▎         | 20/782 [00:00<00:03, 197.74it/s]



Epoch [631/700]:  11%|█         | 83/782 [00:00<00:03, 198.37it/s]



Epoch [631/700]:  16%|█▌        | 124/782 [00:00<00:03, 198.98it/s]



Epoch [631/700]:  24%|██▍       | 187/782 [00:00<00:02, 203.94it/s]



Epoch [631/700]:  30%|██▉       | 231/782 [00:01<00:02, 208.97it/s]



Epoch [631/700]:  35%|███▍      | 273/782 [00:01<00:02, 205.29it/s]



Epoch [631/700]:  40%|████      | 316/782 [00:01<00:02, 206.94it/s]



Epoch [631/700]:  46%|████▌     | 359/782 [00:01<00:02, 207.06it/s]



Epoch [631/700]:  54%|█████▍    | 425/782 [00:02<00:01, 211.77it/s]



Epoch [631/700]:  60%|█████▉    | 469/782 [00:02<00:01, 209.16it/s]



Epoch [631/700]:  66%|██████▌   | 513/782 [00:02<00:01, 209.80it/s]



Epoch [631/700]:  74%|███████▍  | 579/782 [00:02<00:00, 212.16it/s]



Epoch [631/700]:  80%|███████▉  | 623/782 [00:03<00:00, 212.14it/s]



Epoch [631/700]:  85%|████████▌ | 668/782 [00:03<00:00, 213.71it/s]



Epoch [631/700]:  91%|█████████ | 712/782 [00:03<00:00, 211.08it/s]



Epoch [631/700]:  97%|█████████▋| 755/782 [00:03<00:00, 209.49it/s]



Epoch [631/700]: 100%|██████████| 782/782 [00:03<00:00, 208.01it/s]


Learning Rate: 0.004050
Train Loss: 0.0419, Accuracy: 98.57%, Confidence: 0.9841
Test Loss: 2.1967, Accuracy: 74.12%, Confidence: 0.9498
Train-Test Accuracy Gap: 24.45%


Epoch [632/700]:   3%|▎         | 21/782 [00:00<00:03, 206.32it/s]



Epoch [632/700]:   5%|▌         | 42/782 [00:00<00:03, 203.49it/s]



Epoch [632/700]:   8%|▊         | 64/782 [00:00<00:03, 206.98it/s]



Epoch [632/700]:  11%|█         | 85/782 [00:00<00:03, 206.89it/s]



Epoch [632/700]:  16%|█▌        | 127/782 [00:00<00:03, 207.01it/s]



Epoch [632/700]:  22%|██▏       | 169/782 [00:00<00:03, 197.71it/s]



Epoch [632/700]:  29%|██▉       | 229/782 [00:01<00:02, 196.54it/s]



Epoch [632/700]:  32%|███▏      | 249/782 [00:01<00:02, 189.42it/s]



Epoch [632/700]:  37%|███▋      | 288/782 [00:01<00:02, 184.88it/s]



Epoch [632/700]:  44%|████▍     | 346/782 [00:01<00:02, 188.02it/s]



Epoch [632/700]:  49%|████▉     | 387/782 [00:01<00:02, 193.56it/s]



Epoch [632/700]:  55%|█████▍    | 430/782 [00:02<00:01, 201.55it/s]



Epoch [632/700]:  61%|██████    | 474/782 [00:02<00:01, 208.08it/s]



Epoch [632/700]:  66%|██████▌   | 517/782 [00:02<00:01, 211.12it/s]



Epoch [632/700]:  72%|███████▏  | 560/782 [00:02<00:01, 206.96it/s]



Epoch [632/700]:  77%|███████▋  | 603/782 [00:03<00:00, 208.71it/s]



Epoch [632/700]:  82%|████████▏ | 645/782 [00:03<00:00, 207.26it/s]



Epoch [632/700]:  88%|████████▊ | 687/782 [00:03<00:00, 204.18it/s]



Epoch [632/700]:  93%|█████████▎| 729/782 [00:03<00:00, 205.29it/s]



Epoch [632/700]: 100%|██████████| 782/782 [00:03<00:00, 200.06it/s]






Learning Rate: 0.004050
Train Loss: 0.0431, Accuracy: 98.63%, Confidence: 0.9843
Test Loss: 2.3062, Accuracy: 73.77%, Confidence: 0.9479
Train-Test Accuracy Gap: 24.86%


Epoch [633/700]:   2%|▏         | 19/782 [00:00<00:04, 185.43it/s]



Epoch [633/700]:   5%|▌         | 40/782 [00:00<00:03, 197.22it/s]



Epoch [633/700]:  10%|█         | 80/782 [00:00<00:03, 193.27it/s]



Epoch [633/700]:  24%|██▍       | 190/782 [00:00<00:02, 212.26it/s]



Epoch [633/700]:  30%|██▉       | 233/782 [00:01<00:02, 204.13it/s]



Epoch [633/700]:  35%|███▌      | 275/782 [00:01<00:02, 198.15it/s]



Epoch [633/700]:  41%|████      | 317/782 [00:01<00:02, 202.29it/s]



Epoch [633/700]:  46%|████▌     | 359/782 [00:01<00:02, 203.38it/s]



Epoch [633/700]:  51%|█████▏    | 401/782 [00:01<00:01, 204.02it/s]



Epoch [633/700]:  57%|█████▋    | 443/782 [00:02<00:01, 198.45it/s]



Epoch [633/700]:  62%|██████▏   | 485/782 [00:02<00:01, 202.73it/s]



Epoch [633/700]:  68%|██████▊   | 529/782 [00:02<00:01, 208.17it/s]



Epoch [633/700]:  76%|███████▌  | 592/782 [00:02<00:00, 207.62it/s]



Epoch [633/700]:  81%|████████  | 634/782 [00:03<00:00, 206.66it/s]



Epoch [633/700]:  87%|████████▋ | 677/782 [00:03<00:00, 208.20it/s]



Epoch [633/700]:  95%|█████████▌| 743/782 [00:03<00:00, 207.25it/s]



Epoch [633/700]: 100%|██████████| 782/782 [00:03<00:00, 204.75it/s]


Learning Rate: 0.004050
Train Loss: 0.0383, Accuracy: 98.68%, Confidence: 0.9849
Test Loss: 2.8812, Accuracy: 69.52%, Confidence: 0.9480
Train-Test Accuracy Gap: 29.16%


Epoch [634/700]:   3%|▎         | 20/782 [00:00<00:03, 195.32it/s]



Epoch [634/700]:  14%|█▎        | 106/782 [00:00<00:03, 208.57it/s]



Epoch [634/700]:  19%|█▉        | 150/782 [00:00<00:02, 211.48it/s]



Epoch [634/700]:  25%|██▍       | 194/782 [00:00<00:02, 210.81it/s]



Epoch [634/700]:  30%|███       | 237/782 [00:01<00:02, 208.08it/s]



Epoch [634/700]:  36%|███▌      | 280/782 [00:01<00:02, 211.17it/s]



Epoch [634/700]:  41%|████▏     | 323/782 [00:01<00:02, 206.75it/s]



Epoch [634/700]:  47%|████▋     | 366/782 [00:01<00:01, 208.79it/s]



Epoch [634/700]:  52%|█████▏    | 408/782 [00:01<00:01, 207.87it/s]



Epoch [634/700]:  58%|█████▊    | 452/782 [00:02<00:01, 209.55it/s]



Epoch [634/700]:  63%|██████▎   | 495/782 [00:02<00:01, 210.63it/s]



Epoch [634/700]:  69%|██████▉   | 539/782 [00:02<00:01, 211.11it/s]



Epoch [634/700]:  77%|███████▋  | 605/782 [00:02<00:00, 210.41it/s]



Epoch [634/700]:  83%|████████▎ | 649/782 [00:03<00:00, 211.56it/s]



Epoch [634/700]:  89%|████████▊ | 693/782 [00:03<00:00, 209.24it/s]



Epoch [634/700]:  94%|█████████▍| 736/782 [00:03<00:00, 208.43it/s]



Epoch [634/700]: 100%|██████████| 782/782 [00:03<00:00, 208.64it/s]


Learning Rate: 0.004050
Train Loss: 0.0366, Accuracy: 98.74%, Confidence: 0.9847
Test Loss: 2.3914, Accuracy: 72.61%, Confidence: 0.9472
Train-Test Accuracy Gap: 26.13%


Epoch [635/700]:   3%|▎         | 21/782 [00:00<00:03, 203.83it/s]



Epoch [635/700]:   5%|▌         | 43/782 [00:00<00:03, 209.30it/s]



Epoch [635/700]:  11%|█         | 87/782 [00:00<00:03, 210.28it/s]



Epoch [635/700]:  17%|█▋        | 130/782 [00:00<00:03, 207.02it/s]



Epoch [635/700]:  22%|██▏       | 172/782 [00:00<00:02, 206.94it/s]



Epoch [635/700]:  27%|██▋       | 215/782 [00:01<00:02, 208.28it/s]



Epoch [635/700]:  33%|███▎      | 258/782 [00:01<00:02, 208.13it/s]



Epoch [635/700]:  38%|███▊      | 301/782 [00:01<00:02, 209.37it/s]



Epoch [635/700]:  47%|████▋     | 366/782 [00:01<00:01, 213.31it/s]



Epoch [635/700]:  52%|█████▏    | 410/782 [00:01<00:01, 209.95it/s]



Epoch [635/700]:  58%|█████▊    | 454/782 [00:02<00:01, 211.23it/s]



Epoch [635/700]:  64%|██████▎   | 497/782 [00:02<00:01, 208.47it/s]



Epoch [635/700]:  69%|██████▉   | 541/782 [00:02<00:01, 212.68it/s]



Epoch [635/700]:  75%|███████▍  | 585/782 [00:02<00:00, 211.76it/s]



Epoch [635/700]:  80%|████████  | 629/782 [00:03<00:00, 211.09it/s]



Epoch [635/700]:  89%|████████▉ | 695/782 [00:03<00:00, 210.58it/s]



Epoch [635/700]:  92%|█████████▏| 717/782 [00:03<00:00, 207.00it/s]



Epoch [635/700]: 100%|██████████| 782/782 [00:03<00:00, 208.62it/s]






Learning Rate: 0.004050
Train Loss: 0.0404, Accuracy: 98.62%, Confidence: 0.9843
Test Loss: 2.2567, Accuracy: 73.55%, Confidence: 0.9467
Train-Test Accuracy Gap: 25.07%


Epoch [636/700]:   3%|▎         | 21/782 [00:00<00:03, 204.97it/s]



Epoch [636/700]:   5%|▌         | 43/782 [00:00<00:03, 207.37it/s]



Epoch [636/700]:   8%|▊         | 64/782 [00:00<00:03, 201.87it/s]



Epoch [636/700]:  16%|█▌        | 127/782 [00:00<00:03, 203.15it/s]



Epoch [636/700]:  22%|██▏       | 169/782 [00:00<00:03, 200.05it/s]



Epoch [636/700]:  30%|██▉       | 231/782 [00:01<00:02, 195.71it/s]



Epoch [636/700]:  35%|███▍      | 272/782 [00:01<00:02, 196.70it/s]



Epoch [636/700]:  40%|████      | 314/782 [00:01<00:02, 201.35it/s]



Epoch [636/700]:  46%|████▌     | 356/782 [00:01<00:02, 202.22it/s]



Epoch [636/700]:  51%|█████     | 399/782 [00:01<00:01, 204.22it/s]



Epoch [636/700]:  56%|█████▋    | 441/782 [00:02<00:01, 202.58it/s]



Epoch [636/700]:  65%|██████▍   | 505/782 [00:02<00:01, 207.48it/s]



Epoch [636/700]:  70%|███████   | 549/782 [00:02<00:01, 209.41it/s]



Epoch [636/700]:  76%|███████▌  | 593/782 [00:02<00:00, 212.32it/s]



Epoch [636/700]:  81%|████████▏ | 637/782 [00:03<00:00, 211.47it/s]



Epoch [636/700]:  87%|████████▋ | 681/782 [00:03<00:00, 204.33it/s]



Epoch [636/700]:  92%|█████████▏| 723/782 [00:03<00:00, 205.18it/s]



Epoch [636/700]:  98%|█████████▊| 766/782 [00:03<00:00, 209.15it/s]



Epoch [636/700]: 100%|██████████| 782/782 [00:03<00:00, 204.44it/s]


Learning Rate: 0.004050
Train Loss: 0.0442, Accuracy: 98.54%, Confidence: 0.9839
Test Loss: 2.3456, Accuracy: 73.11%, Confidence: 0.9504
Train-Test Accuracy Gap: 25.43%


Epoch [637/700]:   3%|▎         | 21/782 [00:00<00:03, 204.68it/s]



Epoch [637/700]:  11%|█         | 87/782 [00:00<00:03, 211.55it/s]



Epoch [637/700]:  17%|█▋        | 131/782 [00:00<00:03, 210.93it/s]



Epoch [637/700]:  22%|██▏       | 175/782 [00:00<00:02, 211.07it/s]



Epoch [637/700]:  28%|██▊       | 219/782 [00:01<00:02, 210.91it/s]



Epoch [637/700]:  36%|███▋      | 285/782 [00:01<00:02, 212.49it/s]



Epoch [637/700]:  42%|████▏     | 329/782 [00:01<00:02, 213.22it/s]



Epoch [637/700]:  48%|████▊     | 373/782 [00:01<00:01, 212.72it/s]



Epoch [637/700]:  53%|█████▎    | 416/782 [00:01<00:01, 206.44it/s]



Epoch [637/700]:  59%|█████▊    | 458/782 [00:02<00:01, 206.53it/s]



Epoch [637/700]:  64%|██████▍   | 501/782 [00:02<00:01, 208.51it/s]



Epoch [637/700]:  72%|███████▏  | 565/782 [00:02<00:01, 208.43it/s]



Epoch [637/700]:  78%|███████▊  | 607/782 [00:02<00:00, 208.43it/s]



Epoch [637/700]:  86%|████████▌ | 673/782 [00:03<00:00, 213.34it/s]



Epoch [637/700]:  92%|█████████▏| 718/782 [00:03<00:00, 216.99it/s]



Epoch [637/700]:  97%|█████████▋| 762/782 [00:03<00:00, 216.60it/s]



Epoch [637/700]: 100%|██████████| 782/782 [00:03<00:00, 211.38it/s]


Learning Rate: 0.004050
Train Loss: 0.0389, Accuracy: 98.69%, Confidence: 0.9848
Test Loss: 2.3742, Accuracy: 72.29%, Confidence: 0.9496
Train-Test Accuracy Gap: 26.40%


Epoch [638/700]:   3%|▎         | 21/782 [00:00<00:03, 203.26it/s]



Epoch [638/700]:   8%|▊         | 63/782 [00:00<00:03, 204.65it/s]



Epoch [638/700]:  19%|█▉        | 152/782 [00:00<00:02, 213.79it/s]



Epoch [638/700]:  25%|██▌       | 196/782 [00:00<00:02, 211.24it/s]



Epoch [638/700]:  31%|███       | 240/782 [00:01<00:02, 207.26it/s]



Epoch [638/700]:  36%|███▋      | 284/782 [00:01<00:02, 211.28it/s]



Epoch [638/700]:  42%|████▏     | 328/782 [00:01<00:02, 209.92it/s]



Epoch [638/700]:  50%|█████     | 392/782 [00:01<00:01, 207.35it/s]



Epoch [638/700]:  56%|█████▌    | 435/782 [00:02<00:01, 207.17it/s]



Epoch [638/700]:  61%|██████    | 478/782 [00:02<00:01, 209.17it/s]



Epoch [638/700]:  66%|██████▋   | 520/782 [00:02<00:01, 208.80it/s]



Epoch [638/700]:  75%|███████▌  | 587/782 [00:02<00:00, 213.95it/s]



Epoch [638/700]:  86%|████████▋ | 675/782 [00:03<00:00, 212.65it/s]



Epoch [638/700]:  92%|█████████▏| 719/782 [00:03<00:00, 209.95it/s]



Epoch [638/700]:  98%|█████████▊| 763/782 [00:03<00:00, 209.52it/s]



Epoch [638/700]: 100%|██████████| 782/782 [00:03<00:00, 209.77it/s]


Learning Rate: 0.004050
Train Loss: 0.0362, Accuracy: 98.87%, Confidence: 0.9857
Test Loss: 2.5664, Accuracy: 71.84%, Confidence: 0.9485
Train-Test Accuracy Gap: 27.03%


Epoch [639/700]:   3%|▎         | 21/782 [00:00<00:03, 204.81it/s]



Epoch [639/700]:  11%|█         | 84/782 [00:00<00:03, 200.70it/s]



Epoch [639/700]:  16%|█▋        | 128/782 [00:00<00:03, 209.35it/s]



Epoch [639/700]:  22%|██▏       | 170/782 [00:00<00:02, 208.77it/s]



Epoch [639/700]:  27%|██▋       | 212/782 [00:01<00:02, 198.34it/s]



Epoch [639/700]:  35%|███▌      | 275/782 [00:01<00:02, 206.16it/s]



Epoch [639/700]:  41%|████      | 319/782 [00:01<00:02, 209.49it/s]



Epoch [639/700]:  46%|████▋     | 362/782 [00:01<00:02, 208.42it/s]



Epoch [639/700]:  55%|█████▍    | 428/782 [00:02<00:01, 209.29it/s]



Epoch [639/700]:  60%|██████    | 470/782 [00:02<00:01, 208.42it/s]



Epoch [639/700]:  66%|██████▌   | 514/782 [00:02<00:01, 209.98it/s]



Epoch [639/700]:  71%|███████   | 557/782 [00:02<00:01, 209.21it/s]



Epoch [639/700]:  77%|███████▋  | 599/782 [00:02<00:00, 205.72it/s]



Epoch [639/700]:  82%|████████▏ | 641/782 [00:03<00:00, 204.73it/s]



Epoch [639/700]:  90%|█████████ | 706/782 [00:03<00:00, 209.85it/s]



Epoch [639/700]:  96%|█████████▌| 749/782 [00:03<00:00, 210.46it/s]



Epoch [639/700]: 100%|██████████| 782/782 [00:03<00:00, 207.35it/s]


Learning Rate: 0.004050
Train Loss: 0.0390, Accuracy: 98.68%, Confidence: 0.9844
Test Loss: 2.3472, Accuracy: 73.22%, Confidence: 0.9483
Train-Test Accuracy Gap: 25.46%


Epoch [640/700]:   3%|▎         | 22/782 [00:00<00:03, 213.46it/s]



Epoch [640/700]:   6%|▌         | 44/782 [00:00<00:03, 215.41it/s]



Epoch [640/700]:   8%|▊         | 66/782 [00:00<00:03, 216.20it/s]



Epoch [640/700]:  11%|█▏        | 88/782 [00:00<00:03, 215.66it/s]



Epoch [640/700]:  14%|█▍        | 110/782 [00:00<00:03, 214.61it/s]



Epoch [640/700]:  17%|█▋        | 132/782 [00:00<00:03, 214.58it/s]



Epoch [640/700]:  20%|█▉        | 154/782 [00:00<00:02, 214.04it/s]



Epoch [640/700]:  23%|██▎       | 176/782 [00:00<00:02, 211.66it/s]



Epoch [640/700]:  25%|██▌       | 198/782 [00:00<00:02, 207.23it/s]



Epoch [640/700]:  28%|██▊       | 219/782 [00:01<00:02, 204.21it/s]



Epoch [640/700]:  31%|███       | 240/782 [00:01<00:02, 201.41it/s]



Epoch [640/700]:  33%|███▎      | 261/782 [00:01<00:02, 200.91it/s]



Epoch [640/700]:  36%|███▌      | 283/782 [00:01<00:02, 206.22it/s]



Epoch [640/700]:  39%|███▉      | 305/782 [00:01<00:02, 207.80it/s]



Epoch [640/700]:  42%|████▏     | 327/782 [00:01<00:02, 210.83it/s]



Epoch [640/700]:  45%|████▍     | 349/782 [00:01<00:02, 207.50it/s]



Epoch [640/700]:  50%|█████     | 393/782 [00:01<00:01, 209.43it/s]



Epoch [640/700]:  53%|█████▎    | 415/782 [00:01<00:01, 210.07it/s]



Epoch [640/700]:  56%|█████▌    | 437/782 [00:02<00:01, 209.08it/s]



Epoch [640/700]:  59%|█████▊    | 458/782 [00:02<00:01, 208.67it/s]



Epoch [640/700]:  61%|██████▏   | 480/782 [00:02<00:01, 211.94it/s]



Epoch [640/700]:  64%|██████▍   | 502/782 [00:02<00:01, 211.38it/s]



Epoch [640/700]:  67%|██████▋   | 524/782 [00:02<00:01, 211.61it/s]



Epoch [640/700]:  70%|██████▉   | 546/782 [00:02<00:01, 210.07it/s]



Epoch [640/700]:  73%|███████▎  | 568/782 [00:02<00:01, 208.78it/s]



Epoch [640/700]:  75%|███████▌  | 589/782 [00:02<00:00, 207.08it/s]



Epoch [640/700]:  78%|███████▊  | 610/782 [00:02<00:00, 203.93it/s]



Epoch [640/700]:  81%|████████  | 631/782 [00:03<00:00, 204.77it/s]



Epoch [640/700]:  83%|████████▎ | 652/782 [00:03<00:00, 204.70it/s]



Epoch [640/700]:  86%|████████▌ | 674/782 [00:03<00:00, 207.11it/s]



Epoch [640/700]:  89%|████████▉ | 696/782 [00:03<00:00, 208.15it/s]



Epoch [640/700]:  92%|█████████▏| 717/782 [00:03<00:00, 208.69it/s]



Epoch [640/700]:  95%|█████████▍| 739/782 [00:03<00:00, 210.30it/s]



Epoch [640/700]:  97%|█████████▋| 761/782 [00:03<00:00, 210.67it/s]



Epoch [640/700]: 100%|██████████| 782/782 [00:03<00:00, 209.16it/s]


Learning Rate: 0.004050
Train Loss: 0.0460, Accuracy: 98.47%, Confidence: 0.9836
Test Loss: 2.5521, Accuracy: 72.47%, Confidence: 0.9496
Train-Test Accuracy Gap: 26.00%


Epoch [641/700]:   3%|▎         | 21/782 [00:00<00:03, 208.17it/s]



Epoch [641/700]:   5%|▌         | 43/782 [00:00<00:03, 210.67it/s]



Epoch [641/700]:   8%|▊         | 65/782 [00:00<00:03, 213.07it/s]



Epoch [641/700]:  11%|█         | 87/782 [00:00<00:03, 211.66it/s]



Epoch [641/700]:  17%|█▋        | 131/782 [00:00<00:03, 212.94it/s]



Epoch [641/700]:  28%|██▊       | 219/782 [00:01<00:02, 213.59it/s]



Epoch [641/700]:  34%|███▎      | 263/782 [00:01<00:02, 205.29it/s]



Epoch [641/700]:  39%|███▉      | 304/782 [00:01<00:02, 194.30it/s]



Epoch [641/700]:  47%|████▋     | 364/782 [00:01<00:02, 190.59it/s]



Epoch [641/700]:  52%|█████▏    | 404/782 [00:02<00:01, 191.31it/s]



Epoch [641/700]:  57%|█████▋    | 444/782 [00:02<00:01, 192.91it/s]



Epoch [641/700]:  64%|██████▍   | 504/782 [00:02<00:01, 190.50it/s]



Epoch [641/700]:  70%|██████▉   | 544/782 [00:02<00:01, 193.21it/s]



Epoch [641/700]:  75%|███████▍  | 584/782 [00:02<00:01, 193.57it/s]



Epoch [641/700]:  80%|███████▉  | 623/782 [00:03<00:00, 179.36it/s]



Epoch [641/700]:  84%|████████▍ | 660/782 [00:03<00:00, 171.24it/s]



Epoch [641/700]:  89%|████████▉ | 696/782 [00:03<00:00, 170.11it/s]



Epoch [641/700]:  96%|█████████▌| 748/782 [00:03<00:00, 162.86it/s]



Epoch [641/700]: 100%|██████████| 782/782 [00:04<00:00, 188.24it/s]






Learning Rate: 0.004050
Train Loss: 0.0366, Accuracy: 98.79%, Confidence: 0.9849
Test Loss: 2.7697, Accuracy: 70.94%, Confidence: 0.9493
Train-Test Accuracy Gap: 27.85%


Epoch [642/700]:   2%|▏         | 16/782 [00:00<00:04, 156.20it/s]



Epoch [642/700]:   4%|▍         | 33/782 [00:00<00:04, 158.78it/s]



Epoch [642/700]:   6%|▋         | 50/782 [00:00<00:04, 163.64it/s]



Epoch [642/700]:   9%|▊         | 67/782 [00:00<00:04, 162.19it/s]



Epoch [642/700]:  11%|█         | 84/782 [00:00<00:04, 160.63it/s]



Epoch [642/700]:  13%|█▎        | 101/782 [00:00<00:04, 161.76it/s]



Epoch [642/700]:  15%|█▌        | 118/782 [00:00<00:04, 161.12it/s]



Epoch [642/700]:  17%|█▋        | 135/782 [00:00<00:04, 161.50it/s]



Epoch [642/700]:  20%|█▉        | 153/782 [00:00<00:03, 165.37it/s]



Epoch [642/700]:  22%|██▏       | 171/782 [00:01<00:03, 168.71it/s]



Epoch [642/700]:  27%|██▋       | 208/782 [00:01<00:03, 173.93it/s]



Epoch [642/700]:  29%|██▉       | 226/782 [00:01<00:03, 175.06it/s]



Epoch [642/700]:  31%|███▏      | 245/782 [00:01<00:03, 176.78it/s]



Epoch [642/700]:  38%|███▊      | 299/782 [00:01<00:02, 174.52it/s]



Epoch [642/700]:  43%|████▎     | 335/782 [00:01<00:02, 174.15it/s]



Epoch [642/700]:  48%|████▊     | 372/782 [00:02<00:02, 176.64it/s]



Epoch [642/700]:  52%|█████▏    | 409/782 [00:02<00:02, 177.29it/s]



Epoch [642/700]:  58%|█████▊    | 451/782 [00:02<00:01, 191.05it/s]



Epoch [642/700]:  66%|██████▌   | 515/782 [00:02<00:01, 203.08it/s]



Epoch [642/700]:  71%|███████▏  | 559/782 [00:03<00:01, 208.19it/s]



Epoch [642/700]:  77%|███████▋  | 602/782 [00:03<00:00, 208.89it/s]



Epoch [642/700]:  85%|████████▌ | 667/782 [00:03<00:00, 210.59it/s]



Epoch [642/700]:  91%|█████████ | 711/782 [00:03<00:00, 210.12it/s]



Epoch [642/700]:  97%|█████████▋| 755/782 [00:04<00:00, 210.60it/s]



Epoch [642/700]: 100%|██████████| 782/782 [00:04<00:00, 187.35it/s]


Learning Rate: 0.004050
Train Loss: 0.0399, Accuracy: 98.66%, Confidence: 0.9848
Test Loss: 2.4384, Accuracy: 72.37%, Confidence: 0.9484
Train-Test Accuracy Gap: 26.29%


Epoch [643/700]:   3%|▎         | 21/782 [00:00<00:03, 206.70it/s]



Epoch [643/700]:   5%|▌         | 43/782 [00:00<00:03, 212.75it/s]



Epoch [643/700]:   8%|▊         | 65/782 [00:00<00:03, 212.31it/s]



Epoch [643/700]:  11%|█         | 87/782 [00:00<00:03, 213.19it/s]



Epoch [643/700]:  17%|█▋        | 131/782 [00:00<00:03, 212.66it/s]



Epoch [643/700]:  20%|█▉        | 153/782 [00:00<00:02, 210.12it/s]



Epoch [643/700]:  22%|██▏       | 175/782 [00:00<00:02, 207.10it/s]



Epoch [643/700]:  25%|██▌       | 196/782 [00:00<00:02, 205.31it/s]



Epoch [643/700]:  28%|██▊       | 218/782 [00:01<00:02, 206.90it/s]



Epoch [643/700]:  31%|███       | 239/782 [00:01<00:02, 204.91it/s]



Epoch [643/700]:  33%|███▎      | 260/782 [00:01<00:02, 204.88it/s]



Epoch [643/700]:  36%|███▌      | 282/782 [00:01<00:02, 207.05it/s]



Epoch [643/700]:  39%|███▊      | 303/782 [00:01<00:02, 207.30it/s]



Epoch [643/700]:  41%|████▏     | 324/782 [00:01<00:02, 205.96it/s]



Epoch [643/700]:  52%|█████▏    | 409/782 [00:01<00:01, 208.23it/s]



Epoch [643/700]:  61%|██████    | 475/782 [00:02<00:01, 211.87it/s]



Epoch [643/700]:  66%|██████▋   | 519/782 [00:02<00:01, 213.05it/s]



Epoch [643/700]:  72%|███████▏  | 563/782 [00:02<00:01, 215.51it/s]



Epoch [643/700]:  78%|███████▊  | 607/782 [00:02<00:00, 215.13it/s]



Epoch [643/700]:  83%|████████▎ | 651/782 [00:03<00:00, 213.58it/s]



Epoch [643/700]:  89%|████████▉ | 695/782 [00:03<00:00, 212.27it/s]



Epoch [643/700]:  95%|█████████▍| 739/782 [00:03<00:00, 212.39it/s]



Epoch [643/700]: 100%|██████████| 782/782 [00:03<00:00, 210.40it/s]


Learning Rate: 0.004050
Train Loss: 0.0381, Accuracy: 98.73%, Confidence: 0.9849
Test Loss: 2.3445, Accuracy: 72.99%, Confidence: 0.9488
Train-Test Accuracy Gap: 25.74%


Epoch [644/700]:   3%|▎         | 21/782 [00:00<00:03, 209.25it/s]



Epoch [644/700]:   5%|▌         | 42/782 [00:00<00:03, 209.15it/s]



Epoch [644/700]:  11%|█         | 86/782 [00:00<00:03, 213.99it/s]



Epoch [644/700]:  17%|█▋        | 130/782 [00:00<00:03, 206.38it/s]



Epoch [644/700]:  22%|██▏       | 173/782 [00:00<00:02, 205.85it/s]



Epoch [644/700]:  27%|██▋       | 215/782 [00:01<00:02, 203.65it/s]



Epoch [644/700]:  33%|███▎      | 258/782 [00:01<00:02, 206.72it/s]



Epoch [644/700]:  39%|███▊      | 302/782 [00:01<00:02, 209.27it/s]



Epoch [644/700]:  47%|████▋     | 365/782 [00:01<00:02, 206.29it/s]



Epoch [644/700]:  52%|█████▏    | 407/782 [00:01<00:01, 205.56it/s]



Epoch [644/700]:  58%|█████▊    | 450/782 [00:02<00:01, 207.60it/s]



Epoch [644/700]:  63%|██████▎   | 493/782 [00:02<00:01, 209.97it/s]



Epoch [644/700]:  68%|██████▊   | 535/782 [00:02<00:01, 207.13it/s]



Epoch [644/700]:  74%|███████▍  | 578/782 [00:02<00:00, 205.70it/s]



Epoch [644/700]:  79%|███████▉  | 620/782 [00:02<00:00, 204.42it/s]



Epoch [644/700]:  85%|████████▍ | 662/782 [00:03<00:00, 205.71it/s]



Epoch [644/700]:  93%|█████████▎| 728/782 [00:03<00:00, 210.03it/s]



Epoch [644/700]: 100%|██████████| 782/782 [00:03<00:00, 207.49it/s]






Learning Rate: 0.004050
Train Loss: 0.0431, Accuracy: 98.50%, Confidence: 0.9839
Test Loss: 2.3375, Accuracy: 73.55%, Confidence: 0.9502
Train-Test Accuracy Gap: 24.95%


Epoch [645/700]:   3%|▎         | 21/782 [00:00<00:03, 203.30it/s]



Epoch [645/700]:   5%|▌         | 42/782 [00:00<00:03, 203.37it/s]



Epoch [645/700]:   8%|▊         | 63/782 [00:00<00:03, 202.23it/s]



Epoch [645/700]:  16%|█▋        | 128/782 [00:00<00:03, 210.73it/s]



Epoch [645/700]:  22%|██▏       | 172/782 [00:00<00:02, 210.49it/s]



Epoch [645/700]:  27%|██▋       | 215/782 [00:01<00:02, 208.21it/s]



Epoch [645/700]:  33%|███▎      | 258/782 [00:01<00:02, 209.02it/s]



Epoch [645/700]:  39%|███▊      | 302/782 [00:01<00:02, 211.62it/s]



Epoch [645/700]:  44%|████▍     | 346/782 [00:01<00:02, 211.69it/s]



Epoch [645/700]:  53%|█████▎    | 412/782 [00:01<00:01, 211.76it/s]



Epoch [645/700]:  58%|█████▊    | 456/782 [00:02<00:01, 210.06it/s]



Epoch [645/700]:  64%|██████▍   | 500/782 [00:02<00:01, 210.79it/s]



Epoch [645/700]:  69%|██████▉   | 543/782 [00:02<00:01, 209.36it/s]



Epoch [645/700]:  75%|███████▍  | 585/782 [00:02<00:00, 207.72it/s]



Epoch [645/700]:  80%|████████  | 627/782 [00:02<00:00, 207.21it/s]



Epoch [645/700]:  86%|████████▌ | 670/782 [00:03<00:00, 209.11it/s]



Epoch [645/700]:  91%|█████████▏| 714/782 [00:03<00:00, 212.51it/s]



Epoch [645/700]:  97%|█████████▋| 758/782 [00:03<00:00, 211.03it/s]



Epoch [645/700]: 100%|██████████| 782/782 [00:03<00:00, 209.02it/s]


Learning Rate: 0.004050
Train Loss: 0.0403, Accuracy: 98.61%, Confidence: 0.9841
Test Loss: 2.4020, Accuracy: 72.54%, Confidence: 0.9506
Train-Test Accuracy Gap: 26.07%


Epoch [646/700]:   3%|▎         | 21/782 [00:00<00:03, 205.05it/s]



Epoch [646/700]:   5%|▌         | 43/782 [00:00<00:03, 208.92it/s]



Epoch [646/700]:   8%|▊         | 64/782 [00:00<00:03, 208.27it/s]



Epoch [646/700]:  11%|█         | 86/782 [00:00<00:03, 210.75it/s]



Epoch [646/700]:  14%|█▍        | 108/782 [00:00<00:03, 208.83it/s]



Epoch [646/700]:  17%|█▋        | 130/782 [00:00<00:03, 209.69it/s]



Epoch [646/700]:  19%|█▉        | 151/782 [00:00<00:03, 208.82it/s]



Epoch [646/700]:  22%|██▏       | 172/782 [00:00<00:02, 208.40it/s]



Epoch [646/700]:  27%|██▋       | 215/782 [00:01<00:02, 208.38it/s]



Epoch [646/700]:  33%|███▎      | 259/782 [00:01<00:02, 211.39it/s]



Epoch [646/700]:  42%|████▏     | 325/782 [00:01<00:02, 212.39it/s]



Epoch [646/700]:  47%|████▋     | 369/782 [00:01<00:01, 211.31it/s]



Epoch [646/700]:  53%|█████▎    | 412/782 [00:01<00:01, 207.62it/s]



Epoch [646/700]:  58%|█████▊    | 454/782 [00:02<00:01, 204.83it/s]



Epoch [646/700]:  64%|██████▎   | 497/782 [00:02<00:01, 204.63it/s]



Epoch [646/700]:  69%|██████▉   | 539/782 [00:02<00:01, 205.77it/s]



Epoch [646/700]:  74%|███████▍  | 581/782 [00:02<00:00, 205.15it/s]



Epoch [646/700]:  80%|███████▉  | 624/782 [00:02<00:00, 207.59it/s]



Epoch [646/700]:  85%|████████▌ | 667/782 [00:03<00:00, 208.01it/s]



Epoch [646/700]:  94%|█████████▎| 732/782 [00:03<00:00, 211.27it/s]



Epoch [646/700]:  99%|█████████▉| 776/782 [00:03<00:00, 209.83it/s]



Epoch [646/700]: 100%|██████████| 782/782 [00:03<00:00, 208.36it/s]


Learning Rate: 0.004050
Train Loss: 0.0498, Accuracy: 98.37%, Confidence: 0.9838
Test Loss: 2.4036, Accuracy: 72.54%, Confidence: 0.9484
Train-Test Accuracy Gap: 25.83%


Epoch [647/700]:   3%|▎         | 22/782 [00:00<00:03, 212.03it/s]



Epoch [647/700]:  11%|█▏        | 88/782 [00:00<00:03, 212.90it/s]



Epoch [647/700]:  17%|█▋        | 132/782 [00:00<00:03, 209.57it/s]



Epoch [647/700]:  22%|██▏       | 175/782 [00:00<00:02, 209.55it/s]



Epoch [647/700]:  28%|██▊       | 217/782 [00:01<00:02, 206.07it/s]



Epoch [647/700]:  33%|███▎      | 260/782 [00:01<00:02, 207.48it/s]



Epoch [647/700]:  42%|████▏     | 326/782 [00:01<00:02, 212.51it/s]



Epoch [647/700]:  47%|████▋     | 370/782 [00:01<00:01, 212.93it/s]



Epoch [647/700]:  56%|█████▌    | 436/782 [00:02<00:01, 212.77it/s]



Epoch [647/700]:  61%|██████▏   | 480/782 [00:02<00:01, 208.77it/s]



Epoch [647/700]:  67%|██████▋   | 522/782 [00:02<00:01, 202.54it/s]



Epoch [647/700]:  72%|███████▏  | 565/782 [00:02<00:01, 206.17it/s]



Epoch [647/700]:  80%|████████  | 629/782 [00:03<00:00, 205.92it/s]



Epoch [647/700]:  86%|████████▌ | 671/782 [00:03<00:00, 206.52it/s]



Epoch [647/700]:  91%|█████████▏| 714/782 [00:03<00:00, 208.52it/s]



Epoch [647/700]:  97%|█████████▋| 758/782 [00:03<00:00, 212.04it/s]



Epoch [647/700]: 100%|██████████| 782/782 [00:03<00:00, 208.75it/s]


Learning Rate: 0.004050
Train Loss: 0.0333, Accuracy: 98.86%, Confidence: 0.9858
Test Loss: 2.6961, Accuracy: 71.25%, Confidence: 0.9493
Train-Test Accuracy Gap: 27.61%


Epoch [648/700]:   3%|▎         | 21/782 [00:00<00:03, 200.16it/s]



Epoch [648/700]:   8%|▊         | 63/782 [00:00<00:03, 200.54it/s]



Epoch [648/700]:  16%|█▌        | 127/782 [00:00<00:03, 206.30it/s]



Epoch [648/700]:  22%|██▏       | 169/782 [00:00<00:03, 203.00it/s]



Epoch [648/700]:  27%|██▋       | 213/782 [00:01<00:02, 207.82it/s]



Epoch [648/700]:  33%|███▎      | 256/782 [00:01<00:02, 208.66it/s]



Epoch [648/700]:  38%|███▊      | 298/782 [00:01<00:02, 208.22it/s]



Epoch [648/700]:  44%|████▎     | 341/782 [00:01<00:02, 209.23it/s]



Epoch [648/700]:  52%|█████▏    | 407/782 [00:01<00:01, 210.03it/s]



Epoch [648/700]:  58%|█████▊    | 451/782 [00:02<00:01, 210.07it/s]



Epoch [648/700]:  63%|██████▎   | 494/782 [00:02<00:01, 207.75it/s]



Epoch [648/700]:  69%|██████▉   | 538/782 [00:02<00:01, 209.33it/s]



Epoch [648/700]:  77%|███████▋  | 604/782 [00:02<00:00, 209.23it/s]



Epoch [648/700]:  83%|████████▎ | 648/782 [00:03<00:00, 209.85it/s]



Epoch [648/700]:  88%|████████▊ | 690/782 [00:03<00:00, 207.77it/s]



Epoch [648/700]:  94%|█████████▎| 733/782 [00:03<00:00, 207.49it/s]



Epoch [648/700]: 100%|██████████| 782/782 [00:03<00:00, 207.48it/s]






Learning Rate: 0.004050
Train Loss: 0.0471, Accuracy: 98.48%, Confidence: 0.9843
Test Loss: 2.3482, Accuracy: 72.09%, Confidence: 0.9469
Train-Test Accuracy Gap: 26.39%


Epoch [649/700]:   3%|▎         | 22/782 [00:00<00:03, 211.90it/s]



Epoch [649/700]:  11%|█▏        | 88/782 [00:00<00:03, 214.96it/s]



Epoch [649/700]:  20%|█▉        | 154/782 [00:00<00:02, 209.60it/s]



Epoch [649/700]:  25%|██▌       | 196/782 [00:00<00:02, 208.07it/s]



Epoch [649/700]:  31%|███       | 239/782 [00:01<00:02, 210.69it/s]



Epoch [649/700]:  36%|███▌      | 283/782 [00:01<00:02, 208.15it/s]



Epoch [649/700]:  45%|████▍     | 348/782 [00:01<00:02, 205.53it/s]



Epoch [649/700]:  50%|█████     | 391/782 [00:01<00:01, 207.86it/s]



Epoch [649/700]:  56%|█████▌    | 435/782 [00:02<00:01, 212.35it/s]



Epoch [649/700]:  61%|██████▏   | 479/782 [00:02<00:01, 208.82it/s]



Epoch [649/700]:  67%|██████▋   | 523/782 [00:02<00:01, 211.00it/s]



Epoch [649/700]:  73%|███████▎  | 567/782 [00:02<00:01, 210.63it/s]



Epoch [649/700]:  78%|███████▊  | 610/782 [00:02<00:00, 204.95it/s]



Epoch [649/700]:  86%|████████▋ | 676/782 [00:03<00:00, 209.68it/s]



Epoch [649/700]:  92%|█████████▏| 719/782 [00:03<00:00, 208.90it/s]



Epoch [649/700]:  97%|█████████▋| 762/782 [00:03<00:00, 210.04it/s]



Epoch [649/700]: 100%|██████████| 782/782 [00:03<00:00, 208.93it/s]


Learning Rate: 0.004050
Train Loss: 0.0344, Accuracy: 98.85%, Confidence: 0.9858
Test Loss: 2.6423, Accuracy: 70.85%, Confidence: 0.9478
Train-Test Accuracy Gap: 28.00%


Epoch [650/700]:   3%|▎         | 22/782 [00:00<00:03, 213.24it/s]



Epoch [650/700]:  11%|█▏        | 89/782 [00:00<00:03, 214.77it/s]



Epoch [650/700]:  20%|█▉        | 155/782 [00:00<00:02, 215.69it/s]



Epoch [650/700]:  28%|██▊       | 221/782 [00:01<00:02, 215.73it/s]



Epoch [650/700]:  34%|███▍      | 265/782 [00:01<00:02, 214.47it/s]



Epoch [650/700]:  40%|███▉      | 309/782 [00:01<00:02, 215.22it/s]



Epoch [650/700]:  45%|████▌     | 353/782 [00:01<00:02, 213.63it/s]



Epoch [650/700]:  51%|█████     | 397/782 [00:01<00:01, 211.00it/s]



Epoch [650/700]:  56%|█████▋    | 441/782 [00:02<00:01, 213.27it/s]



Epoch [650/700]:  62%|██████▏   | 485/782 [00:02<00:01, 209.07it/s]



Epoch [650/700]:  68%|██████▊   | 528/782 [00:02<00:01, 208.00it/s]



Epoch [650/700]:  73%|███████▎  | 570/782 [00:02<00:01, 206.87it/s]



Epoch [650/700]:  78%|███████▊  | 613/782 [00:02<00:00, 207.09it/s]



Epoch [650/700]:  84%|████████▍ | 655/782 [00:03<00:00, 202.72it/s]



Epoch [650/700]:  92%|█████████▏| 719/782 [00:03<00:00, 205.82it/s]



Epoch [650/700]:  98%|█████████▊| 763/782 [00:03<00:00, 208.91it/s]



Epoch [650/700]: 100%|██████████| 782/782 [00:03<00:00, 210.18it/s]


Learning Rate: 0.004050
Train Loss: 0.0383, Accuracy: 98.72%, Confidence: 0.9848
Test Loss: 2.3844, Accuracy: 72.63%, Confidence: 0.9471
Train-Test Accuracy Gap: 26.09%


Epoch [651/700]:   3%|▎         | 20/782 [00:00<00:03, 194.98it/s]



Epoch [651/700]:   5%|▌         | 41/782 [00:00<00:03, 197.43it/s]



Epoch [651/700]:   8%|▊         | 61/782 [00:00<00:03, 194.93it/s]



Epoch [651/700]:  10%|█         | 82/782 [00:00<00:03, 199.08it/s]



Epoch [651/700]:  16%|█▌        | 126/782 [00:00<00:03, 206.54it/s]



Epoch [651/700]:  21%|██▏       | 168/782 [00:00<00:02, 205.81it/s]



Epoch [651/700]:  27%|██▋       | 210/782 [00:01<00:02, 207.47it/s]



Epoch [651/700]:  30%|██▉       | 231/782 [00:01<00:02, 207.23it/s]



Epoch [651/700]:  32%|███▏      | 253/782 [00:01<00:02, 209.12it/s]



Epoch [651/700]:  35%|███▌      | 275/782 [00:01<00:02, 211.01it/s]



Epoch [651/700]:  38%|███▊      | 297/782 [00:01<00:02, 208.85it/s]



Epoch [651/700]:  41%|████      | 318/782 [00:01<00:02, 206.80it/s]



Epoch [651/700]:  43%|████▎     | 340/782 [00:01<00:02, 209.89it/s]



Epoch [651/700]:  46%|████▋     | 362/782 [00:01<00:01, 211.88it/s]



Epoch [651/700]:  49%|████▉     | 384/782 [00:01<00:01, 211.17it/s]



Epoch [651/700]:  55%|█████▍    | 428/782 [00:02<00:01, 211.78it/s]



Epoch [651/700]:  58%|█████▊    | 450/782 [00:02<00:01, 211.06it/s]



Epoch [651/700]:  60%|██████    | 472/782 [00:02<00:01, 212.09it/s]



Epoch [651/700]:  66%|██████▌   | 516/782 [00:02<00:01, 213.81it/s]



Epoch [651/700]:  72%|███████▏  | 560/782 [00:02<00:01, 207.61it/s]



Epoch [651/700]:  77%|███████▋  | 604/782 [00:02<00:00, 210.92it/s]



Epoch [651/700]:  83%|████████▎ | 648/782 [00:03<00:00, 207.28it/s]



Epoch [651/700]:  88%|████████▊ | 691/782 [00:03<00:00, 206.74it/s]



Epoch [651/700]:  97%|█████████▋| 757/782 [00:03<00:00, 210.23it/s]



Epoch [651/700]: 100%|██████████| 782/782 [00:03<00:00, 208.49it/s]


Learning Rate: 0.004050
Train Loss: 0.0401, Accuracy: 98.59%, Confidence: 0.9848
Test Loss: 2.4509, Accuracy: 72.76%, Confidence: 0.9513
Train-Test Accuracy Gap: 25.83%


Epoch [652/700]:   3%|▎         | 22/782 [00:00<00:03, 214.45it/s]



Epoch [652/700]:   8%|▊         | 66/782 [00:00<00:03, 210.82it/s]



Epoch [652/700]:  11%|█▏        | 88/782 [00:00<00:03, 203.51it/s]



Epoch [652/700]:  14%|█▍        | 109/782 [00:00<00:03, 201.30it/s]



Epoch [652/700]:  20%|█▉        | 153/782 [00:00<00:03, 206.19it/s]



Epoch [652/700]:  22%|██▏       | 174/782 [00:00<00:02, 204.38it/s]



Epoch [652/700]:  31%|███       | 240/782 [00:01<00:02, 210.54it/s]



Epoch [652/700]:  36%|███▋      | 284/782 [00:01<00:02, 210.42it/s]



Epoch [652/700]:  42%|████▏     | 328/782 [00:01<00:02, 210.84it/s]



Epoch [652/700]:  48%|████▊     | 372/782 [00:01<00:01, 214.07it/s]



Epoch [652/700]:  53%|█████▎    | 416/782 [00:01<00:01, 211.94it/s]



Epoch [652/700]:  59%|█████▉    | 460/782 [00:02<00:01, 213.47it/s]



Epoch [652/700]:  64%|██████▍   | 504/782 [00:02<00:01, 213.36it/s]



Epoch [652/700]:  70%|██████▉   | 547/782 [00:02<00:01, 207.92it/s]



Epoch [652/700]:  78%|███████▊  | 611/782 [00:02<00:00, 207.22it/s]



Epoch [652/700]:  84%|████████▎ | 653/782 [00:03<00:00, 207.14it/s]



Epoch [652/700]:  89%|████████▉ | 697/782 [00:03<00:00, 208.46it/s]



Epoch [652/700]:  95%|█████████▍| 741/782 [00:03<00:00, 208.30it/s]



Epoch [652/700]: 100%|██████████| 782/782 [00:03<00:00, 209.05it/s]


Learning Rate: 0.004050
Train Loss: 0.0444, Accuracy: 98.54%, Confidence: 0.9838
Test Loss: 2.5241, Accuracy: 72.06%, Confidence: 0.9474
Train-Test Accuracy Gap: 26.48%


Epoch [653/700]:   3%|▎         | 21/782 [00:00<00:03, 208.47it/s]



Epoch [653/700]:   5%|▌         | 43/782 [00:00<00:03, 209.59it/s]



Epoch [653/700]:   8%|▊         | 64/782 [00:00<00:03, 206.87it/s]



Epoch [653/700]:  11%|█         | 86/782 [00:00<00:03, 209.99it/s]



Epoch [653/700]:  16%|█▋        | 128/782 [00:00<00:03, 205.80it/s]



Epoch [653/700]:  22%|██▏       | 170/782 [00:00<00:03, 203.87it/s]



Epoch [653/700]:  25%|██▍       | 192/782 [00:00<00:02, 207.07it/s]



Epoch [653/700]:  27%|██▋       | 214/782 [00:01<00:02, 208.96it/s]



Epoch [653/700]:  30%|███       | 236/782 [00:01<00:02, 209.78it/s]



Epoch [653/700]:  33%|███▎      | 258/782 [00:01<00:02, 210.29it/s]



Epoch [653/700]:  39%|███▊      | 302/782 [00:01<00:02, 213.30it/s]



Epoch [653/700]:  44%|████▍     | 346/782 [00:01<00:02, 208.55it/s]



Epoch [653/700]:  47%|████▋     | 367/782 [00:01<00:01, 207.93it/s]



Epoch [653/700]:  50%|████▉     | 389/782 [00:01<00:01, 209.50it/s]



Epoch [653/700]:  53%|█████▎    | 411/782 [00:01<00:01, 211.38it/s]



Epoch [653/700]:  58%|█████▊    | 455/782 [00:02<00:01, 211.75it/s]



Epoch [653/700]:  64%|██████▎   | 498/782 [00:02<00:01, 206.83it/s]



Epoch [653/700]:  66%|██████▋   | 520/782 [00:02<00:01, 209.03it/s]



Epoch [653/700]:  69%|██████▉   | 542/782 [00:02<00:01, 210.38it/s]



Epoch [653/700]:  72%|███████▏  | 564/782 [00:02<00:01, 208.66it/s]



Epoch [653/700]:  75%|███████▍  | 585/782 [00:02<00:00, 208.89it/s]



Epoch [653/700]:  77%|███████▋  | 606/782 [00:02<00:00, 205.99it/s]



Epoch [653/700]:  80%|████████  | 627/782 [00:03<00:00, 204.26it/s]



Epoch [653/700]:  83%|████████▎ | 648/782 [00:03<00:00, 203.69it/s]



Epoch [653/700]:  86%|████████▌ | 669/782 [00:03<00:00, 204.23it/s]



Epoch [653/700]:  88%|████████▊ | 691/782 [00:03<00:00, 208.07it/s]



Epoch [653/700]:  94%|█████████▎| 733/782 [00:03<00:00, 204.23it/s]



Epoch [653/700]:  96%|█████████▋| 754/782 [00:03<00:00, 205.89it/s]



Epoch [653/700]: 100%|██████████| 782/782 [00:03<00:00, 207.64it/s]






Learning Rate: 0.004050
Train Loss: 0.0382, Accuracy: 98.72%, Confidence: 0.9853
Test Loss: 2.2951, Accuracy: 73.42%, Confidence: 0.9501
Train-Test Accuracy Gap: 25.30%


Epoch [654/700]:   3%|▎         | 21/782 [00:00<00:03, 200.64it/s]



Epoch [654/700]:   5%|▌         | 42/782 [00:00<00:03, 196.27it/s]



Epoch [654/700]:  11%|█         | 85/782 [00:00<00:03, 206.31it/s]



Epoch [654/700]:  16%|█▌        | 127/782 [00:00<00:03, 204.47it/s]



Epoch [654/700]:  22%|██▏       | 170/782 [00:00<00:02, 207.51it/s]



Epoch [654/700]:  27%|██▋       | 214/782 [00:01<00:02, 210.57it/s]



Epoch [654/700]:  33%|███▎      | 258/782 [00:01<00:02, 212.45it/s]



Epoch [654/700]:  39%|███▊      | 302/782 [00:01<00:02, 210.08it/s]



Epoch [654/700]:  47%|████▋     | 368/782 [00:01<00:01, 211.72it/s]



Epoch [654/700]:  55%|█████▌    | 434/782 [00:02<00:01, 214.67it/s]



Epoch [654/700]:  61%|██████    | 478/782 [00:02<00:01, 210.92it/s]



Epoch [654/700]:  67%|██████▋   | 521/782 [00:02<00:01, 209.00it/s]



Epoch [654/700]:  75%|███████▍  | 585/782 [00:02<00:00, 208.98it/s]



Epoch [654/700]:  80%|████████  | 629/782 [00:03<00:00, 209.99it/s]



Epoch [654/700]:  86%|████████▌ | 673/782 [00:03<00:00, 210.65it/s]



Epoch [654/700]:  92%|█████████▏| 717/782 [00:03<00:00, 211.37it/s]



Epoch [654/700]:  97%|█████████▋| 761/782 [00:03<00:00, 210.79it/s]



Epoch [654/700]: 100%|██████████| 782/782 [00:03<00:00, 209.57it/s]


Learning Rate: 0.004050
Train Loss: 0.0392, Accuracy: 98.70%, Confidence: 0.9847
Test Loss: 2.2331, Accuracy: 73.51%, Confidence: 0.9488
Train-Test Accuracy Gap: 25.19%


Epoch [655/700]:   3%|▎         | 21/782 [00:00<00:03, 209.85it/s]



Epoch [655/700]:  11%|█         | 87/782 [00:00<00:03, 205.61it/s]



Epoch [655/700]:  17%|█▋        | 130/782 [00:00<00:03, 209.01it/s]



Epoch [655/700]:  22%|██▏       | 175/782 [00:00<00:02, 213.48it/s]



Epoch [655/700]:  28%|██▊       | 219/782 [00:01<00:02, 211.23it/s]



Epoch [655/700]:  34%|███▎      | 262/782 [00:01<00:02, 203.45it/s]



Epoch [655/700]:  39%|███▉      | 304/782 [00:01<00:02, 204.64it/s]



Epoch [655/700]:  44%|████▍     | 347/782 [00:01<00:02, 208.46it/s]



Epoch [655/700]:  50%|████▉     | 390/782 [00:01<00:01, 206.50it/s]



Epoch [655/700]:  55%|█████▌    | 432/782 [00:02<00:01, 204.13it/s]



Epoch [655/700]:  61%|██████    | 474/782 [00:02<00:01, 206.40it/s]



Epoch [655/700]:  66%|██████▌   | 517/782 [00:02<00:01, 208.63it/s]



Epoch [655/700]:  71%|███████▏  | 559/782 [00:02<00:01, 207.08it/s]



Epoch [655/700]:  77%|███████▋  | 602/782 [00:02<00:00, 206.07it/s]



Epoch [655/700]:  82%|████████▏ | 644/782 [00:03<00:00, 206.25it/s]



Epoch [655/700]:  88%|████████▊ | 686/782 [00:03<00:00, 205.00it/s]



Epoch [655/700]:  93%|█████████▎| 729/782 [00:03<00:00, 205.60it/s]



Epoch [655/700]: 100%|██████████| 782/782 [00:03<00:00, 206.98it/s]






Learning Rate: 0.004050
Train Loss: 0.0425, Accuracy: 98.59%, Confidence: 0.9842
Test Loss: 2.4047, Accuracy: 72.94%, Confidence: 0.9509
Train-Test Accuracy Gap: 25.65%


Epoch [656/700]:   3%|▎         | 21/782 [00:00<00:03, 207.78it/s]



Epoch [656/700]:   5%|▌         | 42/782 [00:00<00:03, 199.35it/s]



Epoch [656/700]:   8%|▊         | 63/782 [00:00<00:03, 203.83it/s]



Epoch [656/700]:  11%|█         | 84/782 [00:00<00:03, 204.22it/s]



Epoch [656/700]:  13%|█▎        | 105/782 [00:00<00:03, 205.42it/s]



Epoch [656/700]:  16%|█▌        | 127/782 [00:00<00:03, 207.18it/s]



Epoch [656/700]:  22%|██▏       | 170/782 [00:00<00:02, 209.17it/s]



Epoch [656/700]:  27%|██▋       | 212/782 [00:01<00:02, 206.32it/s]



Epoch [656/700]:  32%|███▏      | 254/782 [00:01<00:02, 206.97it/s]



Epoch [656/700]:  35%|███▌      | 276/782 [00:01<00:02, 209.19it/s]



Epoch [656/700]:  38%|███▊      | 298/782 [00:01<00:02, 210.40it/s]



Epoch [656/700]:  44%|████▎     | 342/782 [00:01<00:02, 209.80it/s]



Epoch [656/700]:  52%|█████▏    | 405/782 [00:01<00:01, 204.84it/s]



Epoch [656/700]:  60%|█████▉    | 468/782 [00:02<00:01, 202.92it/s]



Epoch [656/700]:  65%|██████▌   | 511/782 [00:02<00:01, 206.18it/s]



Epoch [656/700]:  73%|███████▎  | 574/782 [00:02<00:01, 205.17it/s]



Epoch [656/700]:  82%|████████▏ | 638/782 [00:03<00:00, 204.19it/s]



Epoch [656/700]:  87%|████████▋ | 680/782 [00:03<00:00, 205.11it/s]



Epoch [656/700]:  95%|█████████▌| 744/782 [00:03<00:00, 204.34it/s]



Epoch [656/700]: 100%|██████████| 782/782 [00:03<00:00, 205.08it/s]


Learning Rate: 0.004050
Train Loss: 0.0341, Accuracy: 98.88%, Confidence: 0.9857
Test Loss: 2.4576, Accuracy: 72.48%, Confidence: 0.9489
Train-Test Accuracy Gap: 26.40%


Epoch [657/700]:   2%|▏         | 18/782 [00:00<00:04, 170.88it/s]



Epoch [657/700]:  10%|█         | 82/782 [00:00<00:03, 203.28it/s]



Epoch [657/700]:  16%|█▌        | 124/782 [00:00<00:03, 204.40it/s]



Epoch [657/700]:  21%|██        | 166/782 [00:00<00:03, 201.08it/s]



Epoch [657/700]:  27%|██▋       | 208/782 [00:01<00:02, 203.95it/s]



Epoch [657/700]:  32%|███▏      | 252/782 [00:01<00:02, 208.92it/s]



Epoch [657/700]:  38%|███▊      | 296/782 [00:01<00:02, 210.69it/s]



Epoch [657/700]:  46%|████▌     | 361/782 [00:01<00:02, 207.95it/s]



Epoch [657/700]:  54%|█████▍    | 425/782 [00:02<00:01, 209.03it/s]



Epoch [657/700]:  60%|█████▉    | 467/782 [00:02<00:01, 209.01it/s]



Epoch [657/700]:  65%|██████▌   | 510/782 [00:02<00:01, 208.29it/s]



Epoch [657/700]:  71%|███████   | 553/782 [00:02<00:01, 209.46it/s]



Epoch [657/700]:  76%|███████▌  | 596/782 [00:02<00:00, 209.77it/s]



Epoch [657/700]:  82%|████████▏ | 638/782 [00:03<00:00, 208.01it/s]



Epoch [657/700]:  87%|████████▋ | 681/782 [00:03<00:00, 210.53it/s]



Epoch [657/700]:  93%|█████████▎| 725/782 [00:03<00:00, 210.90it/s]



Epoch [657/700]: 100%|██████████| 782/782 [00:03<00:00, 207.20it/s]


Learning Rate: 0.004050
Train Loss: 0.0430, Accuracy: 98.53%, Confidence: 0.9842
Test Loss: 2.3048, Accuracy: 73.35%, Confidence: 0.9488
Train-Test Accuracy Gap: 25.18%


Epoch [658/700]:   3%|▎         | 22/782 [00:00<00:03, 209.70it/s]



Epoch [658/700]:   5%|▌         | 43/782 [00:00<00:03, 201.51it/s]



Epoch [658/700]:   8%|▊         | 65/782 [00:00<00:03, 205.75it/s]



Epoch [658/700]:  20%|█▉        | 153/782 [00:00<00:02, 214.65it/s]



Epoch [658/700]:  28%|██▊       | 219/782 [00:01<00:02, 215.01it/s]



Epoch [658/700]:  34%|███▎      | 263/782 [00:01<00:02, 212.43it/s]



Epoch [658/700]:  39%|███▉      | 307/782 [00:01<00:02, 210.61it/s]



Epoch [658/700]:  45%|████▍     | 350/782 [00:01<00:02, 207.51it/s]



Epoch [658/700]:  50%|█████     | 393/782 [00:01<00:01, 209.65it/s]



Epoch [658/700]:  56%|█████▌    | 436/782 [00:02<00:01, 211.98it/s]



Epoch [658/700]:  61%|██████▏   | 480/782 [00:02<00:01, 212.90it/s]



Epoch [658/700]:  67%|██████▋   | 524/782 [00:02<00:01, 214.67it/s]



Epoch [658/700]:  73%|███████▎  | 568/782 [00:02<00:00, 215.46it/s]



Epoch [658/700]:  78%|███████▊  | 612/782 [00:02<00:00, 214.96it/s]



Epoch [658/700]:  84%|████████▍ | 656/782 [00:03<00:00, 214.85it/s]



Epoch [658/700]:  90%|████████▉ | 700/782 [00:03<00:00, 214.49it/s]



Epoch [658/700]:  95%|█████████▌| 744/782 [00:03<00:00, 212.09it/s]



Epoch [658/700]: 100%|██████████| 782/782 [00:03<00:00, 212.09it/s]


Learning Rate: 0.004050
Train Loss: 0.0359, Accuracy: 98.84%, Confidence: 0.9856
Test Loss: 2.5222, Accuracy: 72.64%, Confidence: 0.9526
Train-Test Accuracy Gap: 26.20%


Epoch [659/700]:   3%|▎         | 22/782 [00:00<00:03, 212.72it/s]



Epoch [659/700]:  11%|█         | 87/782 [00:00<00:03, 208.89it/s]



Epoch [659/700]:  16%|█▋        | 129/782 [00:00<00:03, 205.61it/s]



Epoch [659/700]:  22%|██▏       | 171/782 [00:00<00:02, 204.46it/s]



Epoch [659/700]:  27%|██▋       | 213/782 [00:01<00:02, 203.90it/s]



Epoch [659/700]:  33%|███▎      | 256/782 [00:01<00:02, 203.65it/s]



Epoch [659/700]:  38%|███▊      | 298/782 [00:01<00:02, 200.83it/s]



Epoch [659/700]:  43%|████▎     | 340/782 [00:01<00:02, 203.69it/s]



Epoch [659/700]:  49%|████▉     | 382/782 [00:01<00:01, 205.68it/s]



Epoch [659/700]:  54%|█████▍    | 425/782 [00:02<00:01, 208.63it/s]



Epoch [659/700]:  60%|█████▉    | 467/782 [00:02<00:01, 200.27it/s]



Epoch [659/700]:  70%|███████   | 551/782 [00:02<00:01, 201.73it/s]



Epoch [659/700]:  76%|███████▌  | 593/782 [00:02<00:00, 198.06it/s]



Epoch [659/700]:  84%|████████▍ | 656/782 [00:03<00:00, 201.47it/s]



Epoch [659/700]:  89%|████████▉ | 699/782 [00:03<00:00, 205.06it/s]



Epoch [659/700]:  95%|█████████▍| 741/782 [00:03<00:00, 205.04it/s]



Epoch [659/700]: 100%|██████████| 782/782 [00:03<00:00, 203.45it/s]






Learning Rate: 0.004050
Train Loss: 0.0411, Accuracy: 98.56%, Confidence: 0.9846
Test Loss: 2.2442, Accuracy: 73.94%, Confidence: 0.9508
Train-Test Accuracy Gap: 24.62%


Epoch [660/700]:   3%|▎         | 20/782 [00:00<00:03, 198.48it/s]



Epoch [660/700]:   5%|▌         | 40/782 [00:00<00:03, 195.08it/s]



Epoch [660/700]:  10%|█         | 81/782 [00:00<00:03, 198.85it/s]



Epoch [660/700]:  16%|█▌        | 124/782 [00:00<00:03, 204.27it/s]



Epoch [660/700]:  21%|██▏       | 167/782 [00:00<00:02, 207.81it/s]



Epoch [660/700]:  30%|██▉       | 233/782 [00:01<00:02, 209.69it/s]



Epoch [660/700]:  35%|███▌      | 275/782 [00:01<00:02, 208.33it/s]



Epoch [660/700]:  41%|████      | 318/782 [00:01<00:02, 207.40it/s]



Epoch [660/700]:  46%|████▌     | 360/782 [00:01<00:02, 207.85it/s]



Epoch [660/700]:  51%|█████▏    | 402/782 [00:01<00:01, 206.80it/s]



Epoch [660/700]:  60%|█████▉    | 467/782 [00:02<00:01, 208.13it/s]



Epoch [660/700]:  65%|██████▌   | 509/782 [00:02<00:01, 208.68it/s]



Epoch [660/700]:  71%|███████   | 552/782 [00:02<00:01, 208.63it/s]



Epoch [660/700]:  76%|███████▌  | 596/782 [00:02<00:00, 210.08it/s]



Epoch [660/700]:  82%|████████▏ | 639/782 [00:03<00:00, 208.60it/s]



Epoch [660/700]:  87%|████████▋ | 683/782 [00:03<00:00, 210.44it/s]



Epoch [660/700]:  93%|█████████▎| 727/782 [00:03<00:00, 211.19it/s]



Epoch [660/700]: 100%|██████████| 782/782 [00:03<00:00, 207.75it/s]






Learning Rate: 0.004050
Train Loss: 0.0434, Accuracy: 98.60%, Confidence: 0.9851
Test Loss: 2.1724, Accuracy: 73.64%, Confidence: 0.9482
Train-Test Accuracy Gap: 24.96%


Epoch [661/700]:   3%|▎         | 21/782 [00:00<00:03, 208.94it/s]



Epoch [661/700]:   5%|▌         | 42/782 [00:00<00:03, 209.17it/s]



Epoch [661/700]:  11%|█         | 84/782 [00:00<00:03, 208.39it/s]



Epoch [661/700]:  16%|█▌        | 127/782 [00:00<00:03, 210.00it/s]



Epoch [661/700]:  22%|██▏       | 169/782 [00:00<00:02, 205.57it/s]



Epoch [661/700]:  27%|██▋       | 213/782 [00:01<00:02, 208.38it/s]



Epoch [661/700]:  33%|███▎      | 257/782 [00:01<00:02, 211.06it/s]



Epoch [661/700]:  41%|████▏     | 323/782 [00:01<00:02, 212.73it/s]



Epoch [661/700]:  47%|████▋     | 367/782 [00:01<00:01, 213.64it/s]



Epoch [661/700]:  53%|█████▎    | 411/782 [00:01<00:01, 214.17it/s]



Epoch [661/700]:  58%|█████▊    | 455/782 [00:02<00:01, 212.85it/s]



Epoch [661/700]:  64%|██████▍   | 499/782 [00:02<00:01, 211.67it/s]



Epoch [661/700]:  69%|██████▉   | 543/782 [00:02<00:01, 209.76it/s]



Epoch [661/700]:  75%|███████▌  | 587/782 [00:02<00:00, 210.26it/s]



Epoch [661/700]:  81%|████████  | 631/782 [00:02<00:00, 209.66it/s]



Epoch [661/700]:  86%|████████▌ | 674/782 [00:03<00:00, 207.71it/s]



Epoch [661/700]:  92%|█████████▏| 718/782 [00:03<00:00, 210.90it/s]



Epoch [661/700]:  97%|█████████▋| 762/782 [00:03<00:00, 210.25it/s]



Epoch [661/700]: 100%|██████████| 782/782 [00:03<00:00, 209.87it/s]


Learning Rate: 0.004050
Train Loss: 0.0415, Accuracy: 98.58%, Confidence: 0.9843
Test Loss: 2.2754, Accuracy: 73.17%, Confidence: 0.9476
Train-Test Accuracy Gap: 25.41%


Epoch [662/700]:   3%|▎         | 21/782 [00:00<00:03, 208.49it/s]



Epoch [662/700]:   5%|▌         | 43/782 [00:00<00:03, 211.61it/s]



Epoch [662/700]:  11%|█         | 87/782 [00:00<00:03, 213.34it/s]



Epoch [662/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.30it/s]



Epoch [662/700]:  22%|██▏       | 175/782 [00:00<00:02, 208.90it/s]



Epoch [662/700]:  28%|██▊       | 217/782 [00:01<00:02, 207.61it/s]



Epoch [662/700]:  33%|███▎      | 260/782 [00:01<00:02, 209.43it/s]



Epoch [662/700]:  39%|███▊      | 303/782 [00:01<00:02, 209.98it/s]



Epoch [662/700]:  47%|████▋     | 369/782 [00:01<00:01, 211.65it/s]



Epoch [662/700]:  53%|█████▎    | 413/782 [00:01<00:01, 210.99it/s]




Epoch [662/700]:  58%|█████▊    | 456/782 [00:02<00:01, 200.53it/s]



Epoch [662/700]:  66%|██████▋   | 520/782 [00:02<00:01, 206.80it/s]



Epoch [662/700]:  72%|███████▏  | 562/782 [00:02<00:01, 206.41it/s]



Epoch [662/700]:  77%|███████▋  | 604/782 [00:02<00:00, 204.50it/s]



Epoch [662/700]:  83%|████████▎ | 646/782 [00:03<00:00, 197.15it/s]



Epoch [662/700]:  88%|████████▊ | 690/782 [00:03<00:00, 204.76it/s]



Epoch [662/700]:  94%|█████████▎| 733/782 [00:03<00:00, 208.89it/s]



Epoch [662/700]: 100%|██████████| 782/782 [00:03<00:00, 207.00it/s]


Learning Rate: 0.004050
Train Loss: 0.0370, Accuracy: 98.71%, Confidence: 0.9852
Test Loss: 2.3816, Accuracy: 72.85%, Confidence: 0.9502
Train-Test Accuracy Gap: 25.86%


Epoch [663/700]:   3%|▎         | 20/782 [00:00<00:03, 195.71it/s]



Epoch [663/700]:   5%|▌         | 42/782 [00:00<00:03, 207.86it/s]



Epoch [663/700]:  11%|█         | 85/782 [00:00<00:03, 208.86it/s]



Epoch [663/700]:  16%|█▋        | 129/782 [00:00<00:03, 208.94it/s]



Epoch [663/700]:  22%|██▏       | 172/782 [00:00<00:02, 209.71it/s]



Epoch [663/700]:  27%|██▋       | 215/782 [00:01<00:02, 210.02it/s]



Epoch [663/700]:  33%|███▎      | 259/782 [00:01<00:02, 209.81it/s]



Epoch [663/700]:  41%|████▏     | 324/782 [00:01<00:02, 210.70it/s]



Epoch [663/700]:  47%|████▋     | 368/782 [00:01<00:02, 206.79it/s]



Epoch [663/700]:  53%|█████▎    | 411/782 [00:01<00:01, 209.28it/s]



Epoch [663/700]:  58%|█████▊    | 455/782 [00:02<00:01, 209.96it/s]



Epoch [663/700]:  67%|██████▋   | 521/782 [00:02<00:01, 210.98it/s]



Epoch [663/700]:  72%|███████▏  | 565/782 [00:02<00:01, 210.62it/s]



Epoch [663/700]:  78%|███████▊  | 609/782 [00:02<00:00, 211.04it/s]



Epoch [663/700]:  84%|████████▎ | 653/782 [00:03<00:00, 211.28it/s]



Epoch [663/700]:  92%|█████████▏| 719/782 [00:03<00:00, 213.11it/s]



Epoch [663/700]:  98%|█████████▊| 763/782 [00:03<00:00, 213.06it/s]



Epoch [663/700]: 100%|██████████| 782/782 [00:03<00:00, 210.16it/s]


Learning Rate: 0.004050
Train Loss: 0.0407, Accuracy: 98.65%, Confidence: 0.9849
Test Loss: 2.3986, Accuracy: 72.96%, Confidence: 0.9508
Train-Test Accuracy Gap: 25.69%


Epoch [664/700]:   3%|▎         | 20/782 [00:00<00:03, 194.20it/s]



Epoch [664/700]:  11%|█         | 85/782 [00:00<00:03, 211.32it/s]



Epoch [664/700]:  16%|█▋        | 129/782 [00:00<00:03, 213.84it/s]



Epoch [664/700]:  22%|██▏       | 173/782 [00:00<00:02, 211.03it/s]



Epoch [664/700]:  28%|██▊       | 217/782 [00:01<00:02, 209.92it/s]



Epoch [664/700]:  33%|███▎      | 261/782 [00:01<00:02, 212.01it/s]



Epoch [664/700]:  39%|███▉      | 305/782 [00:01<00:02, 211.43it/s]



Epoch [664/700]:  45%|████▍     | 349/782 [00:01<00:02, 210.53it/s]



Epoch [664/700]:  50%|█████     | 393/782 [00:01<00:01, 213.98it/s]



Epoch [664/700]:  56%|█████▌    | 437/782 [00:02<00:01, 215.03it/s]



Epoch [664/700]:  62%|██████▏   | 481/782 [00:02<00:01, 213.83it/s]



Epoch [664/700]:  70%|██████▉   | 547/782 [00:02<00:01, 211.56it/s]



Epoch [664/700]:  76%|███████▌  | 591/782 [00:02<00:00, 212.75it/s]



Epoch [664/700]:  81%|████████  | 635/782 [00:02<00:00, 213.21it/s]



Epoch [664/700]:  90%|████████▉ | 701/782 [00:03<00:00, 214.15it/s]



Epoch [664/700]:  95%|█████████▌| 745/782 [00:03<00:00, 215.26it/s]



Epoch [664/700]: 100%|██████████| 782/782 [00:03<00:00, 212.13it/s]


Learning Rate: 0.004050
Train Loss: 0.0354, Accuracy: 98.81%, Confidence: 0.9855
Test Loss: 2.2724, Accuracy: 73.73%, Confidence: 0.9521
Train-Test Accuracy Gap: 25.08%


Epoch [665/700]:   3%|▎         | 21/782 [00:00<00:03, 203.65it/s]



Epoch [665/700]:  11%|█         | 85/782 [00:00<00:03, 208.57it/s]



Epoch [665/700]:  16%|█▋        | 129/782 [00:00<00:03, 212.34it/s]



Epoch [665/700]:  22%|██▏       | 173/782 [00:00<00:02, 212.10it/s]



Epoch [665/700]:  31%|███       | 239/782 [00:01<00:02, 212.55it/s]



Epoch [665/700]:  36%|███▌      | 283/782 [00:01<00:02, 209.26it/s]



Epoch [665/700]:  42%|████▏     | 325/782 [00:01<00:02, 207.52it/s]



Epoch [665/700]:  47%|████▋     | 368/782 [00:01<00:01, 210.32it/s]



Epoch [665/700]:  53%|█████▎    | 412/782 [00:01<00:01, 211.41it/s]



Epoch [665/700]:  58%|█████▊    | 456/782 [00:02<00:01, 207.11it/s]



Epoch [665/700]:  64%|██████▎   | 498/782 [00:02<00:01, 207.78it/s]



Epoch [665/700]:  69%|██████▉   | 542/782 [00:02<00:01, 209.48it/s]



Epoch [665/700]:  75%|███████▍  | 584/782 [00:02<00:00, 206.43it/s]



Epoch [665/700]:  80%|████████  | 628/782 [00:02<00:00, 209.64it/s]



Epoch [665/700]:  88%|████████▊ | 692/782 [00:03<00:00, 209.61it/s]



Epoch [665/700]:  94%|█████████▍| 734/782 [00:03<00:00, 205.98it/s]



Epoch [665/700]: 100%|██████████| 782/782 [00:03<00:00, 208.92it/s]






Learning Rate: 0.004050
Train Loss: 0.0353, Accuracy: 98.86%, Confidence: 0.9863
Test Loss: 2.6123, Accuracy: 72.07%, Confidence: 0.9505
Train-Test Accuracy Gap: 26.79%


Epoch [666/700]:   2%|▏         | 19/782 [00:00<00:04, 189.63it/s]



Epoch [666/700]:  10%|█         | 80/782 [00:00<00:03, 198.89it/s]



Epoch [666/700]:  16%|█▌        | 122/782 [00:00<00:03, 202.95it/s]



Epoch [666/700]:  21%|██        | 165/782 [00:00<00:02, 206.46it/s]



Epoch [666/700]:  26%|██▋       | 207/782 [00:01<00:02, 206.08it/s]



Epoch [666/700]:  32%|███▏      | 249/782 [00:01<00:02, 205.75it/s]



Epoch [666/700]:  37%|███▋      | 292/782 [00:01<00:02, 209.73it/s]



Epoch [666/700]:  46%|████▌     | 358/782 [00:01<00:02, 208.66it/s]



Epoch [666/700]:  51%|█████▏    | 402/782 [00:01<00:01, 211.23it/s]



Epoch [666/700]:  57%|█████▋    | 445/782 [00:02<00:01, 204.94it/s]



Epoch [666/700]:  62%|██████▏   | 488/782 [00:02<00:01, 205.48it/s]



Epoch [666/700]:  68%|██████▊   | 530/782 [00:02<00:01, 206.72it/s]



Epoch [666/700]:  73%|███████▎  | 574/782 [00:02<00:00, 211.03it/s]



Epoch [666/700]:  79%|███████▉  | 618/782 [00:02<00:00, 213.43it/s]



Epoch [666/700]:  85%|████████▍ | 662/782 [00:03<00:00, 211.60it/s]



Epoch [666/700]:  93%|█████████▎| 727/782 [00:03<00:00, 210.23it/s]



Epoch [666/700]:  96%|█████████▌| 749/782 [00:03<00:00, 206.19it/s]



Epoch [666/700]: 100%|██████████| 782/782 [00:03<00:00, 207.01it/s]


Learning Rate: 0.004050
Train Loss: 0.0430, Accuracy: 98.54%, Confidence: 0.9846
Test Loss: 2.2830, Accuracy: 73.79%, Confidence: 0.9491
Train-Test Accuracy Gap: 24.75%


Epoch [667/700]:   3%|▎         | 22/782 [00:00<00:03, 211.47it/s]



Epoch [667/700]:   6%|▌         | 44/782 [00:00<00:03, 212.63it/s]



Epoch [667/700]:   8%|▊         | 66/782 [00:00<00:03, 213.08it/s]



Epoch [667/700]:  11%|█▏        | 88/782 [00:00<00:03, 210.68it/s]



Epoch [667/700]:  14%|█▍        | 110/782 [00:00<00:03, 212.31it/s]



Epoch [667/700]:  23%|██▎       | 177/782 [00:00<00:02, 215.02it/s]



Epoch [667/700]:  28%|██▊       | 221/782 [00:01<00:02, 214.95it/s]



Epoch [667/700]:  34%|███▍      | 264/782 [00:01<00:02, 209.09it/s]



Epoch [667/700]:  39%|███▉      | 307/782 [00:01<00:02, 209.09it/s]



Epoch [667/700]:  45%|████▍     | 350/782 [00:01<00:02, 209.12it/s]



Epoch [667/700]:  53%|█████▎    | 416/782 [00:01<00:01, 211.76it/s]



Epoch [667/700]:  59%|█████▉    | 460/782 [00:02<00:01, 212.34it/s]



Epoch [667/700]:  64%|██████▍   | 504/782 [00:02<00:01, 208.07it/s]



Epoch [667/700]:  70%|███████   | 548/782 [00:02<00:01, 209.54it/s]



Epoch [667/700]:  76%|███████▌  | 591/782 [00:02<00:00, 206.73it/s]



Epoch [667/700]:  81%|████████  | 635/782 [00:03<00:00, 211.06it/s]



Epoch [667/700]:  87%|████████▋ | 679/782 [00:03<00:00, 212.51it/s]



Epoch [667/700]:  92%|█████████▏| 722/782 [00:03<00:00, 206.13it/s]



Epoch [667/700]: 100%|██████████| 782/782 [00:03<00:00, 210.26it/s]






Learning Rate: 0.004050
Train Loss: 0.0372, Accuracy: 98.72%, Confidence: 0.9855
Test Loss: 2.3345, Accuracy: 72.81%, Confidence: 0.9483
Train-Test Accuracy Gap: 25.91%


Epoch [668/700]:   3%|▎         | 21/782 [00:00<00:03, 201.10it/s]



Epoch [668/700]:  11%|█         | 86/782 [00:00<00:03, 207.67it/s]



Epoch [668/700]:  17%|█▋        | 130/782 [00:00<00:03, 210.12it/s]



Epoch [668/700]:  22%|██▏       | 174/782 [00:00<00:02, 209.91it/s]



Epoch [668/700]:  28%|██▊       | 216/782 [00:01<00:02, 206.99it/s]



Epoch [668/700]:  36%|███▌      | 280/782 [00:01<00:02, 207.73it/s]



Epoch [668/700]:  41%|████▏     | 323/782 [00:01<00:02, 208.69it/s]



Epoch [668/700]:  47%|████▋     | 367/782 [00:01<00:01, 211.87it/s]



Epoch [668/700]:  53%|█████▎    | 411/782 [00:01<00:01, 214.11it/s]



Epoch [668/700]:  58%|█████▊    | 455/782 [00:02<00:01, 208.48it/s]



Epoch [668/700]:  64%|██████▎   | 497/782 [00:02<00:01, 205.66it/s]



Epoch [668/700]:  69%|██████▉   | 540/782 [00:02<00:01, 208.50it/s]



Epoch [668/700]:  75%|███████▍  | 584/782 [00:02<00:00, 209.63it/s]



Epoch [668/700]:  80%|████████  | 627/782 [00:03<00:00, 205.33it/s]



Epoch [668/700]:  86%|████████▌ | 669/782 [00:03<00:00, 205.81it/s]



Epoch [668/700]:  91%|█████████ | 711/782 [00:03<00:00, 199.70it/s]



Epoch [668/700]:  96%|█████████▋| 753/782 [00:03<00:00, 203.21it/s]



Epoch [668/700]: 100%|██████████| 782/782 [00:03<00:00, 206.42it/s]


Learning Rate: 0.004050
Train Loss: 0.0405, Accuracy: 98.65%, Confidence: 0.9851
Test Loss: 2.3199, Accuracy: 73.65%, Confidence: 0.9512
Train-Test Accuracy Gap: 25.00%


Epoch [669/700]:   3%|▎         | 20/782 [00:00<00:03, 193.64it/s]



Epoch [669/700]:   5%|▌         | 40/782 [00:00<00:03, 193.09it/s]



Epoch [669/700]:  11%|█         | 84/782 [00:00<00:03, 205.59it/s]



Epoch [669/700]:  16%|█▌        | 127/782 [00:00<00:03, 210.56it/s]



Epoch [669/700]:  19%|█▉        | 149/782 [00:00<00:03, 206.92it/s]



Epoch [669/700]:  22%|██▏       | 170/782 [00:00<00:02, 205.59it/s]



Epoch [669/700]:  27%|██▋       | 212/782 [00:01<00:02, 200.12it/s]



Epoch [669/700]:  33%|███▎      | 255/782 [00:01<00:02, 202.91it/s]



Epoch [669/700]:  41%|████      | 319/782 [00:01<00:02, 207.50it/s]



Epoch [669/700]:  46%|████▋     | 362/782 [00:01<00:02, 206.75it/s]



Epoch [669/700]:  52%|█████▏    | 404/782 [00:01<00:01, 200.87it/s]



Epoch [669/700]:  57%|█████▋    | 447/782 [00:02<00:01, 206.06it/s]



Epoch [669/700]:  63%|██████▎   | 489/782 [00:02<00:01, 204.87it/s]



Epoch [669/700]:  68%|██████▊   | 532/782 [00:02<00:01, 207.55it/s]



Epoch [669/700]:  73%|███████▎  | 574/782 [00:02<00:01, 207.93it/s]



Epoch [669/700]:  79%|███████▉  | 618/782 [00:03<00:00, 210.38it/s]



Epoch [669/700]:  85%|████████▍ | 662/782 [00:03<00:00, 208.90it/s]



Epoch [669/700]:  93%|█████████▎| 726/782 [00:03<00:00, 209.24it/s]



Epoch [669/700]:  98%|█████████▊| 768/782 [00:03<00:00, 204.54it/s]



Epoch [669/700]: 100%|██████████| 782/782 [00:03<00:00, 204.80it/s]


Learning Rate: 0.004050
Train Loss: 0.0373, Accuracy: 98.71%, Confidence: 0.9853
Test Loss: 2.7559, Accuracy: 71.28%, Confidence: 0.9513
Train-Test Accuracy Gap: 27.43%


Epoch [670/700]:   3%|▎         | 21/782 [00:00<00:03, 199.65it/s]



Epoch [670/700]:  11%|█         | 86/782 [00:00<00:03, 211.50it/s]



Epoch [670/700]:  17%|█▋        | 130/782 [00:00<00:03, 212.72it/s]



Epoch [670/700]:  22%|██▏       | 174/782 [00:00<00:02, 213.60it/s]



Epoch [670/700]:  28%|██▊       | 218/782 [00:01<00:02, 208.03it/s]



Epoch [670/700]:  33%|███▎      | 261/782 [00:01<00:02, 205.73it/s]



Epoch [670/700]:  39%|███▉      | 304/782 [00:01<00:02, 207.04it/s]



Epoch [670/700]:  44%|████▍     | 347/782 [00:01<00:02, 207.77it/s]



Epoch [670/700]:  50%|████▉     | 390/782 [00:01<00:01, 207.98it/s]



Epoch [670/700]:  55%|█████▌    | 433/782 [00:02<00:01, 209.33it/s]



Epoch [670/700]:  61%|██████    | 477/782 [00:02<00:01, 212.61it/s]



Epoch [670/700]:  67%|██████▋   | 521/782 [00:02<00:01, 209.32it/s]



Epoch [670/700]:  72%|███████▏  | 565/782 [00:02<00:01, 209.59it/s]



Epoch [670/700]:  78%|███████▊  | 608/782 [00:02<00:00, 209.43it/s]



Epoch [670/700]:  83%|████████▎ | 652/782 [00:03<00:00, 210.66it/s]



Epoch [670/700]:  92%|█████████▏| 718/782 [00:03<00:00, 211.76it/s]



Epoch [670/700]:  97%|█████████▋| 762/782 [00:03<00:00, 211.59it/s]



Epoch [670/700]: 100%|██████████| 782/782 [00:03<00:00, 209.46it/s]


Learning Rate: 0.004050
Train Loss: 0.0455, Accuracy: 98.48%, Confidence: 0.9847
Test Loss: 2.3175, Accuracy: 72.40%, Confidence: 0.9487
Train-Test Accuracy Gap: 26.08%


Epoch [671/700]:   3%|▎         | 21/782 [00:00<00:03, 206.45it/s]



Epoch [671/700]:   5%|▌         | 42/782 [00:00<00:03, 206.85it/s]



Epoch [671/700]:   8%|▊         | 63/782 [00:00<00:03, 199.28it/s]



Epoch [671/700]:  11%|█         | 83/782 [00:00<00:03, 186.11it/s]



Epoch [671/700]:  13%|█▎        | 104/782 [00:00<00:03, 192.10it/s]



Epoch [671/700]:  16%|█▌        | 126/782 [00:00<00:03, 198.67it/s]



Epoch [671/700]:  19%|█▉        | 148/782 [00:00<00:03, 203.69it/s]



Epoch [671/700]:  22%|██▏       | 170/782 [00:00<00:02, 205.90it/s]



Epoch [671/700]:  25%|██▍       | 192/782 [00:00<00:02, 208.95it/s]



Epoch [671/700]:  27%|██▋       | 213/782 [00:01<00:02, 205.05it/s]



Epoch [671/700]:  30%|██▉       | 234/782 [00:01<00:02, 206.34it/s]



Epoch [671/700]:  33%|███▎      | 255/782 [00:01<00:02, 206.37it/s]



Epoch [671/700]:  35%|███▌      | 277/782 [00:01<00:02, 207.64it/s]



Epoch [671/700]:  38%|███▊      | 299/782 [00:01<00:02, 208.93it/s]



Epoch [671/700]:  41%|████      | 321/782 [00:01<00:02, 210.64it/s]



Epoch [671/700]:  47%|████▋     | 365/782 [00:01<00:01, 210.50it/s]



Epoch [671/700]:  49%|████▉     | 387/782 [00:01<00:01, 210.86it/s]



Epoch [671/700]:  52%|█████▏    | 409/782 [00:01<00:01, 207.85it/s]



Epoch [671/700]:  55%|█████▍    | 430/782 [00:02<00:01, 207.20it/s]



Epoch [671/700]:  58%|█████▊    | 451/782 [00:02<00:01, 205.57it/s]



Epoch [671/700]:  60%|██████    | 472/782 [00:02<00:01, 206.42it/s]



Epoch [671/700]:  63%|██████▎   | 493/782 [00:02<00:01, 205.31it/s]



Epoch [671/700]:  68%|██████▊   | 535/782 [00:02<00:01, 204.31it/s]



Epoch [671/700]:  74%|███████▍  | 577/782 [00:02<00:01, 204.53it/s]



Epoch [671/700]:  76%|███████▋  | 598/782 [00:02<00:00, 202.89it/s]



Epoch [671/700]:  79%|███████▉  | 619/782 [00:03<00:00, 198.64it/s]



Epoch [671/700]:  82%|████████▏ | 640/782 [00:03<00:00, 200.56it/s]



Epoch [671/700]:  85%|████████▍ | 662/782 [00:03<00:00, 203.92it/s]



Epoch [671/700]:  87%|████████▋ | 684/782 [00:03<00:00, 207.11it/s]



Epoch [671/700]:  90%|█████████ | 706/782 [00:03<00:00, 208.15it/s]



Epoch [671/700]:  93%|█████████▎| 727/782 [00:03<00:00, 207.22it/s]



Epoch [671/700]:  96%|█████████▌| 748/782 [00:03<00:00, 206.80it/s]



Epoch [671/700]: 100%|██████████| 782/782 [00:03<00:00, 204.87it/s]


Learning Rate: 0.004050
Train Loss: 0.0398, Accuracy: 98.66%, Confidence: 0.9850
Test Loss: 2.4148, Accuracy: 72.86%, Confidence: 0.9500
Train-Test Accuracy Gap: 25.80%


Epoch [672/700]:   3%|▎         | 21/782 [00:00<00:03, 202.10it/s]



Epoch [672/700]:   5%|▌         | 43/782 [00:00<00:03, 210.79it/s]



Epoch [672/700]:   8%|▊         | 65/782 [00:00<00:03, 209.91it/s]



Epoch [672/700]:  11%|█         | 87/782 [00:00<00:03, 212.11it/s]



Epoch [672/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.76it/s]



Epoch [672/700]:  22%|██▏       | 175/782 [00:00<00:02, 209.27it/s]



Epoch [672/700]:  28%|██▊       | 219/782 [00:01<00:02, 208.83it/s]



Epoch [672/700]:  33%|███▎      | 261/782 [00:01<00:02, 208.75it/s]



Epoch [672/700]:  39%|███▊      | 303/782 [00:01<00:02, 206.45it/s]



Epoch [672/700]:  47%|████▋     | 366/782 [00:01<00:02, 204.76it/s]



Epoch [672/700]:  55%|█████▌    | 431/782 [00:02<00:01, 209.34it/s]



Epoch [672/700]:  61%|██████    | 475/782 [00:02<00:01, 210.96it/s]



Epoch [672/700]:  66%|██████▌   | 518/782 [00:02<00:01, 208.45it/s]



Epoch [672/700]:  72%|███████▏  | 560/782 [00:02<00:01, 204.65it/s]



Epoch [672/700]:  77%|███████▋  | 602/782 [00:02<00:00, 202.06it/s]



Epoch [672/700]:  82%|████████▏ | 645/782 [00:03<00:00, 204.48it/s]



Epoch [672/700]:  88%|████████▊ | 688/782 [00:03<00:00, 207.77it/s]



Epoch [672/700]:  96%|█████████▋| 753/782 [00:03<00:00, 209.05it/s]



Epoch [672/700]: 100%|██████████| 782/782 [00:03<00:00, 207.56it/s]


Learning Rate: 0.004050
Train Loss: 0.0358, Accuracy: 98.79%, Confidence: 0.9860
Test Loss: 3.1567, Accuracy: 68.60%, Confidence: 0.9499
Train-Test Accuracy Gap: 30.19%


Epoch [673/700]:   3%|▎         | 21/782 [00:00<00:03, 206.95it/s]



Epoch [673/700]:  11%|█         | 84/782 [00:00<00:03, 203.40it/s]



Epoch [673/700]:  16%|█▋        | 128/782 [00:00<00:03, 208.19it/s]



Epoch [673/700]:  22%|██▏       | 171/782 [00:00<00:02, 210.77it/s]



Epoch [673/700]:  27%|██▋       | 214/782 [00:01<00:02, 208.05it/s]



Epoch [673/700]:  33%|███▎      | 256/782 [00:01<00:02, 207.23it/s]



Epoch [673/700]:  41%|████      | 320/782 [00:01<00:02, 209.57it/s]



Epoch [673/700]:  47%|████▋     | 364/782 [00:01<00:02, 208.62it/s]



Epoch [673/700]:  52%|█████▏    | 406/782 [00:01<00:01, 208.39it/s]



Epoch [673/700]:  57%|█████▋    | 449/782 [00:02<00:01, 205.44it/s]



Epoch [673/700]:  63%|██████▎   | 491/782 [00:02<00:01, 201.68it/s]



Epoch [673/700]:  68%|██████▊   | 534/782 [00:02<00:01, 204.06it/s]



Epoch [673/700]:  74%|███████▍  | 577/782 [00:02<00:01, 203.72it/s]



Epoch [673/700]:  79%|███████▉  | 621/782 [00:03<00:00, 209.67it/s]



Epoch [673/700]:  85%|████████▌ | 665/782 [00:03<00:00, 212.32it/s]



Epoch [673/700]:  93%|█████████▎| 731/782 [00:03<00:00, 210.34it/s]



Epoch [673/700]:  99%|█████████▉| 774/782 [00:03<00:00, 206.30it/s]



Epoch [673/700]: 100%|██████████| 782/782 [00:03<00:00, 206.70it/s]


Learning Rate: 0.004050
Train Loss: 0.0419, Accuracy: 98.55%, Confidence: 0.9846
Test Loss: 2.7982, Accuracy: 70.86%, Confidence: 0.9507
Train-Test Accuracy Gap: 27.69%


Epoch [674/700]:   3%|▎         | 20/782 [00:00<00:03, 199.38it/s]



Epoch [674/700]:   5%|▌         | 42/782 [00:00<00:03, 207.72it/s]



Epoch [674/700]:  11%|█         | 85/782 [00:00<00:03, 210.23it/s]



Epoch [674/700]:  14%|█▎        | 107/782 [00:00<00:03, 208.38it/s]



Epoch [674/700]:  16%|█▋        | 128/782 [00:00<00:03, 201.66it/s]



Epoch [674/700]:  22%|██▏       | 171/782 [00:00<00:02, 206.41it/s]



Epoch [674/700]:  27%|██▋       | 215/782 [00:01<00:02, 209.68it/s]



Epoch [674/700]:  33%|███▎      | 258/782 [00:01<00:02, 211.65it/s]



Epoch [674/700]:  39%|███▊      | 302/782 [00:01<00:02, 214.44it/s]



Epoch [674/700]:  44%|████▍     | 346/782 [00:01<00:02, 213.41it/s]



Epoch [674/700]:  50%|████▉     | 390/782 [00:01<00:01, 211.19it/s]



Epoch [674/700]:  55%|█████▌    | 433/782 [00:02<00:01, 207.57it/s]



Epoch [674/700]:  64%|██████▍   | 499/782 [00:02<00:01, 210.26it/s]



Epoch [674/700]:  69%|██████▉   | 543/782 [00:02<00:01, 210.70it/s]



Epoch [674/700]:  75%|███████▍  | 586/782 [00:02<00:00, 205.42it/s]



Epoch [674/700]:  80%|████████  | 628/782 [00:03<00:00, 205.54it/s]



Epoch [674/700]:  86%|████████▌ | 670/782 [00:03<00:00, 207.53it/s]



Epoch [674/700]:  91%|█████████ | 713/782 [00:03<00:00, 206.32it/s]



Epoch [674/700]:  99%|█████████▉| 777/782 [00:03<00:00, 204.37it/s]



Epoch [674/700]: 100%|██████████| 782/782 [00:03<00:00, 207.96it/s]


Learning Rate: 0.004050
Train Loss: 0.0381, Accuracy: 98.76%, Confidence: 0.9855
Test Loss: 2.3246, Accuracy: 73.36%, Confidence: 0.9510
Train-Test Accuracy Gap: 25.40%


Epoch [675/700]:   3%|▎         | 21/782 [00:00<00:03, 204.74it/s]



Epoch [675/700]:  11%|█         | 87/782 [00:00<00:03, 213.43it/s]



Epoch [675/700]:  17%|█▋        | 131/782 [00:00<00:03, 213.05it/s]



Epoch [675/700]:  22%|██▏       | 175/782 [00:00<00:02, 212.17it/s]



Epoch [675/700]:  28%|██▊       | 219/782 [00:01<00:02, 208.55it/s]



Epoch [675/700]:  34%|███▎      | 263/782 [00:01<00:02, 212.18it/s]



Epoch [675/700]:  39%|███▉      | 307/782 [00:01<00:02, 210.45it/s]



Epoch [675/700]:  45%|████▍     | 351/782 [00:01<00:02, 209.71it/s]



Epoch [675/700]:  51%|█████     | 395/782 [00:01<00:01, 210.51it/s]



Epoch [675/700]:  56%|█████▌    | 439/782 [00:02<00:01, 208.99it/s]



Epoch [675/700]:  62%|██████▏   | 482/782 [00:02<00:01, 205.60it/s]



Epoch [675/700]:  70%|███████   | 548/782 [00:02<00:01, 211.87it/s]



Epoch [675/700]:  76%|███████▌  | 592/782 [00:02<00:00, 210.65it/s]



Epoch [675/700]:  81%|████████▏ | 636/782 [00:03<00:00, 210.34it/s]



Epoch [675/700]:  87%|████████▋ | 680/782 [00:03<00:00, 213.37it/s]



Epoch [675/700]:  93%|█████████▎| 724/782 [00:03<00:00, 214.73it/s]



Epoch [675/700]:  98%|█████████▊| 768/782 [00:03<00:00, 214.70it/s]



Epoch [675/700]: 100%|██████████| 782/782 [00:03<00:00, 210.67it/s]


Learning Rate: 0.004050
Train Loss: 0.0410, Accuracy: 98.60%, Confidence: 0.9850
Test Loss: 2.6361, Accuracy: 72.30%, Confidence: 0.9526
Train-Test Accuracy Gap: 26.30%


Epoch [676/700]:   3%|▎         | 21/782 [00:00<00:03, 207.89it/s]



Epoch [676/700]:  11%|█         | 87/782 [00:00<00:03, 213.71it/s]



Epoch [676/700]:  17%|█▋        | 131/782 [00:00<00:03, 211.83it/s]



Epoch [676/700]:  22%|██▏       | 175/782 [00:00<00:02, 215.40it/s]



Epoch [676/700]:  28%|██▊       | 219/782 [00:01<00:02, 212.83it/s]



Epoch [676/700]:  34%|███▎      | 263/782 [00:01<00:02, 211.13it/s]



Epoch [676/700]:  39%|███▉      | 307/782 [00:01<00:02, 211.10it/s]



Epoch [676/700]:  45%|████▍     | 351/782 [00:01<00:02, 209.27it/s]



Epoch [676/700]:  50%|█████     | 393/782 [00:01<00:01, 200.99it/s]



Epoch [676/700]:  56%|█████▌    | 437/782 [00:02<00:01, 207.23it/s]



Epoch [676/700]:  61%|██████▏   | 479/782 [00:02<00:01, 204.92it/s]



Epoch [676/700]:  67%|██████▋   | 523/782 [00:02<00:01, 209.18it/s]



Epoch [676/700]:  73%|███████▎  | 568/782 [00:02<00:01, 213.45it/s]



Epoch [676/700]:  78%|███████▊  | 612/782 [00:02<00:00, 213.94it/s]



Epoch [676/700]:  84%|████████▍ | 655/782 [00:03<00:00, 207.36it/s]



Epoch [676/700]:  92%|█████████▏| 720/782 [00:03<00:00, 210.39it/s]



Epoch [676/700]:  98%|█████████▊| 763/782 [00:03<00:00, 208.26it/s]



Epoch [676/700]: 100%|██████████| 782/782 [00:03<00:00, 208.89it/s]


Learning Rate: 0.004050
Train Loss: 0.0422, Accuracy: 98.55%, Confidence: 0.9850
Test Loss: 2.3662, Accuracy: 74.00%, Confidence: 0.9524
Train-Test Accuracy Gap: 24.55%


Epoch [677/700]:   3%|▎         | 22/782 [00:00<00:03, 214.12it/s]



Epoch [677/700]:  11%|█▏        | 89/782 [00:00<00:03, 214.37it/s]



Epoch [677/700]:  17%|█▋        | 133/782 [00:00<00:03, 212.03it/s]



Epoch [677/700]:  23%|██▎       | 177/782 [00:00<00:02, 210.62it/s]



Epoch [677/700]:  28%|██▊       | 221/782 [00:01<00:02, 211.24it/s]



Epoch [677/700]:  34%|███▍      | 265/782 [00:01<00:02, 211.56it/s]



Epoch [677/700]:  40%|███▉      | 309/782 [00:01<00:02, 209.15it/s]



Epoch [677/700]:  45%|████▍     | 351/782 [00:01<00:02, 204.60it/s]



Epoch [677/700]:  53%|█████▎    | 417/782 [00:01<00:01, 210.48it/s]



Epoch [677/700]:  59%|█████▉    | 461/782 [00:02<00:01, 210.26it/s]



Epoch [677/700]:  65%|██████▍   | 505/782 [00:02<00:01, 212.64it/s]



Epoch [677/700]:  70%|███████   | 548/782 [00:02<00:01, 208.19it/s]



Epoch [677/700]:  76%|███████▌  | 591/782 [00:02<00:00, 206.84it/s]



Epoch [677/700]:  81%|████████  | 634/782 [00:03<00:00, 202.61it/s]



Epoch [677/700]:  87%|████████▋ | 677/782 [00:03<00:00, 206.17it/s]



Epoch [677/700]:  95%|█████████▌| 743/782 [00:03<00:00, 212.42it/s]



Epoch [677/700]: 100%|██████████| 782/782 [00:03<00:00, 208.62it/s]


Learning Rate: 0.004050
Train Loss: 0.0331, Accuracy: 98.91%, Confidence: 0.9863
Test Loss: 2.7261, Accuracy: 72.03%, Confidence: 0.9506
Train-Test Accuracy Gap: 26.88%


Epoch [678/700]:   3%|▎         | 20/782 [00:00<00:03, 190.72it/s]



Epoch [678/700]:   5%|▌         | 40/782 [00:00<00:03, 194.87it/s]



Epoch [678/700]:   8%|▊         | 61/782 [00:00<00:03, 199.17it/s]



Epoch [678/700]:  16%|█▌        | 127/782 [00:00<00:03, 209.62it/s]



Epoch [678/700]:  24%|██▍       | 191/782 [00:00<00:02, 208.13it/s]



Epoch [678/700]:  30%|██▉       | 234/782 [00:01<00:02, 208.83it/s]



Epoch [678/700]:  38%|███▊      | 298/782 [00:01<00:02, 207.28it/s]



Epoch [678/700]:  43%|████▎     | 340/782 [00:01<00:02, 206.69it/s]



Epoch [678/700]:  49%|████▉     | 383/782 [00:01<00:01, 209.61it/s]



Epoch [678/700]:  54%|█████▍    | 425/782 [00:02<00:01, 204.78it/s]



Epoch [678/700]:  60%|█████▉    | 467/782 [00:02<00:01, 200.43it/s]



Epoch [678/700]:  65%|██████▌   | 509/782 [00:02<00:01, 198.91it/s]



Epoch [678/700]:  70%|███████   | 549/782 [00:02<00:01, 197.86it/s]



Epoch [678/700]:  75%|███████▌  | 590/782 [00:02<00:00, 200.03it/s]



Epoch [678/700]:  84%|████████▍ | 655/782 [00:03<00:00, 208.33it/s]



Epoch [678/700]:  89%|████████▉ | 699/782 [00:03<00:00, 211.60it/s]



Epoch [678/700]:  95%|█████████▌| 743/782 [00:03<00:00, 208.76it/s]



Epoch [678/700]: 100%|██████████| 782/782 [00:03<00:00, 205.30it/s]


Learning Rate: 0.004050
Train Loss: 0.0408, Accuracy: 98.69%, Confidence: 0.9854
Test Loss: 2.6839, Accuracy: 71.93%, Confidence: 0.9521
Train-Test Accuracy Gap: 26.76%


Epoch [679/700]:   3%|▎         | 21/782 [00:00<00:03, 207.39it/s]



Epoch [679/700]:   8%|▊         | 65/782 [00:00<00:03, 212.29it/s]



Epoch [679/700]:  14%|█▍        | 108/782 [00:00<00:03, 204.79it/s]



Epoch [679/700]:  19%|█▉        | 150/782 [00:00<00:03, 205.99it/s]



Epoch [679/700]:  25%|██▍       | 194/782 [00:00<00:02, 211.10it/s]



Epoch [679/700]:  30%|███       | 238/782 [00:01<00:02, 210.99it/s]



Epoch [679/700]:  36%|███▌      | 282/782 [00:01<00:02, 210.29it/s]



Epoch [679/700]:  42%|████▏     | 326/782 [00:01<00:02, 211.91it/s]



Epoch [679/700]:  50%|█████     | 392/782 [00:01<00:01, 211.34it/s]



Epoch [679/700]:  56%|█████▌    | 436/782 [00:02<00:01, 213.47it/s]



Epoch [679/700]:  61%|██████▏   | 480/782 [00:02<00:01, 214.24it/s]



Epoch [679/700]:  67%|██████▋   | 524/782 [00:02<00:01, 212.11it/s]



Epoch [679/700]:  73%|███████▎  | 568/782 [00:02<00:01, 211.30it/s]



Epoch [679/700]:  81%|████████  | 634/782 [00:03<00:00, 212.85it/s]



Epoch [679/700]:  87%|████████▋ | 678/782 [00:03<00:00, 211.77it/s]



Epoch [679/700]:  98%|█████████▊| 766/782 [00:03<00:00, 212.58it/s]



Epoch [679/700]: 100%|██████████| 782/782 [00:03<00:00, 210.92it/s]


Learning Rate: 0.004050
Train Loss: 0.0382, Accuracy: 98.73%, Confidence: 0.9854
Test Loss: 2.6568, Accuracy: 72.02%, Confidence: 0.9511
Train-Test Accuracy Gap: 26.71%


Epoch [680/700]:   3%|▎         | 21/782 [00:00<00:03, 207.05it/s]



Epoch [680/700]:   8%|▊         | 66/782 [00:00<00:03, 214.99it/s]



Epoch [680/700]:  17%|█▋        | 132/782 [00:00<00:03, 208.67it/s]



Epoch [680/700]:  22%|██▏       | 174/782 [00:00<00:02, 206.46it/s]



Epoch [680/700]:  28%|██▊       | 216/782 [00:01<00:02, 205.92it/s]



Epoch [680/700]:  33%|███▎      | 259/782 [00:01<00:02, 207.27it/s]



Epoch [680/700]:  38%|███▊      | 301/782 [00:01<00:02, 205.98it/s]



Epoch [680/700]:  47%|████▋     | 365/782 [00:01<00:02, 204.86it/s]



Epoch [680/700]:  52%|█████▏    | 407/782 [00:01<00:01, 206.74it/s]



Epoch [680/700]:  57%|█████▋    | 449/782 [00:02<00:01, 204.71it/s]



Epoch [680/700]:  63%|██████▎   | 491/782 [00:02<00:01, 201.15it/s]



Epoch [680/700]:  68%|██████▊   | 533/782 [00:02<00:01, 202.11it/s]



Epoch [680/700]:  74%|███████▎  | 576/782 [00:02<00:01, 204.92it/s]



Epoch [680/700]:  79%|███████▉  | 618/782 [00:03<00:00, 201.87it/s]



Epoch [680/700]:  87%|████████▋ | 683/782 [00:03<00:00, 209.62it/s]



Epoch [680/700]:  93%|█████████▎| 727/782 [00:03<00:00, 212.60it/s]



Epoch [680/700]: 100%|██████████| 782/782 [00:03<00:00, 205.86it/s]






Learning Rate: 0.004050
Train Loss: 0.0377, Accuracy: 98.76%, Confidence: 0.9856
Test Loss: 2.6794, Accuracy: 70.62%, Confidence: 0.9459
Train-Test Accuracy Gap: 28.14%


Epoch [681/700]:   3%|▎         | 22/782 [00:00<00:03, 213.03it/s]



Epoch [681/700]:  11%|█         | 87/782 [00:00<00:03, 211.28it/s]



Epoch [681/700]:  17%|█▋        | 131/782 [00:00<00:03, 210.80it/s]



Epoch [681/700]:  22%|██▏       | 174/782 [00:00<00:02, 207.83it/s]



Epoch [681/700]:  28%|██▊       | 216/782 [00:01<00:02, 206.79it/s]



Epoch [681/700]:  33%|███▎      | 258/782 [00:01<00:02, 205.86it/s]



Epoch [681/700]:  38%|███▊      | 301/782 [00:01<00:02, 208.86it/s]



Epoch [681/700]:  44%|████▍     | 344/782 [00:01<00:02, 210.12it/s]



Epoch [681/700]:  52%|█████▏    | 408/782 [00:01<00:01, 204.74it/s]



Epoch [681/700]:  58%|█████▊    | 452/782 [00:02<00:01, 208.73it/s]



Epoch [681/700]:  63%|██████▎   | 494/782 [00:02<00:01, 208.57it/s]



Epoch [681/700]:  69%|██████▉   | 538/782 [00:02<00:01, 210.37it/s]



Epoch [681/700]:  74%|███████▍  | 582/782 [00:02<00:00, 209.67it/s]



Epoch [681/700]:  80%|████████  | 626/782 [00:02<00:00, 212.34it/s]



Epoch [681/700]:  88%|████████▊ | 692/782 [00:03<00:00, 213.27it/s]



Epoch [681/700]:  94%|█████████▍| 736/782 [00:03<00:00, 212.82it/s]



Epoch [681/700]: 100%|██████████| 782/782 [00:03<00:00, 209.49it/s]






Learning Rate: 0.004050
Train Loss: 0.0425, Accuracy: 98.58%, Confidence: 0.9847
Test Loss: 2.3577, Accuracy: 73.64%, Confidence: 0.9509
Train-Test Accuracy Gap: 24.94%


Epoch [682/700]:   3%|▎         | 22/782 [00:00<00:03, 213.26it/s]



Epoch [682/700]:  11%|█▏        | 88/782 [00:00<00:03, 213.82it/s]



Epoch [682/700]:  17%|█▋        | 132/782 [00:00<00:03, 212.54it/s]



Epoch [682/700]:  28%|██▊       | 220/782 [00:01<00:02, 211.76it/s]



Epoch [682/700]:  34%|███▎      | 263/782 [00:01<00:02, 208.26it/s]



Epoch [682/700]:  39%|███▉      | 307/782 [00:01<00:02, 212.08it/s]



Epoch [682/700]:  45%|████▍     | 351/782 [00:01<00:02, 213.18it/s]



Epoch [682/700]:  51%|█████     | 395/782 [00:01<00:01, 205.83it/s]



Epoch [682/700]:  56%|█████▌    | 438/782 [00:02<00:01, 208.02it/s]



Epoch [682/700]:  62%|██████▏   | 482/782 [00:02<00:01, 212.65it/s]



Epoch [682/700]:  67%|██████▋   | 525/782 [00:02<00:01, 209.12it/s]



Epoch [682/700]:  73%|███████▎  | 568/782 [00:02<00:01, 210.71it/s]



Epoch [682/700]:  78%|███████▊  | 613/782 [00:02<00:00, 214.54it/s]



Epoch [682/700]:  87%|████████▋ | 679/782 [00:03<00:00, 214.92it/s]



Epoch [682/700]:  92%|█████████▏| 723/782 [00:03<00:00, 214.42it/s]



Epoch [682/700]:  98%|█████████▊| 767/782 [00:03<00:00, 207.68it/s]



Epoch [682/700]: 100%|██████████| 782/782 [00:03<00:00, 210.77it/s]


Learning Rate: 0.004050
Train Loss: 0.0358, Accuracy: 98.76%, Confidence: 0.9860
Test Loss: 2.5666, Accuracy: 72.57%, Confidence: 0.9499
Train-Test Accuracy Gap: 26.19%


Epoch [683/700]:   3%|▎         | 21/782 [00:00<00:03, 208.32it/s]



Epoch [683/700]:  11%|█         | 87/782 [00:00<00:03, 211.50it/s]



Epoch [683/700]:  17%|█▋        | 130/782 [00:00<00:03, 207.39it/s]



Epoch [683/700]:  22%|██▏       | 172/782 [00:00<00:02, 206.86it/s]



Epoch [683/700]:  27%|██▋       | 215/782 [00:01<00:02, 208.20it/s]



Epoch [683/700]:  33%|███▎      | 258/782 [00:01<00:02, 210.98it/s]



Epoch [683/700]:  39%|███▊      | 302/782 [00:01<00:02, 210.82it/s]



Epoch [683/700]:  44%|████▍     | 346/782 [00:01<00:02, 212.68it/s]



Epoch [683/700]:  50%|████▉     | 390/782 [00:01<00:01, 206.45it/s]



Epoch [683/700]:  58%|█████▊    | 454/782 [00:02<00:01, 208.40it/s]



Epoch [683/700]:  64%|██████▎   | 497/782 [00:02<00:01, 207.96it/s]



Epoch [683/700]:  69%|██████▉   | 540/782 [00:02<00:01, 206.59it/s]



Epoch [683/700]:  74%|███████▍  | 582/782 [00:02<00:00, 205.85it/s]



Epoch [683/700]:  80%|███████▉  | 624/782 [00:03<00:00, 204.87it/s]



Epoch [683/700]:  85%|████████▌ | 666/782 [00:03<00:00, 201.49it/s]



Epoch [683/700]:  91%|█████████ | 711/782 [00:03<00:00, 208.96it/s]



Epoch [683/700]:  96%|█████████▋| 754/782 [00:03<00:00, 206.65it/s]



Epoch [683/700]: 100%|██████████| 782/782 [00:03<00:00, 207.01it/s]


Learning Rate: 0.004050
Train Loss: 0.0408, Accuracy: 98.67%, Confidence: 0.9851
Test Loss: 2.9119, Accuracy: 71.26%, Confidence: 0.9498
Train-Test Accuracy Gap: 27.41%


Epoch [684/700]:   3%|▎         | 20/782 [00:00<00:03, 198.87it/s]



Epoch [684/700]:  11%|█         | 85/782 [00:00<00:03, 211.76it/s]



Epoch [684/700]:  16%|█▋        | 129/782 [00:00<00:03, 210.26it/s]



Epoch [684/700]:  25%|██▌       | 197/782 [00:00<00:02, 215.01it/s]



Epoch [684/700]:  31%|███       | 241/782 [00:01<00:02, 212.45it/s]



Epoch [684/700]:  36%|███▋      | 285/782 [00:01<00:02, 211.28it/s]



Epoch [684/700]:  42%|████▏     | 329/782 [00:01<00:02, 213.27it/s]



Epoch [684/700]:  51%|█████     | 395/782 [00:01<00:01, 212.07it/s]



Epoch [684/700]:  56%|█████▌    | 438/782 [00:02<00:01, 205.21it/s]



Epoch [684/700]:  61%|██████▏   | 480/782 [00:02<00:01, 203.43it/s]



Epoch [684/700]:  67%|██████▋   | 523/782 [00:02<00:01, 206.22it/s]



Epoch [684/700]:  73%|███████▎  | 567/782 [00:02<00:01, 210.28it/s]



Epoch [684/700]:  78%|███████▊  | 611/782 [00:02<00:00, 210.96it/s]



Epoch [684/700]:  84%|████████▎ | 654/782 [00:03<00:00, 209.68it/s]



Epoch [684/700]:  89%|████████▉ | 698/782 [00:03<00:00, 211.60it/s]



Epoch [684/700]:  95%|█████████▍| 742/782 [00:03<00:00, 208.77it/s]



Epoch [684/700]: 100%|██████████| 782/782 [00:03<00:00, 209.32it/s]


Learning Rate: 0.004050
Train Loss: 0.0367, Accuracy: 98.78%, Confidence: 0.9858
Test Loss: 2.6096, Accuracy: 72.18%, Confidence: 0.9523
Train-Test Accuracy Gap: 26.60%


Epoch [685/700]:   3%|▎         | 20/782 [00:00<00:03, 191.32it/s]



Epoch [685/700]:  11%|█         | 85/782 [00:00<00:03, 211.63it/s]



Epoch [685/700]:  16%|█▋        | 129/782 [00:00<00:03, 213.81it/s]



Epoch [685/700]:  25%|██▍       | 195/782 [00:00<00:02, 216.01it/s]



Epoch [685/700]:  31%|███       | 239/782 [00:01<00:02, 211.68it/s]



Epoch [685/700]:  36%|███▌      | 283/782 [00:01<00:02, 212.15it/s]



Epoch [685/700]:  42%|████▏     | 326/782 [00:01<00:02, 204.48it/s]



Epoch [685/700]:  50%|████▉     | 390/782 [00:01<00:01, 208.52it/s]



Epoch [685/700]:  55%|█████▌    | 433/782 [00:02<00:01, 208.44it/s]



Epoch [685/700]:  61%|██████    | 476/782 [00:02<00:01, 209.19it/s]



Epoch [685/700]:  66%|██████▌   | 518/782 [00:02<00:01, 205.46it/s]



Epoch [685/700]:  72%|███████▏  | 560/782 [00:02<00:01, 201.67it/s]



Epoch [685/700]:  77%|███████▋  | 603/782 [00:02<00:00, 201.86it/s]



Epoch [685/700]:  82%|████████▏ | 645/782 [00:03<00:00, 202.70it/s]



Epoch [685/700]:  91%|█████████ | 710/782 [00:03<00:00, 204.69it/s]



Epoch [685/700]:  96%|█████████▋| 753/782 [00:03<00:00, 208.81it/s]



Epoch [685/700]: 100%|██████████| 782/782 [00:03<00:00, 207.34it/s]


Learning Rate: 0.004050
Train Loss: 0.0437, Accuracy: 98.55%, Confidence: 0.9852
Test Loss: 2.4302, Accuracy: 72.66%, Confidence: 0.9504
Train-Test Accuracy Gap: 25.89%


Epoch [686/700]:   3%|▎         | 21/782 [00:00<00:03, 208.38it/s]



Epoch [686/700]:  11%|█         | 87/782 [00:00<00:03, 209.42it/s]



Epoch [686/700]:  16%|█▋        | 129/782 [00:00<00:03, 208.29it/s]



Epoch [686/700]:  22%|██▏       | 171/782 [00:00<00:02, 208.16it/s]



Epoch [686/700]:  30%|███       | 238/782 [00:01<00:02, 213.88it/s]



Epoch [686/700]:  39%|███▉      | 304/782 [00:01<00:02, 213.04it/s]



Epoch [686/700]:  47%|████▋     | 370/782 [00:01<00:01, 215.15it/s]



Epoch [686/700]:  53%|█████▎    | 414/782 [00:01<00:01, 215.03it/s]



Epoch [686/700]:  61%|██████▏   | 480/782 [00:02<00:01, 209.46it/s]



Epoch [686/700]:  64%|██████▍   | 501/782 [00:02<00:01, 205.99it/s]



Epoch [686/700]:  72%|███████▏  | 564/782 [00:02<00:01, 205.04it/s]



Epoch [686/700]:  77%|███████▋  | 606/782 [00:02<00:00, 204.41it/s]



Epoch [686/700]:  83%|████████▎ | 648/782 [00:03<00:00, 204.85it/s]



Epoch [686/700]:  88%|████████▊ | 690/782 [00:03<00:00, 203.58it/s]



Epoch [686/700]:  96%|█████████▋| 754/782 [00:03<00:00, 205.56it/s]



Epoch [686/700]: 100%|██████████| 782/782 [00:03<00:00, 208.41it/s]


Learning Rate: 0.004050
Train Loss: 0.0364, Accuracy: 98.71%, Confidence: 0.9858
Test Loss: 2.4551, Accuracy: 73.12%, Confidence: 0.9517
Train-Test Accuracy Gap: 25.59%


Epoch [687/700]:   2%|▏         | 19/782 [00:00<00:04, 181.67it/s]



Epoch [687/700]:   5%|▌         | 41/782 [00:00<00:03, 199.97it/s]



Epoch [687/700]:   8%|▊         | 62/782 [00:00<00:03, 199.61it/s]



Epoch [687/700]:  11%|█         | 83/782 [00:00<00:03, 202.36it/s]



Epoch [687/700]:  13%|█▎        | 104/782 [00:00<00:03, 199.20it/s]



Epoch [687/700]:  16%|█▌        | 124/782 [00:00<00:03, 198.68it/s]



Epoch [687/700]:  19%|█▊        | 146/782 [00:00<00:03, 200.52it/s]



Epoch [687/700]:  21%|██▏       | 167/782 [00:00<00:03, 201.47it/s]



Epoch [687/700]:  27%|██▋       | 210/782 [00:01<00:02, 203.53it/s]



Epoch [687/700]:  30%|██▉       | 231/782 [00:01<00:02, 203.07it/s]



Epoch [687/700]:  32%|███▏      | 252/782 [00:01<00:02, 202.00it/s]



Epoch [687/700]:  35%|███▍      | 273/782 [00:01<00:02, 201.96it/s]



Epoch [687/700]:  38%|███▊      | 295/782 [00:01<00:02, 206.44it/s]



Epoch [687/700]:  43%|████▎     | 338/782 [00:01<00:02, 206.80it/s]



Epoch [687/700]:  52%|█████▏    | 405/782 [00:01<00:01, 212.97it/s]



Epoch [687/700]:  60%|██████    | 471/782 [00:02<00:01, 212.29it/s]



Epoch [687/700]:  66%|██████▌   | 515/782 [00:02<00:01, 213.17it/s]



Epoch [687/700]:  71%|███████▏  | 559/782 [00:02<00:01, 212.16it/s]



Epoch [687/700]:  77%|███████▋  | 603/782 [00:02<00:00, 213.56it/s]



Epoch [687/700]:  83%|████████▎ | 647/782 [00:03<00:00, 214.17it/s]



Epoch [687/700]:  91%|█████████ | 713/782 [00:03<00:00, 216.18it/s]



Epoch [687/700]:  97%|█████████▋| 757/782 [00:03<00:00, 214.53it/s]



Epoch [687/700]: 100%|██████████| 782/782 [00:03<00:00, 209.02it/s]


Learning Rate: 0.004050
Train Loss: 0.0404, Accuracy: 98.66%, Confidence: 0.9851
Test Loss: 2.5140, Accuracy: 72.65%, Confidence: 0.9495
Train-Test Accuracy Gap: 26.01%


Epoch [688/700]:   3%|▎         | 21/782 [00:00<00:03, 208.43it/s]



Epoch [688/700]:   5%|▌         | 42/782 [00:00<00:03, 208.33it/s]



Epoch [688/700]:  11%|█         | 84/782 [00:00<00:03, 206.96it/s]



Epoch [688/700]:  16%|█▋        | 128/782 [00:00<00:03, 210.57it/s]



Epoch [688/700]:  22%|██▏       | 171/782 [00:00<00:02, 208.45it/s]



Epoch [688/700]:  27%|██▋       | 213/782 [00:01<00:02, 207.72it/s]



Epoch [688/700]:  33%|███▎      | 256/782 [00:01<00:02, 208.01it/s]



Epoch [688/700]:  38%|███▊      | 300/782 [00:01<00:02, 210.68it/s]



Epoch [688/700]:  44%|████▍     | 344/782 [00:01<00:02, 211.73it/s]



Epoch [688/700]:  47%|████▋     | 366/782 [00:01<00:01, 211.24it/s]



Epoch [688/700]:  50%|████▉     | 388/782 [00:01<00:01, 208.48it/s]



Epoch [688/700]:  52%|█████▏    | 410/782 [00:01<00:01, 209.65it/s]



Epoch [688/700]:  55%|█████▌    | 432/782 [00:02<00:01, 211.27it/s]



Epoch [688/700]:  58%|█████▊    | 454/782 [00:02<00:01, 210.78it/s]



Epoch [688/700]:  61%|██████    | 476/782 [00:02<00:01, 207.41it/s]



Epoch [688/700]:  64%|██████▎   | 498/782 [00:02<00:01, 209.26it/s]



Epoch [688/700]:  66%|██████▋   | 520/782 [00:02<00:01, 210.01it/s]



Epoch [688/700]:  69%|██████▉   | 542/782 [00:02<00:01, 206.66it/s]



Epoch [688/700]:  75%|███████▍  | 584/782 [00:02<00:00, 205.40it/s]



Epoch [688/700]:  80%|████████  | 627/782 [00:03<00:00, 206.87it/s]



Epoch [688/700]:  86%|████████▌ | 670/782 [00:03<00:00, 208.74it/s]



Epoch [688/700]:  91%|█████████ | 712/782 [00:03<00:00, 205.85it/s]



Epoch [688/700]: 100%|██████████| 782/782 [00:03<00:00, 207.99it/s]






Learning Rate: 0.004050
Train Loss: 0.0393, Accuracy: 98.64%, Confidence: 0.9856
Test Loss: 2.2464, Accuracy: 73.47%, Confidence: 0.9489
Train-Test Accuracy Gap: 25.17%


Epoch [689/700]:   3%|▎         | 21/782 [00:00<00:03, 203.98it/s]



Epoch [689/700]:   5%|▌         | 43/782 [00:00<00:03, 210.74it/s]



Epoch [689/700]:  11%|█         | 87/782 [00:00<00:03, 210.20it/s]



Epoch [689/700]:  14%|█▍        | 109/782 [00:00<00:03, 209.33it/s]



Epoch [689/700]:  17%|█▋        | 130/782 [00:00<00:03, 208.54it/s]



Epoch [689/700]:  19%|█▉        | 151/782 [00:00<00:03, 206.37it/s]



Epoch [689/700]:  22%|██▏       | 172/782 [00:00<00:02, 203.89it/s]



Epoch [689/700]:  25%|██▍       | 193/782 [00:00<00:02, 204.85it/s]



Epoch [689/700]:  27%|██▋       | 215/782 [00:01<00:02, 208.24it/s]



Epoch [689/700]:  30%|███       | 237/782 [00:01<00:02, 209.24it/s]



Epoch [689/700]:  36%|███▌      | 279/782 [00:01<00:02, 208.85it/s]



Epoch [689/700]:  38%|███▊      | 300/782 [00:01<00:02, 207.89it/s]



Epoch [689/700]:  41%|████▏     | 323/782 [00:01<00:02, 212.07it/s]



Epoch [689/700]:  47%|████▋     | 367/782 [00:01<00:01, 211.02it/s]



Epoch [689/700]:  53%|█████▎    | 411/782 [00:01<00:01, 210.97it/s]



Epoch [689/700]:  58%|█████▊    | 455/782 [00:02<00:01, 209.14it/s]



Epoch [689/700]:  64%|██████▎   | 498/782 [00:02<00:01, 208.94it/s]



Epoch [689/700]:  72%|███████▏  | 564/782 [00:02<00:01, 210.16it/s]



Epoch [689/700]:  78%|███████▊  | 607/782 [00:02<00:00, 209.37it/s]



Epoch [689/700]:  83%|████████▎ | 651/782 [00:03<00:00, 210.40it/s]



Epoch [689/700]:  89%|████████▉ | 695/782 [00:03<00:00, 207.80it/s]



Epoch [689/700]:  95%|█████████▍| 739/782 [00:03<00:00, 210.60it/s]



Epoch [689/700]: 100%|██████████| 782/782 [00:03<00:00, 208.67it/s]






Learning Rate: 0.004050
Train Loss: 0.0359, Accuracy: 98.79%, Confidence: 0.9861
Test Loss: 2.2657, Accuracy: 73.72%, Confidence: 0.9487
Train-Test Accuracy Gap: 25.07%


Epoch [690/700]:   3%|▎         | 21/782 [00:00<00:03, 205.87it/s]



Epoch [690/700]:  11%|█         | 86/782 [00:00<00:03, 210.72it/s]



Epoch [690/700]:  17%|█▋        | 130/782 [00:00<00:03, 212.99it/s]



Epoch [690/700]:  22%|██▏       | 174/782 [00:00<00:02, 212.50it/s]



Epoch [690/700]:  31%|███       | 240/782 [00:01<00:02, 211.83it/s]



Epoch [690/700]:  36%|███▋      | 284/782 [00:01<00:02, 211.03it/s]



Epoch [690/700]:  42%|████▏     | 328/782 [00:01<00:02, 212.48it/s]



Epoch [690/700]:  48%|████▊     | 372/782 [00:01<00:01, 213.46it/s]



Epoch [690/700]:  53%|█████▎    | 416/782 [00:01<00:01, 212.45it/s]



Epoch [690/700]:  59%|█████▉    | 460/782 [00:02<00:01, 211.02it/s]



Epoch [690/700]:  64%|██████▍   | 503/782 [00:02<00:01, 204.21it/s]



Epoch [690/700]:  70%|██████▉   | 546/782 [00:02<00:01, 206.51it/s]



Epoch [690/700]:  75%|███████▌  | 590/782 [00:02<00:00, 210.27it/s]



Epoch [690/700]:  81%|████████  | 634/782 [00:03<00:00, 211.99it/s]



Epoch [690/700]:  87%|████████▋ | 678/782 [00:03<00:00, 210.97it/s]



Epoch [690/700]:  95%|█████████▌| 744/782 [00:03<00:00, 212.15it/s]



Epoch [690/700]: 100%|██████████| 782/782 [00:03<00:00, 210.34it/s]


Learning Rate: 0.004050
Train Loss: 0.0376, Accuracy: 98.73%, Confidence: 0.9857
Test Loss: 2.9317, Accuracy: 69.46%, Confidence: 0.9487
Train-Test Accuracy Gap: 29.27%


Epoch [691/700]:   3%|▎         | 20/782 [00:00<00:03, 197.90it/s]



Epoch [691/700]:   5%|▌         | 41/782 [00:00<00:03, 201.59it/s]



Epoch [691/700]:  11%|█         | 85/782 [00:00<00:03, 210.65it/s]



Epoch [691/700]:  16%|█▋        | 128/782 [00:00<00:03, 208.54it/s]



Epoch [691/700]:  22%|██▏       | 173/782 [00:00<00:02, 213.98it/s]



Epoch [691/700]:  28%|██▊       | 217/782 [00:01<00:02, 211.40it/s]



Epoch [691/700]:  31%|███       | 239/782 [00:01<00:02, 210.91it/s]



Epoch [691/700]:  33%|███▎      | 261/782 [00:01<00:02, 211.44it/s]



Epoch [691/700]:  36%|███▌      | 283/782 [00:01<00:02, 211.36it/s]



Epoch [691/700]:  39%|███▉      | 305/782 [00:01<00:02, 211.24it/s]



Epoch [691/700]:  42%|████▏     | 327/782 [00:01<00:02, 212.36it/s]



Epoch [691/700]:  45%|████▍     | 349/782 [00:01<00:02, 214.05it/s]



Epoch [691/700]:  47%|████▋     | 371/782 [00:01<00:01, 211.93it/s]



Epoch [691/700]:  50%|█████     | 393/782 [00:01<00:01, 209.19it/s]



Epoch [691/700]:  53%|█████▎    | 414/782 [00:01<00:01, 207.60it/s]



Epoch [691/700]:  56%|█████▌    | 435/782 [00:02<00:01, 206.19it/s]



Epoch [691/700]:  58%|█████▊    | 456/782 [00:02<00:01, 205.14it/s]



Epoch [691/700]:  61%|██████    | 477/782 [00:02<00:01, 201.92it/s]



Epoch [691/700]:  64%|██████▎   | 498/782 [00:02<00:01, 203.51it/s]



Epoch [691/700]:  66%|██████▋   | 519/782 [00:02<00:01, 203.58it/s]



Epoch [691/700]:  69%|██████▉   | 541/782 [00:02<00:01, 205.76it/s]



Epoch [691/700]:  72%|███████▏  | 563/782 [00:02<00:01, 209.18it/s]



Epoch [691/700]:  75%|███████▍  | 584/782 [00:02<00:00, 206.86it/s]



Epoch [691/700]:  77%|███████▋  | 606/782 [00:02<00:00, 208.39it/s]



Epoch [691/700]:  80%|████████  | 628/782 [00:03<00:00, 209.07it/s]



Epoch [691/700]:  83%|████████▎ | 649/782 [00:03<00:00, 207.59it/s]



Epoch [691/700]:  88%|████████▊ | 692/782 [00:03<00:00, 207.84it/s]



Epoch [691/700]:  94%|█████████▍| 737/782 [00:03<00:00, 212.97it/s]



Epoch [691/700]: 100%|██████████| 782/782 [00:03<00:00, 209.18it/s]


Learning Rate: 0.004050
Train Loss: 0.0401, Accuracy: 98.61%, Confidence: 0.9858
Test Loss: 2.6170, Accuracy: 71.21%, Confidence: 0.9497
Train-Test Accuracy Gap: 27.40%


Epoch [692/700]:   3%|▎         | 21/782 [00:00<00:03, 202.81it/s]



Epoch [692/700]:  11%|█         | 86/782 [00:00<00:03, 210.00it/s]



Epoch [692/700]:  17%|█▋        | 130/782 [00:00<00:03, 213.35it/s]



Epoch [692/700]:  22%|██▏       | 175/782 [00:00<00:02, 214.93it/s]



Epoch [692/700]:  31%|███       | 241/782 [00:01<00:02, 215.47it/s]



Epoch [692/700]:  36%|███▋      | 285/782 [00:01<00:02, 212.97it/s]



Epoch [692/700]:  42%|████▏     | 329/782 [00:01<00:02, 209.19it/s]



Epoch [692/700]:  48%|████▊     | 373/782 [00:01<00:01, 211.31it/s]



Epoch [692/700]:  53%|█████▎    | 417/782 [00:01<00:01, 214.14it/s]



Epoch [692/700]:  59%|█████▉    | 461/782 [00:02<00:01, 209.84it/s]



Epoch [692/700]:  64%|██████▍   | 503/782 [00:02<00:01, 204.21it/s]



Epoch [692/700]:  73%|███████▎  | 567/782 [00:02<00:01, 201.75it/s]



Epoch [692/700]:  78%|███████▊  | 610/782 [00:02<00:00, 204.10it/s]



Epoch [692/700]:  86%|████████▌ | 674/782 [00:03<00:00, 204.93it/s]



Epoch [692/700]:  92%|█████████▏| 716/782 [00:03<00:00, 202.73it/s]



Epoch [692/700]:  97%|█████████▋| 759/782 [00:03<00:00, 204.21it/s]



Epoch [692/700]: 100%|██████████| 782/782 [00:03<00:00, 207.88it/s]


Learning Rate: 0.004050
Train Loss: 0.0346, Accuracy: 98.86%, Confidence: 0.9864
Test Loss: 2.5705, Accuracy: 71.94%, Confidence: 0.9500
Train-Test Accuracy Gap: 26.92%


Epoch [693/700]:   3%|▎         | 22/782 [00:00<00:03, 214.04it/s]



Epoch [693/700]:  11%|█▏        | 88/782 [00:00<00:03, 210.41it/s]



Epoch [693/700]:  17%|█▋        | 132/782 [00:00<00:03, 209.48it/s]



Epoch [693/700]:  23%|██▎       | 176/782 [00:00<00:02, 210.15it/s]



Epoch [693/700]:  28%|██▊       | 220/782 [00:01<00:02, 209.78it/s]



Epoch [693/700]:  34%|███▎      | 262/782 [00:01<00:02, 208.01it/s]



Epoch [693/700]:  39%|███▉      | 305/782 [00:01<00:02, 207.75it/s]



Epoch [693/700]:  45%|████▍     | 349/782 [00:01<00:02, 208.74it/s]



Epoch [693/700]:  50%|█████     | 392/782 [00:01<00:01, 211.00it/s]



Epoch [693/700]:  59%|█████▊    | 458/782 [00:02<00:01, 205.99it/s]



Epoch [693/700]:  67%|██████▋   | 525/782 [00:02<00:01, 211.76it/s]



Epoch [693/700]:  73%|███████▎  | 569/782 [00:02<00:00, 214.15it/s]



Epoch [693/700]:  81%|████████  | 635/782 [00:03<00:00, 211.02it/s]



Epoch [693/700]:  87%|████████▋ | 679/782 [00:03<00:00, 210.61it/s]



Epoch [693/700]:  92%|█████████▏| 723/782 [00:03<00:00, 212.17it/s]



Epoch [693/700]:  98%|█████████▊| 767/782 [00:03<00:00, 210.17it/s]



Epoch [693/700]: 100%|██████████| 782/782 [00:03<00:00, 210.11it/s]


Learning Rate: 0.004050
Train Loss: 0.0385, Accuracy: 98.72%, Confidence: 0.9852
Test Loss: 2.4976, Accuracy: 73.12%, Confidence: 0.9518
Train-Test Accuracy Gap: 25.60%


Epoch [694/700]:   3%|▎         | 22/782 [00:00<00:03, 215.23it/s]



Epoch [694/700]:   8%|▊         | 65/782 [00:00<00:03, 207.42it/s]



Epoch [694/700]:  14%|█▍        | 108/782 [00:00<00:03, 209.69it/s]



Epoch [694/700]:  17%|█▋        | 130/782 [00:00<00:03, 211.43it/s]



Epoch [694/700]:  22%|██▏       | 174/782 [00:00<00:02, 213.54it/s]



Epoch [694/700]:  28%|██▊       | 218/782 [00:01<00:02, 214.11it/s]



Epoch [694/700]:  34%|███▎      | 262/782 [00:01<00:02, 210.23it/s]



Epoch [694/700]:  39%|███▉      | 306/782 [00:01<00:02, 211.36it/s]



Epoch [694/700]:  45%|████▍     | 351/782 [00:01<00:02, 214.86it/s]



Epoch [694/700]:  51%|█████     | 395/782 [00:01<00:01, 215.87it/s]



Epoch [694/700]:  59%|█████▉    | 461/782 [00:02<00:01, 212.63it/s]



Epoch [694/700]:  65%|██████▍   | 505/782 [00:02<00:01, 213.49it/s]



Epoch [694/700]:  70%|███████   | 549/782 [00:02<00:01, 213.10it/s]



Epoch [694/700]:  76%|███████▌  | 593/782 [00:02<00:00, 214.26it/s]



Epoch [694/700]:  84%|████████▍ | 659/782 [00:03<00:00, 214.51it/s]



Epoch [694/700]:  90%|████████▉ | 703/782 [00:03<00:00, 215.00it/s]



Epoch [694/700]:  96%|█████████▌| 747/782 [00:03<00:00, 215.66it/s]



Epoch [694/700]: 100%|██████████| 782/782 [00:03<00:00, 212.69it/s]


Learning Rate: 0.004050
Train Loss: 0.0418, Accuracy: 98.67%, Confidence: 0.9856
Test Loss: 2.5020, Accuracy: 72.73%, Confidence: 0.9506
Train-Test Accuracy Gap: 25.94%


Epoch [695/700]:   3%|▎         | 22/782 [00:00<00:03, 211.97it/s]



Epoch [695/700]:   6%|▌         | 44/782 [00:00<00:03, 210.24it/s]



Epoch [695/700]:  11%|█▏        | 88/782 [00:00<00:03, 213.08it/s]



Epoch [695/700]:  14%|█▍        | 110/782 [00:00<00:03, 207.20it/s]



Epoch [695/700]:  26%|██▌       | 200/782 [00:00<00:02, 215.25it/s]



Epoch [695/700]:  31%|███       | 244/782 [00:01<00:02, 214.94it/s]



Epoch [695/700]:  40%|███▉      | 310/782 [00:01<00:02, 214.13it/s]



Epoch [695/700]:  45%|████▌     | 354/782 [00:01<00:02, 213.70it/s]



Epoch [695/700]:  51%|█████     | 398/782 [00:01<00:01, 214.84it/s]



Epoch [695/700]:  57%|█████▋    | 442/782 [00:02<00:01, 215.66it/s]



Epoch [695/700]:  65%|██████▍   | 506/782 [00:02<00:01, 205.70it/s]



Epoch [695/700]:  70%|███████   | 550/782 [00:02<00:01, 209.87it/s]



Epoch [695/700]:  76%|███████▌  | 593/782 [00:02<00:00, 205.90it/s]



Epoch [695/700]:  84%|████████▍ | 656/782 [00:03<00:00, 203.08it/s]



Epoch [695/700]:  90%|████████▉ | 700/782 [00:03<00:00, 209.39it/s]



Epoch [695/700]:  95%|█████████▌| 743/782 [00:03<00:00, 208.99it/s]



Epoch [695/700]: 100%|██████████| 782/782 [00:03<00:00, 210.04it/s]


Learning Rate: 0.004050
Train Loss: 0.0416, Accuracy: 98.70%, Confidence: 0.9856
Test Loss: 2.4194, Accuracy: 73.28%, Confidence: 0.9511
Train-Test Accuracy Gap: 25.42%


Epoch [696/700]:   3%|▎         | 21/782 [00:00<00:03, 205.39it/s]



Epoch [696/700]:   5%|▌         | 42/782 [00:00<00:03, 207.69it/s]



Epoch [696/700]:   8%|▊         | 63/782 [00:00<00:03, 208.44it/s]



Epoch [696/700]:  16%|█▋        | 129/782 [00:00<00:03, 212.58it/s]



Epoch [696/700]:  22%|██▏       | 173/782 [00:00<00:02, 213.15it/s]



Epoch [696/700]:  28%|██▊       | 217/782 [00:01<00:02, 214.56it/s]



Epoch [696/700]:  33%|███▎      | 261/782 [00:01<00:02, 213.94it/s]



Epoch [696/700]:  39%|███▉      | 305/782 [00:01<00:02, 213.43it/s]



Epoch [696/700]:  45%|████▍     | 349/782 [00:01<00:02, 212.88it/s]



Epoch [696/700]:  53%|█████▎    | 415/782 [00:01<00:01, 212.94it/s]



Epoch [696/700]:  59%|█████▊    | 459/782 [00:02<00:01, 213.66it/s]



Epoch [696/700]:  64%|██████▍   | 504/782 [00:02<00:01, 214.36it/s]



Epoch [696/700]:  70%|███████   | 548/782 [00:02<00:01, 213.66it/s]



Epoch [696/700]:  79%|███████▊  | 614/782 [00:02<00:00, 214.78it/s]



Epoch [696/700]:  84%|████████▍ | 658/782 [00:03<00:00, 213.08it/s]



Epoch [696/700]:  90%|████████▉ | 702/782 [00:03<00:00, 211.07it/s]



Epoch [696/700]:  95%|█████████▌| 746/782 [00:03<00:00, 212.18it/s]



Epoch [696/700]: 100%|██████████| 782/782 [00:03<00:00, 212.47it/s]


Learning Rate: 0.004050
Train Loss: 0.0364, Accuracy: 98.78%, Confidence: 0.9860
Test Loss: 2.3964, Accuracy: 73.20%, Confidence: 0.9514
Train-Test Accuracy Gap: 25.58%


Epoch [697/700]:   3%|▎         | 21/782 [00:00<00:03, 202.56it/s]



Epoch [697/700]:  11%|█         | 87/782 [00:00<00:03, 210.94it/s]



Epoch [697/700]:  17%|█▋        | 131/782 [00:00<00:03, 212.46it/s]



Epoch [697/700]:  25%|██▌       | 197/782 [00:00<00:02, 215.77it/s]



Epoch [697/700]:  28%|██▊       | 219/782 [00:01<00:02, 215.63it/s]



Epoch [697/700]:  36%|███▋      | 285/782 [00:01<00:02, 211.79it/s]



Epoch [697/700]:  42%|████▏     | 329/782 [00:01<00:02, 213.94it/s]



Epoch [697/700]:  48%|████▊     | 373/782 [00:01<00:01, 213.19it/s]



Epoch [697/700]:  59%|█████▉    | 461/782 [00:02<00:01, 211.26it/s]



Epoch [697/700]:  65%|██████▍   | 505/782 [00:02<00:01, 211.47it/s]



Epoch [697/700]:  70%|███████   | 548/782 [00:02<00:01, 206.38it/s]



Epoch [697/700]:  76%|███████▌  | 591/782 [00:02<00:00, 208.97it/s]



Epoch [697/700]:  81%|████████  | 635/782 [00:02<00:00, 212.51it/s]



Epoch [697/700]:  87%|████████▋ | 679/782 [00:03<00:00, 212.53it/s]



Epoch [697/700]:  95%|█████████▌| 745/782 [00:03<00:00, 211.10it/s]



Epoch [697/700]: 100%|██████████| 782/782 [00:03<00:00, 211.75it/s]


Learning Rate: 0.004050
Train Loss: 0.0361, Accuracy: 98.83%, Confidence: 0.9863
Test Loss: 2.3749, Accuracy: 73.24%, Confidence: 0.9505
Train-Test Accuracy Gap: 25.59%


Epoch [698/700]:   3%|▎         | 20/782 [00:00<00:03, 197.23it/s]



Epoch [698/700]:   5%|▌         | 42/782 [00:00<00:03, 209.83it/s]



Epoch [698/700]:  11%|█         | 85/782 [00:00<00:03, 212.47it/s]



Epoch [698/700]:  16%|█▋        | 129/782 [00:00<00:03, 213.32it/s]



Epoch [698/700]:  25%|██▍       | 195/782 [00:00<00:02, 213.57it/s]



Epoch [698/700]:  31%|███       | 239/782 [00:01<00:02, 214.32it/s]



Epoch [698/700]:  36%|███▌      | 283/782 [00:01<00:02, 213.66it/s]



Epoch [698/700]:  42%|████▏     | 327/782 [00:01<00:02, 213.92it/s]



Epoch [698/700]:  47%|████▋     | 371/782 [00:01<00:01, 215.41it/s]



Epoch [698/700]:  53%|█████▎    | 415/782 [00:01<00:01, 214.18it/s]



Epoch [698/700]:  59%|█████▊    | 459/782 [00:02<00:01, 210.64it/s]



Epoch [698/700]:  64%|██████▍   | 503/782 [00:02<00:01, 211.87it/s]



Epoch [698/700]:  70%|██████▉   | 547/782 [00:02<00:01, 211.64it/s]



Epoch [698/700]:  78%|███████▊  | 613/782 [00:02<00:00, 208.34it/s]



Epoch [698/700]:  84%|████████▍ | 655/782 [00:03<00:00, 205.16it/s]



Epoch [698/700]:  89%|████████▉ | 698/782 [00:03<00:00, 206.26it/s]



Epoch [698/700]:  95%|█████████▍| 740/782 [00:03<00:00, 206.13it/s]



Epoch [698/700]: 100%|██████████| 782/782 [00:03<00:00, 210.46it/s]






Learning Rate: 0.004050
Train Loss: 0.0346, Accuracy: 98.83%, Confidence: 0.9862
Test Loss: 2.5708, Accuracy: 72.20%, Confidence: 0.9498
Train-Test Accuracy Gap: 26.63%


Epoch [699/700]:   3%|▎         | 20/782 [00:00<00:03, 194.70it/s]



Epoch [699/700]:  11%|█         | 84/782 [00:00<00:03, 208.46it/s]



Epoch [699/700]:  16%|█▌        | 127/782 [00:00<00:03, 208.42it/s]



Epoch [699/700]:  22%|██▏       | 170/782 [00:00<00:02, 209.77it/s]



Epoch [699/700]:  27%|██▋       | 213/782 [00:01<00:02, 209.38it/s]



Epoch [699/700]:  33%|███▎      | 257/782 [00:01<00:02, 211.51it/s]



Epoch [699/700]:  38%|███▊      | 301/782 [00:01<00:02, 205.54it/s]



Epoch [699/700]:  44%|████▍     | 344/782 [00:01<00:02, 208.83it/s]



Epoch [699/700]:  49%|████▉     | 386/782 [00:01<00:01, 207.05it/s]



Epoch [699/700]:  55%|█████▍    | 428/782 [00:02<00:01, 204.98it/s]



Epoch [699/700]:  60%|██████    | 472/782 [00:02<00:01, 208.98it/s]



Epoch [699/700]:  66%|██████▌   | 516/782 [00:02<00:01, 211.39it/s]



Epoch [699/700]:  72%|███████▏  | 560/782 [00:02<00:01, 210.63it/s]



Epoch [699/700]:  77%|███████▋  | 604/782 [00:02<00:00, 209.49it/s]



Epoch [699/700]:  83%|████████▎ | 646/782 [00:03<00:00, 204.04it/s]



Epoch [699/700]:  88%|████████▊ | 690/782 [00:03<00:00, 208.94it/s]



Epoch [699/700]:  94%|█████████▎| 732/782 [00:03<00:00, 207.84it/s]



Epoch [699/700]: 100%|██████████| 782/782 [00:03<00:00, 208.10it/s]






Learning Rate: 0.004050
Train Loss: 0.0502, Accuracy: 98.39%, Confidence: 0.9842
Test Loss: 2.3778, Accuracy: 73.28%, Confidence: 0.9523
Train-Test Accuracy Gap: 25.11%


Epoch [700/700]:   3%|▎         | 21/782 [00:00<00:03, 204.47it/s]



Epoch [700/700]:  11%|█         | 87/782 [00:00<00:03, 212.16it/s]



Epoch [700/700]:  17%|█▋        | 131/782 [00:00<00:03, 212.40it/s]



Epoch [700/700]:  22%|██▏       | 175/782 [00:00<00:02, 214.96it/s]



Epoch [700/700]:  31%|███       | 241/782 [00:01<00:02, 213.57it/s]



Epoch [700/700]:  36%|███▋      | 285/782 [00:01<00:02, 209.23it/s]



Epoch [700/700]:  42%|████▏     | 328/782 [00:01<00:02, 209.09it/s]



Epoch [700/700]:  48%|████▊     | 372/782 [00:01<00:01, 210.51it/s]



Epoch [700/700]:  53%|█████▎    | 416/782 [00:01<00:01, 208.56it/s]



Epoch [700/700]:  59%|█████▊    | 459/782 [00:02<00:01, 210.22it/s]



Epoch [700/700]:  64%|██████▍   | 503/782 [00:02<00:01, 212.42it/s]



Epoch [700/700]:  70%|██████▉   | 547/782 [00:02<00:01, 208.86it/s]



Epoch [700/700]:  79%|███████▊  | 614/782 [00:02<00:00, 210.68it/s]



Epoch [700/700]:  84%|████████▍ | 658/782 [00:03<00:00, 210.54it/s]



Epoch [700/700]:  90%|████████▉ | 701/782 [00:03<00:00, 207.43it/s]



Epoch [700/700]:  95%|█████████▌| 743/782 [00:03<00:00, 207.86it/s]



Epoch [700/700]: 100%|██████████| 782/782 [00:03<00:00, 209.80it/s]


Learning Rate: 0.004050
Train Loss: 0.0358, Accuracy: 98.84%, Confidence: 0.9864
Test Loss: 2.4211, Accuracy: 72.96%, Confidence: 0.9502
Train-Test Accuracy Gap: 25.88%


In [12]:
import shutil
import os

# Back up every checkpoint/metrics file produced by training from the local
# Colab disk to a Google Drive folder.
source_dir = '/content/cnn_checkpoints'
destination_dir = '/content/drive/MyDrive/better_cnn_checkpoints'

# shutil.copy does not create missing directories, so make the target folder
# on first use. os.mkdir (not os.makedirs) is deliberate: it creates only the
# leaf folder and still raises if the parent path is missing, instead of
# silently building a local directory tree in its place.
if not os.path.isdir(destination_dir):
    os.mkdir(destination_dir)

for filename in os.listdir(source_dir):
    source_path = os.path.join(source_dir, filename)

    # os.listdir also returns sub-directories, which shutil.copy cannot
    # handle; skip them rather than aborting the backup half-way through.
    if not os.path.isfile(source_path):
        print(f"Skipped (not a file): {filename}")
        continue

    destination_path = os.path.join(destination_dir, filename)
    shutil.copy(source_path, destination_path)
    print(f"Copied: {filename}")

print("All files copied!")

Copied: cnn_epoch_500.pt
Copied: cnn_epoch_200.pt
Copied: cnn_epoch_700.pt
Copied: cnn_epoch_300.pt
Copied: cnn_epoch_400.pt
Copied: cnn_best_model.pt
Copied: cnn_epoch_600.pt
Copied: training_metrics.json
Copied: cnn_epoch_100.pt
All files copied!
