In [1]:
import numpy as np
import torch
import torchvision
from torch import nn
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision.models import ResNet152_Weights, resnet152
from tqdm import tqdm

In [2]:
RANDOM_STATE = 2025
BATCH_SIZE = 16
LEARNING_RATE = 1e-5
# LEARNING_RATE = 5e-6
NO_OF_ITERATIONS = 300
CUDA_DEVICE = "cuda:1" if torch.cuda.is_available() else "cpu"
PATH_TO_MODEL_SAVE = "resnet_model_weight_for_Flowers_Without_Weight_Freeze.pth"

In [3]:
transforms_functions = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.RandomCrop((224, 224)),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=30),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [4]:
transforms_functions_for_test = transforms.Compose(
    [
        transforms.Resize((244, 244)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [5]:
dataset_of_cnc = ImageFolder(
    root="FlowerImageDatasetArchive/train", transform=transforms_functions
)

In [6]:
dataset_of_cnc.classes

['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']

In [7]:
generator_for_random_split = torch.Generator().manual_seed(RANDOM_STATE)

In [8]:
training_dataset, test_dataset = random_split(
    dataset_of_cnc, [0.7, 0.3], generator=generator_for_random_split
)

In [9]:
dataloader_for_flower_training = DataLoader(
    training_dataset, batch_size=BATCH_SIZE, shuffle=True
)

In [10]:
dataloader_for_flower_test = DataLoader(
    test_dataset, batch_size=BATCH_SIZE, shuffle=False
)

In [11]:
resnet_model_for_cnc = resnet152(weights=ResNet152_Weights.DEFAULT)
# resnet_model_for_cnc = resnet152()

In [12]:
resnet_model_for_cnc

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [13]:
# resnet_model_for_cnc.requires_grad_(requires_grad = False)

In [14]:
resnet_model_for_cnc.fc.requires_grad_(requires_grad=True)

Linear(in_features=2048, out_features=1000, bias=True)

In [15]:
resnet_model_for_cnc.fc = torch.nn.Sequential(
    torch.nn.Linear(in_features=2048, out_features=512),
    torch.nn.ReLU(),
    torch.nn.Dropout(p=0.2),
    torch.nn.Linear(
        in_features=512,
        out_features=len(dataset_of_cnc.classes),
    ),
    torch.nn.Softmax(dim=1),
)

In [16]:
for module in resnet_model_for_cnc.modules():
    if type(module) == nn.Linear:
        nn.init.kaiming_normal_(
            module.weight.data,
            a=0,
            mode="fan_out",
            nonlinearity="relu",
        )
        if module.bias is not None:
            fan_in, fan_out = nn.init._calculate_fan_in_and_fan_out(module.weight.data)
            bound = 1 / (fan_out) ** 0.5
            nn.init.normal_(module.bias, -bound, bound)

In [17]:
resnet_model_for_cnc.to(device=CUDA_DEVICE)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [18]:
# resnet_model_for_cnc.load_state_dict(torch.load(PATH_TO_MODEL_SAVE, map_location = CUDA_DEVICE))

In [19]:
torch.cuda.is_available()

True

In [20]:
optimizer = Adam(resnet_model_for_cnc.parameters(), lr=LEARNING_RATE)

In [21]:
loss_function = CrossEntropyLoss()

In [22]:
for i_iteration in tqdm(range(NO_OF_ITERATIONS)):
    resnet_model_for_cnc.train()
    training_loss = []
    test_loss = []
    for i_image, i_target in dataloader_for_flower_training:
        i_image = i_image.to(device=CUDA_DEVICE)
        i_target = i_target.to(device=CUDA_DEVICE)
        optimizer.zero_grad()

        output = resnet_model_for_cnc(i_image)
        loss = loss_function(output, i_target)
        loss.backward()
        optimizer.step()

        training_loss.append(loss.item())

    print("Training loss at", i_iteration, "iteration is", np.mean(training_loss))

    torch.save(resnet_model_for_cnc.state_dict(), PATH_TO_MODEL_SAVE)
    resnet_model_for_cnc.eval()
    for test_image, test_target in dataloader_for_flower_test:
        test_image = test_image.to(device=CUDA_DEVICE)
        test_target = test_target.to(device=CUDA_DEVICE)
        test_prediction = resnet_model_for_cnc(test_image)

        temp_test_loss = loss_function(test_prediction, test_target)

        test_loss = temp_test_loss.item()

    print("***", "Test loss at", i_iteration, "iteration is", np.mean(test_loss))

  0%|                                                   | 0/300 [00:00<?, ?it/s]

Training loss at 0 iteration is 1.5794785712376114


  0%|▏                                        | 1/300 [00:29<2:25:26, 29.18s/it]

*** Test loss at 0 iteration is 1.3383524417877197
Training loss at 1 iteration is 1.4275552123046118


  1%|▎                                        | 2/300 [00:58<2:24:41, 29.13s/it]

*** Test loss at 1 iteration is 1.4147040843963623
Training loss at 2 iteration is 1.3573067079890857


  1%|▍                                        | 3/300 [01:27<2:25:02, 29.30s/it]

*** Test loss at 2 iteration is 1.1518232822418213
Training loss at 3 iteration is 1.2504045500243006


  1%|▌                                        | 4/300 [01:57<2:26:12, 29.64s/it]

*** Test loss at 3 iteration is 1.3307431936264038
Training loss at 4 iteration is 1.1958130489696155


  2%|▋                                        | 5/300 [02:28<2:26:45, 29.85s/it]

*** Test loss at 4 iteration is 1.223585844039917
Training loss at 5 iteration is 1.1344913002873256


  2%|▊                                        | 6/300 [02:58<2:27:22, 30.08s/it]

*** Test loss at 5 iteration is 1.185376524925232
Training loss at 6 iteration is 1.1130036957992995


  2%|▉                                        | 7/300 [03:29<2:27:22, 30.18s/it]

*** Test loss at 6 iteration is 1.1891363859176636
Training loss at 7 iteration is 1.0786291587451273


  3%|█                                        | 8/300 [03:59<2:26:59, 30.20s/it]

*** Test loss at 7 iteration is 1.1840983629226685
Training loss at 8 iteration is 1.0704569993925488


  3%|█▏                                       | 9/300 [04:29<2:27:11, 30.35s/it]

*** Test loss at 8 iteration is 1.2178195714950562
Training loss at 9 iteration is 1.0499294597255298


  3%|█▎                                      | 10/300 [05:00<2:27:11, 30.45s/it]

*** Test loss at 9 iteration is 1.3028151988983154
Training loss at 10 iteration is 1.0388056222072317


  4%|█▍                                      | 11/300 [05:31<2:26:29, 30.41s/it]

*** Test loss at 10 iteration is 1.1860774755477905
Training loss at 11 iteration is 1.0381660042715466


  4%|█▌                                      | 12/300 [06:03<2:28:57, 31.03s/it]

*** Test loss at 11 iteration is 1.188503623008728
Training loss at 12 iteration is 1.0191150223913272


  4%|█▋                                      | 13/300 [06:33<2:27:38, 30.87s/it]

*** Test loss at 12 iteration is 1.1910642385482788
Training loss at 13 iteration is 1.026729491131365


  5%|█▊                                      | 14/300 [07:04<2:26:39, 30.77s/it]

*** Test loss at 13 iteration is 1.1804580688476562
Training loss at 14 iteration is 1.014590247110887


  5%|██                                      | 15/300 [07:35<2:26:05, 30.76s/it]

*** Test loss at 14 iteration is 1.2510231733322144
Training loss at 15 iteration is 1.009438459045631


  5%|██▏                                     | 16/300 [08:06<2:25:59, 30.84s/it]

*** Test loss at 15 iteration is 1.0598599910736084
Training loss at 16 iteration is 1.00846552947336


  6%|██▎                                     | 17/300 [08:36<2:25:13, 30.79s/it]

*** Test loss at 16 iteration is 1.052228331565857
Training loss at 17 iteration is 0.9938897795913634


  6%|██▍                                     | 18/300 [09:07<2:24:47, 30.81s/it]

*** Test loss at 17 iteration is 1.0468255281448364
Training loss at 18 iteration is 0.9933595302676367


  6%|██▌                                     | 19/300 [09:38<2:24:15, 30.80s/it]

*** Test loss at 18 iteration is 1.1511669158935547
Training loss at 19 iteration is 0.986554795060276


  7%|██▋                                     | 20/300 [10:09<2:23:30, 30.75s/it]

*** Test loss at 19 iteration is 1.0672508478164673
Training loss at 20 iteration is 0.9890609688995299


  7%|██▊                                     | 21/300 [10:39<2:22:34, 30.66s/it]

*** Test loss at 20 iteration is 1.0764914751052856
Training loss at 21 iteration is 0.9831620967092592


  7%|██▉                                     | 22/300 [11:10<2:22:14, 30.70s/it]

*** Test loss at 21 iteration is 1.188226342201233
Training loss at 22 iteration is 0.9766909780581136


  8%|███                                     | 23/300 [11:41<2:21:54, 30.74s/it]

*** Test loss at 22 iteration is 1.1903765201568604
Training loss at 23 iteration is 0.976388111094798


  8%|███▏                                    | 24/300 [12:11<2:21:22, 30.73s/it]

*** Test loss at 23 iteration is 1.0477179288864136
Training loss at 24 iteration is 0.9745318175347384


  8%|███▎                                    | 25/300 [12:42<2:20:44, 30.71s/it]

*** Test loss at 24 iteration is 1.0626170635223389
Training loss at 25 iteration is 0.9677830624186303


  9%|███▍                                    | 26/300 [13:13<2:20:18, 30.72s/it]

*** Test loss at 25 iteration is 1.0653396844863892
Training loss at 26 iteration is 0.9596678338760187


  9%|███▌                                    | 27/300 [13:43<2:19:30, 30.66s/it]

*** Test loss at 26 iteration is 1.1394157409667969
Training loss at 27 iteration is 0.9639585954098662


  9%|███▋                                    | 28/300 [14:14<2:18:45, 30.61s/it]

*** Test loss at 27 iteration is 1.0606263875961304
Training loss at 28 iteration is 0.9650348779583765


 10%|███▊                                    | 29/300 [14:45<2:19:13, 30.82s/it]

*** Test loss at 28 iteration is 1.0518319606781006
Training loss at 29 iteration is 0.9623500877175449


 10%|████                                    | 30/300 [15:16<2:18:18, 30.73s/it]

*** Test loss at 29 iteration is 1.04768967628479
Training loss at 30 iteration is 0.9521468882718362


 10%|████▏                                   | 31/300 [15:46<2:17:44, 30.72s/it]

*** Test loss at 30 iteration is 1.04942786693573
Training loss at 31 iteration is 0.9652661794473317


 11%|████▎                                   | 32/300 [16:17<2:17:34, 30.80s/it]

*** Test loss at 31 iteration is 1.1859742403030396
Training loss at 32 iteration is 0.9651526946666812


 11%|████▍                                   | 33/300 [16:48<2:16:16, 30.62s/it]

*** Test loss at 32 iteration is 1.047706127166748
Training loss at 33 iteration is 0.9600869023110256


 11%|████▌                                   | 34/300 [17:18<2:15:55, 30.66s/it]

*** Test loss at 33 iteration is 1.047878623008728
Training loss at 34 iteration is 0.9504921249121674


 12%|████▋                                   | 35/300 [17:50<2:16:40, 30.94s/it]

*** Test loss at 34 iteration is 1.1849125623703003
Training loss at 35 iteration is 0.9500996958125721


 12%|████▊                                   | 36/300 [18:21<2:16:01, 30.92s/it]

*** Test loss at 35 iteration is 1.080915093421936
Training loss at 36 iteration is 0.9494522869094344


 12%|████▉                                   | 37/300 [18:51<2:15:00, 30.80s/it]

*** Test loss at 36 iteration is 1.0478154420852661
Training loss at 37 iteration is 0.9428770024914387


 13%|█████                                   | 38/300 [19:22<2:14:17, 30.75s/it]

*** Test loss at 37 iteration is 1.047664761543274
Training loss at 38 iteration is 0.940587857045418


 13%|█████▏                                  | 39/300 [19:53<2:14:40, 30.96s/it]

*** Test loss at 38 iteration is 1.100164771080017
Training loss at 39 iteration is 0.9479191278623156


 13%|█████▎                                  | 40/300 [20:25<2:14:29, 31.04s/it]

*** Test loss at 39 iteration is 1.0389506816864014
Training loss at 40 iteration is 0.9485687401669085


 14%|█████▍                                  | 41/300 [20:56<2:13:58, 31.04s/it]

*** Test loss at 40 iteration is 1.1987314224243164
Training loss at 41 iteration is 0.9442639813935461


 14%|█████▌                                  | 42/300 [21:26<2:13:04, 30.95s/it]

*** Test loss at 41 iteration is 1.1554880142211914
Training loss at 42 iteration is 0.9415214426261335


 14%|█████▋                                  | 43/300 [21:57<2:12:05, 30.84s/it]

*** Test loss at 42 iteration is 1.04768967628479
Training loss at 43 iteration is 0.9428851106935296


 15%|█████▊                                  | 44/300 [22:28<2:11:21, 30.79s/it]

*** Test loss at 43 iteration is 1.1881743669509888
Training loss at 44 iteration is 0.9419102442166036


 15%|██████                                  | 45/300 [22:58<2:10:32, 30.72s/it]

*** Test loss at 44 iteration is 1.053482174873352
Training loss at 45 iteration is 0.9391610169213666


 15%|██████▏                                 | 46/300 [23:30<2:11:07, 30.98s/it]

*** Test loss at 45 iteration is 1.0475651025772095
Training loss at 46 iteration is 0.9309510830020117


 16%|██████▎                                 | 47/300 [24:02<2:12:01, 31.31s/it]

*** Test loss at 46 iteration is 1.0476895570755005
Training loss at 47 iteration is 0.9382591218002572


 16%|██████▍                                 | 48/300 [24:37<2:15:44, 32.32s/it]

*** Test loss at 47 iteration is 1.0035921335220337
Training loss at 48 iteration is 0.9323006306798005


 16%|██████▌                                 | 49/300 [25:11<2:17:21, 32.83s/it]

*** Test loss at 48 iteration is 1.047140121459961
Training loss at 49 iteration is 0.934712747404398


 17%|██████▋                                 | 50/300 [25:42<2:14:41, 32.33s/it]

*** Test loss at 49 iteration is 0.9053623080253601
Training loss at 50 iteration is 0.9289463817580672


 17%|██████▊                                 | 51/300 [26:13<2:12:27, 31.92s/it]

*** Test loss at 50 iteration is 1.0477166175842285
Training loss at 51 iteration is 0.9336059630409745


 17%|██████▉                                 | 52/300 [26:44<2:10:47, 31.64s/it]

*** Test loss at 51 iteration is 1.04768967628479
Training loss at 52 iteration is 0.9306823670371505


 18%|███████                                 | 53/300 [27:16<2:10:41, 31.75s/it]

*** Test loss at 52 iteration is 1.0476847887039185
Training loss at 53 iteration is 0.9314480980565726


 18%|███████▏                                | 54/300 [27:46<2:08:54, 31.44s/it]

*** Test loss at 53 iteration is 0.9321407079696655
Training loss at 54 iteration is 0.9297057005984724


 18%|███████▎                                | 55/300 [28:17<2:07:35, 31.25s/it]

*** Test loss at 54 iteration is 1.190481185913086
Training loss at 55 iteration is 0.9286888059505747


 19%|███████▍                                | 56/300 [28:48<2:06:53, 31.20s/it]

*** Test loss at 55 iteration is 1.0549427270889282
Training loss at 56 iteration is 0.9248192192109164


 19%|███████▌                                | 57/300 [29:19<2:06:09, 31.15s/it]

*** Test loss at 56 iteration is 1.025651454925537
Training loss at 57 iteration is 0.924511933129681


 19%|███████▋                                | 58/300 [29:50<2:05:24, 31.09s/it]

*** Test loss at 57 iteration is 1.1850720643997192
Training loss at 58 iteration is 0.9245341090131397


 20%|███████▊                                | 59/300 [30:23<2:06:28, 31.49s/it]

*** Test loss at 58 iteration is 1.0472346544265747
Training loss at 59 iteration is 0.9316314380030987


 20%|████████                                | 60/300 [30:54<2:05:07, 31.28s/it]

*** Test loss at 59 iteration is 0.9386945366859436
Training loss at 60 iteration is 0.9284246997399763


 20%|████████▏                               | 61/300 [31:25<2:04:16, 31.20s/it]

*** Test loss at 60 iteration is 1.0435221195220947
Training loss at 61 iteration is 0.9237867253871004


 21%|████████▎                               | 62/300 [31:56<2:03:34, 31.16s/it]

*** Test loss at 61 iteration is 1.04768967628479
Training loss at 62 iteration is 0.9228469172785104


 21%|████████▍                               | 63/300 [32:27<2:03:06, 31.17s/it]

*** Test loss at 62 iteration is 0.9066126942634583
Training loss at 63 iteration is 0.9270887596548096


 21%|████████▌                               | 64/300 [32:58<2:02:36, 31.17s/it]

*** Test loss at 63 iteration is 1.047703504562378
Training loss at 64 iteration is 0.930071092341557


 22%|████████▋                               | 65/300 [33:29<2:02:20, 31.24s/it]

*** Test loss at 64 iteration is 1.047689437866211
Training loss at 65 iteration is 0.9226254786341643


 22%|████████▊                               | 66/300 [34:00<2:01:30, 31.16s/it]

*** Test loss at 65 iteration is 1.0476869344711304
Training loss at 66 iteration is 0.9198817891522872


 22%|████████▉                               | 67/300 [34:31<2:00:58, 31.15s/it]

*** Test loss at 66 iteration is 1.061204433441162
Training loss at 67 iteration is 0.9254472551267009


 23%|█████████                               | 68/300 [35:03<2:01:15, 31.36s/it]

*** Test loss at 67 iteration is 1.1904405355453491
Training loss at 68 iteration is 0.9188461771681289


 23%|█████████▏                              | 69/300 [35:35<2:01:10, 31.47s/it]

*** Test loss at 68 iteration is 1.0467332601547241
Training loss at 69 iteration is 0.9194690762472547


 23%|█████████▎                              | 70/300 [36:07<2:01:37, 31.73s/it]

*** Test loss at 69 iteration is 1.046385645866394
Training loss at 70 iteration is 0.9181760958403595


 24%|█████████▍                              | 71/300 [36:39<2:01:16, 31.78s/it]

*** Test loss at 70 iteration is 1.0476162433624268
Training loss at 71 iteration is 0.924651745429709


 24%|█████████▌                              | 72/300 [37:11<2:00:40, 31.76s/it]

*** Test loss at 71 iteration is 1.0476809740066528
Training loss at 72 iteration is 0.9187300870241213


 24%|█████████▋                              | 73/300 [37:42<1:59:26, 31.57s/it]

*** Test loss at 72 iteration is 1.047706961631775
Training loss at 73 iteration is 0.9205064344997248


 25%|█████████▊                              | 74/300 [38:13<1:58:19, 31.41s/it]

*** Test loss at 73 iteration is 1.1905978918075562
Training loss at 74 iteration is 0.9183684183546335


 25%|██████████                              | 75/300 [38:45<1:57:57, 31.46s/it]

*** Test loss at 74 iteration is 1.0444077253341675
Training loss at 75 iteration is 0.9178521219363882


 25%|██████████▏                             | 76/300 [39:16<1:57:00, 31.34s/it]

*** Test loss at 75 iteration is 1.0476901531219482
Training loss at 76 iteration is 0.9182487248389188


 26%|██████████▎                             | 77/300 [39:48<1:57:52, 31.71s/it]

*** Test loss at 76 iteration is 1.0318114757537842
Training loss at 77 iteration is 0.9173165983404995


 26%|██████████▍                             | 78/300 [40:20<1:57:28, 31.75s/it]

*** Test loss at 77 iteration is 1.0483425855636597
Training loss at 78 iteration is 0.918667790810924


 26%|██████████▌                             | 79/300 [40:52<1:57:12, 31.82s/it]

*** Test loss at 78 iteration is 1.0310075283050537
Training loss at 79 iteration is 0.918141899522671


 27%|██████████▋                             | 80/300 [41:24<1:56:28, 31.76s/it]

*** Test loss at 79 iteration is 1.0427813529968262
Training loss at 80 iteration is 0.9178887605667114


 27%|██████████▊                             | 81/300 [41:56<1:56:37, 31.95s/it]

*** Test loss at 80 iteration is 0.9962320327758789
Training loss at 81 iteration is 0.919297058720234


 27%|██████████▉                             | 82/300 [42:27<1:55:14, 31.72s/it]

*** Test loss at 81 iteration is 1.0726568698883057
Training loss at 82 iteration is 0.9156092464431258


 28%|███████████                             | 83/300 [42:59<1:55:02, 31.81s/it]

*** Test loss at 82 iteration is 1.04768967628479
Training loss at 83 iteration is 0.920743935364337


 28%|███████████▏                            | 84/300 [43:31<1:54:15, 31.74s/it]

*** Test loss at 83 iteration is 1.0476895570755005
Training loss at 84 iteration is 0.9120451906495843


 28%|███████████▎                            | 85/300 [44:02<1:52:38, 31.43s/it]

*** Test loss at 84 iteration is 1.0476808547973633
Training loss at 85 iteration is 0.9153486588769708


 29%|███████████▍                            | 86/300 [44:32<1:51:18, 31.21s/it]

*** Test loss at 85 iteration is 1.0479806661605835
Training loss at 86 iteration is 0.9164497610951258


 29%|███████████▌                            | 87/300 [45:05<1:51:49, 31.50s/it]

*** Test loss at 86 iteration is 1.0759724378585815
Training loss at 87 iteration is 0.9157251267393759


 29%|███████████▋                            | 88/300 [45:35<1:50:35, 31.30s/it]

*** Test loss at 87 iteration is 1.1575676202774048
Training loss at 88 iteration is 0.9109515375342251


 30%|███████████▊                            | 89/300 [46:07<1:50:40, 31.47s/it]

*** Test loss at 88 iteration is 1.0436245203018188
Training loss at 89 iteration is 0.915585409018619


 30%|████████████                            | 90/300 [46:39<1:50:44, 31.64s/it]

*** Test loss at 89 iteration is 1.0422369241714478
Training loss at 90 iteration is 0.9121882393340434


 30%|████████████▏                           | 91/300 [47:10<1:49:19, 31.39s/it]

*** Test loss at 90 iteration is 1.0449942350387573
Training loss at 91 iteration is 0.9125756296244535


 31%|████████████▎                           | 92/300 [47:42<1:49:45, 31.66s/it]

*** Test loss at 91 iteration is 1.0405226945877075
Training loss at 92 iteration is 0.9131176210632009


 31%|████████████▍                           | 93/300 [48:14<1:48:53, 31.56s/it]

*** Test loss at 92 iteration is 0.962673544883728
Training loss at 93 iteration is 0.9121429210852


 31%|████████████▌                           | 94/300 [48:44<1:47:24, 31.28s/it]

*** Test loss at 93 iteration is 1.0483438968658447
Training loss at 94 iteration is 0.9120135149679893


 32%|████████████▋                           | 95/300 [49:16<1:46:48, 31.26s/it]

*** Test loss at 94 iteration is 1.0476914644241333
Training loss at 95 iteration is 0.9170676893439175


 32%|████████████▊                           | 96/300 [49:47<1:46:18, 31.27s/it]

*** Test loss at 95 iteration is 1.1750808954238892
Training loss at 96 iteration is 0.911706562869805


 32%|████████████▉                           | 97/300 [50:20<1:47:19, 31.72s/it]

*** Test loss at 96 iteration is 0.9054147601127625
Training loss at 97 iteration is 0.9187376671586155


 33%|█████████████                           | 98/300 [50:50<1:45:45, 31.41s/it]

*** Test loss at 97 iteration is 1.047350287437439
Training loss at 98 iteration is 0.9111722441744213


 33%|█████████████▏                          | 99/300 [51:21<1:44:30, 31.19s/it]

*** Test loss at 98 iteration is 1.0476056337356567
Training loss at 99 iteration is 0.9131208323250132


 33%|█████████████                          | 100/300 [51:52<1:43:35, 31.08s/it]

*** Test loss at 99 iteration is 1.1592971086502075
Training loss at 100 iteration is 0.9191433575527728


 34%|█████████████▏                         | 101/300 [52:23<1:42:41, 30.96s/it]

*** Test loss at 100 iteration is 1.0473120212554932
Training loss at 101 iteration is 0.9120088943765183


 34%|█████████████▎                         | 102/300 [52:53<1:41:50, 30.86s/it]

*** Test loss at 101 iteration is 1.0335441827774048
Training loss at 102 iteration is 0.9161266393897948


 34%|█████████████▍                         | 103/300 [53:24<1:41:25, 30.89s/it]

*** Test loss at 102 iteration is 1.0421066284179688
Training loss at 103 iteration is 0.9122530601241372


 35%|█████████████▌                         | 104/300 [53:55<1:40:58, 30.91s/it]

*** Test loss at 103 iteration is 1.04768967628479
Training loss at 104 iteration is 0.9078656671460995


 35%|█████████████▋                         | 105/300 [54:27<1:41:04, 31.10s/it]

*** Test loss at 104 iteration is 1.0476880073547363
Training loss at 105 iteration is 0.9108947357855552


 35%|█████████████▊                         | 106/300 [54:57<1:40:08, 30.97s/it]

*** Test loss at 105 iteration is 1.048162579536438
Training loss at 106 iteration is 0.9120719368792762


 36%|█████████████▉                         | 107/300 [55:28<1:39:34, 30.95s/it]

*** Test loss at 106 iteration is 1.0477067232131958
Training loss at 107 iteration is 0.9106589383330227


 36%|██████████████                         | 108/300 [55:59<1:38:40, 30.84s/it]

*** Test loss at 107 iteration is 1.2404192686080933
Training loss at 108 iteration is 0.9145796451686827


 36%|██████████████▏                        | 109/300 [56:31<1:39:13, 31.17s/it]

*** Test loss at 108 iteration is 1.0476890802383423
Training loss at 109 iteration is 0.909897800319451


 37%|██████████████▎                        | 110/300 [57:02<1:38:26, 31.09s/it]

*** Test loss at 109 iteration is 1.0477044582366943
Training loss at 110 iteration is 0.9112157998991407


 37%|██████████████▍                        | 111/300 [57:34<1:38:58, 31.42s/it]

*** Test loss at 110 iteration is 0.9048556089401245
Training loss at 111 iteration is 0.9115600906127741


 37%|██████████████▌                        | 112/300 [58:05<1:37:57, 31.27s/it]

*** Test loss at 111 iteration is 0.9606423377990723
Training loss at 112 iteration is 0.9111719732442178


 38%|██████████████▋                        | 113/300 [58:36<1:37:13, 31.19s/it]

*** Test loss at 112 iteration is 0.9057149291038513
Training loss at 113 iteration is 0.9100401091181543


 38%|██████████████▊                        | 114/300 [59:06<1:35:59, 30.97s/it]

*** Test loss at 113 iteration is 0.9122328758239746
Training loss at 114 iteration is 0.9113644952616415


 38%|██████████████▉                        | 115/300 [59:38<1:36:29, 31.30s/it]

*** Test loss at 114 iteration is 1.0476387739181519
Training loss at 115 iteration is 0.9116331856112835


 39%|██████████████▎                      | 116/300 [1:00:09<1:35:41, 31.21s/it]

*** Test loss at 115 iteration is 1.1827656030654907
Training loss at 116 iteration is 0.9107596071298457


 39%|██████████████▍                      | 117/300 [1:00:40<1:34:45, 31.07s/it]

*** Test loss at 116 iteration is 1.0479867458343506
Training loss at 117 iteration is 0.9117169961456425


 39%|██████████████▌                      | 118/300 [1:01:11<1:34:06, 31.03s/it]

*** Test loss at 117 iteration is 1.0865941047668457
Training loss at 118 iteration is 0.9158078372971086


 40%|██████████████▋                      | 119/300 [1:01:42<1:33:31, 31.00s/it]

*** Test loss at 118 iteration is 1.0448449850082397
Training loss at 119 iteration is 0.907169213472319


 40%|██████████████▊                      | 120/300 [1:02:13<1:32:49, 30.94s/it]

*** Test loss at 119 iteration is 0.9658335447311401
Training loss at 120 iteration is 0.9096825511987544


 40%|██████████████▉                      | 121/300 [1:02:44<1:32:58, 31.16s/it]

*** Test loss at 120 iteration is 1.0453115701675415
Training loss at 121 iteration is 0.9095628478310325


 41%|███████████████                      | 122/300 [1:03:15<1:31:54, 30.98s/it]

*** Test loss at 121 iteration is 1.0350887775421143
Training loss at 122 iteration is 0.9092155749147589


 41%|███████████████▏                     | 123/300 [1:03:45<1:30:41, 30.74s/it]

*** Test loss at 122 iteration is 1.0474298000335693
Training loss at 123 iteration is 0.9087348521248368


 41%|███████████████▎                     | 124/300 [1:04:15<1:29:29, 30.51s/it]

*** Test loss at 123 iteration is 1.0474607944488525
Training loss at 124 iteration is 0.9080820467846453


 42%|███████████████▍                     | 125/300 [1:04:45<1:28:18, 30.27s/it]

*** Test loss at 124 iteration is 0.9730559587478638
Training loss at 125 iteration is 0.9123464601098998


 42%|███████████████▌                     | 126/300 [1:05:14<1:27:12, 30.07s/it]

*** Test loss at 125 iteration is 0.9144271612167358
Training loss at 126 iteration is 0.9085642779169004


 42%|███████████████▋                     | 127/300 [1:05:44<1:26:02, 29.84s/it]

*** Test loss at 126 iteration is 1.0408527851104736
Training loss at 127 iteration is 0.9102957071351611


 43%|███████████████▊                     | 128/300 [1:06:12<1:24:31, 29.49s/it]

*** Test loss at 127 iteration is 1.1237303018569946
Training loss at 128 iteration is 0.9101722994126564


 43%|███████████████▉                     | 129/300 [1:06:41<1:23:41, 29.37s/it]

*** Test loss at 128 iteration is 0.9987272024154663
Training loss at 129 iteration is 0.9085173986174844


 43%|████████████████                     | 130/300 [1:07:10<1:22:38, 29.17s/it]

*** Test loss at 129 iteration is 1.047690987586975
Training loss at 130 iteration is 0.9074134314355772


 44%|████████████████▏                    | 131/300 [1:07:39<1:21:51, 29.06s/it]

*** Test loss at 130 iteration is 1.0438404083251953
Training loss at 131 iteration is 0.9103884293028146


 44%|████████████████▎                    | 132/300 [1:08:07<1:20:48, 28.86s/it]

*** Test loss at 131 iteration is 1.0480104684829712
Training loss at 132 iteration is 0.9114018319066891


 44%|████████████████▍                    | 133/300 [1:08:36<1:20:02, 28.76s/it]

*** Test loss at 132 iteration is 1.162430763244629
Training loss at 133 iteration is 0.9082581277721185


 45%|████████████████▌                    | 134/300 [1:09:04<1:19:25, 28.71s/it]

*** Test loss at 133 iteration is 1.1595516204833984
Training loss at 134 iteration is 0.9085613705895164


 45%|████████████████▋                    | 135/300 [1:09:33<1:19:11, 28.80s/it]

*** Test loss at 134 iteration is 1.0478311777114868
Training loss at 135 iteration is 0.9079929797117375


 45%|████████████████▊                    | 136/300 [1:10:02<1:18:28, 28.71s/it]

*** Test loss at 135 iteration is 1.0465927124023438
Training loss at 136 iteration is 0.9056251324897955


 46%|████████████████▉                    | 137/300 [1:10:30<1:17:45, 28.63s/it]

*** Test loss at 136 iteration is 0.9054142832756042
Training loss at 137 iteration is 0.9113593515285776


 46%|█████████████████                    | 138/300 [1:10:59<1:17:03, 28.54s/it]

*** Test loss at 137 iteration is 1.0477123260498047
Training loss at 138 iteration is 0.9103974867458186


 46%|█████████████████▏                   | 139/300 [1:11:27<1:16:31, 28.52s/it]

*** Test loss at 138 iteration is 1.0444138050079346
Training loss at 139 iteration is 0.9105685171016977


 47%|█████████████████▎                   | 140/300 [1:11:55<1:15:51, 28.45s/it]

*** Test loss at 139 iteration is 1.083929181098938
Training loss at 140 iteration is 0.9077017070833316


 47%|█████████████████▍                   | 141/300 [1:12:24<1:15:39, 28.55s/it]

*** Test loss at 140 iteration is 1.163388967514038
Training loss at 141 iteration is 0.9106492104609151


 47%|█████████████████▌                   | 142/300 [1:12:53<1:15:02, 28.50s/it]

*** Test loss at 141 iteration is 1.0476957559585571
Training loss at 142 iteration is 0.9087749463467558


 48%|█████████████████▋                   | 143/300 [1:13:21<1:14:18, 28.40s/it]

*** Test loss at 142 iteration is 1.0477405786514282
Training loss at 143 iteration is 0.9064078513255789


 48%|█████████████████▊                   | 144/300 [1:13:49<1:13:40, 28.34s/it]

*** Test loss at 143 iteration is 1.0435678958892822
Training loss at 144 iteration is 0.9076619300960509


 48%|█████████████████▉                   | 145/300 [1:14:17<1:13:04, 28.29s/it]

*** Test loss at 144 iteration is 1.0477229356765747
Training loss at 145 iteration is 0.9088596992256227


 49%|██████████████████                   | 146/300 [1:14:45<1:12:27, 28.23s/it]

*** Test loss at 145 iteration is 1.0477101802825928
Training loss at 146 iteration is 0.9074273986264694


 49%|██████████████████▏                  | 147/300 [1:15:13<1:11:54, 28.20s/it]

*** Test loss at 146 iteration is 1.0480762720108032
Training loss at 147 iteration is 0.9069122795231086


 49%|██████████████████▎                  | 148/300 [1:15:42<1:11:40, 28.29s/it]

*** Test loss at 147 iteration is 1.0476893186569214
Training loss at 148 iteration is 0.9058322482857822


 50%|██████████████████▍                  | 149/300 [1:16:10<1:11:09, 28.27s/it]

*** Test loss at 148 iteration is 1.0442713499069214
Training loss at 149 iteration is 0.9060976209719319


 50%|██████████████████▌                  | 150/300 [1:16:38<1:10:32, 28.21s/it]

*** Test loss at 149 iteration is 1.04768967628479
Training loss at 150 iteration is 0.9102379256043552


 50%|██████████████████▌                  | 151/300 [1:17:08<1:10:52, 28.54s/it]

*** Test loss at 150 iteration is 1.0472050905227661
Training loss at 151 iteration is 0.9080031849136038


 51%|██████████████████▋                  | 152/300 [1:17:36<1:10:15, 28.48s/it]

*** Test loss at 151 iteration is 1.0476895570755005
Training loss at 152 iteration is 0.9057447373374434


 51%|██████████████████▊                  | 153/300 [1:18:04<1:09:43, 28.46s/it]

*** Test loss at 152 iteration is 1.04768967628479
Training loss at 153 iteration is 0.906522901097605


 51%|██████████████████▉                  | 154/300 [1:18:33<1:09:10, 28.43s/it]

*** Test loss at 153 iteration is 1.0476853847503662
Training loss at 154 iteration is 0.9082396493470373


 52%|███████████████████                  | 155/300 [1:19:01<1:08:50, 28.49s/it]

*** Test loss at 154 iteration is 1.0476889610290527
Training loss at 155 iteration is 0.9104324850169095


 52%|███████████████████▏                 | 156/300 [1:19:30<1:08:18, 28.46s/it]

*** Test loss at 155 iteration is 1.0476897954940796
Training loss at 156 iteration is 0.9061215165232824


 52%|███████████████████▎                 | 157/300 [1:19:59<1:08:10, 28.61s/it]

*** Test loss at 156 iteration is 1.0476915836334229
Training loss at 157 iteration is 0.9066912162402445


 53%|███████████████████▍                 | 158/300 [1:20:27<1:07:30, 28.52s/it]

*** Test loss at 157 iteration is 1.047690510749817
Training loss at 158 iteration is 0.9079666177103342


 53%|███████████████████▌                 | 159/300 [1:20:55<1:06:55, 28.48s/it]

*** Test loss at 158 iteration is 1.0477226972579956
Training loss at 159 iteration is 0.9091876518627828


 53%|███████████████████▋                 | 160/300 [1:21:24<1:06:26, 28.48s/it]

*** Test loss at 159 iteration is 1.0857619047164917
Training loss at 160 iteration is 0.9144974863233645


 54%|███████████████████▊                 | 161/300 [1:21:52<1:06:00, 28.49s/it]

*** Test loss at 160 iteration is 1.0477168560028076
Training loss at 161 iteration is 0.9098472944961107


 54%|███████████████████▉                 | 162/300 [1:22:20<1:05:19, 28.40s/it]

*** Test loss at 161 iteration is 1.04768967628479
Training loss at 162 iteration is 0.907895801973737


 54%|████████████████████                 | 163/300 [1:22:49<1:04:57, 28.45s/it]

*** Test loss at 162 iteration is 1.0476895570755005
Training loss at 163 iteration is 0.9072238354643515


 55%|████████████████████▏                | 164/300 [1:23:17<1:04:21, 28.39s/it]

*** Test loss at 163 iteration is 1.04768967628479
Training loss at 164 iteration is 0.9085707703897775


 55%|████████████████████▎                | 165/300 [1:23:46<1:03:46, 28.35s/it]

*** Test loss at 164 iteration is 1.0476897954940796
Training loss at 165 iteration is 0.9071257089780382


 55%|████████████████████▍                | 166/300 [1:24:14<1:03:18, 28.35s/it]

*** Test loss at 165 iteration is 1.0476906299591064
Training loss at 166 iteration is 0.9060517992855104


 56%|████████████████████▌                | 167/300 [1:24:42<1:02:45, 28.31s/it]

*** Test loss at 166 iteration is 1.0476897954940796
Training loss at 167 iteration is 0.9101780618517852


 56%|████████████████████▋                | 168/300 [1:25:10<1:02:16, 28.31s/it]

*** Test loss at 167 iteration is 1.0476897954940796
Training loss at 168 iteration is 0.9095644901606662


 56%|████████████████████▊                | 169/300 [1:25:39<1:01:40, 28.25s/it]

*** Test loss at 168 iteration is 1.0483604669570923
Training loss at 169 iteration is 0.9080948770538835


 57%|████████████████████▉                | 170/300 [1:26:07<1:01:11, 28.24s/it]

*** Test loss at 169 iteration is 1.0480523109436035
Training loss at 170 iteration is 0.907969491540893


 57%|█████████████████████                | 171/300 [1:26:35<1:00:36, 28.19s/it]

*** Test loss at 170 iteration is 1.059377908706665
Training loss at 171 iteration is 0.9062208311616882


 57%|█████████████████████▏               | 172/300 [1:27:03<1:00:02, 28.15s/it]

*** Test loss at 171 iteration is 1.0477558374404907
Training loss at 172 iteration is 0.9076294371904421


 58%|██████████████████████▍                | 173/300 [1:27:32<59:57, 28.33s/it]

*** Test loss at 172 iteration is 1.048566460609436
Training loss at 173 iteration is 0.9090225607895654


 58%|█████████████████████▍               | 174/300 [1:28:01<1:00:23, 28.76s/it]

*** Test loss at 173 iteration is 1.0477601289749146
Training loss at 174 iteration is 0.9073948436532139


 58%|█████████████████████▌               | 175/300 [1:28:31<1:00:14, 28.91s/it]

*** Test loss at 174 iteration is 1.04768967628479
Training loss at 175 iteration is 0.9089245810981624


 59%|██████████████████████▉                | 176/300 [1:28:59<59:24, 28.75s/it]

*** Test loss at 175 iteration is 1.0476897954940796
Training loss at 176 iteration is 0.9064030460089691


 59%|███████████████████████                | 177/300 [1:29:27<58:35, 28.58s/it]

*** Test loss at 176 iteration is 1.04769766330719
Training loss at 177 iteration is 0.9093802073770318


 59%|███████████████████████▏               | 178/300 [1:29:56<58:05, 28.57s/it]

*** Test loss at 177 iteration is 1.04768967628479
Training loss at 178 iteration is 0.907447937106298


 60%|███████████████████████▎               | 179/300 [1:30:24<57:16, 28.40s/it]

*** Test loss at 178 iteration is 1.04768967628479
Training loss at 179 iteration is 0.9097385041969867


 60%|███████████████████████▍               | 180/300 [1:30:52<56:36, 28.31s/it]

*** Test loss at 179 iteration is 1.047698974609375
Training loss at 180 iteration is 0.9111654595895247


 60%|███████████████████████▌               | 181/300 [1:31:20<56:16, 28.37s/it]

*** Test loss at 180 iteration is 1.047690749168396
Training loss at 181 iteration is 0.906695387580178


 61%|███████████████████████▋               | 182/300 [1:31:49<56:03, 28.50s/it]

*** Test loss at 181 iteration is 1.0505515336990356
Training loss at 182 iteration is 0.9083687623670279


 61%|███████████████████████▊               | 183/300 [1:32:18<55:35, 28.51s/it]

*** Test loss at 182 iteration is 1.04768967628479
Training loss at 183 iteration is 0.9070353769073801


 61%|███████████████████████▉               | 184/300 [1:32:46<54:57, 28.43s/it]

*** Test loss at 183 iteration is 1.04768967628479
Training loss at 184 iteration is 0.9071228770185108


 62%|████████████████████████               | 185/300 [1:33:15<54:45, 28.57s/it]

*** Test loss at 184 iteration is 1.047690510749817
Training loss at 185 iteration is 0.9055327196751745


 62%|████████████████████████▏              | 186/300 [1:33:44<54:25, 28.64s/it]

*** Test loss at 185 iteration is 1.04768967628479
Training loss at 186 iteration is 0.906557757007189


 62%|████████████████████████▎              | 187/300 [1:34:12<53:48, 28.57s/it]

*** Test loss at 186 iteration is 1.0477102994918823
Training loss at 187 iteration is 0.9068657346993438


 63%|████████████████████████▍              | 188/300 [1:34:40<53:12, 28.50s/it]

*** Test loss at 187 iteration is 1.0478993654251099
Training loss at 188 iteration is 0.9061319502917203


 63%|████████████████████████▌              | 189/300 [1:35:10<53:05, 28.70s/it]

*** Test loss at 188 iteration is 1.04768967628479
Training loss at 189 iteration is 0.9093905677480146


 63%|████████████████████████▋              | 190/300 [1:35:39<52:50, 28.83s/it]

*** Test loss at 189 iteration is 1.0479143857955933
Training loss at 190 iteration is 0.9065052492559449


 64%|████████████████████████▊              | 191/300 [1:36:07<52:21, 28.82s/it]

*** Test loss at 190 iteration is 1.0708500146865845
Training loss at 191 iteration is 0.9076346656507697


 64%|████████████████████████▉              | 192/300 [1:36:36<51:55, 28.85s/it]

*** Test loss at 191 iteration is 1.04768967628479
Training loss at 192 iteration is 0.9085157094908155


 64%|█████████████████████████              | 193/300 [1:37:05<51:10, 28.69s/it]

*** Test loss at 192 iteration is 1.04768967628479
Training loss at 193 iteration is 0.906430533602218


 65%|█████████████████████████▏             | 194/300 [1:37:33<50:26, 28.55s/it]

*** Test loss at 193 iteration is 1.04768967628479
Training loss at 194 iteration is 0.9063789322356547


 65%|█████████████████████████▎             | 195/300 [1:38:01<49:57, 28.55s/it]

*** Test loss at 194 iteration is 1.0476902723312378
Training loss at 195 iteration is 0.9077576940709894


 65%|█████████████████████████▍             | 196/300 [1:38:31<49:56, 28.81s/it]

*** Test loss at 195 iteration is 1.0477173328399658
Training loss at 196 iteration is 0.9060605557496882


 66%|█████████████████████████▌             | 197/300 [1:38:59<49:00, 28.55s/it]

*** Test loss at 196 iteration is 1.0476900339126587
Training loss at 197 iteration is 0.9092596473772664


 66%|█████████████████████████▋             | 198/300 [1:39:27<48:24, 28.48s/it]

*** Test loss at 197 iteration is 1.04768967628479
Training loss at 198 iteration is 0.9060333893318807


 66%|█████████████████████████▊             | 199/300 [1:39:56<47:59, 28.51s/it]

*** Test loss at 198 iteration is 1.0476964712142944
Training loss at 199 iteration is 0.9053898172930253


 67%|██████████████████████████             | 200/300 [1:40:24<47:22, 28.42s/it]

*** Test loss at 199 iteration is 1.04768967628479
Training loss at 200 iteration is 0.90639052065936


 67%|██████████████████████████▏            | 201/300 [1:40:52<46:46, 28.35s/it]

*** Test loss at 200 iteration is 1.0476897954940796
Training loss at 201 iteration is 0.9057315735777548


 67%|██████████████████████████▎            | 202/300 [1:41:21<46:20, 28.37s/it]

*** Test loss at 201 iteration is 1.0477064847946167
Training loss at 202 iteration is 0.9087553984862714


 68%|██████████████████████████▍            | 203/300 [1:41:49<45:46, 28.31s/it]

*** Test loss at 202 iteration is 1.04768967628479
Training loss at 203 iteration is 0.9085467773035538


 68%|██████████████████████████▌            | 204/300 [1:42:17<45:23, 28.37s/it]

*** Test loss at 203 iteration is 1.0477031469345093
Training loss at 204 iteration is 0.9067029145138323


 68%|██████████████████████████▋            | 205/300 [1:42:45<44:50, 28.32s/it]

*** Test loss at 204 iteration is 1.0476993322372437
Training loss at 205 iteration is 0.9058827196270967


 69%|██████████████████████████▊            | 206/300 [1:43:14<44:32, 28.43s/it]

*** Test loss at 205 iteration is 1.04768967628479
Training loss at 206 iteration is 0.9070391251035959


 69%|██████████████████████████▉            | 207/300 [1:43:42<44:02, 28.41s/it]

*** Test loss at 206 iteration is 1.0476897954940796
Training loss at 207 iteration is 0.9084442499255346


 69%|███████████████████████████            | 208/300 [1:44:11<43:24, 28.31s/it]

*** Test loss at 207 iteration is 1.0476936101913452
Training loss at 208 iteration is 0.9059522531249307


 70%|███████████████████████████▏           | 209/300 [1:44:39<42:52, 28.27s/it]

*** Test loss at 208 iteration is 1.0522814989089966
Training loss at 209 iteration is 0.9051835857147028


 70%|███████████████████████████▎           | 210/300 [1:45:07<42:23, 28.26s/it]

*** Test loss at 209 iteration is 1.0476897954940796
Training loss at 210 iteration is 0.9066332563881047


 70%|███████████████████████████▍           | 211/300 [1:45:35<41:56, 28.27s/it]

*** Test loss at 210 iteration is 1.0477005243301392
Training loss at 211 iteration is 0.9067387600575597


 71%|███████████████████████████▌           | 212/300 [1:46:05<41:56, 28.60s/it]

*** Test loss at 211 iteration is 1.0477592945098877
Training loss at 212 iteration is 0.9081549255315923


 71%|███████████████████████████▋           | 213/300 [1:46:34<41:48, 28.83s/it]

*** Test loss at 212 iteration is 1.066275954246521
Training loss at 213 iteration is 0.9055525478252695


 71%|███████████████████████████▊           | 214/300 [1:47:02<41:08, 28.70s/it]

*** Test loss at 213 iteration is 1.0476669073104858
Training loss at 214 iteration is 0.9053442798370173


 72%|███████████████████████████▉           | 215/300 [1:47:31<40:48, 28.80s/it]

*** Test loss at 214 iteration is 1.0476895570755005
Training loss at 215 iteration is 0.9081470990969129


 72%|████████████████████████████           | 216/300 [1:48:00<40:18, 28.79s/it]

*** Test loss at 215 iteration is 0.9848775267601013
Training loss at 216 iteration is 0.9094506300185338


 72%|████████████████████████████▏          | 217/300 [1:48:29<39:59, 28.91s/it]

*** Test loss at 216 iteration is 0.9572054147720337
Training loss at 217 iteration is 0.9069794155349417


 73%|████████████████████████████▎          | 218/300 [1:48:58<39:30, 28.91s/it]

*** Test loss at 217 iteration is 1.0476888418197632
Training loss at 218 iteration is 0.9058946247928399


 73%|████████████████████████████▍          | 219/300 [1:49:28<39:20, 29.14s/it]

*** Test loss at 218 iteration is 1.0476819276809692
Training loss at 219 iteration is 0.9099799990653992


 73%|████████████████████████████▌          | 220/300 [1:49:57<38:46, 29.08s/it]

*** Test loss at 219 iteration is 1.04768967628479
Training loss at 220 iteration is 0.9059693241907545


 74%|████████████████████████████▋          | 221/300 [1:50:26<38:09, 28.99s/it]

*** Test loss at 220 iteration is 1.0476895570755005
Training loss at 221 iteration is 0.9101364903213564


 74%|████████████████████████████▊          | 222/300 [1:50:55<37:48, 29.08s/it]

*** Test loss at 221 iteration is 1.0290729999542236
Training loss at 222 iteration is 0.9126386386303862


 74%|████████████████████████████▉          | 223/300 [1:51:24<37:28, 29.20s/it]

*** Test loss at 222 iteration is 1.0476768016815186
Training loss at 223 iteration is 0.9048586152801829


 75%|█████████████████████████████          | 224/300 [1:51:53<36:53, 29.13s/it]

*** Test loss at 223 iteration is 0.9299924969673157
Training loss at 224 iteration is 0.9070203644185026


 75%|█████████████████████████████▎         | 225/300 [1:52:22<36:15, 29.01s/it]

*** Test loss at 224 iteration is 1.047838568687439
Training loss at 225 iteration is 0.9064607457681135


 75%|█████████████████████████████▍         | 226/300 [1:52:52<36:02, 29.22s/it]

*** Test loss at 225 iteration is 1.04768967628479
Training loss at 226 iteration is 0.9054445855873675


 76%|█████████████████████████████▌         | 227/300 [1:53:21<35:21, 29.06s/it]

*** Test loss at 226 iteration is 1.041540265083313
Training loss at 227 iteration is 0.9058061754407961


 76%|█████████████████████████████▋         | 228/300 [1:53:49<34:48, 29.00s/it]

*** Test loss at 227 iteration is 0.9106640219688416
Training loss at 228 iteration is 0.9057658708785191


 76%|█████████████████████████████▊         | 229/300 [1:54:18<34:18, 28.99s/it]

*** Test loss at 228 iteration is 1.0477221012115479
Training loss at 229 iteration is 0.9062689941776686


 77%|█████████████████████████████▉         | 230/300 [1:54:47<33:45, 28.94s/it]

*** Test loss at 229 iteration is 1.0476914644241333
Training loss at 230 iteration is 0.9054316055676168


 77%|██████████████████████████████         | 231/300 [1:55:16<33:12, 28.88s/it]

*** Test loss at 230 iteration is 1.0493911504745483
Training loss at 231 iteration is 0.9079500047628545


 77%|██████████████████████████████▏        | 232/300 [1:55:44<32:34, 28.75s/it]

*** Test loss at 231 iteration is 1.0480856895446777
Training loss at 232 iteration is 0.9073033263860655


 78%|██████████████████████████████▎        | 233/300 [1:56:13<31:58, 28.64s/it]

*** Test loss at 232 iteration is 1.0476895570755005
Training loss at 233 iteration is 0.9049618086539024


 78%|██████████████████████████████▍        | 234/300 [1:56:41<31:30, 28.65s/it]

*** Test loss at 233 iteration is 1.047629714012146
Training loss at 234 iteration is 0.9071892898930006


 78%|██████████████████████████████▌        | 235/300 [1:57:11<31:11, 28.79s/it]

*** Test loss at 234 iteration is 1.04768967628479
Training loss at 235 iteration is 0.9049380938868877


 79%|██████████████████████████████▋        | 236/300 [1:57:40<30:55, 28.99s/it]

*** Test loss at 235 iteration is 1.0232954025268555
Training loss at 236 iteration is 0.905760703008037


 79%|██████████████████████████████▊        | 237/300 [1:58:09<30:22, 28.93s/it]

*** Test loss at 236 iteration is 1.0477548837661743
Training loss at 237 iteration is 0.905704699764567


 79%|██████████████████████████████▉        | 238/300 [1:58:37<29:46, 28.81s/it]

*** Test loss at 237 iteration is 1.0467368364334106
Training loss at 238 iteration is 0.907188853941673


 80%|███████████████████████████████        | 239/300 [1:59:06<29:13, 28.74s/it]

*** Test loss at 238 iteration is 1.0476816892623901
Training loss at 239 iteration is 0.907627509645194


 80%|███████████████████████████████▏       | 240/300 [1:59:34<28:38, 28.65s/it]

*** Test loss at 239 iteration is 1.0411790609359741
Training loss at 240 iteration is 0.9086347616408482


 80%|███████████████████████████████▎       | 241/300 [2:00:03<28:10, 28.65s/it]

*** Test loss at 240 iteration is 1.047210931777954
Training loss at 241 iteration is 0.9067373152606744


 81%|███████████████████████████████▍       | 242/300 [2:00:32<27:45, 28.71s/it]

*** Test loss at 241 iteration is 1.0494202375411987
Training loss at 242 iteration is 0.9053538638698168


 81%|███████████████████████████████▌       | 243/300 [2:01:00<27:11, 28.61s/it]

*** Test loss at 242 iteration is 1.04768967628479
Training loss at 243 iteration is 0.9054457136422149


 81%|███████████████████████████████▋       | 244/300 [2:01:30<27:01, 28.95s/it]

*** Test loss at 243 iteration is 1.04768967628479
Training loss at 244 iteration is 0.9091388652147341


 82%|███████████████████████████████▊       | 245/300 [2:01:59<26:34, 28.98s/it]

*** Test loss at 244 iteration is 1.04768967628479
Training loss at 245 iteration is 0.9114415221963047


 82%|███████████████████████████████▉       | 246/300 [2:02:27<25:55, 28.80s/it]

*** Test loss at 245 iteration is 1.0476901531219482
Training loss at 246 iteration is 0.9057319036200027


 82%|████████████████████████████████       | 247/300 [2:02:57<25:32, 28.92s/it]

*** Test loss at 246 iteration is 1.0476964712142944
Training loss at 247 iteration is 0.9061793822887515


 83%|████████████████████████████████▏      | 248/300 [2:03:26<25:16, 29.16s/it]

*** Test loss at 247 iteration is 1.0476891994476318
Training loss at 248 iteration is 0.9057370907018992


 83%|████████████████████████████████▎      | 249/300 [2:03:56<24:55, 29.33s/it]

*** Test loss at 248 iteration is 1.0473929643630981
Training loss at 249 iteration is 0.9060245348402292


 83%|████████████████████████████████▌      | 250/300 [2:04:25<24:15, 29.11s/it]

*** Test loss at 249 iteration is 1.0190421342849731
Training loss at 250 iteration is 0.9072522545649


 84%|████████████████████████████████▋      | 251/300 [2:04:53<23:39, 28.97s/it]

*** Test loss at 250 iteration is 0.9051631093025208
Training loss at 251 iteration is 0.906196886842901


 84%|████████████████████████████████▊      | 252/300 [2:05:22<23:03, 28.82s/it]

*** Test loss at 251 iteration is 1.0473344326019287
Training loss at 252 iteration is 0.905332052510632


 84%|████████████████████████████████▉      | 253/300 [2:05:51<22:34, 28.81s/it]

*** Test loss at 252 iteration is 0.9578033089637756
Training loss at 253 iteration is 0.9064631422689139


 85%|█████████████████████████████████      | 254/300 [2:06:20<22:13, 28.99s/it]

*** Test loss at 253 iteration is 1.0476897954940796
Training loss at 254 iteration is 0.9067234968350939


 85%|█████████████████████████████████▏     | 255/300 [2:06:48<21:35, 28.78s/it]

*** Test loss at 254 iteration is 0.9475013017654419
Training loss at 255 iteration is 0.9123223069285559


 85%|█████████████████████████████████▎     | 256/300 [2:07:17<21:07, 28.80s/it]

*** Test loss at 255 iteration is 1.0476765632629395
Training loss at 256 iteration is 0.9051038440594004


 86%|█████████████████████████████████▍     | 257/300 [2:07:46<20:36, 28.76s/it]

*** Test loss at 256 iteration is 0.9048339128494263
Training loss at 257 iteration is 0.9094077171372973


 86%|█████████████████████████████████▌     | 258/300 [2:08:15<20:10, 28.82s/it]

*** Test loss at 257 iteration is 1.018385648727417
Training loss at 258 iteration is 0.9065639184526175


 86%|█████████████████████████████████▋     | 259/300 [2:08:43<19:36, 28.71s/it]

*** Test loss at 258 iteration is 1.03408682346344
Training loss at 259 iteration is 0.9060494293851301


 87%|█████████████████████████████████▊     | 260/300 [2:09:12<19:05, 28.65s/it]

*** Test loss at 259 iteration is 1.0470508337020874
Training loss at 260 iteration is 0.9065707040227149


 87%|█████████████████████████████████▉     | 261/300 [2:09:40<18:36, 28.64s/it]

*** Test loss at 260 iteration is 1.0466567277908325
Training loss at 261 iteration is 0.906445082061547


 87%|██████████████████████████████████     | 262/300 [2:10:09<18:12, 28.75s/it]

*** Test loss at 261 iteration is 1.0476893186569214
Training loss at 262 iteration is 0.9086839926144308


 88%|██████████████████████████████████▏    | 263/300 [2:10:38<17:41, 28.70s/it]

*** Test loss at 262 iteration is 1.0476887226104736
Training loss at 263 iteration is 0.9066878493167152


 88%|██████████████████████████████████▎    | 264/300 [2:11:07<17:12, 28.67s/it]

*** Test loss at 263 iteration is 1.04768967628479
Training loss at 264 iteration is 0.9072313545163998


 88%|██████████████████████████████████▍    | 265/300 [2:11:36<16:46, 28.76s/it]

*** Test loss at 264 iteration is 0.9048353433609009
Training loss at 265 iteration is 0.9059117470891023


 89%|██████████████████████████████████▌    | 266/300 [2:12:05<16:24, 28.95s/it]

*** Test loss at 265 iteration is 1.04768967628479
Training loss at 266 iteration is 0.906655325377283


 89%|██████████████████████████████████▋    | 267/300 [2:12:34<15:56, 28.99s/it]

*** Test loss at 266 iteration is 1.0476895570755005
Training loss at 267 iteration is 0.9073911274760222


 89%|██████████████████████████████████▊    | 268/300 [2:13:03<15:24, 28.88s/it]

*** Test loss at 267 iteration is 1.1178936958312988
Training loss at 268 iteration is 0.9050807149942256


 90%|██████████████████████████████████▉    | 269/300 [2:13:31<14:50, 28.74s/it]

*** Test loss at 268 iteration is 1.0476752519607544
Training loss at 269 iteration is 0.9063451620172863


 90%|███████████████████████████████████    | 270/300 [2:14:00<14:20, 28.69s/it]

*** Test loss at 269 iteration is 0.9220488667488098
Training loss at 270 iteration is 0.9092200058550874


 90%|███████████████████████████████████▏   | 271/300 [2:14:28<13:48, 28.57s/it]

*** Test loss at 270 iteration is 1.0451363325119019
Training loss at 271 iteration is 0.9064210791233157


 91%|███████████████████████████████████▎   | 272/300 [2:14:57<13:22, 28.66s/it]

*** Test loss at 271 iteration is 1.042590856552124
Training loss at 272 iteration is 0.9061238396266276


 91%|███████████████████████████████████▍   | 273/300 [2:15:25<12:50, 28.55s/it]

*** Test loss at 272 iteration is 1.047676920890808
Training loss at 273 iteration is 0.9052876593652835


 91%|███████████████████████████████████▌   | 274/300 [2:15:54<12:28, 28.80s/it]

*** Test loss at 273 iteration is 0.905331552028656
Training loss at 274 iteration is 0.9055530182586229


 92%|███████████████████████████████████▊   | 275/300 [2:16:24<12:04, 29.00s/it]

*** Test loss at 274 iteration is 1.0476897954940796
Training loss at 275 iteration is 0.9072279304512276


 92%|███████████████████████████████████▉   | 276/300 [2:16:53<11:40, 29.17s/it]

*** Test loss at 275 iteration is 1.0476714372634888
Training loss at 276 iteration is 0.9086853536692533


 92%|████████████████████████████████████   | 277/300 [2:17:22<11:09, 29.13s/it]

*** Test loss at 276 iteration is 0.9048327803611755
Training loss at 277 iteration is 0.9051948780855856


 93%|████████████████████████████████████▏  | 278/300 [2:17:51<10:39, 29.05s/it]

*** Test loss at 277 iteration is 1.0476891994476318
Training loss at 278 iteration is 0.9069670286060365


 93%|████████████████████████████████████▎  | 279/300 [2:18:20<10:06, 28.87s/it]

*** Test loss at 278 iteration is 0.9051918387413025
Training loss at 279 iteration is 0.9056873700835488


 93%|████████████████████████████████████▍  | 280/300 [2:18:49<09:39, 28.98s/it]

*** Test loss at 279 iteration is 1.0476919412612915
Training loss at 280 iteration is 0.9054584675583958


 94%|████████████████████████████████████▌  | 281/300 [2:19:17<09:07, 28.80s/it]

*** Test loss at 280 iteration is 1.0471224784851074
Training loss at 281 iteration is 0.9056877286966182


 94%|████████████████████████████████████▋  | 282/300 [2:19:46<08:36, 28.69s/it]

*** Test loss at 281 iteration is 1.0476908683776855
Training loss at 282 iteration is 0.9060090514253979


 94%|████████████████████████████████████▊  | 283/300 [2:20:14<08:05, 28.57s/it]

*** Test loss at 282 iteration is 1.047686219215393
Training loss at 283 iteration is 0.9061487021525044


 95%|████████████████████████████████████▉  | 284/300 [2:20:43<07:36, 28.51s/it]

*** Test loss at 283 iteration is 1.04768967628479
Training loss at 284 iteration is 0.9093125630016169


 95%|█████████████████████████████████████  | 285/300 [2:21:11<07:08, 28.54s/it]

*** Test loss at 284 iteration is 1.0476888418197632
Training loss at 285 iteration is 0.9066821297338187


 95%|█████████████████████████████████████▏ | 286/300 [2:21:40<06:38, 28.50s/it]

*** Test loss at 285 iteration is 1.0476833581924438
Training loss at 286 iteration is 0.9061259809604361


 96%|█████████████████████████████████████▎ | 287/300 [2:22:09<06:13, 28.73s/it]

*** Test loss at 286 iteration is 1.0476911067962646
Training loss at 287 iteration is 0.906735999525086


 96%|█████████████████████████████████████▍ | 288/300 [2:22:37<05:43, 28.65s/it]

*** Test loss at 287 iteration is 1.0476895570755005
Training loss at 288 iteration is 0.9059831780835617


 96%|█████████████████████████████████████▌ | 289/300 [2:23:06<05:15, 28.67s/it]

*** Test loss at 288 iteration is 1.0420804023742676
Training loss at 289 iteration is 0.9078611849753324


 97%|█████████████████████████████████████▋ | 290/300 [2:23:34<04:45, 28.54s/it]

*** Test loss at 289 iteration is 1.04768967628479
Training loss at 290 iteration is 0.9052249203043535


 97%|█████████████████████████████████████▊ | 291/300 [2:24:03<04:17, 28.62s/it]

*** Test loss at 290 iteration is 1.0476877689361572
Training loss at 291 iteration is 0.9048419200684413


 97%|█████████████████████████████████████▉ | 292/300 [2:24:31<03:48, 28.55s/it]

*** Test loss at 291 iteration is 1.0476906299591064
Training loss at 292 iteration is 0.9052569038611799


 98%|██████████████████████████████████████ | 293/300 [2:25:00<03:18, 28.43s/it]

*** Test loss at 292 iteration is 1.04768967628479
Training loss at 293 iteration is 0.9054816650950219


 98%|██████████████████████████████████████▏| 294/300 [2:25:28<02:50, 28.47s/it]

*** Test loss at 293 iteration is 1.04768967628479
Training loss at 294 iteration is 0.9056525525967937


 98%|██████████████████████████████████████▎| 295/300 [2:25:57<02:23, 28.66s/it]

*** Test loss at 294 iteration is 1.04768967628479
Training loss at 295 iteration is 0.9068562304678042


 99%|██████████████████████████████████████▍| 296/300 [2:26:26<01:54, 28.67s/it]

*** Test loss at 295 iteration is 1.0485535860061646
Training loss at 296 iteration is 0.9059373598453427


 99%|██████████████████████████████████████▌| 297/300 [2:26:54<01:25, 28.56s/it]

*** Test loss at 296 iteration is 1.04767644405365
Training loss at 297 iteration is 0.9053713748277712


 99%|██████████████████████████████████████▋| 298/300 [2:27:23<00:57, 28.69s/it]

*** Test loss at 297 iteration is 1.021481990814209
Training loss at 298 iteration is 0.9054806744756777


100%|██████████████████████████████████████▊| 299/300 [2:27:52<00:28, 28.62s/it]

*** Test loss at 298 iteration is 1.0477343797683716
Training loss at 299 iteration is 0.9054721771192945


100%|███████████████████████████████████████| 300/300 [2:28:21<00:00, 29.67s/it]

*** Test loss at 299 iteration is 1.0546525716781616



