In [5]:
import os, sys
# Root directory of the project: the parent of the current working directory,
# resolved to an absolute path.
ROOT_DIR = os.path.abspath("../")

# Make the local packages imported below (utils, models, trainer) importable.
# The membership check keeps the cell idempotent: re-running it must not keep
# appending duplicate entries to sys.path.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

import dateutil.tz
import datetime
import pprint

import skopt
from skopt import gp_minimize


import numpy as np
import torch
from torchvision import transforms


from utils.dataloader import SVHNDataset
from utils.config import cfg, cfg_from_file
from utils.dataloader import prepare_dataloaders
from utils.misc import mkdir_p
from utils.transforms import FirstCrop, Rescale, RandomCrop, ToTensor
from utils.misc import load_obj
from utils.visualization import visualize_sample

from models.vgg import VGG
from trainer.trainer import train_model

from utils.checkpointer import CheckpointSaver

import matplotlib.pyplot as plt
%matplotlib inline

%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [27]:
# Round-trip check of CheckpointSaver: build a model, save it, load it back
# and print what was restored.

# Load the project configuration from the base YAML file.
# NOTE(review): presumably this populates the imported global `cfg` - confirm
# in utils.config.
cfg_from_file("../config/base_config.yml")

# Freshly constructed (untrained) VGG19 used only as something to checkpoint.
vgg19 = VGG('VGG19', num_classes=7, num_digits=11)

checkpoint = CheckpointSaver("../tmp_results")
# NOTE(review): the second argument (11) is presumably the epoch number, since
# the checkpoint loaded below is named "checkpoint_epoch11" - confirm against
# CheckpointSaver.save.
checkpoint.save(vgg19, 11)

# load() returns a pair, unpacked here as the restored model and a config object.
model,config = checkpoint.load("checkpoint_epoch11")

# Print both so the restored contents can be inspected by eye.
print(model)
print(config)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256, kernel_size=(3, 3)

In [12]:
# Objective function handed to the Bayesian optimizer.
# `space` is a single sampled point of the search space, not the space itself.
def train_model_opt(space):
    """Train one VGG variant for a sampled hyperparameter point and score it.

    Args:
        space: indexable point sampled by the optimizer; ``space[0]`` is the
            learning rate and ``space[1]`` is the VGG variant name passed to
            ``VGG``.

    Returns:
        The negated return value of ``train_model``, so that a minimizer
        maximizes whatever ``train_model`` reports.
        NOTE(review): presumably a validation score where higher is better -
        confirm in trainer.trainer.
    """
    print(space)

    # The data loaders are rebuilt on every call, from a 1000-sample subset.
    loaders = prepare_dataloaders(
        dataset_split='train',
        dataset_path='../data/SVHN/train',
        metadata_filename='../data/SVHN/train_metadata.pkl',
        batch_size=32,
        sample_size=1000,
        valid_split=0.8)
    train_loader, valid_loader = loaders

    # The architecture is whichever variant was sampled, not necessarily VGG19.
    arch_name = space[1]
    model = VGG(arch_name, num_classes=7, num_digits=11)

    # Use the first GPU when one is available, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    print("Device used: ", device)

    # NOTE(review): every trial shares the same checkpoint_dir/output_dir, so
    # later trials appear to overwrite the files of earlier ones - confirm
    # this is intended.
    score = train_model(model,
                        train_loader=train_loader,
                        valid_loader=valid_loader,
                        num_epochs=2,
                        device=device,
                        lr=space[0],
                        checkpoint_dir="../checkpoints",
                        output_dir="../tmp_results")
    return -score


# Search space explored by the optimizer. The order matters because
# train_model_opt indexes the sampled point positionally:
#   [0] learning rate, sampled log-uniformly between 1e-5 and 1
#   [1] VGG variant name
space = [skopt.space.Real(10**-5, 10**0, "log-uniform", name='lr'),
         skopt.space.Categorical(["VGG11", "VGG13", "VGG16", "VGG19"])]

# train_model_opt passes "../tmp_results" as its output_dir; make sure it exists.
mkdir_p("../tmp_results")


# Run 10 evaluations of the objective; random_state fixes the optimizer's seed.
res_gp = gp_minimize(train_model_opt, space, n_calls=10,
                     random_state=0)

# The objective returns the negated score, so flip the sign back for reporting.
print("Best score: {0}".format(-res_gp.fun))
print("Best lr: {0}".format(res_gp.x[0]))
# The architecture is searched as well, so report it alongside the learning rate.
print("Best architecture: {0}".format(res_gp.x[1]))

[0.009209225155490905, 'VGG19']
Device used:  cuda:0
Learning rate is: 0.009209225155490905
Directory  run  already exists
# Start training #



Iterating over training data...







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 13.4671(avg) :   0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 13.4671(avg) :   4%|▍         | 1/25 [00:00<00:14,  1.63it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 11.0749(avg) :   4%|▍         | 1/25 [00:00<00:14,  1.63it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 11.0749(avg) :   8%|▊         | 2/25 [00:00<00:11,  1.98it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 10.1812(avg) :   8%|▊         | 2/25 [00:01<00:11,  1.98it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 10.1812(avg) :  12%|█▏        | 3/25 [00:01<00:09,  2.31it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 9.8692(avg) :  12%|█▏        | 3/25 [00:01<00:09,  2.31it/s] [A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 9.8692(avg) :  16%|█▌        | 4/25 [00:01<00:07,  2.64it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/

Iterating over validation data...







 14%|█▍        | 1/7 [00:00<00:04,  1.46it/s][A[A[A[A[A




 29%|██▊       | 2/7 [00:00<00:02,  1.96it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 6,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10,  1, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 6, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 1,
        1, 6, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10,  1, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  1, 10, 10,
        10,  1, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')







 57%|█████▋    | 4/7 [00:00<00:01,  2.63it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        6, 1, 1, 1, 6, 6, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  1, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6,
        1, 1, 6, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  1, 10, 10, 10, 10, 10,  1,
        10,  1, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 6, 1, 6, 1, 1, 1, 1, 6, 6, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 6, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 6, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 






 86%|████████▌ | 6/7 [00:01<00:00,  3.50it/s][A[A[A[A[A




100%|██████████| 7/7 [00:01<00:00,  3.75it/s][A[A[A[A[A

tensor([1, 1, 6, 6, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10], device='cuda:0')
Checkpointing new model ...

Epoch: 1/2
	Train Loss: 7.8548
	Valid Loss: 12.0726
	Valid Sequence Length Accuracy: 0.6300
	Valid Digit Accuracy 0.0000



Iterating over training data...







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.8532(avg) :   0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.8532(avg) :   4%|▍         | 1/25 [00:00<00:15,  1.59it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.7519(avg) :   4%|▍         | 1/25 [00:00<00:15,  1.59it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.7519(avg) :   8%|▊         | 2/25 [00:00<00:12,  1.91it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.8307(avg) :   8%|▊         | 2/25 [00:01<00:12,  1.91it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.8307(avg) :  12%|█▏        | 3/25 [00:01<00:09,  2.27it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.5703(avg) :  12%|█▏        | 3/25 [00:01<00:09,  2.27it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.5703(avg) :  16%|█▌        | 4/25 [00:01<00:08,  2.60it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BA

Iterating over validation data...







 14%|█▍        | 1/7 [00:00<00:03,  1.53it/s][A[A[A[A[A




 43%|████▎     | 3/7 [00:00<00:01,  2.08it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 3, 10, 10, 10,  0,  0,  3,  0,  2, 10,  2, 10, 10,  2, 10,  0,  2,  2,
        10,  2,  3, 10, 10, 10,  2, 10,  0,  0, 10,  0,  2, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 0,  0, 10,  0, 10, 10,  2,  0,  0, 10, 10,  3,  2,  0, 10,  2,  0, 10,
         2,  2, 10, 10,  0, 10, 10, 10,  2, 10, 10, 10,  2, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10,  2,  2,  2,  3,  2,  0,  2,  2,  0, 10,  2,  0,  9,  0, 10, 10,
         2,  2, 10, 10, 10, 10,  0,  2,  2,  0,  0, 10,  0,  0],
       device='cuda:0')







 71%|███████▏  | 5/7 [00:00<00:00,  2.80it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10,  0, 10, 10, 10, 10, 10, 10,  3, 10,  2, 10, 10,  0, 10, 10,  0,
        10,  0,  2,  9, 10,  0, 10, 10,  3, 10,  0,  0, 10,  0],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10,  0, 10, 10,  2, 10, 10,  0, 10,  0,  9, 10,  0,  2, 10,  9,  2, 10,
         3, 10,  3,  2, 10,  2,  0, 10,  2,  3, 10, 10,  0,  2],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10,  2,  0, 10,  2,  0,  3,  3,  2, 10,  0, 10,  2, 10,  2,  2,  9,  2,
         0, 10,  2, 10, 10, 10, 10, 10,  2,  2,  0,  0,  0,  2],
       device='cuda:0')







100%|██████████| 7/7 [00:01<00:00,  3.42it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 0, 10,  9, 10,  0,  2, 10, 10], device='cuda:0')

Epoch: 2/2
	Train Loss: 6.8844
	Valid Loss: 6.7328
	Valid Sequence Length Accuracy: 0.5750
	Valid Digit Accuracy 0.0100


Training complete in 0m 18s
Saving model ...
Best model saved to : ../tmp_results/best_model.pth
[0.19486241836466403, 'VGG19']
Device used:  cuda:0
Learning rate is: 0.19486241836466403
Directory  run  already exists
# Start training #



Iterating over training data...







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 13.8514(avg) :   0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 13.8514(avg) :   4%|▍         | 1/25 [00:00<00:14,  1.65it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 12.9404(avg) :   4%|▍         | 1/25 [00:00<00:14,  1.65it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 12.9404(avg) :   8%|▊         | 2/25 [00:00<00:11,  1.98it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 12.8680(avg) :   8%|▊         | 2/25 [00:01<00:11,  1.98it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 12.8680(avg) :  12%|█▏        | 3/25 [00:01<00:09,  2.34it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 12.4590(avg) :  12%|█▏        | 3/25 [00:01<00:09,  2.34it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 12.4590(avg) :  16%|█▌        | 4/25 [00:01<00:07,  2.67it/s][A[A[A[A[A




[TRAIN] - EPOCH 1

Iterating over validation data...







 14%|█▍        | 1/7 [00:00<00:03,  1.58it/s][A[A[A[A[A




 29%|██▊       | 2/7 [00:00<00:02,  2.11it/s][A[A[A[A[A

tensor([4, 4, 4, 4, 4, 1, 1, 4, 4, 1, 1, 4, 1, 4, 4, 4, 4, 4, 4, 4, 1, 4, 4, 4,
        4, 1, 1, 4, 1, 1, 1, 4], device='cuda:0')
tensor([ 3,  3,  3,  3,  3, 10,  3,  3,  3,  3, 10,  3, 10,  3,  3,  3,  3,  3,
         3,  3,  3,  3,  3,  3,  3, 10, 10,  3, 10, 10, 10,  3],
       device='cuda:0')
tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 4, 4, 4, 4, 4, 1,
        4, 1, 4, 4, 4, 4, 4, 1], device='cuda:0')
tensor([ 3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, 10, 10,
         3,  3,  3,  3,  3, 10,  3, 10,  3,  3,  3,  3,  3, 10],
       device='cuda:0')
tensor([1, 4, 4, 4, 1, 4, 4, 4, 4, 1, 4, 4, 1, 4, 1, 4, 4, 4, 4, 4, 1, 4, 4, 1,
        4, 1, 4, 4, 4, 4, 1, 4], device='cuda:0')
tensor([10,  3,  3,  3,  1,  3,  3,  3,  3, 10,  3,  3,  3,  3,  3,  3,  3,  3,
         3,  3,  3,  3,  3, 10,  3, 10,  3,  3,  3,  3, 10,  3],
       device='cuda:0')







 57%|█████▋    | 4/7 [00:00<00:01,  2.83it/s][A[A[A[A[A




 86%|████████▌ | 6/7 [00:01<00:00,  3.74it/s][A[A[A[A[A

tensor([1, 4, 4, 1, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 1, 4, 1, 4, 4, 4, 4, 4, 4, 1,
        4, 4, 4, 1, 4, 4, 4, 4], device='cuda:0')
tensor([10,  3,  3,  3,  3,  3,  1,  3,  3,  3,  3,  3,  3,  3, 10,  3, 10,  3,
         3,  3,  3,  3,  3, 10,  3,  3,  3,  1,  3,  3,  3,  3],
       device='cuda:0')
tensor([1, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 1, 4, 4, 1, 1, 4, 4], device='cuda:0')
tensor([ 1,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, 10,  3,  3,  3,  3,  3,
         3,  3,  3,  3,  3,  3,  3, 10,  3,  3,  3,  1,  3,  3],
       device='cuda:0')
tensor([4, 4, 4, 1, 1, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4, 1, 4, 4, 1, 4, 4, 4, 1, 1,
        1, 4, 1, 4, 1, 4, 4, 1], device='cuda:0')
tensor([ 3,  3,  3,  3,  1,  3, 10,  3,  3,  3,  3,  3,  3,  3,  3, 10,  3,  3,
        10,  3,  3,  3,  3,  3,  1,  3, 10,  3, 10,  3,  3, 10],
       device='cuda:0')







100%|██████████| 7/7 [00:01<00:00,  3.95it/s][A[A[A[A[A

tensor([4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0')
tensor([3, 3, 3, 3, 3, 3, 3, 3], device='cuda:0')
Checkpointing new model ...

Epoch: 1/2
	Train Loss: 9.1167
	Valid Loss: 120.2432
	Valid Sequence Length Accuracy: 0.6300
	Valid Digit Accuracy 0.0000



Iterating over training data...







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.4173(avg) :   0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.4173(avg) :   4%|▍         | 1/25 [00:00<00:13,  1.80it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.3058(avg) :   4%|▍         | 1/25 [00:00<00:13,  1.80it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.3058(avg) :   8%|▊         | 2/25 [00:00<00:10,  2.13it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.2273(avg) :   8%|▊         | 2/25 [00:01<00:10,  2.13it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.2273(avg) :  12%|█▏        | 3/25 [00:01<00:08,  2.48it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.2395(avg) :  12%|█▏        | 3/25 [00:01<00:08,  2.48it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.2395(avg) :  16%|█▌        | 4/25 [00:01<00:07,  2.79it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BA

Iterating over validation data...







 14%|█▍        | 1/7 [00:00<00:04,  1.41it/s][A[A[A[A[A




 29%|██▊       | 2/7 [00:00<00:02,  1.90it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10,  1, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10,  1, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10,  1, 10, 10, 10, 10,  5, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')







 57%|█████▋    | 4/7 [00:00<00:01,  2.56it/s][A[A[A[A[A




 86%|████████▌ | 6/7 [00:01<00:00,  3.41it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  5,
        10, 10, 10, 10, 10, 10,  1, 10, 10, 10, 10,  5, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 2], device='cuda:0')
tensor([10, 10,  5, 10, 10, 10, 10, 10, 10, 10, 10, 10,  1, 10, 10, 10, 10, 10,
        10, 10, 10,  1, 10, 10, 10, 10, 10, 10, 10, 10,  5,  1],
       device='cuda:0')







100%|██████████| 7/7 [00:01<00:00,  3.69it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10], device='cuda:0')

Epoch: 2/2
	Train Loss: 7.2173
	Valid Loss: 7.0408
	Valid Sequence Length Accuracy: 0.5900
	Valid Digit Accuracy 0.0000


Training complete in 0m 21s
Saving model ...
Best model saved to : ../tmp_results/best_model.pth
[0.013116515715358098, 'VGG13']
Device used:  cuda:0
Learning rate is: 0.013116515715358098
Directory  run  already exists
# Start training #



Iterating over training data...







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 14.2624(avg) :   0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 14.2624(avg) :   4%|▍         | 1/25 [00:00<00:14,  1.67it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 11.4562(avg) :   4%|▍         | 1/25 [00:00<00:14,  1.67it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 11.4562(avg) :   8%|▊         | 2/25 [00:00<00:10,  2.10it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 10.2243(avg) :   8%|▊         | 2/25 [00:00<00:10,  2.10it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 10.2243(avg) :  12%|█▏        | 3/25 [00:00<00:08,  2.60it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 9.6458(avg) :  12%|█▏        | 3/25 [00:01<00:08,  2.60it/s] [A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 9.6458(avg) :  16%|█▌        | 4/25 [00:01<00:06,  3.07it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/

Iterating over validation data...







 14%|█▍        | 1/7 [00:00<00:04,  1.46it/s][A[A[A[A[A




 43%|████▎     | 3/7 [00:00<00:02,  1.99it/s][A[A[A[A[A

tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
tensor([10, 10, 10, 10, 10,  2, 10, 10,  2, 10, 10, 10, 10, 10, 10,  1, 10, 10,
        10, 10, 10,  2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1,
        2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
tensor([10, 10, 10, 10, 10,  1,  1,  1, 10,  1, 10,  1, 10, 10, 10,  1, 10, 10,
        10, 10,  1,  2,  2,  1, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2,
        2, 2, 1, 2, 2, 2, 2, 2], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10,  1,  2, 10,  1, 10, 10, 10, 10,  1,  1,
        10, 10, 10, 10, 10, 10, 10, 10,  1, 10, 10, 10,  2, 10],
       device='cuda:0')







 86%|████████▌ | 6/7 [00:00<00:00,  2.74it/s][A[A[A[A[A

tensor([2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 1, 2, 2, 2], device='cuda:0')
tensor([10, 10,  2, 10, 10, 10,  1, 10, 10, 10, 10,  2, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  1, 10, 10, 10],
       device='cuda:0')
tensor([2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2,
        2, 2, 2, 2, 2, 1, 2, 2], device='cuda:0')
tensor([10, 10, 10, 10,  1, 10, 10, 10, 10, 10,  1, 10,  2, 10, 10,  2, 10, 10,
        10,  1, 10, 10,  1, 10, 10, 10, 10, 10, 10,  1, 10, 10],
       device='cuda:0')
tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
tensor([10, 10, 10,  2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  6, 10, 10, 10],
       device='cuda:0')







100%|██████████| 7/7 [00:01<00:00,  3.43it/s][A[A[A[A[A

tensor([2, 1, 1, 2, 1, 2, 2, 2], device='cuda:0')
tensor([10,  1,  1,  2, 10, 10, 10, 10], device='cuda:0')
Checkpointing new model ...

Epoch: 1/2
	Train Loss: 7.7838
	Valid Loss: 8.7011
	Valid Sequence Length Accuracy: 0.3600
	Valid Digit Accuracy 0.0350



Iterating over training data...







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 6.5969(avg) :   0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 6.5969(avg) :   4%|▍         | 1/25 [00:00<00:13,  1.73it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1604(avg) :   4%|▍         | 1/25 [00:00<00:13,  1.73it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1604(avg) :   8%|▊         | 2/25 [00:00<00:10,  2.17it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1427(avg) :   8%|▊         | 2/25 [00:00<00:10,  2.17it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1427(avg) :  12%|█▏        | 3/25 [00:00<00:08,  2.66it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1580(avg) :  12%|█▏        | 3/25 [00:01<00:08,  2.66it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1580(avg) :  16%|█▌        | 4/25 [00:01<00:06,  3.11it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BA

Iterating over validation data...







 14%|█▍        | 1/7 [00:00<00:03,  1.50it/s][A[A[A[A[A




 43%|████▎     | 3/7 [00:00<00:01,  2.05it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 0,  0,  1,  0,  0, 10,  0,  1,  0, 10, 10,  1,  1,  1,  1, 10,  1,  1,
        10,  1,  0, 10,  1,  1, 10,  0,  1,  1,  0,  1,  1,  1],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 1,  0, 10, 10,  0,  0,  0,  0, 10,  1, 10, 10, 10,  1,  1,  0, 10, 10,
        10,  1, 10, 10,  1,  1, 10,  0,  1,  1, 10,  1,  1,  1],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10,  1, 10, 10, 10, 10,  0, 10,  0,  1,  1,  0,  1, 10,  1,  1,  1, 10,
        10, 10, 10,  0,  0,  1,  0,  0,  0, 10,  0,  0, 10, 10],
       device='cuda:0')







 86%|████████▌ | 6/7 [00:00<00:00,  2.81it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 0,  1,  0,  1,  0,  0,  1,  0,  0, 10,  0, 10,  1, 10,  1,  1,  1,  1,
        10, 10,  1,  1,  1, 10,  0,  0,  1,  1,  0, 10, 10,  1],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 0,  0,  1, 10,  1, 10, 10,  1,  0, 10,  1,  1, 10,  0,  1,  0, 10,  1,
        10, 10,  0, 10, 10,  0,  1,  0, 10, 10,  0, 10,  1, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10,  0,  0,  0,  1,  1, 10,  0, 10, 10,  0,  0,  1, 10,  1,  0,  1,  1,
         0, 10,  1,  1, 10,  1,  1,  1,  0,  1, 10,  1, 10,  1],
       device='cuda:0')







100%|██████████| 7/7 [00:01<00:00,  3.51it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 0, 10, 10, 10,  0,  1, 10, 10], device='cuda:0')

Epoch: 2/2
	Train Loss: 6.4713
	Valid Loss: 6.8729
	Valid Sequence Length Accuracy: 0.6050
	Valid Digit Accuracy 0.0150


Training complete in 0m 13s
Saving model ...
Best model saved to : ../tmp_results/best_model.pth
[0.0003073781785362612, 'VGG11']
Device used:  cuda:0
Learning rate is: 0.0003073781785362612
Directory  run  already exists
# Start training #



Iterating over training data...







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 15.1711(avg) :   0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 15.1711(avg) :   4%|▍         | 1/25 [00:00<00:10,  2.29it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 14.9007(avg) :   4%|▍         | 1/25 [00:00<00:10,  2.29it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 14.9007(avg) :   8%|▊         | 2/25 [00:00<00:08,  2.83it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 14.5196(avg) :   8%|▊         | 2/25 [00:00<00:08,  2.83it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 14.5196(avg) :  12%|█▏        | 3/25 [00:00<00:06,  3.45it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 14.1516(avg) :  12%|█▏        | 3/25 [00:00<00:06,  3.45it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 14.1516(avg) :  16%|█▌        | 4/25 [00:00<00:05,  4.05it/s][A[A[A[A[A




[TRAIN] - EPOCH 1

Iterating over validation data...







 14%|█▍        | 1/7 [00:00<00:04,  1.47it/s][A[A[A[A[A




 43%|████▎     | 3/7 [00:00<00:01,  2.01it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')







 86%|████████▌ | 6/7 [00:00<00:00,  2.77it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10], device='cuda






100%|██████████| 7/7 [00:01<00:00,  5.97it/s][A[A[A[A[A

Checkpointing new model ...

Epoch: 1/2
	Train Loss: 8.8577
	Valid Loss: 9.9765
	Valid Sequence Length Accuracy: 0.6300
	Valid Digit Accuracy 0.0000



Iterating over training data...







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.4985(avg) :   0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.4985(avg) :   4%|▍         | 1/25 [00:00<00:10,  2.20it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1979(avg) :   4%|▍         | 1/25 [00:00<00:10,  2.20it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1979(avg) :   8%|▊         | 2/25 [00:00<00:08,  2.70it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1921(avg) :   8%|▊         | 2/25 [00:00<00:08,  2.70it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1921(avg) :  12%|█▏        | 3/25 [00:00<00:06,  3.27it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 6.9735(avg) :  12%|█▏        | 3/25 [00:00<00:06,  3.27it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 6.9735(avg) :  16%|█▌        | 4/25 [00:00<00:05,  3.82it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BA

Iterating over validation data...







 14%|█▍        | 1/7 [00:00<00:04,  1.44it/s][A[A[A[A[A




 43%|████▎     | 3/7 [00:00<00:02,  1.99it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  3, 10,
        10, 10, 10, 10, 10, 10, 10, 10,  3, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  3, 10, 10, 10,  6,
        10,  3, 10,  1,  3,  3, 10, 10,  1, 10, 10, 10, 10,  8],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10,  3, 10,  3, 10, 10,  6, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 






 86%|████████▌ | 6/7 [00:00<00:00,  2.75it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10,  3, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  3, 10, 10, 10,
        10, 10,  3, 10,  6, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 3,  3, 10, 10,  3, 10,  3, 10,  3,  6, 10, 10, 10, 10, 10,  3, 10, 10,
        10, 10, 10,  3, 10, 10, 10,  6, 10,  3, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 3, 10,  3, 10, 10, 10, 10, 10], device='cuda:0')







100%|██████████| 7/7 [00:01<00:00,  6.06it/s][A[A[A[A[A


Epoch: 2/2
	Train Loss: 6.5456
	Valid Loss: 6.4011
	Valid Sequence Length Accuracy: 0.6350
	Valid Digit Accuracy 0.0000


Training complete in 0m 11s
Saving model ...
Best model saved to : ../tmp_results/best_model.pth
[0.00023082427114609125, 'VGG13']
Device used:  cuda:0
Learning rate is: 0.00023082427114609125
Directory  run  already exists
# Start training #



Iterating over training data...







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 15.2938(avg) :   0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 15.2938(avg) :   4%|▍         | 1/25 [00:00<00:12,  1.95it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 15.2366(avg) :   4%|▍         | 1/25 [00:00<00:12,  1.95it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 15.2366(avg) :   8%|▊         | 2/25 [00:00<00:09,  2.44it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 15.0136(avg) :   8%|▊         | 2/25 [00:00<00:09,  2.44it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 15.0136(avg) :  12%|█▏        | 3/25 [00:00<00:07,  2.82it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 14.7625(avg) :  12%|█▏        | 3/25 [00:01<00:07,  2.82it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 14.7625(avg) :  16%|█▌        | 4/25 [00:01<00:06,  3.27it/s][A[A[A[A[A




[TRAIN] - EPOCH 1

Iterating over validation data...







 14%|█▍        | 1/7 [00:00<00:03,  1.50it/s][A[A[A[A[A




 43%|████▎     | 3/7 [00:00<00:01,  2.04it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')







 71%|███████▏  | 5/7 [00:00<00:00,  2.79it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')







100%|██████████| 7/7 [00:01<00:00,  3.63it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10], device='cuda:0')
Checkpointing new model ...

Epoch: 1/2
	Train Loss: 9.4826
	Valid Loss: 10.6707
	Valid Sequence Length Accuracy: 0.6300
	Valid Digit Accuracy 0.0000



Iterating over training data...







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.5154(avg) :   0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.5154(avg) :   4%|▍         | 1/25 [00:00<00:11,  2.15it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1944(avg) :   4%|▍         | 1/25 [00:00<00:11,  2.15it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1944(avg) :   8%|▊         | 2/25 [00:00<00:08,  2.61it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.0710(avg) :   8%|▊         | 2/25 [00:00<00:08,  2.61it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.0710(avg) :  12%|█▏        | 3/25 [00:00<00:07,  3.11it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1269(avg) :  12%|█▏        | 3/25 [00:01<00:07,  3.11it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1269(avg) :  16%|█▌        | 4/25 [00:01<00:05,  3.53it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BA

Iterating over validation data...







 14%|█▍        | 1/7 [00:00<00:04,  1.42it/s][A[A[A[A[A




 43%|████▎     | 3/7 [00:00<00:02,  1.96it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 1, 10, 10,  8, 10, 10, 10, 10, 10, 10, 10, 10,  1, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10,  1, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  0, 10, 10,  1, 10, 10,
        10, 10, 10, 10,  1, 10, 10, 10, 10, 10, 10,  1, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10,  1, 10,  0, 10,  0, 10,  1, 10, 10, 10,  1, 10, 10, 10, 10,
         1, 10, 10, 10,  1, 10, 10, 10, 10, 10, 10,  5, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 1, 1, 1, 1, 






 86%|████████▌ | 6/7 [00:00<00:00,  2.69it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10,  0, 10, 10, 10, 10, 10,  1, 10, 10, 10, 10, 10, 10, 10, 10,
         1, 10, 10, 10, 10,  0, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10,  0, 10, 10,  1, 10, 10, 10, 10, 10, 10,  1, 10, 10,  1, 10,
         1, 10, 10, 10,  1, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10,  0, 10, 10, 10], device='cuda:0')







100%|██████████| 7/7 [00:01<00:00,  5.80it/s][A[A[A[A[A


Epoch: 2/2
	Train Loss: 6.6904
	Valid Loss: 6.7549
	Valid Sequence Length Accuracy: 0.6100
	Valid Digit Accuracy 0.0000


Training complete in 0m 13s
Saving model ...
Best model saved to : ../tmp_results/best_model.pth
[0.11503861485898605, 'VGG13']
Device used:  cuda:0
Learning rate is: 0.11503861485898605
Directory  run  already exists
# Start training #



Iterating over training data...







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 13.8237(avg) :   0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 13.8237(avg) :   4%|▍         | 1/25 [00:00<00:11,  2.15it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 12.9106(avg) :   4%|▍         | 1/25 [00:00<00:11,  2.15it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 12.9106(avg) :   8%|▊         | 2/25 [00:00<00:08,  2.63it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 11.6806(avg) :   8%|▊         | 2/25 [00:00<00:08,  2.63it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 11.6806(avg) :  12%|█▏        | 3/25 [00:00<00:07,  3.09it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 11.6280(avg) :  12%|█▏        | 3/25 [00:01<00:07,  3.09it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 11.6280(avg) :  16%|█▌        | 4/25 [00:01<00:05,  3.56it/s][A[A[A[A[A




[TRAIN] - EPOCH 1

Iterating over validation data...







 14%|█▍        | 1/7 [00:00<00:04,  1.47it/s][A[A[A[A[A




 43%|████▎     | 3/7 [00:00<00:01,  2.01it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 3,  3, 10, 10,  3, 10, 10, 10,  3,  3, 10, 10, 10,  3, 10, 10, 10,  3,
        10, 10,  3, 10,  3, 10,  3, 10,  3, 10,  3, 10,  3,  8],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10,  3, 10, 10, 10,  3,  3, 10, 10, 10, 10,  3,  3, 10,  3,  3, 10,  3,
        10, 10, 10, 10,  3,  3, 10,  8, 10, 10, 10, 10, 10,  3],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 3, 10, 10, 10, 10,  8, 10,  3, 10,  8, 10, 10,  3, 10,  3,  3, 10, 10,
         3,  3, 10, 10,  3, 10,  3, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')







 71%|███████▏  | 5/7 [00:00<00:00,  2.76it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 3, 10, 10,  3,  8,  3,  3, 10, 10, 10,  3, 10,  3,  3,  8, 10, 10, 10,
         3,  3,  3,  3,  3,  3,  3, 10, 10,  8,  3, 10, 10,  3],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 8,  3, 10,  3,  3,  3,  3,  3,  3,  3,  3, 10, 10,  3, 10, 10, 10,  3,
        10,  3,  3, 10, 10, 10,  3,  3,  3, 10, 10,  3, 10,  3],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10,  3, 10, 10,  3, 10,  3, 10, 10, 10,  3, 10,  3,  3,  3, 10,  8, 10,
        10,  8, 10, 10, 10, 10,  8, 10,  3, 10, 10, 10,  3, 10],
       device='cuda:0')







100%|██████████| 7/7 [00:01<00:00,  3.60it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([ 3, 10,  3, 10,  3,  3, 10, 10], device='cuda:0')
Checkpointing new model ...

Epoch: 1/2
	Train Loss: 9.2847
	Valid Loss: 17.8907
	Valid Sequence Length Accuracy: 0.4800
	Valid Digit Accuracy 0.0150



Iterating over training data...







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1702(avg) :   0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.1702(avg) :   4%|▍         | 1/25 [00:00<00:12,  1.97it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.4537(avg) :   4%|▍         | 1/25 [00:00<00:12,  1.97it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.4537(avg) :   8%|▊         | 2/25 [00:00<00:09,  2.46it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.3780(avg) :   8%|▊         | 2/25 [00:00<00:09,  2.46it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.3780(avg) :  12%|█▏        | 3/25 [00:00<00:07,  2.89it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.2260(avg) :  12%|█▏        | 3/25 [00:01<00:07,  2.89it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 7.2260(avg) :  16%|█▌        | 4/25 [00:01<00:06,  3.35it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BA

Iterating over validation data...







 14%|█▍        | 1/7 [00:00<00:04,  1.45it/s][A[A[A[A[A




 43%|████▎     | 3/7 [00:00<00:02,  1.97it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')







 71%|███████▏  | 5/7 [00:00<00:00,  2.70it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')







100%|██████████| 7/7 [00:01<00:00,  3.53it/s][A[A[A[A[A

tensor([1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10], device='cuda:0')

Epoch: 2/2
	Train Loss: 7.2423
	Valid Loss: 6.9789
	Valid Sequence Length Accuracy: 0.6200
	Valid Digit Accuracy 0.0000


Training complete in 0m 13s
Saving model ...
Best model saved to : ../tmp_results/best_model.pth
[0.0009202884691104562, 'VGG19']
Device used:  cuda:0
Learning rate is: 0.0009202884691104562
Directory  run  already exists
# Start training #



Iterating over training data...







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 15.1266(avg) :   0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 15.1266(avg) :   4%|▍         | 1/25 [00:00<00:14,  1.66it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 14.4710(avg) :   4%|▍         | 1/25 [00:00<00:14,  1.66it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 14.4710(avg) :   8%|▊         | 2/25 [00:00<00:11,  2.00it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 13.6868(avg) :   8%|▊         | 2/25 [00:01<00:11,  2.00it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 13.6868(avg) :  12%|█▏        | 3/25 [00:01<00:09,  2.35it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 12.9415(avg) :  12%|█▏        | 3/25 [00:01<00:09,  2.35it/s][A[A[A[A[A




[TRAIN] - EPOCH 1/ 2 - BATCH LOSS: 12.9415(avg) :  16%|█▌        | 4/25 [00:01<00:07,  2.67it/s][A[A[A[A[A




[TRAIN] - EPOCH 1

Iterating over validation data...







 14%|█▍        | 1/7 [00:00<00:04,  1.46it/s][A[A[A[A[A




 43%|████▎     | 3/7 [00:00<00:02,  1.99it/s][A[A[A[A[A

tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')







 71%|███████▏  | 5/7 [00:00<00:00,  2.69it/s][A[A[A[A[A

tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')







100%|██████████| 7/7 [00:01<00:00,  3.31it/s][A[A[A[A[A

tensor([2, 2, 2, 2, 2, 2, 2, 2], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10], device='cuda:0')
Checkpointing new model ...

Epoch: 1/2
	Train Loss: 8.1950
	Valid Loss: 8.6041
	Valid Sequence Length Accuracy: 0.6300
	Valid Digit Accuracy 0.0350



Iterating over training data...







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 5.7037(avg) :   0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 5.7037(avg) :   4%|▍         | 1/25 [00:00<00:13,  1.77it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 6.0066(avg) :   4%|▍         | 1/25 [00:00<00:13,  1.77it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 6.0066(avg) :   8%|▊         | 2/25 [00:00<00:11,  2.09it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 6.3301(avg) :   8%|▊         | 2/25 [00:01<00:11,  2.09it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 6.3301(avg) :  12%|█▏        | 3/25 [00:01<00:09,  2.44it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 6.4152(avg) :  12%|█▏        | 3/25 [00:01<00:09,  2.44it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BATCH LOSS: 6.4152(avg) :  16%|█▌        | 4/25 [00:01<00:07,  2.75it/s][A[A[A[A[A




[TRAIN] - EPOCH 2/ 2 - BA

KeyboardInterrupt: 