<a href="https://colab.research.google.com/github/Ayush-327/Ayush-327/blob/main/ResNe50_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import  torch

In [2]:
import torch.nn as nn
torch.cuda.is_available()

True

In [3]:
class block(nn.Module):
  def __init__(self, in_channels , inter_channels , identity_downsample=None, stride=1 ):
    super(block, self).__init__()
    self.conv1 = nn.Conv2d( in_channels, inter_channels, kernel_size=(1,1), stride=1 )
    self.bn1 = nn.BatchNorm2d( inter_channels)
    self.conv2 = nn.Conv2d( inter_channels, inter_channels, kernel_size=3, stride=stride, padding=1 )
    self.bn2 = nn.BatchNorm2d( inter_channels)
    self.conv3 = nn.Conv2d( inter_channels, inter_channels*4, kernel_size=1, stride = 1)
    self.bn3 = nn.BatchNorm2d( inter_channels*4)
    self.relu = nn.ReLU()
    self.identity_downsample = identity_downsample
    self.stride = stride

  def forward(self, x):
    identity = x.clone()
    x=self.conv1(x)
    x=self.bn1(x)
    x=self.conv2(x)
    x=self.bn2(x)
    x=self.conv3(x)
    x=self.bn3(x)
    if self.identity_downsample is not None:
      identity = self.identity_downsample(identity)
    x+=identity
    x= self.relu(x)

    return x

class ResNet50(nn.Module):
  def __init__(self, image_channels, num_classes):
    super(ResNet50, self).__init__()
    self.in_channels = 64
    self.conv1 = nn.Conv2d(image_channels, out_channels=64, kernel_size=(7,7), stride=(2,2),padding=(3,3))
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU()
    self.maxpool = nn.MaxPool2d(kernel_size=(3,3), stride = (2,2), padding = (1,1))
    
    self.layer1 = self.layer( 3 , 64 , 1)
    self.layer2 = self.layer( 4 , 128 , 2)
    self.layer3 = self.layer( 6 , 256 , 2)
    self.layer4 = self.layer( 3 , 512 , 2)
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512 * 4, num_classes)


  def forward(self ,x):
    x=self.conv1(x)
    x=self.bn1(x)
    x=self.relu(x)
    x=self.maxpool(x)
    x=self.layer1(x)
    x=self.layer2(x)
    x=self.layer3(x)
    x=self.layer4(x)
    x = self.avgpool(x)
    
    x = x.reshape(x.shape[0], -1)
    
    x = self.fc(x)
#     x = nn.Softmax(dim=1)(x)
    
    return x
  
  def layer( self, no_of_blocks , inter_channels , stride):

    identity_downsample = None
    layers = []

    if stride != 1 or self.in_channels != 4*inter_channels :
        identity_downsample = nn.Sequential( nn.Conv2d(self.in_channels, inter_channels*4, kernel_size =1 , stride = stride) , nn.BatchNorm2d(inter_channels*4),)

    layers.append( block(self.in_channels, inter_channels, identity_downsample, stride=stride))

    self.in_channels = inter_channels*4

    for i in range(no_of_blocks-1):
        layers.append(block(self.in_channels , inter_channels))

    return nn.Sequential(*layers)






In [4]:
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch import optim
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
import datetime

In [5]:
batch_size = 64

In [6]:
data_path = '../data-unversioned/p1ch7/'
train_set = datasets.CIFAR10(
    data_path, train=True, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))
test_set = datasets.CIFAR10(
    data_path, train=False, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data-unversioned/p1ch7/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting ../data-unversioned/p1ch7/cifar-10-python.tar.gz to ../data-unversioned/p1ch7/
Files already downloaded and verified


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [8]:

in_channels = 3
num_classes = 10


In [9]:
model = ResNet50(image_channels=in_channels, num_classes=num_classes).to(device)

In [10]:
num_epochs = 5
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, dampening=0, weight_decay=0.0001, nesterov=False)

In [27]:
from tqdm import tqdm

train_loader = torch.utils.data.DataLoader(train_set, batch_size=64,
                                           shuffle=True)

def training_model( num_epochs , optimizer):
 for epoch in range(num_epochs):       
        running_loss=0
        for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if possible
            data = data.to(device)
            targets = targets.to(device)

        # forward
            scores = model(data)
            loss = criterion(scores, targets)

        # backward
            optimizer.zero_grad()
            loss.backward()

        # gradient descent or adam step
            optimizer.step()
            running_loss += loss.item()
        print('{} Epoch {}, Training loss {}'.format(datetime.datetime.now(), epoch,
            running_loss / len(train_loader)))
        

In [23]:
def validate(model , train_loader, test_loader):
  
  def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)


            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)


    
    return num_correct/num_samples

 
  print(f"Accuracy on training set: {check_accuracy(train_loader, model)*100:.2f}")
  print(f"Accuracy on test set: {check_accuracy(test_loader, model)*100:.2f}")

In [24]:
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, dampening=0, weight_decay=0.0001, nesterov=False)
training_model( 50 , optimizer)




  0%|          | 0/782 [00:00<?, ?it/s][A[A[A


  0%|          | 1/782 [00:00<01:41,  7.67it/s][A[A[A


  0%|          | 2/782 [00:00<01:42,  7.57it/s][A[A[A


  1%|          | 4/782 [00:00<01:31,  8.49it/s][A[A[A


  1%|          | 6/782 [00:00<01:21,  9.48it/s][A[A[A


  1%|          | 8/782 [00:00<01:13, 10.50it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:08, 11.29it/s][A[A[A


  2%|▏         | 12/782 [00:01<01:05, 11.81it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:02, 12.35it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.80it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.01it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:57, 13.15it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:57, 13.28it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:56, 13.39it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:55, 13.55it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:55, 13.67it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:54, 13.78it/s][A

2021-07-31 09:39:51.593660 Epoch 0, Training loss 1.8591219916977846





  1%|          | 4/782 [00:00<00:57, 13.49it/s][A[A[A


  1%|          | 6/782 [00:00<00:57, 13.58it/s][A[A[A


  1%|          | 8/782 [00:00<00:57, 13.48it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:57, 13.50it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:57, 13.44it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:57, 13.40it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:56, 13.52it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:56, 13.54it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:56, 13.44it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:56, 13.48it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:56, 13.53it/s][A[A[A


  3%|▎         | 26/782 [00:01<00:55, 13.53it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:55, 13.55it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:55, 13.48it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:55, 13.43it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:55, 13.36it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:55, 13

2021-07-31 09:40:49.743501 Epoch 1, Training loss 1.7478367975910607





  1%|          | 4/782 [00:00<00:58, 13.31it/s][A[A[A


  1%|          | 6/782 [00:00<00:58, 13.27it/s][A[A[A


  1%|          | 8/782 [00:00<00:58, 13.16it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:58, 13.26it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:57, 13.35it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:57, 13.41it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:57, 13.29it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.17it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 12.99it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:57, 13.13it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:57, 13.22it/s][A[A[A


  3%|▎         | 26/782 [00:01<00:56, 13.33it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:56, 13.29it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:56, 13.28it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:56, 13.20it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:56, 13.27it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:55, 13

2021-07-31 09:41:49.309555 Epoch 2, Training loss 1.6859831660604843





  1%|          | 4/782 [00:00<00:57, 13.59it/s][A[A[A


  1%|          | 6/782 [00:00<00:57, 13.52it/s][A[A[A


  1%|          | 8/782 [00:00<00:57, 13.44it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:57, 13.49it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:57, 13.47it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:57, 13.47it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:57, 13.34it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:57, 13.26it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:56, 13.39it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:56, 13.45it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:56, 13.31it/s][A[A[A


  3%|▎         | 26/782 [00:01<00:56, 13.45it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:56, 13.40it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:56, 13.38it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:56, 13.38it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:55, 13.40it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:55, 13

2021-07-31 09:42:48.828629 Epoch 3, Training loss 1.629803315910232





  1%|          | 4/782 [00:00<00:57, 13.43it/s][A[A[A


  1%|          | 6/782 [00:00<00:58, 13.29it/s][A[A[A


  1%|          | 8/782 [00:00<00:58, 13.17it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:58, 13.09it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 13.02it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:58, 13.02it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.94it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.87it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 13.00it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 12.96it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 13.04it/s][A[A[A


  3%|▎         | 26/782 [00:01<00:57, 13.06it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:57, 13.18it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:57, 13.18it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:56, 13.20it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:56, 13.23it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:56, 13

2021-07-31 09:43:48.921891 Epoch 4, Training loss 1.5948049022108697





  1%|          | 4/782 [00:00<01:00, 12.83it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.85it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.87it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 12.93it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 12.95it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.86it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.85it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.01it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.89it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 12.96it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 12.94it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 12.91it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:57, 13.03it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:58, 12.82it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:58, 12.89it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:59, 12.65it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:58, 12

2021-07-31 09:44:49.229830 Epoch 5, Training loss 1.5736345170099106





  1%|          | 4/782 [00:00<01:03, 12.23it/s][A[A[A


  1%|          | 6/782 [00:00<01:02, 12.45it/s][A[A[A


  1%|          | 8/782 [00:00<01:01, 12.61it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:02, 12.44it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:01, 12.55it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:01, 12.52it/s][A[A[A


  2%|▏         | 16/782 [00:01<01:00, 12.69it/s][A[A[A


  2%|▏         | 18/782 [00:01<01:00, 12.71it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.78it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:59, 12.78it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:59, 12.84it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 12.89it/s][A[A[A


  4%|▎         | 28/782 [00:02<01:00, 12.45it/s][A[A[A


  4%|▍         | 30/782 [00:02<01:00, 12.47it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:59, 12.60it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.76it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:58, 12

2021-07-31 09:45:49.667988 Epoch 6, Training loss 1.5471496249708678





  1%|          | 4/782 [00:00<01:00, 12.86it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.80it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.73it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:00, 12.83it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.76it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.81it/s][A[A[A


  2%|▏         | 16/782 [00:01<01:00, 12.62it/s][A[A[A


  2%|▏         | 18/782 [00:01<01:00, 12.70it/s][A[A[A


  3%|▎         | 20/782 [00:01<01:00, 12.67it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:59, 12.69it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:59, 12.75it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 12.83it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:58, 12.88it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:58, 12.80it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:58, 12.83it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.81it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 12

2021-07-31 09:46:50.620978 Epoch 7, Training loss 1.5263874968299476





  1%|          | 4/782 [00:00<00:57, 13.56it/s][A[A[A


  1%|          | 6/782 [00:00<00:57, 13.48it/s][A[A[A


  1%|          | 8/782 [00:00<00:57, 13.48it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:57, 13.38it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:57, 13.31it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:57, 13.26it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:57, 13.23it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.88it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.77it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 12.89it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 13.00it/s][A[A[A


  3%|▎         | 26/782 [00:01<00:57, 13.11it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:57, 13.01it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:57, 13.02it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 13.08it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:56, 13.16it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 13

2021-07-31 09:47:51.106816 Epoch 8, Training loss 1.499102342159242





  1%|          | 4/782 [00:00<00:59, 13.13it/s][A[A[A


  1%|          | 6/782 [00:00<00:58, 13.17it/s][A[A[A


  1%|          | 8/782 [00:00<00:59, 12.99it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 13.02it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 12.89it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.90it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:58, 13.05it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.00it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 13.02it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 13.04it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 12.94it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 13.01it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:57, 13.03it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:57, 13.17it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 13.07it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:57, 13.10it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 12

2021-07-31 09:48:51.796825 Epoch 9, Training loss 1.469563865753086





  1%|          | 4/782 [00:00<01:01, 12.55it/s][A[A[A


  1%|          | 6/782 [00:00<01:01, 12.59it/s][A[A[A


  1%|          | 8/782 [00:00<01:02, 12.41it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:01, 12.57it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.75it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.91it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.95it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.02it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 13.03it/s][A[A[A


  3%|▎         | 22/782 [00:01<01:01, 12.43it/s][A[A[A


  3%|▎         | 24/782 [00:01<01:02, 12.15it/s][A[A[A


  3%|▎         | 26/782 [00:02<01:01, 12.37it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:59, 12.58it/s][A[A[A


  4%|▍         | 30/782 [00:02<01:00, 12.37it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:59, 12.53it/s][A[A[A


  4%|▍         | 34/782 [00:02<01:00, 12.44it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:59, 12

2021-07-31 09:49:53.316957 Epoch 10, Training loss 1.452760420949258





  1%|          | 4/782 [00:00<01:02, 12.40it/s][A[A[A


  1%|          | 6/782 [00:00<01:02, 12.50it/s][A[A[A


  1%|          | 8/782 [00:00<01:01, 12.55it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:01, 12.61it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.72it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:01, 12.48it/s][A[A[A


  2%|▏         | 16/782 [00:01<01:00, 12.59it/s][A[A[A


  2%|▏         | 18/782 [00:01<01:01, 12.47it/s][A[A[A


  3%|▎         | 20/782 [00:01<01:01, 12.47it/s][A[A[A


  3%|▎         | 22/782 [00:01<01:00, 12.48it/s][A[A[A


  3%|▎         | 24/782 [00:01<01:00, 12.60it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:59, 12.65it/s][A[A[A


  4%|▎         | 28/782 [00:02<01:00, 12.46it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:59, 12.58it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:59, 12.57it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:59, 12.64it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:58, 12

2021-07-31 09:50:54.121103 Epoch 11, Training loss 1.4421761714284071





  1%|          | 4/782 [00:00<00:59, 13.02it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.93it/s][A[A[A


  1%|          | 8/782 [00:00<00:59, 13.00it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 13.00it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.83it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:01, 12.58it/s][A[A[A


  2%|▏         | 16/782 [00:01<01:00, 12.73it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.93it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 12.93it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 12.93it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:59, 12.80it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 12.89it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:58, 12.80it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:58, 12.76it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:58, 12.76it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.88it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 12

2021-07-31 09:51:54.893086 Epoch 12, Training loss 1.419449392791904





  1%|          | 4/782 [00:00<00:59, 13.14it/s][A[A[A


  1%|          | 6/782 [00:00<00:59, 13.04it/s][A[A[A


  1%|          | 8/782 [00:00<00:59, 12.94it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:00, 12.69it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.67it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:01, 12.53it/s][A[A[A


  2%|▏         | 16/782 [00:01<01:01, 12.41it/s][A[A[A


  2%|▏         | 18/782 [00:01<01:01, 12.38it/s][A[A[A


  3%|▎         | 20/782 [00:01<01:00, 12.60it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:59, 12.70it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:59, 12.67it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:59, 12.63it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:59, 12.67it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:59, 12.62it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:58, 12.74it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:59, 12.64it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:58, 12

2021-07-31 09:52:56.165242 Epoch 13, Training loss 1.4051114385542662





  1%|          | 4/782 [00:00<00:57, 13.48it/s][A[A[A


  1%|          | 6/782 [00:00<00:57, 13.41it/s][A[A[A


  1%|          | 8/782 [00:00<00:58, 13.28it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:58, 13.19it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:58, 13.10it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.91it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.92it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.90it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 13.02it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 13.10it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:57, 13.15it/s][A[A[A


  3%|▎         | 26/782 [00:01<00:57, 13.19it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:57, 13.01it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:57, 13.03it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 12.97it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:57, 13.09it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 13

2021-07-31 09:53:56.931914 Epoch 14, Training loss 1.3956734800277768





  1%|          | 4/782 [00:00<00:58, 13.22it/s][A[A[A


  1%|          | 6/782 [00:00<00:58, 13.32it/s][A[A[A


  1%|          | 8/782 [00:00<00:58, 13.23it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 12.98it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:58, 13.06it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:58, 13.10it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:58, 13.04it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.02it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.83it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 12.94it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 12.92it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 12.88it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:58, 12.98it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:57, 13.14it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 13.05it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.88it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:59, 12

2021-07-31 09:54:57.858332 Epoch 15, Training loss 1.3802237625012312





  1%|          | 4/782 [00:00<01:00, 12.96it/s][A[A[A


  1%|          | 6/782 [00:00<00:59, 13.08it/s][A[A[A


  1%|          | 8/782 [00:00<00:58, 13.17it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 13.07it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 12.94it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.88it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.89it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.83it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.80it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:59, 12.75it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:59, 12.68it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:59, 12.73it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:58, 12.88it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:58, 12.93it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 12.95it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.80it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:58, 12

2021-07-31 09:55:58.567330 Epoch 16, Training loss 1.376677617926122





  1%|          | 4/782 [00:00<01:04, 12.02it/s][A[A[A


  1%|          | 6/782 [00:00<01:03, 12.24it/s][A[A[A


  1%|          | 8/782 [00:00<01:01, 12.49it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:01, 12.55it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.69it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.81it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.81it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.86it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.83it/s][A[A[A


  3%|▎         | 22/782 [00:01<01:00, 12.64it/s][A[A[A


  3%|▎         | 24/782 [00:01<01:00, 12.61it/s][A[A[A


  3%|▎         | 26/782 [00:02<01:00, 12.58it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:59, 12.66it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:59, 12.64it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:59, 12.71it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.74it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 12

2021-07-31 09:56:59.125534 Epoch 17, Training loss 1.3581114396872118





  1%|          | 4/782 [00:00<01:00, 12.80it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.81it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.85it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:00, 12.82it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.82it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:00, 12.72it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.78it/s][A[A[A


  2%|▏         | 18/782 [00:01<01:00, 12.56it/s][A[A[A


  3%|▎         | 20/782 [00:01<01:00, 12.61it/s][A[A[A


  3%|▎         | 22/782 [00:01<01:00, 12.64it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:59, 12.67it/s][A[A[A


  3%|▎         | 26/782 [00:02<01:00, 12.49it/s][A[A[A


  4%|▎         | 28/782 [00:02<01:00, 12.55it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:59, 12.59it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:59, 12.67it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:59, 12.64it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:58, 12

2021-07-31 09:57:59.820668 Epoch 18, Training loss 1.3495236767832275





  1%|          | 4/782 [00:00<01:01, 12.58it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.86it/s][A[A[A


  1%|          | 8/782 [00:00<00:59, 13.03it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 12.93it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 12.95it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:58, 13.08it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:58, 13.19it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:57, 13.22it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:57, 13.14it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:57, 13.20it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:57, 13.30it/s][A[A[A


  3%|▎         | 26/782 [00:01<00:56, 13.29it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:56, 13.32it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:56, 13.39it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:56, 13.36it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:56, 13.31it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:55, 13

2021-07-31 09:59:00.214554 Epoch 19, Training loss 1.3376089207199224





  1%|          | 4/782 [00:00<00:59, 13.05it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.90it/s][A[A[A


  1%|          | 8/782 [00:00<00:59, 13.03it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 12.91it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 12.92it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.94it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.97it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.82it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 12.98it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:57, 13.11it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:57, 13.10it/s][A[A[A


  3%|▎         | 26/782 [00:01<00:57, 13.15it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:57, 13.15it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:56, 13.28it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:56, 13.34it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:56, 13.23it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:55, 13

2021-07-31 10:00:00.785202 Epoch 20, Training loss 1.3262764404496878





  1%|          | 4/782 [00:00<01:00, 12.80it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.86it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.84it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:00, 12.86it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 12.92it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:58, 13.06it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:58, 13.00it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.09it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 13.11it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:57, 13.15it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:57, 13.09it/s][A[A[A


  3%|▎         | 26/782 [00:01<00:58, 12.99it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:57, 13.04it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:58, 12.85it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:58, 12.88it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.83it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 12

2021-07-31 10:01:01.484075 Epoch 21, Training loss 1.3248138343129316





  1%|          | 4/782 [00:00<01:00, 12.96it/s][A[A[A


  1%|          | 6/782 [00:00<00:59, 13.03it/s][A[A[A


  1%|          | 8/782 [00:00<00:58, 13.17it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 12.96it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 12.97it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:58, 13.07it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:57, 13.22it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.15it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:57, 13.15it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:57, 13.14it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:57, 13.12it/s][A[A[A


  3%|▎         | 26/782 [00:01<00:57, 13.05it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:58, 12.85it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:57, 13.08it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 13.02it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:57, 12.95it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 13

2021-07-31 10:02:01.876442 Epoch 22, Training loss 1.3142015893593468





  1%|          | 4/782 [00:00<01:02, 12.53it/s][A[A[A


  1%|          | 6/782 [00:00<01:01, 12.70it/s][A[A[A


  1%|          | 8/782 [00:00<01:01, 12.58it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:01, 12.52it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:02, 12.36it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:01, 12.46it/s][A[A[A


  2%|▏         | 16/782 [00:01<01:01, 12.47it/s][A[A[A


  2%|▏         | 18/782 [00:01<01:01, 12.35it/s][A[A[A


  3%|▎         | 20/782 [00:01<01:01, 12.41it/s][A[A[A


  3%|▎         | 22/782 [00:01<01:00, 12.50it/s][A[A[A


  3%|▎         | 24/782 [00:01<01:01, 12.38it/s][A[A[A


  3%|▎         | 26/782 [00:02<01:00, 12.57it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:59, 12.63it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:59, 12.59it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:59, 12.54it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.69it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:58, 12

2021-07-31 10:03:02.289223 Epoch 23, Training loss 1.3035367696029145





  1%|          | 4/782 [00:00<01:03, 12.31it/s][A[A[A


  1%|          | 6/782 [00:00<01:02, 12.45it/s][A[A[A


  1%|          | 8/782 [00:00<01:01, 12.50it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:01, 12.64it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.73it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:00, 12.78it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.84it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.76it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.84it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:59, 12.68it/s][A[A[A


  3%|▎         | 24/782 [00:01<01:01, 12.36it/s][A[A[A


  3%|▎         | 26/782 [00:02<01:00, 12.42it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:59, 12.68it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:59, 12.69it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:59, 12.66it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.73it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:58, 12

2021-07-31 10:04:03.151702 Epoch 24, Training loss 1.295706980749774





  1%|          | 4/782 [00:00<01:00, 12.82it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.80it/s][A[A[A


  1%|          | 8/782 [00:00<00:59, 12.94it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 13.01it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:58, 13.10it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:58, 13.09it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:58, 13.16it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.11it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:57, 13.27it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:56, 13.40it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:57, 13.30it/s][A[A[A


  3%|▎         | 26/782 [00:01<00:57, 13.26it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:56, 13.30it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:56, 13.27it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:56, 13.23it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:56, 13.19it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:56, 13

2021-07-31 10:05:03.895901 Epoch 25, Training loss 1.283390033275575





  1%|          | 4/782 [00:00<01:00, 12.85it/s][A[A[A


  1%|          | 6/782 [00:00<00:59, 12.97it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.86it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 12.87it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.65it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:00, 12.76it/s][A[A[A


  2%|▏         | 16/782 [00:01<01:00, 12.67it/s][A[A[A


  2%|▏         | 18/782 [00:01<01:00, 12.72it/s][A[A[A


  3%|▎         | 20/782 [00:01<01:00, 12.59it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:59, 12.80it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:59, 12.73it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:59, 12.66it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:59, 12.62it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:58, 12.80it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:58, 12.83it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.86it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 13

2021-07-31 10:06:04.664499 Epoch 26, Training loss 1.2692557716613535





  1%|          | 4/782 [00:00<01:00, 12.81it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.76it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.72it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:00, 12.80it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.78it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.84it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:58, 12.99it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.07it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 13.05it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:57, 13.16it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:57, 13.17it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 12.98it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:58, 12.95it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:58, 12.96it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 12.95it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:57, 13.03it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 13

2021-07-31 10:07:05.261185 Epoch 27, Training loss 1.2584578323242304





  1%|          | 4/782 [00:00<01:02, 12.50it/s][A[A[A


  1%|          | 6/782 [00:00<01:01, 12.56it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.70it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:00, 12.70it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.72it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.88it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.89it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.06it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 13.04it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 13.08it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 13.07it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:57, 13.04it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:57, 13.07it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:56, 13.22it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:56, 13.27it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:56, 13.22it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:56, 13

2021-07-31 10:08:05.632213 Epoch 28, Training loss 1.2494406190217304





  1%|          | 4/782 [00:00<01:01, 12.68it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.73it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.90it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:00, 12.84it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.82it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.82it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.85it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.92it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.88it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:59, 12.68it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:59, 12.64it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 12.82it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:58, 12.79it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:59, 12.72it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:58, 12.78it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.80it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:58, 12

2021-07-31 10:09:06.340401 Epoch 29, Training loss 1.2622426172808918





  1%|          | 4/782 [00:00<00:59, 13.05it/s][A[A[A


  1%|          | 6/782 [00:00<00:58, 13.16it/s][A[A[A


  1%|          | 8/782 [00:00<00:58, 13.30it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:58, 13.23it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 12.93it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:58, 13.11it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:57, 13.21it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.13it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 13.00it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 13.08it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:57, 13.13it/s][A[A[A


  3%|▎         | 26/782 [00:01<00:57, 13.13it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:56, 13.23it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:57, 13.07it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 13.04it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:56, 13.13it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 13

2021-07-31 10:10:06.999762 Epoch 30, Training loss 1.239433901922782





  1%|          | 4/782 [00:00<01:00, 12.95it/s][A[A[A


  1%|          | 6/782 [00:00<00:59, 12.96it/s][A[A[A


  1%|          | 8/782 [00:00<00:59, 13.03it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 12.97it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 13.04it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:58, 13.02it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.95it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.79it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.87it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 12.93it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 13.06it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:57, 13.11it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:57, 13.06it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:58, 12.83it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:58, 12.92it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:57, 12.91it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 12

2021-07-31 10:11:08.041800 Epoch 31, Training loss 1.2372816936744144





  1%|          | 4/782 [00:00<01:01, 12.72it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.85it/s][A[A[A


  1%|          | 8/782 [00:00<00:59, 12.95it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 12.99it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:58, 13.09it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.99it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:58, 13.08it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.07it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 13.09it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:57, 13.17it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:57, 13.21it/s][A[A[A


  3%|▎         | 26/782 [00:01<00:58, 13.01it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:57, 13.05it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:57, 13.06it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 13.01it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.84it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 12

2021-07-31 10:12:09.026127 Epoch 32, Training loss 1.2314585403103353





  1%|          | 4/782 [00:00<00:59, 13.18it/s][A[A[A


  1%|          | 6/782 [00:00<00:59, 13.10it/s][A[A[A


  1%|          | 8/782 [00:00<00:58, 13.14it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:58, 13.20it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 12.87it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:58, 13.02it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.94it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 12.95it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.85it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:59, 12.79it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 12.93it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 13.03it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:57, 13.11it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:57, 13.12it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:56, 13.16it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:57, 13.11it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:56, 13

2021-07-31 10:13:09.979651 Epoch 33, Training loss 1.2236999641446507





  1%|          | 4/782 [00:00<01:00, 12.77it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.82it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.86it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:00, 12.84it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.82it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:00, 12.74it/s][A[A[A


  2%|▏         | 16/782 [00:01<01:00, 12.68it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.85it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.91it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 13.07it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 12.91it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 12.83it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:57, 13.04it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:57, 13.08it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 13.07it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:57, 13.03it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 12

2021-07-31 10:14:11.100236 Epoch 34, Training loss 1.2083217017638408





  1%|          | 4/782 [00:00<01:00, 12.80it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.89it/s][A[A[A


  1%|          | 7/782 [00:00<01:07, 11.55it/s][A[A[A


  1%|          | 9/782 [00:00<01:04, 12.00it/s][A[A[A


  1%|▏         | 11/782 [00:00<01:03, 12.12it/s][A[A[A


  2%|▏         | 13/782 [00:01<01:02, 12.29it/s][A[A[A


  2%|▏         | 15/782 [00:01<01:02, 12.26it/s][A[A[A


  2%|▏         | 17/782 [00:01<01:01, 12.47it/s][A[A[A


  2%|▏         | 19/782 [00:01<01:00, 12.66it/s][A[A[A


  3%|▎         | 21/782 [00:01<00:59, 12.77it/s][A[A[A


  3%|▎         | 23/782 [00:01<00:59, 12.84it/s][A[A[A


  3%|▎         | 25/782 [00:01<00:58, 12.92it/s][A[A[A


  3%|▎         | 27/782 [00:02<00:57, 13.03it/s][A[A[A


  4%|▎         | 29/782 [00:02<00:57, 13.03it/s][A[A[A


  4%|▍         | 31/782 [00:02<00:57, 13.03it/s][A[A[A


  4%|▍         | 33/782 [00:02<00:58, 12.90it/s][A[A[A


  4%|▍         | 35/782 [00:02<00:57, 12.

2021-07-31 10:15:12.367157 Epoch 35, Training loss 1.2227195430442195





  1%|          | 4/782 [00:00<00:59, 13.09it/s][A[A[A


  1%|          | 6/782 [00:00<00:58, 13.17it/s][A[A[A


  1%|          | 8/782 [00:00<00:59, 13.07it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:58, 13.11it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:58, 13.13it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:58, 13.15it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:58, 12.98it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.03it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 12.93it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 12.94it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 13.01it/s][A[A[A


  3%|▎         | 26/782 [00:01<00:58, 12.99it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:57, 13.03it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:57, 13.07it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 13.10it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:56, 13.12it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:56, 13

2021-07-31 10:16:13.648477 Epoch 36, Training loss 1.202584474440426





  1%|          | 4/782 [00:00<01:00, 12.92it/s][A[A[A


  1%|          | 6/782 [00:00<00:59, 12.95it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.87it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 12.94it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.81it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.86it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.95it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:58, 13.02it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 12.97it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 13.00it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 12.98it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:57, 13.05it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:57, 13.05it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:57, 13.00it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 13.05it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:57, 13.07it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 13

2021-07-31 10:17:15.256217 Epoch 37, Training loss 1.1953826761611588





  1%|          | 4/782 [00:00<01:02, 12.45it/s][A[A[A


  1%|          | 6/782 [00:00<01:01, 12.64it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.73it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 12.87it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 12.87it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.99it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.94it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.90it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.85it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:59, 12.87it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 12.88it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 12.92it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:58, 12.96it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:57, 12.98it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:58, 12.87it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:57, 12.95it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 13

2021-07-31 10:18:16.688050 Epoch 38, Training loss 1.1909745536039553





  1%|          | 4/782 [00:00<00:59, 13.01it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.75it/s][A[A[A


  1%|          | 8/782 [00:00<01:02, 12.42it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:01, 12.50it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:01, 12.62it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:00, 12.78it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.82it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.82it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.84it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:59, 12.83it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:59, 12.82it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 12.84it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:58, 12.98it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:58, 12.95it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 12.96it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.90it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 12

2021-07-31 10:19:18.238259 Epoch 39, Training loss 1.191630472917386





  1%|          | 4/782 [00:00<01:00, 12.84it/s][A[A[A


  1%|          | 6/782 [00:00<01:01, 12.69it/s][A[A[A


  1%|          | 8/782 [00:00<01:01, 12.64it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:02, 12.42it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:01, 12.42it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:01, 12.43it/s][A[A[A


  2%|▏         | 16/782 [00:01<01:01, 12.47it/s][A[A[A


  2%|▏         | 18/782 [00:01<01:01, 12.48it/s][A[A[A


  3%|▎         | 20/782 [00:01<01:01, 12.39it/s][A[A[A


  3%|▎         | 22/782 [00:01<01:01, 12.41it/s][A[A[A


  3%|▎         | 24/782 [00:01<01:00, 12.51it/s][A[A[A


  3%|▎         | 26/782 [00:02<01:01, 12.37it/s][A[A[A


  4%|▎         | 28/782 [00:02<01:00, 12.50it/s][A[A[A


  4%|▍         | 30/782 [00:02<01:00, 12.42it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:59, 12.62it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:59, 12.51it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:59, 12

2021-07-31 10:20:20.036435 Epoch 40, Training loss 1.1698782696291004





  1%|          | 4/782 [00:00<01:00, 12.77it/s][A[A[A


  1%|          | 6/782 [00:00<00:59, 12.94it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.86it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:00, 12.79it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:00, 12.80it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.90it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.81it/s][A[A[A


  2%|▏         | 18/782 [00:01<01:00, 12.73it/s][A[A[A


  3%|▎         | 20/782 [00:01<01:00, 12.64it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:59, 12.77it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 12.88it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 12.94it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:58, 12.98it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:57, 13.02it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 13.00it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.87it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 12

2021-07-31 10:21:22.001545 Epoch 41, Training loss 1.187865479065634





  1%|          | 4/782 [00:00<01:00, 12.78it/s][A[A[A


  1%|          | 6/782 [00:00<01:01, 12.68it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.75it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 12.88it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 12.84it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.87it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.87it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.88it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.76it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 12.89it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 12.91it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:59, 12.72it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:58, 12.83it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:59, 12.73it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:58, 12.78it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.87it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 12

2021-07-31 10:22:24.060950 Epoch 42, Training loss 1.1687050925953615





  1%|          | 4/782 [00:00<01:01, 12.67it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.73it/s][A[A[A


  1%|          | 8/782 [00:00<01:01, 12.68it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:01, 12.54it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:01, 12.52it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:01, 12.41it/s][A[A[A


  2%|▏         | 16/782 [00:01<01:00, 12.60it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.82it/s][A[A[A


  3%|▎         | 20/782 [00:01<01:00, 12.61it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:59, 12.69it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:59, 12.64it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:59, 12.77it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:58, 12.79it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:58, 12.81it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:57, 12.96it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:57, 12.95it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 13

2021-07-31 10:23:25.671837 Epoch 43, Training loss 1.1653795541095002





  1%|          | 4/782 [00:00<00:59, 13.10it/s][A[A[A


  1%|          | 6/782 [00:00<00:59, 13.02it/s][A[A[A


  1%|          | 8/782 [00:00<00:59, 12.99it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 12.92it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 13.00it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.92it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.95it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.94it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 12.97it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 13.06it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 13.04it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 13.00it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:58, 12.92it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:58, 12.87it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:58, 12.86it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:57, 12.95it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:58, 12

2021-07-31 10:24:27.713594 Epoch 44, Training loss 1.1607938613123296





  1%|          | 4/782 [00:00<01:01, 12.74it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.75it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.85it/s][A[A[A


  1%|▏         | 10/782 [00:00<00:59, 12.93it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 13.01it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.96it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:59, 12.82it/s][A[A[A


  2%|▏         | 18/782 [00:01<01:00, 12.70it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:59, 12.76it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:59, 12.78it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:59, 12.79it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:59, 12.78it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:59, 12.74it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:58, 12.81it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:58, 12.91it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:57, 12.98it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:58, 12

2021-07-31 10:25:30.013674 Epoch 45, Training loss 1.148737202138852





  1%|          | 4/782 [00:00<01:01, 12.72it/s][A[A[A


  1%|          | 6/782 [00:00<01:01, 12.52it/s][A[A[A


  1%|          | 8/782 [00:00<01:02, 12.41it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:02, 12.42it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:02, 12.30it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:01, 12.43it/s][A[A[A


  2%|▏         | 16/782 [00:01<01:01, 12.43it/s][A[A[A


  2%|▏         | 18/782 [00:01<01:01, 12.52it/s][A[A[A


  3%|▎         | 20/782 [00:01<01:01, 12.30it/s][A[A[A


  3%|▎         | 22/782 [00:01<01:01, 12.39it/s][A[A[A


  3%|▎         | 24/782 [00:01<01:01, 12.40it/s][A[A[A


  3%|▎         | 26/782 [00:02<01:00, 12.49it/s][A[A[A


  4%|▎         | 28/782 [00:02<01:00, 12.53it/s][A[A[A


  4%|▍         | 30/782 [00:02<01:00, 12.51it/s][A[A[A


  4%|▍         | 32/782 [00:02<01:01, 12.19it/s][A[A[A


  4%|▍         | 34/782 [00:02<01:00, 12.31it/s][A[A[A


  5%|▍         | 36/782 [00:02<01:01, 12

2021-07-31 10:26:31.804802 Epoch 46, Training loss 1.1517563604028023





  1%|          | 4/782 [00:00<01:02, 12.46it/s][A[A[A


  1%|          | 6/782 [00:00<01:02, 12.51it/s][A[A[A


  1%|          | 8/782 [00:00<01:02, 12.45it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:01, 12.48it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:02, 12.42it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:01, 12.42it/s][A[A[A


  2%|▏         | 16/782 [00:01<01:02, 12.24it/s][A[A[A


  2%|▏         | 18/782 [00:01<01:01, 12.38it/s][A[A[A


  3%|▎         | 20/782 [00:01<01:01, 12.41it/s][A[A[A


  3%|▎         | 22/782 [00:01<01:01, 12.37it/s][A[A[A


  3%|▎         | 24/782 [00:01<01:01, 12.42it/s][A[A[A


  3%|▎         | 26/782 [00:02<01:00, 12.56it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:59, 12.65it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:59, 12.66it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:59, 12.65it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:58, 12.70it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:59, 12

2021-07-31 10:27:33.499079 Epoch 47, Training loss 1.1418350242897677





  1%|          | 4/782 [00:00<01:00, 12.76it/s][A[A[A


  1%|          | 6/782 [00:00<01:01, 12.69it/s][A[A[A


  1%|          | 8/782 [00:00<01:01, 12.51it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:02, 12.35it/s][A[A[A


  2%|▏         | 12/782 [00:00<01:01, 12.45it/s][A[A[A


  2%|▏         | 14/782 [00:01<01:02, 12.23it/s][A[A[A


  2%|▏         | 16/782 [00:01<01:02, 12.23it/s][A[A[A


  2%|▏         | 18/782 [00:01<01:02, 12.19it/s][A[A[A


  3%|▎         | 20/782 [00:01<01:02, 12.23it/s][A[A[A


  3%|▎         | 22/782 [00:01<01:01, 12.34it/s][A[A[A


  3%|▎         | 24/782 [00:01<01:01, 12.31it/s][A[A[A


  3%|▎         | 26/782 [00:02<01:01, 12.33it/s][A[A[A


  4%|▎         | 28/782 [00:02<01:01, 12.32it/s][A[A[A


  4%|▍         | 30/782 [00:02<01:01, 12.22it/s][A[A[A


  4%|▍         | 32/782 [00:02<01:00, 12.30it/s][A[A[A


  4%|▍         | 34/782 [00:02<01:00, 12.44it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:59, 12

2021-07-31 10:28:35.474242 Epoch 48, Training loss 1.140650984957395





  1%|          | 4/782 [00:00<01:00, 12.86it/s][A[A[A


  1%|          | 6/782 [00:00<01:00, 12.74it/s][A[A[A


  1%|          | 8/782 [00:00<01:00, 12.77it/s][A[A[A


  1%|▏         | 10/782 [00:00<01:00, 12.76it/s][A[A[A


  2%|▏         | 12/782 [00:00<00:59, 12.88it/s][A[A[A


  2%|▏         | 14/782 [00:01<00:59, 12.96it/s][A[A[A


  2%|▏         | 16/782 [00:01<00:58, 13.01it/s][A[A[A


  2%|▏         | 18/782 [00:01<00:59, 12.93it/s][A[A[A


  3%|▎         | 20/782 [00:01<00:58, 13.02it/s][A[A[A


  3%|▎         | 22/782 [00:01<00:58, 12.91it/s][A[A[A


  3%|▎         | 24/782 [00:01<00:58, 12.93it/s][A[A[A


  3%|▎         | 26/782 [00:02<00:58, 12.87it/s][A[A[A


  4%|▎         | 28/782 [00:02<00:58, 12.96it/s][A[A[A


  4%|▍         | 30/782 [00:02<00:58, 12.79it/s][A[A[A


  4%|▍         | 32/782 [00:02<00:58, 12.91it/s][A[A[A


  4%|▍         | 34/782 [00:02<00:57, 13.02it/s][A[A[A


  5%|▍         | 36/782 [00:02<00:57, 12

2021-07-31 10:29:37.376528 Epoch 49, Training loss 1.1335980078143537





In [25]:
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=True)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64,shuffle=True)
validate(model , train_loader, test_loader)

Accuracy on training set: 60.61
Accuracy on test set: 57.96


In [29]:
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, dampening=0, weight_decay=0.0001, nesterov=False)
training_model( 10 , optimizer)

2021-07-31 10:33:19.499311 Epoch 0, Training loss 0.8806894733320416
2021-07-31 10:34:14.135663 Epoch 1, Training loss 0.8678132188899438
2021-07-31 10:35:08.933007 Epoch 2, Training loss 0.8605710213903881
2021-07-31 10:36:03.608535 Epoch 3, Training loss 0.8538148628781214
2021-07-31 10:36:58.712683 Epoch 4, Training loss 0.8460716751911451
2021-07-31 10:37:53.634099 Epoch 5, Training loss 0.8394193684353548
2021-07-31 10:38:48.383479 Epoch 6, Training loss 0.834183173258896
2021-07-31 10:39:43.000448 Epoch 7, Training loss 0.8284038355969408
2021-07-31 10:40:37.768787 Epoch 8, Training loss 0.8224023816835545
2021-07-31 10:41:32.802476 Epoch 9, Training loss 0.8173945073581412


In [30]:
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=True)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64,shuffle=True)
validate(model , train_loader, test_loader)

Accuracy on training set: 72.16
Accuracy on test set: 64.09


In [31]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, dampening=0, weight_decay=0.0001, nesterov=False)
training_model( 10 , optimizer)

2021-07-31 10:42:42.959965 Epoch 0, Training loss 0.7722171797319446
2021-07-31 10:43:37.484371 Epoch 1, Training loss 0.7657790691651347
2021-07-31 10:44:32.001713 Epoch 2, Training loss 0.7626425309483048
2021-07-31 10:45:26.597542 Epoch 3, Training loss 0.7608341263688129
2021-07-31 10:46:21.359219 Epoch 4, Training loss 0.758834692065978
2021-07-31 10:47:16.093007 Epoch 5, Training loss 0.757974858212349
2021-07-31 10:48:10.782421 Epoch 6, Training loss 0.7567204834174013
2021-07-31 10:49:05.556769 Epoch 7, Training loss 0.7555176828752088
2021-07-31 10:50:00.262458 Epoch 8, Training loss 0.7547483785682932
2021-07-31 10:50:54.829985 Epoch 9, Training loss 0.7530825112558082


In [32]:
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=True)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64,shuffle=True)
validate(model , train_loader, test_loader)

Accuracy on training set: 73.97
Accuracy on test set: 65.16


In [33]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.8, dampening=0, weight_decay=0.0001, nesterov=False)
training_model( 10 , optimizer)

2021-07-31 11:01:35.822132 Epoch 0, Training loss 0.7488747414420632
2021-07-31 11:02:29.180215 Epoch 1, Training loss 0.7480975735522902
2021-07-31 11:03:23.155372 Epoch 2, Training loss 0.7474077529919422
2021-07-31 11:04:17.366580 Epoch 3, Training loss 0.7472747687793448
2021-07-31 11:05:11.764520 Epoch 4, Training loss 0.7467154222147544
2021-07-31 11:06:06.302157 Epoch 5, Training loss 0.7459837502759435
2021-07-31 11:07:00.903385 Epoch 6, Training loss 0.7452064703797441
2021-07-31 11:07:55.287631 Epoch 7, Training loss 0.7442965005212427
2021-07-31 11:08:50.150448 Epoch 8, Training loss 0.7441011720987232
2021-07-31 11:09:44.748619 Epoch 9, Training loss 0.7431781743950856


In [34]:
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=True)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64,shuffle=True)
validate(model , train_loader, test_loader)

Accuracy on training set: 74.19
Accuracy on test set: 65.25


In [35]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.8, dampening=0, weight_decay=0.0001, nesterov=False)
training_model( 20 , optimizer)

2021-07-31 11:12:55.760245 Epoch 0, Training loss 0.7432154070233445
2021-07-31 11:13:50.125382 Epoch 1, Training loss 0.7426144381237152
2021-07-31 11:14:45.218957 Epoch 2, Training loss 0.7420617205941159
2021-07-31 11:15:39.700393 Epoch 3, Training loss 0.7413884367403167
2021-07-31 11:16:34.309105 Epoch 4, Training loss 0.740531475342753
2021-07-31 11:17:29.116535 Epoch 5, Training loss 0.7401792288893629
2021-07-31 11:18:24.001422 Epoch 6, Training loss 0.7397601275569032
2021-07-31 11:19:18.782881 Epoch 7, Training loss 0.7389551340923894
2021-07-31 11:20:13.619610 Epoch 8, Training loss 0.7389602864642277
2021-07-31 11:21:08.350176 Epoch 9, Training loss 0.7381633300610515
2021-07-31 11:22:03.049971 Epoch 10, Training loss 0.7376925711284208
2021-07-31 11:22:57.653615 Epoch 11, Training loss 0.7369042082744486
2021-07-31 11:23:52.150822 Epoch 12, Training loss 0.7364937430986053
2021-07-31 11:24:46.654590 Epoch 13, Training loss 0.7358559697409115
2021-07-31 11:25:41.701141 Epoc

In [36]:
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=True)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64,shuffle=True)
validate(model , train_loader, test_loader)

Accuracy on training set: 74.59
Accuracy on test set: 64.92


In [37]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.8, dampening=0, weight_decay=0.0001, nesterov=False)
training_model( 20 , optimizer)

2021-07-31 11:31:52.982186 Epoch 0, Training loss 0.7326835971659101
2021-07-31 11:32:47.623086 Epoch 1, Training loss 0.7312020422781215
2021-07-31 11:33:42.724478 Epoch 2, Training loss 0.7308438120748076
2021-07-31 11:34:37.414813 Epoch 3, Training loss 0.7305300707935982
2021-07-31 11:35:32.352589 Epoch 4, Training loss 0.7299969063314331
2021-07-31 11:36:27.331805 Epoch 5, Training loss 0.7293835193528544
2021-07-31 11:37:22.258381 Epoch 6, Training loss 0.7290311869605423
2021-07-31 11:38:17.253761 Epoch 7, Training loss 0.7285633188912936
2021-07-31 11:39:12.206773 Epoch 8, Training loss 0.7285770720914196
2021-07-31 11:40:07.243699 Epoch 9, Training loss 0.7276047475426398
2021-07-31 11:41:02.098571 Epoch 10, Training loss 0.72705215512944
2021-07-31 11:41:57.229358 Epoch 11, Training loss 0.7268512885817482
2021-07-31 11:42:52.204254 Epoch 12, Training loss 0.7256419078620804
2021-07-31 11:43:47.131559 Epoch 13, Training loss 0.7252497392542222
2021-07-31 11:44:41.930367 Epoch

In [38]:
validate(model , train_loader, test_loader)

Accuracy on training set: 75.01
Accuracy on test set: 64.93


In [40]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, dampening=0, weight_decay=0.0001, nesterov=False)
training_model( 10 , optimizer)

2021-07-31 11:53:59.914490 Epoch 0, Training loss 0.735098236204718
2021-07-31 11:54:54.665690 Epoch 1, Training loss 0.7237203684837922
2021-07-31 11:55:49.349346 Epoch 2, Training loss 0.7189526903583571
2021-07-31 11:56:44.011045 Epoch 3, Training loss 0.7168787199136851
2021-07-31 11:57:39.131119 Epoch 4, Training loss 0.7147903126828811
2021-07-31 11:58:33.721194 Epoch 5, Training loss 0.712586276595245
2021-07-31 11:59:28.187270 Epoch 6, Training loss 0.7116443433267686
2021-07-31 12:00:22.884880 Epoch 7, Training loss 0.7100490050395126
2021-07-31 12:01:17.655767 Epoch 8, Training loss 0.7086112354798695
2021-07-31 12:02:12.349641 Epoch 9, Training loss 0.7076973429192668


In [41]:
validate(model , train_loader, test_loader)

Accuracy on training set: 75.73
Accuracy on test set: 65.14


In [42]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, dampening=0, weight_decay=0.0001, nesterov=False)
training_model( 50 , optimizer)

2021-07-31 12:06:47.574355 Epoch 0, Training loss 0.7068178419338162
2021-07-31 12:07:41.982171 Epoch 1, Training loss 0.7048152410008414
2021-07-31 12:08:37.039802 Epoch 2, Training loss 0.703881263809131
2021-07-31 12:09:31.551607 Epoch 3, Training loss 0.7028741403232754
2021-07-31 12:10:26.130116 Epoch 4, Training loss 0.7016396247746085
2021-07-31 12:11:20.779140 Epoch 5, Training loss 0.7004843196067054
2021-07-31 12:12:15.463624 Epoch 6, Training loss 0.6992410527699439
2021-07-31 12:13:10.118443 Epoch 7, Training loss 0.6983920137595643
2021-07-31 12:14:04.825047 Epoch 8, Training loss 0.6973797036025225
2021-07-31 12:14:59.812713 Epoch 9, Training loss 0.6967694298614322
2021-07-31 12:15:54.571387 Epoch 10, Training loss 0.6959011823777348
2021-07-31 12:16:49.172094 Epoch 11, Training loss 0.6939544239464928
2021-07-31 12:17:44.115604 Epoch 12, Training loss 0.6933541709885878
2021-07-31 12:18:38.860307 Epoch 13, Training loss 0.6921670634652037
2021-07-31 12:19:33.877722 Epoc

In [43]:
validate(model , train_loader, test_loader)

Accuracy on training set: 77.64
Accuracy on test set: 64.42


In [44]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, dampening=0, weight_decay=0.001, nesterov=False)
training_model( 50 , optimizer)

2021-07-31 12:55:04.118524 Epoch 0, Training loss 0.6584132027519328
2021-07-31 12:55:58.736472 Epoch 1, Training loss 0.6604305086157206
2021-07-31 12:56:53.506665 Epoch 2, Training loss 0.6621903101806446
2021-07-31 12:57:48.657553 Epoch 3, Training loss 0.6645624916571791
2021-07-31 12:58:43.534562 Epoch 4, Training loss 0.6659140501485761
2021-07-31 12:59:38.284903 Epoch 5, Training loss 0.6677107137944692
2021-07-31 13:00:33.088219 Epoch 6, Training loss 0.669331663785993


KeyboardInterrupt: ignored

In [45]:
validate(model , train_loader, test_loader)

Accuracy on training set: 77.17
Accuracy on test set: 64.18


In [46]:
from torchsummary import summary
summary(model,(3,32,32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 16, 16]           9,472
       BatchNorm2d-2           [-1, 64, 16, 16]             128
              ReLU-3           [-1, 64, 16, 16]               0
         MaxPool2d-4             [-1, 64, 8, 8]               0
            Conv2d-5             [-1, 64, 8, 8]           4,160
       BatchNorm2d-6             [-1, 64, 8, 8]             128
            Conv2d-7             [-1, 64, 8, 8]          36,928
       BatchNorm2d-8             [-1, 64, 8, 8]             128
            Conv2d-9            [-1, 256, 8, 8]          16,640
      BatchNorm2d-10            [-1, 256, 8, 8]             512
           Conv2d-11            [-1, 256, 8, 8]          16,640
      BatchNorm2d-12            [-1, 256, 8, 8]             512
             ReLU-13            [-1, 256, 8, 8]               0
            block-14            [-1, 25