In [1]:
# !pip install cloud-tpu-client==0.10 torch==1.13.0 https://storage.googleapis.com/tpu-pytorch/wheels/colab/torch_xla-1.13-cp38-cp38-linux_x86_64.whl

In [2]:
import numpy as np
from tqdm import tqdm

## PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim

## Torchvision
import torchvision
from torchvision.datasets import CIFAR10
from torchvision import transforms

In [4]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# device = xm.xla_device()
print("Device:", device)

Device: cuda:0


In [5]:
# transformers definition
test_transform = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
                                     ])
train_transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                      transforms.RandomResizedCrop((32,32),scale=(0.8,1.0),ratio=(0.9,1.1)),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
                                     ])

# CIFAR10 dataset loading

train_set = CIFAR10(root="./data", train=True, transform=train_transform, download=True)
test_set = CIFAR10(root="./data", train=False, transform=test_transform, download=True)

# Dataloaders creation
batch_size = 200
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True, num_workers=4
)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size, shuffle=False, num_workers=4
)

# classes
classes = (
    "plane",
    "car",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
def img_to_patch(x, patch_size, flatten_channels=True):
    """
    x is a torch.Tensor representing an image with dimensions [Batch size, Number of channels, Height, Width]. 
    The patch_size is the number of pixels per dimension of the patches 
    flatten_channels is a boolean indicating whether the patches should be returned 
    as a flattened feature vector or as an image grid."
    """
    B, C, H, W = x.shape
    x = x.reshape(B, C, H//patch_size, patch_size, W//patch_size, patch_size)
    # [B, H', W', C, p_H, p_W] then [B, H'*W', C, p_H, p_W]
    x = x.permute(0, 2, 4, 1, 3, 5).flatten(1,2)            
    if flatten_channels:
        # [B, H'*W', C*p_H*p_W]
        x = x.flatten(2,4)          
    return x

In [7]:
class AttentionBlock(nn.Module):

    def __init__(self, embed_dim, hidden_dim, num_heads, dropout=0.0):
        """
        Inputs:
            The embed_dim is the dimensionality of the input and attention feature vectors. 
            The hidden_dim is the dimensionality of the hidden layer in the feed-forward network, 
            The num_heads is the number of heads used in the Multi-Head Attention block,
            the dropout is the amount of dropout applied in the feed-forward network
        """
        super().__init__()

        self.layer_norm_1 = nn.LayerNorm(embed_dim)
        self.attn = nn.MultiheadAttention(embed_dim, num_heads,
                                          dropout=dropout)
        self.layer_norm_2 = nn.LayerNorm(embed_dim)
        self.linear = nn.Sequential(
            nn.Linear(embed_dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, embed_dim),
            nn.Dropout(dropout)
        )


    def forward(self, x):
        inp_x = self.layer_norm_1(x)
        x = x + self.attn(inp_x, inp_x, inp_x)[0]
        x = x + self.linear(self.layer_norm_2(x))
        return x

In [8]:
class VisionTransformer(nn.Module):

    def __init__(self, embed_dim, hidden_dim, num_channels, num_heads, num_layers, num_classes, patch_size, num_patches, dropout=0.0):
        """
        Inputs:
            embed_dim - Dimensionality of the input feature vectors to the Transformer
            hidden_dim - Dimensionality of the hidden layer in the feed-forward networks
                         within the Transformer
            num_channels - Number of channels of the input (3 for RGB)
            num_heads - Number of heads to use in the Multi-Head Attention block
            num_layers - Number of layers to use in the Transformer
            num_classes - Number of classes to predict
            patch_size - Number of pixels that the patches have per dimension
            num_patches - Maximum number of patches an image can have
            dropout - Amount of dropout to apply in the feed-forward network and
                      on the input encoding
        """
        super().__init__()

        self.patch_size = patch_size

        # define layers and nets
        self.input_layer = nn.Linear(num_channels*(patch_size**2), embed_dim)
        self.transformer = nn.Sequential(*[AttentionBlock(embed_dim, hidden_dim, num_heads, dropout=dropout) for _ in range(num_layers)])
        self.mlp_head = nn.Sequential(
            nn.LayerNorm(embed_dim),
            nn.Linear(embed_dim, num_classes)
        )
        self.dropout = nn.Dropout(dropout)

        # Parameters/Embeddings
        self.cls_token = nn.Parameter(torch.randn(1,1,embed_dim))
        self.pos_embedding = nn.Parameter(torch.randn(1,1+num_patches,embed_dim))


    def forward(self, x):
        # input preprocessing
        x = img_to_patch(x, self.patch_size)
        B, T, _ = x.shape
        x = self.input_layer(x)

        # CLS token and positional encoding are added
        cls_token = self.cls_token.repeat(B, 1, 1)
        x = torch.cat([cls_token, x], dim=1)
        x = x + self.pos_embedding[:,:T+1]

        # transformer application
        x = self.dropout(x)
        x = x.transpose(0, 1)
        x = self.transformer(x)

        # classification prediction
        cls = x[0]
        out = self.mlp_head(cls)
        return out

In [9]:
#define parameters
#in this case, the patch size, or length of the input sequences fed into the Transformer model, 
#was tested at values of 2, 4, and 8. A smaller patch size allows the Transformer to model more 
#complex functions but also requires more computation time due to its quadratic memory usage in the attention layer,
#and can make the task more difficult by requiring the model to learn which patches are nearby and which are 
#far away. The best performance was achieved with a patch size of 4. The embedding and hidden dimensionality
#of the Transformer were chosen to be 256 and 512, respectively, with the query-key sizes in the multi-head 
#attention layers being considered to ensure that the attention maps were not too restricted.
#dropout value of 0.2 was used to prevent overfitting.
model_kwargs={'embed_dim': 256,
'hidden_dim': 512,
'num_heads': 8,
'num_layers': 6,
'patch_size': 4,
'num_channels': 3,
'num_patches': 64,
'num_classes': 10,
'dropout': 0.2}

In [10]:
model = VisionTransformer(**model_kwargs).to(device)

In [11]:
from tqdm import tqdm

In [12]:
# AdamW optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100,150], gamma=0.1)
# save loss
criterion = nn.CrossEntropyLoss().to(device)

# Training loop
for epoch in range(200):
    running_loss = 0.0
    for i, (images, labels) in tqdm(enumerate(train_loader)):
        # Clear the gradients
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 50 == 49:  # Print every 50 mini-batches
            print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / 50))
            running_loss = 0.0
    scheduler.step()

print("Finished Training")

52it [00:03, 23.21it/s]

[1,    50] loss: 2.063


103it [00:05, 23.42it/s]

[1,   100] loss: 1.853


154it [00:08, 23.48it/s]

[1,   150] loss: 1.714


202it [00:10, 23.43it/s]

[1,   200] loss: 1.644


250it [00:12, 20.47it/s]

[1,   250] loss: 1.566



52it [00:02, 23.60it/s]

[2,    50] loss: 1.512


103it [00:04, 23.46it/s]

[2,   100] loss: 1.473


154it [00:06, 23.28it/s]

[2,   150] loss: 1.460


202it [00:08, 23.58it/s]

[2,   200] loss: 1.414


250it [00:10, 23.12it/s]

[2,   250] loss: 1.412



52it [00:02, 23.52it/s]

[3,    50] loss: 1.372


103it [00:04, 23.43it/s]

[3,   100] loss: 1.345


154it [00:06, 23.54it/s]

[3,   150] loss: 1.337


202it [00:08, 23.52it/s]

[3,   200] loss: 1.319


250it [00:10, 23.21it/s]

[3,   250] loss: 1.317



52it [00:02, 23.32it/s]

[4,    50] loss: 1.294


103it [00:04, 23.43it/s]

[4,   100] loss: 1.272


154it [00:06, 23.27it/s]

[4,   150] loss: 1.263


202it [00:08, 23.32it/s]

[4,   200] loss: 1.233


250it [00:10, 23.12it/s]

[4,   250] loss: 1.250



52it [00:02, 22.96it/s]

[5,    50] loss: 1.223


103it [00:04, 23.49it/s]

[5,   100] loss: 1.224


154it [00:06, 23.41it/s]

[5,   150] loss: 1.203


202it [00:08, 23.25it/s]

[5,   200] loss: 1.186


250it [00:10, 23.07it/s]

[5,   250] loss: 1.210



52it [00:02, 23.38it/s]

[6,    50] loss: 1.173


103it [00:04, 23.16it/s]

[6,   100] loss: 1.163


154it [00:06, 23.42it/s]

[6,   150] loss: 1.143


202it [00:08, 23.29it/s]

[6,   200] loss: 1.137


250it [00:10, 22.99it/s]

[6,   250] loss: 1.152



52it [00:02, 23.47it/s]

[7,    50] loss: 1.120


103it [00:04, 23.42it/s]

[7,   100] loss: 1.108


154it [00:06, 23.48it/s]

[7,   150] loss: 1.117


202it [00:08, 23.42it/s]

[7,   200] loss: 1.126


250it [00:10, 23.02it/s]

[7,   250] loss: 1.117



52it [00:02, 23.35it/s]

[8,    50] loss: 1.085


103it [00:04, 23.45it/s]

[8,   100] loss: 1.061


154it [00:06, 23.37it/s]

[8,   150] loss: 1.101


202it [00:08, 23.33it/s]

[8,   200] loss: 1.085


250it [00:10, 23.00it/s]

[8,   250] loss: 1.078



52it [00:02, 23.40it/s]

[9,    50] loss: 1.038


103it [00:04, 23.50it/s]

[9,   100] loss: 1.053


154it [00:06, 23.34it/s]

[9,   150] loss: 1.059


202it [00:08, 23.36it/s]

[9,   200] loss: 1.064


250it [00:10, 23.05it/s]

[9,   250] loss: 1.034



52it [00:02, 23.46it/s]

[10,    50] loss: 1.013


103it [00:04, 23.51it/s]

[10,   100] loss: 1.032


154it [00:06, 23.41it/s]

[10,   150] loss: 1.021


202it [00:08, 23.25it/s]

[10,   200] loss: 1.027


250it [00:10, 23.05it/s]

[10,   250] loss: 1.027



52it [00:02, 23.42it/s]

[11,    50] loss: 0.985


103it [00:04, 23.40it/s]

[11,   100] loss: 0.997


154it [00:06, 23.49it/s]

[11,   150] loss: 1.003


202it [00:08, 23.21it/s]

[11,   200] loss: 0.999


250it [00:10, 23.02it/s]

[11,   250] loss: 0.974



52it [00:02, 23.40it/s]

[12,    50] loss: 0.968


103it [00:04, 23.39it/s]

[12,   100] loss: 0.968


154it [00:06, 23.41it/s]

[12,   150] loss: 0.962


202it [00:08, 23.41it/s]

[12,   200] loss: 0.970


250it [00:10, 23.04it/s]

[12,   250] loss: 0.978



52it [00:02, 23.05it/s]

[13,    50] loss: 0.959


103it [00:04, 23.19it/s]

[13,   100] loss: 0.930


154it [00:06, 23.24it/s]

[13,   150] loss: 0.930


202it [00:08, 23.30it/s]

[13,   200] loss: 0.947


250it [00:10, 23.01it/s]

[13,   250] loss: 0.953



52it [00:02, 23.36it/s]

[14,    50] loss: 0.929


103it [00:04, 23.27it/s]

[14,   100] loss: 0.919


154it [00:06, 23.42it/s]

[14,   150] loss: 0.918


202it [00:08, 23.25it/s]

[14,   200] loss: 0.924


250it [00:10, 23.10it/s]

[14,   250] loss: 0.929



52it [00:02, 23.33it/s]

[15,    50] loss: 0.895


103it [00:04, 23.48it/s]

[15,   100] loss: 0.913


154it [00:06, 23.33it/s]

[15,   150] loss: 0.888


202it [00:08, 23.45it/s]

[15,   200] loss: 0.910


250it [00:10, 23.06it/s]

[15,   250] loss: 0.921



52it [00:02, 23.38it/s]

[16,    50] loss: 0.884


103it [00:04, 23.14it/s]

[16,   100] loss: 0.893


154it [00:06, 23.27it/s]

[16,   150] loss: 0.897


202it [00:08, 23.41it/s]

[16,   200] loss: 0.869


250it [00:10, 22.99it/s]

[16,   250] loss: 0.893



52it [00:02, 23.32it/s]

[17,    50] loss: 0.836


103it [00:04, 23.31it/s]

[17,   100] loss: 0.857


154it [00:06, 23.30it/s]

[17,   150] loss: 0.864


202it [00:08, 23.33it/s]

[17,   200] loss: 0.888


250it [00:10, 23.03it/s]

[17,   250] loss: 0.879



52it [00:02, 23.38it/s]

[18,    50] loss: 0.835


103it [00:04, 23.32it/s]

[18,   100] loss: 0.859


154it [00:06, 23.41it/s]

[18,   150] loss: 0.836


202it [00:08, 23.44it/s]

[18,   200] loss: 0.854


250it [00:10, 23.02it/s]

[18,   250] loss: 0.839



52it [00:02, 23.49it/s]

[19,    50] loss: 0.820


103it [00:04, 23.01it/s]

[19,   100] loss: 0.820


154it [00:06, 23.42it/s]

[19,   150] loss: 0.847


202it [00:08, 23.39it/s]

[19,   200] loss: 0.839


250it [00:10, 23.07it/s]

[19,   250] loss: 0.830



52it [00:02, 23.43it/s]

[20,    50] loss: 0.811


103it [00:04, 23.30it/s]

[20,   100] loss: 0.811


154it [00:06, 23.37it/s]

[20,   150] loss: 0.810


202it [00:08, 23.19it/s]

[20,   200] loss: 0.806


250it [00:10, 23.03it/s]

[20,   250] loss: 0.802



52it [00:02, 23.42it/s]

[21,    50] loss: 0.788


103it [00:04, 23.15it/s]

[21,   100] loss: 0.786


154it [00:06, 23.40it/s]

[21,   150] loss: 0.798


202it [00:08, 23.48it/s]

[21,   200] loss: 0.786


250it [00:10, 23.02it/s]

[21,   250] loss: 0.809



52it [00:02, 23.02it/s]

[22,    50] loss: 0.783


103it [00:04, 22.75it/s]

[22,   100] loss: 0.761


154it [00:06, 23.22it/s]

[22,   150] loss: 0.764


202it [00:08, 23.43it/s]

[22,   200] loss: 0.772


250it [00:10, 22.93it/s]

[22,   250] loss: 0.778



52it [00:02, 23.01it/s]

[23,    50] loss: 0.733


103it [00:04, 23.31it/s]

[23,   100] loss: 0.780


154it [00:06, 23.46it/s]

[23,   150] loss: 0.739


202it [00:08, 23.45it/s]

[23,   200] loss: 0.763


250it [00:10, 23.07it/s]

[23,   250] loss: 0.755



52it [00:02, 23.45it/s]

[24,    50] loss: 0.750


103it [00:04, 23.33it/s]

[24,   100] loss: 0.750


154it [00:06, 23.41it/s]

[24,   150] loss: 0.753


202it [00:08, 23.41it/s]

[24,   200] loss: 0.736


250it [00:10, 23.08it/s]

[24,   250] loss: 0.760



52it [00:02, 23.46it/s]

[25,    50] loss: 0.689


103it [00:04, 23.51it/s]

[25,   100] loss: 0.742


154it [00:06, 23.44it/s]

[25,   150] loss: 0.732


202it [00:08, 23.37it/s]

[25,   200] loss: 0.747


250it [00:10, 23.11it/s]

[25,   250] loss: 0.739



52it [00:02, 23.47it/s]

[26,    50] loss: 0.692


103it [00:04, 23.29it/s]

[26,   100] loss: 0.705


154it [00:06, 23.24it/s]

[26,   150] loss: 0.727


202it [00:08, 23.57it/s]

[26,   200] loss: 0.725


250it [00:10, 23.17it/s]

[26,   250] loss: 0.714



52it [00:02, 23.67it/s]

[27,    50] loss: 0.683


103it [00:04, 23.05it/s]

[27,   100] loss: 0.688


154it [00:06, 23.10it/s]

[27,   150] loss: 0.693


202it [00:08, 23.21it/s]

[27,   200] loss: 0.700


250it [00:10, 23.05it/s]

[27,   250] loss: 0.722



52it [00:02, 23.37it/s]

[28,    50] loss: 0.676


103it [00:04, 23.33it/s]

[28,   100] loss: 0.667


154it [00:06, 23.23it/s]

[28,   150] loss: 0.692


202it [00:08, 23.38it/s]

[28,   200] loss: 0.676


250it [00:10, 23.12it/s]

[28,   250] loss: 0.693



52it [00:02, 23.51it/s]

[29,    50] loss: 0.663


103it [00:04, 23.47it/s]

[29,   100] loss: 0.677


154it [00:06, 23.43it/s]

[29,   150] loss: 0.668


202it [00:08, 23.49it/s]

[29,   200] loss: 0.677


250it [00:10, 23.19it/s]

[29,   250] loss: 0.669



52it [00:02, 23.53it/s]

[30,    50] loss: 0.637


103it [00:04, 23.47it/s]

[30,   100] loss: 0.646


154it [00:06, 23.48it/s]

[30,   150] loss: 0.662


202it [00:08, 23.40it/s]

[30,   200] loss: 0.662


250it [00:10, 23.08it/s]

[30,   250] loss: 0.674



52it [00:02, 23.37it/s]

[31,    50] loss: 0.630


103it [00:04, 23.34it/s]

[31,   100] loss: 0.631


154it [00:06, 23.34it/s]

[31,   150] loss: 0.650


202it [00:08, 23.38it/s]

[31,   200] loss: 0.652


250it [00:10, 22.99it/s]

[31,   250] loss: 0.644



52it [00:02, 23.35it/s]

[32,    50] loss: 0.619


103it [00:04, 23.44it/s]

[32,   100] loss: 0.636


154it [00:06, 23.42it/s]

[32,   150] loss: 0.631


202it [00:08, 23.41it/s]

[32,   200] loss: 0.646


250it [00:10, 23.01it/s]

[32,   250] loss: 0.629



52it [00:02, 23.24it/s]

[33,    50] loss: 0.583


103it [00:04, 23.04it/s]

[33,   100] loss: 0.634


154it [00:06, 23.08it/s]

[33,   150] loss: 0.624


202it [00:08, 23.41it/s]

[33,   200] loss: 0.627


250it [00:10, 23.01it/s]

[33,   250] loss: 0.630



52it [00:02, 23.20it/s]

[34,    50] loss: 0.596


103it [00:04, 23.40it/s]

[34,   100] loss: 0.593


154it [00:06, 23.28it/s]

[34,   150] loss: 0.612


202it [00:08, 23.21it/s]

[34,   200] loss: 0.608


250it [00:10, 22.99it/s]

[34,   250] loss: 0.622



52it [00:02, 23.48it/s]

[35,    50] loss: 0.580


103it [00:04, 23.34it/s]

[35,   100] loss: 0.596


154it [00:06, 23.33it/s]

[35,   150] loss: 0.610


202it [00:08, 23.49it/s]

[35,   200] loss: 0.594


250it [00:10, 23.07it/s]

[35,   250] loss: 0.593



52it [00:02, 23.34it/s]

[36,    50] loss: 0.567


103it [00:04, 23.09it/s]

[36,   100] loss: 0.572


154it [00:06, 23.47it/s]

[36,   150] loss: 0.585


202it [00:08, 23.41it/s]

[36,   200] loss: 0.595


250it [00:10, 23.09it/s]

[36,   250] loss: 0.610



52it [00:02, 23.24it/s]

[37,    50] loss: 0.565


103it [00:04, 23.21it/s]

[37,   100] loss: 0.558


154it [00:06, 23.40it/s]

[37,   150] loss: 0.585


202it [00:08, 23.37it/s]

[37,   200] loss: 0.564


250it [00:10, 23.00it/s]

[37,   250] loss: 0.574



52it [00:02, 23.19it/s]

[38,    50] loss: 0.550


103it [00:04, 23.17it/s]

[38,   100] loss: 0.563


154it [00:06, 23.47it/s]

[38,   150] loss: 0.561


202it [00:08, 23.14it/s]

[38,   200] loss: 0.590


250it [00:10, 22.98it/s]

[38,   250] loss: 0.568



52it [00:02, 23.35it/s]

[39,    50] loss: 0.521


103it [00:04, 23.25it/s]

[39,   100] loss: 0.541


154it [00:06, 23.39it/s]

[39,   150] loss: 0.567


202it [00:08, 23.37it/s]

[39,   200] loss: 0.550


250it [00:10, 23.03it/s]

[39,   250] loss: 0.563



52it [00:02, 23.37it/s]

[40,    50] loss: 0.526


103it [00:04, 23.32it/s]

[40,   100] loss: 0.521


154it [00:06, 23.44it/s]

[40,   150] loss: 0.538


202it [00:08, 23.41it/s]

[40,   200] loss: 0.541


250it [00:10, 23.04it/s]

[40,   250] loss: 0.549



52it [00:02, 23.29it/s]

[41,    50] loss: 0.512


103it [00:04, 23.37it/s]

[41,   100] loss: 0.526


154it [00:06, 23.16it/s]

[41,   150] loss: 0.516


202it [00:08, 23.21it/s]

[41,   200] loss: 0.544


250it [00:10, 23.01it/s]

[41,   250] loss: 0.543



52it [00:02, 23.25it/s]

[42,    50] loss: 0.511


103it [00:04, 23.40it/s]

[42,   100] loss: 0.525


154it [00:06, 23.33it/s]

[42,   150] loss: 0.521


202it [00:08, 23.24it/s]

[42,   200] loss: 0.528


250it [00:10, 23.05it/s]

[42,   250] loss: 0.522



52it [00:02, 23.30it/s]

[43,    50] loss: 0.495


103it [00:04, 23.34it/s]

[43,   100] loss: 0.512


154it [00:06, 23.36it/s]

[43,   150] loss: 0.495


202it [00:08, 23.39it/s]

[43,   200] loss: 0.520


250it [00:10, 23.00it/s]

[43,   250] loss: 0.523



52it [00:02, 23.26it/s]

[44,    50] loss: 0.496


103it [00:04, 23.19it/s]

[44,   100] loss: 0.488


154it [00:06, 23.28it/s]

[44,   150] loss: 0.480


202it [00:08, 23.42it/s]

[44,   200] loss: 0.494


250it [00:10, 22.98it/s]

[44,   250] loss: 0.516



52it [00:02, 23.27it/s]

[45,    50] loss: 0.467


103it [00:04, 23.43it/s]

[45,   100] loss: 0.474


154it [00:06, 23.37it/s]

[45,   150] loss: 0.495


202it [00:08, 23.04it/s]

[45,   200] loss: 0.483


250it [00:10, 23.04it/s]

[45,   250] loss: 0.507



52it [00:02, 23.31it/s]

[46,    50] loss: 0.457


103it [00:04, 23.30it/s]

[46,   100] loss: 0.482


154it [00:06, 23.05it/s]

[46,   150] loss: 0.485


202it [00:08, 23.12it/s]

[46,   200] loss: 0.488


250it [00:10, 22.97it/s]

[46,   250] loss: 0.485



52it [00:02, 23.41it/s]

[47,    50] loss: 0.436


103it [00:04, 23.30it/s]

[47,   100] loss: 0.462


154it [00:06, 23.24it/s]

[47,   150] loss: 0.465


202it [00:08, 23.29it/s]

[47,   200] loss: 0.472


250it [00:10, 23.00it/s]

[47,   250] loss: 0.483



52it [00:02, 23.42it/s]

[48,    50] loss: 0.447


103it [00:04, 23.26it/s]

[48,   100] loss: 0.459


154it [00:06, 23.47it/s]

[48,   150] loss: 0.456


202it [00:08, 23.40it/s]

[48,   200] loss: 0.457


250it [00:10, 23.08it/s]

[48,   250] loss: 0.479



52it [00:02, 23.38it/s]

[49,    50] loss: 0.444


103it [00:04, 23.20it/s]

[49,   100] loss: 0.436


154it [00:06, 23.45it/s]

[49,   150] loss: 0.446


202it [00:08, 23.39it/s]

[49,   200] loss: 0.464


250it [00:10, 23.05it/s]

[49,   250] loss: 0.453



52it [00:02, 23.26it/s]

[50,    50] loss: 0.428


103it [00:04, 23.29it/s]

[50,   100] loss: 0.419


154it [00:06, 23.53it/s]

[50,   150] loss: 0.440


202it [00:08, 23.47it/s]

[50,   200] loss: 0.440


250it [00:10, 23.04it/s]

[50,   250] loss: 0.471



52it [00:02, 23.30it/s]

[51,    50] loss: 0.409


103it [00:04, 23.43it/s]

[51,   100] loss: 0.415


154it [00:06, 23.53it/s]

[51,   150] loss: 0.427


202it [00:08, 23.39it/s]

[51,   200] loss: 0.443


250it [00:10, 23.13it/s]

[51,   250] loss: 0.446



52it [00:02, 23.43it/s]

[52,    50] loss: 0.396


103it [00:04, 23.51it/s]

[52,   100] loss: 0.411


154it [00:06, 23.54it/s]

[52,   150] loss: 0.415


202it [00:08, 23.47it/s]

[52,   200] loss: 0.432


250it [00:10, 23.13it/s]

[52,   250] loss: 0.442



52it [00:02, 23.35it/s]

[53,    50] loss: 0.398


103it [00:04, 23.31it/s]

[53,   100] loss: 0.392


154it [00:06, 23.53it/s]

[53,   150] loss: 0.413


202it [00:08, 23.40it/s]

[53,   200] loss: 0.423


250it [00:10, 23.17it/s]

[53,   250] loss: 0.416



52it [00:02, 23.54it/s]

[54,    50] loss: 0.375


103it [00:04, 23.49it/s]

[54,   100] loss: 0.399


154it [00:06, 23.42it/s]

[54,   150] loss: 0.394


202it [00:08, 23.28it/s]

[54,   200] loss: 0.405


250it [00:10, 23.16it/s]

[54,   250] loss: 0.429



52it [00:02, 23.54it/s]

[55,    50] loss: 0.374


103it [00:04, 23.35it/s]

[55,   100] loss: 0.387


154it [00:06, 23.40it/s]

[55,   150] loss: 0.398


202it [00:08, 23.30it/s]

[55,   200] loss: 0.397


250it [00:10, 23.12it/s]

[55,   250] loss: 0.416



52it [00:02, 23.10it/s]

[56,    50] loss: 0.364


103it [00:04, 23.36it/s]

[56,   100] loss: 0.366


154it [00:06, 23.39it/s]

[56,   150] loss: 0.398


202it [00:08, 23.34it/s]

[56,   200] loss: 0.399


250it [00:10, 23.01it/s]

[56,   250] loss: 0.391



52it [00:02, 23.32it/s]

[57,    50] loss: 0.355


103it [00:04, 23.33it/s]

[57,   100] loss: 0.363


154it [00:06, 23.33it/s]

[57,   150] loss: 0.367


202it [00:08, 23.36it/s]

[57,   200] loss: 0.390


250it [00:10, 22.95it/s]

[57,   250] loss: 0.380



52it [00:02, 23.31it/s]

[58,    50] loss: 0.346


103it [00:04, 23.42it/s]

[58,   100] loss: 0.355


154it [00:06, 23.36it/s]

[58,   150] loss: 0.378


202it [00:08, 23.27it/s]

[58,   200] loss: 0.395


250it [00:10, 22.96it/s]

[58,   250] loss: 0.383



52it [00:02, 23.37it/s]

[59,    50] loss: 0.330


103it [00:04, 23.34it/s]

[59,   100] loss: 0.347


154it [00:06, 23.47it/s]

[59,   150] loss: 0.378


202it [00:08, 23.30it/s]

[59,   200] loss: 0.365


250it [00:10, 23.02it/s]

[59,   250] loss: 0.376



52it [00:02, 23.38it/s]

[60,    50] loss: 0.326


103it [00:04, 23.30it/s]

[60,   100] loss: 0.347


154it [00:06, 23.43it/s]

[60,   150] loss: 0.347


202it [00:08, 23.25it/s]

[60,   200] loss: 0.364


250it [00:10, 23.00it/s]

[60,   250] loss: 0.366



52it [00:02, 23.32it/s]

[61,    50] loss: 0.317


103it [00:04, 23.28it/s]

[61,   100] loss: 0.338


154it [00:06, 23.38it/s]

[61,   150] loss: 0.340


202it [00:08, 23.40it/s]

[61,   200] loss: 0.368


250it [00:10, 22.99it/s]

[61,   250] loss: 0.359



52it [00:02, 23.20it/s]

[62,    50] loss: 0.304


103it [00:04, 23.22it/s]

[62,   100] loss: 0.339


154it [00:06, 23.32it/s]

[62,   150] loss: 0.341


202it [00:08, 23.24it/s]

[62,   200] loss: 0.352


250it [00:10, 22.96it/s]

[62,   250] loss: 0.357



52it [00:02, 23.30it/s]

[63,    50] loss: 0.308


103it [00:04, 23.41it/s]

[63,   100] loss: 0.336


154it [00:06, 23.42it/s]

[63,   150] loss: 0.321


202it [00:08, 23.23it/s]

[63,   200] loss: 0.328


250it [00:10, 23.05it/s]

[63,   250] loss: 0.362



52it [00:02, 23.20it/s]

[64,    50] loss: 0.316


103it [00:04, 23.13it/s]

[64,   100] loss: 0.321


154it [00:06, 23.40it/s]

[64,   150] loss: 0.315


202it [00:08, 23.40it/s]

[64,   200] loss: 0.336


250it [00:10, 23.02it/s]

[64,   250] loss: 0.346



54it [00:02, 23.42it/s]

[65,    50] loss: 0.299


102it [00:04, 23.06it/s]

[65,   100] loss: 0.324


153it [00:06, 23.26it/s]

[65,   150] loss: 0.303


204it [00:08, 23.37it/s]

[65,   200] loss: 0.317


250it [00:10, 22.93it/s]

[65,   250] loss: 0.326



52it [00:02, 23.16it/s]

[66,    50] loss: 0.297


103it [00:04, 23.34it/s]

[66,   100] loss: 0.294


154it [00:06, 23.30it/s]

[66,   150] loss: 0.313


202it [00:08, 23.37it/s]

[66,   200] loss: 0.319


250it [00:10, 22.99it/s]

[66,   250] loss: 0.328



52it [00:02, 23.22it/s]

[67,    50] loss: 0.289


103it [00:04, 23.35it/s]

[67,   100] loss: 0.289


154it [00:06, 23.40it/s]

[67,   150] loss: 0.296


202it [00:08, 23.42it/s]

[67,   200] loss: 0.320


250it [00:10, 22.93it/s]

[67,   250] loss: 0.324



52it [00:02, 23.39it/s]

[68,    50] loss: 0.281


103it [00:04, 23.44it/s]

[68,   100] loss: 0.287


154it [00:06, 23.44it/s]

[68,   150] loss: 0.289


202it [00:08, 23.41it/s]

[68,   200] loss: 0.313


250it [00:10, 23.07it/s]

[68,   250] loss: 0.302



52it [00:02, 23.28it/s]

[69,    50] loss: 0.270


103it [00:04, 23.47it/s]

[69,   100] loss: 0.287


154it [00:06, 23.30it/s]

[69,   150] loss: 0.304


202it [00:08, 23.46it/s]

[69,   200] loss: 0.306


250it [00:10, 23.07it/s]

[69,   250] loss: 0.315



52it [00:02, 23.39it/s]

[70,    50] loss: 0.280


103it [00:04, 23.37it/s]

[70,   100] loss: 0.279


154it [00:06, 23.45it/s]

[70,   150] loss: 0.296


202it [00:08, 23.47it/s]

[70,   200] loss: 0.285


250it [00:10, 23.08it/s]

[70,   250] loss: 0.304



52it [00:02, 23.30it/s]

[71,    50] loss: 0.270


103it [00:04, 23.40it/s]

[71,   100] loss: 0.265


154it [00:06, 23.36it/s]

[71,   150] loss: 0.285


202it [00:08, 23.23it/s]

[71,   200] loss: 0.295


250it [00:10, 23.00it/s]

[71,   250] loss: 0.288



52it [00:02, 23.49it/s]

[72,    50] loss: 0.261


103it [00:04, 23.50it/s]

[72,   100] loss: 0.264


154it [00:06, 23.49it/s]

[72,   150] loss: 0.280


202it [00:08, 23.47it/s]

[72,   200] loss: 0.289


250it [00:10, 23.13it/s]

[72,   250] loss: 0.285



52it [00:02, 23.38it/s]

[73,    50] loss: 0.252


103it [00:04, 23.41it/s]

[73,   100] loss: 0.265


154it [00:06, 23.45it/s]

[73,   150] loss: 0.274


202it [00:08, 23.36it/s]

[73,   200] loss: 0.281


250it [00:10, 23.12it/s]

[73,   250] loss: 0.290



52it [00:02, 23.34it/s]

[74,    50] loss: 0.246


103it [00:04, 23.27it/s]

[74,   100] loss: 0.264


154it [00:06, 23.36it/s]

[74,   150] loss: 0.269


202it [00:08, 23.41it/s]

[74,   200] loss: 0.265


250it [00:10, 23.10it/s]

[74,   250] loss: 0.262



52it [00:02, 23.38it/s]

[75,    50] loss: 0.233


103it [00:04, 23.59it/s]

[75,   100] loss: 0.256


154it [00:06, 23.40it/s]

[75,   150] loss: 0.260


202it [00:08, 23.31it/s]

[75,   200] loss: 0.280


250it [00:10, 23.05it/s]

[75,   250] loss: 0.273



52it [00:02, 23.47it/s]

[76,    50] loss: 0.242


103it [00:04, 23.48it/s]

[76,   100] loss: 0.245


154it [00:06, 23.61it/s]

[76,   150] loss: 0.264


202it [00:08, 23.63it/s]

[76,   200] loss: 0.265


250it [00:10, 23.21it/s]

[76,   250] loss: 0.269



52it [00:02, 23.59it/s]

[77,    50] loss: 0.229


103it [00:04, 23.67it/s]

[77,   100] loss: 0.228


154it [00:06, 23.60it/s]

[77,   150] loss: 0.243


202it [00:08, 23.57it/s]

[77,   200] loss: 0.257


250it [00:10, 23.31it/s]

[77,   250] loss: 0.251



52it [00:02, 23.56it/s]

[78,    50] loss: 0.238


103it [00:04, 23.55it/s]

[78,   100] loss: 0.238


154it [00:06, 23.64it/s]

[78,   150] loss: 0.239


202it [00:08, 23.58it/s]

[78,   200] loss: 0.261


250it [00:10, 23.23it/s]

[78,   250] loss: 0.256



52it [00:02, 23.35it/s]

[79,    50] loss: 0.228


103it [00:04, 23.58it/s]

[79,   100] loss: 0.227


154it [00:06, 23.62it/s]

[79,   150] loss: 0.253


202it [00:08, 23.58it/s]

[79,   200] loss: 0.252


250it [00:10, 23.24it/s]

[79,   250] loss: 0.249



52it [00:02, 23.62it/s]

[80,    50] loss: 0.218


103it [00:04, 23.50it/s]

[80,   100] loss: 0.227


154it [00:06, 23.50it/s]

[80,   150] loss: 0.235


202it [00:08, 23.32it/s]

[80,   200] loss: 0.247


250it [00:10, 23.29it/s]

[80,   250] loss: 0.245



52it [00:02, 23.56it/s]

[81,    50] loss: 0.214


103it [00:04, 23.62it/s]

[81,   100] loss: 0.223


154it [00:06, 23.59it/s]

[81,   150] loss: 0.239


202it [00:08, 23.73it/s]

[81,   200] loss: 0.234


250it [00:10, 23.36it/s]

[81,   250] loss: 0.239



52it [00:02, 23.61it/s]

[82,    50] loss: 0.217


103it [00:04, 23.60it/s]

[82,   100] loss: 0.216


154it [00:06, 23.38it/s]

[82,   150] loss: 0.222


202it [00:08, 23.34it/s]

[82,   200] loss: 0.234


250it [00:10, 23.15it/s]

[82,   250] loss: 0.258



52it [00:02, 23.41it/s]

[83,    50] loss: 0.221


103it [00:04, 23.26it/s]

[83,   100] loss: 0.208


154it [00:06, 23.45it/s]

[83,   150] loss: 0.222


202it [00:08, 23.41it/s]

[83,   200] loss: 0.242


250it [00:10, 22.99it/s]

[83,   250] loss: 0.236



52it [00:02, 23.33it/s]

[84,    50] loss: 0.198


103it [00:04, 23.43it/s]

[84,   100] loss: 0.204


154it [00:06, 23.43it/s]

[84,   150] loss: 0.227


202it [00:08, 23.49it/s]

[84,   200] loss: 0.221


250it [00:10, 23.05it/s]

[84,   250] loss: 0.222



52it [00:02, 23.32it/s]

[85,    50] loss: 0.196


103it [00:04, 23.45it/s]

[85,   100] loss: 0.217


154it [00:06, 23.45it/s]

[85,   150] loss: 0.209


202it [00:08, 23.37it/s]

[85,   200] loss: 0.225


250it [00:10, 23.11it/s]

[85,   250] loss: 0.231



52it [00:02, 23.39it/s]

[86,    50] loss: 0.200


103it [00:04, 23.56it/s]

[86,   100] loss: 0.205


154it [00:06, 23.27it/s]

[86,   150] loss: 0.204


202it [00:08, 23.30it/s]

[86,   200] loss: 0.221


250it [00:10, 23.09it/s]

[86,   250] loss: 0.224



52it [00:02, 23.51it/s]

[87,    50] loss: 0.193


103it [00:04, 23.47it/s]

[87,   100] loss: 0.194


154it [00:06, 23.10it/s]

[87,   150] loss: 0.219


202it [00:08, 23.45it/s]

[87,   200] loss: 0.210


250it [00:10, 23.13it/s]

[87,   250] loss: 0.221



52it [00:02, 23.35it/s]

[88,    50] loss: 0.188


103it [00:04, 23.36it/s]

[88,   100] loss: 0.191


154it [00:06, 23.51it/s]

[88,   150] loss: 0.206


202it [00:08, 23.45it/s]

[88,   200] loss: 0.222


250it [00:10, 23.13it/s]

[88,   250] loss: 0.209



52it [00:02, 23.48it/s]

[89,    50] loss: 0.202


103it [00:04, 23.34it/s]

[89,   100] loss: 0.205


154it [00:06, 23.31it/s]

[89,   150] loss: 0.201


202it [00:08, 23.47it/s]

[89,   200] loss: 0.195


250it [00:10, 23.06it/s]

[89,   250] loss: 0.212



52it [00:02, 23.18it/s]

[90,    50] loss: 0.180


103it [00:04, 23.41it/s]

[90,   100] loss: 0.185


154it [00:06, 23.35it/s]

[90,   150] loss: 0.193


202it [00:08, 23.43it/s]

[90,   200] loss: 0.194


250it [00:10, 23.00it/s]

[90,   250] loss: 0.200



52it [00:02, 23.31it/s]

[91,    50] loss: 0.168


103it [00:04, 23.46it/s]

[91,   100] loss: 0.189


154it [00:06, 23.37it/s]

[91,   150] loss: 0.199


202it [00:08, 23.36it/s]

[91,   200] loss: 0.205


250it [00:10, 23.04it/s]

[91,   250] loss: 0.208



52it [00:02, 23.05it/s]

[92,    50] loss: 0.176


103it [00:04, 23.45it/s]

[92,   100] loss: 0.189


154it [00:06, 23.31it/s]

[92,   150] loss: 0.190


202it [00:08, 23.40it/s]

[92,   200] loss: 0.197


250it [00:10, 23.04it/s]

[92,   250] loss: 0.204



52it [00:02, 23.48it/s]

[93,    50] loss: 0.181


103it [00:04, 23.43it/s]

[93,   100] loss: 0.182


154it [00:06, 23.21it/s]

[93,   150] loss: 0.183


202it [00:08, 23.41it/s]

[93,   200] loss: 0.178


250it [00:10, 23.08it/s]

[93,   250] loss: 0.201



52it [00:02, 23.44it/s]

[94,    50] loss: 0.163


103it [00:04, 23.40it/s]

[94,   100] loss: 0.175


154it [00:06, 23.31it/s]

[94,   150] loss: 0.186


202it [00:08, 22.94it/s]

[94,   200] loss: 0.189


250it [00:10, 23.07it/s]

[94,   250] loss: 0.195



52it [00:02, 23.40it/s]

[95,    50] loss: 0.180


103it [00:04, 23.49it/s]

[95,   100] loss: 0.184


154it [00:06, 23.48it/s]

[95,   150] loss: 0.179


202it [00:08, 23.48it/s]

[95,   200] loss: 0.188


250it [00:10, 23.10it/s]

[95,   250] loss: 0.194



52it [00:02, 23.09it/s]

[96,    50] loss: 0.178


103it [00:04, 23.38it/s]

[96,   100] loss: 0.154


154it [00:06, 23.47it/s]

[96,   150] loss: 0.174


202it [00:08, 23.45it/s]

[96,   200] loss: 0.176


250it [00:10, 23.02it/s]

[96,   250] loss: 0.197



52it [00:02, 23.33it/s]

[97,    50] loss: 0.173


103it [00:04, 23.43it/s]

[97,   100] loss: 0.166


154it [00:06, 23.39it/s]

[97,   150] loss: 0.172


202it [00:08, 23.29it/s]

[97,   200] loss: 0.180


250it [00:10, 23.07it/s]

[97,   250] loss: 0.186



52it [00:02, 23.44it/s]

[98,    50] loss: 0.160


103it [00:04, 23.46it/s]

[98,   100] loss: 0.158


154it [00:06, 23.55it/s]

[98,   150] loss: 0.172


202it [00:08, 23.36it/s]

[98,   200] loss: 0.184


250it [00:10, 23.17it/s]

[98,   250] loss: 0.179



52it [00:02, 23.48it/s]

[99,    50] loss: 0.156


103it [00:04, 23.44it/s]

[99,   100] loss: 0.155


154it [00:06, 23.36it/s]

[99,   150] loss: 0.173


202it [00:08, 23.49it/s]

[99,   200] loss: 0.173


250it [00:10, 23.05it/s]

[99,   250] loss: 0.171



52it [00:02, 23.43it/s]

[100,    50] loss: 0.149


103it [00:04, 23.33it/s]

[100,   100] loss: 0.167


154it [00:06, 23.47it/s]

[100,   150] loss: 0.167


202it [00:08, 23.28it/s]

[100,   200] loss: 0.180


250it [00:10, 23.09it/s]

[100,   250] loss: 0.179



52it [00:02, 23.43it/s]

[101,    50] loss: 0.138


103it [00:04, 23.46it/s]

[101,   100] loss: 0.124


154it [00:06, 23.49it/s]

[101,   150] loss: 0.123


202it [00:08, 23.44it/s]

[101,   200] loss: 0.119


250it [00:10, 23.08it/s]

[101,   250] loss: 0.113



52it [00:02, 23.33it/s]

[102,    50] loss: 0.101


103it [00:04, 23.61it/s]

[102,   100] loss: 0.107


154it [00:06, 23.57it/s]

[102,   150] loss: 0.094


202it [00:08, 23.68it/s]

[102,   200] loss: 0.103


250it [00:10, 23.20it/s]

[102,   250] loss: 0.100



52it [00:02, 23.57it/s]

[103,    50] loss: 0.092


103it [00:04, 23.55it/s]

[103,   100] loss: 0.097


154it [00:06, 23.45it/s]

[103,   150] loss: 0.089


202it [00:08, 23.54it/s]

[103,   200] loss: 0.088


250it [00:10, 23.29it/s]

[103,   250] loss: 0.090



52it [00:02, 23.45it/s]

[104,    50] loss: 0.087


103it [00:04, 23.63it/s]

[104,   100] loss: 0.085


154it [00:06, 23.55it/s]

[104,   150] loss: 0.083


202it [00:08, 23.53it/s]

[104,   200] loss: 0.082


250it [00:10, 23.30it/s]

[104,   250] loss: 0.083



52it [00:02, 23.61it/s]

[105,    50] loss: 0.086


103it [00:04, 23.73it/s]

[105,   100] loss: 0.080


154it [00:06, 23.67it/s]

[105,   150] loss: 0.085


202it [00:08, 23.74it/s]

[105,   200] loss: 0.078


250it [00:10, 23.30it/s]

[105,   250] loss: 0.086



52it [00:02, 23.60it/s]

[106,    50] loss: 0.081


103it [00:04, 23.55it/s]

[106,   100] loss: 0.083


154it [00:06, 23.61it/s]

[106,   150] loss: 0.083


202it [00:08, 23.67it/s]

[106,   200] loss: 0.082


250it [00:10, 23.25it/s]

[106,   250] loss: 0.083



52it [00:02, 23.47it/s]

[107,    50] loss: 0.075


103it [00:04, 23.57it/s]

[107,   100] loss: 0.076


154it [00:06, 23.74it/s]

[107,   150] loss: 0.082


202it [00:08, 23.77it/s]

[107,   200] loss: 0.077


250it [00:10, 23.26it/s]

[107,   250] loss: 0.081



52it [00:02, 23.54it/s]

[108,    50] loss: 0.070


103it [00:04, 23.53it/s]

[108,   100] loss: 0.076


154it [00:06, 23.54it/s]

[108,   150] loss: 0.069


202it [00:08, 23.53it/s]

[108,   200] loss: 0.078


250it [00:10, 23.15it/s]

[108,   250] loss: 0.075



52it [00:02, 23.37it/s]

[109,    50] loss: 0.074


103it [00:04, 23.16it/s]

[109,   100] loss: 0.074


154it [00:06, 23.43it/s]

[109,   150] loss: 0.068


202it [00:08, 23.23it/s]

[109,   200] loss: 0.075


250it [00:10, 23.03it/s]

[109,   250] loss: 0.073



52it [00:02, 23.12it/s]

[110,    50] loss: 0.074


103it [00:04, 23.28it/s]

[110,   100] loss: 0.071


154it [00:06, 23.36it/s]

[110,   150] loss: 0.070


202it [00:08, 23.03it/s]

[110,   200] loss: 0.073


250it [00:10, 23.00it/s]

[110,   250] loss: 0.074



52it [00:02, 22.66it/s]

[111,    50] loss: 0.070


103it [00:04, 23.02it/s]

[111,   100] loss: 0.074


154it [00:06, 23.24it/s]

[111,   150] loss: 0.066


202it [00:08, 23.39it/s]

[111,   200] loss: 0.071


250it [00:10, 23.02it/s]

[111,   250] loss: 0.070



52it [00:02, 23.39it/s]

[112,    50] loss: 0.067


103it [00:04, 23.42it/s]

[112,   100] loss: 0.070


154it [00:06, 23.28it/s]

[112,   150] loss: 0.065


202it [00:08, 23.40it/s]

[112,   200] loss: 0.069


250it [00:10, 23.01it/s]

[112,   250] loss: 0.070



52it [00:02, 23.43it/s]

[113,    50] loss: 0.065


103it [00:04, 23.29it/s]

[113,   100] loss: 0.066


154it [00:06, 23.37it/s]

[113,   150] loss: 0.062


202it [00:08, 23.48it/s]

[113,   200] loss: 0.063


250it [00:10, 23.06it/s]

[113,   250] loss: 0.068



52it [00:02, 23.25it/s]

[114,    50] loss: 0.067


103it [00:04, 23.45it/s]

[114,   100] loss: 0.063


154it [00:06, 23.42it/s]

[114,   150] loss: 0.065


202it [00:08, 23.46it/s]

[114,   200] loss: 0.060


250it [00:10, 23.09it/s]

[114,   250] loss: 0.059



52it [00:02, 23.41it/s]

[115,    50] loss: 0.060


103it [00:04, 23.41it/s]

[115,   100] loss: 0.062


154it [00:06, 23.47it/s]

[115,   150] loss: 0.069


202it [00:08, 23.34it/s]

[115,   200] loss: 0.063


250it [00:10, 23.10it/s]

[115,   250] loss: 0.063



52it [00:02, 23.31it/s]

[116,    50] loss: 0.060


103it [00:04, 23.35it/s]

[116,   100] loss: 0.061


154it [00:06, 23.43it/s]

[116,   150] loss: 0.065


202it [00:08, 23.44it/s]

[116,   200] loss: 0.059


250it [00:10, 23.10it/s]

[116,   250] loss: 0.061



52it [00:02, 23.36it/s]

[117,    50] loss: 0.061


103it [00:04, 23.20it/s]

[117,   100] loss: 0.060


154it [00:06, 23.54it/s]

[117,   150] loss: 0.061


202it [00:08, 23.31it/s]

[117,   200] loss: 0.056


250it [00:10, 23.08it/s]

[117,   250] loss: 0.066



52it [00:02, 23.51it/s]

[118,    50] loss: 0.064


103it [00:04, 23.43it/s]

[118,   100] loss: 0.063


154it [00:06, 23.49it/s]

[118,   150] loss: 0.063


202it [00:08, 23.27it/s]

[118,   200] loss: 0.058


250it [00:10, 23.13it/s]

[118,   250] loss: 0.068



52it [00:02, 23.46it/s]

[119,    50] loss: 0.056


103it [00:04, 23.11it/s]

[119,   100] loss: 0.058


154it [00:06, 23.47it/s]

[119,   150] loss: 0.064


202it [00:08, 22.72it/s]

[119,   200] loss: 0.061


250it [00:10, 23.03it/s]

[119,   250] loss: 0.055



52it [00:02, 23.28it/s]

[120,    50] loss: 0.058


103it [00:04, 23.46it/s]

[120,   100] loss: 0.058


154it [00:06, 23.43it/s]

[120,   150] loss: 0.060


202it [00:08, 23.39it/s]

[120,   200] loss: 0.062


250it [00:10, 23.03it/s]

[120,   250] loss: 0.061



52it [00:02, 23.49it/s]

[121,    50] loss: 0.057


103it [00:04, 23.49it/s]

[121,   100] loss: 0.059


154it [00:06, 23.42it/s]

[121,   150] loss: 0.063


202it [00:08, 23.35it/s]

[121,   200] loss: 0.055


250it [00:10, 23.06it/s]

[121,   250] loss: 0.059



52it [00:02, 23.31it/s]

[122,    50] loss: 0.056


103it [00:04, 23.41it/s]

[122,   100] loss: 0.057


154it [00:06, 23.46it/s]

[122,   150] loss: 0.054


202it [00:08, 23.46it/s]

[122,   200] loss: 0.055


250it [00:10, 23.06it/s]

[122,   250] loss: 0.060



52it [00:02, 23.49it/s]

[123,    50] loss: 0.056


103it [00:04, 23.27it/s]

[123,   100] loss: 0.060


154it [00:06, 23.44it/s]

[123,   150] loss: 0.057


202it [00:08, 23.48it/s]

[123,   200] loss: 0.062


250it [00:10, 23.13it/s]

[123,   250] loss: 0.057



52it [00:02, 23.39it/s]

[124,    50] loss: 0.054


103it [00:04, 23.37it/s]

[124,   100] loss: 0.052


154it [00:06, 23.42it/s]

[124,   150] loss: 0.053


202it [00:08, 23.20it/s]

[124,   200] loss: 0.062


250it [00:10, 23.04it/s]

[124,   250] loss: 0.056



52it [00:02, 23.30it/s]

[125,    50] loss: 0.051


103it [00:04, 23.30it/s]

[125,   100] loss: 0.059


154it [00:06, 23.22it/s]

[125,   150] loss: 0.052


202it [00:08, 23.34it/s]

[125,   200] loss: 0.062


250it [00:10, 23.05it/s]

[125,   250] loss: 0.056



52it [00:02, 23.47it/s]

[126,    50] loss: 0.054


103it [00:04, 23.35it/s]

[126,   100] loss: 0.050


154it [00:06, 23.31it/s]

[126,   150] loss: 0.053


202it [00:08, 23.31it/s]

[126,   200] loss: 0.053


250it [00:10, 23.06it/s]

[126,   250] loss: 0.052



52it [00:02, 23.30it/s]

[127,    50] loss: 0.050


103it [00:04, 23.37it/s]

[127,   100] loss: 0.059


154it [00:06, 23.30it/s]

[127,   150] loss: 0.052


202it [00:08, 23.37it/s]

[127,   200] loss: 0.053


250it [00:10, 23.07it/s]

[127,   250] loss: 0.049



52it [00:02, 23.60it/s]

[128,    50] loss: 0.049


103it [00:04, 23.60it/s]

[128,   100] loss: 0.053


154it [00:06, 23.47it/s]

[128,   150] loss: 0.053


202it [00:08, 23.58it/s]

[128,   200] loss: 0.055


250it [00:10, 23.32it/s]

[128,   250] loss: 0.056



52it [00:02, 23.52it/s]

[129,    50] loss: 0.052


103it [00:04, 23.54it/s]

[129,   100] loss: 0.055


154it [00:06, 23.53it/s]

[129,   150] loss: 0.052


202it [00:08, 23.57it/s]

[129,   200] loss: 0.052


250it [00:10, 23.28it/s]

[129,   250] loss: 0.056



52it [00:02, 23.64it/s]

[130,    50] loss: 0.047


103it [00:04, 23.39it/s]

[130,   100] loss: 0.052


154it [00:06, 23.41it/s]

[130,   150] loss: 0.052


202it [00:08, 23.29it/s]

[130,   200] loss: 0.055


250it [00:10, 23.04it/s]

[130,   250] loss: 0.048



52it [00:02, 23.55it/s]

[131,    50] loss: 0.051


103it [00:04, 23.59it/s]

[131,   100] loss: 0.051


154it [00:06, 23.43it/s]

[131,   150] loss: 0.052


202it [00:08, 23.56it/s]

[131,   200] loss: 0.052


250it [00:10, 23.23it/s]

[131,   250] loss: 0.056



52it [00:02, 23.63it/s]

[132,    50] loss: 0.052


103it [00:04, 23.51it/s]

[132,   100] loss: 0.046


154it [00:06, 23.52it/s]

[132,   150] loss: 0.055


202it [00:08, 23.47it/s]

[132,   200] loss: 0.054


250it [00:10, 23.23it/s]

[132,   250] loss: 0.058



52it [00:02, 23.56it/s]

[133,    50] loss: 0.050


103it [00:04, 23.46it/s]

[133,   100] loss: 0.052


154it [00:06, 23.56it/s]

[133,   150] loss: 0.053


202it [00:08, 23.30it/s]

[133,   200] loss: 0.046


250it [00:10, 23.25it/s]

[133,   250] loss: 0.054



52it [00:02, 23.54it/s]

[134,    50] loss: 0.050


103it [00:04, 23.46it/s]

[134,   100] loss: 0.053


154it [00:06, 23.57it/s]

[134,   150] loss: 0.045


202it [00:08, 23.40it/s]

[134,   200] loss: 0.054


250it [00:10, 23.15it/s]

[134,   250] loss: 0.047



52it [00:02, 23.42it/s]

[135,    50] loss: 0.048


103it [00:04, 23.43it/s]

[135,   100] loss: 0.054


154it [00:06, 23.25it/s]

[135,   150] loss: 0.054


202it [00:08, 23.42it/s]

[135,   200] loss: 0.045


250it [00:10, 23.07it/s]

[135,   250] loss: 0.053



52it [00:02, 23.00it/s]

[136,    50] loss: 0.048


103it [00:04, 23.46it/s]

[136,   100] loss: 0.046


154it [00:06, 23.42it/s]

[136,   150] loss: 0.049


202it [00:08, 23.31it/s]

[136,   200] loss: 0.054


250it [00:10, 23.10it/s]

[136,   250] loss: 0.049



52it [00:02, 23.44it/s]

[137,    50] loss: 0.051


103it [00:04, 23.25it/s]

[137,   100] loss: 0.046


154it [00:06, 23.46it/s]

[137,   150] loss: 0.048


202it [00:08, 23.41it/s]

[137,   200] loss: 0.050


250it [00:10, 23.11it/s]

[137,   250] loss: 0.050



52it [00:02, 23.42it/s]

[138,    50] loss: 0.049


103it [00:04, 23.36it/s]

[138,   100] loss: 0.049


154it [00:06, 23.35it/s]

[138,   150] loss: 0.048


202it [00:08, 23.34it/s]

[138,   200] loss: 0.047


250it [00:10, 23.04it/s]

[138,   250] loss: 0.059



52it [00:02, 23.42it/s]

[139,    50] loss: 0.051


103it [00:04, 23.40it/s]

[139,   100] loss: 0.049


154it [00:06, 23.26it/s]

[139,   150] loss: 0.046


202it [00:08, 23.32it/s]

[139,   200] loss: 0.045


250it [00:10, 23.08it/s]

[139,   250] loss: 0.046



52it [00:02, 23.39it/s]

[140,    50] loss: 0.050


103it [00:04, 23.06it/s]

[140,   100] loss: 0.049


154it [00:06, 23.43it/s]

[140,   150] loss: 0.052


202it [00:08, 23.01it/s]

[140,   200] loss: 0.051


250it [00:10, 22.97it/s]

[140,   250] loss: 0.048



52it [00:02, 23.34it/s]

[141,    50] loss: 0.049


103it [00:04, 23.46it/s]

[141,   100] loss: 0.047


154it [00:06, 23.42it/s]

[141,   150] loss: 0.052


202it [00:08, 23.38it/s]

[141,   200] loss: 0.048


250it [00:10, 23.10it/s]

[141,   250] loss: 0.050



52it [00:02, 23.22it/s]

[142,    50] loss: 0.048


103it [00:04, 23.46it/s]

[142,   100] loss: 0.045


154it [00:06, 23.47it/s]

[142,   150] loss: 0.048


202it [00:08, 23.25it/s]

[142,   200] loss: 0.045


250it [00:10, 23.01it/s]

[142,   250] loss: 0.045



52it [00:02, 23.30it/s]

[143,    50] loss: 0.041


103it [00:04, 23.32it/s]

[143,   100] loss: 0.044


154it [00:06, 23.55it/s]

[143,   150] loss: 0.043


202it [00:08, 23.29it/s]

[143,   200] loss: 0.039


250it [00:10, 23.09it/s]

[143,   250] loss: 0.053



52it [00:02, 23.23it/s]

[144,    50] loss: 0.040


103it [00:04, 22.80it/s]

[144,   100] loss: 0.045


154it [00:06, 23.46it/s]

[144,   150] loss: 0.047


202it [00:08, 23.33it/s]

[144,   200] loss: 0.048


250it [00:10, 22.95it/s]

[144,   250] loss: 0.046



52it [00:02, 23.41it/s]

[145,    50] loss: 0.046


103it [00:04, 23.46it/s]

[145,   100] loss: 0.041


154it [00:06, 23.51it/s]

[145,   150] loss: 0.049


202it [00:08, 23.47it/s]

[145,   200] loss: 0.056


250it [00:10, 23.09it/s]

[145,   250] loss: 0.048



52it [00:02, 23.41it/s]

[146,    50] loss: 0.042


103it [00:04, 23.00it/s]

[146,   100] loss: 0.045


154it [00:06, 23.46it/s]

[146,   150] loss: 0.049


202it [00:08, 23.40it/s]

[146,   200] loss: 0.040


250it [00:10, 23.07it/s]

[146,   250] loss: 0.046



52it [00:02, 23.24it/s]

[147,    50] loss: 0.038


103it [00:04, 23.38it/s]

[147,   100] loss: 0.049


154it [00:06, 23.04it/s]

[147,   150] loss: 0.045


202it [00:08, 23.49it/s]

[147,   200] loss: 0.048


250it [00:10, 23.04it/s]

[147,   250] loss: 0.047



52it [00:02, 23.27it/s]

[148,    50] loss: 0.047


103it [00:04, 23.35it/s]

[148,   100] loss: 0.046


154it [00:06, 23.51it/s]

[148,   150] loss: 0.047


202it [00:08, 23.40it/s]

[148,   200] loss: 0.048


250it [00:10, 23.06it/s]

[148,   250] loss: 0.046



52it [00:02, 23.41it/s]

[149,    50] loss: 0.043


103it [00:04, 23.40it/s]

[149,   100] loss: 0.047


154it [00:06, 23.47it/s]

[149,   150] loss: 0.045


202it [00:08, 23.26it/s]

[149,   200] loss: 0.041


250it [00:10, 23.04it/s]

[149,   250] loss: 0.044



52it [00:02, 23.31it/s]

[150,    50] loss: 0.043


103it [00:04, 23.31it/s]

[150,   100] loss: 0.050


154it [00:06, 23.21it/s]

[150,   150] loss: 0.045


202it [00:08, 23.39it/s]

[150,   200] loss: 0.040


250it [00:10, 22.98it/s]

[150,   250] loss: 0.053



52it [00:02, 23.25it/s]

[151,    50] loss: 0.045


103it [00:04, 23.25it/s]

[151,   100] loss: 0.046


154it [00:06, 23.42it/s]

[151,   150] loss: 0.042


202it [00:08, 23.46it/s]

[151,   200] loss: 0.039


250it [00:10, 23.06it/s]

[151,   250] loss: 0.045



52it [00:02, 23.43it/s]

[152,    50] loss: 0.040


103it [00:04, 23.15it/s]

[152,   100] loss: 0.040


154it [00:06, 23.18it/s]

[152,   150] loss: 0.045


202it [00:08, 23.27it/s]

[152,   200] loss: 0.041


250it [00:10, 23.02it/s]

[152,   250] loss: 0.040



52it [00:02, 23.33it/s]

[153,    50] loss: 0.040


103it [00:04, 23.31it/s]

[153,   100] loss: 0.039


154it [00:06, 23.37it/s]

[153,   150] loss: 0.042


202it [00:08, 23.36it/s]

[153,   200] loss: 0.043


250it [00:10, 23.01it/s]

[153,   250] loss: 0.039



52it [00:02, 23.53it/s]

[154,    50] loss: 0.039


103it [00:04, 23.55it/s]

[154,   100] loss: 0.039


154it [00:06, 23.54it/s]

[154,   150] loss: 0.038


202it [00:08, 23.59it/s]

[154,   200] loss: 0.039


250it [00:10, 23.20it/s]

[154,   250] loss: 0.041



52it [00:02, 23.49it/s]

[155,    50] loss: 0.040


103it [00:04, 23.59it/s]

[155,   100] loss: 0.041


154it [00:06, 23.45it/s]

[155,   150] loss: 0.036


202it [00:08, 23.51it/s]

[155,   200] loss: 0.039


250it [00:10, 23.16it/s]

[155,   250] loss: 0.042



52it [00:02, 23.59it/s]

[156,    50] loss: 0.038


103it [00:04, 23.56it/s]

[156,   100] loss: 0.039


154it [00:06, 23.36it/s]

[156,   150] loss: 0.039


202it [00:08, 23.41it/s]

[156,   200] loss: 0.041


250it [00:10, 23.18it/s]

[156,   250] loss: 0.037



52it [00:02, 23.45it/s]

[157,    50] loss: 0.044


103it [00:04, 23.46it/s]

[157,   100] loss: 0.040


154it [00:06, 23.33it/s]

[157,   150] loss: 0.045


202it [00:08, 23.54it/s]

[157,   200] loss: 0.040


250it [00:10, 23.12it/s]

[157,   250] loss: 0.041



52it [00:02, 23.46it/s]

[158,    50] loss: 0.039


103it [00:04, 23.50it/s]

[158,   100] loss: 0.036


154it [00:06, 23.60it/s]

[158,   150] loss: 0.041


202it [00:08, 23.65it/s]

[158,   200] loss: 0.040


250it [00:10, 23.25it/s]

[158,   250] loss: 0.038



52it [00:02, 23.53it/s]

[159,    50] loss: 0.038


103it [00:04, 23.37it/s]

[159,   100] loss: 0.041


154it [00:06, 23.80it/s]

[159,   150] loss: 0.038


202it [00:08, 23.43it/s]

[159,   200] loss: 0.037


250it [00:10, 23.27it/s]

[159,   250] loss: 0.037



52it [00:02, 23.40it/s]

[160,    50] loss: 0.036


103it [00:04, 23.61it/s]

[160,   100] loss: 0.041


154it [00:06, 23.46it/s]

[160,   150] loss: 0.042


202it [00:08, 23.49it/s]

[160,   200] loss: 0.042


250it [00:10, 23.17it/s]

[160,   250] loss: 0.037



52it [00:02, 23.44it/s]

[161,    50] loss: 0.041


103it [00:04, 23.44it/s]

[161,   100] loss: 0.039


154it [00:06, 23.47it/s]

[161,   150] loss: 0.037


202it [00:08, 23.43it/s]

[161,   200] loss: 0.040


250it [00:10, 23.08it/s]

[161,   250] loss: 0.042



52it [00:02, 23.23it/s]

[162,    50] loss: 0.040


103it [00:04, 23.27it/s]

[162,   100] loss: 0.037


154it [00:06, 23.42it/s]

[162,   150] loss: 0.038


202it [00:08, 23.41it/s]

[162,   200] loss: 0.036


250it [00:10, 23.09it/s]

[162,   250] loss: 0.036



52it [00:02, 23.48it/s]

[163,    50] loss: 0.037


103it [00:04, 23.33it/s]

[163,   100] loss: 0.036


154it [00:06, 23.45it/s]

[163,   150] loss: 0.041


202it [00:08, 23.44it/s]

[163,   200] loss: 0.042


250it [00:10, 23.08it/s]

[163,   250] loss: 0.039



52it [00:02, 23.17it/s]

[164,    50] loss: 0.041


103it [00:04, 23.36it/s]

[164,   100] loss: 0.036


154it [00:06, 23.50it/s]

[164,   150] loss: 0.035


202it [00:08, 23.48it/s]

[164,   200] loss: 0.037


250it [00:10, 23.07it/s]

[164,   250] loss: 0.041



52it [00:02, 23.14it/s]

[165,    50] loss: 0.035


103it [00:04, 23.46it/s]

[165,   100] loss: 0.040


154it [00:06, 23.41it/s]

[165,   150] loss: 0.038


202it [00:08, 23.42it/s]

[165,   200] loss: 0.035


250it [00:10, 23.04it/s]

[165,   250] loss: 0.041



52it [00:02, 23.27it/s]

[166,    50] loss: 0.036


103it [00:04, 23.34it/s]

[166,   100] loss: 0.036


154it [00:06, 23.35it/s]

[166,   150] loss: 0.040


202it [00:08, 23.19it/s]

[166,   200] loss: 0.039


250it [00:10, 23.08it/s]

[166,   250] loss: 0.041



52it [00:02, 23.36it/s]

[167,    50] loss: 0.036


103it [00:04, 23.48it/s]

[167,   100] loss: 0.039


154it [00:06, 23.45it/s]

[167,   150] loss: 0.034


202it [00:08, 23.43it/s]

[167,   200] loss: 0.038


250it [00:10, 23.12it/s]

[167,   250] loss: 0.036



52it [00:02, 23.36it/s]

[168,    50] loss: 0.038


103it [00:04, 23.49it/s]

[168,   100] loss: 0.031


154it [00:06, 23.53it/s]

[168,   150] loss: 0.041


202it [00:08, 23.45it/s]

[168,   200] loss: 0.036


250it [00:10, 23.11it/s]

[168,   250] loss: 0.037



52it [00:02, 23.45it/s]

[169,    50] loss: 0.037


103it [00:04, 23.33it/s]

[169,   100] loss: 0.036


154it [00:06, 23.53it/s]

[169,   150] loss: 0.042


202it [00:08, 23.42it/s]

[169,   200] loss: 0.040


250it [00:10, 23.09it/s]

[169,   250] loss: 0.033



54it [00:02, 23.35it/s]

[170,    50] loss: 0.038


102it [00:04, 23.40it/s]

[170,   100] loss: 0.041


153it [00:06, 23.25it/s]

[170,   150] loss: 0.038


204it [00:08, 23.31it/s]

[170,   200] loss: 0.037


250it [00:10, 22.93it/s]

[170,   250] loss: 0.036



52it [00:02, 23.21it/s]

[171,    50] loss: 0.035


103it [00:04, 23.28it/s]

[171,   100] loss: 0.037


154it [00:06, 23.43it/s]

[171,   150] loss: 0.041


202it [00:08, 23.36it/s]

[171,   200] loss: 0.034


250it [00:10, 22.99it/s]

[171,   250] loss: 0.033



52it [00:02, 23.37it/s]

[172,    50] loss: 0.039


103it [00:04, 23.43it/s]

[172,   100] loss: 0.038


154it [00:06, 23.22it/s]

[172,   150] loss: 0.036


202it [00:08, 23.30it/s]

[172,   200] loss: 0.040


250it [00:10, 23.02it/s]

[172,   250] loss: 0.040



52it [00:02, 23.29it/s]

[173,    50] loss: 0.039


103it [00:04, 23.28it/s]

[173,   100] loss: 0.037


154it [00:06, 23.42it/s]

[173,   150] loss: 0.040


202it [00:08, 23.25it/s]

[173,   200] loss: 0.037


250it [00:10, 23.03it/s]

[173,   250] loss: 0.035



52it [00:02, 23.39it/s]

[174,    50] loss: 0.035


103it [00:04, 23.22it/s]

[174,   100] loss: 0.037


154it [00:06, 23.39it/s]

[174,   150] loss: 0.035


202it [00:08, 23.46it/s]

[174,   200] loss: 0.038


250it [00:10, 23.03it/s]

[174,   250] loss: 0.041



52it [00:02, 23.32it/s]

[175,    50] loss: 0.036


103it [00:04, 23.30it/s]

[175,   100] loss: 0.038


154it [00:06, 23.47it/s]

[175,   150] loss: 0.034


202it [00:08, 23.19it/s]

[175,   200] loss: 0.039


250it [00:10, 23.01it/s]

[175,   250] loss: 0.038



52it [00:02, 23.25it/s]

[176,    50] loss: 0.032


103it [00:04, 23.43it/s]

[176,   100] loss: 0.036


154it [00:06, 23.29it/s]

[176,   150] loss: 0.037


202it [00:08, 22.95it/s]

[176,   200] loss: 0.037


250it [00:10, 23.01it/s]

[176,   250] loss: 0.037



52it [00:02, 23.20it/s]

[177,    50] loss: 0.037


103it [00:04, 23.40it/s]

[177,   100] loss: 0.040


154it [00:06, 23.36it/s]

[177,   150] loss: 0.040


202it [00:08, 23.39it/s]

[177,   200] loss: 0.037


250it [00:10, 22.98it/s]

[177,   250] loss: 0.035



52it [00:02, 23.20it/s]

[178,    50] loss: 0.040


103it [00:04, 23.21it/s]

[178,   100] loss: 0.042


154it [00:06, 23.38it/s]

[178,   150] loss: 0.033


202it [00:08, 23.37it/s]

[178,   200] loss: 0.034


250it [00:10, 23.03it/s]

[178,   250] loss: 0.039



52it [00:02, 23.23it/s]

[179,    50] loss: 0.040


103it [00:04, 23.11it/s]

[179,   100] loss: 0.039


154it [00:06, 23.48it/s]

[179,   150] loss: 0.035


202it [00:08, 23.45it/s]

[179,   200] loss: 0.034


250it [00:10, 23.07it/s]

[179,   250] loss: 0.035



52it [00:02, 23.49it/s]

[180,    50] loss: 0.037


103it [00:04, 23.57it/s]

[180,   100] loss: 0.034


154it [00:06, 23.71it/s]

[180,   150] loss: 0.039


202it [00:08, 23.56it/s]

[180,   200] loss: 0.035


250it [00:10, 23.20it/s]

[180,   250] loss: 0.041



52it [00:02, 23.46it/s]

[181,    50] loss: 0.039


103it [00:04, 23.53it/s]

[181,   100] loss: 0.038


154it [00:06, 23.62it/s]

[181,   150] loss: 0.035


202it [00:08, 23.46it/s]

[181,   200] loss: 0.036


250it [00:10, 23.23it/s]

[181,   250] loss: 0.035



52it [00:02, 23.25it/s]

[182,    50] loss: 0.034


103it [00:04, 23.43it/s]

[182,   100] loss: 0.036


154it [00:06, 23.47it/s]

[182,   150] loss: 0.036


202it [00:08, 23.58it/s]

[182,   200] loss: 0.035


250it [00:10, 23.20it/s]

[182,   250] loss: 0.037



52it [00:02, 23.47it/s]

[183,    50] loss: 0.037


103it [00:04, 23.62it/s]

[183,   100] loss: 0.035


154it [00:06, 23.42it/s]

[183,   150] loss: 0.043


202it [00:08, 23.40it/s]

[183,   200] loss: 0.041


250it [00:10, 23.22it/s]

[183,   250] loss: 0.038



52it [00:02, 23.74it/s]

[184,    50] loss: 0.038


103it [00:04, 23.65it/s]

[184,   100] loss: 0.036


154it [00:06, 23.46it/s]

[184,   150] loss: 0.035


202it [00:08, 23.63it/s]

[184,   200] loss: 0.037


250it [00:10, 23.31it/s]

[184,   250] loss: 0.034



52it [00:02, 23.54it/s]

[185,    50] loss: 0.038


103it [00:04, 23.51it/s]

[185,   100] loss: 0.033


154it [00:06, 23.66it/s]

[185,   150] loss: 0.035


202it [00:08, 23.52it/s]

[185,   200] loss: 0.034


250it [00:10, 23.27it/s]

[185,   250] loss: 0.037



52it [00:02, 23.48it/s]

[186,    50] loss: 0.033


103it [00:04, 23.48it/s]

[186,   100] loss: 0.035


154it [00:06, 23.51it/s]

[186,   150] loss: 0.037


202it [00:08, 23.30it/s]

[186,   200] loss: 0.038


250it [00:10, 23.10it/s]

[186,   250] loss: 0.033



52it [00:02, 23.30it/s]

[187,    50] loss: 0.037


103it [00:04, 23.33it/s]

[187,   100] loss: 0.038


154it [00:06, 23.37it/s]

[187,   150] loss: 0.037


202it [00:08, 23.45it/s]

[187,   200] loss: 0.035


250it [00:10, 23.00it/s]

[187,   250] loss: 0.038



52it [00:02, 23.43it/s]

[188,    50] loss: 0.037


103it [00:04, 23.17it/s]

[188,   100] loss: 0.035


154it [00:06, 23.37it/s]

[188,   150] loss: 0.035


202it [00:08, 23.33it/s]

[188,   200] loss: 0.038


250it [00:10, 23.04it/s]

[188,   250] loss: 0.035



52it [00:02, 23.42it/s]

[189,    50] loss: 0.037


103it [00:04, 23.32it/s]

[189,   100] loss: 0.036


154it [00:06, 23.32it/s]

[189,   150] loss: 0.038


202it [00:08, 23.39it/s]

[189,   200] loss: 0.037


250it [00:10, 23.02it/s]

[189,   250] loss: 0.037



52it [00:02, 23.37it/s]

[190,    50] loss: 0.034


103it [00:04, 23.38it/s]

[190,   100] loss: 0.036


154it [00:06, 23.36it/s]

[190,   150] loss: 0.032


202it [00:08, 23.33it/s]

[190,   200] loss: 0.033


250it [00:10, 23.05it/s]

[190,   250] loss: 0.033



52it [00:02, 23.35it/s]

[191,    50] loss: 0.035


103it [00:04, 23.29it/s]

[191,   100] loss: 0.031


154it [00:06, 23.38it/s]

[191,   150] loss: 0.034


202it [00:08, 23.41it/s]

[191,   200] loss: 0.036


250it [00:10, 23.00it/s]

[191,   250] loss: 0.032



52it [00:02, 23.18it/s]

[192,    50] loss: 0.037


103it [00:04, 23.32it/s]

[192,   100] loss: 0.033


154it [00:06, 23.46it/s]

[192,   150] loss: 0.041


202it [00:08, 23.40it/s]

[192,   200] loss: 0.038


250it [00:10, 23.09it/s]

[192,   250] loss: 0.035



52it [00:02, 23.42it/s]

[193,    50] loss: 0.039


103it [00:04, 23.46it/s]

[193,   100] loss: 0.034


154it [00:06, 23.36it/s]

[193,   150] loss: 0.034


202it [00:08, 23.45it/s]

[193,   200] loss: 0.033


250it [00:10, 23.08it/s]

[193,   250] loss: 0.034



52it [00:02, 23.35it/s]

[194,    50] loss: 0.035


103it [00:04, 23.45it/s]

[194,   100] loss: 0.038


154it [00:06, 23.45it/s]

[194,   150] loss: 0.035


202it [00:08, 23.41it/s]

[194,   200] loss: 0.036


250it [00:10, 23.07it/s]

[194,   250] loss: 0.031



52it [00:02, 23.35it/s]

[195,    50] loss: 0.036


103it [00:04, 23.41it/s]

[195,   100] loss: 0.034


154it [00:06, 23.34it/s]

[195,   150] loss: 0.035


202it [00:08, 23.36it/s]

[195,   200] loss: 0.032


250it [00:10, 23.05it/s]

[195,   250] loss: 0.038



52it [00:02, 23.31it/s]

[196,    50] loss: 0.036


103it [00:04, 23.46it/s]

[196,   100] loss: 0.031


154it [00:06, 23.40it/s]

[196,   150] loss: 0.032


202it [00:08, 23.34it/s]

[196,   200] loss: 0.036


250it [00:10, 22.97it/s]

[196,   250] loss: 0.037



52it [00:02, 23.46it/s]

[197,    50] loss: 0.037


103it [00:04, 23.31it/s]

[197,   100] loss: 0.036


154it [00:06, 23.39it/s]

[197,   150] loss: 0.035


202it [00:08, 23.16it/s]

[197,   200] loss: 0.035


250it [00:10, 23.05it/s]

[197,   250] loss: 0.035



52it [00:02, 23.48it/s]

[198,    50] loss: 0.035


103it [00:04, 23.36it/s]

[198,   100] loss: 0.037


154it [00:06, 23.28it/s]

[198,   150] loss: 0.035


202it [00:08, 23.34it/s]

[198,   200] loss: 0.036


250it [00:10, 23.04it/s]

[198,   250] loss: 0.034



52it [00:02, 23.30it/s]

[199,    50] loss: 0.032


103it [00:04, 23.38it/s]

[199,   100] loss: 0.037


154it [00:06, 23.45it/s]

[199,   150] loss: 0.039


202it [00:08, 23.29it/s]

[199,   200] loss: 0.035


250it [00:10, 23.09it/s]

[199,   250] loss: 0.034



52it [00:02, 23.39it/s]

[200,    50] loss: 0.032


103it [00:04, 23.16it/s]

[200,   100] loss: 0.032


154it [00:06, 23.06it/s]

[200,   150] loss: 0.042


202it [00:08, 23.41it/s]

[200,   200] loss: 0.038


250it [00:10, 23.03it/s]

[200,   250] loss: 0.032
Finished Training





In [13]:
with torch.no_grad():
    correct = 0
    total = 0
    for (img, label) in tqdm(test_loader):
        img = img.to(device)
        label = label.to(device)
        outputs = model(img)
        predicted = torch.argmax(outputs.data, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()

100%|██████████| 50/50 [00:01<00:00, 47.95it/s]


In [14]:
correct / total

0.7589

In [16]:
torch.save(model.state_dict(), 'model.pt')

In [17]:
with torch.no_grad():
    correct = 0
    total = 0
    for (img, label) in tqdm(train_loader):
        img = img.to(device)
        label = label.to(device)
        outputs = model(img)
        predicted = torch.argmax(outputs.data, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()

100%|██████████| 250/250 [00:06<00:00, 41.64it/s]


In [18]:
correct / total

0.98846