# HW3 Image Classification
## We strongly recommend running this homework on Kaggle
https://www.kaggle.com/c/ml2022spring-hw3b/code?competitionId=34954&sortBy=dateCreated

# Get Data
Note: if the links below are dead, you can download the data from Kaggle and upload it to the workspace, or use the Kaggle API to download the data directly into Colab.


In [None]:
#! wget https://www.dropbox.com/s/6l2vcvxl54b0b6w/food11.zip
# ! wget -O food11.zip "https://github.com/virginiakm1988/ML2022-Spring/blob/main/HW03/food11.zip?raw=true"

In [None]:
# ! unzip food11.zip

# Training

In [1]:
# Label for this experiment run; not referenced by any other cell visible in this notebook.
_exp_name = "sample"

In [1]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import torch.hub
import os
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

# This is for the progress bar.
from tqdm.auto import tqdm
import random

In [2]:
# Seed every random number generator this notebook relies on and force
# deterministic cuDNN kernels so that repeated runs behave the same way.
myseed = 3231  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# split_set() shuffles file paths with the stdlib `random` module, so it has to
# be seeded as well; otherwise the train/validation split changes on every run.
random.seed(myseed)
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)
# "cuda" only when GPUs are available.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [3]:
# Show which device string was selected for this session.
print(device)

cuda:0


## **Transforms**
Torchvision provides lots of useful utilities for image preprocessing, data wrapping as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [4]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You may add some transforms here.
    # ToTensor() should be the last one of the transforms.
    transforms.RandomAffine(degrees=18, translate=(0.15, 0.15), scale=(0.8, 1.2)),
    transforms.RandomHorizontalFlip(),
    transforms.AutoAugment(),
    transforms.ToTensor(),
])


## **Datasets**
Each image's label is encoded in its file name (the integer before the first underscore), so the image is loaded and its label parsed inside `__getitem__`.

In [5]:
def split_set(path, ratio = 0.8, files = None, shuffle = True):
    """Split a collection of image paths into a leading and a trailing part.

    Args:
        path: Directory scanned for entries whose name ends with ``.jpg``.
            Only used when ``files`` is ``None``.
        ratio: Fraction of the paths placed in the first returned list.
            Must lie in ``[0, 1]``.
        files: Optional iterable of paths to split instead of scanning
            ``path``. The iterable itself is not modified.
        shuffle: When true, the sorted paths are shuffled in place with the
            stdlib ``random`` module before splitting (seed ``random`` for a
            reproducible split).

    Returns:
        ``(train_image_paths, valid_image_paths)`` when ``ratio < 1``;
        a single list containing every path when ``ratio == 1``.

    Raises:
        ValueError: If ``ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= ratio <= 1:
        raise ValueError(f"ratio must be within [0, 1], got {ratio!r}")

    if files is None:
        files = [os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")]
    # sorted() returns a new list, so the caller's `files` is never mutated and
    # the order before shuffling does not depend on os.listdir().
    files = sorted(files)
    if shuffle:
        random.shuffle(files)

    if ratio == 1:
        return files
    cut = int(ratio * len(files))
    return files[:cut], files[cut:]

In [6]:
class FoodDataset(Dataset):
    """Dataset over a list of image paths whose label is encoded in the file name.

    The label is the integer that precedes the first underscore of the file
    name (for example ``3_123.jpg`` -> ``3``). Files whose name does not start
    with such an integer get the label ``-1``.
    """

    def __init__(self,pathes,tfm=test_tfm):
        """Store the sorted paths and the transform applied to each image.

        Args:
            pathes: Iterable of image file paths.
            tfm: Callable applied to the opened PIL image in ``__getitem__``.
        """
        # `super(FoodDataset).__init__()` only created an unbound super object
        # and never ran the parent initializer; the zero-argument form does.
        super().__init__()
        self.pathes = sorted(pathes)
        print(f"dataset with {len(self.pathes)} images")
        self.transform = tfm
  
    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.pathes)
  
    def __getitem__(self,idx):
        """Return ``(transformed_image, label)`` for the path at ``idx``."""
        fname = self.pathes[idx]
        im = Image.open(fname)
        im = self.transform(im)
        # Strip the directory part for both "\\" and "/" separators; splitting on
        # "\\" alone left the whole POSIX path in place and made every label -1.
        basename = fname.replace("\\", "/").rsplit("/", 1)[-1]
        try:
            label = int(basename.split("_")[0])
        except ValueError:
            label = -1 # test has no label
        return im,label


## **Models**

In [7]:
# Fetch the `se_resnet50` entry point of the `moskomule/senet.pytorch` repository
# through torch.hub, without pretrained weights and with an 11-class output,
# then move the model to the selected device.
se_resnet50 = torch.hub.load(
    'moskomule/senet.pytorch',
    'se_resnet50',
    pretrained=False, num_classes = 11).to(device)

Using cache found in C:\Users\User/.cache\torch\hub\moskomule_senet.pytorch_master


# Train

In [9]:
batch_size = 128
_dataset_dir = "./food11"
# Split the .jpg files found in <_dataset_dir>/all into 85% training paths and
# 15% validation paths.
train_image_paths, valid_image_paths = split_set(os.path.join(_dataset_dir,"all"), ratio = 0.85)

# Every training image is served twice per epoch: once through the augmenting
# pipeline (train_tfm) and once through the plain resize pipeline (test_tfm).
train_set = FoodDataset(train_image_paths,tfm=train_tfm)
train_set2 = FoodDataset(train_image_paths,tfm=test_tfm)

train_set_all = ConcatDataset([train_set,train_set2])
print('training set:',len(train_set_all))
valid_set = FoodDataset(valid_image_paths,tfm=test_tfm)

train_loader = DataLoader(train_set_all, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
# Validation only measures the model, so it is iterated in a fixed order:
# shuffling would change which samples land in the short last batch and make
# batch-averaged metrics vary between otherwise identical evaluations.
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)


dataset with 11301 images
dataset with 11301 images
training set: 22602
dataset with 1995 images


In [10]:
def mixup_data(x, y, alpha=1.0):
    '''Returns mixed inputs, pairs of targets, and lambda

    Args:
        x: Batch tensor with the batch along dimension 0 (at least 2-D).
        y: Targets indexed along dimension 0 in step with ``x``.
        alpha: Parameter of the Beta(alpha, alpha) distribution the mixing
            weight is drawn from; ``alpha <= 0`` disables mixing (``lam = 1``).

    Returns:
        ``(mixed_x, y_a, y_b, lam)`` where
        ``mixed_x = lam * x + (1 - lam) * x[perm]``, ``y_a`` is ``y`` and
        ``y_b`` is ``y[perm]`` for a random permutation ``perm`` of the batch.
    '''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    # Place the permutation on the same device as the data instead of relying
    # on a module-level `device`, so the function works wherever `x` lives.
    index = torch.randperm(batch_size).to(x.device)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Combine the loss against both mixup targets, weighted by ``lam``.

    ``criterion`` is called as ``criterion(pred, target)`` once for ``y_a``
    and once for ``y_b``; the result is ``lam * loss_a + (1 - lam) * loss_b``.
    """
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

In [11]:
# The number of training epochs and patience.
n_epochs = 3000
patience = 300 # If no improvement in 'patience' epochs, early stop
learning_rate = 1e-3
# Initialize a model, and put it on the device specified.
# model = Classifier().to(device)

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
# optimizer = torch.optim.AdamW(model.parameters(), lr=0.0003, weight_decay=1e-5) 
optimizer = torch.optim.AdamW(se_resnet50.parameters(), betas=(0.9, 0.98), lr=learning_rate, weight_decay=0.005)
# optimizer = torch.optim.SGD(se_resnet50.parameters(), lr=learning_rate, momentum=0.9)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor = 0.5, patience = 20, min_lr = 0.0001)

In [14]:
# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0
# se_resnet50.load_state_dict(torch.load("./se_resnet50_best.ckpt"))
for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    se_resnet50.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        imgs, labels = imgs.to(device), labels.to(device)
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)
        inputs, targets_a, targets_b, lam = mixup_data(imgs, labels,alpha = 1)
#         inputs, targets_a, targets_b = map(Variable, (imgs,targets_a, targets_b))
        # Forward the data. (Make sure data and model are on the same device.)
        logits = se_resnet50(inputs.to(device))
        
        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
#         loss = criterion(logits, labels.to(device))
        loss = mixup_criterion(criterion, logits, targets_a, targets_b, lam)
        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(se_resnet50.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()
        
        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
        scheduler.step(acc)
        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    se_resnet50.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = se_resnet50(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_acc > best_acc:
        with open(f"./se_resnet50_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./se_resnet50_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(se_resnet50.state_dict(), f"se_resnet50_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break
    #torch.save(se_resnet50.state_dict()

  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 001/3000 ] loss = 1.94171, acc = 0.28070


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 001/3000 ] loss = 1.63021, acc = 0.44883
[ Valid | 001/3000 ] loss = 1.63021, acc = 0.44883 -> best
Best model found at epoch 0, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 002/3000 ] loss = 1.88173, acc = 0.28851


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 002/3000 ] loss = 1.58085, acc = 0.45934
[ Valid | 002/3000 ] loss = 1.58085, acc = 0.45934 -> best
Best model found at epoch 1, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 003/3000 ] loss = 1.83444, acc = 0.30123


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 003/3000 ] loss = 1.53272, acc = 0.47847
[ Valid | 003/3000 ] loss = 1.53272, acc = 0.47847 -> best
Best model found at epoch 2, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 004/3000 ] loss = 1.79927, acc = 0.31237


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 004/3000 ] loss = 1.45403, acc = 0.51185
[ Valid | 004/3000 ] loss = 1.45403, acc = 0.51185 -> best
Best model found at epoch 3, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 005/3000 ] loss = 1.75820, acc = 0.30957


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 005/3000 ] loss = 1.51639, acc = 0.49496
[ Valid | 005/3000 ] loss = 1.51639, acc = 0.49496


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 006/3000 ] loss = 1.73725, acc = 0.36797


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 006/3000 ] loss = 1.39988, acc = 0.54471
[ Valid | 006/3000 ] loss = 1.39988, acc = 0.54471 -> best
Best model found at epoch 5, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 007/3000 ] loss = 1.71178, acc = 0.31090


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 007/3000 ] loss = 1.39703, acc = 0.54014
[ Valid | 007/3000 ] loss = 1.39703, acc = 0.54014


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 008/3000 ] loss = 1.66017, acc = 0.34380


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 008/3000 ] loss = 1.46810, acc = 0.50309
[ Valid | 008/3000 ] loss = 1.46810, acc = 0.50309


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 009/3000 ] loss = 1.66180, acc = 0.32617


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 009/3000 ] loss = 1.29734, acc = 0.56751
[ Valid | 009/3000 ] loss = 1.29734, acc = 0.56751 -> best
Best model found at epoch 8, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 010/3000 ] loss = 1.62924, acc = 0.35576


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 010/3000 ] loss = 1.38940, acc = 0.52475
[ Valid | 010/3000 ] loss = 1.38940, acc = 0.52475


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 011/3000 ] loss = 1.61084, acc = 0.35371


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 011/3000 ] loss = 1.25874, acc = 0.57409
[ Valid | 011/3000 ] loss = 1.25874, acc = 0.57409 -> best
Best model found at epoch 10, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 012/3000 ] loss = 1.58225, acc = 0.35958


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 012/3000 ] loss = 1.30434, acc = 0.55890
[ Valid | 012/3000 ] loss = 1.30434, acc = 0.55890


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 013/3000 ] loss = 1.57382, acc = 0.38099


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 013/3000 ] loss = 1.33801, acc = 0.55539
[ Valid | 013/3000 ] loss = 1.33801, acc = 0.55539


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 014/3000 ] loss = 1.56631, acc = 0.38288


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 014/3000 ] loss = 1.24380, acc = 0.57958
[ Valid | 014/3000 ] loss = 1.24380, acc = 0.57958 -> best
Best model found at epoch 13, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 015/3000 ] loss = 1.55221, acc = 0.37903


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 015/3000 ] loss = 1.22197, acc = 0.60173
[ Valid | 015/3000 ] loss = 1.22197, acc = 0.60173 -> best
Best model found at epoch 14, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 016/3000 ] loss = 1.53233, acc = 0.38833


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 016/3000 ] loss = 1.18441, acc = 0.60755
[ Valid | 016/3000 ] loss = 1.18441, acc = 0.60755 -> best
Best model found at epoch 15, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 017/3000 ] loss = 1.50977, acc = 0.37193


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 017/3000 ] loss = 1.24627, acc = 0.58228
[ Valid | 017/3000 ] loss = 1.24627, acc = 0.58228


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 018/3000 ] loss = 1.49411, acc = 0.36818


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 018/3000 ] loss = 1.22409, acc = 0.60152
[ Valid | 018/3000 ] loss = 1.22409, acc = 0.60152


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 019/3000 ] loss = 1.51166, acc = 0.39277


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 019/3000 ] loss = 1.19003, acc = 0.61172
[ Valid | 019/3000 ] loss = 1.19003, acc = 0.61172 -> best
Best model found at epoch 18, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 020/3000 ] loss = 1.51054, acc = 0.41450


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 020/3000 ] loss = 1.23391, acc = 0.60132
[ Valid | 020/3000 ] loss = 1.23391, acc = 0.60132


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 021/3000 ] loss = 1.45634, acc = 0.42766


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 021/3000 ] loss = 1.20506, acc = 0.60698
[ Valid | 021/3000 ] loss = 1.20506, acc = 0.60698


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 022/3000 ] loss = 1.44839, acc = 0.38486


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 022/3000 ] loss = 1.24586, acc = 0.59221
[ Valid | 022/3000 ] loss = 1.24586, acc = 0.59221


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 023/3000 ] loss = 1.46317, acc = 0.40812


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 023/3000 ] loss = 1.18331, acc = 0.61408
[ Valid | 023/3000 ] loss = 1.18331, acc = 0.61408 -> best
Best model found at epoch 22, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 024/3000 ] loss = 1.43824, acc = 0.38267


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 024/3000 ] loss = 1.24181, acc = 0.60592
[ Valid | 024/3000 ] loss = 1.24181, acc = 0.60592


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 025/3000 ] loss = 1.44610, acc = 0.39085


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 025/3000 ] loss = 1.17417, acc = 0.62206
[ Valid | 025/3000 ] loss = 1.17417, acc = 0.62206 -> best
Best model found at epoch 24, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 026/3000 ] loss = 1.41914, acc = 0.37642


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 026/3000 ] loss = 1.21166, acc = 0.59917
[ Valid | 026/3000 ] loss = 1.21166, acc = 0.59917


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 027/3000 ] loss = 1.45243, acc = 0.36552


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 027/3000 ] loss = 1.14711, acc = 0.63829
[ Valid | 027/3000 ] loss = 1.14711, acc = 0.63829 -> best
Best model found at epoch 26, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 028/3000 ] loss = 1.44915, acc = 0.38641


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 028/3000 ] loss = 1.13394, acc = 0.65179
[ Valid | 028/3000 ] loss = 1.13394, acc = 0.65179 -> best
Best model found at epoch 27, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 029/3000 ] loss = 1.41492, acc = 0.44591


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 029/3000 ] loss = 1.15738, acc = 0.62246
[ Valid | 029/3000 ] loss = 1.15738, acc = 0.62246


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 030/3000 ] loss = 1.41546, acc = 0.42451


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 030/3000 ] loss = 1.29912, acc = 0.56967
[ Valid | 030/3000 ] loss = 1.29912, acc = 0.56967


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 031/3000 ] loss = 1.40098, acc = 0.41338


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 031/3000 ] loss = 1.27349, acc = 0.58492
[ Valid | 031/3000 ] loss = 1.27349, acc = 0.58492


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 032/3000 ] loss = 1.39590, acc = 0.39820


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 032/3000 ] loss = 1.19256, acc = 0.62295
[ Valid | 032/3000 ] loss = 1.19256, acc = 0.62295


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 033/3000 ] loss = 1.33833, acc = 0.41231


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 033/3000 ] loss = 1.15014, acc = 0.64113
[ Valid | 033/3000 ] loss = 1.15014, acc = 0.64113


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 034/3000 ] loss = 1.42126, acc = 0.45807


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 034/3000 ] loss = 1.09928, acc = 0.64673
[ Valid | 034/3000 ] loss = 1.09928, acc = 0.64673


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 035/3000 ] loss = 1.39543, acc = 0.43148


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 035/3000 ] loss = 1.21503, acc = 0.61347
[ Valid | 035/3000 ] loss = 1.21503, acc = 0.61347


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 036/3000 ] loss = 1.36271, acc = 0.43746


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 036/3000 ] loss = 1.03928, acc = 0.67264
[ Valid | 036/3000 ] loss = 1.03928, acc = 0.67264 -> best
Best model found at epoch 35, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 037/3000 ] loss = 1.34276, acc = 0.43666


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 037/3000 ] loss = 1.13739, acc = 0.63139
[ Valid | 037/3000 ] loss = 1.13739, acc = 0.63139


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 038/3000 ] loss = 1.39730, acc = 0.41091


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 038/3000 ] loss = 1.31746, acc = 0.58236
[ Valid | 038/3000 ] loss = 1.31746, acc = 0.58236


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 039/3000 ] loss = 1.38006, acc = 0.44052


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 039/3000 ] loss = 1.05442, acc = 0.66945
[ Valid | 039/3000 ] loss = 1.05442, acc = 0.66945


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 040/3000 ] loss = 1.36420, acc = 0.41570


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 040/3000 ] loss = 1.15115, acc = 0.63479
[ Valid | 040/3000 ] loss = 1.15115, acc = 0.63479


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 041/3000 ] loss = 1.31740, acc = 0.42221


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 041/3000 ] loss = 1.14034, acc = 0.64070
[ Valid | 041/3000 ] loss = 1.14034, acc = 0.64070


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 042/3000 ] loss = 1.34425, acc = 0.40814


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 042/3000 ] loss = 1.13511, acc = 0.64245
[ Valid | 042/3000 ] loss = 1.13511, acc = 0.64245


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 043/3000 ] loss = 1.33749, acc = 0.48464


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 043/3000 ] loss = 1.02977, acc = 0.68318
[ Valid | 043/3000 ] loss = 1.02977, acc = 0.68318 -> best
Best model found at epoch 42, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 044/3000 ] loss = 1.34056, acc = 0.43103


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 044/3000 ] loss = 1.09041, acc = 0.65236
[ Valid | 044/3000 ] loss = 1.09041, acc = 0.65236


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 045/3000 ] loss = 1.35806, acc = 0.40603


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 045/3000 ] loss = 1.06964, acc = 0.67870
[ Valid | 045/3000 ] loss = 1.06964, acc = 0.67870


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 046/3000 ] loss = 1.29977, acc = 0.48307


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 046/3000 ] loss = 1.07300, acc = 0.67215
[ Valid | 046/3000 ] loss = 1.07300, acc = 0.67215


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 047/3000 ] loss = 1.31828, acc = 0.42543


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 047/3000 ] loss = 1.01180, acc = 0.68893
[ Valid | 047/3000 ] loss = 1.01180, acc = 0.68893 -> best
Best model found at epoch 46, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 048/3000 ] loss = 1.31244, acc = 0.42513


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 048/3000 ] loss = 1.00588, acc = 0.68123
[ Valid | 048/3000 ] loss = 1.00588, acc = 0.68123


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 049/3000 ] loss = 1.34148, acc = 0.43604


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 049/3000 ] loss = 1.05708, acc = 0.66604
[ Valid | 049/3000 ] loss = 1.05708, acc = 0.66604


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 050/3000 ] loss = 1.32181, acc = 0.39996


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 050/3000 ] loss = 1.09250, acc = 0.66417
[ Valid | 050/3000 ] loss = 1.09250, acc = 0.66417


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 051/3000 ] loss = 1.28948, acc = 0.45984


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 051/3000 ] loss = 1.05363, acc = 0.66575
[ Valid | 051/3000 ] loss = 1.05363, acc = 0.66575


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 052/3000 ] loss = 1.29317, acc = 0.44190


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 052/3000 ] loss = 1.12142, acc = 0.64889
[ Valid | 052/3000 ] loss = 1.12142, acc = 0.64889


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 053/3000 ] loss = 1.35123, acc = 0.49028


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 053/3000 ] loss = 1.17932, acc = 0.62016
[ Valid | 053/3000 ] loss = 1.17932, acc = 0.62016


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 054/3000 ] loss = 1.24529, acc = 0.49683


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 054/3000 ] loss = 1.08720, acc = 0.66477
[ Valid | 054/3000 ] loss = 1.08720, acc = 0.66477


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 055/3000 ] loss = 1.26647, acc = 0.47994


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 055/3000 ] loss = 1.09526, acc = 0.65566
[ Valid | 055/3000 ] loss = 1.09526, acc = 0.65566


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 056/3000 ] loss = 1.28482, acc = 0.47315


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 056/3000 ] loss = 0.98681, acc = 0.69617
[ Valid | 056/3000 ] loss = 0.98681, acc = 0.69617 -> best
Best model found at epoch 55, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 057/3000 ] loss = 1.22328, acc = 0.44537


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 057/3000 ] loss = 0.96494, acc = 0.70524
[ Valid | 057/3000 ] loss = 0.96494, acc = 0.70524 -> best
Best model found at epoch 56, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 058/3000 ] loss = 1.32652, acc = 0.41118


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 058/3000 ] loss = 1.06447, acc = 0.67770
[ Valid | 058/3000 ] loss = 1.06447, acc = 0.67770


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 059/3000 ] loss = 1.31994, acc = 0.42577


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 059/3000 ] loss = 1.04806, acc = 0.68040
[ Valid | 059/3000 ] loss = 1.04806, acc = 0.68040


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 060/3000 ] loss = 1.34221, acc = 0.41910


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 060/3000 ] loss = 1.17091, acc = 0.64343
[ Valid | 060/3000 ] loss = 1.17091, acc = 0.64343


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 061/3000 ] loss = 1.32671, acc = 0.45441


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 061/3000 ] loss = 1.05243, acc = 0.67672
[ Valid | 061/3000 ] loss = 1.05243, acc = 0.67672


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 062/3000 ] loss = 1.29203, acc = 0.44248


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 062/3000 ] loss = 1.11112, acc = 0.64547
[ Valid | 062/3000 ] loss = 1.11112, acc = 0.64547


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 063/3000 ] loss = 1.30190, acc = 0.42456


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 063/3000 ] loss = 1.00614, acc = 0.68717
[ Valid | 063/3000 ] loss = 1.00614, acc = 0.68717


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 064/3000 ] loss = 1.29682, acc = 0.48313


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 064/3000 ] loss = 1.01727, acc = 0.67663
[ Valid | 064/3000 ] loss = 1.01727, acc = 0.67663


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 065/3000 ] loss = 1.30653, acc = 0.47010


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 065/3000 ] loss = 0.99997, acc = 0.69881
[ Valid | 065/3000 ] loss = 0.99997, acc = 0.69881


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 066/3000 ] loss = 1.23057, acc = 0.45282


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 066/3000 ] loss = 0.97252, acc = 0.70361
[ Valid | 066/3000 ] loss = 0.97252, acc = 0.70361


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 067/3000 ] loss = 1.27738, acc = 0.45643


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 067/3000 ] loss = 0.97634, acc = 0.70162
[ Valid | 067/3000 ] loss = 0.97634, acc = 0.70162


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 068/3000 ] loss = 1.29286, acc = 0.43211


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 068/3000 ] loss = 1.03064, acc = 0.68172
[ Valid | 068/3000 ] loss = 1.03064, acc = 0.68172


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 069/3000 ] loss = 1.22252, acc = 0.46625


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 069/3000 ] loss = 1.17949, acc = 0.63857
[ Valid | 069/3000 ] loss = 1.17949, acc = 0.63857


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 070/3000 ] loss = 1.27453, acc = 0.46058


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 070/3000 ] loss = 1.05434, acc = 0.68361
[ Valid | 070/3000 ] loss = 1.05434, acc = 0.68361


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 071/3000 ] loss = 1.22456, acc = 0.47589


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 071/3000 ] loss = 1.02993, acc = 0.68083
[ Valid | 071/3000 ] loss = 1.02993, acc = 0.68083


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 072/3000 ] loss = 1.24451, acc = 0.45195


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 072/3000 ] loss = 1.07480, acc = 0.66721
[ Valid | 072/3000 ] loss = 1.07480, acc = 0.66721


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 073/3000 ] loss = 1.25968, acc = 0.45167


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 073/3000 ] loss = 1.02702, acc = 0.67971
[ Valid | 073/3000 ] loss = 1.02702, acc = 0.67971


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 074/3000 ] loss = 1.27544, acc = 0.44768


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 074/3000 ] loss = 1.06948, acc = 0.66856
[ Valid | 074/3000 ] loss = 1.06948, acc = 0.66856


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 075/3000 ] loss = 1.24495, acc = 0.43554


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 075/3000 ] loss = 1.01594, acc = 0.68890
[ Valid | 075/3000 ] loss = 1.01594, acc = 0.68890


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 076/3000 ] loss = 1.21715, acc = 0.44172


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 076/3000 ] loss = 1.00115, acc = 0.68439
[ Valid | 076/3000 ] loss = 1.00115, acc = 0.68439


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 077/3000 ] loss = 1.28173, acc = 0.46721


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 077/3000 ] loss = 1.03462, acc = 0.66960
[ Valid | 077/3000 ] loss = 1.03462, acc = 0.66960


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 078/3000 ] loss = 1.20665, acc = 0.45160


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 078/3000 ] loss = 1.01235, acc = 0.69151
[ Valid | 078/3000 ] loss = 1.01235, acc = 0.69151


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 079/3000 ] loss = 1.23749, acc = 0.45407


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 079/3000 ] loss = 1.11920, acc = 0.65320
[ Valid | 079/3000 ] loss = 1.11920, acc = 0.65320


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 080/3000 ] loss = 1.20864, acc = 0.46822


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 080/3000 ] loss = 0.94349, acc = 0.72690
[ Valid | 080/3000 ] loss = 0.94349, acc = 0.72690 -> best
Best model found at epoch 79, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 081/3000 ] loss = 1.26169, acc = 0.43788


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 081/3000 ] loss = 1.00130, acc = 0.69191
[ Valid | 081/3000 ] loss = 1.00130, acc = 0.69191


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 082/3000 ] loss = 1.23924, acc = 0.48081


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 082/3000 ] loss = 0.99879, acc = 0.69120
[ Valid | 082/3000 ] loss = 0.99879, acc = 0.69120


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 083/3000 ] loss = 1.23874, acc = 0.45708


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 083/3000 ] loss = 0.96823, acc = 0.69588
[ Valid | 083/3000 ] loss = 0.96823, acc = 0.69588


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 084/3000 ] loss = 1.19904, acc = 0.46268


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 084/3000 ] loss = 1.04819, acc = 0.66557
[ Valid | 084/3000 ] loss = 1.04819, acc = 0.66557


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 085/3000 ] loss = 1.22768, acc = 0.45765


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 085/3000 ] loss = 0.95251, acc = 0.71395
[ Valid | 085/3000 ] loss = 0.95251, acc = 0.71395


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 086/3000 ] loss = 1.21848, acc = 0.48916


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 086/3000 ] loss = 0.97208, acc = 0.69527
[ Valid | 086/3000 ] loss = 0.97208, acc = 0.69527


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 087/3000 ] loss = 1.22728, acc = 0.46091


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 087/3000 ] loss = 1.08738, acc = 0.66839
[ Valid | 087/3000 ] loss = 1.08738, acc = 0.66839


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 088/3000 ] loss = 1.20622, acc = 0.46927


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 088/3000 ] loss = 1.00654, acc = 0.68579
[ Valid | 088/3000 ] loss = 1.00654, acc = 0.68579


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 089/3000 ] loss = 1.22815, acc = 0.46349


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 089/3000 ] loss = 0.95448, acc = 0.71561
[ Valid | 089/3000 ] loss = 0.95448, acc = 0.71561


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 090/3000 ] loss = 1.20094, acc = 0.47764


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 090/3000 ] loss = 0.96843, acc = 0.70005
[ Valid | 090/3000 ] loss = 0.96843, acc = 0.70005


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 091/3000 ] loss = 1.27976, acc = 0.43319


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 091/3000 ] loss = 1.00715, acc = 0.68941
[ Valid | 091/3000 ] loss = 1.00715, acc = 0.68941


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 092/3000 ] loss = 1.20623, acc = 0.48772


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 092/3000 ] loss = 0.94343, acc = 0.72314
[ Valid | 092/3000 ] loss = 0.94343, acc = 0.72314


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 093/3000 ] loss = 1.17568, acc = 0.53080


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 093/3000 ] loss = 0.85683, acc = 0.74072
[ Valid | 093/3000 ] loss = 0.85683, acc = 0.74072 -> best
Best model found at epoch 92, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 094/3000 ] loss = 1.20986, acc = 0.48001


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 094/3000 ] loss = 0.88388, acc = 0.71730
[ Valid | 094/3000 ] loss = 0.88388, acc = 0.71730


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 095/3000 ] loss = 1.23839, acc = 0.46293


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 095/3000 ] loss = 1.01879, acc = 0.69499
[ Valid | 095/3000 ] loss = 1.01879, acc = 0.69499


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 096/3000 ] loss = 1.18732, acc = 0.46050


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 096/3000 ] loss = 0.92426, acc = 0.72664
[ Valid | 096/3000 ] loss = 0.92426, acc = 0.72664


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 097/3000 ] loss = 1.20653, acc = 0.47490


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 097/3000 ] loss = 1.08148, acc = 0.67566
[ Valid | 097/3000 ] loss = 1.08148, acc = 0.67566


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 098/3000 ] loss = 1.20689, acc = 0.46075


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 098/3000 ] loss = 1.01127, acc = 0.68628
[ Valid | 098/3000 ] loss = 1.01127, acc = 0.68628


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 099/3000 ] loss = 1.20057, acc = 0.44742


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 099/3000 ] loss = 0.91113, acc = 0.72070
[ Valid | 099/3000 ] loss = 0.91113, acc = 0.72070


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 100/3000 ] loss = 1.16669, acc = 0.46499


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 100/3000 ] loss = 0.93307, acc = 0.71458
[ Valid | 100/3000 ] loss = 0.93307, acc = 0.71458


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 101/3000 ] loss = 1.20048, acc = 0.49609


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 101/3000 ] loss = 0.95182, acc = 0.70665
[ Valid | 101/3000 ] loss = 0.95182, acc = 0.70665


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 102/3000 ] loss = 1.17193, acc = 0.49470


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 102/3000 ] loss = 0.89784, acc = 0.73087
[ Valid | 102/3000 ] loss = 0.89784, acc = 0.73087


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 103/3000 ] loss = 1.20238, acc = 0.51094


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 103/3000 ] loss = 0.97029, acc = 0.70133
[ Valid | 103/3000 ] loss = 0.97029, acc = 0.70133


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 104/3000 ] loss = 1.21499, acc = 0.45155


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 104/3000 ] loss = 0.91881, acc = 0.73144
[ Valid | 104/3000 ] loss = 0.91881, acc = 0.73144


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 105/3000 ] loss = 1.19403, acc = 0.50620


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 105/3000 ] loss = 1.05562, acc = 0.66445
[ Valid | 105/3000 ] loss = 1.05562, acc = 0.66445


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 106/3000 ] loss = 1.21015, acc = 0.45450


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 106/3000 ] loss = 0.90762, acc = 0.73632
[ Valid | 106/3000 ] loss = 0.90762, acc = 0.73632


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 107/3000 ] loss = 1.20031, acc = 0.45971


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 107/3000 ] loss = 0.97744, acc = 0.70487
[ Valid | 107/3000 ] loss = 0.97744, acc = 0.70487


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 108/3000 ] loss = 1.12579, acc = 0.50873


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 108/3000 ] loss = 0.91246, acc = 0.72837
[ Valid | 108/3000 ] loss = 0.91246, acc = 0.72837


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 109/3000 ] loss = 1.12392, acc = 0.47655


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 109/3000 ] loss = 1.04766, acc = 0.68459
[ Valid | 109/3000 ] loss = 1.04766, acc = 0.68459


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 110/3000 ] loss = 1.16882, acc = 0.49669


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 110/3000 ] loss = 0.91935, acc = 0.72917
[ Valid | 110/3000 ] loss = 0.91935, acc = 0.72917


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 111/3000 ] loss = 1.18160, acc = 0.47016


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 111/3000 ] loss = 1.04242, acc = 0.68286
[ Valid | 111/3000 ] loss = 1.04242, acc = 0.68286


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 112/3000 ] loss = 1.16207, acc = 0.47756


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 112/3000 ] loss = 0.98515, acc = 0.69631
[ Valid | 112/3000 ] loss = 0.98515, acc = 0.69631


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 113/3000 ] loss = 1.18161, acc = 0.51207


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 113/3000 ] loss = 0.95547, acc = 0.71854
[ Valid | 113/3000 ] loss = 0.95547, acc = 0.71854


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 114/3000 ] loss = 1.16318, acc = 0.49660


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 114/3000 ] loss = 1.09118, acc = 0.67962
[ Valid | 114/3000 ] loss = 1.09118, acc = 0.67962


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 115/3000 ] loss = 1.20560, acc = 0.52581


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 115/3000 ] loss = 0.89061, acc = 0.73600
[ Valid | 115/3000 ] loss = 0.89061, acc = 0.73600


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 116/3000 ] loss = 1.18358, acc = 0.45572


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 116/3000 ] loss = 0.85964, acc = 0.75738
[ Valid | 116/3000 ] loss = 0.85964, acc = 0.75738 -> best
Best model found at epoch 115, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 117/3000 ] loss = 1.15538, acc = 0.50667


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 117/3000 ] loss = 0.98664, acc = 0.69519
[ Valid | 117/3000 ] loss = 0.98664, acc = 0.69519


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 118/3000 ] loss = 1.11161, acc = 0.50945


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 118/3000 ] loss = 0.91728, acc = 0.73112
[ Valid | 118/3000 ] loss = 0.91728, acc = 0.73112


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 119/3000 ] loss = 1.16969, acc = 0.50004


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 119/3000 ] loss = 1.05076, acc = 0.67971
[ Valid | 119/3000 ] loss = 1.05076, acc = 0.67971


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 120/3000 ] loss = 1.12419, acc = 0.46257


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 120/3000 ] loss = 0.95389, acc = 0.71171
[ Valid | 120/3000 ] loss = 0.95389, acc = 0.71171


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 121/3000 ] loss = 1.12898, acc = 0.46692


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 121/3000 ] loss = 0.85771, acc = 0.75223
[ Valid | 121/3000 ] loss = 0.85771, acc = 0.75223


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 122/3000 ] loss = 1.20343, acc = 0.46894


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 122/3000 ] loss = 0.94729, acc = 0.72113
[ Valid | 122/3000 ] loss = 0.94729, acc = 0.72113


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 123/3000 ] loss = 1.22047, acc = 0.46710


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 123/3000 ] loss = 1.02141, acc = 0.68089
[ Valid | 123/3000 ] loss = 1.02141, acc = 0.68089


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 124/3000 ] loss = 1.16228, acc = 0.49230


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 124/3000 ] loss = 0.96837, acc = 0.70438
[ Valid | 124/3000 ] loss = 0.96837, acc = 0.70438


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 125/3000 ] loss = 1.13817, acc = 0.48304


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 125/3000 ] loss = 0.95855, acc = 0.70935
[ Valid | 125/3000 ] loss = 0.95855, acc = 0.70935


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 126/3000 ] loss = 1.16530, acc = 0.51123


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 126/3000 ] loss = 0.89195, acc = 0.73974
[ Valid | 126/3000 ] loss = 0.89195, acc = 0.73974


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 127/3000 ] loss = 1.13656, acc = 0.48233


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 127/3000 ] loss = 0.94216, acc = 0.71716
[ Valid | 127/3000 ] loss = 0.94216, acc = 0.71716


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 128/3000 ] loss = 1.16725, acc = 0.54288


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 128/3000 ] loss = 0.95396, acc = 0.71992
[ Valid | 128/3000 ] loss = 0.95396, acc = 0.71992


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 129/3000 ] loss = 1.12895, acc = 0.47957


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 129/3000 ] loss = 0.87177, acc = 0.74637
[ Valid | 129/3000 ] loss = 0.87177, acc = 0.74637


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 130/3000 ] loss = 1.09696, acc = 0.46689


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 130/3000 ] loss = 0.97702, acc = 0.70725
[ Valid | 130/3000 ] loss = 0.97702, acc = 0.70725


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 131/3000 ] loss = 1.13023, acc = 0.51823


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 131/3000 ] loss = 0.91707, acc = 0.73460
[ Valid | 131/3000 ] loss = 0.91707, acc = 0.73460


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 132/3000 ] loss = 1.13423, acc = 0.48250


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 132/3000 ] loss = 0.99195, acc = 0.69510
[ Valid | 132/3000 ] loss = 0.99195, acc = 0.69510


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 133/3000 ] loss = 1.13484, acc = 0.52328


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 133/3000 ] loss = 0.93643, acc = 0.72253
[ Valid | 133/3000 ] loss = 0.93643, acc = 0.72253


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 134/3000 ] loss = 1.13160, acc = 0.48416


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 134/3000 ] loss = 0.85199, acc = 0.74215
[ Valid | 134/3000 ] loss = 0.85199, acc = 0.74215


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 135/3000 ] loss = 1.20214, acc = 0.48701


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 135/3000 ] loss = 0.89110, acc = 0.73529
[ Valid | 135/3000 ] loss = 0.89110, acc = 0.73529


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 136/3000 ] loss = 1.11748, acc = 0.50564


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 136/3000 ] loss = 0.94468, acc = 0.72170
[ Valid | 136/3000 ] loss = 0.94468, acc = 0.72170


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 137/3000 ] loss = 1.15003, acc = 0.49583


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 137/3000 ] loss = 0.86612, acc = 0.74890
[ Valid | 137/3000 ] loss = 0.86612, acc = 0.74890


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 138/3000 ] loss = 1.10693, acc = 0.54727


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 138/3000 ] loss = 0.90241, acc = 0.74029
[ Valid | 138/3000 ] loss = 0.90241, acc = 0.74029


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 139/3000 ] loss = 1.11996, acc = 0.44448


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 139/3000 ] loss = 0.90544, acc = 0.72156
[ Valid | 139/3000 ] loss = 0.90544, acc = 0.72156


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 140/3000 ] loss = 1.15779, acc = 0.49219


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 140/3000 ] loss = 1.00619, acc = 0.69352
[ Valid | 140/3000 ] loss = 1.00619, acc = 0.69352


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 141/3000 ] loss = 1.13598, acc = 0.53318


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 141/3000 ] loss = 0.84619, acc = 0.76157
[ Valid | 141/3000 ] loss = 0.84619, acc = 0.76157 -> best
Best model found at epoch 140, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 142/3000 ] loss = 1.12231, acc = 0.50233


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 142/3000 ] loss = 0.86936, acc = 0.74339
[ Valid | 142/3000 ] loss = 0.86936, acc = 0.74339


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 143/3000 ] loss = 1.15192, acc = 0.49456


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 143/3000 ] loss = 1.01165, acc = 0.70406
[ Valid | 143/3000 ] loss = 1.01165, acc = 0.70406


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 144/3000 ] loss = 1.15367, acc = 0.46667


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 144/3000 ] loss = 0.94949, acc = 0.72509
[ Valid | 144/3000 ] loss = 0.94949, acc = 0.72509


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 145/3000 ] loss = 1.10924, acc = 0.46173


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 145/3000 ] loss = 0.93322, acc = 0.71527
[ Valid | 145/3000 ] loss = 0.93322, acc = 0.71527


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 146/3000 ] loss = 1.17709, acc = 0.52429


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 146/3000 ] loss = 0.89350, acc = 0.74210
[ Valid | 146/3000 ] loss = 0.89350, acc = 0.74210


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 147/3000 ] loss = 1.09339, acc = 0.55187


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 147/3000 ] loss = 0.86733, acc = 0.75557
[ Valid | 147/3000 ] loss = 0.86733, acc = 0.75557


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 148/3000 ] loss = 1.04218, acc = 0.54205


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 148/3000 ] loss = 0.94373, acc = 0.71486
[ Valid | 148/3000 ] loss = 0.94373, acc = 0.71486


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 149/3000 ] loss = 1.14164, acc = 0.45572


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 149/3000 ] loss = 0.97589, acc = 0.70510
[ Valid | 149/3000 ] loss = 0.97589, acc = 0.70510


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 150/3000 ] loss = 1.10786, acc = 0.45461


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 150/3000 ] loss = 0.84380, acc = 0.75895
[ Valid | 150/3000 ] loss = 0.84380, acc = 0.75895


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 151/3000 ] loss = 1.11472, acc = 0.50689


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 151/3000 ] loss = 0.94929, acc = 0.72587
[ Valid | 151/3000 ] loss = 0.94929, acc = 0.72587


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 152/3000 ] loss = 1.04092, acc = 0.54053


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 152/3000 ] loss = 0.90850, acc = 0.73144
[ Valid | 152/3000 ] loss = 0.90850, acc = 0.73144


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 153/3000 ] loss = 1.13993, acc = 0.48269


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 153/3000 ] loss = 0.83121, acc = 0.76157
[ Valid | 153/3000 ] loss = 0.83121, acc = 0.76157


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 154/3000 ] loss = 1.11491, acc = 0.53129


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 154/3000 ] loss = 0.86773, acc = 0.74298
[ Valid | 154/3000 ] loss = 0.86773, acc = 0.74298


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 155/3000 ] loss = 1.07514, acc = 0.53366


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 155/3000 ] loss = 0.81115, acc = 0.76197
[ Valid | 155/3000 ] loss = 0.81115, acc = 0.76197 -> best
Best model found at epoch 154, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 156/3000 ] loss = 1.09575, acc = 0.51661


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 156/3000 ] loss = 0.88192, acc = 0.74540
[ Valid | 156/3000 ] loss = 0.88192, acc = 0.74540


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 157/3000 ] loss = 1.13079, acc = 0.47816


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 157/3000 ] loss = 0.86232, acc = 0.74324
[ Valid | 157/3000 ] loss = 0.86232, acc = 0.74324


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 158/3000 ] loss = 1.06983, acc = 0.45617


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 158/3000 ] loss = 0.82132, acc = 0.76625
[ Valid | 158/3000 ] loss = 0.82132, acc = 0.76625 -> best
Best model found at epoch 157, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 159/3000 ] loss = 1.10090, acc = 0.53205


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 159/3000 ] loss = 0.84508, acc = 0.75858
[ Valid | 159/3000 ] loss = 0.84508, acc = 0.75858


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 160/3000 ] loss = 1.07627, acc = 0.44735


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 160/3000 ] loss = 0.89006, acc = 0.73161
[ Valid | 160/3000 ] loss = 0.89006, acc = 0.73161


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 161/3000 ] loss = 1.06805, acc = 0.50903


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 161/3000 ] loss = 0.79693, acc = 0.76010
[ Valid | 161/3000 ] loss = 0.79693, acc = 0.76010


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 162/3000 ] loss = 1.12557, acc = 0.48547


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 162/3000 ] loss = 0.94067, acc = 0.72845
[ Valid | 162/3000 ] loss = 0.94067, acc = 0.72845


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 163/3000 ] loss = 1.12889, acc = 0.49491


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 163/3000 ] loss = 0.94797, acc = 0.72512
[ Valid | 163/3000 ] loss = 0.94797, acc = 0.72512


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 164/3000 ] loss = 1.08796, acc = 0.44320


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 164/3000 ] loss = 0.83356, acc = 0.75628
[ Valid | 164/3000 ] loss = 0.83356, acc = 0.75628


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 165/3000 ] loss = 1.09334, acc = 0.50426


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 165/3000 ] loss = 0.97073, acc = 0.71863
[ Valid | 165/3000 ] loss = 0.97073, acc = 0.71863


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 166/3000 ] loss = 1.07026, acc = 0.50317


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 166/3000 ] loss = 0.84208, acc = 0.74594
[ Valid | 166/3000 ] loss = 0.84208, acc = 0.74594


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 167/3000 ] loss = 1.07339, acc = 0.49596


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 167/3000 ] loss = 0.93013, acc = 0.72176
[ Valid | 167/3000 ] loss = 0.93013, acc = 0.72176


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 168/3000 ] loss = 1.06890, acc = 0.50142


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 168/3000 ] loss = 0.82415, acc = 0.76226
[ Valid | 168/3000 ] loss = 0.82415, acc = 0.76226


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 169/3000 ] loss = 1.10294, acc = 0.45259


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 169/3000 ] loss = 0.81012, acc = 0.76298
[ Valid | 169/3000 ] loss = 0.81012, acc = 0.76298


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 170/3000 ] loss = 1.12243, acc = 0.49737


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 170/3000 ] loss = 0.86946, acc = 0.75795
[ Valid | 170/3000 ] loss = 0.86946, acc = 0.75795


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 171/3000 ] loss = 1.05429, acc = 0.54047


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 171/3000 ] loss = 0.83990, acc = 0.74902
[ Valid | 171/3000 ] loss = 0.83990, acc = 0.74902


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 172/3000 ] loss = 1.10822, acc = 0.48483


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 172/3000 ] loss = 0.79940, acc = 0.76487
[ Valid | 172/3000 ] loss = 0.79940, acc = 0.76487


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 173/3000 ] loss = 1.08844, acc = 0.53921


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 173/3000 ] loss = 0.96578, acc = 0.71561
[ Valid | 173/3000 ] loss = 0.96578, acc = 0.71561


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 174/3000 ] loss = 1.11407, acc = 0.52716


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 174/3000 ] loss = 0.87474, acc = 0.74775
[ Valid | 174/3000 ] loss = 0.87474, acc = 0.74775


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 175/3000 ] loss = 1.08942, acc = 0.49760


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 175/3000 ] loss = 0.76941, acc = 0.77648
[ Valid | 175/3000 ] loss = 0.76941, acc = 0.77648 -> best
Best model found at epoch 174, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 176/3000 ] loss = 1.07059, acc = 0.47485


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 176/3000 ] loss = 0.94603, acc = 0.71877
[ Valid | 176/3000 ] loss = 0.94603, acc = 0.71877


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 177/3000 ] loss = 1.01582, acc = 0.52084


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 177/3000 ] loss = 0.88094, acc = 0.74706
[ Valid | 177/3000 ] loss = 0.88094, acc = 0.74706


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 178/3000 ] loss = 1.03255, acc = 0.49624


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 178/3000 ] loss = 0.84159, acc = 0.74887
[ Valid | 178/3000 ] loss = 0.84159, acc = 0.74887


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 179/3000 ] loss = 1.10602, acc = 0.50650


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 179/3000 ] loss = 0.82655, acc = 0.76053
[ Valid | 179/3000 ] loss = 0.82655, acc = 0.76053


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 180/3000 ] loss = 1.00641, acc = 0.54014


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 180/3000 ] loss = 0.80114, acc = 0.77139
[ Valid | 180/3000 ] loss = 0.80114, acc = 0.77139


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 181/3000 ] loss = 1.09142, acc = 0.54176


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 181/3000 ] loss = 0.84300, acc = 0.75634
[ Valid | 181/3000 ] loss = 0.84300, acc = 0.75634


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 182/3000 ] loss = 1.10685, acc = 0.49725


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 182/3000 ] loss = 0.85795, acc = 0.74589
[ Valid | 182/3000 ] loss = 0.85795, acc = 0.74589


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 183/3000 ] loss = 1.07529, acc = 0.46583


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 183/3000 ] loss = 0.88217, acc = 0.74617
[ Valid | 183/3000 ] loss = 0.88217, acc = 0.74617


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 184/3000 ] loss = 1.12284, acc = 0.50429


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 184/3000 ] loss = 0.82184, acc = 0.76590
[ Valid | 184/3000 ] loss = 0.82184, acc = 0.76590


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 185/3000 ] loss = 1.07797, acc = 0.51098


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 185/3000 ] loss = 0.84864, acc = 0.73669
[ Valid | 185/3000 ] loss = 0.84864, acc = 0.73669


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 186/3000 ] loss = 0.99034, acc = 0.50795


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 186/3000 ] loss = 0.80749, acc = 0.77087
[ Valid | 186/3000 ] loss = 0.80749, acc = 0.77087


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 187/3000 ] loss = 1.10520, acc = 0.49246


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 187/3000 ] loss = 0.88652, acc = 0.74861
[ Valid | 187/3000 ] loss = 0.88652, acc = 0.74861


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 188/3000 ] loss = 1.11632, acc = 0.49467


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 188/3000 ] loss = 0.83794, acc = 0.75034
[ Valid | 188/3000 ] loss = 0.83794, acc = 0.75034


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 189/3000 ] loss = 1.12454, acc = 0.50822


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 189/3000 ] loss = 0.90867, acc = 0.74951
[ Valid | 189/3000 ] loss = 0.90867, acc = 0.74951


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 190/3000 ] loss = 1.07535, acc = 0.53637


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 190/3000 ] loss = 0.82634, acc = 0.77168
[ Valid | 190/3000 ] loss = 0.82634, acc = 0.77168


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 191/3000 ] loss = 1.09345, acc = 0.52215


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 191/3000 ] loss = 0.88625, acc = 0.75238
[ Valid | 191/3000 ] loss = 0.88625, acc = 0.75238


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 192/3000 ] loss = 1.13502, acc = 0.48776


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 192/3000 ] loss = 0.86397, acc = 0.76088
[ Valid | 192/3000 ] loss = 0.86397, acc = 0.76088


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 193/3000 ] loss = 1.04346, acc = 0.49882


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 193/3000 ] loss = 0.88100, acc = 0.73681
[ Valid | 193/3000 ] loss = 0.88100, acc = 0.73681


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 194/3000 ] loss = 1.11384, acc = 0.49763


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 194/3000 ] loss = 0.87224, acc = 0.74365
[ Valid | 194/3000 ] loss = 0.87224, acc = 0.74365


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 195/3000 ] loss = 1.10557, acc = 0.48243


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 195/3000 ] loss = 0.85987, acc = 0.75519
[ Valid | 195/3000 ] loss = 0.85987, acc = 0.75519


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 196/3000 ] loss = 1.03964, acc = 0.53167


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 196/3000 ] loss = 1.02556, acc = 0.69507
[ Valid | 196/3000 ] loss = 1.02556, acc = 0.69507


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 197/3000 ] loss = 1.05988, acc = 0.52379


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 197/3000 ] loss = 0.85169, acc = 0.75132
[ Valid | 197/3000 ] loss = 0.85169, acc = 0.75132


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 198/3000 ] loss = 1.03325, acc = 0.54906


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 198/3000 ] loss = 0.87775, acc = 0.74855
[ Valid | 198/3000 ] loss = 0.87775, acc = 0.74855


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 199/3000 ] loss = 1.04124, acc = 0.52436


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 199/3000 ] loss = 0.85742, acc = 0.74591
[ Valid | 199/3000 ] loss = 0.85742, acc = 0.74591


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 200/3000 ] loss = 1.03917, acc = 0.52467


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 200/3000 ] loss = 0.92065, acc = 0.73333
[ Valid | 200/3000 ] loss = 0.92065, acc = 0.73333


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 201/3000 ] loss = 1.08066, acc = 0.47299


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 201/3000 ] loss = 0.78836, acc = 0.76780
[ Valid | 201/3000 ] loss = 0.78836, acc = 0.76780


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 202/3000 ] loss = 0.99859, acc = 0.50391


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 202/3000 ] loss = 0.86130, acc = 0.75126
[ Valid | 202/3000 ] loss = 0.86130, acc = 0.75126


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 203/3000 ] loss = 1.11082, acc = 0.53384


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 203/3000 ] loss = 0.86687, acc = 0.74442
[ Valid | 203/3000 ] loss = 0.86687, acc = 0.74442


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 204/3000 ] loss = 1.06419, acc = 0.51120


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 204/3000 ] loss = 0.88575, acc = 0.74517
[ Valid | 204/3000 ] loss = 0.88575, acc = 0.74517


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 205/3000 ] loss = 1.06100, acc = 0.54700


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 205/3000 ] loss = 0.88208, acc = 0.74373
[ Valid | 205/3000 ] loss = 0.88208, acc = 0.74373


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 206/3000 ] loss = 1.05857, acc = 0.48449


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 206/3000 ] loss = 0.99218, acc = 0.70229
[ Valid | 206/3000 ] loss = 0.99218, acc = 0.70229


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 207/3000 ] loss = 1.04307, acc = 0.50314


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 207/3000 ] loss = 0.85546, acc = 0.75111
[ Valid | 207/3000 ] loss = 0.85546, acc = 0.75111


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 208/3000 ] loss = 1.08201, acc = 0.53896


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 208/3000 ] loss = 0.82281, acc = 0.75401
[ Valid | 208/3000 ] loss = 0.82281, acc = 0.75401


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 209/3000 ] loss = 1.00705, acc = 0.51194


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 209/3000 ] loss = 0.79761, acc = 0.77783
[ Valid | 209/3000 ] loss = 0.79761, acc = 0.77783 -> best
Best model found at epoch 208, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 210/3000 ] loss = 1.02319, acc = 0.49740


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 210/3000 ] loss = 0.96716, acc = 0.71920
[ Valid | 210/3000 ] loss = 0.96716, acc = 0.71920


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 211/3000 ] loss = 1.05039, acc = 0.49539


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 211/3000 ] loss = 0.82680, acc = 0.75973
[ Valid | 211/3000 ] loss = 0.82680, acc = 0.75973


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 212/3000 ] loss = 1.03270, acc = 0.52653


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 212/3000 ] loss = 0.79852, acc = 0.76660
[ Valid | 212/3000 ] loss = 0.79852, acc = 0.76660


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 213/3000 ] loss = 0.99141, acc = 0.51061


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 213/3000 ] loss = 0.77380, acc = 0.77803
[ Valid | 213/3000 ] loss = 0.77380, acc = 0.77803 -> best
Best model found at epoch 212, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 214/3000 ] loss = 1.03112, acc = 0.52180


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 214/3000 ] loss = 0.78449, acc = 0.77211
[ Valid | 214/3000 ] loss = 0.78449, acc = 0.77211


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 215/3000 ] loss = 1.03285, acc = 0.54806


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 215/3000 ] loss = 0.79261, acc = 0.76680
[ Valid | 215/3000 ] loss = 0.79261, acc = 0.76680


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 216/3000 ] loss = 1.08183, acc = 0.52689


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 216/3000 ] loss = 0.85600, acc = 0.75404
[ Valid | 216/3000 ] loss = 0.85600, acc = 0.75404


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 217/3000 ] loss = 1.04076, acc = 0.48498


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 217/3000 ] loss = 0.95790, acc = 0.72084
[ Valid | 217/3000 ] loss = 0.95790, acc = 0.72084


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 218/3000 ] loss = 1.01145, acc = 0.48840


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 218/3000 ] loss = 0.87138, acc = 0.75085
[ Valid | 218/3000 ] loss = 0.87138, acc = 0.75085


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 219/3000 ] loss = 1.02367, acc = 0.52683


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 219/3000 ] loss = 0.82634, acc = 0.77182
[ Valid | 219/3000 ] loss = 0.82634, acc = 0.77182


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 220/3000 ] loss = 1.02614, acc = 0.48820


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 220/3000 ] loss = 0.84874, acc = 0.75390
[ Valid | 220/3000 ] loss = 0.84874, acc = 0.75390


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 221/3000 ] loss = 1.07381, acc = 0.50426


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 221/3000 ] loss = 0.89614, acc = 0.73770
[ Valid | 221/3000 ] loss = 0.89614, acc = 0.73770


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 222/3000 ] loss = 1.06725, acc = 0.52835


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 222/3000 ] loss = 0.86702, acc = 0.75364
[ Valid | 222/3000 ] loss = 0.86702, acc = 0.75364


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 223/3000 ] loss = 1.08839, acc = 0.53057


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 223/3000 ] loss = 0.82154, acc = 0.76958
[ Valid | 223/3000 ] loss = 0.82154, acc = 0.76958


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 224/3000 ] loss = 1.04572, acc = 0.50998


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 224/3000 ] loss = 0.83288, acc = 0.77268
[ Valid | 224/3000 ] loss = 0.83288, acc = 0.77268


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 225/3000 ] loss = 1.01258, acc = 0.49723


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 225/3000 ] loss = 0.75652, acc = 0.78018
[ Valid | 225/3000 ] loss = 0.75652, acc = 0.78018 -> best
Best model found at epoch 224, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 226/3000 ] loss = 1.08419, acc = 0.49284


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 226/3000 ] loss = 0.76193, acc = 0.78515
[ Valid | 226/3000 ] loss = 0.76193, acc = 0.78515 -> best
Best model found at epoch 225, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 227/3000 ] loss = 0.99846, acc = 0.51533


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 227/3000 ] loss = 0.82422, acc = 0.76039
[ Valid | 227/3000 ] loss = 0.82422, acc = 0.76039


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 228/3000 ] loss = 1.01260, acc = 0.55354


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 228/3000 ] loss = 0.81002, acc = 0.76969
[ Valid | 228/3000 ] loss = 0.81002, acc = 0.76969


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 229/3000 ] loss = 1.02400, acc = 0.53260


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 229/3000 ] loss = 0.84713, acc = 0.76220
[ Valid | 229/3000 ] loss = 0.84713, acc = 0.76220


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 230/3000 ] loss = 1.03330, acc = 0.50586


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 230/3000 ] loss = 0.83008, acc = 0.77154
[ Valid | 230/3000 ] loss = 0.83008, acc = 0.77154


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 231/3000 ] loss = 1.05186, acc = 0.47168


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 231/3000 ] loss = 0.78648, acc = 0.77823
[ Valid | 231/3000 ] loss = 0.78648, acc = 0.77823


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 232/3000 ] loss = 0.99656, acc = 0.51202


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 232/3000 ] loss = 0.76013, acc = 0.78196
[ Valid | 232/3000 ] loss = 0.76013, acc = 0.78196


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 233/3000 ] loss = 1.03265, acc = 0.49811


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 233/3000 ] loss = 0.93845, acc = 0.73293
[ Valid | 233/3000 ] loss = 0.93845, acc = 0.73293


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 234/3000 ] loss = 1.02780, acc = 0.47767


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 234/3000 ] loss = 0.87844, acc = 0.74057
[ Valid | 234/3000 ] loss = 0.87844, acc = 0.74057


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 235/3000 ] loss = 1.03693, acc = 0.53227


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 235/3000 ] loss = 0.80517, acc = 0.77874
[ Valid | 235/3000 ] loss = 0.80517, acc = 0.77874


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 236/3000 ] loss = 0.99650, acc = 0.48700


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 236/3000 ] loss = 0.84001, acc = 0.75459
[ Valid | 236/3000 ] loss = 0.84001, acc = 0.75459


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 237/3000 ] loss = 1.07325, acc = 0.51286


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 237/3000 ] loss = 0.83732, acc = 0.76694
[ Valid | 237/3000 ] loss = 0.83732, acc = 0.76694


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 238/3000 ] loss = 1.09961, acc = 0.52083


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 238/3000 ] loss = 0.78451, acc = 0.78880
[ Valid | 238/3000 ] loss = 0.78451, acc = 0.78880 -> best
Best model found at epoch 237, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 239/3000 ] loss = 1.06447, acc = 0.50838


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 239/3000 ] loss = 0.82306, acc = 0.76536
[ Valid | 239/3000 ] loss = 0.82306, acc = 0.76536


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 240/3000 ] loss = 1.02424, acc = 0.53617


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 240/3000 ] loss = 0.83277, acc = 0.77421
[ Valid | 240/3000 ] loss = 0.83277, acc = 0.77421


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 241/3000 ] loss = 1.05177, acc = 0.45385


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 241/3000 ] loss = 0.78423, acc = 0.77266
[ Valid | 241/3000 ] loss = 0.78423, acc = 0.77266


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 242/3000 ] loss = 1.02781, acc = 0.57393


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 242/3000 ] loss = 0.89316, acc = 0.73939
[ Valid | 242/3000 ] loss = 0.89316, acc = 0.73939


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 243/3000 ] loss = 1.01612, acc = 0.50471


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 243/3000 ] loss = 0.77898, acc = 0.78251
[ Valid | 243/3000 ] loss = 0.77898, acc = 0.78251


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 244/3000 ] loss = 1.00754, acc = 0.53837


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 244/3000 ] loss = 0.86500, acc = 0.74896
[ Valid | 244/3000 ] loss = 0.86500, acc = 0.74896


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 245/3000 ] loss = 1.02965, acc = 0.51964


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 245/3000 ] loss = 0.79308, acc = 0.77909
[ Valid | 245/3000 ] loss = 0.79308, acc = 0.77909


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 246/3000 ] loss = 1.05310, acc = 0.47227


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 246/3000 ] loss = 0.87215, acc = 0.75272
[ Valid | 246/3000 ] loss = 0.87215, acc = 0.75272


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 247/3000 ] loss = 1.06996, acc = 0.50936


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 247/3000 ] loss = 0.85107, acc = 0.76171
[ Valid | 247/3000 ] loss = 0.85107, acc = 0.76171


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 248/3000 ] loss = 1.02000, acc = 0.54994


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 248/3000 ] loss = 0.79833, acc = 0.77777
[ Valid | 248/3000 ] loss = 0.79833, acc = 0.77777


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 249/3000 ] loss = 0.99744, acc = 0.54872


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 249/3000 ] loss = 0.81723, acc = 0.76539
[ Valid | 249/3000 ] loss = 0.81723, acc = 0.76539


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 250/3000 ] loss = 1.06976, acc = 0.50178


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 250/3000 ] loss = 0.74927, acc = 0.78633
[ Valid | 250/3000 ] loss = 0.74927, acc = 0.78633


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 251/3000 ] loss = 0.96227, acc = 0.51600


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 251/3000 ] loss = 0.80260, acc = 0.78265
[ Valid | 251/3000 ] loss = 0.80260, acc = 0.78265


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 252/3000 ] loss = 1.00468, acc = 0.45670


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 252/3000 ] loss = 0.78541, acc = 0.78751
[ Valid | 252/3000 ] loss = 0.78541, acc = 0.78751


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 253/3000 ] loss = 1.09004, acc = 0.53532


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 253/3000 ] loss = 0.79386, acc = 0.76995
[ Valid | 253/3000 ] loss = 0.79386, acc = 0.76995


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 254/3000 ] loss = 1.06872, acc = 0.52606


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 254/3000 ] loss = 0.75852, acc = 0.79483
[ Valid | 254/3000 ] loss = 0.75852, acc = 0.79483 -> best
Best model found at epoch 253, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 255/3000 ] loss = 0.97352, acc = 0.51820


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 255/3000 ] loss = 0.88132, acc = 0.75528
[ Valid | 255/3000 ] loss = 0.88132, acc = 0.75528


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 256/3000 ] loss = 1.04077, acc = 0.48396


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 256/3000 ] loss = 0.74959, acc = 0.78920
[ Valid | 256/3000 ] loss = 0.74959, acc = 0.78920


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 257/3000 ] loss = 1.01741, acc = 0.52368


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 257/3000 ] loss = 0.90828, acc = 0.72920
[ Valid | 257/3000 ] loss = 0.90828, acc = 0.72920


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 258/3000 ] loss = 1.01296, acc = 0.52787


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 258/3000 ] loss = 0.83132, acc = 0.76088
[ Valid | 258/3000 ] loss = 0.83132, acc = 0.76088


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 259/3000 ] loss = 1.04010, acc = 0.54847


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 259/3000 ] loss = 0.75826, acc = 0.79184
[ Valid | 259/3000 ] loss = 0.75826, acc = 0.79184


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 260/3000 ] loss = 0.96945, acc = 0.51683


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 260/3000 ] loss = 0.80766, acc = 0.76605
[ Valid | 260/3000 ] loss = 0.80766, acc = 0.76605


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 261/3000 ] loss = 1.06259, acc = 0.51216


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 261/3000 ] loss = 0.78112, acc = 0.78897
[ Valid | 261/3000 ] loss = 0.78112, acc = 0.78897


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 262/3000 ] loss = 0.99078, acc = 0.52796


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 262/3000 ] loss = 0.75548, acc = 0.78702
[ Valid | 262/3000 ] loss = 0.75548, acc = 0.78702


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 263/3000 ] loss = 0.96548, acc = 0.50284


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 263/3000 ] loss = 0.91668, acc = 0.73040
[ Valid | 263/3000 ] loss = 0.91668, acc = 0.73040


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 264/3000 ] loss = 0.99902, acc = 0.47471


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 264/3000 ] loss = 0.78753, acc = 0.77915
[ Valid | 264/3000 ] loss = 0.78753, acc = 0.77915


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 265/3000 ] loss = 1.01883, acc = 0.54645


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 265/3000 ] loss = 0.84918, acc = 0.75852
[ Valid | 265/3000 ] loss = 0.84918, acc = 0.75852


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 266/3000 ] loss = 1.02683, acc = 0.48927


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 266/3000 ] loss = 0.82004, acc = 0.76814
[ Valid | 266/3000 ] loss = 0.82004, acc = 0.76814


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 267/3000 ] loss = 1.02678, acc = 0.49902


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 267/3000 ] loss = 0.76973, acc = 0.79233
[ Valid | 267/3000 ] loss = 0.76973, acc = 0.79233


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 268/3000 ] loss = 1.00797, acc = 0.53543


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 268/3000 ] loss = 0.78812, acc = 0.78208
[ Valid | 268/3000 ] loss = 0.78812, acc = 0.78208


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 269/3000 ] loss = 1.01112, acc = 0.52569


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 269/3000 ] loss = 0.78309, acc = 0.78708
[ Valid | 269/3000 ] loss = 0.78309, acc = 0.78708


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 270/3000 ] loss = 1.01795, acc = 0.47660


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 270/3000 ] loss = 0.78533, acc = 0.78716
[ Valid | 270/3000 ] loss = 0.78533, acc = 0.78716


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 271/3000 ] loss = 1.02704, acc = 0.56678


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 271/3000 ] loss = 0.76634, acc = 0.77791
[ Valid | 271/3000 ] loss = 0.76634, acc = 0.77791


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 272/3000 ] loss = 0.99811, acc = 0.52703


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 272/3000 ] loss = 0.80899, acc = 0.77507
[ Valid | 272/3000 ] loss = 0.80899, acc = 0.77507


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 273/3000 ] loss = 0.96468, acc = 0.53816


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 273/3000 ] loss = 0.78428, acc = 0.77665
[ Valid | 273/3000 ] loss = 0.78428, acc = 0.77665


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 274/3000 ] loss = 1.01982, acc = 0.49832


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 274/3000 ] loss = 0.83684, acc = 0.76932
[ Valid | 274/3000 ] loss = 0.83684, acc = 0.76932


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 275/3000 ] loss = 0.99074, acc = 0.48626


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 275/3000 ] loss = 0.84257, acc = 0.76700
[ Valid | 275/3000 ] loss = 0.84257, acc = 0.76700


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 276/3000 ] loss = 1.05562, acc = 0.48705


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 276/3000 ] loss = 0.88424, acc = 0.75304
[ Valid | 276/3000 ] loss = 0.88424, acc = 0.75304


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 277/3000 ] loss = 1.05460, acc = 0.52079


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 277/3000 ] loss = 0.74426, acc = 0.79107
[ Valid | 277/3000 ] loss = 0.74426, acc = 0.79107


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 278/3000 ] loss = 1.08083, acc = 0.49578


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 278/3000 ] loss = 0.80848, acc = 0.77495
[ Valid | 278/3000 ] loss = 0.80848, acc = 0.77495


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 279/3000 ] loss = 1.03115, acc = 0.52867


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 279/3000 ] loss = 0.84360, acc = 0.76605
[ Valid | 279/3000 ] loss = 0.84360, acc = 0.76605


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 280/3000 ] loss = 1.04551, acc = 0.49990


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 280/3000 ] loss = 0.84891, acc = 0.76458
[ Valid | 280/3000 ] loss = 0.84891, acc = 0.76458


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 281/3000 ] loss = 1.05685, acc = 0.54498


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 281/3000 ] loss = 0.82393, acc = 0.76122
[ Valid | 281/3000 ] loss = 0.82393, acc = 0.76122


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 282/3000 ] loss = 0.97475, acc = 0.50066


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 282/3000 ] loss = 0.76608, acc = 0.77817
[ Valid | 282/3000 ] loss = 0.76608, acc = 0.77817


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 283/3000 ] loss = 1.02217, acc = 0.54001


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 283/3000 ] loss = 0.83143, acc = 0.76777
[ Valid | 283/3000 ] loss = 0.83143, acc = 0.76777


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 284/3000 ] loss = 1.04137, acc = 0.50267


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 284/3000 ] loss = 0.87317, acc = 0.75284
[ Valid | 284/3000 ] loss = 0.87317, acc = 0.75284


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 285/3000 ] loss = 1.00954, acc = 0.52647


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 285/3000 ] loss = 0.78911, acc = 0.76932
[ Valid | 285/3000 ] loss = 0.78911, acc = 0.76932


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 286/3000 ] loss = 1.00519, acc = 0.53172


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 286/3000 ] loss = 0.75309, acc = 0.79670
[ Valid | 286/3000 ] loss = 0.75309, acc = 0.79670 -> best
Best model found at epoch 285, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 287/3000 ] loss = 1.01568, acc = 0.52300


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 287/3000 ] loss = 0.80550, acc = 0.77602
[ Valid | 287/3000 ] loss = 0.80550, acc = 0.77602


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 288/3000 ] loss = 0.97072, acc = 0.55348


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 288/3000 ] loss = 0.77010, acc = 0.78084
[ Valid | 288/3000 ] loss = 0.77010, acc = 0.78084


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 289/3000 ] loss = 1.01421, acc = 0.54396


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 289/3000 ] loss = 0.81475, acc = 0.76668
[ Valid | 289/3000 ] loss = 0.81475, acc = 0.76668


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 290/3000 ] loss = 0.98192, acc = 0.49989


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 290/3000 ] loss = 0.76788, acc = 0.78799
[ Valid | 290/3000 ] loss = 0.76788, acc = 0.78799


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 291/3000 ] loss = 1.04553, acc = 0.48656


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 291/3000 ] loss = 0.85304, acc = 0.75272
[ Valid | 291/3000 ] loss = 0.85304, acc = 0.75272


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 292/3000 ] loss = 1.00769, acc = 0.52720


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 292/3000 ] loss = 0.87309, acc = 0.75758
[ Valid | 292/3000 ] loss = 0.87309, acc = 0.75758


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 293/3000 ] loss = 0.99513, acc = 0.51415


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 293/3000 ] loss = 0.76411, acc = 0.78383
[ Valid | 293/3000 ] loss = 0.76411, acc = 0.78383


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 294/3000 ] loss = 0.97316, acc = 0.54491


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 294/3000 ] loss = 0.87744, acc = 0.74882
[ Valid | 294/3000 ] loss = 0.87744, acc = 0.74882


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 295/3000 ] loss = 0.99932, acc = 0.55392


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 295/3000 ] loss = 0.78616, acc = 0.78598
[ Valid | 295/3000 ] loss = 0.78616, acc = 0.78598


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 296/3000 ] loss = 1.00225, acc = 0.47605


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 296/3000 ] loss = 0.86055, acc = 0.76493
[ Valid | 296/3000 ] loss = 0.86055, acc = 0.76493


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 297/3000 ] loss = 1.03747, acc = 0.51089


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 297/3000 ] loss = 0.75316, acc = 0.79345
[ Valid | 297/3000 ] loss = 0.75316, acc = 0.79345


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 298/3000 ] loss = 1.00699, acc = 0.52818


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 298/3000 ] loss = 0.83168, acc = 0.76814
[ Valid | 298/3000 ] loss = 0.83168, acc = 0.76814


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 299/3000 ] loss = 1.01518, acc = 0.51807


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 299/3000 ] loss = 0.84933, acc = 0.75413
[ Valid | 299/3000 ] loss = 0.84933, acc = 0.75413


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 300/3000 ] loss = 1.02798, acc = 0.49482


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 300/3000 ] loss = 0.74048, acc = 0.79652
[ Valid | 300/3000 ] loss = 0.74048, acc = 0.79652


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 301/3000 ] loss = 1.07359, acc = 0.51602


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 301/3000 ] loss = 0.79442, acc = 0.78667
[ Valid | 301/3000 ] loss = 0.79442, acc = 0.78667


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 302/3000 ] loss = 1.00792, acc = 0.51462


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 302/3000 ] loss = 0.82901, acc = 0.76521
[ Valid | 302/3000 ] loss = 0.82901, acc = 0.76521


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 303/3000 ] loss = 0.97724, acc = 0.51511


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 303/3000 ] loss = 0.78736, acc = 0.77518
[ Valid | 303/3000 ] loss = 0.78736, acc = 0.77518


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 304/3000 ] loss = 0.98582, acc = 0.54431


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 304/3000 ] loss = 0.70665, acc = 0.80049
[ Valid | 304/3000 ] loss = 0.70665, acc = 0.80049 -> best
Best model found at epoch 303, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 305/3000 ] loss = 0.96516, acc = 0.53953


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 305/3000 ] loss = 0.77918, acc = 0.78389
[ Valid | 305/3000 ] loss = 0.77918, acc = 0.78389


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 306/3000 ] loss = 0.96333, acc = 0.53583


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 306/3000 ] loss = 0.80788, acc = 0.77874
[ Valid | 306/3000 ] loss = 0.80788, acc = 0.77874


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 307/3000 ] loss = 1.03902, acc = 0.55698


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 307/3000 ] loss = 0.77814, acc = 0.78570
[ Valid | 307/3000 ] loss = 0.77814, acc = 0.78570


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 308/3000 ] loss = 0.97957, acc = 0.50479


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 308/3000 ] loss = 0.79548, acc = 0.78061
[ Valid | 308/3000 ] loss = 0.79548, acc = 0.78061


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 309/3000 ] loss = 1.01582, acc = 0.51005


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 309/3000 ] loss = 0.90925, acc = 0.74508
[ Valid | 309/3000 ] loss = 0.90925, acc = 0.74508


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 310/3000 ] loss = 0.94422, acc = 0.49118


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 310/3000 ] loss = 0.77491, acc = 0.78549
[ Valid | 310/3000 ] loss = 0.77491, acc = 0.78549


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 311/3000 ] loss = 1.00054, acc = 0.54892


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 311/3000 ] loss = 0.79893, acc = 0.76898
[ Valid | 311/3000 ] loss = 0.79893, acc = 0.76898


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 312/3000 ] loss = 1.02181, acc = 0.48767


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 312/3000 ] loss = 0.90691, acc = 0.73276
[ Valid | 312/3000 ] loss = 0.90691, acc = 0.73276


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 313/3000 ] loss = 1.00646, acc = 0.52460


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 313/3000 ] loss = 0.76987, acc = 0.78352
[ Valid | 313/3000 ] loss = 0.76987, acc = 0.78352


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 314/3000 ] loss = 0.96480, acc = 0.51546


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 314/3000 ] loss = 0.73209, acc = 0.80089
[ Valid | 314/3000 ] loss = 0.73209, acc = 0.80089 -> best
Best model found at epoch 313, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 315/3000 ] loss = 0.96577, acc = 0.48749


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 315/3000 ] loss = 0.76413, acc = 0.78891
[ Valid | 315/3000 ] loss = 0.76413, acc = 0.78891


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 316/3000 ] loss = 1.01232, acc = 0.49378


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 316/3000 ] loss = 0.76343, acc = 0.78328
[ Valid | 316/3000 ] loss = 0.76343, acc = 0.78328


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 317/3000 ] loss = 0.99308, acc = 0.50819


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 317/3000 ] loss = 0.76323, acc = 0.78549
[ Valid | 317/3000 ] loss = 0.76323, acc = 0.78549


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 318/3000 ] loss = 0.99994, acc = 0.57039


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 318/3000 ] loss = 0.84218, acc = 0.76005
[ Valid | 318/3000 ] loss = 0.84218, acc = 0.76005


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 319/3000 ] loss = 1.00271, acc = 0.54895


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 319/3000 ] loss = 0.73784, acc = 0.79454
[ Valid | 319/3000 ] loss = 0.73784, acc = 0.79454


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 320/3000 ] loss = 1.01583, acc = 0.52736


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 320/3000 ] loss = 0.82124, acc = 0.77053
[ Valid | 320/3000 ] loss = 0.82124, acc = 0.77053


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 321/3000 ] loss = 0.94572, acc = 0.56815


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 321/3000 ] loss = 0.83877, acc = 0.77398
[ Valid | 321/3000 ] loss = 0.83877, acc = 0.77398


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 322/3000 ] loss = 0.97435, acc = 0.52286


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 322/3000 ] loss = 0.80534, acc = 0.77533
[ Valid | 322/3000 ] loss = 0.80534, acc = 0.77533


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 323/3000 ] loss = 0.96661, acc = 0.55879


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 323/3000 ] loss = 0.77423, acc = 0.78877
[ Valid | 323/3000 ] loss = 0.77423, acc = 0.78877


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 324/3000 ] loss = 0.95230, acc = 0.54929


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 324/3000 ] loss = 0.86164, acc = 0.74735
[ Valid | 324/3000 ] loss = 0.86164, acc = 0.74735


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 325/3000 ] loss = 1.00188, acc = 0.52788


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 325/3000 ] loss = 0.80325, acc = 0.77998
[ Valid | 325/3000 ] loss = 0.80325, acc = 0.77998


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 326/3000 ] loss = 0.97905, acc = 0.54350


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 326/3000 ] loss = 0.87176, acc = 0.75223
[ Valid | 326/3000 ] loss = 0.87176, acc = 0.75223


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 327/3000 ] loss = 0.97437, acc = 0.51602


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 327/3000 ] loss = 0.76662, acc = 0.78745
[ Valid | 327/3000 ] loss = 0.76662, acc = 0.78745


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 328/3000 ] loss = 0.97233, acc = 0.54601


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 328/3000 ] loss = 0.71717, acc = 0.80494
[ Valid | 328/3000 ] loss = 0.71717, acc = 0.80494 -> best
Best model found at epoch 327, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 329/3000 ] loss = 0.97018, acc = 0.47536


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 329/3000 ] loss = 0.79784, acc = 0.77148
[ Valid | 329/3000 ] loss = 0.79784, acc = 0.77148


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 330/3000 ] loss = 1.00474, acc = 0.55407


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 330/3000 ] loss = 0.77963, acc = 0.78501
[ Valid | 330/3000 ] loss = 0.77963, acc = 0.78501


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 331/3000 ] loss = 0.95553, acc = 0.53216


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 331/3000 ] loss = 0.79000, acc = 0.78647
[ Valid | 331/3000 ] loss = 0.79000, acc = 0.78647


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 332/3000 ] loss = 0.97653, acc = 0.52739


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 332/3000 ] loss = 0.76667, acc = 0.78486
[ Valid | 332/3000 ] loss = 0.76667, acc = 0.78486


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 333/3000 ] loss = 1.01149, acc = 0.50727


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 333/3000 ] loss = 0.86279, acc = 0.75927
[ Valid | 333/3000 ] loss = 0.86279, acc = 0.75927


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 334/3000 ] loss = 1.00846, acc = 0.50294


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 334/3000 ] loss = 0.78377, acc = 0.79196
[ Valid | 334/3000 ] loss = 0.78377, acc = 0.79196


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 335/3000 ] loss = 0.97086, acc = 0.54726


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 335/3000 ] loss = 0.73584, acc = 0.79204
[ Valid | 335/3000 ] loss = 0.73584, acc = 0.79204


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 336/3000 ] loss = 0.99490, acc = 0.51857


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 336/3000 ] loss = 0.72891, acc = 0.80083
[ Valid | 336/3000 ] loss = 0.72891, acc = 0.80083


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 337/3000 ] loss = 0.97119, acc = 0.49167


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 337/3000 ] loss = 0.78540, acc = 0.77754
[ Valid | 337/3000 ] loss = 0.78540, acc = 0.77754


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 338/3000 ] loss = 0.98668, acc = 0.49407


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 338/3000 ] loss = 0.76431, acc = 0.78785
[ Valid | 338/3000 ] loss = 0.76431, acc = 0.78785


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 339/3000 ] loss = 1.01945, acc = 0.56422


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 339/3000 ] loss = 0.80144, acc = 0.77797
[ Valid | 339/3000 ] loss = 0.80144, acc = 0.77797


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 340/3000 ] loss = 0.99541, acc = 0.52943


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 340/3000 ] loss = 0.77420, acc = 0.78423
[ Valid | 340/3000 ] loss = 0.77420, acc = 0.78423


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 341/3000 ] loss = 0.97822, acc = 0.56616


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 341/3000 ] loss = 0.75223, acc = 0.79965
[ Valid | 341/3000 ] loss = 0.75223, acc = 0.79965


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 342/3000 ] loss = 0.99014, acc = 0.54747


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 342/3000 ] loss = 0.75627, acc = 0.79178
[ Valid | 342/3000 ] loss = 0.75627, acc = 0.79178


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 343/3000 ] loss = 0.96863, acc = 0.54569


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 343/3000 ] loss = 0.80718, acc = 0.77303
[ Valid | 343/3000 ] loss = 0.80718, acc = 0.77303


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 344/3000 ] loss = 0.96506, acc = 0.51257


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 344/3000 ] loss = 0.79534, acc = 0.78188
[ Valid | 344/3000 ] loss = 0.79534, acc = 0.78188


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 345/3000 ] loss = 0.98638, acc = 0.52772


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 345/3000 ] loss = 0.74857, acc = 0.78894
[ Valid | 345/3000 ] loss = 0.74857, acc = 0.78894


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 346/3000 ] loss = 0.99381, acc = 0.54952


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 346/3000 ] loss = 0.74932, acc = 0.80307
[ Valid | 346/3000 ] loss = 0.74932, acc = 0.80307


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 347/3000 ] loss = 0.94313, acc = 0.55053


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 347/3000 ] loss = 0.75489, acc = 0.79038
[ Valid | 347/3000 ] loss = 0.75489, acc = 0.79038


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 348/3000 ] loss = 0.98592, acc = 0.54290


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 348/3000 ] loss = 0.74433, acc = 0.79506
[ Valid | 348/3000 ] loss = 0.74433, acc = 0.79506


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 349/3000 ] loss = 0.93724, acc = 0.50737


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 349/3000 ] loss = 0.73692, acc = 0.80201
[ Valid | 349/3000 ] loss = 0.73692, acc = 0.80201


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 350/3000 ] loss = 0.97219, acc = 0.49169


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 350/3000 ] loss = 0.74421, acc = 0.79595
[ Valid | 350/3000 ] loss = 0.74421, acc = 0.79595


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 351/3000 ] loss = 1.03593, acc = 0.51661


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 351/3000 ] loss = 0.82993, acc = 0.77607
[ Valid | 351/3000 ] loss = 0.82993, acc = 0.77607


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 352/3000 ] loss = 0.97190, acc = 0.53389


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 352/3000 ] loss = 0.75502, acc = 0.79331
[ Valid | 352/3000 ] loss = 0.75502, acc = 0.79331


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 353/3000 ] loss = 0.98904, acc = 0.51048


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 353/3000 ] loss = 0.80413, acc = 0.78486
[ Valid | 353/3000 ] loss = 0.80413, acc = 0.78486


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 354/3000 ] loss = 0.97197, acc = 0.50423


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 354/3000 ] loss = 0.73286, acc = 0.79518
[ Valid | 354/3000 ] loss = 0.73286, acc = 0.79518


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 355/3000 ] loss = 0.96441, acc = 0.50289


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 355/3000 ] loss = 0.79388, acc = 0.77679
[ Valid | 355/3000 ] loss = 0.79388, acc = 0.77679


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 356/3000 ] loss = 0.93549, acc = 0.51320


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 356/3000 ] loss = 0.75900, acc = 0.78730
[ Valid | 356/3000 ] loss = 0.75900, acc = 0.78730


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 357/3000 ] loss = 0.92969, acc = 0.56070


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 357/3000 ] loss = 0.72993, acc = 0.78940
[ Valid | 357/3000 ] loss = 0.72993, acc = 0.78940


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 358/3000 ] loss = 1.00911, acc = 0.47552


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 358/3000 ] loss = 0.81173, acc = 0.77398
[ Valid | 358/3000 ] loss = 0.81173, acc = 0.77398


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 359/3000 ] loss = 0.96432, acc = 0.53180


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 359/3000 ] loss = 0.82958, acc = 0.76605
[ Valid | 359/3000 ] loss = 0.82958, acc = 0.76605


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 360/3000 ] loss = 0.95430, acc = 0.53608


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 360/3000 ] loss = 0.74478, acc = 0.79170
[ Valid | 360/3000 ] loss = 0.74478, acc = 0.79170


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 361/3000 ] loss = 0.90439, acc = 0.54722


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 361/3000 ] loss = 0.77853, acc = 0.78291
[ Valid | 361/3000 ] loss = 0.77853, acc = 0.78291


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 362/3000 ] loss = 0.92167, acc = 0.47278


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 362/3000 ] loss = 0.70823, acc = 0.80732
[ Valid | 362/3000 ] loss = 0.70823, acc = 0.80732 -> best
Best model found at epoch 361, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 363/3000 ] loss = 0.97146, acc = 0.52646


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 363/3000 ] loss = 0.73972, acc = 0.79770
[ Valid | 363/3000 ] loss = 0.73972, acc = 0.79770


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 364/3000 ] loss = 0.96066, acc = 0.54288


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 364/3000 ] loss = 0.84025, acc = 0.76760
[ Valid | 364/3000 ] loss = 0.84025, acc = 0.76760


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 365/3000 ] loss = 1.00856, acc = 0.50807


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 365/3000 ] loss = 0.80397, acc = 0.78190
[ Valid | 365/3000 ] loss = 0.80397, acc = 0.78190


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 366/3000 ] loss = 0.97237, acc = 0.54049


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 366/3000 ] loss = 0.76550, acc = 0.78914
[ Valid | 366/3000 ] loss = 0.76550, acc = 0.78914


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 367/3000 ] loss = 0.93202, acc = 0.56088


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 367/3000 ] loss = 0.77922, acc = 0.78863
[ Valid | 367/3000 ] loss = 0.77922, acc = 0.78863


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 368/3000 ] loss = 0.96414, acc = 0.51465


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 368/3000 ] loss = 0.78775, acc = 0.78503
[ Valid | 368/3000 ] loss = 0.78775, acc = 0.78503


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 369/3000 ] loss = 0.93554, acc = 0.52746


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 369/3000 ] loss = 0.75542, acc = 0.78429
[ Valid | 369/3000 ] loss = 0.75542, acc = 0.78429


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 370/3000 ] loss = 0.94111, acc = 0.47727


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 370/3000 ] loss = 0.74984, acc = 0.78880
[ Valid | 370/3000 ] loss = 0.74984, acc = 0.78880


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 371/3000 ] loss = 0.97417, acc = 0.55653


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 371/3000 ] loss = 0.75065, acc = 0.79825
[ Valid | 371/3000 ] loss = 0.75065, acc = 0.79825


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 372/3000 ] loss = 0.96905, acc = 0.52683


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 372/3000 ] loss = 0.75713, acc = 0.78604
[ Valid | 372/3000 ] loss = 0.75713, acc = 0.78604


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 373/3000 ] loss = 0.96467, acc = 0.56631


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 373/3000 ] loss = 0.77486, acc = 0.78199
[ Valid | 373/3000 ] loss = 0.77486, acc = 0.78199


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 374/3000 ] loss = 0.94367, acc = 0.58421


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 374/3000 ] loss = 0.79772, acc = 0.77421
[ Valid | 374/3000 ] loss = 0.79772, acc = 0.77421


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 375/3000 ] loss = 0.93563, acc = 0.50680


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 375/3000 ] loss = 0.72497, acc = 0.79833
[ Valid | 375/3000 ] loss = 0.72497, acc = 0.79833


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 376/3000 ] loss = 0.95852, acc = 0.54687


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 376/3000 ] loss = 0.82903, acc = 0.78242
[ Valid | 376/3000 ] loss = 0.82903, acc = 0.78242


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 377/3000 ] loss = 0.92392, acc = 0.51621


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 377/3000 ] loss = 0.74721, acc = 0.79061
[ Valid | 377/3000 ] loss = 0.74721, acc = 0.79061


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 378/3000 ] loss = 0.95543, acc = 0.55055


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 378/3000 ] loss = 0.75791, acc = 0.79380
[ Valid | 378/3000 ] loss = 0.75791, acc = 0.79380


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 379/3000 ] loss = 0.91615, acc = 0.47675


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 379/3000 ] loss = 0.79770, acc = 0.77240
[ Valid | 379/3000 ] loss = 0.79770, acc = 0.77240


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 380/3000 ] loss = 0.97314, acc = 0.54313


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 380/3000 ] loss = 0.74117, acc = 0.80072
[ Valid | 380/3000 ] loss = 0.74117, acc = 0.80072


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 381/3000 ] loss = 0.92657, acc = 0.53259


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 381/3000 ] loss = 0.71964, acc = 0.79839
[ Valid | 381/3000 ] loss = 0.71964, acc = 0.79839


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 382/3000 ] loss = 0.93804, acc = 0.53070


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 382/3000 ] loss = 0.76176, acc = 0.78627
[ Valid | 382/3000 ] loss = 0.76176, acc = 0.78627


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 383/3000 ] loss = 0.97041, acc = 0.57746


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 383/3000 ] loss = 0.77072, acc = 0.77958
[ Valid | 383/3000 ] loss = 0.77072, acc = 0.77958


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 384/3000 ] loss = 0.96819, acc = 0.55610


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 384/3000 ] loss = 0.73133, acc = 0.79882
[ Valid | 384/3000 ] loss = 0.73133, acc = 0.79882


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 385/3000 ] loss = 0.98694, acc = 0.58064


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 385/3000 ] loss = 0.80292, acc = 0.78745
[ Valid | 385/3000 ] loss = 0.80292, acc = 0.78745


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 386/3000 ] loss = 0.98631, acc = 0.47369


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 386/3000 ] loss = 0.76719, acc = 0.79127
[ Valid | 386/3000 ] loss = 0.76719, acc = 0.79127


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 387/3000 ] loss = 0.90313, acc = 0.49365


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 387/3000 ] loss = 0.76453, acc = 0.78495
[ Valid | 387/3000 ] loss = 0.76453, acc = 0.78495


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 388/3000 ] loss = 0.96728, acc = 0.50037


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 388/3000 ] loss = 0.73802, acc = 0.78920
[ Valid | 388/3000 ] loss = 0.73802, acc = 0.78920


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 389/3000 ] loss = 0.97750, acc = 0.53616


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 389/3000 ] loss = 0.77899, acc = 0.78719
[ Valid | 389/3000 ] loss = 0.77899, acc = 0.78719


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 390/3000 ] loss = 0.92349, acc = 0.49227


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 390/3000 ] loss = 0.78368, acc = 0.78196
[ Valid | 390/3000 ] loss = 0.78368, acc = 0.78196


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 391/3000 ] loss = 0.99847, acc = 0.49295


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 391/3000 ] loss = 0.70175, acc = 0.80801
[ Valid | 391/3000 ] loss = 0.70175, acc = 0.80801 -> best
Best model found at epoch 390, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 392/3000 ] loss = 0.96366, acc = 0.51087


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 392/3000 ] loss = 0.78083, acc = 0.78472
[ Valid | 392/3000 ] loss = 0.78083, acc = 0.78472


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 393/3000 ] loss = 0.98205, acc = 0.52052


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 393/3000 ] loss = 0.75262, acc = 0.79518
[ Valid | 393/3000 ] loss = 0.75262, acc = 0.79518


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 394/3000 ] loss = 0.96543, acc = 0.56480


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 394/3000 ] loss = 0.74264, acc = 0.79684
[ Valid | 394/3000 ] loss = 0.74264, acc = 0.79684


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 395/3000 ] loss = 0.97178, acc = 0.53395


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 395/3000 ] loss = 0.71916, acc = 0.80301
[ Valid | 395/3000 ] loss = 0.71916, acc = 0.80301


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 396/3000 ] loss = 0.92333, acc = 0.53935


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 396/3000 ] loss = 0.87077, acc = 0.75927
[ Valid | 396/3000 ] loss = 0.87077, acc = 0.75927


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 397/3000 ] loss = 0.90818, acc = 0.52317


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 397/3000 ] loss = 0.79250, acc = 0.78348
[ Valid | 397/3000 ] loss = 0.79250, acc = 0.78348


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 398/3000 ] loss = 0.97750, acc = 0.51534


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 398/3000 ] loss = 0.75818, acc = 0.78682
[ Valid | 398/3000 ] loss = 0.75818, acc = 0.78682


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 399/3000 ] loss = 1.01792, acc = 0.54759


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 399/3000 ] loss = 0.82168, acc = 0.77214
[ Valid | 399/3000 ] loss = 0.82168, acc = 0.77214


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 400/3000 ] loss = 1.02139, acc = 0.53119


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 400/3000 ] loss = 0.78134, acc = 0.78570
[ Valid | 400/3000 ] loss = 0.78134, acc = 0.78570


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 401/3000 ] loss = 0.96699, acc = 0.49498


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 401/3000 ] loss = 0.76347, acc = 0.78954
[ Valid | 401/3000 ] loss = 0.76347, acc = 0.78954


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 402/3000 ] loss = 0.93670, acc = 0.50418


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 402/3000 ] loss = 0.72691, acc = 0.79506
[ Valid | 402/3000 ] loss = 0.72691, acc = 0.79506


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 403/3000 ] loss = 0.96740, acc = 0.56262


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 403/3000 ] loss = 0.79505, acc = 0.77917
[ Valid | 403/3000 ] loss = 0.79505, acc = 0.77917


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 404/3000 ] loss = 0.92845, acc = 0.55622


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 404/3000 ] loss = 0.78608, acc = 0.79038
[ Valid | 404/3000 ] loss = 0.78608, acc = 0.79038


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 405/3000 ] loss = 0.94972, acc = 0.50554


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 405/3000 ] loss = 0.89737, acc = 0.75005
[ Valid | 405/3000 ] loss = 0.89737, acc = 0.75005


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 406/3000 ] loss = 0.95061, acc = 0.52573


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 406/3000 ] loss = 0.73771, acc = 0.79199
[ Valid | 406/3000 ] loss = 0.73771, acc = 0.79199


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 407/3000 ] loss = 0.94590, acc = 0.51082


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 407/3000 ] loss = 0.77096, acc = 0.78466
[ Valid | 407/3000 ] loss = 0.77096, acc = 0.78466


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 408/3000 ] loss = 0.89842, acc = 0.50537


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 408/3000 ] loss = 0.74815, acc = 0.79477
[ Valid | 408/3000 ] loss = 0.74815, acc = 0.79477


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 409/3000 ] loss = 0.94436, acc = 0.54536


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 409/3000 ] loss = 0.75261, acc = 0.78751
[ Valid | 409/3000 ] loss = 0.75261, acc = 0.78751


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 410/3000 ] loss = 0.98140, acc = 0.48000


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 410/3000 ] loss = 0.86974, acc = 0.75327
[ Valid | 410/3000 ] loss = 0.86974, acc = 0.75327


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 411/3000 ] loss = 0.93656, acc = 0.56184


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 411/3000 ] loss = 0.78098, acc = 0.77921
[ Valid | 411/3000 ] loss = 0.78098, acc = 0.77921


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 412/3000 ] loss = 0.91827, acc = 0.57295


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 412/3000 ] loss = 0.74312, acc = 0.78952
[ Valid | 412/3000 ] loss = 0.74312, acc = 0.78952


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 413/3000 ] loss = 0.98616, acc = 0.52454


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 413/3000 ] loss = 0.76336, acc = 0.79262
[ Valid | 413/3000 ] loss = 0.76336, acc = 0.79262


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 414/3000 ] loss = 0.92025, acc = 0.52173


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 414/3000 ] loss = 0.73512, acc = 0.80411
[ Valid | 414/3000 ] loss = 0.73512, acc = 0.80411


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 415/3000 ] loss = 0.93564, acc = 0.55597


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 415/3000 ] loss = 0.80423, acc = 0.77288
[ Valid | 415/3000 ] loss = 0.80423, acc = 0.77288


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 416/3000 ] loss = 0.91399, acc = 0.55006


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 416/3000 ] loss = 0.74633, acc = 0.79135
[ Valid | 416/3000 ] loss = 0.74633, acc = 0.79135


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 417/3000 ] loss = 0.99727, acc = 0.52058


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 417/3000 ] loss = 0.75355, acc = 0.79581
[ Valid | 417/3000 ] loss = 0.75355, acc = 0.79581


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 418/3000 ] loss = 0.98663, acc = 0.50849


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 418/3000 ] loss = 0.75609, acc = 0.79156
[ Valid | 418/3000 ] loss = 0.75609, acc = 0.79156


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 419/3000 ] loss = 0.94735, acc = 0.54991


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 419/3000 ] loss = 0.78012, acc = 0.78702
[ Valid | 419/3000 ] loss = 0.78012, acc = 0.78702


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 420/3000 ] loss = 0.99522, acc = 0.53181


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 420/3000 ] loss = 0.83361, acc = 0.76674
[ Valid | 420/3000 ] loss = 0.83361, acc = 0.76674


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 421/3000 ] loss = 0.91833, acc = 0.51704


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 421/3000 ] loss = 0.73539, acc = 0.79443
[ Valid | 421/3000 ] loss = 0.73539, acc = 0.79443


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 422/3000 ] loss = 0.92650, acc = 0.53641


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 422/3000 ] loss = 0.74257, acc = 0.79805
[ Valid | 422/3000 ] loss = 0.74257, acc = 0.79805


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 423/3000 ] loss = 0.93867, acc = 0.50818


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 423/3000 ] loss = 0.81196, acc = 0.77809
[ Valid | 423/3000 ] loss = 0.81196, acc = 0.77809


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 424/3000 ] loss = 0.94884, acc = 0.52780


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 424/3000 ] loss = 0.74744, acc = 0.79414
[ Valid | 424/3000 ] loss = 0.74744, acc = 0.79414


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 425/3000 ] loss = 0.92405, acc = 0.55529


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 425/3000 ] loss = 0.76445, acc = 0.78271
[ Valid | 425/3000 ] loss = 0.76445, acc = 0.78271


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 426/3000 ] loss = 0.88307, acc = 0.51250


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 426/3000 ] loss = 0.71691, acc = 0.80069
[ Valid | 426/3000 ] loss = 0.71691, acc = 0.80069


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 427/3000 ] loss = 0.91784, acc = 0.55896


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 427/3000 ] loss = 0.68425, acc = 0.80551
[ Valid | 427/3000 ] loss = 0.68425, acc = 0.80551


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 428/3000 ] loss = 0.93066, acc = 0.51827


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 428/3000 ] loss = 0.75184, acc = 0.79066
[ Valid | 428/3000 ] loss = 0.75184, acc = 0.79066


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 429/3000 ] loss = 0.98131, acc = 0.55365


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 429/3000 ] loss = 0.73121, acc = 0.80865
[ Valid | 429/3000 ] loss = 0.73121, acc = 0.80865 -> best
Best model found at epoch 428, saving model


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 430/3000 ] loss = 0.90854, acc = 0.55127


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 430/3000 ] loss = 0.73012, acc = 0.79882
[ Valid | 430/3000 ] loss = 0.73012, acc = 0.79882


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 431/3000 ] loss = 0.93147, acc = 0.53787


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 431/3000 ] loss = 0.77140, acc = 0.78501
[ Valid | 431/3000 ] loss = 0.77140, acc = 0.78501


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 432/3000 ] loss = 0.97758, acc = 0.51737


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 432/3000 ] loss = 0.75231, acc = 0.79555
[ Valid | 432/3000 ] loss = 0.75231, acc = 0.79555


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 433/3000 ] loss = 0.96711, acc = 0.52461


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 433/3000 ] loss = 0.80426, acc = 0.77495
[ Valid | 433/3000 ] loss = 0.80426, acc = 0.77495


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 434/3000 ] loss = 0.91134, acc = 0.53590


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 434/3000 ] loss = 0.74803, acc = 0.78934
[ Valid | 434/3000 ] loss = 0.74803, acc = 0.78934


  0%|          | 0/177 [00:00<?, ?it/s]

[ Train | 435/3000 ] loss = 0.96913, acc = 0.48346


  0%|          | 0/16 [00:00<?, ?it/s]

[ Valid | 435/3000 ] loss = 0.74124, acc = 0.79575
[ Valid | 435/3000 ] loss = 0.74124, acc = 0.79575


  0%|          | 0/177 [00:00<?, ?it/s]

KeyboardInterrupt: 

# Test

In [15]:
# Build the evaluation pipeline for the held-out "test" folder.
# split_set is called with ratio=1 and shuffle=False -- presumably this returns
# every test path in a stable order (confirm against split_set's definition).
test_image_paths = split_set(
    os.path.join(_dataset_dir, "test"),
    ratio=1,
    shuffle=False,
)
# Wrap the paths in a dataset that applies the evaluation-time transform.
test_set = FoodDataset(test_image_paths, tfm=test_tfm)
test_loader = DataLoader(
    test_set,
    batch_size=batch_size,
    shuffle=False,  # keep the file order so row k of the output matches image k
    num_workers=0,
    pin_memory=True,
)

dataset with 3347 images


In [35]:
# Five separate dataset objects over the same test images, each using train_tfm
# (presumably a randomised augmentation pipeline, so each copy yields a
# different view of every image -- confirm against train_tfm's definition).
test_set1, test_set2, test_set3, test_set4, test_set5 = (
    FoodDataset(test_image_paths, tfm=train_tfm) for _ in range(5)
)
# Chain the copies back to back; with shuffle=False the loader walks through
# copy 1 completely, then copy 2, and so on.
test_set_all = ConcatDataset(
    [test_set1, test_set2, test_set3, test_set4, test_set5]
)
test_loader_all = DataLoader(
    test_set_all,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

dataset with 3347 images
dataset with 3347 images
dataset with 3347 images
dataset with 3347 images
dataset with 3347 images


# Test and generate the prediction CSV

In [36]:
# model_best = Classifier().to(device)
# Restore the best checkpoint written during training and switch to inference mode.
se_resnet50.load_state_dict(torch.load("se_resnet50_best.ckpt"))
se_resnet50.eval()


def _collect_outputs(loader):
    """Return the model outputs for every batch of `loader`, stacked along axis 0.

    The batches are gathered in a list that is created fresh on each call, so
    re-running this cell can never append to arrays left over from an earlier
    run (the previous try/except accumulator did exactly that).
    """
    chunks = []
    with torch.no_grad():
        for data, _ in tqdm(loader):
            chunks.append(se_resnet50(data.to(device)).cpu().numpy())
    # One concatenate at the end instead of re-copying the array every batch.
    return np.concatenate(chunks, axis=0)


# test_loader_all walks through several unshuffled copies of the test set one
# after another, so its rows can be regrouped as [n_copies, n_images, n_classes]
# and averaged over the copies. Sizes are derived instead of hard-coded.
test_pred_all = _collect_outputs(test_loader_all)
test_pred_all = test_pred_all.reshape(-1, len(test_set), test_pred_all.shape[-1])
test_pred_all = np.mean(test_pred_all, axis=0)

# Outputs for the test images under the plain evaluation transform.
test_pred_result = _collect_outputs(test_loader)

# Equal-weight blend of the plain outputs and the averaged multi-copy outputs.
test_pred_final = (test_pred_result + test_pred_all) / 2
test_label = np.argmax(test_pred_final, axis=1)
# argmax over axis 1 is already 1-D; tolist() always yields a list here.
prediction = test_label.tolist()

  0%|          | 0/131 [00:00<?, ?it/s]

  0%|          | 0/27 [00:00<?, ?it/s]

In [18]:
# Inference over test_loader only: reload the best checkpoint, then take the
# arg-max class of every batch and collect the results in order.
se_resnet50.load_state_dict(torch.load("se_resnet50_best.ckpt"))
se_resnet50.eval()
prediction = []
with torch.no_grad():
    for data, _ in tqdm(test_loader):
        test_pred = se_resnet50(data.to(device))
        test_label = np.argmax(test_pred.cpu().numpy(), axis=1)
        # np.argmax(..., axis=1) is already 1-D. The former .squeeze() turned a
        # final batch of size 1 into a 0-d array whose tolist() is a bare int,
        # which made `prediction += ...` raise TypeError.
        prediction.extend(test_label.tolist())

In [37]:
#create test csv
def pad4(i):
    """Return str(i) left-padded with "0" characters to a width of at least four.

    Values whose text is already four or more characters long come back unchanged.
    """
    return str(i).rjust(4, "0")
# Assemble the submission table in one step: zero-padded, 1-based Ids (one per
# item of test_set) alongside the predicted category, then write it to disk
# without the index column.
submission_ids = [pad4(n) for n in range(1, len(test_set) + 1)]
df = pd.DataFrame({"Id": submission_ids, "Category": prediction})
df.to_csv("se_resnet_mixup.csv", index=False)

# Q1. Augmentation Implementation
## Implement augmentation by finishing train_tfm in the code with image size of your choice. 
## Directly copy the following block and paste it on GradeScope after you finish the code
### Your train_tfm must be capable of producing 5+ different results when given an identical image multiple times.
### The train_tfm in your report may differ from the train_tfm used in your training code.


In [None]:
# Training-time preprocessing. Every random step below draws fresh parameters
# on each call, so the same input image yields a different tensor each time
# (the Q1 requirement of 5+ distinct results for an identical image).
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # Random crop covering 70-100% of the area, resized back to 128 x 128.
    transforms.RandomResizedCrop((128, 128), scale=(0.7, 1.0)),
    # Mirror left/right half of the time.
    transforms.RandomHorizontalFlip(p=0.5),
    # Rotate by a random angle in [-15, 15] degrees.
    transforms.RandomRotation(15),
    # Mild random changes of brightness, contrast and saturation.
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    # Convert the PIL image to a tensor last, after all PIL-level augmentations.
    transforms.ToTensor(),
])

# Q2. Residual Implementation
![](https://i.imgur.com/GYsq1Ap.png)
## Directly copy the following block and paste it on GradeScope after you finish the code


In [None]:
from torch import nn
class Residual_Network(nn.Module):
    """Six conv + batch-norm stages with identity skip connections and an MLP head.

    Stages 1, 3 and 5 change the channel count (3->64, 64->128, 128->256);
    stages 3 and 5 also use stride 2. Stages 2, 4 and 6 keep both the channel
    count and the spatial size, so the input of each of those stages is added
    to its output before the ReLU (identity shortcut).
    NOTE(review): the shortcut placement follows from where the tensor shapes
    match; confirm it against the diagram referenced in Q2.

    The head expects a 256 x 32 x 32 feature map, i.e. a 128 x 128 input image,
    and produces 11 output values per sample.
    """

    def __init__(self):
        super().__init__()

        self.cnn_layer1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
        )

        self.cnn_layer2 = nn.Sequential(
            nn.Conv2d(64, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
        )

        self.cnn_layer3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, 2, 1),
            nn.BatchNorm2d(128),
        )

        self.cnn_layer4 = nn.Sequential(
            nn.Conv2d(128, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
        )
        self.cnn_layer5 = nn.Sequential(
            nn.Conv2d(128, 256, 3, 2, 1),
            nn.BatchNorm2d(256),
        )
        self.cnn_layer6 = nn.Sequential(
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
        )
        self.fc_layer = nn.Sequential(
            nn.Linear(256* 32* 32, 256),
            nn.ReLU(),
            nn.Linear(256, 11)
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of images to class scores.

        input (x): [batch_size, 3, 128, 128]
        output: [batch_size, 11]
        """
        # Stage 1 changes the channel count, so it has no shortcut.
        x1 = self.relu(self.cnn_layer1(x))

        # Stage 2 preserves the shape: add its input back before the activation.
        x2 = self.relu(self.cnn_layer2(x1) + x1)

        # Stage 3 halves the resolution and doubles the channels: no shortcut.
        x3 = self.relu(self.cnn_layer3(x2))

        # Stage 4 preserves the shape: residual connection.
        x4 = self.relu(self.cnn_layer4(x3) + x3)

        # Stage 5 halves the resolution and doubles the channels: no shortcut.
        x5 = self.relu(self.cnn_layer5(x4))

        # Stage 6 preserves the shape: residual connection.
        x6 = self.relu(self.cnn_layer6(x5) + x5)

        # The extracted feature map must be flattened before the fully-connected layers.
        xout = x6.flatten(1)

        # The features are transformed by fully-connected layers to obtain the final logits.
        return self.fc_layer(xout)

In [71]:
class FoodDataset(Dataset):
    """Image dataset whose labels are encoded in the file names.

    Each item is ``(transformed_image, label)``. The label is the integer that
    precedes the first underscore of the file name; files whose name does not
    start with an integer (e.g. unlabeled test images) get label -1.

    Args:
        path: Directory scanned for ``.jpg`` files when ``files`` is not given.
        tfm: Callable applied to the opened PIL image.
        files: Optional explicit list of image paths; when provided it is used
            as-is and ``path`` is not scanned.
    """

    def __init__(self,path,tfm=test_tfm,files = None):
        # The previous `super(FoodDataset).__init__()` built an unbound super
        # object and never reached the parent initialiser.
        super().__init__()
        self.path = path
        if files is not None:
            self.files = files
        else:
            # Only touch the file system when no explicit file list was supplied.
            self.files = sorted(
                os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")
            )
        print(f"dataset with {path} images",len(self.files))
        self.transform = tfm

    def __len__(self):
        """Return the number of image files in the dataset."""
        return len(self.files)

    def __getitem__(self,idx):
        """Load, transform and label the image at position ``idx``."""
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        #im = self.data[idx]
        # Accept both "\\" and "/" as separators so the label is parsed from the
        # file name itself regardless of the platform that produced the path.
        base_name = os.fspath(fname).replace("\\", "/").rsplit("/", 1)[-1]
        try:
            label = int(base_name.split("_")[0])
        except ValueError:
            label = -1 # test has no label
        return im,label
