In [None]:
## HW3 Image Classification
#### Solve image classification with convolutional neural networks(CNN).
#### If you have any questions, please contact the TAs via TA hours, NTU COOL, or email to mlta-2023-spring@googlegroups.com

In [2]:
# check GPU type.
!nvidia-smi

Thu Mar 23 09:05:05 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.82.01    Driver Version: 470.82.01    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P0    25W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### Import Packages

In [3]:
_exp_name = "sample"

In [4]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset, Sampler
from torchvision.datasets import DatasetFolder, VisionDataset
# This is for the progress bar.
from tqdm.auto import tqdm
import random

In [5]:
myseed = 42  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

In [6]:
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.backends.cudnn.benchmark = False

In [7]:
class DataLoaderX(DataLoader):
    def __iter__(self):
        return BackgroundGenerator(super().__iter__())

### Transforms

In [8]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    # Resize the image into a fixed shape (height = width = 128)
    transforms.RandomHorizontalFlip(),
    transforms.AutoAugment(),
    
    # ToTensor() should be the last one of the transforms.
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

### Semi-supervised

In [9]:
class PseudoDataser(Dataset):
    def __init__(self, dataset, indices, labels=[]):
        self.dataset = dataset
        self.indices = indices
        self.targets = labels
    def __getitem__(self, index):
        subset = self.dataset[self.indeices[index]]
        imgs, _ = subset
        if len(self.targets) > 0:
            return imgs, self.targets[index]
        else:
            return subset
    def __len__(self):
        return len(self.indeices)

In [10]:
def get_pseudo_labels(dataset, model, threshole=0.65, is_test=False):
    
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    data_loader = DataLoderX(dataset, batch_size=batch_size, shuffle=False)
    
    model.eval() 
    softmax = nn.Softmax(dim=1)
    pseudo_probs = []
    pseudo_labels = []
    
    for batch in data_loader:
        img, _ = batch
        
        with torch.no_grad():
            outputs = model(img.to(device))
            
        probs = softmax(outputs)
        probs_max, probs = probs.max(1)
        
        pseudo_probs.extend(probs_max.cpu().numpy().tolist())
        pseudo_labels.extend(probs.cpu().numpy().tolist())
        
        if is_test:
            break
            
    pesudo_indices = [i for i, v in enumerate(pseudo_probs) if v >= threshold]
    pesudo_set = PseudoDataser(dataset, pesudo_indices, [pseudo_labels[i] for i in pesudo_indices])
    
    print("pesudo images above confidence %.2f: %d" %(threshold, len(pesudo_indices)))
    model.train()
    return pesudo_set
    
        
        

### Datasets

In [11]:
class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files 
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        
        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1 # test has no label
            
        return im,label

In [12]:
class propotionalsampler(Sampler):
    def __init__(self, concat_dataset, batch_size, minor_ratio=0.3, replacement=True, generator=None):   
        super(propotionalsampler).__init__()
        self.datasets = concat_dataset
        self.minor_ratio = minor_ratio
        self.sizes = concat_dataset.cumalative_sizes
        self._num_samples = num_samples
        self.generator = generator
        self.replacement = replacement
        self.batch_size = batch_size
    
    def __iter__(self):
        n1, n = self.sizes
        if self.generator is None:
            generator = torch.Generator()
            generator.manual_seed(int(torch.empty((), dtype=torch.int64).random().item()))
        else:
            generator = self.generator
            
        if self.replacement:
            size_n1 = int(self.batch_size*(1-self.minor_ratio))
            size_n2 = self.batch_size - size_n1
            
            for _ in range(int(np.ceil(m/self.batch.size))):
                idx_n1 = torch.randint(high=n1, size=(size_n1,), dtype=torch.int64, generator=generator).tolist()
                idx_n2 = torch.randint(low=n1, high=n, size=(size_n2,), dtype=torch.int64, generator=generator).tolist()
                idx_n1.extend(idx_n2)
                yield from idx_n1
        else:
            yield from torch.randperm(n, generator=generator).tolist()
    
    def __len__(self):
        return self.sizes[-1]
            
            

### Model

In [13]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input 維度 [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

### Configurations

In [14]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

do_semi = True

threshold = 0.85

semi_turns = 10

# Initialize a model, and put it on the device specified.
model = Classifier().to(device)

# The number of batch size.
batch_size = 64

# The number of training epochs.
n_epochs = 200

# If no improvement in 'patience' epochs, early stop.
patience = 300

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=15, eta_min=1e-9) 

### Dataloader

In [15]:
# Construct train and valid datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = FoodDataset("/kaggle/input/ml2023spring-hw3/train", tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = FoodDataset("/kaggle/input/ml2023spring-hw3/valid", tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

### Start Training

In [16]:
# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

for epoch in range(n_epochs):
    
    if do_semi and best_acc > 0.75 and epoch % semi_turns ==0:
        pseudo_set = get_pesudo_labels(unlabled_set, mode, threshold=threshold)
        concat_dataset = ConcatDataset([train_set, pseudo_set])
        sampler = propotionalsampler(concat_dataset, batch_size=batch_size, minor_ratio=0.9)
        train_loader = DataLoaderX(concat_dataset, batch_size=batch_size, sampler=sampler, num_workers=0)

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()
    

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 001/200 ] loss = 2.15911, acc = 0.24045


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 001/200 ] loss = 1.97506, acc = 0.28345
[ Valid | 001/200 ] loss = 1.97506, acc = 0.28345 -> best
Best model found at epoch 0, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 002/200 ] loss = 1.94461, acc = 0.31947


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 002/200 ] loss = 1.74407, acc = 0.38732
[ Valid | 002/200 ] loss = 1.74407, acc = 0.38732 -> best
Best model found at epoch 1, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 003/200 ] loss = 1.82532, acc = 0.35858


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 003/200 ] loss = 1.69784, acc = 0.39788
[ Valid | 003/200 ] loss = 1.69784, acc = 0.39788 -> best
Best model found at epoch 2, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 004/200 ] loss = 1.70729, acc = 0.40525


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 004/200 ] loss = 1.69285, acc = 0.43447
[ Valid | 004/200 ] loss = 1.69285, acc = 0.43447 -> best
Best model found at epoch 3, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 005/200 ] loss = 1.61882, acc = 0.44258


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 005/200 ] loss = 1.42450, acc = 0.51822
[ Valid | 005/200 ] loss = 1.42450, acc = 0.51822 -> best
Best model found at epoch 4, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 006/200 ] loss = 1.54429, acc = 0.46636


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 006/200 ] loss = 1.50542, acc = 0.48146
[ Valid | 006/200 ] loss = 1.50542, acc = 0.48146


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 007/200 ] loss = 1.45570, acc = 0.49463


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 007/200 ] loss = 1.36901, acc = 0.53903
[ Valid | 007/200 ] loss = 1.36901, acc = 0.53903 -> best
Best model found at epoch 6, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 008/200 ] loss = 1.41568, acc = 0.51692


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 008/200 ] loss = 1.34157, acc = 0.55280
[ Valid | 008/200 ] loss = 1.34157, acc = 0.55280 -> best
Best model found at epoch 7, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 009/200 ] loss = 1.34675, acc = 0.53742


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 009/200 ] loss = 1.37713, acc = 0.53485
[ Valid | 009/200 ] loss = 1.37713, acc = 0.53485


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 010/200 ] loss = 1.28525, acc = 0.55762


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 010/200 ] loss = 1.15638, acc = 0.61113
[ Valid | 010/200 ] loss = 1.15638, acc = 0.61113 -> best
Best model found at epoch 9, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 011/200 ] loss = 1.23809, acc = 0.57385


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 011/200 ] loss = 1.15578, acc = 0.60914
[ Valid | 011/200 ] loss = 1.15578, acc = 0.60914


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 012/200 ] loss = 1.19618, acc = 0.59086


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 012/200 ] loss = 1.24168, acc = 0.57346
[ Valid | 012/200 ] loss = 1.24168, acc = 0.57346


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 013/200 ] loss = 1.15515, acc = 0.60470


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 013/200 ] loss = 1.21167, acc = 0.58972
[ Valid | 013/200 ] loss = 1.21167, acc = 0.58972


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 014/200 ] loss = 1.12320, acc = 0.61555


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 014/200 ] loss = 1.32030, acc = 0.58376
[ Valid | 014/200 ] loss = 1.32030, acc = 0.58376


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 015/200 ] loss = 1.09427, acc = 0.62371


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 015/200 ] loss = 1.14107, acc = 0.62318
[ Valid | 015/200 ] loss = 1.14107, acc = 0.62318 -> best
Best model found at epoch 14, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 016/200 ] loss = 1.05376, acc = 0.64072


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 016/200 ] loss = 1.01766, acc = 0.65060
[ Valid | 016/200 ] loss = 1.01766, acc = 0.65060 -> best
Best model found at epoch 15, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 017/200 ] loss = 1.01095, acc = 0.65754


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 017/200 ] loss = 1.10746, acc = 0.63121
[ Valid | 017/200 ] loss = 1.10746, acc = 0.63121


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 018/200 ] loss = 0.98639, acc = 0.66222


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 018/200 ] loss = 1.18751, acc = 0.59952
[ Valid | 018/200 ] loss = 1.18751, acc = 0.59952


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 019/200 ] loss = 0.96283, acc = 0.67337


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 019/200 ] loss = 1.13865, acc = 0.62447
[ Valid | 019/200 ] loss = 1.13865, acc = 0.62447


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 020/200 ] loss = 0.93577, acc = 0.67854


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 020/200 ] loss = 1.07146, acc = 0.66134
[ Valid | 020/200 ] loss = 1.07146, acc = 0.66134 -> best
Best model found at epoch 19, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 021/200 ] loss = 0.89478, acc = 0.69437


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 021/200 ] loss = 0.95593, acc = 0.68576
[ Valid | 021/200 ] loss = 0.95593, acc = 0.68576 -> best
Best model found at epoch 20, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 022/200 ] loss = 0.88212, acc = 0.69885


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 022/200 ] loss = 1.30901, acc = 0.59751
[ Valid | 022/200 ] loss = 1.30901, acc = 0.59751


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 023/200 ] loss = 0.85615, acc = 0.70442


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 023/200 ] loss = 1.17199, acc = 0.63555
[ Valid | 023/200 ] loss = 1.17199, acc = 0.63555


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 024/200 ] loss = 0.83124, acc = 0.71835


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 024/200 ] loss = 1.08281, acc = 0.65352
[ Valid | 024/200 ] loss = 1.08281, acc = 0.65352


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 025/200 ] loss = 0.79141, acc = 0.73477


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 025/200 ] loss = 0.98876, acc = 0.67063
[ Valid | 025/200 ] loss = 0.98876, acc = 0.67063


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 026/200 ] loss = 0.75817, acc = 0.74094


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 026/200 ] loss = 1.17700, acc = 0.64078
[ Valid | 026/200 ] loss = 1.17700, acc = 0.64078


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 027/200 ] loss = 0.76286, acc = 0.73975


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 027/200 ] loss = 1.08412, acc = 0.66508
[ Valid | 027/200 ] loss = 1.08412, acc = 0.66508


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 028/200 ] loss = 0.72364, acc = 0.74910


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 028/200 ] loss = 0.98238, acc = 0.69106
[ Valid | 028/200 ] loss = 0.98238, acc = 0.69106 -> best
Best model found at epoch 27, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 029/200 ] loss = 0.71166, acc = 0.75975


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 029/200 ] loss = 0.96535, acc = 0.69366
[ Valid | 029/200 ] loss = 0.96535, acc = 0.69366 -> best
Best model found at epoch 28, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 030/200 ] loss = 0.69084, acc = 0.76194


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 030/200 ] loss = 0.98511, acc = 0.69722
[ Valid | 030/200 ] loss = 0.98511, acc = 0.69722 -> best
Best model found at epoch 29, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 031/200 ] loss = 0.64457, acc = 0.77856


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 031/200 ] loss = 0.95354, acc = 0.70714
[ Valid | 031/200 ] loss = 0.95354, acc = 0.70714 -> best
Best model found at epoch 30, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 032/200 ] loss = 0.65041, acc = 0.77607


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 032/200 ] loss = 1.14946, acc = 0.67744
[ Valid | 032/200 ] loss = 1.14946, acc = 0.67744


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 033/200 ] loss = 0.62382, acc = 0.78443


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 033/200 ] loss = 1.12073, acc = 0.67804
[ Valid | 033/200 ] loss = 1.12073, acc = 0.67804


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 034/200 ] loss = 0.59010, acc = 0.79986


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 034/200 ] loss = 0.96612, acc = 0.70627
[ Valid | 034/200 ] loss = 0.96612, acc = 0.70627


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 035/200 ] loss = 0.58466, acc = 0.80295


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 035/200 ] loss = 1.02522, acc = 0.69985
[ Valid | 035/200 ] loss = 1.02522, acc = 0.69985


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 036/200 ] loss = 0.57242, acc = 0.80474


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 036/200 ] loss = 1.15401, acc = 0.66840
[ Valid | 036/200 ] loss = 1.15401, acc = 0.66840


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 037/200 ] loss = 0.53898, acc = 0.81429


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 037/200 ] loss = 1.08483, acc = 0.70079
[ Valid | 037/200 ] loss = 1.08483, acc = 0.70079


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 038/200 ] loss = 0.54662, acc = 0.81718


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 038/200 ] loss = 0.95154, acc = 0.70844
[ Valid | 038/200 ] loss = 0.95154, acc = 0.70844 -> best
Best model found at epoch 37, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 039/200 ] loss = 0.53162, acc = 0.82166


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 039/200 ] loss = 1.01123, acc = 0.70995
[ Valid | 039/200 ] loss = 1.01123, acc = 0.70995 -> best
Best model found at epoch 38, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 040/200 ] loss = 0.51381, acc = 0.82763


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 040/200 ] loss = 1.00404, acc = 0.70712
[ Valid | 040/200 ] loss = 1.00404, acc = 0.70712


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 041/200 ] loss = 0.50010, acc = 0.82673


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 041/200 ] loss = 1.02706, acc = 0.71532
[ Valid | 041/200 ] loss = 1.02706, acc = 0.71532 -> best
Best model found at epoch 40, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 042/200 ] loss = 0.47752, acc = 0.84057


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 042/200 ] loss = 1.06905, acc = 0.70282
[ Valid | 042/200 ] loss = 1.06905, acc = 0.70282


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 043/200 ] loss = 0.47659, acc = 0.84066


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 043/200 ] loss = 1.01225, acc = 0.71088
[ Valid | 043/200 ] loss = 1.01225, acc = 0.71088


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 044/200 ] loss = 0.45125, acc = 0.84624


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 044/200 ] loss = 1.11860, acc = 0.70266
[ Valid | 044/200 ] loss = 1.11860, acc = 0.70266


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 045/200 ] loss = 0.45679, acc = 0.84146


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 045/200 ] loss = 1.08890, acc = 0.70691
[ Valid | 045/200 ] loss = 1.08890, acc = 0.70691


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 046/200 ] loss = 0.42066, acc = 0.85808


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 046/200 ] loss = 1.16002, acc = 0.70579
[ Valid | 046/200 ] loss = 1.16002, acc = 0.70579


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 047/200 ] loss = 0.41614, acc = 0.86176


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 047/200 ] loss = 1.20596, acc = 0.67790
[ Valid | 047/200 ] loss = 1.20596, acc = 0.67790


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 048/200 ] loss = 0.41283, acc = 0.85918


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 048/200 ] loss = 1.26609, acc = 0.69725
[ Valid | 048/200 ] loss = 1.26609, acc = 0.69725


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 049/200 ] loss = 0.41942, acc = 0.85838


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 049/200 ] loss = 0.98786, acc = 0.73540
[ Valid | 049/200 ] loss = 0.98786, acc = 0.73540 -> best
Best model found at epoch 48, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 050/200 ] loss = 0.38886, acc = 0.86584


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 050/200 ] loss = 1.38970, acc = 0.65798
[ Valid | 050/200 ] loss = 1.38970, acc = 0.65798


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 051/200 ] loss = 0.39138, acc = 0.86823


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 051/200 ] loss = 1.09710, acc = 0.72584
[ Valid | 051/200 ] loss = 1.09710, acc = 0.72584


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 052/200 ] loss = 0.37111, acc = 0.87341


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 052/200 ] loss = 1.13491, acc = 0.71571
[ Valid | 052/200 ] loss = 1.13491, acc = 0.71571


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 053/200 ] loss = 0.37228, acc = 0.87410


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 053/200 ] loss = 1.19250, acc = 0.71315
[ Valid | 053/200 ] loss = 1.19250, acc = 0.71315


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 054/200 ] loss = 0.36378, acc = 0.87908


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 054/200 ] loss = 1.14881, acc = 0.70899
[ Valid | 054/200 ] loss = 1.14881, acc = 0.70899


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 055/200 ] loss = 0.33770, acc = 0.88744


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 055/200 ] loss = 1.19414, acc = 0.70609
[ Valid | 055/200 ] loss = 1.19414, acc = 0.70609


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 056/200 ] loss = 0.34922, acc = 0.88057


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 056/200 ] loss = 1.07061, acc = 0.72110
[ Valid | 056/200 ] loss = 1.07061, acc = 0.72110


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 057/200 ] loss = 0.33263, acc = 0.88774


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 057/200 ] loss = 1.19544, acc = 0.70143
[ Valid | 057/200 ] loss = 1.19544, acc = 0.70143


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 058/200 ] loss = 0.34496, acc = 0.88535


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 058/200 ] loss = 1.18588, acc = 0.70625
[ Valid | 058/200 ] loss = 1.18588, acc = 0.70625


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 059/200 ] loss = 0.32407, acc = 0.89391


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 059/200 ] loss = 1.18292, acc = 0.71015
[ Valid | 059/200 ] loss = 1.18292, acc = 0.71015


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 060/200 ] loss = 0.32930, acc = 0.88883


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 060/200 ] loss = 1.17578, acc = 0.72169
[ Valid | 060/200 ] loss = 1.17578, acc = 0.72169


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 061/200 ] loss = 0.29932, acc = 0.90058


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 061/200 ] loss = 1.22421, acc = 0.71390
[ Valid | 061/200 ] loss = 1.22421, acc = 0.71390


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 062/200 ] loss = 0.29979, acc = 0.89889


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 062/200 ] loss = 1.46059, acc = 0.67806
[ Valid | 062/200 ] loss = 1.46059, acc = 0.67806


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 063/200 ] loss = 0.31431, acc = 0.89331


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 063/200 ] loss = 1.28171, acc = 0.70191
[ Valid | 063/200 ] loss = 1.28171, acc = 0.70191


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 064/200 ] loss = 0.30197, acc = 0.89769


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 064/200 ] loss = 1.23523, acc = 0.70951
[ Valid | 064/200 ] loss = 1.23523, acc = 0.70951


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 065/200 ] loss = 0.30037, acc = 0.89839


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 065/200 ] loss = 1.20685, acc = 0.72187
[ Valid | 065/200 ] loss = 1.20685, acc = 0.72187


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 066/200 ] loss = 0.26570, acc = 0.91093


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 066/200 ] loss = 1.25255, acc = 0.70864
[ Valid | 066/200 ] loss = 1.25255, acc = 0.70864


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 067/200 ] loss = 0.28751, acc = 0.90366


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 067/200 ] loss = 1.32564, acc = 0.70778
[ Valid | 067/200 ] loss = 1.32564, acc = 0.70778


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 068/200 ] loss = 0.27448, acc = 0.90894


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 068/200 ] loss = 1.23601, acc = 0.71404
[ Valid | 068/200 ] loss = 1.23601, acc = 0.71404


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 069/200 ] loss = 0.27273, acc = 0.90864


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 069/200 ] loss = 1.28576, acc = 0.71959
[ Valid | 069/200 ] loss = 1.28576, acc = 0.71959


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 070/200 ] loss = 0.26457, acc = 0.91043


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 070/200 ] loss = 1.29347, acc = 0.71297
[ Valid | 070/200 ] loss = 1.29347, acc = 0.71297


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 071/200 ] loss = 0.26633, acc = 0.90953


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 071/200 ] loss = 1.24647, acc = 0.72137
[ Valid | 071/200 ] loss = 1.24647, acc = 0.72137


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 072/200 ] loss = 0.26706, acc = 0.90963


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 072/200 ] loss = 1.31338, acc = 0.71710
[ Valid | 072/200 ] loss = 1.31338, acc = 0.71710


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 073/200 ] loss = 0.25728, acc = 0.91381


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 073/200 ] loss = 1.26350, acc = 0.71349
[ Valid | 073/200 ] loss = 1.26350, acc = 0.71349


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 074/200 ] loss = 0.26068, acc = 0.91292


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 074/200 ] loss = 1.31673, acc = 0.71269
[ Valid | 074/200 ] loss = 1.31673, acc = 0.71269


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 075/200 ] loss = 0.25264, acc = 0.91531


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 075/200 ] loss = 1.25022, acc = 0.72381
[ Valid | 075/200 ] loss = 1.25022, acc = 0.72381


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 076/200 ] loss = 0.25509, acc = 0.91232


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 076/200 ] loss = 1.22739, acc = 0.72605
[ Valid | 076/200 ] loss = 1.22739, acc = 0.72605


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 077/200 ] loss = 0.23959, acc = 0.91929


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 077/200 ] loss = 1.20577, acc = 0.74213
[ Valid | 077/200 ] loss = 1.20577, acc = 0.74213 -> best
Best model found at epoch 76, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 078/200 ] loss = 0.26028, acc = 0.91332


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 078/200 ] loss = 1.15246, acc = 0.72783
[ Valid | 078/200 ] loss = 1.15246, acc = 0.72783


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 079/200 ] loss = 0.22872, acc = 0.92148


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 079/200 ] loss = 1.34469, acc = 0.71153
[ Valid | 079/200 ] loss = 1.34469, acc = 0.71153


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 080/200 ] loss = 0.22375, acc = 0.92347


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 080/200 ] loss = 1.33157, acc = 0.71481
[ Valid | 080/200 ] loss = 1.33157, acc = 0.71481


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 081/200 ] loss = 0.22063, acc = 0.92446


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 081/200 ] loss = 1.26788, acc = 0.72372
[ Valid | 081/200 ] loss = 1.26788, acc = 0.72372


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 082/200 ] loss = 0.22289, acc = 0.92267


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 082/200 ] loss = 1.34220, acc = 0.70817
[ Valid | 082/200 ] loss = 1.34220, acc = 0.70817


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 083/200 ] loss = 0.21842, acc = 0.92486


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 083/200 ] loss = 1.46102, acc = 0.70111
[ Valid | 083/200 ] loss = 1.46102, acc = 0.70111


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 084/200 ] loss = 0.22192, acc = 0.92605


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 084/200 ] loss = 1.23644, acc = 0.73617
[ Valid | 084/200 ] loss = 1.23644, acc = 0.73617


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 085/200 ] loss = 0.21338, acc = 0.92735


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 085/200 ] loss = 1.31720, acc = 0.71390
[ Valid | 085/200 ] loss = 1.31720, acc = 0.71390


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 086/200 ] loss = 0.21605, acc = 0.92824


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 086/200 ] loss = 1.29673, acc = 0.72098
[ Valid | 086/200 ] loss = 1.29673, acc = 0.72098


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 087/200 ] loss = 0.20707, acc = 0.93133


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 087/200 ] loss = 1.39557, acc = 0.71098
[ Valid | 087/200 ] loss = 1.39557, acc = 0.71098


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 088/200 ] loss = 0.20134, acc = 0.93232


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 088/200 ] loss = 1.27661, acc = 0.72251
[ Valid | 088/200 ] loss = 1.27661, acc = 0.72251


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 089/200 ] loss = 0.21111, acc = 0.92745


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 089/200 ] loss = 1.38640, acc = 0.72160
[ Valid | 089/200 ] loss = 1.38640, acc = 0.72160


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 090/200 ] loss = 0.21011, acc = 0.92814


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 090/200 ] loss = 1.35606, acc = 0.71968
[ Valid | 090/200 ] loss = 1.35606, acc = 0.71968


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 091/200 ] loss = 0.19935, acc = 0.93471


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 091/200 ] loss = 1.39565, acc = 0.71344
[ Valid | 091/200 ] loss = 1.39565, acc = 0.71344


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 092/200 ] loss = 0.19848, acc = 0.93232


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 092/200 ] loss = 1.27724, acc = 0.71591
[ Valid | 092/200 ] loss = 1.27724, acc = 0.71591


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 093/200 ] loss = 0.19925, acc = 0.93402


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 093/200 ] loss = 1.38273, acc = 0.71945
[ Valid | 093/200 ] loss = 1.38273, acc = 0.71945


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 094/200 ] loss = 0.19770, acc = 0.93193


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 094/200 ] loss = 1.33000, acc = 0.71564
[ Valid | 094/200 ] loss = 1.33000, acc = 0.71564


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 095/200 ] loss = 0.18509, acc = 0.93611


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 095/200 ] loss = 1.35606, acc = 0.71872
[ Valid | 095/200 ] loss = 1.35606, acc = 0.71872


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 096/200 ] loss = 0.18673, acc = 0.93919


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 096/200 ] loss = 1.38822, acc = 0.72139
[ Valid | 096/200 ] loss = 1.38822, acc = 0.72139


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 097/200 ] loss = 0.17756, acc = 0.94148


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 097/200 ] loss = 1.34666, acc = 0.72311
[ Valid | 097/200 ] loss = 1.34666, acc = 0.72311


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 098/200 ] loss = 0.17850, acc = 0.94059


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 098/200 ] loss = 1.31174, acc = 0.73332
[ Valid | 098/200 ] loss = 1.31174, acc = 0.73332


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 099/200 ] loss = 0.17340, acc = 0.94248


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 099/200 ] loss = 1.43577, acc = 0.71648
[ Valid | 099/200 ] loss = 1.43577, acc = 0.71648


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 100/200 ] loss = 0.17368, acc = 0.94029


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 100/200 ] loss = 1.41505, acc = 0.71452
[ Valid | 100/200 ] loss = 1.41505, acc = 0.71452


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 101/200 ] loss = 0.18467, acc = 0.94009


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 101/200 ] loss = 1.64232, acc = 0.70479
[ Valid | 101/200 ] loss = 1.64232, acc = 0.70479


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 102/200 ] loss = 0.18290, acc = 0.93790


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 102/200 ] loss = 1.35579, acc = 0.73032
[ Valid | 102/200 ] loss = 1.35579, acc = 0.73032


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 103/200 ] loss = 0.16757, acc = 0.94576


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 103/200 ] loss = 1.37435, acc = 0.72486
[ Valid | 103/200 ] loss = 1.37435, acc = 0.72486


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 104/200 ] loss = 0.16066, acc = 0.94725


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 104/200 ] loss = 1.31791, acc = 0.73311
[ Valid | 104/200 ] loss = 1.31791, acc = 0.73311


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 105/200 ] loss = 0.16815, acc = 0.94437


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 105/200 ] loss = 1.40106, acc = 0.72334
[ Valid | 105/200 ] loss = 1.40106, acc = 0.72334


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 106/200 ] loss = 0.16542, acc = 0.94506


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 106/200 ] loss = 1.51990, acc = 0.71872
[ Valid | 106/200 ] loss = 1.51990, acc = 0.71872


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 107/200 ] loss = 0.17012, acc = 0.94078


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 107/200 ] loss = 1.59898, acc = 0.71091
[ Valid | 107/200 ] loss = 1.59898, acc = 0.71091


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 108/200 ] loss = 0.17294, acc = 0.94347


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 108/200 ] loss = 1.44628, acc = 0.73037
[ Valid | 108/200 ] loss = 1.44628, acc = 0.73037


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 109/200 ] loss = 0.15814, acc = 0.94676


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 109/200 ] loss = 1.45242, acc = 0.72185
[ Valid | 109/200 ] loss = 1.45242, acc = 0.72185


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 110/200 ] loss = 0.15428, acc = 0.94865


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 110/200 ] loss = 1.45243, acc = 0.73263
[ Valid | 110/200 ] loss = 1.45243, acc = 0.73263


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 111/200 ] loss = 0.15690, acc = 0.94755


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 111/200 ] loss = 1.46177, acc = 0.71646
[ Valid | 111/200 ] loss = 1.46177, acc = 0.71646


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 112/200 ] loss = 0.15848, acc = 0.94735


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 112/200 ] loss = 1.47682, acc = 0.71374
[ Valid | 112/200 ] loss = 1.47682, acc = 0.71374


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 113/200 ] loss = 0.15472, acc = 0.94875


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 113/200 ] loss = 1.42407, acc = 0.72749
[ Valid | 113/200 ] loss = 1.42407, acc = 0.72749


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 114/200 ] loss = 0.16175, acc = 0.94526


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 114/200 ] loss = 1.36197, acc = 0.72580
[ Valid | 114/200 ] loss = 1.36197, acc = 0.72580


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 115/200 ] loss = 0.16115, acc = 0.94566


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 115/200 ] loss = 1.40860, acc = 0.72402
[ Valid | 115/200 ] loss = 1.40860, acc = 0.72402


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 116/200 ] loss = 0.17163, acc = 0.94268


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 116/200 ] loss = 1.50128, acc = 0.70622
[ Valid | 116/200 ] loss = 1.50128, acc = 0.70622


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 117/200 ] loss = 0.14621, acc = 0.95113


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 117/200 ] loss = 1.39348, acc = 0.73679
[ Valid | 117/200 ] loss = 1.39348, acc = 0.73679


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 118/200 ] loss = 0.15602, acc = 0.94725


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 118/200 ] loss = 1.41397, acc = 0.73869
[ Valid | 118/200 ] loss = 1.41397, acc = 0.73869


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 119/200 ] loss = 0.14931, acc = 0.95064


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 119/200 ] loss = 1.33666, acc = 0.73585
[ Valid | 119/200 ] loss = 1.33666, acc = 0.73585


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 120/200 ] loss = 0.14588, acc = 0.95213


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 120/200 ] loss = 1.68641, acc = 0.70643
[ Valid | 120/200 ] loss = 1.68641, acc = 0.70643


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 121/200 ] loss = 0.15837, acc = 0.94586


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 121/200 ] loss = 1.47958, acc = 0.72448
[ Valid | 121/200 ] loss = 1.47958, acc = 0.72448


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 122/200 ] loss = 0.15017, acc = 0.94795


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 122/200 ] loss = 1.53834, acc = 0.71593
[ Valid | 122/200 ] loss = 1.53834, acc = 0.71593


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 123/200 ] loss = 0.13408, acc = 0.95551


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 123/200 ] loss = 1.51312, acc = 0.73179
[ Valid | 123/200 ] loss = 1.51312, acc = 0.73179


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 124/200 ] loss = 0.13849, acc = 0.95243


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 124/200 ] loss = 1.38460, acc = 0.72121
[ Valid | 124/200 ] loss = 1.38460, acc = 0.72121


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 125/200 ] loss = 0.13618, acc = 0.95551


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 125/200 ] loss = 1.32167, acc = 0.73053
[ Valid | 125/200 ] loss = 1.32167, acc = 0.73053


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 126/200 ] loss = 0.14232, acc = 0.95332


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 126/200 ] loss = 1.48582, acc = 0.73144
[ Valid | 126/200 ] loss = 1.48582, acc = 0.73144


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 127/200 ] loss = 0.13761, acc = 0.95283


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 127/200 ] loss = 1.42113, acc = 0.73049
[ Valid | 127/200 ] loss = 1.42113, acc = 0.73049


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 128/200 ] loss = 0.12640, acc = 0.95651


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 128/200 ] loss = 1.61771, acc = 0.72583
[ Valid | 128/200 ] loss = 1.61771, acc = 0.72583


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 129/200 ] loss = 0.13892, acc = 0.95422


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 129/200 ] loss = 1.44326, acc = 0.71806
[ Valid | 129/200 ] loss = 1.44326, acc = 0.71806


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 130/200 ] loss = 0.14606, acc = 0.95312


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 130/200 ] loss = 1.39227, acc = 0.73517
[ Valid | 130/200 ] loss = 1.39227, acc = 0.73517


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 131/200 ] loss = 0.12585, acc = 0.95750


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 131/200 ] loss = 1.56627, acc = 0.71557
[ Valid | 131/200 ] loss = 1.56627, acc = 0.71557


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 132/200 ] loss = 0.12317, acc = 0.95681


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 132/200 ] loss = 1.77961, acc = 0.70259
[ Valid | 132/200 ] loss = 1.77961, acc = 0.70259


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 133/200 ] loss = 0.12633, acc = 0.95780


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 133/200 ] loss = 1.46348, acc = 0.72962
[ Valid | 133/200 ] loss = 1.46348, acc = 0.72962


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 134/200 ] loss = 0.12530, acc = 0.95999


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 134/200 ] loss = 1.50428, acc = 0.73007
[ Valid | 134/200 ] loss = 1.50428, acc = 0.73007


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 135/200 ] loss = 0.12053, acc = 0.95920


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 135/200 ] loss = 1.34544, acc = 0.74031
[ Valid | 135/200 ] loss = 1.34544, acc = 0.74031


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 136/200 ] loss = 0.12824, acc = 0.95760


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 136/200 ] loss = 1.47224, acc = 0.71995
[ Valid | 136/200 ] loss = 1.47224, acc = 0.71995


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 137/200 ] loss = 0.12311, acc = 0.95939


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 137/200 ] loss = 1.46990, acc = 0.73750
[ Valid | 137/200 ] loss = 1.46990, acc = 0.73750


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 138/200 ] loss = 0.12368, acc = 0.95850


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 138/200 ] loss = 1.47253, acc = 0.73515
[ Valid | 138/200 ] loss = 1.47253, acc = 0.73515


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 139/200 ] loss = 0.11844, acc = 0.95999


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 139/200 ] loss = 1.48708, acc = 0.73193
[ Valid | 139/200 ] loss = 1.48708, acc = 0.73193


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 140/200 ] loss = 0.12134, acc = 0.95820


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 140/200 ] loss = 1.52995, acc = 0.71582
[ Valid | 140/200 ] loss = 1.52995, acc = 0.71582


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 141/200 ] loss = 0.12540, acc = 0.95780


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 141/200 ] loss = 1.60403, acc = 0.72139
[ Valid | 141/200 ] loss = 1.60403, acc = 0.72139


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 142/200 ] loss = 0.12669, acc = 0.95631


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 142/200 ] loss = 1.74466, acc = 0.71731
[ Valid | 142/200 ] loss = 1.74466, acc = 0.71731


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 143/200 ] loss = 0.10775, acc = 0.96427


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 143/200 ] loss = 1.53268, acc = 0.73841
[ Valid | 143/200 ] loss = 1.53268, acc = 0.73841


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 144/200 ] loss = 0.11970, acc = 0.95830


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 144/200 ] loss = 1.48692, acc = 0.73681
[ Valid | 144/200 ] loss = 1.48692, acc = 0.73681


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 145/200 ] loss = 0.11345, acc = 0.96258


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 145/200 ] loss = 1.44355, acc = 0.72980
[ Valid | 145/200 ] loss = 1.44355, acc = 0.72980


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 146/200 ] loss = 0.11092, acc = 0.96228


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 146/200 ] loss = 1.54644, acc = 0.72576
[ Valid | 146/200 ] loss = 1.54644, acc = 0.72576


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 147/200 ] loss = 0.12300, acc = 0.95800


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 147/200 ] loss = 1.72146, acc = 0.71696
[ Valid | 147/200 ] loss = 1.72146, acc = 0.71696


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 148/200 ] loss = 0.11298, acc = 0.96248


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 148/200 ] loss = 1.42463, acc = 0.73423
[ Valid | 148/200 ] loss = 1.42463, acc = 0.73423


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 149/200 ] loss = 0.10343, acc = 0.96586


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 149/200 ] loss = 1.56822, acc = 0.72706
[ Valid | 149/200 ] loss = 1.56822, acc = 0.72706


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 150/200 ] loss = 0.12343, acc = 0.95939


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 150/200 ] loss = 1.55709, acc = 0.72222
[ Valid | 150/200 ] loss = 1.55709, acc = 0.72222


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 151/200 ] loss = 0.11675, acc = 0.96099


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 151/200 ] loss = 1.55636, acc = 0.73416
[ Valid | 151/200 ] loss = 1.55636, acc = 0.73416


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 152/200 ] loss = 0.11727, acc = 0.96049


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 152/200 ] loss = 1.42939, acc = 0.72553
[ Valid | 152/200 ] loss = 1.42939, acc = 0.72553


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 153/200 ] loss = 0.10230, acc = 0.96696


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 153/200 ] loss = 1.51625, acc = 0.72836
[ Valid | 153/200 ] loss = 1.51625, acc = 0.72836


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 154/200 ] loss = 0.10556, acc = 0.96566


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 154/200 ] loss = 1.69721, acc = 0.71223
[ Valid | 154/200 ] loss = 1.69721, acc = 0.71223


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 155/200 ] loss = 0.11406, acc = 0.96268


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 155/200 ] loss = 1.61268, acc = 0.72297
[ Valid | 155/200 ] loss = 1.61268, acc = 0.72297


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 156/200 ] loss = 0.10466, acc = 0.96547


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 156/200 ] loss = 1.61747, acc = 0.71947
[ Valid | 156/200 ] loss = 1.61747, acc = 0.71947


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 157/200 ] loss = 0.11389, acc = 0.96158


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 157/200 ] loss = 1.53801, acc = 0.72991
[ Valid | 157/200 ] loss = 1.53801, acc = 0.72991


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 158/200 ] loss = 0.11627, acc = 0.95740


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 158/200 ] loss = 1.51138, acc = 0.73485
[ Valid | 158/200 ] loss = 1.51138, acc = 0.73485


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 159/200 ] loss = 0.09793, acc = 0.96905


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 159/200 ] loss = 1.59116, acc = 0.72802
[ Valid | 159/200 ] loss = 1.59116, acc = 0.72802


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 160/200 ] loss = 0.10522, acc = 0.96318


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 160/200 ] loss = 1.58097, acc = 0.73432
[ Valid | 160/200 ] loss = 1.58097, acc = 0.73432


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 161/200 ] loss = 0.10252, acc = 0.96626


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 161/200 ] loss = 1.57259, acc = 0.72850
[ Valid | 161/200 ] loss = 1.57259, acc = 0.72850


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 162/200 ] loss = 0.11069, acc = 0.96218


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 162/200 ] loss = 1.49208, acc = 0.73768
[ Valid | 162/200 ] loss = 1.49208, acc = 0.73768


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 163/200 ] loss = 0.11613, acc = 0.96049


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 163/200 ] loss = 1.57755, acc = 0.72738
[ Valid | 163/200 ] loss = 1.57755, acc = 0.72738


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 164/200 ] loss = 0.09510, acc = 0.96775


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 164/200 ] loss = 1.83333, acc = 0.71239
[ Valid | 164/200 ] loss = 1.83333, acc = 0.71239


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 165/200 ] loss = 0.10125, acc = 0.96576


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 165/200 ] loss = 1.68507, acc = 0.72637
[ Valid | 165/200 ] loss = 1.68507, acc = 0.72637


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 166/200 ] loss = 0.08840, acc = 0.96975


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 166/200 ] loss = 1.63678, acc = 0.71641
[ Valid | 166/200 ] loss = 1.63678, acc = 0.71641


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 167/200 ] loss = 0.09305, acc = 0.96795


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 167/200 ] loss = 1.60722, acc = 0.72039
[ Valid | 167/200 ] loss = 1.60722, acc = 0.72039


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 168/200 ] loss = 0.09683, acc = 0.96586


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 168/200 ] loss = 1.92079, acc = 0.71351
[ Valid | 168/200 ] loss = 1.92079, acc = 0.71351


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 169/200 ] loss = 0.10161, acc = 0.96557


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 169/200 ] loss = 1.71086, acc = 0.71497
[ Valid | 169/200 ] loss = 1.71086, acc = 0.71497


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 170/200 ] loss = 0.09898, acc = 0.96636


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 170/200 ] loss = 1.50950, acc = 0.74732
[ Valid | 170/200 ] loss = 1.50950, acc = 0.74732 -> best
Best model found at epoch 169, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 171/200 ] loss = 0.08706, acc = 0.97054


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 171/200 ] loss = 1.63547, acc = 0.71518
[ Valid | 171/200 ] loss = 1.63547, acc = 0.71518


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 172/200 ] loss = 0.09096, acc = 0.96965


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 172/200 ] loss = 1.66790, acc = 0.72082
[ Valid | 172/200 ] loss = 1.66790, acc = 0.72082


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 173/200 ] loss = 0.09907, acc = 0.96785


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 173/200 ] loss = 1.69065, acc = 0.72420
[ Valid | 173/200 ] loss = 1.69065, acc = 0.72420


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 174/200 ] loss = 0.08153, acc = 0.97472


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 174/200 ] loss = 1.77108, acc = 0.70824
[ Valid | 174/200 ] loss = 1.77108, acc = 0.70824


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 175/200 ] loss = 0.09769, acc = 0.96606


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 175/200 ] loss = 1.71234, acc = 0.72480
[ Valid | 175/200 ] loss = 1.71234, acc = 0.72480


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 176/200 ] loss = 0.09178, acc = 0.96965


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 176/200 ] loss = 1.62096, acc = 0.72361
[ Valid | 176/200 ] loss = 1.62096, acc = 0.72361


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 177/200 ] loss = 0.09191, acc = 0.96945


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 177/200 ] loss = 1.60280, acc = 0.73976
[ Valid | 177/200 ] loss = 1.60280, acc = 0.73976


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 178/200 ] loss = 0.09824, acc = 0.96706


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 178/200 ] loss = 1.61462, acc = 0.71063
[ Valid | 178/200 ] loss = 1.61462, acc = 0.71063


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 179/200 ] loss = 0.08201, acc = 0.97213


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 179/200 ] loss = 1.73087, acc = 0.72391
[ Valid | 179/200 ] loss = 1.73087, acc = 0.72391


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 180/200 ] loss = 0.09464, acc = 0.96984


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 180/200 ] loss = 1.77461, acc = 0.70988
[ Valid | 180/200 ] loss = 1.77461, acc = 0.70988


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 181/200 ] loss = 0.09554, acc = 0.96646


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 181/200 ] loss = 1.63785, acc = 0.72772
[ Valid | 181/200 ] loss = 1.63785, acc = 0.72772


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 182/200 ] loss = 0.08210, acc = 0.97353


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 182/200 ] loss = 1.88980, acc = 0.71041
[ Valid | 182/200 ] loss = 1.88980, acc = 0.71041


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 183/200 ] loss = 0.09186, acc = 0.96875


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 183/200 ] loss = 1.56296, acc = 0.72541
[ Valid | 183/200 ] loss = 1.56296, acc = 0.72541


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 184/200 ] loss = 0.08775, acc = 0.96994


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 184/200 ] loss = 1.61051, acc = 0.73286
[ Valid | 184/200 ] loss = 1.61051, acc = 0.73286


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 185/200 ] loss = 0.08600, acc = 0.97094


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 185/200 ] loss = 1.50206, acc = 0.73969
[ Valid | 185/200 ] loss = 1.50206, acc = 0.73969


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 186/200 ] loss = 0.09667, acc = 0.96775


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 186/200 ] loss = 1.65152, acc = 0.72966
[ Valid | 186/200 ] loss = 1.65152, acc = 0.72966


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 187/200 ] loss = 0.08705, acc = 0.97144


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 187/200 ] loss = 1.65098, acc = 0.71929
[ Valid | 187/200 ] loss = 1.65098, acc = 0.71929


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 188/200 ] loss = 0.08688, acc = 0.97074


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 188/200 ] loss = 1.69453, acc = 0.73523
[ Valid | 188/200 ] loss = 1.69453, acc = 0.73523


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 189/200 ] loss = 0.08855, acc = 0.97074


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 189/200 ] loss = 1.66296, acc = 0.72793
[ Valid | 189/200 ] loss = 1.66296, acc = 0.72793


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 190/200 ] loss = 0.07961, acc = 0.97293


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 190/200 ] loss = 1.64981, acc = 0.73729
[ Valid | 190/200 ] loss = 1.64981, acc = 0.73729


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 191/200 ] loss = 0.08746, acc = 0.97124


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 191/200 ] loss = 1.45773, acc = 0.74389
[ Valid | 191/200 ] loss = 1.45773, acc = 0.74389


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 192/200 ] loss = 0.08103, acc = 0.97293


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 192/200 ] loss = 1.51204, acc = 0.74232
[ Valid | 192/200 ] loss = 1.51204, acc = 0.74232


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 193/200 ] loss = 0.08398, acc = 0.97263


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 193/200 ] loss = 1.63372, acc = 0.73295
[ Valid | 193/200 ] loss = 1.63372, acc = 0.73295


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 194/200 ] loss = 0.08164, acc = 0.97124


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 194/200 ] loss = 1.56967, acc = 0.74559
[ Valid | 194/200 ] loss = 1.56967, acc = 0.74559


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 195/200 ] loss = 0.07789, acc = 0.97452


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 195/200 ] loss = 1.65572, acc = 0.73996
[ Valid | 195/200 ] loss = 1.65572, acc = 0.73996


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 196/200 ] loss = 0.07795, acc = 0.97273


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 196/200 ] loss = 1.65512, acc = 0.73707
[ Valid | 196/200 ] loss = 1.65512, acc = 0.73707


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 197/200 ] loss = 0.08032, acc = 0.97333


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 197/200 ] loss = 1.79579, acc = 0.73236
[ Valid | 197/200 ] loss = 1.79579, acc = 0.73236


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 198/200 ] loss = 0.08438, acc = 0.97203


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 198/200 ] loss = 1.75002, acc = 0.72295
[ Valid | 198/200 ] loss = 1.75002, acc = 0.72295


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 199/200 ] loss = 0.07158, acc = 0.97592


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 199/200 ] loss = 1.63667, acc = 0.73471
[ Valid | 199/200 ] loss = 1.63667, acc = 0.73471


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 200/200 ] loss = 0.09780, acc = 0.96726


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 200/200 ] loss = 1.70268, acc = 0.72692
[ Valid | 200/200 ] loss = 1.70268, acc = 0.72692


### Dataloader for test

In [17]:
# Construct test datasets.
# The argument "loader" tells how torchvision reads the data.
test_set = FoodDataset("/kaggle/input/ml2023spring-hw3/test", tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

### Testing and generate prediction CSV

In [18]:
model_best = Classifier().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
with torch.no_grad():
    for data,_ in tqdm(test_loader):
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()

  0%|          | 0/47 [00:00<?, ?it/s]

In [19]:
# create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(len(test_set))]
df["Category"] = prediction
df.to_csv("submission.csv",index = False)

## Q1. Augmentation Implementation
### Implement augmentation by finishing train_tfm in the code with image size of your choice. 
### Directly copy the following block and paste it on GradeScope after you finish the code
#### Your train_tfm must be capable of producing 5+ different results when given an identical image multiple times.
#### Your  train_tfm in the report can be different from train_tfm in your training code.

In [20]:
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You can add some transforms here.
    transforms.ToTensor(),
])

## Q2. Visual Representations Implementation
### Visualize the learned visual representations of the CNN model on the validation set by implementing t-SNE (t-distributed Stochastic Neighbor Embedding) on the output of both top & mid layers (You need to submit 2 images). 
### ChatGPT has generated the following code, which requires a minor modification to produce the expected results.

In [21]:
import torch
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from tqdm import tqdm
import matplotlib.cm as cm
import torch.nn as nn

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the trained model
model = Classifier().to(device)
state_dict = torch.load(f"{_exp_name}_best.ckpt")
model.load_state_dict(state_dict)
model.eval()

print(model)

Classifier(
  (cnn): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
