# Assignment 3: Image Classification

In [1]:
!nvidia-smi

Mon Aug 21 05:12:52 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 532.03                 Driver Version: 532.03       CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                      TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3050 L...  WDDM | 00000000:01:00.0 Off |                  N/A |
| N/A   57C    P0               10W /  N/A|    204MiB /  4096MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

# !unzip "/content/drive/MyDrive/Lab Training ML/Assignment 3/food-11.zip"

### Import Packages

In [3]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm
from torchvision.datasets import DatasetFolder, VisionDataset
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset

In [4]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

### Transforms

In [5]:
test_tfm = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

train_tfm = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(90, interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
])


### Datasets

In [6]:
class FoodDataset(Dataset):
    def __init__(self, tfm, path="food-11", isTrain=True):
        super(FoodDataset).__init__()
        if isTrain:
            self.train_path = path + "/train"
            self.valid_path = path + "/valid"
            self.files = [self.train_path + "/" + x for x in os.listdir(self.train_path) if x.endswith(".jpg")]
            self.files += [self.valid_path + "/" + x for x in os.listdir(self.valid_path) if x.endswith(".jpg")]
            np.random.shuffle(self.files)
        else:
            self.path = path + "/test"
            self.files = sorted([self.path + "/" + x for x in os.listdir(self.path) if x.endswith(".jpg")])

        self.transform = tfm

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)

        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1  # test has no label

        return im, label


### Models

In [7]:
from torchvision import models


class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 224, 224]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),  # [64, 112, 112]
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 128, 3, 1, 1),  # [128, 112, 112]
            nn.BatchNorm2d(128),
        )
        self.layer2_relu = nn.Sequential(
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),  # [128, 56, 56]
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 256, 3, 1, 1),  # [256, 56, 56]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),  # [256, 28, 28]
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(256, 512, 3, 1, 1),  # [512, 28, 28]
            nn.BatchNorm2d(512),
        )
        self.layer4_relu = nn.Sequential(
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),  # [512, 14, 14]
        )
        self.layer5 = nn.Sequential(
            nn.Conv2d(512, 512, 3, 1, 1),  # [512, 14, 14]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),  # [512, 7, 7]
        )
        self.fc = nn.Sequential(
            nn.Linear(512 * 7 * 7, 1024), 
            nn.ReLU(), 
            nn.Dropout(0.3), 
            nn.Linear(1024, 512), 
            nn.ReLU(), 
            nn.Linear(512, 11),
        )

    def forward(self, x):
        x1 = self.layer1(x)
        residual = x1

        x2 = self.layer2(x1)
        x2 = x2 + residual
        x2 = self.layer2_relu(x2)

        x3 = self.layer3(x2)
        residual = x3

        x4 = self.layer4(x3)
        x4 = x4 + residual
        x4 = self.layer4_relu(x4)

        out = self.layer5(x4)
        out = out.view(out.size()[0], -1)
        return self.fc(out)


class Resnet(nn.Module):
    def __init__(self):
        super(Resnet, self).__init__()
        self.cnn = models.resnet18(weights=None)
        self.cnn.fc = nn.Sequential(nn.Linear(512, 11))

    def forward(self, x):
        return self.cnn(x)


class VGG16(nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.vgg16 = models.vgg16(weights=None)
        self.fc = nn.Sequential(
            nn.ReLU(),
            nn.Linear(1000, 100), 
            nn.ReLU(), 
            nn.Linear(100, 11),
        )

    def forward(self, x):
        return self.vgg16(x)


### Configurations

In [8]:
device =  "cuda" if torch.cuda.is_available() else "cpu"

# hyperparameters
batch_size = 64
n_epochs = 80

patience = 8  # If no improvement in 'patience' epochs, early stop.


### Construct Dataset

In [9]:
dataset = FoodDataset(train_tfm)

### Start Training

In [10]:
from sklearn.model_selection import KFold
from torch.utils.data import SubsetRandomSampler

In [11]:
_exp_name = ["ensemble/sample1", "ensemble/sample2", "ensemble/sample3"]

In [12]:
def adjust_learning_rate(optimizer):
    isPrint = False
    for param_group in optimizer.param_groups:
        if isPrint == False:
            lr = param_group["lr"]
            print(f"--- Learning rate decreases from {lr:.6f} to {lr * 0.8:.6f}. ---")
            isPrint = True
        param_group["lr"] = param_group["lr"] * 0.8


In [13]:
kf = KFold(n_splits=3)

for fold, (train_idx, valid_idx) in enumerate(kf.split(dataset)):
    print(f"Fold {fold+1}/3")
    
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler, num_workers=0, pin_memory=True )
    valid_loader = DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler, num_workers=0, pin_memory=True )

    model = Resnet().to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    
    # Initialize trackers, these are not parameters and should not be changed
    stale = 0
    best_acc = 0

    for epoch in range(n_epochs):
        if stale > 5:
            adjust_learning_rate(optimizer)

        # ---------- Training ----------
        model.train()
        train_loss = []
        train_accs = []

        with tqdm(total=len(train_loader), unit="batch") as tqdm_bar:
            tqdm_bar.set_description(f"Epoch {epoch + 1:03d}/{n_epochs:03d}")
            for batch in train_loader:
                imgs, labels = batch

                # Forward the data.
                logits = model(imgs.to(device))

                # Calculate the cross-entropy loss.
                loss = criterion(logits, labels.to(device))

                # Gradients stored in the parameters in the previous step should be cleared out first.
                optimizer.zero_grad()

                # Compute the gradients for parameters.
                loss.backward()

                # Clip the gradient norms for stable training.
                grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

                # Update the parameters with computed gradients.
                optimizer.step()

                # Compute the accuracy for current batch.
                acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

                # Record the loss and accuracy.
                train_loss.append(loss.item())
                train_accs.append(acc)

                tqdm_bar.update(1)
                tqdm_bar.set_postfix(loss=f"{sum(train_loss)/len(train_loss):.5f}", acc=f"{sum(train_accs) / len(train_accs):.5f}", val_loss=f"{0:.5f}", val_acc=f"{0:.5f}")

            train_loss = sum(train_loss) / len(train_loss)
            train_acc = sum(train_accs) / len(train_accs)
            tqdm_bar.set_postfix(loss=f"{train_loss:.5f}", acc=f"{train_acc:.5f}", val_loss=f"{0:.5f}", val_acc=f"{0:.5f}")

            # ---------- Validation ----------
            model.eval()
            valid_loss = []
            valid_accs = []

            for batch in valid_loader:
                imgs, labels = batch

                # Using torch.no_grad() accelerates the forward process.
                with torch.no_grad():
                    logits = model(imgs.to(device))

                # We can still compute the loss (but not the gradient).
                loss = criterion(logits, labels.to(device))

                # Compute the accuracy for current batch.
                acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

                # Record the loss and accuracy.
                valid_loss.append(loss.item())
                valid_accs.append(acc)

                tqdm_bar.set_postfix(
                    loss=f"{train_loss:.5f}", acc=f"{train_acc:.5f}", val_loss=f"{sum(valid_loss) / len(valid_loss):.5f}", val_acc=f"{sum(valid_accs) / len(valid_accs):.5f}"
                )

            # The average loss and accuracy for entire validation set is the average of the recorded values.
            valid_loss = sum(valid_loss) / len(valid_loss)
            valid_acc = sum(valid_accs) / len(valid_accs)

            tqdm_bar.set_postfix(loss=f"{train_loss:.5f}", acc=f"{train_acc:.5f}", val_loss=f"{valid_loss:.5f}", val_acc=f"{valid_acc:.5f}")
            tqdm_bar.close()

        # update logs
        if valid_acc > best_acc:
            with open(f"./{_exp_name[fold]}_log.txt", "a") as f:
                f.write(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
        else:
            with open(f"./{_exp_name[fold]}_log.txt", "a") as f:
                f.write(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

        # save models
        if valid_acc > best_acc:
            print(f"Best model found at epoch {epoch+1}, saving model")
            torch.save(model.state_dict(), f"{_exp_name[fold]}_best.ckpt")  # only save best to prevent output memory exceed error
            best_acc = valid_acc
            stale = 0
        else:
            stale += 1
            if stale > patience:
                print(f"No improvment {patience} consecutive epochs, early stopping")
                break


Fold 1/3


Epoch 001/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.26578, loss=2.08748, val_acc=0.24067, val_loss=2.15899]


Best model found at epoch 1, saving model


Epoch 002/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.32168, loss=1.91965, val_acc=0.12782, val_loss=3.24823]
Epoch 003/080: 100%|██████████| 143/143 [02:27<00:00,  1.03s/batch, acc=0.36200, loss=1.81207, val_acc=0.30035, val_loss=2.19358]


Best model found at epoch 3, saving model


Epoch 004/080: 100%|██████████| 143/143 [02:27<00:00,  1.03s/batch, acc=0.38639, loss=1.75726, val_acc=0.33008, val_loss=1.90822]


Best model found at epoch 4, saving model


Epoch 005/080: 100%|██████████| 143/143 [02:26<00:00,  1.03s/batch, acc=0.40922, loss=1.69913, val_acc=0.32031, val_loss=2.28238]
Epoch 006/080: 100%|██████████| 143/143 [02:31<00:00,  1.06s/batch, acc=0.43908, loss=1.61663, val_acc=0.37977, val_loss=1.77866]


Best model found at epoch 6, saving model


Epoch 007/080: 100%|██████████| 143/143 [02:34<00:00,  1.08s/batch, acc=0.46539, loss=1.55746, val_acc=0.39714, val_loss=1.70090]


Best model found at epoch 7, saving model


Epoch 008/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.46878, loss=1.53716, val_acc=0.36545, val_loss=2.11119]
Epoch 009/080: 100%|██████████| 143/143 [02:34<00:00,  1.08s/batch, acc=0.48925, loss=1.48237, val_acc=0.42969, val_loss=1.63496]


Best model found at epoch 9, saving model


Epoch 010/080: 100%|██████████| 143/143 [02:33<00:00,  1.08s/batch, acc=0.49274, loss=1.45024, val_acc=0.33702, val_loss=2.38760]
Epoch 011/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.51687, loss=1.40011, val_acc=0.40495, val_loss=1.87117]
Epoch 012/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.52574, loss=1.37023, val_acc=0.47548, val_loss=1.47950]


Best model found at epoch 12, saving model


Epoch 013/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.53456, loss=1.34480, val_acc=0.45486, val_loss=1.58831]
Epoch 014/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.54332, loss=1.30835, val_acc=0.47591, val_loss=1.58590]


Best model found at epoch 14, saving model


Epoch 015/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.54221, loss=1.30788, val_acc=0.45095, val_loss=1.64174]
Epoch 016/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.57087, loss=1.24181, val_acc=0.48351, val_loss=1.50918]


Best model found at epoch 16, saving model


Epoch 017/080: 100%|██████████| 143/143 [02:33<00:00,  1.08s/batch, acc=0.57652, loss=1.21464, val_acc=0.39410, val_loss=1.96213]
Epoch 018/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.57291, loss=1.22432, val_acc=0.53385, val_loss=1.37772]


Best model found at epoch 18, saving model


Epoch 019/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.59041, loss=1.18372, val_acc=0.48611, val_loss=1.53292]
Epoch 020/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.60190, loss=1.16400, val_acc=0.40169, val_loss=2.24299]
Epoch 021/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.61140, loss=1.12984, val_acc=0.55599, val_loss=1.31886]


Best model found at epoch 21, saving model


Epoch 022/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.61726, loss=1.11144, val_acc=0.57726, val_loss=1.26021]


Best model found at epoch 22, saving model


Epoch 023/080: 100%|██████████| 143/143 [02:31<00:00,  1.06s/batch, acc=0.63073, loss=1.06258, val_acc=0.55903, val_loss=1.32900]
Epoch 024/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.63327, loss=1.05862, val_acc=0.55751, val_loss=1.35569]
Epoch 025/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.64114, loss=1.03120, val_acc=0.57053, val_loss=1.27900]
Epoch 026/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.65509, loss=1.00735, val_acc=0.48655, val_loss=1.61181]
Epoch 027/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.65701, loss=0.98806, val_acc=0.58464, val_loss=1.22621]


Best model found at epoch 27, saving model


Epoch 028/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.66999, loss=0.95638, val_acc=0.55404, val_loss=1.34900]
Epoch 029/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.67262, loss=0.94281, val_acc=0.55295, val_loss=1.41817]
Epoch 030/080: 100%|██████████| 143/143 [02:31<00:00,  1.06s/batch, acc=0.67158, loss=0.94876, val_acc=0.56120, val_loss=1.33546]
Epoch 031/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.67682, loss=0.92702, val_acc=0.61241, val_loss=1.16828]


Best model found at epoch 31, saving model


Epoch 032/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.69056, loss=0.88801, val_acc=0.64410, val_loss=1.08545]


Best model found at epoch 32, saving model


Epoch 033/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.69847, loss=0.87584, val_acc=0.55642, val_loss=1.44231]
Epoch 034/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.68987, loss=0.88352, val_acc=0.61567, val_loss=1.20809]
Epoch 035/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.70734, loss=0.84254, val_acc=0.49870, val_loss=1.62244]
Epoch 036/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.71980, loss=0.82165, val_acc=0.59136, val_loss=1.27516]
Epoch 037/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.72122, loss=0.81121, val_acc=0.55512, val_loss=1.47619]
Epoch 038/080: 100%|██████████| 143/143 [02:33<00:00,  1.08s/batch, acc=0.71914, loss=0.81002, val_acc=0.67687, val_loss=1.00784]


Best model found at epoch 38, saving model


Epoch 039/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.72134, loss=0.79845, val_acc=0.55773, val_loss=1.42745]
Epoch 040/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.73377, loss=0.77241, val_acc=0.62609, val_loss=1.17651]
Epoch 041/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.74602, loss=0.75300, val_acc=0.65603, val_loss=1.04980]
Epoch 042/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.74802, loss=0.73149, val_acc=0.70638, val_loss=0.90350]


Best model found at epoch 42, saving model


Epoch 043/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.73956, loss=0.73486, val_acc=0.60221, val_loss=1.21333]
Epoch 044/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.75180, loss=0.72511, val_acc=0.66602, val_loss=1.00099]
Epoch 045/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.74813, loss=0.72471, val_acc=0.68273, val_loss=0.94900]
Epoch 046/080: 100%|██████████| 143/143 [02:33<00:00,  1.08s/batch, acc=0.75642, loss=0.69559, val_acc=0.71246, val_loss=0.89850]


Best model found at epoch 46, saving model


Epoch 047/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.76342, loss=0.68090, val_acc=0.68316, val_loss=0.96877]
Epoch 048/080: 100%|██████████| 143/143 [02:31<00:00,  1.06s/batch, acc=0.76623, loss=0.67957, val_acc=0.51693, val_loss=1.56019]
Epoch 049/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.77368, loss=0.65169, val_acc=0.71007, val_loss=0.89282]
Epoch 050/080: 100%|██████████| 143/143 [02:33<00:00,  1.08s/batch, acc=0.77279, loss=0.66529, val_acc=0.64301, val_loss=1.07508]
Epoch 051/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.77738, loss=0.63284, val_acc=0.64258, val_loss=1.17071]
Epoch 052/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.78298, loss=0.62846, val_acc=0.71463, val_loss=0.88702]


Best model found at epoch 52, saving model


Epoch 053/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.78426, loss=0.62322, val_acc=0.69466, val_loss=0.94036]
Epoch 054/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.78688, loss=0.61105, val_acc=0.65820, val_loss=1.15936]
Epoch 055/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.78962, loss=0.60228, val_acc=0.67773, val_loss=0.99863]
Epoch 056/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.79828, loss=0.58801, val_acc=0.72374, val_loss=0.84672]


Best model found at epoch 56, saving model


Epoch 057/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.80566, loss=0.57551, val_acc=0.71311, val_loss=0.87849]
Epoch 058/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.79511, loss=0.59389, val_acc=0.71050, val_loss=0.92435]
Epoch 059/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.80757, loss=0.54771, val_acc=0.71224, val_loss=0.92815]
Epoch 060/080: 100%|██████████| 143/143 [02:33<00:00,  1.08s/batch, acc=0.80899, loss=0.55011, val_acc=0.70595, val_loss=0.92016]
Epoch 061/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.81323, loss=0.54608, val_acc=0.69032, val_loss=0.94480]
Epoch 062/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.81367, loss=0.53603, val_acc=0.67383, val_loss=1.07401]


--- Learning rate decreases from 0.001000 to 0.000800. ---


Epoch 063/080: 100%|██████████| 143/143 [02:31<00:00,  1.06s/batch, acc=0.83827, loss=0.47886, val_acc=0.72418, val_loss=0.88955]


Best model found at epoch 63, saving model


Epoch 064/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.84175, loss=0.45909, val_acc=0.71029, val_loss=0.99510]
Epoch 065/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.83250, loss=0.47880, val_acc=0.76042, val_loss=0.76800]


Best model found at epoch 65, saving model


Epoch 066/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.84121, loss=0.45531, val_acc=0.75065, val_loss=0.80646]
Epoch 067/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.84314, loss=0.44820, val_acc=0.74349, val_loss=0.82026]
Epoch 068/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.84622, loss=0.44331, val_acc=0.73741, val_loss=0.86779]
Epoch 069/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.84462, loss=0.44259, val_acc=0.76128, val_loss=0.76771]


Best model found at epoch 69, saving model


Epoch 070/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.85781, loss=0.41789, val_acc=0.70768, val_loss=1.02985]
Epoch 071/080: 100%|██████████| 143/143 [02:31<00:00,  1.06s/batch, acc=0.84829, loss=0.42930, val_acc=0.73633, val_loss=0.95969]
Epoch 072/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.85243, loss=0.42094, val_acc=0.73112, val_loss=0.92955]
Epoch 073/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.85266, loss=0.42335, val_acc=0.73459, val_loss=0.87513]
Epoch 074/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.86284, loss=0.39703, val_acc=0.74674, val_loss=0.90104]
Epoch 075/080: 100%|██████████| 143/143 [02:31<00:00,  1.06s/batch, acc=0.86228, loss=0.40153, val_acc=0.75456, val_loss=0.86368]


--- Learning rate decreases from 0.000800 to 0.000640. ---


Epoch 076/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.88252, loss=0.34800, val_acc=0.75673, val_loss=0.83748]


--- Learning rate decreases from 0.000640 to 0.000512. ---


Epoch 077/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.89050, loss=0.31640, val_acc=0.72656, val_loss=0.92473]


--- Learning rate decreases from 0.000512 to 0.000410. ---


Epoch 078/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.90405, loss=0.27107, val_acc=0.77821, val_loss=0.76549]


Best model found at epoch 78, saving model


Epoch 079/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.91390, loss=0.25872, val_acc=0.78082, val_loss=0.79706]


Best model found at epoch 79, saving model


Epoch 080/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.91298, loss=0.25258, val_acc=0.77582, val_loss=0.80617]


Fold 2/3


Epoch 001/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.29900, loss=2.03903, val_acc=0.32227, val_loss=1.98848]


Best model found at epoch 1, saving model


Epoch 002/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.36086, loss=1.83798, val_acc=0.28885, val_loss=2.04295]
Epoch 003/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.39737, loss=1.72716, val_acc=0.37934, val_loss=1.83447]


Best model found at epoch 3, saving model


Epoch 004/080: 100%|██████████| 143/143 [02:31<00:00,  1.06s/batch, acc=0.41841, loss=1.66752, val_acc=0.32161, val_loss=2.04226]
Epoch 005/080: 100%|██████████| 143/143 [02:34<00:00,  1.08s/batch, acc=0.44474, loss=1.60939, val_acc=0.25391, val_loss=2.58636]
Epoch 006/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.46104, loss=1.55276, val_acc=0.41298, val_loss=1.71118]


Best model found at epoch 6, saving model


Epoch 007/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.47748, loss=1.51081, val_acc=0.35916, val_loss=2.00679]
Epoch 008/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.49572, loss=1.46094, val_acc=0.39475, val_loss=1.78819]
Epoch 009/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.50618, loss=1.42163, val_acc=0.44944, val_loss=1.60398]


Best model found at epoch 9, saving model


Epoch 010/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.50604, loss=1.40337, val_acc=0.42426, val_loss=1.71487]
Epoch 011/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.53086, loss=1.35842, val_acc=0.49718, val_loss=1.45819]


Best model found at epoch 11, saving model


Epoch 012/080: 100%|██████████| 143/143 [02:31<00:00,  1.06s/batch, acc=0.54289, loss=1.32042, val_acc=0.52821, val_loss=1.36078]


Best model found at epoch 12, saving model


Epoch 013/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.54901, loss=1.29878, val_acc=0.47157, val_loss=1.55201]
Epoch 014/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.56757, loss=1.25518, val_acc=0.47786, val_loss=1.50654]
Epoch 015/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.57711, loss=1.22498, val_acc=0.43359, val_loss=1.81192]
Epoch 016/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.57503, loss=1.21666, val_acc=0.47352, val_loss=1.66822]
Epoch 017/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.58680, loss=1.19543, val_acc=0.42188, val_loss=1.88651]
Epoch 018/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.59831, loss=1.15451, val_acc=0.48806, val_loss=1.59914]


--- Learning rate decreases from 0.001000 to 0.000800. ---


Epoch 019/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.62516, loss=1.07852, val_acc=0.53754, val_loss=1.36407]


Best model found at epoch 19, saving model


Epoch 020/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.64550, loss=1.04313, val_acc=0.55794, val_loss=1.31244]


Best model found at epoch 20, saving model


Epoch 021/080: 100%|██████████| 143/143 [02:31<00:00,  1.06s/batch, acc=0.64649, loss=1.03091, val_acc=0.59201, val_loss=1.20071]


Best model found at epoch 21, saving model


Epoch 022/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.65817, loss=0.98743, val_acc=0.58008, val_loss=1.24211]
Epoch 023/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.66068, loss=0.98215, val_acc=0.51454, val_loss=1.60390]
Epoch 024/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.66994, loss=0.96673, val_acc=0.54536, val_loss=1.45639]
Epoch 025/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.67523, loss=0.93267, val_acc=0.60612, val_loss=1.17035]


Best model found at epoch 25, saving model


Epoch 026/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.68567, loss=0.92077, val_acc=0.59440, val_loss=1.20073]
Epoch 027/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.69590, loss=0.88769, val_acc=0.56141, val_loss=1.34377]
Epoch 028/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.69248, loss=0.87629, val_acc=0.51931, val_loss=1.48588]
Epoch 029/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.70320, loss=0.85304, val_acc=0.62956, val_loss=1.14446]


Best model found at epoch 29, saving model


Epoch 030/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.71633, loss=0.82822, val_acc=0.66797, val_loss=1.01320]


Best model found at epoch 30, saving model


Epoch 031/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.72540, loss=0.79589, val_acc=0.62630, val_loss=1.17392]
Epoch 032/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.72863, loss=0.79026, val_acc=0.58550, val_loss=1.25042]
Epoch 033/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.73378, loss=0.77904, val_acc=0.64952, val_loss=1.05990]
Epoch 034/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.73598, loss=0.77247, val_acc=0.65647, val_loss=1.02805]
Epoch 035/080: 100%|██████████| 143/143 [02:34<00:00,  1.08s/batch, acc=0.73881, loss=0.76221, val_acc=0.61849, val_loss=1.16841]
Epoch 036/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.74393, loss=0.73892, val_acc=0.63477, val_loss=1.20096]


--- Learning rate decreases from 0.000800 to 0.000640. ---


Epoch 037/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.75894, loss=0.69655, val_acc=0.68663, val_loss=0.94576]


Best model found at epoch 37, saving model


Epoch 038/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.76491, loss=0.67045, val_acc=0.65386, val_loss=1.10587]
Epoch 039/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.76918, loss=0.66065, val_acc=0.71224, val_loss=0.89550]


Best model found at epoch 39, saving model


Epoch 040/080: 100%|██████████| 143/143 [02:31<00:00,  1.06s/batch, acc=0.77896, loss=0.64235, val_acc=0.67079, val_loss=1.02730]
Epoch 041/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.78253, loss=0.63328, val_acc=0.70985, val_loss=0.89815]
Epoch 042/080: 100%|██████████| 143/143 [02:31<00:00,  1.06s/batch, acc=0.78237, loss=0.63325, val_acc=0.68359, val_loss=0.99048]
Epoch 043/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.78834, loss=0.61310, val_acc=0.63368, val_loss=1.25660]
Epoch 044/080: 100%|██████████| 143/143 [02:33<00:00,  1.08s/batch, acc=0.78217, loss=0.61794, val_acc=0.68924, val_loss=0.97247]
Epoch 045/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.79815, loss=0.59557, val_acc=0.69379, val_loss=1.01091]


--- Learning rate decreases from 0.000640 to 0.000512. ---


Epoch 046/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.80491, loss=0.55765, val_acc=0.72352, val_loss=0.89439]


Best model found at epoch 46, saving model


Epoch 047/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.81564, loss=0.53299, val_acc=0.72244, val_loss=0.90122]
Epoch 048/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.81915, loss=0.52057, val_acc=0.73915, val_loss=0.84748]


Best model found at epoch 48, saving model


Epoch 049/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.82439, loss=0.50987, val_acc=0.71745, val_loss=0.92721]
Epoch 050/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.82538, loss=0.50000, val_acc=0.71311, val_loss=0.88051]
Epoch 051/080: 100%|██████████| 143/143 [02:32<00:00,  1.06s/batch, acc=0.82884, loss=0.48710, val_acc=0.74067, val_loss=0.84190]


Best model found at epoch 51, saving model


Epoch 052/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.82677, loss=0.50752, val_acc=0.69705, val_loss=0.94002]
Epoch 053/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.82525, loss=0.49769, val_acc=0.68620, val_loss=1.04606]
Epoch 054/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.82866, loss=0.48832, val_acc=0.72830, val_loss=0.89942]
Epoch 055/080: 100%|██████████| 143/143 [02:25<00:00,  1.02s/batch, acc=0.84242, loss=0.44986, val_acc=0.75065, val_loss=0.82323]


Best model found at epoch 55, saving model


Epoch 056/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.84339, loss=0.45234, val_acc=0.71962, val_loss=0.93419]
Epoch 057/080: 100%|██████████| 143/143 [02:30<00:00,  1.05s/batch, acc=0.83769, loss=0.46135, val_acc=0.66450, val_loss=1.19878]
Epoch 058/080: 100%|██████████| 143/143 [02:32<00:00,  1.07s/batch, acc=0.84971, loss=0.44965, val_acc=0.73416, val_loss=0.86163]
Epoch 059/080: 100%|██████████| 143/143 [02:30<00:00,  1.05s/batch, acc=0.85138, loss=0.42273, val_acc=0.72222, val_loss=0.92369]
Epoch 060/080: 100%|██████████| 143/143 [02:27<00:00,  1.03s/batch, acc=0.85017, loss=0.41906, val_acc=0.73872, val_loss=0.86737]
Epoch 061/080: 100%|██████████| 143/143 [02:27<00:00,  1.03s/batch, acc=0.85116, loss=0.41967, val_acc=0.72439, val_loss=0.94527]


--- Learning rate decreases from 0.000512 to 0.000410. ---


Epoch 062/080: 100%|██████████| 143/143 [02:27<00:00,  1.03s/batch, acc=0.87116, loss=0.37204, val_acc=0.71615, val_loss=1.00277]


--- Learning rate decreases from 0.000410 to 0.000328. ---


Epoch 063/080: 100%|██████████| 143/143 [02:33<00:00,  1.07s/batch, acc=0.87848, loss=0.34654, val_acc=0.75521, val_loss=0.81944]


Best model found at epoch 63, saving model


Epoch 064/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.88546, loss=0.32985, val_acc=0.73806, val_loss=0.91866]
Epoch 065/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.88938, loss=0.31671, val_acc=0.76237, val_loss=0.79082]


Best model found at epoch 65, saving model


Epoch 066/080: 100%|██████████| 143/143 [02:52<00:00,  1.21s/batch, acc=0.88819, loss=0.31621, val_acc=0.75434, val_loss=0.89134]
Epoch 067/080: 100%|██████████| 143/143 [02:26<00:00,  1.03s/batch, acc=0.89220, loss=0.30822, val_acc=0.72656, val_loss=0.99227]
Epoch 068/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.89114, loss=0.31499, val_acc=0.75174, val_loss=0.88580]
Epoch 069/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.89362, loss=0.30752, val_acc=0.77018, val_loss=0.85946]


Best model found at epoch 69, saving model


Epoch 070/080: 100%|██████████| 143/143 [02:27<00:00,  1.03s/batch, acc=0.90352, loss=0.28120, val_acc=0.75651, val_loss=0.90391]
Epoch 071/080: 100%|██████████| 143/143 [02:27<00:00,  1.03s/batch, acc=0.90141, loss=0.27739, val_acc=0.75760, val_loss=0.85895]
Epoch 072/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.89737, loss=0.29265, val_acc=0.75130, val_loss=0.89552]
Epoch 073/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.90119, loss=0.28439, val_acc=0.75239, val_loss=0.90237]
Epoch 074/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.90385, loss=0.27606, val_acc=0.75998, val_loss=0.89091]
Epoch 075/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.91640, loss=0.24014, val_acc=0.75434, val_loss=0.91842]


--- Learning rate decreases from 0.000328 to 0.000262. ---


Epoch 076/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.92273, loss=0.22598, val_acc=0.76671, val_loss=0.89641]


--- Learning rate decreases from 0.000262 to 0.000210. ---


Epoch 077/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.92941, loss=0.20504, val_acc=0.76931, val_loss=0.87908]


--- Learning rate decreases from 0.000210 to 0.000168. ---


Epoch 078/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.93464, loss=0.18877, val_acc=0.78082, val_loss=0.84447]


Best model found at epoch 78, saving model


Epoch 079/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.94064, loss=0.17713, val_acc=0.77778, val_loss=0.83433]
Epoch 080/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.93672, loss=0.17926, val_acc=0.78733, val_loss=0.83952]


Best model found at epoch 80, saving model
Fold 3/3


Epoch 001/080: 100%|██████████| 143/143 [02:27<00:00,  1.03s/batch, acc=0.26377, loss=2.08340, val_acc=0.22287, val_loss=2.48666]


Best model found at epoch 1, saving model


Epoch 002/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.33151, loss=1.89922, val_acc=0.25029, val_loss=2.40408]


Best model found at epoch 2, saving model


Epoch 003/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.37609, loss=1.80440, val_acc=0.24696, val_loss=2.23485]
Epoch 004/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.39674, loss=1.74190, val_acc=0.37977, val_loss=1.84303]


Best model found at epoch 4, saving model


Epoch 005/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.41969, loss=1.66669, val_acc=0.32292, val_loss=1.92023]
Epoch 006/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.43422, loss=1.62379, val_acc=0.43685, val_loss=1.64670]


Best model found at epoch 6, saving model


Epoch 007/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.46176, loss=1.55656, val_acc=0.39316, val_loss=1.81955]
Epoch 008/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.46831, loss=1.52922, val_acc=0.39222, val_loss=1.83662]
Epoch 009/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.48656, loss=1.49682, val_acc=0.45305, val_loss=1.58558]


Best model found at epoch 9, saving model


Epoch 010/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.49355, loss=1.43688, val_acc=0.14497, val_loss=4.11906]
Epoch 011/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.50699, loss=1.41890, val_acc=0.48705, val_loss=1.50202]


Best model found at epoch 11, saving model


Epoch 012/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.52174, loss=1.37294, val_acc=0.49660, val_loss=1.49322]


Best model found at epoch 12, saving model


Epoch 013/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.52393, loss=1.35187, val_acc=0.53118, val_loss=1.39235]


Best model found at epoch 13, saving model


Epoch 014/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.55114, loss=1.30041, val_acc=0.42694, val_loss=1.74984]
Epoch 015/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.55452, loss=1.28883, val_acc=0.49609, val_loss=1.50624]
Epoch 016/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.57026, loss=1.24886, val_acc=0.44061, val_loss=1.66300]
Epoch 017/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.57616, loss=1.20966, val_acc=0.54919, val_loss=1.31190]


Best model found at epoch 17, saving model


Epoch 018/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.59659, loss=1.17343, val_acc=0.50441, val_loss=1.48939]
Epoch 019/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.60282, loss=1.14668, val_acc=0.57653, val_loss=1.24096]


Best model found at epoch 19, saving model


Epoch 020/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.61943, loss=1.09871, val_acc=0.55317, val_loss=1.30898]
Epoch 021/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.62533, loss=1.08795, val_acc=0.42484, val_loss=1.80776]
Epoch 022/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.62653, loss=1.08189, val_acc=0.60605, val_loss=1.16981]


Best model found at epoch 22, saving model


Epoch 023/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.65046, loss=1.02331, val_acc=0.60294, val_loss=1.16708]
Epoch 024/080: 100%|██████████| 143/143 [02:30<00:00,  1.05s/batch, acc=0.64740, loss=1.01235, val_acc=0.59259, val_loss=1.21224]
Epoch 025/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.65439, loss=0.99322, val_acc=0.59679, val_loss=1.22799]
Epoch 026/080: 100%|██████████| 143/143 [02:30<00:00,  1.05s/batch, acc=0.67045, loss=0.97066, val_acc=0.58659, val_loss=1.22471]
Epoch 027/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.67592, loss=0.94509, val_acc=0.57610, val_loss=1.32036]
Epoch 028/080: 100%|██████████| 143/143 [02:27<00:00,  1.03s/batch, acc=0.68040, loss=0.93035, val_acc=0.64873, val_loss=1.04862]


Best model found at epoch 28, saving model


Epoch 029/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.69362, loss=0.88635, val_acc=0.60062, val_loss=1.20349]
Epoch 030/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.69340, loss=0.88003, val_acc=0.63983, val_loss=1.07953]
Epoch 031/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.70651, loss=0.86093, val_acc=0.66356, val_loss=0.98068]


Best model found at epoch 31, saving model


Epoch 032/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.70957, loss=0.83902, val_acc=0.64598, val_loss=1.03792]
Epoch 033/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.71208, loss=0.83868, val_acc=0.61798, val_loss=1.16979]
Epoch 034/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.71995, loss=0.81504, val_acc=0.61950, val_loss=1.21237]
Epoch 035/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.72072, loss=0.80223, val_acc=0.67419, val_loss=0.97804]


Best model found at epoch 35, saving model


Epoch 036/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.73328, loss=0.78539, val_acc=0.61277, val_loss=1.19118]
Epoch 037/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.73547, loss=0.76281, val_acc=0.68381, val_loss=0.93695]


Best model found at epoch 37, saving model


Epoch 038/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.74858, loss=0.73065, val_acc=0.63563, val_loss=1.15912]
Epoch 039/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.74541, loss=0.72908, val_acc=0.60598, val_loss=1.21554]
Epoch 040/080: 100%|██████████| 143/143 [02:30<00:00,  1.05s/batch, acc=0.75055, loss=0.72288, val_acc=0.59787, val_loss=1.37290]
Epoch 041/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.75885, loss=0.70232, val_acc=0.68504, val_loss=0.94697]


Best model found at epoch 41, saving model


Epoch 042/080: 100%|██████████| 143/143 [02:30<00:00,  1.05s/batch, acc=0.75918, loss=0.69791, val_acc=0.68063, val_loss=0.94171]
Epoch 043/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.76683, loss=0.67754, val_acc=0.68576, val_loss=0.97674]


Best model found at epoch 43, saving model


Epoch 044/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.77000, loss=0.66270, val_acc=0.71716, val_loss=0.86312]


Best model found at epoch 44, saving model


Epoch 045/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.77229, loss=0.65872, val_acc=0.68714, val_loss=0.91466]
Epoch 046/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.77174, loss=0.65090, val_acc=0.68388, val_loss=0.92909]
Epoch 047/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.78387, loss=0.62608, val_acc=0.65437, val_loss=1.10711]
Epoch 048/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.78486, loss=0.62105, val_acc=0.69459, val_loss=1.02417]
Epoch 049/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.78682, loss=0.61888, val_acc=0.66479, val_loss=1.08314]
Epoch 050/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.79021, loss=0.60902, val_acc=0.68446, val_loss=0.94855]


--- Learning rate decreases from 0.001000 to 0.000800. ---


Epoch 051/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.81141, loss=0.55462, val_acc=0.74501, val_loss=0.83275]


Best model found at epoch 51, saving model


Epoch 052/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.81436, loss=0.53704, val_acc=0.68482, val_loss=1.05258]
Epoch 053/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.81589, loss=0.53157, val_acc=0.73025, val_loss=0.88797]
Epoch 054/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.82441, loss=0.51129, val_acc=0.73647, val_loss=0.80158]
Epoch 055/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.82146, loss=0.51852, val_acc=0.72078, val_loss=0.85582]
Epoch 056/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.82244, loss=0.50980, val_acc=0.71542, val_loss=0.94057]
Epoch 057/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.81600, loss=0.52234, val_acc=0.72844, val_loss=0.84034]


--- Learning rate decreases from 0.000800 to 0.000640. ---


Epoch 058/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.85533, loss=0.42190, val_acc=0.75036, val_loss=0.78560]


Best model found at epoch 58, saving model


Epoch 059/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.84889, loss=0.43498, val_acc=0.75477, val_loss=0.83206]


Best model found at epoch 59, saving model


Epoch 060/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.85151, loss=0.42964, val_acc=0.74740, val_loss=0.82085]
Epoch 061/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.84670, loss=0.43561, val_acc=0.75152, val_loss=0.81660]
Epoch 062/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.84867, loss=0.42523, val_acc=0.73322, val_loss=0.88204]
Epoch 063/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.85260, loss=0.42221, val_acc=0.74457, val_loss=0.86369]
Epoch 064/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.86844, loss=0.39230, val_acc=0.76425, val_loss=0.78885]


Best model found at epoch 64, saving model


Epoch 065/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.86746, loss=0.37647, val_acc=0.73387, val_loss=0.84179]
Epoch 066/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.86702, loss=0.36917, val_acc=0.74161, val_loss=0.86337]
Epoch 067/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.86418, loss=0.39172, val_acc=0.77235, val_loss=0.76007]


Best model found at epoch 67, saving model


Epoch 068/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.87216, loss=0.37063, val_acc=0.68056, val_loss=1.14911]
Epoch 069/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.87030, loss=0.37257, val_acc=0.75130, val_loss=0.81028]
Epoch 070/080: 100%|██████████| 143/143 [02:30<00:00,  1.05s/batch, acc=0.87303, loss=0.35593, val_acc=0.74776, val_loss=0.85660]
Epoch 071/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.87107, loss=0.36749, val_acc=0.76577, val_loss=0.79241]
Epoch 072/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.88888, loss=0.31634, val_acc=0.75492, val_loss=0.86624]
Epoch 073/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.88090, loss=0.34379, val_acc=0.76251, val_loss=0.81084]


--- Learning rate decreases from 0.000640 to 0.000512. ---


Epoch 074/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.88571, loss=0.32760, val_acc=0.77148, val_loss=0.79066]


--- Learning rate decreases from 0.000512 to 0.000410. ---


Epoch 075/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.90516, loss=0.26685, val_acc=0.77054, val_loss=0.80260]


--- Learning rate decreases from 0.000410 to 0.000328. ---


Epoch 076/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.91794, loss=0.24420, val_acc=0.78074, val_loss=0.83105]


Best model found at epoch 76, saving model


Epoch 077/080: 100%|██████████| 143/143 [02:30<00:00,  1.05s/batch, acc=0.91936, loss=0.23420, val_acc=0.78168, val_loss=0.79091]


Best model found at epoch 77, saving model


Epoch 078/080: 100%|██████████| 143/143 [02:29<00:00,  1.04s/batch, acc=0.92450, loss=0.22457, val_acc=0.79008, val_loss=0.76941]


Best model found at epoch 78, saving model


Epoch 079/080: 100%|██████████| 143/143 [02:29<00:00,  1.05s/batch, acc=0.92515, loss=0.21947, val_acc=0.77879, val_loss=0.80884]
Epoch 080/080: 100%|██████████| 143/143 [02:28<00:00,  1.04s/batch, acc=0.93116, loss=0.20040, val_acc=0.77619, val_loss=0.82401]


### Dataloader for test

In [25]:
# Construct test datasets.
test_set = FoodDataset(test_tfm, isTrain=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

### Test Time Augmentation

In [26]:
tta_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(90, interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
])

tta_num = 5

### Testing and generate prediction CSV

In [42]:
model_1 = Resnet().to(device)
model_1.load_state_dict(torch.load("ensemble/sample1_best.ckpt"))
model_1.eval()

model_2 = Resnet().to(device)
model_2.load_state_dict(torch.load("ensemble/sample2_best.ckpt"))
model_2.eval()

model_3 = Resnet().to(device)
model_3.load_state_dict(torch.load("ensemble/sample3_best.ckpt"))
model_3.eval()

prediction = []
with torch.no_grad():
    for data, _ in tqdm(test_loader):
        for img in data:
            test_input = img.view(1, 3, 224, 224)
            test_input = test_input.to(device)

            test_pred_1 = model_1(test_input)
            test_pred_2 = model_2(test_input)
            test_pred_3 = model_3(test_input)

            test_pred_1 = test_pred_1.cpu().data.numpy()
            test_pred_2 = test_pred_2.cpu().data.numpy()
            test_pred_3 = test_pred_3.cpu().data.numpy()

            # test time augmentation
            tta_pred_1 = np.zeros((1, 11))
            tta_pred_2 = np.zeros((1, 11))
            tta_pred_3 = np.zeros((1, 11))

            for _ in range(tta_num):
                test_augmented = tta_transform(img)
                test_augmented = test_augmented.view(1, 3, 224, 224)
                test_augmented = test_augmented.to(device)

                pred_1 = model_1(test_augmented)
                pred_2 = model_2(test_augmented)
                pred_3 = model_3(test_augmented)

                tta_pred_1 = tta_pred_1 + pred_1.cpu().data.numpy()
                tta_pred_2 = tta_pred_2 + pred_2.cpu().data.numpy()
                tta_pred_3 = tta_pred_3 + pred_3.cpu().data.numpy()

            tta_pred_1 = tta_pred_1 / tta_num
            tta_pred_2 = tta_pred_2 / tta_num
            tta_pred_3 = tta_pred_3 / tta_num

            # final prediction
            test_label = np.argmax((test_pred_1 * 0.7 + tta_pred_1 * 0.3) + (test_pred_2 * 0.7 + tta_pred_2 * 0.3) + (test_pred_3 * 0.7 + tta_pred_3 * 0.3))

            prediction.append(test_label)


100%|██████████| 47/47 [06:30<00:00,  8.32s/it]


In [43]:
# create test csv
def pad4(i):
    return "0" * (4 - len(str(i))) + str(i)


df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(len(test_set))]
df["Category"] = prediction
df.to_csv("submission.csv", index=False)


### Visual Representations Implementation

In [29]:
# import torch
# import numpy as np
# from sklearn.manifold import TSNE
# import matplotlib.pyplot as plt
# from tqdm import tqdm
# import matplotlib.cm as cm
# import torch.nn as nn

# device = 'cuda' if torch.cuda.is_available() else 'cpu'

# # Load the trained model
# model = Resnet().to(device)
# state_dict = torch.load(f"sample_best.ckpt")
# model.load_state_dict(state_dict)
# model.eval()

# print(model)

In [30]:
# # Load the vaildation set defined by TA
# valid_set = FoodDataset("food-11/valid", tfm=test_tfm)
# valid_loader = DataLoader(valid_set, batch_size=64, shuffle=False, num_workers=0, pin_memory=True)

# # Extract the representations for the specific layer of model
# index = 1 # You should find out the index of layer which is defined as "top" or 'mid' layer of your model.
# features = []
# labels = []
# for batch in tqdm(valid_loader):
#     imgs, lbls = batch
#     with torch.no_grad():
#         logits = model.cnn[:index](imgs.to(device))
#         logits = logits.view(logits.size()[0], -1)
#     labels.extend(lbls.cpu().numpy())
#     logits = np.squeeze(logits.cpu().numpy())
#     features.extend(logits)

# features = np.array(features)
# colors_per_class = cm.rainbow(np.linspace(0, 1, 11))

# # Apply t-SNE to the features
# features_tsne = TSNE(n_components=2, init='pca', random_state=42).fit_transform(features)

# # Plot the t-SNE visualization
# plt.figure(figsize=(10, 8))
# for label in np.unique(labels):
#     plt.scatter(features_tsne[labels == label, 0], features_tsne[labels == label, 1], label=label, s=5)
# plt.legend()
# plt.show()

# plt.figure(figsize=(10, 8))
# labels = [0]
# for label in np.unique(labels):
#     plt.scatter(features_tsne[labels == label, 0], features_tsne[labels == label, 1], label=label, s=5)
# plt.legend()
# plt.show()