In [1]:
!export WANDB_NOTEBOOK_NAME="CNN1"

In [2]:
! nvidia-smi -L

GPU 0: NVIDIA GeForce RTX 3090 (UUID: GPU-72fdbd81-da45-b750-3719-ae5877e26726)


In [3]:
import multiprocessing as mp
num_cpu = mp.cpu_count()
num_cpu

12

# Training

In [4]:
_exp_name = "resnet18_pretrained"

In [5]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import wandb

In [6]:
myseed = 4012  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

## **Transforms**

In [7]:
test_tfm = transforms.Compose([
    # (height = width = 128)
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

train_tfm = transforms.Compose([
    # (height = width = 128)
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])


## **Datasets**
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [8]:
class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        # print(path)
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
        print(f"One {path} sample",self.files[0])
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        #im = self.data[idx]
        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1 # test has no label
        return im,label



In [9]:
batch_size = 128
_dataset_dir = "./food-11"

train_set = FoodDataset(os.path.join(_dataset_dir,"training"), tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=num_cpu, pin_memory=True)
valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=num_cpu, pin_memory=True)

One ./food-11/training sample ./food-11/training/0_0.jpg
One ./food-11/validation sample ./food-11/validation/0_0.jpg


In [10]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The number of training epochs and patience.
n_epochs = 300
patience = 20 # If no improvement in 'patience' epochs, early stop

# Initialize a model, and put it on the device specified.
model = torchvision.models.resnet18(pretrained=True).to(device)

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5) 

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

wandb.init(
    project="Food-11",
    config={
        "learning rate": 0.0003,
        "architecture": "CNN",
        "epochs": n_epochs,
        "batch_size": batch_size,
        "image_dim": 128,
    }
)

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        wandb.log({"step_training_loss": loss.item()})
        wandb.log({"step_training_accuracy": acc})
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        wandb.log({"step_validation_loss": loss.item()})
        wandb.log({"step_validation_accuracy": acc})
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_acc > best_acc:
        with open(f"{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

    wandb.log({
        "average_training_loss": train_loss,
        "average_validation_loss": valid_loss,
    })
    
    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break
wandb.finish()

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /home/user/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnickwkt[0m. Use [1m`wandb login --relogin`[0m to force relogin


100%|███████████████████████████████████████████| 78/78 [00:15<00:00,  4.90it/s]


[ Train | 001/300 ] loss = 1.74572, acc = 0.66593


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.12it/s]


[ Valid | 001/300 ] loss = 0.66608, acc = 0.79438
[ Valid | 001/300 ] loss = 0.66608, acc = 0.79438 -> best
Best model found at epoch 0, saving model


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.54it/s]


[ Train | 002/300 ] loss = 0.25228, acc = 0.91799


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.37it/s]


[ Valid | 002/300 ] loss = 0.74745, acc = 0.79924
[ Valid | 002/300 ] loss = 0.74745, acc = 0.79924 -> best
Best model found at epoch 1, saving model


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.78it/s]


[ Train | 003/300 ] loss = 0.09077, acc = 0.96999


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.54it/s]


[ Valid | 003/300 ] loss = 0.85153, acc = 0.79850
[ Valid | 003/300 ] loss = 0.85153, acc = 0.79850


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.50it/s]


[ Train | 004/300 ] loss = 0.07941, acc = 0.97458


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.64it/s]


[ Valid | 004/300 ] loss = 1.01591, acc = 0.77579
[ Valid | 004/300 ] loss = 1.01591, acc = 0.77579


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.68it/s]


[ Train | 005/300 ] loss = 0.08388, acc = 0.97155


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.39it/s]


[ Valid | 005/300 ] loss = 1.12434, acc = 0.76379
[ Valid | 005/300 ] loss = 1.12434, acc = 0.76379


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.47it/s]


[ Train | 006/300 ] loss = 0.09215, acc = 0.96799


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.43it/s]


[ Valid | 006/300 ] loss = 1.18141, acc = 0.76573
[ Valid | 006/300 ] loss = 1.18141, acc = 0.76573


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.53it/s]


[ Train | 007/300 ] loss = 0.06657, acc = 0.97598


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.45it/s]


[ Valid | 007/300 ] loss = 1.02163, acc = 0.79705
[ Valid | 007/300 ] loss = 1.02163, acc = 0.79705


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.65it/s]


[ Train | 008/300 ] loss = 0.06121, acc = 0.97949


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.52it/s]


[ Valid | 008/300 ] loss = 1.02820, acc = 0.79822
[ Valid | 008/300 ] loss = 1.02820, acc = 0.79822


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.63it/s]


[ Train | 009/300 ] loss = 0.04906, acc = 0.98460


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.59it/s]


[ Valid | 009/300 ] loss = 1.11312, acc = 0.79409
[ Valid | 009/300 ] loss = 1.11312, acc = 0.79409


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.74it/s]


[ Train | 010/300 ] loss = 0.07248, acc = 0.97859


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.07it/s]


[ Valid | 010/300 ] loss = 1.04694, acc = 0.77913
[ Valid | 010/300 ] loss = 1.04694, acc = 0.77913


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.56it/s]


[ Train | 011/300 ] loss = 0.05669, acc = 0.98207


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.44it/s]


[ Valid | 011/300 ] loss = 1.16382, acc = 0.78527
[ Valid | 011/300 ] loss = 1.16382, acc = 0.78527


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.60it/s]


[ Train | 012/300 ] loss = 0.04815, acc = 0.98532


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.74it/s]


[ Valid | 012/300 ] loss = 0.93929, acc = 0.80596
[ Valid | 012/300 ] loss = 0.93929, acc = 0.80596 -> best
Best model found at epoch 11, saving model


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.78it/s]


[ Train | 013/300 ] loss = 0.05808, acc = 0.98241


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.61it/s]


[ Valid | 013/300 ] loss = 0.98061, acc = 0.80183
[ Valid | 013/300 ] loss = 0.98061, acc = 0.80183


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.51it/s]


[ Train | 014/300 ] loss = 0.06319, acc = 0.98231


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.88it/s]


[ Valid | 014/300 ] loss = 1.00908, acc = 0.80937
[ Valid | 014/300 ] loss = 1.00908, acc = 0.80937 -> best
Best model found at epoch 13, saving model


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.45it/s]


[ Train | 015/300 ] loss = 0.04703, acc = 0.98570


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.29it/s]


[ Valid | 015/300 ] loss = 1.54504, acc = 0.75089
[ Valid | 015/300 ] loss = 1.54504, acc = 0.75089


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.41it/s]


[ Train | 016/300 ] loss = 0.05488, acc = 0.98323


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.62it/s]


[ Valid | 016/300 ] loss = 1.02820, acc = 0.79692
[ Valid | 016/300 ] loss = 1.02820, acc = 0.79692


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.47it/s]


[ Train | 017/300 ] loss = 0.03077, acc = 0.98922


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.35it/s]


[ Valid | 017/300 ] loss = 0.97419, acc = 0.81147
[ Valid | 017/300 ] loss = 0.97419, acc = 0.81147 -> best
Best model found at epoch 16, saving model


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.52it/s]


[ Train | 018/300 ] loss = 0.03374, acc = 0.98760


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.56it/s]


[ Valid | 018/300 ] loss = 1.02244, acc = 0.80791
[ Valid | 018/300 ] loss = 1.02244, acc = 0.80791


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.57it/s]


[ Train | 019/300 ] loss = 0.03867, acc = 0.99042


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.33it/s]


[ Valid | 019/300 ] loss = 1.17211, acc = 0.80170
[ Valid | 019/300 ] loss = 1.17211, acc = 0.80170


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.53it/s]


[ Train | 020/300 ] loss = 0.03850, acc = 0.98718


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.61it/s]


[ Valid | 020/300 ] loss = 1.05840, acc = 0.80452
[ Valid | 020/300 ] loss = 1.05840, acc = 0.80452


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.45it/s]


[ Train | 021/300 ] loss = 0.04747, acc = 0.98590


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.50it/s]


[ Valid | 021/300 ] loss = 1.07028, acc = 0.79619
[ Valid | 021/300 ] loss = 1.07028, acc = 0.79619


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.57it/s]


[ Train | 022/300 ] loss = 0.03040, acc = 0.99079


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.49it/s]


[ Valid | 022/300 ] loss = 0.96008, acc = 0.81276
[ Valid | 022/300 ] loss = 0.96008, acc = 0.81276 -> best
Best model found at epoch 21, saving model


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.51it/s]


[ Train | 023/300 ] loss = 0.00489, acc = 0.99910


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.38it/s]


[ Valid | 023/300 ] loss = 0.91455, acc = 0.82730
[ Valid | 023/300 ] loss = 0.91455, acc = 0.82730 -> best
Best model found at epoch 22, saving model


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.61it/s]


[ Train | 024/300 ] loss = 0.02823, acc = 0.99445


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.27it/s]


[ Valid | 024/300 ] loss = 0.93920, acc = 0.82456
[ Valid | 024/300 ] loss = 0.93920, acc = 0.82456


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.55it/s]


[ Train | 025/300 ] loss = 0.03184, acc = 0.99042


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.41it/s]


[ Valid | 025/300 ] loss = 1.01123, acc = 0.81450
[ Valid | 025/300 ] loss = 1.01123, acc = 0.81450


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.37it/s]


[ Train | 026/300 ] loss = 0.03212, acc = 0.98900


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.52it/s]


[ Valid | 026/300 ] loss = 1.07438, acc = 0.80069
[ Valid | 026/300 ] loss = 1.07438, acc = 0.80069


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.62it/s]


[ Train | 027/300 ] loss = 0.06363, acc = 0.97985


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.15it/s]


[ Valid | 027/300 ] loss = 1.17190, acc = 0.78867
[ Valid | 027/300 ] loss = 1.17190, acc = 0.78867


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.64it/s]


[ Train | 028/300 ] loss = 0.05258, acc = 0.98387


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.06it/s]


[ Valid | 028/300 ] loss = 1.09915, acc = 0.78412
[ Valid | 028/300 ] loss = 1.09915, acc = 0.78412


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.40it/s]


[ Train | 029/300 ] loss = 0.01810, acc = 0.99459


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.59it/s]


[ Valid | 029/300 ] loss = 0.91353, acc = 0.81819
[ Valid | 029/300 ] loss = 0.91353, acc = 0.81819


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.38it/s]


[ Train | 030/300 ] loss = 0.00405, acc = 0.99870


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.46it/s]


[ Valid | 030/300 ] loss = 0.87439, acc = 0.82760
[ Valid | 030/300 ] loss = 0.87439, acc = 0.82760 -> best
Best model found at epoch 29, saving model


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.38it/s]


[ Train | 031/300 ] loss = 0.00475, acc = 0.99842


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.63it/s]


[ Valid | 031/300 ] loss = 0.93415, acc = 0.82211
[ Valid | 031/300 ] loss = 0.93415, acc = 0.82211


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.74it/s]


[ Train | 032/300 ] loss = 0.02802, acc = 0.99369


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.82it/s]


[ Valid | 032/300 ] loss = 0.99894, acc = 0.81305
[ Valid | 032/300 ] loss = 0.99894, acc = 0.81305


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.41it/s]


[ Train | 033/300 ] loss = 0.03402, acc = 0.98848


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.91it/s]


[ Valid | 033/300 ] loss = 0.98939, acc = 0.81696
[ Valid | 033/300 ] loss = 0.98939, acc = 0.81696


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.73it/s]


[ Train | 034/300 ] loss = 0.01501, acc = 0.99461


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.39it/s]


[ Valid | 034/300 ] loss = 1.05366, acc = 0.81574
[ Valid | 034/300 ] loss = 1.05366, acc = 0.81574


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.26it/s]


[ Train | 035/300 ] loss = 0.02950, acc = 0.99239


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.00it/s]


[ Valid | 035/300 ] loss = 1.17290, acc = 0.79185
[ Valid | 035/300 ] loss = 1.17290, acc = 0.79185


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.59it/s]


[ Train | 036/300 ] loss = 0.02217, acc = 0.99269


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.38it/s]


[ Valid | 036/300 ] loss = 1.10091, acc = 0.80415
[ Valid | 036/300 ] loss = 1.10091, acc = 0.80415


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.40it/s]


[ Train | 037/300 ] loss = 0.07054, acc = 0.97933


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.74it/s]


[ Valid | 037/300 ] loss = 1.28729, acc = 0.78360
[ Valid | 037/300 ] loss = 1.28729, acc = 0.78360


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.59it/s]


[ Train | 038/300 ] loss = 0.06079, acc = 0.98129


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.70it/s]


[ Valid | 038/300 ] loss = 1.10456, acc = 0.78034
[ Valid | 038/300 ] loss = 1.10456, acc = 0.78034


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.72it/s]


[ Train | 039/300 ] loss = 0.03112, acc = 0.99151


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.77it/s]


[ Valid | 039/300 ] loss = 0.93171, acc = 0.81914
[ Valid | 039/300 ] loss = 0.93171, acc = 0.81914


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.51it/s]


[ Train | 040/300 ] loss = 0.02312, acc = 0.99289


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.39it/s]


[ Valid | 040/300 ] loss = 1.05122, acc = 0.80683
[ Valid | 040/300 ] loss = 1.05122, acc = 0.80683


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.41it/s]


[ Train | 041/300 ] loss = 0.00752, acc = 0.99750


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.31it/s]


[ Valid | 041/300 ] loss = 0.96211, acc = 0.82173
[ Valid | 041/300 ] loss = 0.96211, acc = 0.82173


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.41it/s]


[ Train | 042/300 ] loss = 0.01713, acc = 0.99479


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.63it/s]


[ Valid | 042/300 ] loss = 0.97852, acc = 0.82846
[ Valid | 042/300 ] loss = 0.97852, acc = 0.82846 -> best
Best model found at epoch 41, saving model


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.58it/s]


[ Train | 043/300 ] loss = 0.00949, acc = 0.99671


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.53it/s]


[ Valid | 043/300 ] loss = 1.03309, acc = 0.82441
[ Valid | 043/300 ] loss = 1.03309, acc = 0.82441


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.63it/s]


[ Train | 044/300 ] loss = 0.04436, acc = 0.98820


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.86it/s]


[ Valid | 044/300 ] loss = 1.12176, acc = 0.78969
[ Valid | 044/300 ] loss = 1.12176, acc = 0.78969


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.77it/s]


[ Train | 045/300 ] loss = 0.03371, acc = 0.98968


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.57it/s]


[ Valid | 045/300 ] loss = 1.11946, acc = 0.78802
[ Valid | 045/300 ] loss = 1.11946, acc = 0.78802


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.57it/s]


[ Train | 046/300 ] loss = 0.02702, acc = 0.99411


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.51it/s]


[ Valid | 046/300 ] loss = 1.04191, acc = 0.80971
[ Valid | 046/300 ] loss = 1.04191, acc = 0.80971


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.52it/s]


[ Train | 047/300 ] loss = 0.03022, acc = 0.99349


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.27it/s]


[ Valid | 047/300 ] loss = 1.08050, acc = 0.80806
[ Valid | 047/300 ] loss = 1.08050, acc = 0.80806


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.39it/s]


[ Train | 048/300 ] loss = 0.03301, acc = 0.99139


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.73it/s]


[ Valid | 048/300 ] loss = 0.97518, acc = 0.80819
[ Valid | 048/300 ] loss = 0.97518, acc = 0.80819


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.29it/s]


[ Train | 049/300 ] loss = 0.02279, acc = 0.99293


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.63it/s]


[ Valid | 049/300 ] loss = 1.13808, acc = 0.79663
[ Valid | 049/300 ] loss = 1.13808, acc = 0.79663


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.29it/s]


[ Train | 050/300 ] loss = 0.02649, acc = 0.99239


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.10it/s]


[ Valid | 050/300 ] loss = 1.15304, acc = 0.80791
[ Valid | 050/300 ] loss = 1.15304, acc = 0.80791


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.59it/s]


[ Train | 051/300 ] loss = 0.01119, acc = 0.99641


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.68it/s]


[ Valid | 051/300 ] loss = 0.99790, acc = 0.81798
[ Valid | 051/300 ] loss = 0.99790, acc = 0.81798


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.58it/s]


[ Train | 052/300 ] loss = 0.02407, acc = 0.99249


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.44it/s]


[ Valid | 052/300 ] loss = 1.14266, acc = 0.79677
[ Valid | 052/300 ] loss = 1.14266, acc = 0.79677


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.48it/s]


[ Train | 053/300 ] loss = 0.00913, acc = 0.99730


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.29it/s]


[ Valid | 053/300 ] loss = 1.19534, acc = 0.79581
[ Valid | 053/300 ] loss = 1.19534, acc = 0.79581


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.70it/s]


[ Train | 054/300 ] loss = 0.01642, acc = 0.99379


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.45it/s]


[ Valid | 054/300 ] loss = 1.15601, acc = 0.79243
[ Valid | 054/300 ] loss = 1.15601, acc = 0.79243


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.43it/s]


[ Train | 055/300 ] loss = 0.00978, acc = 0.99639


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.81it/s]


[ Valid | 055/300 ] loss = 1.07147, acc = 0.80972
[ Valid | 055/300 ] loss = 1.07147, acc = 0.80972


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.82it/s]


[ Train | 056/300 ] loss = 0.01113, acc = 0.99679


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.05it/s]


[ Valid | 056/300 ] loss = 1.13015, acc = 0.80676
[ Valid | 056/300 ] loss = 1.13015, acc = 0.80676


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.63it/s]


[ Train | 057/300 ] loss = 0.02902, acc = 0.99273


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.77it/s]


[ Valid | 057/300 ] loss = 1.17875, acc = 0.80705
[ Valid | 057/300 ] loss = 1.17875, acc = 0.80705


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.67it/s]


[ Train | 058/300 ] loss = 0.03566, acc = 0.99028


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.05it/s]


[ Valid | 058/300 ] loss = 1.39464, acc = 0.78122
[ Valid | 058/300 ] loss = 1.39464, acc = 0.78122


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.73it/s]


[ Train | 059/300 ] loss = 0.01457, acc = 0.99531


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.72it/s]


[ Valid | 059/300 ] loss = 1.17050, acc = 0.80901
[ Valid | 059/300 ] loss = 1.17050, acc = 0.80901


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.44it/s]


[ Train | 060/300 ] loss = 0.02734, acc = 0.99189


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.98it/s]


[ Valid | 060/300 ] loss = 1.12138, acc = 0.80046
[ Valid | 060/300 ] loss = 1.12138, acc = 0.80046


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.48it/s]


[ Train | 061/300 ] loss = 0.02013, acc = 0.99391


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.65it/s]


[ Valid | 061/300 ] loss = 1.22646, acc = 0.79331
[ Valid | 061/300 ] loss = 1.22646, acc = 0.79331


100%|███████████████████████████████████████████| 78/78 [00:14<00:00,  5.39it/s]


[ Train | 062/300 ] loss = 0.02751, acc = 0.99179


100%|███████████████████████████████████████████| 27/27 [00:06<00:00,  4.47it/s]


[ Valid | 062/300 ] loss = 1.19269, acc = 0.79180
[ Valid | 062/300 ] loss = 1.19269, acc = 0.79180


100%|███████████████████████████████████████████| 78/78 [00:13<00:00,  5.65it/s]


[ Train | 063/300 ] loss = 0.02340, acc = 0.99429


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.84it/s]

[ Valid | 063/300 ] loss = 1.12520, acc = 0.79482
[ Valid | 063/300 ] loss = 1.12520, acc = 0.79482
No improvment 20 consecutive epochs, early stopping





0,1
average_training_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
average_validation_loss,▁▂▄▅▄▄▄▃▄█▄▄▄▄▃▃▄▄▃▃▄▄▅▆▃▄▃▄▅▄▃▅▄▅▄▅▇▅▅▅
step_training_accuracy,▁▅▇▆▇▇▇▇▇██▇█▇██▇▇█████▇████▇█▇█████▇███
step_training_loss,█▄▂▄▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▂▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁
step_validation_accuracy,▅▄▅▁▄▅▅█▅▂▆▆▄▆█▄▆▅▇▇▅▅▆▅▅▅▅▆▄▅▃▅▇▅▅▇▅▅▃▇
step_validation_loss,▂▃▃█▅▆▃▂▄█▃▄▄▂▁▆▃▇▅▁▄▄▃▆▅▄▄▃▆▅▇▄▅▄▅▂▅▆▇▄

0,1
average_training_loss,0.0234
average_validation_loss,1.1252
step_training_accuracy,1.0
step_training_loss,0.09002
step_validation_accuracy,0.76471
step_validation_loss,1.29305


In [11]:
test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=num_cpu, pin_memory=True)

One ./food-11/test sample ./food-11/test/0_0.jpg


# Testing and generate prediction CSV

In [14]:
model_best = torchvision.models.resnet18(pretrained=True).to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
test_accs = []
true_labels = []
with torch.no_grad():
    for data, labels in test_loader:
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)

        acc = (test_pred.argmax(dim=-1) == labels.to(device)).float().mean()
        test_accs.append(acc)
        prediction += test_label.squeeze().tolist()
        true_labels.extend(labels.tolist())

test_acc = sum(test_accs) / len(test_accs)

print(f"Test accurary: {test_acc}")

Test accurary: 0.8432474732398987


In [15]:
#create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
df["Category"] = prediction
df.to_csv("prediction_resnet18.csv",index = False)

df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
df["Category"] = true_labels
df.to_csv("true_labels.csv",index = False)