# STAT4012 Project

In [1]:
! nvidia-smi -L

GPU 0: NVIDIA GeForce RTX 3090 (UUID: GPU-72fdbd81-da45-b750-3719-ae5877e26726)


In [2]:
import multiprocessing as mp
num_cpu = mp.cpu_count()
num_cpu

12

# Training

In [3]:
_exp_name = "alexnet_pretrained"

In [4]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import wandb

In [5]:
myseed = 4012  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

## **Transforms**

In [6]:
test_tfm = transforms.Compose([
    # (height = width = 128)
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

train_tfm = transforms.Compose([
    # (height = width = 128)
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])


## **Datasets**
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [7]:
class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        # print(path)
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
        print(f"One {path} sample",self.files[0])
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        #im = self.data[idx]
        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1 # test has no label
        return im,label



In [8]:
batch_size = 128
_dataset_dir = "./food-11"

train_set = FoodDataset(os.path.join(_dataset_dir,"training"), tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=num_cpu, pin_memory=True)
valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=num_cpu, pin_memory=True)

One ./food-11/training sample ./food-11/training/0_0.jpg
One ./food-11/validation sample ./food-11/validation/0_0.jpg


In [9]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The number of training epochs and patience.
n_epochs = 300
patience = 20 # If no improvement in 'patience' epochs, early stop

# Initialize a model, and put it on the device specified.
model = torchvision.models.alexnet(pretrained=True).to(device)

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5) 

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

wandb.init(
    project="Food-11",
    config={
        "learning rate": 0.0003,
        "architecture": "CNN",
        "epochs": n_epochs,
        "batch_size": batch_size,
        "image_dim": 128,
    }
)

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        wandb.log({"step_training_loss": loss.item()})
        wandb.log({"step_training_accuracy": acc})
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        wandb.log({"step_validation_loss": loss.item()})
        wandb.log({"step_validation_accuracy": acc})
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_acc > best_acc:
        with open(f"{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

    wandb.log({
        "average_training_loss": train_loss,
        "average_validation_loss": valid_loss,
    })
    
    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break
wandb.finish()

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /home/user/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth


  0%|          | 0.00/233M [00:00<?, ?B/s]

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnickwkt[0m. Use [1m`wandb login --relogin`[0m to force relogin


100%|███████████████████████████████████████████| 78/78 [00:11<00:00,  6.63it/s]


[ Train | 001/300 ] loss = 1.81576, acc = 0.47163


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.99it/s]


[ Valid | 001/300 ] loss = 1.46176, acc = 0.52423
[ Valid | 001/300 ] loss = 1.46176, acc = 0.52423 -> best
Best model found at epoch 0, saving model


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.13it/s]


[ Train | 002/300 ] loss = 0.93148, acc = 0.68538


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.07it/s]


[ Valid | 002/300 ] loss = 0.96195, acc = 0.68210
[ Valid | 002/300 ] loss = 0.96195, acc = 0.68210 -> best
Best model found at epoch 1, saving model


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.74it/s]


[ Train | 003/300 ] loss = 0.66552, acc = 0.77272


100%|███████████████████████████████████████████| 27/27 [00:04<00:00,  5.43it/s]


[ Valid | 003/300 ] loss = 1.07867, acc = 0.66228
[ Valid | 003/300 ] loss = 1.07867, acc = 0.66228


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.70it/s]


[ Train | 004/300 ] loss = 0.48554, acc = 0.84056


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.23it/s]


[ Valid | 004/300 ] loss = 1.04698, acc = 0.68955
[ Valid | 004/300 ] loss = 1.04698, acc = 0.68955 -> best
Best model found at epoch 3, saving model


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.70it/s]


[ Train | 005/300 ] loss = 0.29747, acc = 0.89968


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.20it/s]


[ Valid | 005/300 ] loss = 1.27285, acc = 0.67560
[ Valid | 005/300 ] loss = 1.27285, acc = 0.67560


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.37it/s]


[ Train | 006/300 ] loss = 0.19804, acc = 0.93349


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.01it/s]


[ Valid | 006/300 ] loss = 1.28322, acc = 0.70821
[ Valid | 006/300 ] loss = 1.28322, acc = 0.70821 -> best
Best model found at epoch 5, saving model


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.36it/s]


[ Train | 007/300 ] loss = 0.13716, acc = 0.95395


100%|███████████████████████████████████████████| 27/27 [00:04<00:00,  5.62it/s]


[ Valid | 007/300 ] loss = 1.25372, acc = 0.70735
[ Valid | 007/300 ] loss = 1.25372, acc = 0.70735


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.44it/s]


[ Train | 008/300 ] loss = 0.15937, acc = 0.94822


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.86it/s]


[ Valid | 008/300 ] loss = 1.33037, acc = 0.69910
[ Valid | 008/300 ] loss = 1.33037, acc = 0.69910


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.37it/s]


[ Train | 009/300 ] loss = 0.11322, acc = 0.96234


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.15it/s]


[ Valid | 009/300 ] loss = 1.38053, acc = 0.69904
[ Valid | 009/300 ] loss = 1.38053, acc = 0.69904


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.22it/s]


[ Train | 010/300 ] loss = 0.08054, acc = 0.97526


100%|███████████████████████████████████████████| 27/27 [00:04<00:00,  5.74it/s]


[ Valid | 010/300 ] loss = 1.38642, acc = 0.70967
[ Valid | 010/300 ] loss = 1.38642, acc = 0.70967 -> best
Best model found at epoch 9, saving model


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.75it/s]


[ Train | 011/300 ] loss = 0.09795, acc = 0.96769


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.29it/s]


[ Valid | 011/300 ] loss = 1.52628, acc = 0.69910
[ Valid | 011/300 ] loss = 1.52628, acc = 0.69910


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.27it/s]


[ Train | 012/300 ] loss = 0.09273, acc = 0.97065


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.81it/s]


[ Valid | 012/300 ] loss = 1.43871, acc = 0.71415
[ Valid | 012/300 ] loss = 1.43871, acc = 0.71415 -> best
Best model found at epoch 11, saving model


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.50it/s]


[ Train | 013/300 ] loss = 0.05311, acc = 0.98217


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.94it/s]


[ Valid | 013/300 ] loss = 1.61533, acc = 0.70539
[ Valid | 013/300 ] loss = 1.61533, acc = 0.70539


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.67it/s]


[ Train | 014/300 ] loss = 0.05272, acc = 0.98237


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.13it/s]


[ Valid | 014/300 ] loss = 1.50574, acc = 0.72131
[ Valid | 014/300 ] loss = 1.50574, acc = 0.72131 -> best
Best model found at epoch 13, saving model


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.11it/s]


[ Train | 015/300 ] loss = 0.05085, acc = 0.98427


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.87it/s]


[ Valid | 015/300 ] loss = 1.50578, acc = 0.71537
[ Valid | 015/300 ] loss = 1.50578, acc = 0.71537


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.23it/s]


[ Train | 016/300 ] loss = 0.05818, acc = 0.98087


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.85it/s]


[ Valid | 016/300 ] loss = 1.61012, acc = 0.70728
[ Valid | 016/300 ] loss = 1.61012, acc = 0.70728


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.45it/s]


[ Train | 017/300 ] loss = 0.05865, acc = 0.98077


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.89it/s]


[ Valid | 017/300 ] loss = 1.57864, acc = 0.71438
[ Valid | 017/300 ] loss = 1.57864, acc = 0.71438


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.51it/s]


[ Train | 018/300 ] loss = 0.06647, acc = 0.97937


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.13it/s]


[ Valid | 018/300 ] loss = 1.52549, acc = 0.70401
[ Valid | 018/300 ] loss = 1.52549, acc = 0.70401


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.40it/s]


[ Train | 019/300 ] loss = 0.06216, acc = 0.97889


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.95it/s]


[ Valid | 019/300 ] loss = 1.82232, acc = 0.68926
[ Valid | 019/300 ] loss = 1.82232, acc = 0.68926


100%|███████████████████████████████████████████| 78/78 [00:11<00:00,  6.99it/s]


[ Train | 020/300 ] loss = 0.06871, acc = 0.97887


100%|███████████████████████████████████████████| 27/27 [00:04<00:00,  5.44it/s]


[ Valid | 020/300 ] loss = 1.47951, acc = 0.70265
[ Valid | 020/300 ] loss = 1.47951, acc = 0.70265


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.77it/s]


[ Train | 021/300 ] loss = 0.05730, acc = 0.98209


100%|███████████████████████████████████████████| 27/27 [00:04<00:00,  5.54it/s]


[ Valid | 021/300 ] loss = 1.50702, acc = 0.72320
[ Valid | 021/300 ] loss = 1.50702, acc = 0.72320 -> best
Best model found at epoch 20, saving model


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.40it/s]


[ Train | 022/300 ] loss = 0.07405, acc = 0.97606


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.63it/s]


[ Valid | 022/300 ] loss = 1.61675, acc = 0.71366
[ Valid | 022/300 ] loss = 1.61675, acc = 0.71366


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.27it/s]


[ Train | 023/300 ] loss = 0.05914, acc = 0.98227


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.16it/s]


[ Valid | 023/300 ] loss = 1.43763, acc = 0.70539
[ Valid | 023/300 ] loss = 1.43763, acc = 0.70539


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.11it/s]


[ Train | 024/300 ] loss = 0.03331, acc = 0.99018


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.91it/s]


[ Valid | 024/300 ] loss = 1.89019, acc = 0.69823
[ Valid | 024/300 ] loss = 1.89019, acc = 0.69823


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.45it/s]


[ Train | 025/300 ] loss = 0.05051, acc = 0.98387


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.89it/s]


[ Valid | 025/300 ] loss = 1.59618, acc = 0.69946
[ Valid | 025/300 ] loss = 1.59618, acc = 0.69946


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.34it/s]


[ Train | 026/300 ] loss = 0.03985, acc = 0.98768


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.84it/s]


[ Valid | 026/300 ] loss = 1.61018, acc = 0.71307
[ Valid | 026/300 ] loss = 1.61018, acc = 0.71307


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.52it/s]


[ Train | 027/300 ] loss = 0.03765, acc = 0.98758


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.11it/s]


[ Valid | 027/300 ] loss = 1.84221, acc = 0.69679
[ Valid | 027/300 ] loss = 1.84221, acc = 0.69679


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.57it/s]


[ Train | 028/300 ] loss = 0.05089, acc = 0.98568


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.62it/s]


[ Valid | 028/300 ] loss = 1.81243, acc = 0.69491
[ Valid | 028/300 ] loss = 1.81243, acc = 0.69491


100%|███████████████████████████████████████████| 78/78 [00:11<00:00,  6.83it/s]


[ Train | 029/300 ] loss = 0.05764, acc = 0.98137


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.70it/s]


[ Valid | 029/300 ] loss = 1.64404, acc = 0.69063
[ Valid | 029/300 ] loss = 1.64404, acc = 0.69063


100%|███████████████████████████████████████████| 78/78 [00:11<00:00,  6.79it/s]


[ Train | 030/300 ] loss = 0.03325, acc = 0.98858


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.35it/s]


[ Valid | 030/300 ] loss = 1.54570, acc = 0.70959
[ Valid | 030/300 ] loss = 1.54570, acc = 0.70959


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.48it/s]


[ Train | 031/300 ] loss = 0.04430, acc = 0.98588


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.13it/s]


[ Valid | 031/300 ] loss = 1.86260, acc = 0.66748
[ Valid | 031/300 ] loss = 1.86260, acc = 0.66748


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.46it/s]


[ Train | 032/300 ] loss = 0.06262, acc = 0.97967


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.67it/s]


[ Valid | 032/300 ] loss = 1.80486, acc = 0.69331
[ Valid | 032/300 ] loss = 1.80486, acc = 0.69331


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.51it/s]


[ Train | 033/300 ] loss = 0.04130, acc = 0.98678


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.26it/s]


[ Valid | 033/300 ] loss = 1.58140, acc = 0.71305
[ Valid | 033/300 ] loss = 1.58140, acc = 0.71305


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.66it/s]


[ Train | 034/300 ] loss = 0.05854, acc = 0.98409


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.03it/s]


[ Valid | 034/300 ] loss = 1.60274, acc = 0.70149
[ Valid | 034/300 ] loss = 1.60274, acc = 0.70149


100%|███████████████████████████████████████████| 78/78 [00:11<00:00,  6.98it/s]


[ Train | 035/300 ] loss = 0.03991, acc = 0.98708


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.50it/s]


[ Valid | 035/300 ] loss = 1.90834, acc = 0.69491
[ Valid | 035/300 ] loss = 1.90834, acc = 0.69491


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.31it/s]


[ Train | 036/300 ] loss = 0.03293, acc = 0.98968


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.57it/s]


[ Valid | 036/300 ] loss = 1.91668, acc = 0.68768
[ Valid | 036/300 ] loss = 1.91668, acc = 0.68768


100%|███████████████████████████████████████████| 78/78 [00:11<00:00,  6.99it/s]


[ Train | 037/300 ] loss = 0.04925, acc = 0.98638


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.93it/s]


[ Valid | 037/300 ] loss = 1.60832, acc = 0.71625
[ Valid | 037/300 ] loss = 1.60832, acc = 0.71625


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.33it/s]


[ Train | 038/300 ] loss = 0.02038, acc = 0.99209


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  5.03it/s]


[ Valid | 038/300 ] loss = 1.76461, acc = 0.70669
[ Valid | 038/300 ] loss = 1.76461, acc = 0.70669


100%|███████████████████████████████████████████| 78/78 [00:10<00:00,  7.37it/s]


[ Train | 039/300 ] loss = 0.02297, acc = 0.99369


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.65it/s]


[ Valid | 039/300 ] loss = 1.91672, acc = 0.69157
[ Valid | 039/300 ] loss = 1.91672, acc = 0.69157


100%|███████████████████████████████████████████| 78/78 [00:11<00:00,  7.05it/s]


[ Train | 040/300 ] loss = 0.03497, acc = 0.98888


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.87it/s]


[ Valid | 040/300 ] loss = 1.88932, acc = 0.69635
[ Valid | 040/300 ] loss = 1.88932, acc = 0.69635


100%|███████████████████████████████████████████| 78/78 [00:11<00:00,  7.04it/s]


[ Train | 041/300 ] loss = 0.04494, acc = 0.98718


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.67it/s]


[ Valid | 041/300 ] loss = 1.72019, acc = 0.68949
[ Valid | 041/300 ] loss = 1.72019, acc = 0.68949


100%|███████████████████████████████████████████| 78/78 [00:11<00:00,  6.91it/s]


[ Train | 042/300 ] loss = 0.02232, acc = 0.99299


100%|███████████████████████████████████████████| 27/27 [00:05<00:00,  4.60it/s]

[ Valid | 042/300 ] loss = 1.78881, acc = 0.71487
[ Valid | 042/300 ] loss = 1.78881, acc = 0.71487
No improvment 20 consecutive epochs, early stopping





0,1
average_training_loss,█▅▄▃▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
average_validation_loss,▅▁▂▂▃▃▃▄▄▄▅▄▆▅▅▆▆▅▇▅▆▄█▆▆▇▇▆▅█▇▆▆██▆▇██▇
step_training_accuracy,▁▃▅▆▇▇█████▇████▇███████████████████████
step_training_loss,█▅▃▃▂▂▁▁▁▁▁▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
step_validation_accuracy,▁▅▄▆▅▆▇▇▆▅▅▆▇▅▅▆█▆▆▆▆▆▅▆▇▇▅▅▆▆▅▆▅▇▅▆▆▇▅▆
step_validation_loss,▃▂▂▂▃▂▁▃▄▆▅▄▃▄▅▆▁▄▅▅▄▄▇▃▃▅█▅▄▅▆▄▇▄▆▄▄▅▄▄

0,1
average_training_loss,0.02232
average_validation_loss,1.78881
step_training_accuracy,1.0
step_training_loss,0.01027
step_validation_accuracy,0.67647
step_validation_loss,2.27764


In [10]:
test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=num_cpu, pin_memory=True)

One ./food-11/test sample ./food-11/test/0_0.jpg


# Testing and generate prediction CSV

In [11]:
model_best = torchvision.models.alexnet(pretrained=True).to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
test_accs = []
true_labels = []
with torch.no_grad():
    for data, labels in test_loader:
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)

        acc = (test_pred.argmax(dim=-1) == labels.to(device)).float().mean()
        test_accs.append(acc)
        prediction += test_label.squeeze().tolist()
        true_labels.extend(labels.tolist())

test_acc = sum(test_accs) / len(test_accs)

print(f"Test accurary: {test_acc}")

Test accurary: 0.7317860722541809


In [12]:
#create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
df["Category"] = prediction
df.to_csv("prediction_alexnet.csv",index = False)

df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
df["Category"] = true_labels
df.to_csv("true_labels.csv",index = False)