In [4]:
!export WANDB_NOTEBOOK_NAME="CNN1"

In [5]:
! nvidia-smi -L

GPU 0: NVIDIA GeForce RTX 3090 (UUID: GPU-72fdbd81-da45-b750-3719-ae5877e26726)


In [6]:
import multiprocessing as mp
num_cpu = mp.cpu_count()
num_cpu

12

# Training

In [7]:
_exp_name = "CNN1"

In [8]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import wandb

In [9]:
myseed = 4012  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

## **Transforms**

In [10]:
test_tfm = transforms.Compose([
    # (height = width = 128)
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

train_tfm = transforms.Compose([
    # (height = width = 128)
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])


## **Datasets**
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [11]:
class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        # print(path)
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
        print(f"One {path} sample",self.files[0])
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        #im = self.data[idx]
        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1 # test has no label
        return im,label



In [12]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input dimension [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

In [13]:
batch_size = 128
_dataset_dir = "./food-11"

train_set = FoodDataset(os.path.join(_dataset_dir,"training"), tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=num_cpu, pin_memory=True)
valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=num_cpu, pin_memory=True)

One ./food-11/training sample ./food-11/training/0_0.jpg
One ./food-11/validation sample ./food-11/validation/0_0.jpg


In [14]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The number of training epochs and patience.
n_epochs = 300
patience = 20 # If no improvement in 'patience' epochs, early stop

# Initialize a model, and put it on the device specified.
model = Classifier().to(device)

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5) 

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

wandb.init(
    project="Food-11",
    config={
        "learning rate": 0.0003,
        "architecture": "CNN",
        "epochs": n_epochs,
        "batch_size": batch_size,
        "image_dim": 128,
    }
)

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        wandb.log({"step_training_loss": loss.item()})
        wandb.log({"step_training_accuracy": acc})
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        wandb.log({"step_validation_loss": loss.item()})
        wandb.log({"step_validation_accuracy": acc})
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_acc > best_acc:
        with open(f"{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

    wandb.log({
        "average_training_loss": train_loss,
        "average_validation_loss": valid_loss,
    })
    
    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break
wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnickwkt[0m. Use [1m`wandb login --relogin`[0m to force relogin


100%|███████████████████████████████████████████| 78/78 [00:07<00:00, 11.06it/s]


[ Train | 001/300 ] loss = 1.92350, acc = 0.32993


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 12.52it/s]


[ Valid | 001/300 ] loss = 2.06868, acc = 0.32665
[ Valid | 001/300 ] loss = 2.06868, acc = 0.32665 -> best
Best model found at epoch 0, saving model


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.18it/s]


[ Train | 002/300 ] loss = 1.63341, acc = 0.43852


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.13it/s]


[ Valid | 002/300 ] loss = 1.59052, acc = 0.45535
[ Valid | 002/300 ] loss = 1.59052, acc = 0.45535 -> best
Best model found at epoch 1, saving model


100%|███████████████████████████████████████████| 78/78 [00:07<00:00, 11.14it/s]


[ Train | 003/300 ] loss = 1.43095, acc = 0.50274


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 10.14it/s]


[ Valid | 003/300 ] loss = 1.51105, acc = 0.48313
[ Valid | 003/300 ] loss = 1.51105, acc = 0.48313 -> best
Best model found at epoch 2, saving model


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.27it/s]


[ Train | 004/300 ] loss = 1.24740, acc = 0.56468


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.58it/s]


[ Valid | 004/300 ] loss = 1.43020, acc = 0.52299
[ Valid | 004/300 ] loss = 1.43020, acc = 0.52299 -> best
Best model found at epoch 3, saving model


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.36it/s]


[ Train | 005/300 ] loss = 1.09469, acc = 0.62127


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.64it/s]


[ Valid | 005/300 ] loss = 1.57051, acc = 0.51157
[ Valid | 005/300 ] loss = 1.57051, acc = 0.51157


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.37it/s]


[ Train | 006/300 ] loss = 0.98906, acc = 0.65465


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.78it/s]


[ Valid | 006/300 ] loss = 1.26941, acc = 0.59631
[ Valid | 006/300 ] loss = 1.26941, acc = 0.59631 -> best
Best model found at epoch 5, saving model


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.29it/s]


[ Train | 007/300 ] loss = 0.86047, acc = 0.69848


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 12.25it/s]


[ Valid | 007/300 ] loss = 1.28217, acc = 0.59173
[ Valid | 007/300 ] loss = 1.28217, acc = 0.59173


100%|███████████████████████████████████████████| 78/78 [00:07<00:00, 11.13it/s]


[ Train | 008/300 ] loss = 0.73907, acc = 0.73836


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 10.62it/s]


[ Valid | 008/300 ] loss = 1.50033, acc = 0.54399
[ Valid | 008/300 ] loss = 1.50033, acc = 0.54399


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.29it/s]


[ Train | 009/300 ] loss = 0.68381, acc = 0.76699


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 10.46it/s]


[ Valid | 009/300 ] loss = 1.65322, acc = 0.53061
[ Valid | 009/300 ] loss = 1.65322, acc = 0.53061


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.33it/s]


[ Train | 010/300 ] loss = 0.55872, acc = 0.80641


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.32it/s]


[ Valid | 010/300 ] loss = 1.36321, acc = 0.61597
[ Valid | 010/300 ] loss = 1.36321, acc = 0.61597 -> best
Best model found at epoch 9, saving model


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.39it/s]


[ Train | 011/300 ] loss = 0.47378, acc = 0.83780


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 12.40it/s]


[ Valid | 011/300 ] loss = 1.56357, acc = 0.59667
[ Valid | 011/300 ] loss = 1.56357, acc = 0.59667


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.24it/s]


[ Train | 012/300 ] loss = 0.36872, acc = 0.87338


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.37it/s]


[ Valid | 012/300 ] loss = 1.46411, acc = 0.61105
[ Valid | 012/300 ] loss = 1.46411, acc = 0.61105


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.23it/s]


[ Train | 013/300 ] loss = 0.27388, acc = 0.90695


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.60it/s]


[ Valid | 013/300 ] loss = 2.22606, acc = 0.52865
[ Valid | 013/300 ] loss = 2.22606, acc = 0.52865


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.40it/s]


[ Train | 014/300 ] loss = 0.22898, acc = 0.92129


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.94it/s]


[ Valid | 014/300 ] loss = 1.95795, acc = 0.58212
[ Valid | 014/300 ] loss = 1.95795, acc = 0.58212


100%|███████████████████████████████████████████| 78/78 [00:07<00:00, 11.05it/s]


[ Train | 015/300 ] loss = 0.12483, acc = 0.96084


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 10.83it/s]


[ Valid | 015/300 ] loss = 1.53598, acc = 0.65106
[ Valid | 015/300 ] loss = 1.53598, acc = 0.65106 -> best
Best model found at epoch 14, saving model


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.24it/s]


[ Train | 016/300 ] loss = 0.08863, acc = 0.97496


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.51it/s]


[ Valid | 016/300 ] loss = 1.83303, acc = 0.62480
[ Valid | 016/300 ] loss = 1.83303, acc = 0.62480


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.29it/s]


[ Train | 017/300 ] loss = 0.13379, acc = 0.95274


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.32it/s]


[ Valid | 017/300 ] loss = 2.01477, acc = 0.59485
[ Valid | 017/300 ] loss = 2.01477, acc = 0.59485


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.31it/s]


[ Train | 018/300 ] loss = 0.11109, acc = 0.96224


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.36it/s]


[ Valid | 018/300 ] loss = 1.58727, acc = 0.65201
[ Valid | 018/300 ] loss = 1.58727, acc = 0.65201 -> best
Best model found at epoch 17, saving model


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.45it/s]


[ Train | 019/300 ] loss = 0.06244, acc = 0.98077


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.58it/s]


[ Valid | 019/300 ] loss = 2.54744, acc = 0.58834
[ Valid | 019/300 ] loss = 2.54744, acc = 0.58834


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.22it/s]


[ Train | 020/300 ] loss = 0.03434, acc = 0.99069


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.42it/s]


[ Valid | 020/300 ] loss = 2.31041, acc = 0.62408
[ Valid | 020/300 ] loss = 2.31041, acc = 0.62408


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.22it/s]


[ Train | 021/300 ] loss = 0.05998, acc = 0.97927


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.81it/s]


[ Valid | 021/300 ] loss = 2.10718, acc = 0.60859
[ Valid | 021/300 ] loss = 2.10718, acc = 0.60859


100%|███████████████████████████████████████████| 78/78 [00:07<00:00, 11.12it/s]


[ Train | 022/300 ] loss = 0.05425, acc = 0.98071


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 10.77it/s]


[ Valid | 022/300 ] loss = 3.39722, acc = 0.51751
[ Valid | 022/300 ] loss = 3.39722, acc = 0.51751


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.25it/s]


[ Train | 023/300 ] loss = 0.07498, acc = 0.97366


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 12.08it/s]


[ Valid | 023/300 ] loss = 2.55185, acc = 0.59326
[ Valid | 023/300 ] loss = 2.55185, acc = 0.59326


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.27it/s]


[ Train | 024/300 ] loss = 0.05042, acc = 0.98337


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 10.68it/s]


[ Valid | 024/300 ] loss = 1.99280, acc = 0.63905
[ Valid | 024/300 ] loss = 1.99280, acc = 0.63905


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.45it/s]


[ Train | 025/300 ] loss = 0.14797, acc = 0.95052


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.56it/s]


[ Valid | 025/300 ] loss = 1.85968, acc = 0.61366
[ Valid | 025/300 ] loss = 1.85968, acc = 0.61366


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.48it/s]


[ Train | 026/300 ] loss = 0.05907, acc = 0.98147


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 12.05it/s]


[ Valid | 026/300 ] loss = 1.96268, acc = 0.66084
[ Valid | 026/300 ] loss = 1.96268, acc = 0.66084 -> best
Best model found at epoch 25, saving model


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.60it/s]


[ Train | 027/300 ] loss = 0.01484, acc = 0.99639


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 10.53it/s]


[ Valid | 027/300 ] loss = 1.94259, acc = 0.66735
[ Valid | 027/300 ] loss = 1.94259, acc = 0.66735 -> best
Best model found at epoch 26, saving model


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.52it/s]


[ Train | 028/300 ] loss = 0.00529, acc = 0.99890


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.13it/s]


[ Valid | 028/300 ] loss = 2.12371, acc = 0.66495
[ Valid | 028/300 ] loss = 2.12371, acc = 0.66495


100%|███████████████████████████████████████████| 78/78 [00:07<00:00, 11.12it/s]


[ Train | 029/300 ] loss = 0.00134, acc = 1.00000


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.41it/s]


[ Valid | 029/300 ] loss = 1.95946, acc = 0.68919
[ Valid | 029/300 ] loss = 1.95946, acc = 0.68919 -> best
Best model found at epoch 28, saving model


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.25it/s]


[ Train | 030/300 ] loss = 0.00841, acc = 0.99744


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.22it/s]


[ Valid | 030/300 ] loss = 1.96271, acc = 0.68159
[ Valid | 030/300 ] loss = 1.96271, acc = 0.68159


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.20it/s]


[ Train | 031/300 ] loss = 0.07941, acc = 0.97350


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 10.85it/s]


[ Valid | 031/300 ] loss = 2.36959, acc = 0.60592
[ Valid | 031/300 ] loss = 2.36959, acc = 0.60592


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.24it/s]


[ Train | 032/300 ] loss = 0.07523, acc = 0.97540


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 10.99it/s]


[ Valid | 032/300 ] loss = 2.00486, acc = 0.65237
[ Valid | 032/300 ] loss = 2.00486, acc = 0.65237


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.34it/s]


[ Train | 033/300 ] loss = 0.02542, acc = 0.99209


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 12.11it/s]


[ Valid | 033/300 ] loss = 2.35941, acc = 0.63891
[ Valid | 033/300 ] loss = 2.35941, acc = 0.63891


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.48it/s]


[ Train | 034/300 ] loss = 0.03194, acc = 0.99050


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 12.47it/s]


[ Valid | 034/300 ] loss = 2.78760, acc = 0.59802
[ Valid | 034/300 ] loss = 2.78760, acc = 0.59802


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.65it/s]


[ Train | 035/300 ] loss = 0.04611, acc = 0.98417


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.53it/s]


[ Valid | 035/300 ] loss = 2.20731, acc = 0.63702
[ Valid | 035/300 ] loss = 2.20731, acc = 0.63702


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.58it/s]


[ Train | 036/300 ] loss = 0.04974, acc = 0.98199


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.65it/s]


[ Valid | 036/300 ] loss = 2.64558, acc = 0.60961
[ Valid | 036/300 ] loss = 2.64558, acc = 0.60961


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.55it/s]


[ Train | 037/300 ] loss = 0.06727, acc = 0.97786


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.72it/s]


[ Valid | 037/300 ] loss = 2.38080, acc = 0.61388
[ Valid | 037/300 ] loss = 2.38080, acc = 0.61388


100%|███████████████████████████████████████████| 78/78 [00:07<00:00, 10.96it/s]


[ Train | 038/300 ] loss = 0.03566, acc = 0.98738


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 12.02it/s]


[ Valid | 038/300 ] loss = 2.33451, acc = 0.64274
[ Valid | 038/300 ] loss = 2.33451, acc = 0.64274


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.44it/s]


[ Train | 039/300 ] loss = 0.02302, acc = 0.99269


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 10.64it/s]


[ Valid | 039/300 ] loss = 2.44354, acc = 0.63102
[ Valid | 039/300 ] loss = 2.44354, acc = 0.63102


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.51it/s]


[ Train | 040/300 ] loss = 0.03385, acc = 0.98790


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.34it/s]


[ Valid | 040/300 ] loss = 2.80426, acc = 0.61046
[ Valid | 040/300 ] loss = 2.80426, acc = 0.61046


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.40it/s]


[ Train | 041/300 ] loss = 0.10743, acc = 0.96446


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.79it/s]


[ Valid | 041/300 ] loss = 2.56706, acc = 0.59992
[ Valid | 041/300 ] loss = 2.56706, acc = 0.59992


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.15it/s]


[ Train | 042/300 ] loss = 0.04785, acc = 0.98233


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.79it/s]


[ Valid | 042/300 ] loss = 2.36069, acc = 0.64780
[ Valid | 042/300 ] loss = 2.36069, acc = 0.64780


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.29it/s]


[ Train | 043/300 ] loss = 0.04396, acc = 0.98518


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.80it/s]


[ Valid | 043/300 ] loss = 2.35807, acc = 0.65302
[ Valid | 043/300 ] loss = 2.35807, acc = 0.65302


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.34it/s]


[ Train | 044/300 ] loss = 0.02048, acc = 0.99261


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 12.20it/s]


[ Valid | 044/300 ] loss = 2.48145, acc = 0.63920
[ Valid | 044/300 ] loss = 2.48145, acc = 0.63920


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.60it/s]


[ Train | 045/300 ] loss = 0.05631, acc = 0.98237


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 12.06it/s]


[ Valid | 045/300 ] loss = 2.32207, acc = 0.64514
[ Valid | 045/300 ] loss = 2.32207, acc = 0.64514


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.18it/s]


[ Train | 046/300 ] loss = 0.01877, acc = 0.99341


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 10.87it/s]


[ Valid | 046/300 ] loss = 2.71077, acc = 0.62524
[ Valid | 046/300 ] loss = 2.71077, acc = 0.62524


100%|███████████████████████████████████████████| 78/78 [00:07<00:00, 10.96it/s]


[ Train | 047/300 ] loss = 0.04461, acc = 0.98680


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.62it/s]


[ Valid | 047/300 ] loss = 2.03417, acc = 0.66502
[ Valid | 047/300 ] loss = 2.03417, acc = 0.66502


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.46it/s]


[ Train | 048/300 ] loss = 0.03536, acc = 0.98838


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 11.29it/s]


[ Valid | 048/300 ] loss = 2.67511, acc = 0.62162
[ Valid | 048/300 ] loss = 2.67511, acc = 0.62162


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.53it/s]


[ Train | 049/300 ] loss = 0.00931, acc = 0.99700


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 12.22it/s]


[ Valid | 049/300 ] loss = 2.23428, acc = 0.67147
[ Valid | 049/300 ] loss = 2.23428, acc = 0.67147


100%|███████████████████████████████████████████| 78/78 [00:06<00:00, 11.62it/s]


[ Train | 050/300 ] loss = 0.00265, acc = 0.99960


100%|███████████████████████████████████████████| 27/27 [00:02<00:00, 12.11it/s]

[ Valid | 050/300 ] loss = 2.27854, acc = 0.67631
[ Valid | 050/300 ] loss = 2.27854, acc = 0.67631
No improvment 20 consecutive epochs, early stopping





VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_training_loss,█▇▆▆▅▄▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
average_validation_loss,▄▂▂▂▁▁▂▂▂▂▄▃▃▃▂▅▄█▅▃▃▃▄▃▅▃▅▆▆▅▅▅▅▅▅▅▆▄▆▄
step_training_accuracy,▁▂▂▃▄▄▅▅▇▆▇▇█▇▇███▇▇█████████▇██████████
step_training_loss,█▇▆▆▅▄▄▄▂▃▂▂▁▂▁▁▁▁▁▃▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁
step_validation_accuracy,▁▄▃▄▄▅▄▆▄▆▄▇▄▅▇▄▆▄▇▆▇▇▇▆▆▇▇▇▆▅▇▆▆███▆▇▇▆
step_validation_loss,▄▂▂▃▂▂▃▁▃▃▄▃▅▄▂█▄▇▂▂▄▄▂▄▃▄▄▃▅▅▄▅▅▄▄▄▇▃▄▇

0,1
average_training_loss,0.00265
average_validation_loss,2.27854
step_training_accuracy,1.0
step_training_loss,0.07469
step_validation_accuracy,0.62745
step_validation_loss,2.47212


In [15]:
test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=num_cpu, pin_memory=True)

One ./food-11/test sample ./food-11/test/0_0.jpg


# Testing and generate prediction CSV

In [20]:
model_best = Classifier().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
test_accs = []
true_labels = []
with torch.no_grad():
    for data, labels in test_loader:
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)

        acc = (test_pred.argmax(dim=-1) == labels.to(device)).float().mean()
        test_accs.append(acc)
        prediction += test_label.squeeze().tolist()
        true_labels.extend(labels.tolist())

test_acc = sum(test_accs) / len(test_accs)

print(f"Test accurary: {test_acc}")

Test accurary: 0.7231664061546326


In [21]:
#create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
df["Category"] = prediction
df.to_csv("prediction_CNN1.csv",index = False)

df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
df["Category"] = true_labels
df.to_csv("true_labels.csv",index = False)