# HW3 Image Classification
## We strongly recommend that you run with Kaggle for this homework
https://www.kaggle.com/c/ml2022spring-hw3b/code?competitionId=34954&sortBy=dateCreated

# Get Data
Note: if the links below are dead, you can download the data directly from Kaggle and upload it to the workspace, or use the Kaggle API to download the data straight into Colab.


In [None]:
#! wget https://www.dropbox.com/s/6l2vcvxl54b0b6w/food11.zip
# ! wget -O food11.zip "https://github.com/virginiakm1988/ML2022-Spring/blob/main/HW03/food11.zip?raw=true"

In [None]:
# ! unzip food11.zip

# Training

In [1]:
# Experiment tag; it is not referenced by any other cell visible in this notebook.
_exp_name = "sample"

In [1]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import torch.hub
import os
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

# This is for the progress bar.
from tqdm.auto import tqdm
import random

In [2]:
myseed = 3231  # set a random seed for reproducibility
# Ask cuDNN for deterministic kernels and disable its auto-tuner so that
# repeated runs pick the same algorithms.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Seed every RNG this notebook draws from: Python's `random` (used by
# split_set to shuffle the file list), NumPy (used by mixup_data to draw
# lambda) and torch.
random.seed(myseed)
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)
# Keep using the second GPU when there is one, fall back to the first GPU on
# single-GPU machines, and to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = "cuda:1" if torch.cuda.device_count() > 1 else "cuda:0"
else:
    device = "cpu"

In [3]:
# Show which device string was selected in the previous cell.
print(device)

cuda:1


## **Transforms**
Torchvision provides lots of useful utilities for image preprocessing, data wrapping as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [4]:
# Normally, we don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into a Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# Augmenting pipeline used for training.
# It can also be used in the testing phase: applying train_tfm several times
# yields a variety of images per sample whose predictions can be ensembled.
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # Random augmentations, applied to the PIL image before tensor conversion:
    # an affine jitter (degrees=18, translate up to 0.15 of each side,
    # scale factor in [0.8, 1.2]), a random horizontal flip, and
    # torchvision's AutoAugment with its default arguments.
    transforms.RandomAffine(degrees=18, translate=(0.15, 0.15), scale=(0.8, 1.2)),
    transforms.RandomHorizontalFlip(),
    transforms.AutoAugment(),
    # ToTensor() should be the last one of the transforms.
    transforms.ToTensor(),
])



## **Datasets**
Each image's label is encoded in its file name, so the image and its label are loaded together in `__getitem__`.

In [5]:
def split_set(path, ratio=0.8, files=None, shuffle=True):
    """Split a collection of ``.jpg`` paths into a leading and a trailing part.

    Args:
        path: Directory that is scanned for ``.jpg`` files when ``files`` is
            not given.
        ratio: Fraction (0 <= ratio <= 1) of the files placed in the first
            list. Exactly ``1`` means "do not split".
        files: Optional explicit list of file paths. When given it is used
            (as a copy, in the given order) instead of listing ``path``.
        shuffle: Shuffle the file list with the ``random`` module before
            splitting.

    Returns:
        ``(train_paths, valid_paths)`` when ``ratio < 1``; a single list with
        every path when ``ratio == 1``.

    Raises:
        ValueError: If ``ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= ratio <= 1:
        raise ValueError(f"ratio must be within [0, 1], got {ratio!r}")

    if files is None:
        files = sorted(
            os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")
        )
    else:
        # Copy so that shuffling never reorders the caller's list.
        files = list(files)

    if shuffle:
        random.shuffle(files)

    if ratio == 1:
        return files

    cut = int(ratio * len(files))
    return files[:cut], files[cut:]

In [6]:
class FoodDataset(Dataset):
    """Image dataset built from a list of file paths.

    The label of an image is the integer that precedes the first ``_`` in its
    file name. Files whose name does not start with such an integer (the
    unlabeled test images) get the label ``-1``.

    Args:
        pathes: Iterable of image file paths; stored sorted.
        tfm: Callable applied to the opened PIL image in ``__getitem__``.
    """

    def __init__(self, pathes, tfm=test_tfm):
        super().__init__()
        self.pathes = sorted(pathes)
        print(f"dataset with {len(self.pathes)} images")
        self.transform = tfm

    def __len__(self):
        return len(self.pathes)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the ``idx``-th path."""
        fname = self.pathes[idx]
        im = Image.open(fname)
        im = self.transform(im)
        # Strip the directory part whichever separator was used, so that the
        # label is parsed correctly on both Windows ("\\") and POSIX ("/").
        basename = fname.replace("\\", "/").rsplit("/", 1)[-1]
        try:
            label = int(basename.split("_")[0])
        except ValueError:
            label = -1  # test has no label
        return im, label



In [7]:
def mixup_data(x, y, alpha=1.0):
    '''Returns mixed inputs, pairs of targets, and lambda.

    Each sample in the batch is blended with another sample of the same batch
    chosen by a random permutation: ``lam * x + (1 - lam) * x[perm]``.

    Args:
        x: Batch tensor; the first dimension is the batch dimension.
        y: Targets, indexable along their first dimension by the permutation.
        alpha: Parameter of the Beta(alpha, alpha) distribution that lambda
            is drawn from. With ``alpha <= 0`` no mixing happens (lambda = 1).

    Returns:
        ``(mixed_x, y_a, y_b, lam)`` where ``y_a`` are the original targets,
        ``y_b`` the targets of the permuted partners and ``lam`` a Python
        float.
    '''
    if alpha > 0:
        # Convert to a plain float so no NumPy scalar leaks into tensor math.
        lam = float(np.random.beta(alpha, alpha))
    else:
        lam = 1.0

    batch_size = x.size(0)
    # Place the permutation on the same device as the data instead of relying
    # on a module-level `device` variable.
    index = torch.randperm(batch_size).to(x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Blend the loss against both target sets with weights lam and 1 - lam."""
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

In [8]:
batch_size = 128
_dataset_dir = "./food11"

# Construct datasets.
# Shuffle the labeled images under <_dataset_dir>/all and hold out 20% of
# them for validation.
train_image_paths, valid_image_paths = split_set(os.path.join(_dataset_dir,"all"), ratio = 0.8, shuffle = True)

# The training data contains every training image twice: once through the
# augmenting train_tfm and once through the plain test_tfm.
train_set = FoodDataset(train_image_paths,tfm=train_tfm)
train_set2 = FoodDataset(train_image_paths,tfm=test_tfm)
train_set_all = ConcatDataset([train_set,train_set2])
valid_set = FoodDataset(valid_image_paths,tfm=test_tfm)

print('training set:',len(train_set_all))

train_loader = DataLoader(train_set_all, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
# Validation needs no shuffling; a fixed order keeps the batch composition,
# and therefore the batch-averaged metrics, identical from run to run.
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)


dataset with 10636 images
dataset with 10636 images
dataset with 2660 images
training set: 21272


## **Models**

In [9]:
# torchvision's resnext50_32x4d, built with pretrained=False and an 11-way
# output layer, then moved to `device`.
# NOTE(review): the variable is spelled "RestNeXt" (not "ResNeXt"); the same
# spelling is used by every later cell and by the checkpoint/log file names.
RestNeXt = torchvision.models.resnext50_32x4d(pretrained=False, num_classes = 11).to(device)

# Train

In [11]:
# The number of training epochs and patience.
n_epochs = 3000
patience = 300 # If no improvement in 'patience' epochs, early stop
learning_rate = 0.0002
# The model itself (RestNeXt) is created in the "Models" cell; the sample
# baseline below is kept only for reference.
# model = Classifier().to(device)

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
# Earlier AdamW configurations, kept for reference:
# optimizer = torch.optim.AdamW(model.parameters(), lr=0.0003, weight_decay=1e-5) 
# optimizer = torch.optim.AdamW(RestNeXt.parameters(), betas=(0.9, 0.98), lr=learning_rate, weight_decay=0.0005)
optimizer = torch.optim.SGD(RestNeXt.parameters(), lr=learning_rate, momentum=0.9)
# Halve the learning rate (factor=0.5) when the monitored value has stopped
# decreasing ('min' mode). The scheduler's patience=20 is counted in
# scheduler.step() calls and is unrelated to the early-stopping `patience`
# above. With lr=0.0002 and min_lr=0.0001 the rate can be halved only once.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor = 0.5, patience = 20, min_lr = 0.0001)

In [12]:
# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0
RestNeXt.load_state_dict(torch.load("./RestNeXt_best.ckpt"))
for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    RestNeXt.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        imgs, labels = imgs.to(device), labels.to(device)
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)
#         inputs, targets_a, targets_b = map(Variable, (imgs,targets_a, targets_b))
        inputs, targets_a, targets_b, lam = mixup_data(imgs, labels,alpha = 1)
        # Forward the data. (Make sure data and model are on the same device.)
        logits = RestNeXt(inputs.to(device))
        
        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
#         loss = criterion(logits, labels.to(device))
#         loss = cross_entropy(logits, labels)
        loss = mixup_criterion(criterion, logits, targets_a, targets_b, lam)
        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(RestNeXt.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()
        
        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
        scheduler.step(loss)
        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
#     train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
#     print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}")
    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    RestNeXt.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        
        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = RestNeXt(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_acc > best_acc:
        with open(f"./RestNeXt_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./RestNeXt_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(RestNeXt.state_dict(),"./RestNeXt_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        print(stale)
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 001/3000 ] loss = 0.90355


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 001/3000 ] loss = 0.22659, acc = 0.96482
[ Valid | 001/3000 ] loss = 0.22659, acc = 0.96482 -> best
Best model found at epoch 0, saving model


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 002/3000 ] loss = 0.95487


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 002/3000 ] loss = 0.24676, acc = 0.96621
[ Valid | 002/3000 ] loss = 0.24676, acc = 0.96621 -> best
Best model found at epoch 1, saving model


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 003/3000 ] loss = 0.96611


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 003/3000 ] loss = 0.26518, acc = 0.96769
[ Valid | 003/3000 ] loss = 0.26518, acc = 0.96769 -> best
Best model found at epoch 2, saving model


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 004/3000 ] loss = 0.96618


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 004/3000 ] loss = 0.23903, acc = 0.96844
[ Valid | 004/3000 ] loss = 0.23903, acc = 0.96844 -> best
Best model found at epoch 3, saving model


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 005/3000 ] loss = 0.95478


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 005/3000 ] loss = 0.22561, acc = 0.96827
[ Valid | 005/3000 ] loss = 0.22561, acc = 0.96827
1


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 006/3000 ] loss = 0.94847


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 006/3000 ] loss = 0.21040, acc = 0.96865
[ Valid | 006/3000 ] loss = 0.21040, acc = 0.96865 -> best
Best model found at epoch 5, saving model


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 007/3000 ] loss = 0.95157


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 007/3000 ] loss = 0.24446, acc = 0.96955
[ Valid | 007/3000 ] loss = 0.24446, acc = 0.96955 -> best
Best model found at epoch 6, saving model


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 008/3000 ] loss = 0.92576


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 008/3000 ] loss = 0.26297, acc = 0.96743
[ Valid | 008/3000 ] loss = 0.26297, acc = 0.96743
1


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 009/3000 ] loss = 0.96111


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 009/3000 ] loss = 0.23953, acc = 0.96647
[ Valid | 009/3000 ] loss = 0.23953, acc = 0.96647
2


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 010/3000 ] loss = 0.93634


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 010/3000 ] loss = 0.26095, acc = 0.96854
[ Valid | 010/3000 ] loss = 0.26095, acc = 0.96854
3


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 011/3000 ] loss = 0.92972


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 011/3000 ] loss = 0.27880, acc = 0.96579
[ Valid | 011/3000 ] loss = 0.27880, acc = 0.96579
4


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 012/3000 ] loss = 0.96433


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 012/3000 ] loss = 0.29326, acc = 0.96577
[ Valid | 012/3000 ] loss = 0.29326, acc = 0.96577
5


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 013/3000 ] loss = 0.92473


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 013/3000 ] loss = 0.24639, acc = 0.96743
[ Valid | 013/3000 ] loss = 0.24639, acc = 0.96743
6


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 014/3000 ] loss = 0.96202


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 014/3000 ] loss = 0.25398, acc = 0.96716
[ Valid | 014/3000 ] loss = 0.25398, acc = 0.96716
7


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 015/3000 ] loss = 0.94772


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 015/3000 ] loss = 0.22131, acc = 0.96716
[ Valid | 015/3000 ] loss = 0.22131, acc = 0.96716
8


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 016/3000 ] loss = 0.95669


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 016/3000 ] loss = 0.24254, acc = 0.96786
[ Valid | 016/3000 ] loss = 0.24254, acc = 0.96786
9


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 017/3000 ] loss = 0.96086


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 017/3000 ] loss = 0.26437, acc = 0.96658
[ Valid | 017/3000 ] loss = 0.26437, acc = 0.96658
10


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 018/3000 ] loss = 0.97315


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 018/3000 ] loss = 0.27030, acc = 0.96451
[ Valid | 018/3000 ] loss = 0.27030, acc = 0.96451
11


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 019/3000 ] loss = 0.95887


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 019/3000 ] loss = 0.23238, acc = 0.96833
[ Valid | 019/3000 ] loss = 0.23238, acc = 0.96833
12


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 020/3000 ] loss = 0.93803


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 020/3000 ] loss = 0.24576, acc = 0.96790
[ Valid | 020/3000 ] loss = 0.24576, acc = 0.96790
13


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 021/3000 ] loss = 0.96764


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 021/3000 ] loss = 0.27550, acc = 0.96887
[ Valid | 021/3000 ] loss = 0.27550, acc = 0.96887
14


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 022/3000 ] loss = 0.95648


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 022/3000 ] loss = 0.28841, acc = 0.96653
[ Valid | 022/3000 ] loss = 0.28841, acc = 0.96653
15


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 023/3000 ] loss = 0.98365


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 023/3000 ] loss = 0.27007, acc = 0.96790
[ Valid | 023/3000 ] loss = 0.27007, acc = 0.96790
16


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 024/3000 ] loss = 0.94146


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 024/3000 ] loss = 0.25005, acc = 0.96993
[ Valid | 024/3000 ] loss = 0.25005, acc = 0.96993 -> best
Best model found at epoch 23, saving model


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 025/3000 ] loss = 0.95187


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 025/3000 ] loss = 0.24459, acc = 0.96790
[ Valid | 025/3000 ] loss = 0.24459, acc = 0.96790
1


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 026/3000 ] loss = 0.95714


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 026/3000 ] loss = 0.24583, acc = 0.96854
[ Valid | 026/3000 ] loss = 0.24583, acc = 0.96854
2


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 027/3000 ] loss = 0.94230


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 027/3000 ] loss = 0.22927, acc = 0.96823
[ Valid | 027/3000 ] loss = 0.22927, acc = 0.96823
3


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 028/3000 ] loss = 0.97622


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 028/3000 ] loss = 0.27759, acc = 0.96685
[ Valid | 028/3000 ] loss = 0.27759, acc = 0.96685
4


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 029/3000 ] loss = 0.94094


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 029/3000 ] loss = 0.25722, acc = 0.96701
[ Valid | 029/3000 ] loss = 0.25722, acc = 0.96701
5


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 030/3000 ] loss = 0.96210


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 030/3000 ] loss = 0.27380, acc = 0.96786
[ Valid | 030/3000 ] loss = 0.27380, acc = 0.96786
6


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 031/3000 ] loss = 0.97128


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 031/3000 ] loss = 0.30587, acc = 0.96387
[ Valid | 031/3000 ] loss = 0.30587, acc = 0.96387
7


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 032/3000 ] loss = 0.98063


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 032/3000 ] loss = 0.27807, acc = 0.96664
[ Valid | 032/3000 ] loss = 0.27807, acc = 0.96664
8


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 033/3000 ] loss = 0.94325


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 033/3000 ] loss = 0.23918, acc = 0.96939
[ Valid | 033/3000 ] loss = 0.23918, acc = 0.96939
9


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 034/3000 ] loss = 0.97314


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 034/3000 ] loss = 0.25311, acc = 0.96844
[ Valid | 034/3000 ] loss = 0.25311, acc = 0.96844
10


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 035/3000 ] loss = 0.95716


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 035/3000 ] loss = 0.25902, acc = 0.96695
[ Valid | 035/3000 ] loss = 0.25902, acc = 0.96695
11


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 036/3000 ] loss = 0.94517


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 036/3000 ] loss = 0.26974, acc = 0.96647
[ Valid | 036/3000 ] loss = 0.26974, acc = 0.96647
12


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 037/3000 ] loss = 0.89807


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 037/3000 ] loss = 0.22982, acc = 0.96705
[ Valid | 037/3000 ] loss = 0.22982, acc = 0.96705
13


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 038/3000 ] loss = 0.98884


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 038/3000 ] loss = 0.27190, acc = 0.96722
[ Valid | 038/3000 ] loss = 0.27190, acc = 0.96722
14


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 039/3000 ] loss = 0.97098


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 039/3000 ] loss = 0.25819, acc = 0.96807
[ Valid | 039/3000 ] loss = 0.25819, acc = 0.96807
15


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 040/3000 ] loss = 0.94818


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 040/3000 ] loss = 0.25985, acc = 0.96759
[ Valid | 040/3000 ] loss = 0.25985, acc = 0.96759
16


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 041/3000 ] loss = 0.93903


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 041/3000 ] loss = 0.27224, acc = 0.96827
[ Valid | 041/3000 ] loss = 0.27224, acc = 0.96827
17


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 042/3000 ] loss = 0.95552


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 042/3000 ] loss = 0.29468, acc = 0.96817
[ Valid | 042/3000 ] loss = 0.29468, acc = 0.96817
18


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 043/3000 ] loss = 0.98381


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 043/3000 ] loss = 0.25482, acc = 0.96631
[ Valid | 043/3000 ] loss = 0.25482, acc = 0.96631
19


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 044/3000 ] loss = 0.94046


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 044/3000 ] loss = 0.26056, acc = 0.96780
[ Valid | 044/3000 ] loss = 0.26056, acc = 0.96780
20


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 045/3000 ] loss = 0.96484


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 045/3000 ] loss = 0.22036, acc = 0.96966
[ Valid | 045/3000 ] loss = 0.22036, acc = 0.96966
21


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 046/3000 ] loss = 0.91101


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 046/3000 ] loss = 0.25245, acc = 0.96743
[ Valid | 046/3000 ] loss = 0.25245, acc = 0.96743
22


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 047/3000 ] loss = 0.95782


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 047/3000 ] loss = 0.28659, acc = 0.96509
[ Valid | 047/3000 ] loss = 0.28659, acc = 0.96509
23


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 048/3000 ] loss = 0.93844


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 048/3000 ] loss = 0.26341, acc = 0.96817
[ Valid | 048/3000 ] loss = 0.26341, acc = 0.96817
24


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 049/3000 ] loss = 0.95255


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 049/3000 ] loss = 0.26871, acc = 0.96695
[ Valid | 049/3000 ] loss = 0.26871, acc = 0.96695
25


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 050/3000 ] loss = 0.95649


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 050/3000 ] loss = 0.25805, acc = 0.96780
[ Valid | 050/3000 ] loss = 0.25805, acc = 0.96780
26


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 051/3000 ] loss = 0.93497


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 051/3000 ] loss = 0.24584, acc = 0.96807
[ Valid | 051/3000 ] loss = 0.24584, acc = 0.96807
27


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 052/3000 ] loss = 0.95340


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 052/3000 ] loss = 0.25340, acc = 0.96817
[ Valid | 052/3000 ] loss = 0.25340, acc = 0.96817
28


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 053/3000 ] loss = 0.94673


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 053/3000 ] loss = 0.26862, acc = 0.96701
[ Valid | 053/3000 ] loss = 0.26862, acc = 0.96701
29


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 054/3000 ] loss = 0.95933


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 054/3000 ] loss = 0.24256, acc = 0.96796
[ Valid | 054/3000 ] loss = 0.24256, acc = 0.96796
30


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 055/3000 ] loss = 0.95472


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 055/3000 ] loss = 0.24200, acc = 0.96749
[ Valid | 055/3000 ] loss = 0.24200, acc = 0.96749
31


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 056/3000 ] loss = 0.93247


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 056/3000 ] loss = 0.25560, acc = 0.96685
[ Valid | 056/3000 ] loss = 0.25560, acc = 0.96685
32


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 057/3000 ] loss = 0.93194


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 057/3000 ] loss = 0.22561, acc = 0.96881
[ Valid | 057/3000 ] loss = 0.22561, acc = 0.96881
33


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 058/3000 ] loss = 0.97616


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 058/3000 ] loss = 0.23711, acc = 0.96790
[ Valid | 058/3000 ] loss = 0.23711, acc = 0.96790
34


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 059/3000 ] loss = 0.91908


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 059/3000 ] loss = 0.22341, acc = 0.96929
[ Valid | 059/3000 ] loss = 0.22341, acc = 0.96929
35


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 060/3000 ] loss = 0.94408


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 060/3000 ] loss = 0.23470, acc = 0.96813
[ Valid | 060/3000 ] loss = 0.23470, acc = 0.96813
36


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 061/3000 ] loss = 0.91060


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 061/3000 ] loss = 0.26496, acc = 0.96732
[ Valid | 061/3000 ] loss = 0.26496, acc = 0.96732
37


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 062/3000 ] loss = 0.91849


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 062/3000 ] loss = 0.28662, acc = 0.96722
[ Valid | 062/3000 ] loss = 0.28662, acc = 0.96722
38


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 063/3000 ] loss = 0.90280


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 063/3000 ] loss = 0.28186, acc = 0.96743
[ Valid | 063/3000 ] loss = 0.28186, acc = 0.96743
39


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 064/3000 ] loss = 1.01255


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 064/3000 ] loss = 0.27369, acc = 0.96769
[ Valid | 064/3000 ] loss = 0.27369, acc = 0.96769
40


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 065/3000 ] loss = 0.95134


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 065/3000 ] loss = 0.25813, acc = 0.96705
[ Valid | 065/3000 ] loss = 0.25813, acc = 0.96705
41


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 066/3000 ] loss = 1.02561


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 066/3000 ] loss = 0.24928, acc = 0.96854
[ Valid | 066/3000 ] loss = 0.24928, acc = 0.96854
42


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 067/3000 ] loss = 0.97034


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 067/3000 ] loss = 0.26733, acc = 0.96854
[ Valid | 067/3000 ] loss = 0.26733, acc = 0.96854
43


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 068/3000 ] loss = 0.98193


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 068/3000 ] loss = 0.28476, acc = 0.96641
[ Valid | 068/3000 ] loss = 0.28476, acc = 0.96641
44


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 069/3000 ] loss = 0.94815


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 069/3000 ] loss = 0.25675, acc = 0.96631
[ Valid | 069/3000 ] loss = 0.25675, acc = 0.96631
45


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 070/3000 ] loss = 0.97517


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 070/3000 ] loss = 0.28053, acc = 0.96769
[ Valid | 070/3000 ] loss = 0.28053, acc = 0.96769
46


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 071/3000 ] loss = 0.98485


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 071/3000 ] loss = 0.25894, acc = 0.96716
[ Valid | 071/3000 ] loss = 0.25894, acc = 0.96716
47


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 072/3000 ] loss = 0.90314


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 072/3000 ] loss = 0.22203, acc = 0.96918
[ Valid | 072/3000 ] loss = 0.22203, acc = 0.96918
48


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 073/3000 ] loss = 0.95027


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 073/3000 ] loss = 0.27391, acc = 0.96786
[ Valid | 073/3000 ] loss = 0.27391, acc = 0.96786
49


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 074/3000 ] loss = 0.96840


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 074/3000 ] loss = 0.28038, acc = 0.96530
[ Valid | 074/3000 ] loss = 0.28038, acc = 0.96530
50


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 075/3000 ] loss = 0.92098


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 075/3000 ] loss = 0.24847, acc = 0.96690
[ Valid | 075/3000 ] loss = 0.24847, acc = 0.96690
51


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 076/3000 ] loss = 0.95614


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 076/3000 ] loss = 0.27407, acc = 0.96759
[ Valid | 076/3000 ] loss = 0.27407, acc = 0.96759
52


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 077/3000 ] loss = 0.89940


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 077/3000 ] loss = 0.22537, acc = 0.96759
[ Valid | 077/3000 ] loss = 0.22537, acc = 0.96759
53


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 078/3000 ] loss = 0.92580


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 078/3000 ] loss = 0.24524, acc = 0.96881
[ Valid | 078/3000 ] loss = 0.24524, acc = 0.96881
54


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 079/3000 ] loss = 0.98100


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 079/3000 ] loss = 0.21317, acc = 0.96961
[ Valid | 079/3000 ] loss = 0.21317, acc = 0.96961
55


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 080/3000 ] loss = 0.93528


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 080/3000 ] loss = 0.29265, acc = 0.96579
[ Valid | 080/3000 ] loss = 0.29265, acc = 0.96579
56


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 081/3000 ] loss = 0.98917


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 081/3000 ] loss = 0.25275, acc = 0.96871
[ Valid | 081/3000 ] loss = 0.25275, acc = 0.96871
57


  0%|          | 0/167 [00:00<?, ?it/s]

[ Train | 082/3000 ] loss = 0.90058


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 082/3000 ] loss = 0.24217, acc = 0.96796
[ Valid | 082/3000 ] loss = 0.24217, acc = 0.96796
58


  0%|          | 0/167 [00:00<?, ?it/s]

KeyboardInterrupt: 

# Test

In [13]:
# Collect every .jpg under <_dataset_dir>/test without splitting (ratio = 1)
# and without shuffling, then wrap the paths in a dataset that uses the plain
# test_tfm.
test_image_paths = split_set(os.path.join(_dataset_dir,"test"), ratio = 1, shuffle = False)
test_set = FoodDataset(test_image_paths,tfm=test_tfm)
# shuffle=False keeps the loader order equal to the dataset order.
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

dataset with 3347 images


In [14]:
# Test-time augmentation: five datasets over the same test images, each one
# passing the images through the random train_tfm, concatenated back to back
# and read in order.
_tta_views = [FoodDataset(test_image_paths, tfm=train_tfm) for _ in range(5)]
test_set1, test_set2, test_set3, test_set4, test_set5 = _tta_views
test_set_all = ConcatDataset(_tta_views)
test_loader_all = DataLoader(
    test_set_all,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

dataset with 3347 images
dataset with 3347 images
dataset with 3347 images
dataset with 3347 images
dataset with 3347 images


# Testing and generating the prediction CSV

In [15]:
def _predict_logits(model, loader):
    """Run `model` over every batch of `loader` and return the stacked logits as one NumPy array."""
    chunks = []
    with torch.no_grad():
        for data, _ in tqdm(loader):
            chunks.append(model(data.to(device)).cpu().numpy())
    # One concatenation at the end; results are rebuilt from scratch on every
    # call, so re-running this cell never mixes in arrays from an earlier run.
    return np.concatenate(chunks, axis=0)


# Evaluate with the best checkpoint saved during training.
RestNeXt.load_state_dict(torch.load("./RestNeXt_best.ckpt", map_location=device))
RestNeXt.eval()

# Logits of the augmented copies. test_loader_all reads the concatenated
# copies in order, so the rows form consecutive groups of len(test set) rows:
# reshape to (copies, images, classes) and average over the copies.
_tta_logits = _predict_logits(RestNeXt, test_loader_all)
_n_test = len(test_loader.dataset)
test_pred_all = _tta_logits.reshape(-1, _n_test, _tta_logits.shape[-1])
test_pred_all = np.mean(test_pred_all, axis=0)

# Logits of the un-augmented test images.
test_pred_result = _predict_logits(RestNeXt, test_loader)

# Give equal weight to the plain prediction and the averaged augmented one.
test_pred_final = (test_pred_result + test_pred_all) / 2
test_label = np.argmax(test_pred_final, axis=1)
prediction = test_label.tolist()

  0%|          | 0/131 [00:00<?, ?it/s]

  0%|          | 0/27 [00:00<?, ?it/s]

In [16]:
#create test csv
def pad4(i):
    """Return ``str(i)`` left-padded with "0" characters to at least four characters."""
    text = str(i)
    return text.rjust(4, "0")
# Build the submission table: zero-padded 1-based ids in dataset order next
# to the predicted category, then write it without the index column.
submission_ids = [pad4(i) for i in range(1, len(test_set) + 1)]
df = pd.DataFrame({"Id": submission_ids, "Category": prediction})
df.to_csv("submission11.csv", index=False)
#df.to_csv("RestNeXt_haung.csv",index = False)

# Q1. Augmentation Implementation
## Implement augmentation by finishing train_tfm in the code with image size of your choice. 
## Directly copy the following block and paste it on GradeScope after you finish the code
### Your train_tfm must be capable of producing 5+ different results when given an identical image multiple times.
### Your train_tfm in the report can be different from the train_tfm in your training code.


In [None]:
# Report template for Q1.
# NOTE(review): running this cell rebinds `train_tfm`, replacing the
# augmenting pipeline defined in the "Transforms" cell. As written it only
# resizes and converts to a tensor, i.e. it contains no random transform yet.
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You need to add some transforms here.
    transforms.ToTensor(),
])

# Q2. Residual Implementation
![](https://i.imgur.com/GYsq1Ap.png)
## Directly copy the following block and paste it on GradeScope after you finish the code


In [None]:
from torch import nn
class Residual_Network(nn.Module):
    """Six-layer CNN with identity shortcuts around the shape-preserving layers.

    Layers 1, 3 and 5 change the number of channels (3 and 5 also halve the
    spatial size with stride 2). Layers 2, 4 and 6 keep the shape of their
    input, so their input is added back to their output before the ReLU.
    The flattened features go through two fully-connected layers that output
    11 logits.
    """

    def __init__(self):
        super(Residual_Network, self).__init__()

        # Conv2d arguments are (in_channels, out_channels, kernel, stride, padding).
        self.cnn_layer1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
        )

        self.cnn_layer2 = nn.Sequential(
            nn.Conv2d(64, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
        )

        self.cnn_layer3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, 2, 1),
            nn.BatchNorm2d(128),
        )

        self.cnn_layer4 = nn.Sequential(
            nn.Conv2d(128, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
        )
        self.cnn_layer5 = nn.Sequential(
            nn.Conv2d(128, 256, 3, 2, 1),
            nn.BatchNorm2d(256),
        )
        self.cnn_layer6 = nn.Sequential(
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
        )
        # Two stride-2 convolutions reduce a 128x128 input to 32x32.
        self.fc_layer = nn.Sequential(
            nn.Linear(256* 32* 32, 256),
            nn.ReLU(),
            nn.Linear(256, 11)
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        # input (x): [batch_size, 3, 128, 128]
        # output: [batch_size, 11]

        # Extract features by convolutional layers.
        x1 = self.relu(self.cnn_layer1(x))

        # Residual block: add the block input back before the activation.
        x2 = self.relu(self.cnn_layer2(x1) + x1)

        # Channel/size-changing layer: no shortcut, the shapes differ.
        x3 = self.relu(self.cnn_layer3(x2))

        x4 = self.relu(self.cnn_layer4(x3) + x3)

        x5 = self.relu(self.cnn_layer5(x4))

        x6 = self.relu(self.cnn_layer6(x5) + x5)

        # The extracted feature map must be flatten before going to fully-connected layers.
        xout = x6.flatten(1)

        # The features are transformed by fully-connected layers to obtain the final logits.
        xout = self.fc_layer(xout)
        return xout

In [71]:
class FoodDataset(Dataset):
    """Image dataset over the ``.jpg`` files of a directory, or an explicit file list.

    The label of an image is the integer that precedes the first ``_`` in its
    file name. Files whose name does not start with such an integer (the
    unlabeled test images) get the label ``-1``.

    Args:
        path: Directory scanned for ``.jpg`` files when ``files`` is not given.
        tfm: Callable applied to the opened PIL image in ``__getitem__``.
        files: Optional explicit list of image paths, used as given instead
            of the directory listing.
    """

    def __init__(self, path, tfm=test_tfm, files=None):
        super().__init__()
        self.path = path
        if files is not None:
            self.files = files
        else:
            self.files = sorted(
                os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")
            )
        print(f"dataset with {path} images",len(self.files))
        self.transform = tfm

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the ``idx``-th file."""
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        # Strip the directory part whichever separator was used, so that the
        # label is parsed correctly on both Windows ("\\") and POSIX ("/").
        basename = fname.replace("\\", "/").rsplit("/", 1)[-1]
        try:
            label = int(basename.split("_")[0])
        except ValueError:
            label = -1  # test has no label
        return im, label
