# HW3 Image Classification
## We strongly recommend that you run with Kaggle for this homework
https://www.kaggle.com/c/ml2022spring-hw3b/code?competitionId=34954&sortBy=dateCreated

# Get Data
Notes: if the links are dead, you can download the data directly from Kaggle and upload it to the workspace, or you can use the Kaggle API to directly download the data into colab.


In [1]:
# ! wget https://www.dropbox.com/s/6l2vcvxl54b0b6w/food11.zip  -O food11.zip
! echo "get data"

get data


In [2]:
# ! unzip food11.zip
! echo "unzip"

unzip


# Training

## Subset
![image.png](attachment:fad8d73c-ca2b-4c7a-b0b7-fdc838ed4b2a.png)

## ConcatDataset
![image.png](attachment:1831385e-376b-4aa0-989d-0cdd8f9c4e6d.png)

In [3]:
_exp_name = "sample"

In [4]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

# This is for the progress bar.
from tqdm.auto import tqdm
import random

torch.backends.cudnn.deterministic是啥？顧名思義，將這個flag 置為True的話，每次返回的捲積算法將是確定的，即默認算法。如果配合上設置Torch 的隨機種子為固定值的話，應該可以保證每次運行網絡的時候相同輸入的輸出是固定的

benchmark 設置False，是為了保證不使用選擇卷積算法的機制，使用固定的捲積算法。但是，就算是固定的捲積算法，由於其實現不同，也可能是不可控制的，即相同的值，同一個算法卷積出來有細微差別，deterministic設置True保證使用確定性的捲積算法，二者配合起來，才能保證卷積操作的一致性

In [5]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

## **Transforms**
Torchvision provides lots of useful utilities for image preprocessing, data wrapping as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [6]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(), # range [0, 255] -> [0.0,1.0]
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods

transform_set = [ 
    # transforms.RandomVerticalFlip(),
    transforms.RandomHorizontalFlip(),
    # transforms.RandomRotation(degrees = 30,center=(0,0)),
    # transforms.Pad(padding = 30,fill=(255,200,0)),
    # transforms.ColorJitter(contrast=(0.3,0.5)),
    # transforms.ColorJitter(saturation=(0.3,0.5)),
    # transforms.RandomAffine(degrees=30),
    # transforms.RandomAffine(degrees= 0 ,translate=(0.2,0.5)),
    # transforms.RandomAffine(degrees= 0 ,scale=(0.2,0.5)),
    # ToTensor() should be the last one of the transforms.
    # transforms.RandomAutocontrast(),
    # transforms.RandomEqualize()
]

train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You may add some transforms here.
    # transforms.RandomApply(transform_set, p=0.5),
    transforms.RandomHorizontalFlip(),
    
    transforms.ColorJitter(brightness=(0.5,0.9)),
    transforms.ColorJitter(contrast=(0.5,0.9)),
    transforms.ColorJitter(saturation=(0.5,0.9)),
    transforms.ColorJitter(hue=(-0.2,0.2)),
    transforms.ToTensor(), # range [0, 255] -> [0.0,1.0]
])

# origin: 0.33085

![image.png](attachment:fc2b8008-248d-4ce0-97e8-7ae9c2ca046a.png)

![image.png](attachment:7fe0ea68-6a7e-4db5-b530-115d42a1ed05.png)

## **Datasets**
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [7]:
class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
        print(f"One {path} sample",self.files[0])
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        #im = self.data[idx]
        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1 # test has no label
        return im,label



In [8]:
device = "cuda" if torch.cuda.is_available() else "cpu"

class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input 維度 [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]
            # nn.Dropout(0.2),

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]
            # nn.Dropout(0.2),

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]
            # nn.Dropout(0.2),

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            # nn.Dropout(0.2),
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
            # nn.Dropout(0.2),
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            # nn.Dropout(0.2),
            nn.ReLU(),            
            nn.Linear(1024, 512),
            # nn.Dropout(0.2),
            nn.ReLU(),
            nn.Linear(512, 11),            
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)
    
model = Classifier().to(device)    

![image.png](attachment:4f601a71-6901-4e2e-b2de-5a7d74267cd6.png)

In [9]:
# class Residual_Network(nn.Module):
#         def __init__(self):
#             super(Residual_Network, self).__init__()

#             self.cnn_layer1 = nn.Sequential(
#                 nn.Conv2d(3, 64, 3, 1, 1),
#                 nn.BatchNorm2d(64),
#             )

#             self.cnn_layer2 = nn.Sequential(
#                 nn.Conv2d(64, 64, 3, 1, 1),
#                 nn.BatchNorm2d(64),
#             )

#             self.cnn_layer3 = nn.Sequential(
#                 nn.Conv2d(64, 128, 3, 2, 1),
#                 nn.BatchNorm2d(128),
#             )

#             self.cnn_layer4 = nn.Sequential(
#                 nn.Conv2d(128, 128, 3, 1, 1),
#                 nn.BatchNorm2d(128),
#             )
#             self.cnn_layer5 = nn.Sequential(
#                 nn.Conv2d(128, 256, 3, 2, 1),
#                 nn.BatchNorm2d(256),
#             )
#             self.cnn_layer6 = nn.Sequential(
#                 nn.Conv2d(256, 256, 3, 1, 1),
#                 nn.BatchNorm2d(256),
#             )
#             self.fc_layer = nn.Sequential(
#                 nn.Linear(256* 32* 32, 256),
#                 nn.ReLU(),
#                 nn.Linear(256, 11)
#             )
#             self.relu = nn.ReLU()

#         def forward(self, x):
#             # input (x): [batch_size, 3, 128, 128]
#             # output: [batch_size, 11]

#             # Extract features by convolutional layers.
#             x1 = self.cnn_layer1(x)

#             x1 = self.relu(x1)

#             x2 = self.cnn_layer2(x1) + x1

#             x2 = self.relu(x2)

#             x3 = self.cnn_layer3(x2) 

#             x3 = self.relu(x3)

#             x4 = self.cnn_layer4(x3) + x3

#             x4 = self.relu(x4)

#             x5 = self.cnn_layer5(x4) 

#             x5 = self.relu(x5)

#             x6 = self.cnn_layer6(x5) + x5

#             x6 = self.relu(x6)

#             # The extracted feature map must be flatten before going to fully-connected layers.
#             xout = x6.flatten(1)

#             # The features are transformed by fully-connected layers to obtain the final logits.
#             xout = self.fc_layer(xout)
#             return xout
        
# model = Residual_Network().to(device)         

![image.png](attachment:c1008082-82bb-4943-9d5f-6dd0b7335921.png)

In [10]:
# from torchvision.models import resnet152

# device = "cuda" if torch.cuda.is_available() else "cpu"

# ENCODING_SIZE = 2048
# new_fc_dim = 11

# model = resnet152(pretrained=False)

# del model.fc
# if new_fc_dim is not None:
#     model.fc = nn.Linear(ENCODING_SIZE, new_fc_dim)

# model = model.to(device)    

![image.png](attachment:6f3c8979-c5b5-48a0-be67-bdaf19ef53d8.png)

In [11]:
batch_size = 64
_dataset_dir = "./food11"
# Construct datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = FoodDataset(os.path.join(_dataset_dir,"training"), tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

One ./food11/training sample ./food11/training/0_0.jpg
One ./food11/validation sample ./food11/validation/0_0.jpg


In [12]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The number of training epochs and patience.
n_epochs = 40
patience = 300 # If no improvement in 'patience' epochs, early stop

# Initialize a model, and put it on the device specified.
# model = Classifier().to(device)

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss() # CrossEntropyLoss()函数的主要是将softmax-log-NLLLoss合并到一块得到的结果。
# https://zhuanlan.zhihu.com/p/98785902

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5) 

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

HBox(children=(FloatProgress(value=0.0, max=155.0), HTML(value='')))


[ Train | 001/1000 ] loss = 2.12411, acc = 0.25415


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


[ Valid | 001/1000 ] loss = 2.08318, acc = 0.29123
[ Valid | 001/1000 ] loss = 2.08318, acc = 0.29123 -> best
Best model found at epoch 0, saving model


HBox(children=(FloatProgress(value=0.0, max=155.0), HTML(value='')))


[ Train | 002/1000 ] loss = 1.92037, acc = 0.32617


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


[ Valid | 002/1000 ] loss = 1.87085, acc = 0.36824
[ Valid | 002/1000 ] loss = 1.87085, acc = 0.36824 -> best
Best model found at epoch 1, saving model


HBox(children=(FloatProgress(value=0.0, max=155.0), HTML(value='')))


[ Train | 003/1000 ] loss = 1.74720, acc = 0.39121


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


[ Valid | 003/1000 ] loss = 1.76386, acc = 0.39564
[ Valid | 003/1000 ] loss = 1.76386, acc = 0.39564 -> best
Best model found at epoch 2, saving model


HBox(children=(FloatProgress(value=0.0, max=155.0), HTML(value='')))

KeyboardInterrupt: 

In [13]:
test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

One ./food11/test sample ./food11/test/0001.jpg


# Testing and generate prediction CSV

In [14]:
model_best = Classifier().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
with torch.no_grad():
    for data,_ in test_loader:
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()

In [15]:
#create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
df["Category"] = prediction
df.to_csv("submission.csv",index = False)

In [16]:
df.head()

Unnamed: 0,Id,Category
0,1,9
1,2,3
2,3,5
3,4,5
4,5,3
