# If you want to access the version you have already modified, click "Edit"
# If you want to access the original sample code, click "...", then click "Copy & Edit Notebook"

In [1]:
## This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        pass
        #print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
_exp_name = "sample"

In [3]:
# Import necessary packages.
import numpy as np
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

# This is for the progress bar.
from tqdm.auto import tqdm
import random

In [4]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

## **Transforms**
Torchvision provides lots of useful utilities for image preprocessing, data wrapping as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [5]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    transforms.RandomRotation(15),
    transforms.RandomRotation(30),
    transforms.ColorJitter(brightness=1),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomVerticalFlip(0.5),
    # You may add some transforms here.
    # ToTensor() should be the last one of the transforms.
    transforms.ToTensor(),
])


## **Datasets**
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [6]:
class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
        print(f"One {path} sample",self.files[0])
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        #im = self.data[idx]
        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1 # test has no label
        return im,label



In [7]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input 維度 [3, 128, 128]
        self.cnn_layer1=nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  
            nn.BatchNorm2d(64),
            
        )
        
        self.cnn_layer2=nn.Sequential(
            nn.Conv2d(64, 64, 3, 1, 1), 
            nn.BatchNorm2d(64),
            
        )
        
        self.cnn_layer3=nn.Sequential(
            nn.Conv2d(64, 128, 3, 1, 1), 
            nn.BatchNorm2d(128),
           
        )
        
        self.cnn_layer4=nn.Sequential(
            nn.Conv2d(128,128, 3, 1, 1), 
            nn.BatchNorm2d(128),
           
        )
        
        self.cnn_layer5=nn.Sequential(
            nn.Conv2d(128, 256, 3, 1, 1), 
            nn.BatchNorm2d(256),
           
        )
        self.cnn_layer6=nn.Sequential(
            nn.Conv2d(256, 256, 3, 1, 1), 
            nn.BatchNorm2d(256),
           
        )
        self.relu=nn.Sequential(
            nn.ReLU(),
        )
        
        self.maxpool=nn.Sequential(
            nn.MaxPool2d(2,2,0),
        )
        
        self.fc = nn.Sequential(
            nn.Linear(256*8*8, 512),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(256, 11)
        )

    def forward(self, x):
        x1=self.cnn_layer1(x)
        x1=self.relu(x1)
        x1=self.maxpool(x1)
        residual=x1
        x2=self.cnn_layer2(x1)
        x2=self.relu(residual+x2)
        x2=self.maxpool(x2)
        x3=self.cnn_layer3(x2)
        x3=self.relu(x3)
        x3=self.maxpool(x3)
        residual=x3
        x4=self.cnn_layer4(x3)
        x4=self.relu(residual+x4)
        x5=self.cnn_layer5(x4)
        x5=self.relu(x5)
        residual=x5
        x6=self.cnn_layer6(x5)
        x6=self.relu(residual+x6)
        x6=self.maxpool(x6)
        xout=x6.flatten(1)
        return self.fc(xout)
        
        
        
        

In [8]:
batch_size = 64
_dataset_dir = "../input/ml2022spring-hw3b/food11"
# Construct datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = FoodDataset(os.path.join(_dataset_dir,"training"), tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

One ../input/ml2022spring-hw3b/food11/training sample ../input/ml2022spring-hw3b/food11/training/0_0.jpg
One ../input/ml2022spring-hw3b/food11/validation sample ../input/ml2022spring-hw3b/food11/validation/0_0.jpg


In [None]:
test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)
test_loaders = []
for i in range(5):
    test_set_i = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=train_tfm)
    test_loader_i = DataLoader(test_set_i, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)
    test_loaders.append(test_loader_i)
    

## Testing and generate prediction CSV

In [None]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The number of training epochs and patience.
n_epochs = 100
patience = 20 # If no improvement in 'patience' epochs, early stop

# Initialize a model, and put it on the device specified.
model = Classifier().to(device)

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5) 

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 001/100 ] loss = 2.25479, acc = 0.20236


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 001/100 ] loss = 2.03321, acc = 0.27590
[ Valid | 001/100 ] loss = 2.03321, acc = 0.27590 -> best
Best model found at epoch 0, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 002/100 ] loss = 2.05855, acc = 0.27196


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 002/100 ] loss = 1.91701, acc = 0.31940
[ Valid | 002/100 ] loss = 1.91701, acc = 0.31940 -> best
Best model found at epoch 1, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 003/100 ] loss = 1.99746, acc = 0.29272


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 003/100 ] loss = 1.84925, acc = 0.33745
[ Valid | 003/100 ] loss = 1.84925, acc = 0.33745 -> best
Best model found at epoch 2, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 004/100 ] loss = 1.94233, acc = 0.31591


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 004/100 ] loss = 1.90273, acc = 0.33175
[ Valid | 004/100 ] loss = 1.90273, acc = 0.33175


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 005/100 ] loss = 1.88175, acc = 0.33772


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 005/100 ] loss = 1.76114, acc = 0.38155
[ Valid | 005/100 ] loss = 1.76114, acc = 0.38155 -> best
Best model found at epoch 4, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 006/100 ] loss = 1.82895, acc = 0.36147


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 006/100 ] loss = 1.68152, acc = 0.41386
[ Valid | 006/100 ] loss = 1.68152, acc = 0.41386 -> best
Best model found at epoch 5, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 007/100 ] loss = 1.80170, acc = 0.36506


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 007/100 ] loss = 1.66125, acc = 0.42597
[ Valid | 007/100 ] loss = 1.66125, acc = 0.42597 -> best
Best model found at epoch 6, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 008/100 ] loss = 1.74977, acc = 0.38060


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 008/100 ] loss = 1.61227, acc = 0.43555
[ Valid | 008/100 ] loss = 1.61227, acc = 0.43555 -> best
Best model found at epoch 7, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 009/100 ] loss = 1.70450, acc = 0.40653


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 009/100 ] loss = 1.62648, acc = 0.43430
[ Valid | 009/100 ] loss = 1.62648, acc = 0.43430


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 010/100 ] loss = 1.67057, acc = 0.41649


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 010/100 ] loss = 1.69983, acc = 0.41831
[ Valid | 010/100 ] loss = 1.69983, acc = 0.41831


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 011/100 ] loss = 1.64048, acc = 0.42446


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 011/100 ] loss = 1.54266, acc = 0.48419
[ Valid | 011/100 ] loss = 1.54266, acc = 0.48419 -> best
Best model found at epoch 10, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 012/100 ] loss = 1.61856, acc = 0.43317


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 012/100 ] loss = 1.60933, acc = 0.46018
[ Valid | 012/100 ] loss = 1.60933, acc = 0.46018


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 013/100 ] loss = 1.58970, acc = 0.44770


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 013/100 ] loss = 1.46196, acc = 0.50369
[ Valid | 013/100 ] loss = 1.46196, acc = 0.50369 -> best
Best model found at epoch 12, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 014/100 ] loss = 1.56991, acc = 0.45288


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 014/100 ] loss = 1.42826, acc = 0.50550
[ Valid | 014/100 ] loss = 1.42826, acc = 0.50550 -> best
Best model found at epoch 13, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 015/100 ] loss = 1.52684, acc = 0.46855


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 015/100 ] loss = 1.53435, acc = 0.47917
[ Valid | 015/100 ] loss = 1.53435, acc = 0.47917


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 016/100 ] loss = 1.52479, acc = 0.47444


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 016/100 ] loss = 1.40296, acc = 0.51757
[ Valid | 016/100 ] loss = 1.40296, acc = 0.51757 -> best
Best model found at epoch 15, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 017/100 ] loss = 1.48902, acc = 0.48496


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 017/100 ] loss = 1.44315, acc = 0.51494
[ Valid | 017/100 ] loss = 1.44315, acc = 0.51494


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 018/100 ] loss = 1.46386, acc = 0.48746


  0%|          | 0/54 [00:00<?, ?it/s]

In [None]:
model_best = Classifier().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
preds
with torch.no_grad():
    for data,_ in test_loader:
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()

In [None]:
#create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
df["Category"] = prediction
df.to_csv("submission.csv",index = False)