# CNN

Kaggle competition: https://www.kaggle.com/competitions/ml2021spring-hw3/overview

In [16]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, ConcatDataset, Subset
from PIL import Image
# # 合并数据集用
import torchvision.transforms as transforms
from torchvision.datasets import DatasetFolder
from torchvision import models

from tqdm.auto import tqdm

## Dataset, DataLoader, Transforms

In [13]:
# Channel-wise mean/std (the standard ImageNet statistics), shared by the
# training and evaluation pipelines so the model sees identically scaled
# inputs at train and inference time.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize -> augment (on the PIL image) -> tensor -> normalize.
# Normalize only operates on tensors, so it must come after ToTensor.
train_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.AutoAugment(transforms.AutoAugmentPolicy.IMAGENET),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Evaluation pipeline: no augmentation, but the same resize and normalization
# as training.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

In [None]:
batch_size = 128

# Build the datasets.
# `Image.open` is passed directly as the loader instead of wrapping it in a
# lambda: a lambda cannot be pickled, which DataLoader requires when its
# worker processes are started with the "spawn" method (Windows / macOS).
train_set = DatasetFolder('food-11/training/labeled', loader=Image.open, extensions='jpg', transform=train_tfm)
val_set = DatasetFolder('food-11/validation', loader=Image.open, extensions='jpg', transform=test_tfm)
test_set = DatasetFolder('food-11/testing', loader=Image.open, extensions='jpg', transform=test_tfm)

# Build the data loaders. Only the training loader shuffles; validation and
# test keep the on-disk order so that predictions line up with the files.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)
valid_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

## Model

In [18]:
class Classifier(nn.Module):
    """ResNet-18 backbone whose final layer is replaced by a linear classifier.

    Args:
        num_classes: number of output logits (defaults to 11).
    """

    def __init__(self, num_classes=11):
        # The original `super(self, Classifier)` had its arguments swapped and
        # raised TypeError on construction.
        super().__init__()

        # input image size: [3, 128, 128]
        # Called without arguments so that no pretrained weights are loaded;
        # this avoids the `pretrained=` keyword, which newer torchvision
        # releases deprecate in favour of `weights=`.
        self.model = models.resnet18()
        # Read the head's input width from the backbone instead of hard-coding it.
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)

    def forward(self, x):
        """Return the raw class logits produced by the backbone for batch `x`."""
        return self.model(x)

## Training

使用半监督学习（semi-supervised learning）提高模型性能：用当前模型为数据生成伪标签，并只保留预测置信度超过阈值的样本加入训练集。

In [None]:
class _PseudoLabeledSubset(torch.utils.data.Dataset):
    """Subset of `dataset` whose labels are replaced by model predictions.

    Item `i` is `(dataset[indices[i]][0], labels[i])`; the label stored in the
    wrapped dataset is discarded.
    """

    def __init__(self, dataset, indices, labels):
        self.dataset = dataset
        self.indices = indices
        self.labels = labels

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, i):
        sample, _ = self.dataset[self.indices[i]]
        return sample, self.labels[i]


def get_pseudo_label(dataset, model, threshold=0.65):
    """Generate pseudo-labels for `dataset` using `model`.

    Every sample is run through the model; a sample is kept when the softmax
    probability of its predicted class is at least `threshold`, and its label
    becomes that predicted class.

    Args:
        dataset: map-style dataset yielding `(image, label)` pairs; the label
            is ignored.
        model: classifier returning one row of logits per image.
        threshold: minimum confidence required to keep a sample.

    Returns:
        A dataset of `(image, pseudo_label)` pairs covering only the confident
        samples (possibly empty). Images are fetched lazily from `dataset`, so
        its transform is applied again on each access.

    Side effects:
        The model is switched to eval mode for inference and is left in train
        mode on return.
    """
    # Run on whatever device the model already lives on, so inputs and weights
    # can never end up on different devices.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    # shuffle=False keeps loader order equal to dataset order, which is what
    # lets a running offset map batch positions back to dataset indices.
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    model.eval()

    kept_indices = []
    kept_labels = []
    seen = 0
    for imgs, _ in tqdm(data_loader):
        with torch.no_grad():
            logits = model(imgs.to(device))
            probs = torch.softmax(logits, dim=1)  # size: batch_size x num_classes

        confidence, predicted = probs.max(dim=1)
        keep = (confidence >= threshold).cpu()
        positions = torch.nonzero(keep, as_tuple=True)[0]

        kept_indices.extend((positions + seen).tolist())
        kept_labels.extend(predicted.cpu()[keep].tolist())
        seen += imgs.size(0)

    model.train()
    return _PseudoLabeledSubset(dataset, kept_indices, kept_labels)

In [None]:
# `torch.cuda.is_available` must be called: the original referenced a
# non-existent attribute (`torch.cuda.available`).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Classifier().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
epochs = 30
do_semi = False

if do_semi:
    # Pseudo-labels only make sense for images that have no label yet;
    # re-labelling the labeled training set would just duplicate it.
    # NOTE(review): the directory name is assumed from the sibling
    # 'food-11/training/labeled' folder — confirm it exists.
    unlabeled_set = DatasetFolder('food-11/training/unlabeled', loader=Image.open, extensions='jpg', transform=train_tfm)

for epoch in range(epochs):
    if do_semi:
        # Refresh the pseudo-labels with the current model and train on the
        # labeled data plus the confidently predicted unlabeled samples.
        pseudo_set = get_pseudo_label(unlabeled_set, model)
        concat_dataset = ConcatDataset([train_set, pseudo_set])
        train_loader = DataLoader(concat_dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)

    # ---------- Training ----------
    model.train()
    train_loss = []
    train_acc = []

    for imgs, labels in tqdm(train_loader):
        # Move both tensors to the model's device up front so the accuracy
        # comparison below never mixes CPU and GPU tensors.
        imgs, labels = imgs.to(device), labels.to(device)

        logits = model(imgs)
        loss = criterion(logits, labels)

        optimizer.zero_grad()
        loss.backward()
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
        optimizer.step()

        acc = (logits.argmax(dim=-1) == labels).float().mean()
        # Store plain floats so no tensors are kept alive across the epoch.
        train_loss.append(loss.item())
        train_acc.append(acc.item())

    # Unweighted mean over batches.
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_acc) / len(train_acc)

    print(f"[ Train | {epoch + 1:03d}/{epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    model.eval()
    valid_loss = []
    valid_acc = []

    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader):
            imgs, labels = imgs.to(device), labels.to(device)

            logits = model(imgs)
            loss = criterion(logits, labels)
            acc = (logits.argmax(dim=-1) == labels).float().mean()

            valid_loss.append(loss.item())
            valid_acc.append(acc.item())

    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_acc) / len(valid_acc)

    print(f"[ Valid | {epoch + 1:03d}/{epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

## Testing

In [None]:
# Put the model in evaluation mode before running inference.
model.eval()

predictions = []

# DatasetFolder always yields (image, label) pairs. The test split has no
# ground truth, so the label in each batch is only a placeholder and is ignored.
with torch.no_grad():
    for imgs, _ in tqdm(test_loader):
        batch_preds = model(imgs.to(device)).argmax(dim=-1)
        predictions += batch_preds.cpu().tolist()

predictions