# Exercise 02: Multi-class Classification 
In this exercise, you will train a deep model on the CIFAR10 from the scratch using PyTorch. The following tasks should be done:
- Task 1: per batch training/testing
- Task 2: Instance inference and visualization

### Basic Imports

In [1]:
import numpy

In [2]:
import os
import time
import os.path as osp

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

from torchvision import datasets
from torchvision import transforms
import torchvision.transforms as transforms
import torchvision

import matplotlib.pyplot as plt
from PIL import Image

### Hyperparameters

In [3]:
# random seed
SEED = 1 
NUM_CLASS = 10

# Training
BATCH_SIZE = 128
NUM_EPOCHS = 30
EVAL_INTERVAL=1
SAVE_DIR = './log'

# Optimizer
LEARNING_RATE = 1e-1
MOMENTUM = 0.9
STEP=5
GAMMA=0.5


### Device

In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


### Dataset


In [5]:
import torch
import torchvision
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms
import os

# 定义数据集的根目录
root_directory = "caltech101/101_ObjectCategories"

# 定义转换
transform = transforms.Compose([
    transforms.RandomResizedCrop(64),
    transforms.RandomHorizontalFlip(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

# 创建数据集实例
caltech_dataset = torchvision.datasets.ImageFolder(root=root_directory+'/', transform=transform)

# 设置训练和测试数据集的比例
train_ratio = 0.7
dataset_size = len(caltech_dataset)
train_size = int(train_ratio * dataset_size)
test_size = dataset_size - train_size

# 划分训练和测试数据集
train_set, test_set = random_split(caltech_dataset, [train_size, test_size])

# 创建数据加载器
train_dataloader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=12)
test_dataloader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=12)

# 打印数据集和加载器的大小
print(f"Total dataset size: {dataset_size}")
print(f"Training dataset size: {len(train_set)}")
print(f"Testing dataset size: {len(test_set)}")
print(f"Batch size: {BATCH_SIZE}")


Total dataset size: 9144
Training dataset size: 6400
Testing dataset size: 2744
Batch size: 128


### Model

In [6]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 4, 3)  
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(4, 8, 3)  
        self.fc1 = nn.Linear(32 * 27 * 27, 32)
        self.fc2 = nn.Linear(32, 102)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 32 * 27 * 27)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [7]:
model = ConvNet()
model.to(device)

ConvNet(
  (conv1): Conv2d(3, 4, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(4, 8, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=23328, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=102, bias=True)
)

### Optimizer

In [8]:
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP, gamma=GAMMA)

### Task 1: per batch training/testing
---

Please denfine two function named ``train_batch`` and ``test_batch``. These functions are essential for training and evaluating machine learning models using batched data from dataloaders.

**To do**: 
1. Define the loss function i.e [nn.CrossEntropyLoss()](https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html).
2. Take the image as the input and generate the output using the pre-defined SimpleNet.
3. Calculate the loss between the output and the corresponding label using the loss function.

In [9]:
##################### Write your answer here ##################
# Define the loss function
criterion = nn.CrossEntropyLoss()
###############################################################

In [10]:
def train_batch(model, image, target):
    """
    Perform one training batch iteration.

    Args:
        model (torch.nn.Module): The machine learning model to train.
        image (torch.Tensor): Batch of input data (images).
        target (torch.Tensor): Batch of target labels.

    Returns:
        torch.Tensor: Model output (predictions) for the batch.
        torch.Tensor: Loss value calculated by the defined loss function loss_fn().
    """
    
    ##################### Write your answer here ##################
    output = model(image)
    loss = criterion(output,target)
    ###############################################################

    return output, loss

In [11]:

def test_batch(model, image, target):
    """
    Perform one testing batch iteration.

    Args:
        model (torch.nn.Module): The machine learning model to evaluate.
        image (torch.Tensor): Batch of input data (images).
        target (torch.Tensor): Batch of target labels.

    Returns:
        torch.Tensor: Model output (predictions) for the batch.
        torch.Tensor: Loss value calculated for the batch.
    """

    ##################### Write your answer here ##################
    output = model(image)
    loss = criterion(output,target)
    ###############################################################

    return output, loss

### Model Training

In [12]:
training_loss = []
training_acc = []
testing_loss = []
testing_acc = []

model.to(device)
for epoch in range(NUM_EPOCHS):
    torch.cuda.empty_cache()
    model.train()
    

    ##########################
    ### Training
    ##########################ew

    running_cls_loss = 0.0
    running_cls_corrects = 0

    for  image, target in train_dataloader:
#         print(f"Image shape: {image.shape}, Target shape: {target.shape}")
#         print(batch_idx)

        image = image.to(device)
        target = target.to(device)

        # train model
        outputs, loss = train_batch(model, image, target)
        _, preds = torch.max(outputs, 1)

        
        loss_data = loss.data.item()
        if np.isnan(loss_data):
            raise ValueError('loss is nan while training')
        running_cls_loss += loss.item()
        running_cls_corrects += torch.sum(preds == target.data)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    epoch_loss = running_cls_loss / len(train_set)
    epoch_acc = running_cls_corrects.double() / len(train_set)

    print(f'Epoch: {epoch+1}/{NUM_EPOCHS} Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

    training_loss.append(epoch_loss)
    training_acc.append(epoch_acc.cpu().detach().numpy())

    # change learning rate
    scheduler.step()


    ##########################
    ### Testing
    ##########################
    # # eval model during training or in the last epoch
    if (epoch + 1) % EVAL_INTERVAL == 0 or (epoch +1) == NUM_EPOCHS:
        print('Begin test......')
        model.eval()
    
        val_loss = 0.0
        val_corrects = 0

        for batch_idx, (image, target) in enumerate(test_dataloader):

            image = image.to(device)
            target = target.to(device)

            # test model
            outputs, loss = test_batch(model, image, target)
            _, preds = torch.max(outputs, 1)
            
            val_loss += loss.item()
            val_corrects += torch.sum(preds == target.data)

        val_loss = val_loss / len(test_set)
        val_acc = val_corrects.double() / len(test_set)
        print(f'Test Loss: {val_loss:.4f} Acc: {val_acc:.4f}')
        testing_loss.append(val_loss)
        testing_acc.append(val_acc.cpu().detach().numpy())

        # save the model in last epoch
        if (epoch +1) == NUM_EPOCHS:
            
            state = {
            'state_dict': model.state_dict(),
            'acc': epoch_acc,
            'epoch': (epoch+1),
            }

            # check the dir
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)

            # save the state
            torch.save(state, osp.join(SAVE_DIR, 'checkpoint_%s.pth' % (str(epoch+1))))

Epoch: 1/30 Train Loss: 0.0357 Acc: 0.0816
Begin test......
Test Loss: 0.0339 Acc: 0.0842
Epoch: 2/30 Train Loss: 0.0328 Acc: 0.0891
Begin test......
Test Loss: 0.0336 Acc: 0.0907
Epoch: 3/30 Train Loss: 0.0327 Acc: 0.0841
Begin test......
Test Loss: 0.0335 Acc: 0.0842
Epoch: 4/30 Train Loss: 0.0327 Acc: 0.0864
Begin test......
Test Loss: 0.0335 Acc: 0.0842
Epoch: 5/30 Train Loss: 0.0326 Acc: 0.0889
Begin test......
Test Loss: 0.0335 Acc: 0.0842
Epoch: 6/30 Train Loss: 0.0326 Acc: 0.0889
Begin test......
Test Loss: 0.0335 Acc: 0.0907
Epoch: 7/30 Train Loss: 0.0326 Acc: 0.0880
Begin test......
Test Loss: 0.0335 Acc: 0.0842
Epoch: 8/30 Train Loss: 0.0326 Acc: 0.0889
Begin test......
Test Loss: 0.0335 Acc: 0.0842
Epoch: 9/30 Train Loss: 0.0326 Acc: 0.0889
Begin test......
Test Loss: 0.0335 Acc: 0.0842
Epoch: 10/30 Train Loss: 0.0326 Acc: 0.0877
Begin test......
Test Loss: 0.0335 Acc: 0.0907
Epoch: 11/30 Train Loss: 0.0326 Acc: 0.0867
Begin test......
Test Loss: 0.0335 Acc: 0.0842
Epoch: 1