# Assignment 01: Multi-class Classification 
In this Assignment, you will train a deep model on the CIFAR10 from the scratch using PyTorch.

### Basic Imports

In [1]:
import os
import time
import os.path as osp

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

from torchvision import datasets
from torchvision.datasets import FashionMNIST
from torchvision import transforms
import torchvision

import matplotlib.pyplot as plt
from PIL import Image

### Hyperparameters

In [2]:
# random seed
SEED = 1 
NUM_CLASS = 10

# Training
BATCH_SIZE = 72
NUM_EPOCHS = 60
EVAL_INTERVAL=1
SAVE_DIR = './log'

# Optimizer
LEARNING_RATE = 1e-3
MOMENTUM = 0.9
STEP=5
GAMMA=0.5


### Device

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")

In [4]:
import torch
print(torch.cuda.is_available())  # Check if CUDA is available
print(torch.cuda.get_device_name(0))  # Check the name of the CUDA device

True
NVIDIA A30 MIG 1g.6gb



### Dataset


In [5]:
def convert_to_rgb(x):
    return x.repeat(3, 1, 1)

In [6]:
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(convert_to_rgb),  
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(convert_to_rgb),  
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

train_set = torchvision.datasets.FashionMNIST(root='../data', train=True,
                                        download=True, transform=transform_train)
train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=2)

test_set = torchvision.datasets.FashionMNIST(root='../data', train=False,
                                       download=True, transform=transform_test)
test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE,
                                         shuffle=False, num_workers=2)

class_names = train_set.classes
print(len(class_names))

10


### Model

In [7]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.dropout = nn.Dropout(0.5)

        self.fc1 = nn.Linear(128 * 3 * 3, 256)
        self.fc2 = nn.Linear(256, 10) # 10个输出类别

    def forward(self, x):

        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))

        x = x.view(-1, 128 * 3 * 3)

        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [8]:
model = ConvNet()
model.to(device)

ConvNet(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=1152, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)

### Optimizer

In [9]:
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=STEP, gamma=GAMMA)

### One-Hot Encoding

In [10]:
def one_hot_encoding(target, num_classes):
    return F.one_hot(target, num_classes=num_classes).float()

### Focal Loss Function

In [11]:
class FocalLoss(nn.Module):
    def __init__(self, gamma=2.0, alpha=1.0):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, y_pred, y_true):
        """
        y_pred: raw scores (logits) for each class. [batch_size, num_classes]
        y_true: ground truth labels. [batch_size]
        """
        # Convert y_true labels into one-hot encoding
        y_true_onehot = torch.zeros(y_pred.size(), device=y_pred.device).scatter_(1, y_true.unsqueeze(1).long(), 1)
        
        # Calculate softmax over y_pred for calculating probabilities
        probs = F.softmax(y_pred, dim=1)
        
        # Calculate focal loss
        focal_loss = -self.alpha * (y_true_onehot * torch.log(probs) * (1 - probs) ** self.gamma).sum(dim=1).mean()
        
        return focal_loss

### Loss Function define

In [12]:
##################### Write your answer here ##################
# Define the loss function
criterion = nn.CrossEntropyLoss()
#criterion = FocalLoss()
#criterion = nn.L1Loss()
#lossfunction = 'epo100FC201'
###############################################################

In [13]:
def train_batch(model, image, target):
    """
    Perform one training batch iteration.

    Args:
        model (torch.nn.Module): The machine learning model to train.
        image (torch.Tensor): Batch of input data (images).
        target (torch.Tensor): Batch of target labels.

    Returns:
        torch.Tensor: Model output (predictions) for the batch.
        torch.Tensor: Loss value calculated by the defined loss function loss_fn().
    """
    
    ##################### Write your answer here ##################
    output = model(image)
    loss = criterion(output, target)
    ###############################################################

    return output, loss

In [14]:

def test_batch(model, image, target):
    """
    Perform one testing batch iteration.

    Args:
        model (torch.nn.Module): The machine learning model to evaluate.
        image (torch.Tensor): Batch of input data (images).
        target (torch.Tensor): Batch of target labels.

    Returns:
        torch.Tensor: Model output (predictions) for the batch.
        torch.Tensor: Loss value calculated for the batch.
    """

    ##################### Write your answer here ##################
    output = model(image)
    loss = criterion(output, target)
    ###############################################################

    return output, loss

### Model Training

In [15]:
training_loss = []
training_acc = []
testing_loss = []
testing_acc = []

run_num = 1

import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# Now run your training code
for epoch in range(NUM_EPOCHS):
    model.train()

    running_cls_loss = 0.0
    running_cls_corrects = 0
    
    for batch_idx, (image, target) in enumerate(train_dataloader):
        
#         print("Image shape:", image.shape)
#         print("Image dtype:", image.dtype)
#         print("Contains NaN:", torch.isnan(image).any())
#         print("Contains Inf:", torch.isinf(image).any())

#         print(run_num)
#         run_num += 1
#         print(target.shape)
#         print(target)
#         print('\n')
        
        image = image.to(device)
        target = target.to(device)

        optimizer.zero_grad()  # Clear gradients before the forward pass

        outputs, loss = train_batch(model, image, target)
        _, preds = torch.max(outputs, 1)

        loss_data = loss.item()
        if np.isnan(loss_data):
            raise ValueError('Loss is NaN while training')

        running_cls_loss += loss_data
        running_cls_corrects += torch.sum(preds == target.data)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()

    epoch_loss = running_cls_loss / len(train_set)
    epoch_acc = running_cls_corrects.double() / len(train_set)

    #print(f'Epoch: {epoch+1}/{NUM_EPOCHS} Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

    # Rest of your code for testing and checkpoint saving...

    training_loss.append(epoch_loss)
    training_acc.append(epoch_acc.cpu().detach().numpy())

    # change learning rate
    scheduler.step()


    ##########################
    ### Testing
    ##########################
    # # eval model during training or in the last epoch
    if (epoch + 1) % EVAL_INTERVAL == 0 or (epoch +1) == NUM_EPOCHS:
        #print('Begin test......')
        model.eval()
    
        val_loss = 0.0
        val_corrects = 0
        for batch_idx, (image, target) in enumerate(test_dataloader):  #CE-Loss/FCLoss


            image = image.to(device)
            target = target.to(device)

            # test model
            outputs, loss = test_batch(model, image, target)
            _, preds = torch.max(outputs, 1)
            
            val_loss += loss.item()
            val_corrects += torch.sum(preds == target.data)           #CE-Loss/FCLoss
      

        val_loss = val_loss / len(test_set)
        val_acc = val_corrects.double() / len(test_set)
        #print(f'Test Loss: {val_loss:.4f} Acc: {val_acc:.4f}')
        print(f'epoch:{epoch} finished')
        testing_loss.append(val_loss)
        testing_acc.append(val_acc.cpu().detach().numpy())

        # save the model in last epoch
        if (epoch +1) == NUM_EPOCHS:
            
            state = {
            'state_dict': model.state_dict(),
            'acc': epoch_acc,
            'epoch': (epoch+1),
            }

            # check the dir
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)

            # save the state
            torch.save(state, osp.join(SAVE_DIR, 'checkpoint_%s.pth' % (str(epoch+1))))

epoch:0 finished
epoch:1 finished
epoch:2 finished
epoch:3 finished
epoch:4 finished
epoch:5 finished
epoch:6 finished
epoch:7 finished
epoch:8 finished
epoch:9 finished
epoch:10 finished
epoch:11 finished
epoch:12 finished
epoch:13 finished
epoch:14 finished
epoch:15 finished
epoch:16 finished
epoch:17 finished
epoch:18 finished
epoch:19 finished
epoch:20 finished
epoch:21 finished
epoch:22 finished
epoch:23 finished
epoch:24 finished
epoch:25 finished
epoch:26 finished
epoch:27 finished
epoch:28 finished
epoch:29 finished
epoch:30 finished
epoch:31 finished
epoch:32 finished
epoch:33 finished
epoch:34 finished
epoch:35 finished
epoch:36 finished
epoch:37 finished
epoch:38 finished
epoch:39 finished
epoch:40 finished
epoch:41 finished
epoch:42 finished
epoch:43 finished
epoch:44 finished
epoch:45 finished
epoch:46 finished
epoch:47 finished
epoch:48 finished
epoch:49 finished
epoch:50 finished
epoch:51 finished
epoch:52 finished
epoch:53 finished
epoch:54 finished
epoch:55 finished
ep

In [17]:
print(f'NN Accuracy on FASHION-MNIST test images: {testing_acc[-1] * 100:.2f}%')

NN Accuracy on FASHION-MNIST test images: 88.74%
