### Basic Imports | Hyperparameters | Device | Dataset

In [11]:
## Basic imports
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision import transforms
import torchvision

import matplotlib.pyplot as plt

import torchmetrics

## Hyperparameters 
# random seed
SEED = 1 
NUM_CLASS = 10
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Training
BATCH_SIZE = 128
NUM_EPOCHS = 30
EVAL_INTERVAL=1
SAVE_DIR = './log'

# Optimizer
LEARNING_RATE = 1e-1
MOMENTUM = 0.9
STEP=5
GAMMA=0.5

## Device
device = torch.device("cuda:6" if torch.cuda.is_available() else "cpu")

## Dataset
# cifar10 transform
transform_cifar10_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_cifar10_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

train_set = torchvision.datasets.CIFAR10(root='../../data', train=True,
                                        download=True, transform=transform_cifar10_train)
train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=2)

test_set = torchvision.datasets.CIFAR10(root='../../data', train=False,
                                       download=True, transform=transform_cifar10_test)
test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE,
                                         shuffle=False, num_workers=2)

class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


Files already downloaded and verified
Files already downloaded and verified


### Model | Optimizer

In [12]:
## Model def
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 4, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(4, 8, 3)
        self.fc1 = nn.Linear(8 * 6 * 6, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 8 * 6 * 6)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
## Init model
model = ConvNet()
model.to(device)

## Init Optimizer
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=STEP, gamma=GAMMA)

### Model Trainning

In [13]:
####################### Modify here ###########################
# Define the loss function
criterion = nn.L1Loss()
###############################################################

## Define the batch train
def train_batch(model, image, target):
    """
    Perform one training batch iteration.

    Args:
        model (torch.nn.Module): The machine learning model to train.
        image (torch.Tensor): Batch of input data (images).
        target (torch.Tensor): Batch of target labels.

    Returns:
        torch.Tensor: Model output (predictions) for the batch.
        torch.Tensor: Loss value calculated by the defined loss function loss_fn().
    """
    
    ####################### Modify here ###########################
    output = model(image)
    loss = criterion(F.softmax(output, dim=1), F.one_hot(target, num_classes=10))
    ###############################################################

    return output, loss


def test_batch(model, image, target):
    """
    Perform one testing batch iteration.

    Args:
        model (torch.nn.Module): The machine learning model to evaluate.
        image (torch.Tensor): Batch of input data (images).
        target (torch.Tensor): Batch of target labels.

    Returns:
        torch.Tensor: Model output (predictions) for the batch.
        torch.Tensor: Loss value calculated for the batch.
    """

    ####################### Modify here ###########################
    with torch.no_grad():
        output = model(image)
        loss = criterion(F.softmax(output, dim=1), F.one_hot(target, num_classes=10))
    ###############################################################

    return output, loss

In [14]:
test_loss_l = []
test_acc_l = []
test_precision_l = []
test_recall_l = []
test_f1score_micro_l = []
test_f1score_macro_l = []

test_acc = torchmetrics.classification.MulticlassAccuracy(num_classes=10).to(device)
test_precision = torchmetrics.classification.MulticlassPrecision(num_classes=10, average='macro').to(device)
test_recall = torchmetrics.classification.MulticlassRecall(num_classes=10, average='macro').to(device)
test_f1score_micro = torchmetrics.classification.MulticlassF1Score(num_classes=10, average='micro').to(device)
test_f1score_macro = torchmetrics.classification.MulticlassF1Score(num_classes=10, average='macro').to(device)


for epoch in range(NUM_EPOCHS):
    model.train()
    torch.cuda.empty_cache()

    ##########################
    ### Training
    ##########################

    for batch_idx, (image, target) in enumerate(train_dataloader):

        image = image.to(device)
        target = target.to(device)

        # train model
        outputs, loss = train_batch(model, image, target)
        _, preds = torch.max(outputs, 1)

        
        loss_data = loss.data.item()
        if np.isnan(loss_data):
            raise ValueError('loss is nan while training')

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    print(f'Epoch: {epoch+1}/{NUM_EPOCHS}')

    # change learning rate
    scheduler.step()

    ##########################
    ### Testing
    ##########################
    # # eval model during training or in the last epoch
    if (epoch + 1) % EVAL_INTERVAL == 0 or (epoch +1) == NUM_EPOCHS:
        print('Begin test......')
        model.eval()
        
        test_loss = .0        
        test_acc.reset()
        test_precision.reset()    
        test_recall.reset()
        test_f1score_micro.reset()
        test_f1score_macro.reset()

        for batch_idx, (image, target) in enumerate(test_dataloader):

            image = image.to(device)
            target = target.to(device)

            # test model
            outputs, loss = test_batch(model, image, target)
            _, preds = torch.max(outputs, 1)
            
            test_loss += loss.item()
            test_acc.update(preds, target)
            test_precision.update(preds, target)
            test_recall.update(preds, target)
            test_f1score_micro.update(preds, target)
            test_f1score_macro.update(preds, target)

        val_loss = test_loss / len(test_set)
        val_acc = test_acc.compute()
        val_precision = test_precision.compute()
        val_recall = test_recall.compute()
        val_f1score_micro = test_f1score_micro.compute()
        val_f1score_macro = test_f1score_macro.compute()
        print(f'Test Loss: {val_loss:.4f} Acc: {val_acc:.4f} Precision: {val_precision:.4f} Recall: {val_recall:.4f} f1score_micro: {val_f1score_micro:.4f} f1score_macro: {val_f1score_macro:.4f}')

        test_loss_l.append(test_loss)
        test_acc_l.append(val_acc.cpu().detach().numpy())
        test_precision_l.append(val_precision.cpu().detach().numpy())
        test_recall_l.append(val_recall.cpu().detach().numpy())
        test_f1score_micro_l.append(val_f1score_micro.cpu().numpy())
        test_f1score_macro_l.append(val_f1score_macro.cpu().numpy())


Epoch: 1/30
Begin test......
Test Loss: 0.0013 Acc: 0.1707 Precision: 0.0848 Recall: 0.1707 f1score_micro: 0.1707 f1score_macro: 0.0780
Epoch: 2/30
Begin test......
Test Loss: 0.0012 Acc: 0.2119 Precision: 0.1266 Recall: 0.2119 f1score_micro: 0.2119 f1score_macro: 0.1444
Epoch: 3/30
Begin test......
Test Loss: 0.0012 Acc: 0.2362 Precision: 0.1505 Recall: 0.2362 f1score_micro: 0.2362 f1score_macro: 0.1684
Epoch: 4/30
Begin test......
Test Loss: 0.0012 Acc: 0.2422 Precision: 0.1413 Recall: 0.2422 f1score_micro: 0.2422 f1score_macro: 0.1705
Epoch: 5/30
Begin test......
Test Loss: 0.0012 Acc: 0.2597 Precision: 0.1427 Recall: 0.2597 f1score_micro: 0.2597 f1score_macro: 0.1776
Epoch: 6/30
Begin test......
Test Loss: 0.0012 Acc: 0.2730 Precision: 0.1517 Recall: 0.2730 f1score_micro: 0.2730 f1score_macro: 0.1890
Epoch: 7/30
Begin test......
Test Loss: 0.0012 Acc: 0.2679 Precision: 0.1623 Recall: 0.2679 f1score_micro: 0.2679 f1score_macro: 0.1896
Epoch: 8/30
Begin test......
Test Loss: 0.0011 A

## Exploratory Invetigation

In [15]:
train_targets = pd.Series(train_set.targets)
test_targets = pd.Series(test_set.targets)
train_targets.value_counts(), test_targets.value_counts()

(6    5000
 9    5000
 4    5000
 1    5000
 2    5000
 7    5000
 8    5000
 3    5000
 5    5000
 0    5000
 dtype: int64,
 3    1000
 8    1000
 0    1000
 6    1000
 1    1000
 9    1000
 5    1000
 7    1000
 4    1000
 2    1000
 dtype: int64)