In [1]:
!pip install torch
!pip install torchmetrics
!pip install torchvision

Collecting torchmetrics
  Downloading torchmetrics-1.2.0-py3-none-any.whl (805 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m805.2/805.2 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.9.0-py3-none-any.whl (23 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.9.0 torchmetrics-1.2.0


## 1. Import Libraries

In [2]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as T

from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchmetrics import Accuracy
from torchmetrics.aggregation import MeanMetric

## 2. Build Config & Directory

In [3]:
# Experiment configuration: every value a reader might want to tune lives here.

# Names and locations.
title = 'CIFAR10'              # prefix used when naming checkpoint files
data_root = 'data'             # root directory handed to the dataset
checkpoint_dir = 'checkpoint'  # directory that receives checkpoints

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Optimisation hyper-parameters.
batch_size = 128
epochs = 60
base_lr = 0.01
momentum = 0.9

In [4]:
# Create the checkpoint directory up front; exist_ok=True means re-running this
# cell does not raise when the directory is already there.
os.makedirs(checkpoint_dir, exist_ok=True)

## 3. Build Dataset

In [5]:
# Three-value mean / std tuples passed to T.Normalize for both splits.
CIFAR10_MEAN = (0.491, 0.482, 0.447)
CIFAR10_STD = (0.247, 0.243, 0.262)

# Training split: convert images to tensors, normalise, and reshuffle the
# samples each time the loader is iterated.
train_transform = T.Compose([T.ToTensor(), T.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD)])
train_data = CIFAR10(root=data_root, train=True, transform=train_transform, download=True)
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

# Held-out split: same preprocessing, iterated in a fixed order.
val_transform = T.Compose([T.ToTensor(), T.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD)])
val_data = CIFAR10(root=data_root, train=False, transform=val_transform, download=True)
val_loader = DataLoader(dataset=val_data, batch_size=batch_size, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 48730332.02it/s]


Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


## 4. Build Model

In [6]:
class MyModel(nn.Module):
    """Small convolutional classifier.

    Three ``Conv2d`` + ``ReLU`` stages (3 -> 96 -> 192 -> 384 channels, each
    with a 3x3 kernel, stride 2 and padding 1), followed by a mean over the
    last two axes of the feature map and a linear layer with 10 outputs.
    """

    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=96, out_channels=192, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
        )
        # Maps the 384 pooled channel values to 10 outputs.
        self.head = nn.Linear(in_features=384, out_features=10)

    def forward(self, x):
        """Apply the conv stack, average over the last two axes, then the head."""
        feature_map = self.layers(x)
        pooled = feature_map.mean(dim=[-1, -2])
        return self.head(pooled)

# Build the network and move its parameters to the configured device.
model = MyModel().to(device)

## 5. Set Optimizer, Scheduler, Loss Function

In [7]:
# SGD with momentum over all model parameters.
optimizer = optim.SGD(model.parameters(), momentum=momentum, lr=base_lr)

# Cosine annealing with T_max = epochs * (batches per epoch).
scheduler = optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=epochs * len(train_loader),
)

# Cross-entropy loss, plus a 10-class accuracy metric moved to the training device.
loss_fn = nn.CrossEntropyLoss()
metric_fn = Accuracy(task='multiclass', num_classes=10).to(device)

## 6. Define Train Loop

In [8]:
def train(loader, model, optimizer, scheduler, loss_fn, metric_fn, device):
    """Run one training pass over ``loader`` and return the averaged results.

    Args:
        loader: Iterable yielding ``(inputs, targets)`` batches of tensors.
        model: Module to optimise; it is put into training mode.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler stepped once per batch, after the
            optimizer.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar
            tensor that supports ``backward()``.
        metric_fn: Callable ``metric_fn(outputs, targets)`` returning a scalar
            tensor.
        device: Device each batch is moved to before the forward pass.

    Returns:
        dict with keys ``'loss'`` and ``'metric'`` holding the averages over
        the pass, with every batch weighted by its number of samples.
    """
    model.train()
    loss_mean = MeanMetric()
    metric_mean = MeanMetric()

    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        outputs = model(inputs)
        loss = loss_fn(outputs, targets)
        metric = metric_fn(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Weight each batch by its size so a smaller final batch does not count
        # as much as a full one. This assumes loss_fn / metric_fn return
        # per-batch means (the default reduction of nn.CrossEntropyLoss).
        n_samples = inputs.size(0)
        # detach(): only the value is needed for logging, not the autograd graph.
        loss_mean.update(loss.detach().to('cpu'), weight=n_samples)
        metric_mean.update(metric.detach().to('cpu'), weight=n_samples)

    summary = {'loss': loss_mean.compute(), 'metric': metric_mean.compute()}

    return summary

## 7. Define Evaluation Loop

In [9]:
def evaluate(loader, model, loss_fn, metric_fn, device):
    """Evaluate ``model`` on every batch of ``loader`` without tracking gradients.

    Args:
        loader: Iterable yielding ``(inputs, targets)`` batches of tensors.
        model: Module to evaluate; it is put into evaluation mode and left
            there when the function returns.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar
            tensor.
        metric_fn: Callable ``metric_fn(outputs, targets)`` returning a scalar
            tensor.
        device: Device each batch is moved to before the forward pass.

    Returns:
        dict with keys ``'loss'`` and ``'metric'`` holding the averages over
        the pass, with every batch weighted by its number of samples.
    """
    model.eval()
    loss_mean = MeanMetric()
    metric_mean = MeanMetric()

    # Nothing in evaluation needs autograd, so the forward pass, the loss and
    # the metric all run with gradient tracking disabled.
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            outputs = model(inputs)
            loss = loss_fn(outputs, targets)
            metric = metric_fn(outputs, targets)

            # Weight each batch by its size so a smaller final batch does not
            # count as much as a full one. This assumes loss_fn / metric_fn
            # return per-batch means (the default of nn.CrossEntropyLoss).
            n_samples = inputs.size(0)
            loss_mean.update(loss.to('cpu'), weight=n_samples)
            metric_mean.update(metric.to('cpu'), weight=n_samples)

    summary = {'loss': loss_mean.compute(), 'metric': metric_mean.compute()}

    return summary

## 8. Define Main Loop

In [None]:
# The "last" checkpoint is overwritten after every epoch, so its path is fixed.
checkpoint_path = f'{checkpoint_dir}/{title}_last.pth'

for epoch in range(epochs):
    train_summary = train(train_loader, model, optimizer, scheduler, loss_fn, metric_fn, device)
    val_summary = evaluate(val_loader, model, loss_fn, metric_fn, device)

    print((f'Epoch {epoch+1}: '
           + f'Train Loss {train_summary["loss"]:.04f}, '
           + f'Train Accuracy {train_summary["metric"]:.04f}, '
           + f'Test Loss {val_summary["loss"]:.04f}, '
           + f'Test Accuracy {val_summary["metric"]:.04f}'))

    # Save everything needed to resume training. The scheduler is stepped once
    # per batch inside train(), so without its state a resumed run could not
    # restore the position in the learning-rate schedule.
    state_dict = {
        'epoch': epoch + 1,  # number of completed epochs
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict(),
    }
    torch.save(state_dict, checkpoint_path)

Epoch 1: Train Loss 2.0310, Train Accuracy 0.2475, Test Loss 1.8184, Test Accuracy 0.3301
Epoch 2: Train Loss 1.7164, Train Accuracy 0.3720, Test Loss 1.5915, Test Accuracy 0.4146
Epoch 3: Train Loss 1.5540, Train Accuracy 0.4309, Test Loss 1.4708, Test Accuracy 0.4623
Epoch 4: Train Loss 1.4708, Train Accuracy 0.4640, Test Loss 1.4260, Test Accuracy 0.4798
Epoch 5: Train Loss 1.4000, Train Accuracy 0.4913, Test Loss 1.4228, Test Accuracy 0.4759
Epoch 6: Train Loss 1.3314, Train Accuracy 0.5182, Test Loss 1.2873, Test Accuracy 0.5309
Epoch 7: Train Loss 1.2691, Train Accuracy 0.5419, Test Loss 1.2367, Test Accuracy 0.5491
Epoch 8: Train Loss 1.2128, Train Accuracy 0.5676, Test Loss 1.1874, Test Accuracy 0.5718
Epoch 9: Train Loss 1.1600, Train Accuracy 0.5847, Test Loss 1.1470, Test Accuracy 0.5907
Epoch 10: Train Loss 1.1182, Train Accuracy 0.5995, Test Loss 1.1174, Test Accuracy 0.6004
Epoch 11: Train Loss 1.0679, Train Accuracy 0.6201, Test Loss 1.1090, Test Accuracy 0.6047
Epoch 12