Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved.
Portions of this notebook consist of AI-generated content.

Permission is hereby granted, free of charge, to any person obtaining a copy

of this software and associated documentation files (the "Software"), to deal

in the Software without restriction, including without limitation the rights

to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

copies of the Software, and to permit persons to whom the Software is

furnished to do so, subject to the following conditions:



The above copyright notice and this permission notice shall be included in all

copies or substantial portions of the Software.



THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

SOFTWARE.

# DL08 Basic CNN
### Lab Description
This laboratory exercise introduces **Convolutional Neural Networks (CNNs)**, a foundational deep learning architecture widely used for image classification tasks. CNNs are specifically designed to process and learn from visual data by capturing spatial hierarchies in images through convolutional filters.

In this hands-on lab, you will use the ``CIFAR-10`` dataset to build and train a basic CNN for multi-class image classification. You will explore model construction, training loops, and evaluation techniques to better understand the workflow of applying CNNs to real-world datasets.

### What you can expect to learn
- Model Architecture: Understand the structure and components of a basic CNN (convolution, pooling, fully connected layers).

- Data Preprocessing: Learn to prepare and normalize image data for deep learning models.

- Model Training and Evaluation: Gain hands-on experience training a CNN using PyTorch, monitoring performance, and testing on unseen data.

- Visualization: Visualize sample predictions to assess model performance qualitatively.

### Import necessary libraries

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Select the compute device: the GPU when PyTorch can see one, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"device: {device}")
# Only query the GPU name when a GPU is present; the call raises on CPU-only machines.
if device == "cuda":
    print("GPU Name:", torch.cuda.get_device_name(0))

### Required Dataset
In this section, we load the ``CIFAR-10`` dataset and apply necessary transformations.
Each image in CIFAR-10 is a 32x32 color image belonging to one of 10 classes.
We normalize the pixel values to fall within the range [-1, 1] for better training stability.
The dataset is then split into a training set and a test set, and data loaders are prepared for efficient batch processing during training and evaluation.

In [3]:
# Define image transformations.
# ToTensor scales pixels to [0, 1]; normalizing every channel with mean 0.5 and
# std 0.5 then maps them to [-1, 1]. This matches the `img / 2 + 0.5`
# un-normalization used by `imshow` when displaying images.
NORM_MEAN = (0.5, 0.5, 0.5)
NORM_STD = (0.5, 0.5, 0.5)
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(NORM_MEAN, NORM_STD)])

# Download CIFAR-10 training and test sets into DATA_ROOT
DATA_ROOT = os.path.expanduser("~/data/cifar10")
os.makedirs(DATA_ROOT, exist_ok=True)

# Training split: shuffled so every epoch sees the batches in a different order
trainset = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

# Test split: fixed order so evaluation is repeatable
testset = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

# Classes in CIFAR-10 Dataset
classes = trainset.classes
len(classes), classes[:10]

In [4]:
# Visualize some training images
def imshow(img):
    """Display an image tensor with matplotlib.

    Applies ``img / 2 + 0.5`` (the inverse of a mean-0.5 / std-0.5
    normalization), converts the result to a NumPy array, moves the first
    axis to the end and shows it.

    Args:
        img: Tensor whose first axis is moved last before plotting; it must
            support ``.numpy()``.
    """
    restored = img / 2 + 0.5  # unnormalize
    channels_last = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(channels_last)
    plt.show()


# Draw one batch from the shuffled training loader
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Tile the first eight images into a single grid and display it
sample_grid = torchvision.utils.make_grid(images[:8])
imshow(sample_grid)
# Print the class name of each displayed image, padded to five characters
sample_names = [f"{classes[label]:5s}" for label in labels[:8]]
print(" ".join(sample_names))

### Define a Simple CNN Model
In this section, we define a simple Convolutional Neural Network (CNN) using PyTorch's `nn.Module`.
The model consists of two convolutional layers followed by ReLU activation and max pooling to downsample feature maps.
The resulting features are then flattened and passed through fully connected layers for classification into one of the 10 CIFAR-10 classes.
This basic architecture is sufficient for demonstrating key concepts in CNN-based image classification.

In [None]:
import torch
import torch.nn.functional as F


class BasicCNN(nn.Module):
    """Two-stage convolutional classifier for 3-channel 32x32 images.

    Each stage is a 3x3 convolution (padding 1) followed by ReLU and 2x2 max
    pooling. The pooled features are flattened and passed through a 512-unit
    hidden layer with dropout before the final linear layer emits logits.

    Args:
        num_classes: Number of output logits.
        p_drop: Dropout probability applied after the hidden layer.
    """

    def __init__(self, num_classes=10, p_drop=0.25):
        super().__init__()

        # Feature extractor: channel widths 3 -> 32 -> 64
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)

        # A single pooling layer is reused after each convolution (32 -> 16 -> 8)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head; 64 * 8 * 8 is the flattened size after both poolings
        self.fc1 = nn.Linear(64 * 8 * 8, 512)
        self.fc2 = nn.Linear(512, num_classes)

        # Regularization between the two linear layers
        self.dropout = nn.Dropout(p_drop)

        # Kaiming-normal init (fan_out, ReLU gain) for the conv weights only;
        # everything else keeps the initialization it was constructed with.
        conv_layers = [layer for layer in self.modules() if isinstance(layer, nn.Conv2d)]
        for layer in conv_layers:
            nn.init.kaiming_normal_(layer.weight, mode="fan_out", nonlinearity="relu")

    def forward(self, x):
        """Map a batch of shape [B, 3, 32, 32] to logits of shape [B, num_classes]."""
        # Stage 1: [B,3,32,32] -> [B,32,32,32] -> [B,32,16,16]
        stage1 = self.pool(F.relu(self.conv1(x)))
        # Stage 2: [B,64,16,16] -> [B,64,8,8]
        stage2 = self.pool(F.relu(self.conv2(stage1)))
        # Collapse everything but the batch axis: [B, 64*8*8]
        flat = stage2.view(stage2.size(0), -1)
        # Hidden layer with ReLU, then dropout, then the logit layer
        hidden = self.dropout(F.relu(self.fc1(flat)))
        return self.fc2(hidden)

### Training
In this part, we initialize the CNN model, define the loss function (CrossEntropyLoss for multi-class classification), and choose the optimizer (Adam) with a learning rate of 0.001.

We then train the model using the training data.
Each batch of images is passed through the model, the loss is computed, and backpropagation is performed to update the weights.
The average training loss is printed every 5 epochs to monitor progress, and a checkpoint is saved every 10 epochs.
The model is trained using a GPU if available for faster computation.

In [None]:
import glob
import os
import re

import torch

# Build the network and place it on the selected device before creating the
# optimizer, so the optimizer tracks the on-device parameters.
model = BasicCNN(num_classes=10)
model = model.to(device)

# Multi-class classification loss and Adam with a 0.001 learning rate
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Directory that holds the training checkpoints (created on first use)
CKPT_DIR = os.path.expanduser("~/data/cnn_ckpt")
os.makedirs(CKPT_DIR, exist_ok=True)


def find_latest_ckpt(ckpt_dir: str):
    """Locate the checkpoint file to resume from.

    Files named ``epoch_<N>.pth`` take priority: the one with the largest
    integer ``N`` is returned. When none exist, ``last.pth`` is returned if it
    is present in the directory.

    Args:
        ckpt_dir: Directory to search.

    Returns:
        The path of the chosen checkpoint, or ``None`` when nothing suitable
        is found.
    """
    epoch_re = re.compile(r"epoch_(\d+)\.pth$")
    candidates = glob.glob(os.path.join(ckpt_dir, "epoch_*.pth"))

    # Keep only files whose name carries a numeric epoch, paired with that number
    matched = ((epoch_re.search(os.path.basename(path)), path) for path in candidates)
    numbered = [(int(hit.group(1)), path) for hit, path in matched if hit]
    if numbered:
        # Tuples compare by epoch first, so max() picks the highest epoch
        return max(numbered)[1]

    fallback = os.path.join(ckpt_dir, "last.pth")
    if os.path.exists(fallback):
        return fallback
    return None


import copy

# Resume bookkeeping: number of epochs already completed (0 for a fresh run)
# and the per-epoch loss history for this session.
start_epoch = 0
train_loss_per_epoch = []

latest = find_latest_ckpt(CKPT_DIR)
if latest:
    print(f"[resume] loading ckpt: {latest}")
    # Snapshot the freshly initialized states so a failed restore can be rolled
    # back; otherwise a partial load would leave stale weights behind while the
    # message below claims training starts from scratch.
    fresh_model_state = copy.deepcopy(model.state_dict())
    fresh_optimizer_state = copy.deepcopy(optimizer.state_dict())
    try:
        # Reading the file is inside the try so an unreadable or corrupt
        # checkpoint falls back to a fresh run instead of aborting the cell.
        ckpt = torch.load(latest, map_location="cpu")
        model.load_state_dict(ckpt["model_state"])
        optimizer.load_state_dict(ckpt["optimizer_state"])
        # The checkpoint was mapped to the CPU; move optimizer state tensors to
        # the training device.
        for state in optimizer.state.values():
            for k, v in state.items():
                if torch.is_tensor(v):
                    state[k] = v.to(device)
        start_epoch = int(ckpt.get("epoch", 0))
        if "train_loss" in ckpt:
            print(f"[resume] previous loss: {ckpt['train_loss']:.6f}")
        print(f"[resume] start from epoch {start_epoch + 1}")
    except Exception as e:
        # Best-effort resume: undo whatever was loaded and start over.
        model.load_state_dict(fresh_model_state)
        optimizer.load_state_dict(fresh_optimizer_state)
        start_epoch = 0
        print(f"[resume] failed to load state dicts: {e}\n[resume] training from scratch.")

# Training loop
EPOCHS = 10  # number of epochs to train in this run
train_loss_per_epoch = []

# Continue the epoch numbering after the epochs recorded in a resumed
# checkpoint (start_epoch is 0 for a fresh run). Without this, a resumed run
# would overwrite earlier epoch_*.pth files and store a wrong epoch count.
first_epoch = start_epoch
final_epoch = start_epoch + EPOCHS


def _save_checkpoint(path, epoch, loss):
    """Write model and optimizer state plus bookkeeping fields to `path`."""
    torch.save(
        {
            "epoch": epoch,
            "model_state": model.state_dict(),
            "optimizer_state": optimizer.state_dict(),
            "train_loss": loss,
            "arch": model.__class__.__name__,
            "lr": optimizer.param_groups[0]["lr"],
        },
        path,
    )


for epoch in range(first_epoch, final_epoch):
    # Ensure dropout is active even if the model was put in eval mode earlier
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Average of the per-batch losses over the epoch
    epoch_loss = running_loss / len(trainloader)
    train_loss_per_epoch.append(epoch_loss)

    if (epoch + 1) % 5 == 0:
        print(f"Epoch [{epoch + 1}/{final_epoch}] Loss: {epoch_loss:.4f}")

    if (epoch + 1) % 10 == 0:
        ckpt_path = os.path.join(CKPT_DIR, f"epoch_{epoch + 1:03d}.pth")
        _save_checkpoint(ckpt_path, epoch + 1, epoch_loss)
        print(f"[ckpt] saved: {ckpt_path}")

print("Finished Training")

# Always store the final state so the most recent weights are on disk
final_path = os.path.join(CKPT_DIR, "last.pth")
_save_checkpoint(final_path, final_epoch, train_loss_per_epoch[-1])
print(f"[ckpt] saved last: {final_path}")

In [None]:
# Plot the training loss curve
# The x-axis is derived from the number of recorded losses rather than EPOCHS,
# so the plot still works if training was interrupted or EPOCHS was edited
# after the training cell ran.
epochs_recorded = range(1, len(train_loss_per_epoch) + 1)

plt.figure(figsize=(8, 5))
plt.plot(epochs_recorded, train_loss_per_epoch, marker="o", label="Training Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training Loss Curve")
plt.grid(True)
plt.legend()
plt.show()

### Results & Visualization

In [None]:
# Evaluate on test data
# Switch to evaluation mode so the dropout layer is disabled; otherwise units
# are still dropped at random and the reported accuracy is noisy and too low.
model.eval()
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on 10,000 test images: {100 * correct / total:.2f}%")

# Show some predictions for the first test batch
dataiter = iter(testloader)
images, labels = next(dataiter)
images, labels = images.to(device), labels.to(device)
with torch.no_grad():
    outputs = model(images)
predicted = outputs.argmax(dim=1)

# Move the images back to the CPU before converting them for plotting
imshow(torchvision.utils.make_grid(images[:8].cpu()))
print("GroundTruth: ", " ".join(f"{classes[labels[j]]:5s}" for j in range(8)))
print("Predicted:   ", " ".join(f"{classes[predicted[j]]:5s}" for j in range(8)))