*Accompanying code examples of the book "Introduction to Artificial Neural Networks and Deep Learning: A Practical Guide with Applications in Python" by [Sebastian Raschka](https://sebastianraschka.com). All code examples are released under the [MIT license](https://github.com/rasbt/deep-learning-book/blob/master/LICENSE). If you find this content useful, please consider supporting the work by buying a [copy of the book](https://leanpub.com/ann-and-deeplearning).*
  
Other code examples and content are available on [GitHub](https://github.com/rasbt/deep-learning-book). The PDF and ebook versions of the book are available through [Leanpub](https://leanpub.com/ann-and-deeplearning).

In [6]:
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p torch

Author: Sebastian Raschka

Python implementation: CPython
Python version       : 3.11.11
IPython version      : 9.0.2

torch: 2.6.0+cu126



# Model Zoo -- AlexNet CIFAR-10 Classifier

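### Network Architecture

The model defined below follows the AlexNet layout [1]: five convolutional layers with ReLU activations and three max-pooling stages, an adaptive average pooling layer that fixes the feature map size at 6x6, and a three-layer fully connected classifier with dropout. CIFAR-10 images are resized to 64x64 before being fed to the network.
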
References
    
- [1] Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "[Imagenet classification with deep convolutional neural networks.](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf)" In Advances in Neural Information Processing Systems, pp. 1097-1105. 2012.


## Imports

In [7]:
import os
import time

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

from torchvision import datasets
from torchvision import transforms

import matplotlib.pyplot as plt
from PIL import Image


# When a CUDA device is present, request deterministic cuDNN algorithms so
# that repeated GPU runs are more reproducible.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

## Model Settings

In [8]:
##########################
### Settings
##########################

# Hyperparameters
RANDOM_SEED = 1          # seed for the train/validation split and weight init
LEARNING_RATE = 0.0001   # learning rate handed to the optimizer
BATCH_SIZE = 256         # number of samples per mini-batch
NUM_EPOCHS = 20          # number of passes over the training subset

# Architecture
NUM_CLASSES = 10         # number of target classes

# Other
# Use the first CUDA device when one is available; otherwise fall back to the
# CPU so the notebook still runs (slowly) on machines without a GPU.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

## Dataset

In [11]:
def get_train_valid_loader(data_dir,
                           batch_size,
                           train_transform,
                           valid_transform,
                           random_seed,
                           valid_size=0.1,
                           shuffle=True,
                           num_workers=4):
    """Split the CIFAR-10 training set and return (train, validation) loaders.

    The CIFAR-10 training split is instantiated twice so that each subset can
    carry its own transform; the two loaders draw from disjoint index sets of
    that same split.

    Args:
        data_dir: Root directory where the dataset is stored / downloaded to.
        batch_size: Number of samples per batch for both loaders.
        train_transform: Transform applied to samples of the training subset.
        valid_transform: Transform applied to samples of the validation subset.
        random_seed: Seed for the permutation that decides the split
            (only used when ``shuffle`` is true).
        valid_size: Fraction of the training split reserved for validation;
            must lie in the closed interval [0, 1].
        shuffle: If true, permute the indices before splitting; otherwise the
            first ``floor(valid_size * N)`` samples become the validation set.
        num_workers: ``num_workers`` argument forwarded to both DataLoaders.

    Returns:
        A tuple ``(train_loader, valid_loader)``.

    Raises:
        ValueError: If ``valid_size`` is outside [0, 1].
    """
    # Reject nonsensical fractions up front, before any download is triggered.
    # (The negated chained comparison also rejects NaN.)
    if not 0.0 <= valid_size <= 1.0:
        raise ValueError(
            'valid_size must be in the range [0, 1], got %r' % (valid_size,))

    # Training split with the training transform.
    train_dataset = datasets.CIFAR10(root=data_dir,
                                     train=True,
                                     download=True,
                                     transform=train_transform)

    # Same training split again, but with the validation transform.
    valid_dataset = datasets.CIFAR10(root=data_dir,
                                     train=True,
                                     download=False,
                                     transform=valid_transform)

    num_train = len(train_dataset)
    indices = np.arange(num_train)
    # Number of samples that go to the validation subset.
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        # A private legacy generator keeps the split reproducible for a given
        # seed without resetting NumPy's global random state for the caller.
        np.random.RandomState(random_seed).shuffle(indices)

    # The first `split` indices form the validation set, the rest the training set.
    train_idx, valid_idx = indices[split:], indices[:split]

    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=num_workers)

    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=batch_size,
                                               sampler=valid_sampler,
                                               num_workers=num_workers)

    return (train_loader, valid_loader)


def get_test_loader(data_dir,
                    batch_size,
                    test_transform,
                    num_workers=4):
    """Build a DataLoader over the CIFAR-10 test split.

    The dataset is opened with ``download=False``, so the files must already
    exist under ``data_dir``.

    Args:
        data_dir: Root directory that holds the dataset files.
        batch_size: Number of samples per batch.
        test_transform: Transform applied to every test sample.
        num_workers: ``num_workers`` argument forwarded to the DataLoader.

    Returns:
        A DataLoader over the test split; no sampler or shuffle option is set.
    """
    loader_options = dict(batch_size=batch_size, num_workers=num_workers)

    test_split = datasets.CIFAR10(root=data_dir,
                                  train=False,
                                  transform=test_transform,
                                  download=False)

    return torch.utils.data.DataLoader(test_split, **loader_options)


In [12]:
##########################
### Load CIFAR-10 and inspect one batch per split
##########################

# Preprocessing shared by all three splits:
# - Resize: bring every image to 64x64 pixels
# - ToTensor: convert the image to a tensor with values scaled to [0, 1]
custom_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

# Training / validation loaders: 10% of the training split is held out,
# chosen by a seeded shuffle; four worker processes load the data.
train_loader, valid_loader = get_train_valid_loader(
    data_dir='data',
    batch_size=BATCH_SIZE,
    train_transform=custom_transform,
    valid_transform=custom_transform,
    random_seed=RANDOM_SEED,
    valid_size=0.1,
    shuffle=True,
    num_workers=4,
)

# Test loader over the separate CIFAR-10 test split.
test_loader = get_test_loader(
    data_dir='data',
    batch_size=BATCH_SIZE,
    test_transform=custom_transform,
    num_workers=4,
)

# Print the tensor shapes of the first batch of each loader.
for header, loader in (('训练集样本:\n', train_loader),
                       ('\n验证集样本:', valid_loader),
                       ('\n测试集样本:', test_loader)):
    print(header)
    for images, labels in loader:
        print('图像批次尺寸:', images.size())
        print('标签批次尺寸:', labels.size())
        break  # one batch is enough for the shape check


训练集样本:

图像批次尺寸: torch.Size([256, 3, 64, 64])
标签批次尺寸: torch.Size([256])

验证集样本:
图像批次尺寸: torch.Size([256, 3, 64, 64])
标签批次尺寸: torch.Size([256])

测试集样本:
图像批次尺寸: torch.Size([256, 3, 64, 64])
标签批次尺寸: torch.Size([256])


## Model

In [14]:
##########################
### Model definition: AlexNet
##########################

class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    The network consists of a convolutional feature extractor, an adaptive
    average pooling layer that fixes the spatial size at 6x6, and a fully
    connected classifier with dropout.

    Args:
        num_classes: Size of the output layer (number of target classes).
    """

    def __init__(self, num_classes):
        super().__init__()

        # Feature extractor: five conv layers with ReLU, three max-pool stages.
        conv_stack = [
            # conv 1: 3 -> 64 channels, 11x11 kernel, stride 4, padding 2
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # conv 2: 64 -> 192 channels, 5x5 kernel
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # conv 3: 192 -> 384 channels, 3x3 kernel
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # conv 4: 384 -> 256 channels, 3x3 kernel
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # conv 5: 256 -> 256 channels, 3x3 kernel
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # final max pooling
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Always hand a 256 x 6 x 6 feature map to the classifier.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # Classifier head: dropout + two hidden layers + output layer.
        head = [
            nn.Dropout(0.5),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),  # one logit per class
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return ``(logits, probas)`` for a batch of images.

        ``logits`` are the raw classifier outputs and ``probas`` is their
        softmax over the class dimension (dim 1).
        """
        pooled = self.avgpool(self.features(x))
        # Flatten each sample's 256 x 6 x 6 feature map into one vector.
        flat = pooled.view(len(pooled), 256 * 6 * 6)
        logits = self.classifier(flat)
        return logits, F.softmax(logits, dim=1)


In [15]:
# Seed PyTorch's RNG before the model is built so the initial weights
# are the same on every run.
torch.manual_seed(RANDOM_SEED)

# Build the network for the configured number of classes and move it to
# the configured device in one step.
model = AlexNet(NUM_CLASSES).to(DEVICE)

# Adam optimizer over all model parameters with the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)


## Training

In [16]:
# Compute the classification accuracy of a model on a data loader
def compute_accuracy(model, data_loader, device):
    """Return the accuracy of ``model`` on ``data_loader`` in percent.

    The model is switched to evaluation mode and is left in that mode when
    the function returns. Gradient tracking is disabled for the forward
    passes, so the function can be called without an outer no-grad context.

    Args:
        model: Module whose forward pass returns ``(logits, probas)``.
        data_loader: Iterable yielding ``(features, targets)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A zero-dimensional float tensor holding the accuracy in percent.

    Raises:
        ValueError: If ``data_loader`` yields no examples, in which case the
            accuracy is undefined.
    """
    model.eval()  # evaluation mode (e.g. disables dropout)
    correct_pred, num_examples = 0, 0

    with torch.no_grad():
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)

            logits, _ = model(features)
            # Softmax is monotonic, so the arg-max of the logits is the
            # predicted class; using logits avoids ties from saturated probas.
            predicted_labels = torch.argmax(logits, dim=1)

            assert predicted_labels.size() == targets.size()
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    if num_examples == 0:
        raise ValueError(
            'data_loader yielded no examples; accuracy is undefined')

    return correct_pred.float() / num_examples * 100
    


# ========== Training ==========
start_time = time.time()  # reference point for all elapsed-time messages

for epoch in range(NUM_EPOCHS):

    model.train()  # training mode (enables dropout)

    for batch_idx, (features, targets) in enumerate(train_loader):
        features = features.to(DEVICE)
        targets = targets.to(DEVICE)

        ### Forward pass and backpropagation
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)  # cross-entropy on raw logits
        optimizer.zero_grad()  # clear gradients left over from the last step
        cost.backward()

        ### Parameter update
        optimizer.step()

        ### Logging: every 150th batch
        if not batch_idx % 150:
            # .item() turns the zero-dimensional loss tensor into a Python float
            print('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                  % (epoch+1, NUM_EPOCHS, batch_idx,
                     len(train_loader), cost.item()))

    # After each epoch, report accuracy on the training and validation subsets.
    model.eval()
    with torch.no_grad():  # no gradients needed for evaluation
        print('Epoch: %03d/%03d | Train: %.3f%%  | Valid: %.3f%%' % (
              epoch+1, NUM_EPOCHS,
              compute_accuracy(model, train_loader, device=DEVICE),
              compute_accuracy(model, valid_loader, device=DEVICE)))

    # The value is measured from start_time, i.e. it is the cumulative time
    # since training began, so the message is labelled accordingly.
    print('累计耗时: %.2f 分钟' % ((time.time() - start_time)/60))


# ========== Evaluation on the test set after training ==========
print('总训练时间: %.2f 分钟' % ((time.time() - start_time)/60))

with torch.no_grad():  # no gradients needed for evaluation
    print('测试集准确率: %.2f%%' % (compute_accuracy(model, test_loader, device=DEVICE)))

print('总运行时间: %.2f 分钟' % ((time.time() - start_time)/60))


Epoch: 001/020 | Batch 0000/0176 | Cost: 2.3027
Epoch: 001/020 | Batch 0150/0176 | Cost: 1.5864
Epoch: 001/020 | Train: 36.842%  | Valid: 36.740%
本轮耗时: 0.15 分钟
Epoch: 002/020 | Batch 0000/0176 | Cost: 1.6263
Epoch: 002/020 | Batch 0150/0176 | Cost: 1.4651
Epoch: 002/020 | Train: 46.724%  | Valid: 46.260%
本轮耗时: 0.29 分钟
Epoch: 003/020 | Batch 0000/0176 | Cost: 1.3999
Epoch: 003/020 | Batch 0150/0176 | Cost: 1.2032
Epoch: 003/020 | Train: 51.836%  | Valid: 50.840%
本轮耗时: 0.43 分钟
Epoch: 004/020 | Batch 0000/0176 | Cost: 1.2515
Epoch: 004/020 | Batch 0150/0176 | Cost: 1.1873
Epoch: 004/020 | Train: 57.331%  | Valid: 55.120%
本轮耗时: 0.58 分钟
Epoch: 005/020 | Batch 0000/0176 | Cost: 1.0831
Epoch: 005/020 | Batch 0150/0176 | Cost: 1.0954
Epoch: 005/020 | Train: 61.304%  | Valid: 58.980%
本轮耗时: 0.71 分钟
Epoch: 006/020 | Batch 0000/0176 | Cost: 1.0111
Epoch: 006/020 | Batch 0150/0176 | Cost: 1.1086
Epoch: 006/020 | Train: 62.633%  | Valid: 58.500%
本轮耗时: 0.85 分钟
Epoch: 007/020 | Batch 0000/0176 | Cost:

In [17]:
%watermark -iv

pandas     : 2.2.3
PIL        : 11.1.0
numpy      : 1.26.4
torch      : 2.6.0+cu126
matplotlib : 3.10.1
torchvision: 0.21.0+cu126

