*Accompanying code examples of the book "Introduction to Artificial Neural Networks and Deep Learning: A Practical Guide with Applications in Python" by [Sebastian Raschka](https://sebastianraschka.com). All code examples are released under the [MIT license](https://github.com/rasbt/deep-learning-book/blob/master/LICENSE). If you find this content useful, please consider supporting the work by buying a [copy of the book](https://leanpub.com/ann-and-deeplearning).*
  
Other code examples and content are available on [GitHub](https://github.com/rasbt/deep-learning-book). The PDF and ebook versions of the book are available through [Leanpub](https://leanpub.com/ann-and-deeplearning).

In [3]:
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p torch

Author: Sebastian Raschka

Python implementation: CPython
Python version       : 3.11.11
IPython version      : 9.0.2

torch: 2.6.0+cu126



- Runs on CPU or GPU (if available)

# Model Zoo -- Convolutional Neural Network with He Initialization

## Imports

In [2]:
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader


# When a CUDA device is present, set the cuDNN "deterministic" flag so that
# repeated runs are more comparable. On CPU-only machines the branch is
# skipped and the flag is left at its default.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

## Settings and Dataset

In [4]:
##########################
### SETTINGS
##########################

# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # first GPU if one is available, otherwise CPU

# Hyperparameters
# NOTE(review): random_seed is only declared in this cell; nothing here calls
# torch.manual_seed, so the shuffled preview batch drawn at the bottom of the
# cell presumably comes from an unseeded RNG -- confirm if that matters.
random_seed = 1            # seed used later for reproducibility
learning_rate = 0.05       # step size passed to the optimizer
num_epochs = 10            # number of passes over the training set
batch_size = 128           # samples per mini-batch

# Architecture
num_classes = 10           # MNIST has ten classes (digits 0-9)

##########################
### MNIST DATASET
##########################

# Note: transforms.ToTensor() scales input image pixel values to the 0-1 range
train_dataset = datasets.MNIST(root='data',           # directory where the data is stored
                               train=True,            # training split
                               transform=transforms.ToTensor(),  # image -> tensor scaled to [0, 1]
                               download=True)         # download if not already present

test_dataset = datasets.MNIST(root='data',            # directory where the data is stored
                              train=False,           # test split
                              transform=transforms.ToTensor())  # image -> tensor scaled to [0, 1]

# DataLoader over the training split
train_loader = DataLoader(dataset=train_dataset,      # dataset to draw from
                          batch_size=batch_size,     # mini-batch size
                          shuffle=True)              # reshuffle the samples

# DataLoader over the test split
test_loader = DataLoader(dataset=test_dataset,        # dataset to draw from
                         batch_size=batch_size,      # mini-batch size
                         shuffle=False)              # keep the test order fixed

# Sanity check: inspect the shapes of a single batch
for images, labels in train_loader:  # fetch one batch
    print('Image batch dimensions:', images.shape)  # shape of the image tensor
    print('Image label dimensions:', labels.shape)  # shape of the label tensor
    break  # one batch is enough to see the data layout


Image batch dimensions: torch.Size([128, 1, 28, 28])
Image label dimensions: torch.Size([128])


## Model

In [5]:
##########################
### MODEL
##########################

class ConvNet(torch.nn.Module):
    """Small two-stage CNN classifier with He (Kaiming) initialised weights.

    Layer stack, for a 1x28x28 input:
        conv 3x3 (1 -> 4 channels, same padding) -> ReLU -> max-pool 2x2  => 4x14x14
        conv 3x3 (4 -> 8 channels, same padding) -> ReLU -> max-pool 2x2  => 8x7x7
        flatten (7*7*8 = 392 features) -> linear -> ``num_classes`` logits

    Args:
        num_classes: Number of output classes (size of the logit vector).
    """

    def __init__(self, num_classes):
        super().__init__()

        # Padding needed for "same" convolutions (output size == input size):
        #   (w - k + 2*p)/s + 1 = o
        #   => p = (s(o-1) - w + k)/2

        # Convolution 1: 28x28x1 => 28x28x4
        # padding: (1(28-1) - 28 + 3)/2 = 1
        self.conv_1 = torch.nn.Conv2d(in_channels=1,
                                      out_channels=4,
                                      kernel_size=(3, 3),
                                      stride=(1, 1),
                                      padding=1)

        # Max-pool 1: 28x28x4 => 14x14x4 (2x2 window, stride 2, no padding)
        self.pool_1 = torch.nn.MaxPool2d(kernel_size=(2, 2),
                                         stride=(2, 2),
                                         padding=0)

        # Convolution 2: 14x14x4 => 14x14x8
        # padding: (1(14-1) - 14 + 3)/2 = 1
        self.conv_2 = torch.nn.Conv2d(in_channels=4,
                                      out_channels=8,
                                      kernel_size=(3, 3),
                                      stride=(1, 1),
                                      padding=1)

        # Max-pool 2: 14x14x8 => 7x7x8
        self.pool_2 = torch.nn.MaxPool2d(kernel_size=(2, 2),
                                         stride=(2, 2),
                                         padding=0)

        # Classifier head: 7*7*8 flattened features => num_classes logits
        self.linear_1 = torch.nn.Linear(7*7*8, num_classes)

        ###############################################
        # Weight initialisation: He (Kaiming) normal
        ###############################################
        # The init functions already run without gradient tracking, so they
        # are applied to the parameters directly. nonlinearity='relu' yields
        # the same gain (sqrt(2)) as the library default, so the drawn
        # values are the same as with the default arguments.
        for m in self.modules():
            if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
                nn.init.kaiming_normal_(m.weight, mode='fan_in',
                                        nonlinearity='relu')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Image batch; the layer sizes above assume shape
                (batch, 1, 28, 28).

        Returns:
            Tuple ``(logits, probas)``, each of shape (batch, num_classes);
            ``probas`` is the softmax of ``logits`` along dim 1.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce
                7*7*8 features per sample.
        """
        out = self.conv_1(x)
        out = F.relu(out)
        out = self.pool_1(out)

        out = self.conv_2(out)
        out = F.relu(out)
        out = self.pool_2(out)

        # Flatten per sample, keeping the batch dimension fixed. Reshaping
        # with view(-1, 7*7*8) would silently spill surplus features into
        # the batch dimension for inputs of a different size; this way the
        # linear layer reports the mismatch instead.
        flat = torch.flatten(out, start_dim=1)
        logits = self.linear_1(flat)
        probas = F.softmax(logits, dim=1)
        return logits, probas

# Seed PyTorch's global RNG immediately before the model is constructed so
# that the weight initialisation is repeatable.
torch.manual_seed(random_seed)

# Build the network for the configured number of classes (10 for MNIST) and
# place it on the selected device (GPU or CPU) in one step.
model = ConvNet(num_classes=num_classes).to(device)

# Plain stochastic gradient descent (SGD) with the configured learning rate.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)


## Training

In [6]:
# Classification accuracy of a model over a data loader (e.g. train or test set)
def compute_accuracy(model, data_loader, device=None):
    """Return the percentage of correctly classified examples.

    The model's train/eval mode is left untouched; callers switch it
    themselves. Gradient tracking is disabled for the duration of the pass,
    since no backward step is taken here.

    Args:
        model: Callable module returning ``(logits, probas)`` for a batch.
        data_loader: Iterable yielding ``(features, targets)`` tensor pairs.
        device: Device the batches are moved to. When ``None`` (default), the
            device of the model's first parameter is used; if the model has
            no parameters the batches are left where they are.

    Returns:
        0-dim float tensor holding the accuracy in percent.

    Raises:
        ValueError: If ``data_loader`` yields no examples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device

    correct_pred, num_examples = 0, 0
    with torch.no_grad():  # inference only: skip building autograd graphs
        for features, targets in data_loader:
            if device is not None:
                features = features.to(device)
                targets = targets.to(device)
            _, probas = model(features)
            # Predicted class = index of the largest probability per row.
            _, predicted_labels = torch.max(probas, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    if num_examples == 0:
        raise ValueError('compute_accuracy received an empty data_loader')
    return correct_pred.float() / num_examples * 100


# Wall-clock reference point for every timing message printed below.
start_time = time.time()

# One iteration per training epoch.
for epoch in range(num_epochs):
    # Training mode for the optimisation pass (relevant for layers such as
    # dropout or batch norm).
    model.train()

    for batch_idx, (features, targets) in enumerate(train_loader):

        # Move the current mini-batch to the compute device (GPU or CPU).
        features, targets = features.to(device), targets.to(device)

        ### Clear stale gradients, then forward pass and backpropagation
        optimizer.zero_grad()
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        cost.backward()

        ### Apply one optimizer step to the model parameters
        optimizer.step()

        ### Progress log on every 50th mini-batch
        if batch_idx % 50 == 0:
            print('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' 
                  % (epoch + 1, num_epochs, batch_idx, 
                     len(train_loader), cost))

    # Evaluation mode before measuring accuracy at the end of the epoch.
    model.eval()

    # Accuracy on the training set after this epoch.
    train_acc = compute_accuracy(model, train_loader)
    print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
          epoch + 1, num_epochs, train_acc))

    # Time elapsed since training started.
    print('Time elapsed: %.2f min' % ((time.time() - start_time) / 60))

# Total training time.
print('Total Training Time: %.2f min' % ((time.time() - start_time) / 60))


Epoch: 001/010 | Batch 000/469 | Cost: 2.4756
Epoch: 001/010 | Batch 050/469 | Cost: 1.1244
Epoch: 001/010 | Batch 100/469 | Cost: 0.7610
Epoch: 001/010 | Batch 150/469 | Cost: 0.3307
Epoch: 001/010 | Batch 200/469 | Cost: 0.4453
Epoch: 001/010 | Batch 250/469 | Cost: 0.3692
Epoch: 001/010 | Batch 300/469 | Cost: 0.2474
Epoch: 001/010 | Batch 350/469 | Cost: 0.2564
Epoch: 001/010 | Batch 400/469 | Cost: 0.1947
Epoch: 001/010 | Batch 450/469 | Cost: 0.2040
Epoch: 001/010 training accuracy: 92.00%
Time elapsed: 0.06 min
Epoch: 002/010 | Batch 000/469 | Cost: 0.2104
Epoch: 002/010 | Batch 050/469 | Cost: 0.2966
Epoch: 002/010 | Batch 100/469 | Cost: 0.1638
Epoch: 002/010 | Batch 150/469 | Cost: 0.1436
Epoch: 002/010 | Batch 200/469 | Cost: 0.3372
Epoch: 002/010 | Batch 250/469 | Cost: 0.1965
Epoch: 002/010 | Batch 300/469 | Cost: 0.2100
Epoch: 002/010 | Batch 350/469 | Cost: 0.1994
Epoch: 002/010 | Batch 400/469 | Cost: 0.1761
Epoch: 002/010 | Batch 450/469 | Cost: 0.1119
Epoch: 002/010 t

## Evaluation

In [8]:
# Print the model's accuracy on the test set, formatted to two decimals.
# The label in the format string is runtime output (Chinese for "test set
# accuracy") and is intentionally left as-is.
print('测试集准确率: %.2f%%' % (compute_accuracy(model, test_loader)))

测试集准确率: 97.90%


In [9]:
%watermark -iv

torchvision: 0.21.0+cu126
numpy      : 1.26.4
torch      : 2.6.0+cu126

