*Accompanying code examples of the book "Introduction to Artificial Neural Networks and Deep Learning: A Practical Guide with Applications in Python" by [Sebastian Raschka](https://sebastianraschka.com). All code examples are released under the [MIT license](https://github.com/rasbt/deep-learning-book/blob/master/LICENSE). If you find this content useful, please consider supporting the work by buying a [copy of the book](https://leanpub.com/ann-and-deeplearning).*
  
Other code examples and content are available on [GitHub](https://github.com/rasbt/deep-learning-book). The PDF and ebook versions of the book are available through [Leanpub](https://leanpub.com/ann-and-deeplearning).

In [1]:
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p torch

Author: Sebastian Raschka

Python implementation: CPython
Python version       : 3.11.11
IPython version      : 9.0.2

torch: 2.6.0+cu126



- Runs on CPU or GPU (if available)

# Model Zoo -- Convolutional Neural Network

## Imports

In [2]:
import time
import numpy as np
import torch
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader


# When a CUDA GPU is present, ask cuDNN to use deterministic algorithms so
# that repeated GPU runs are more reproducible (nothing is changed on CPU-only
# machines).
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

## Settings and Dataset

In [3]:
##########################
### SETTINGS
##########################

# Device selection: first CUDA GPU when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Hyperparameters
random_seed = 1        # seed used later for reproducible weight initialization
learning_rate = 0.05   # SGD step size
num_epochs = 10        # number of passes over the training set
batch_size = 128       # examples per mini-batch

# Architecture
num_classes = 10       # MNIST has ten classes (digits 0-9)


##########################
### MNIST DATASET
##########################

# Note: transforms.ToTensor() converts images to tensors scaled to the 0-1 range
train_dataset = datasets.MNIST(
    root='data',                       # directory the data is stored in
    train=True,                        # training split
    transform=transforms.ToTensor(),
    download=True,                     # fetch the files if they are missing
)

test_dataset = datasets.MNIST(
    root='data',
    train=False,                       # test split
    transform=transforms.ToTensor(),
)

# Training batches are reshuffled every epoch
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# Evaluation does not need shuffling
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# Sanity check: print the shapes of the first training batch only
for images, labels in train_loader:
    print('图像批次的尺寸:', images.shape)   # image batch: [batch_size, 1, 28, 28]
    print('标签批次的尺寸:', labels.shape)   # label batch: [batch_size]
    break


图像批次的尺寸: torch.Size([128, 1, 28, 28])
标签批次的尺寸: torch.Size([128])


## Model

In [5]:
##########################
### MODEL
##########################

class ConvNet(torch.nn.Module):
    """Small two-stage convolutional classifier for 1x28x28 images.

    Layer stack:
        conv(1->4, 3x3) -> ReLU -> maxpool(2x2)
        -> conv(4->8, 3x3) -> ReLU -> maxpool(2x2)
        -> linear(7*7*8 -> num_classes)

    Args:
        num_classes: Number of output classes (length of each logit vector).
    """

    def __init__(self, num_classes):
        super().__init__()

        # Padding needed for a given output size follows from
        #   (w - k + 2*p)/s + 1 = o
        #   => p = (s*(o - 1) - w + k)/2

        # 28x28x1 => 28x28x4
        self.conv_1 = torch.nn.Conv2d(in_channels=1,        # grayscale input
                                      out_channels=4,
                                      kernel_size=(3, 3),
                                      stride=(1, 1),
                                      padding=1)  # (1*(28-1) - 28 + 3)/2 = 1

        # 28x28x4 => 14x14x4
        self.pool_1 = torch.nn.MaxPool2d(kernel_size=(2, 2),
                                         stride=(2, 2),     # halves height/width
                                         padding=0)  # (2*(14-1) - 28 + 2)/2 = 0

        # 14x14x4 => 14x14x8
        self.conv_2 = torch.nn.Conv2d(in_channels=4,
                                      out_channels=8,
                                      kernel_size=(3, 3),
                                      stride=(1, 1),
                                      padding=1)  # (1*(14-1) - 14 + 3)/2 = 1

        # 14x14x8 => 7x7x8
        self.pool_2 = torch.nn.MaxPool2d(kernel_size=(2, 2),
                                         stride=(2, 2),
                                         padding=0)  # (2*(7-1) - 14 + 2)/2 = 0

        # Classifier over the flattened 7x7x8 feature map
        self.linear_1 = torch.nn.Linear(7*7*8, num_classes)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Tensor of shape (batch_size, 1, 28, 28).

        Returns:
            Tuple ``(logits, probas)``, each of shape (batch_size, num_classes);
            ``probas`` is the softmax of ``logits`` over the class dimension.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce a
                7x7x8 feature map (shape mismatch in the linear layer).
        """
        out = self.conv_1(x)
        out = F.relu(out)
        out = self.pool_1(out)

        out = self.conv_2(out)
        out = F.relu(out)
        out = self.pool_2(out)

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 7*7*8), this never moves spatial positions into the batch
        # dimension, so a wrongly sized input fails loudly in linear_1
        # instead of silently producing extra rows.
        logits = self.linear_1(torch.flatten(out, start_dim=1))
        probas = F.softmax(logits, dim=1)
        return logits, probas

# Seed PyTorch's RNG before the model is built so weight initialization is repeatable
torch.manual_seed(random_seed)

# Build the network and place it on the selected device (CPU or GPU)
model = ConvNet(num_classes=num_classes).to(device)

# Plain SGD over all model parameters with the configured learning rate
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)


## Training

In [6]:
# 计算模型在数据集上的准确率
def compute_accuracy(model, data_loader, device=None):
    """Return the classification accuracy of ``model`` on ``data_loader`` in percent.

    The forward passes run with autograd disabled, so no computation graph is
    built regardless of how the caller invokes this function.

    Args:
        model: ``torch.nn.Module`` whose forward pass returns a
            ``(logits, probas)`` pair.
        data_loader: Iterable yielding ``(features, targets)`` batches.
        device: Device the batches are moved to. Defaults to the device of
            the model's first parameter; if the model has no parameters the
            batches are used where they already are.

    Returns:
        0-dim float tensor holding the accuracy in the range [0, 100].

    Raises:
        ValueError: If ``data_loader`` yields no examples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device

    correct_pred, num_examples = 0, 0
    with torch.no_grad():
        for features, targets in data_loader:
            if device is not None:
                features = features.to(device)
                targets = targets.to(device)
            _, probas = model(features)
            # Index of the highest class probability is the predicted label
            _, predicted_labels = torch.max(probas, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    if num_examples == 0:
        raise ValueError('compute_accuracy received a data_loader with no examples')
    return correct_pred.float()/num_examples * 100


start_time = time.time()  # wall-clock reference for the elapsed-time logs

# Train for the configured number of epochs
for epoch in range(num_epochs):
    # Training mode; only matters for mode-dependent layers such as
    # dropout / batch norm, but is set explicitly as standard practice
    model.train()

    # One optimization step per mini-batch
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(device)
        targets = targets.to(device)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)   # cross-entropy on raw logits
        optimizer.zero_grad()                     # drop gradients from the previous step

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 50:  # log every 50th batch
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' 
                   % (epoch+1, num_epochs, batch_idx, 
                      len(train_loader), cost.item()))

    # Evaluate on the training set after each epoch. Evaluation mode plus
    # no_grad: no graph is needed for accuracy, so none is built.
    model.eval()
    with torch.no_grad():
        train_accuracy = compute_accuracy(model, train_loader)
    print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
          epoch+1, num_epochs, 
          train_accuracy))

    # Time spent so far
    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

# Total training time
print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))


Epoch: 001/010 | Batch 000/469 | Cost: 2.3087
Epoch: 001/010 | Batch 050/469 | Cost: 2.2788
Epoch: 001/010 | Batch 100/469 | Cost: 1.4371
Epoch: 001/010 | Batch 150/469 | Cost: 0.4326
Epoch: 001/010 | Batch 200/469 | Cost: 0.4182
Epoch: 001/010 | Batch 250/469 | Cost: 0.2271
Epoch: 001/010 | Batch 300/469 | Cost: 0.2526
Epoch: 001/010 | Batch 350/469 | Cost: 0.3004
Epoch: 001/010 | Batch 400/469 | Cost: 0.2788
Epoch: 001/010 | Batch 450/469 | Cost: 0.2298
Epoch: 001/010 training accuracy: 90.90%
Time elapsed: 0.06 min
Epoch: 002/010 | Batch 000/469 | Cost: 0.4991
Epoch: 002/010 | Batch 050/469 | Cost: 0.1725
Epoch: 002/010 | Batch 100/469 | Cost: 0.2281
Epoch: 002/010 | Batch 150/469 | Cost: 0.1696
Epoch: 002/010 | Batch 200/469 | Cost: 0.3248
Epoch: 002/010 | Batch 250/469 | Cost: 0.2216
Epoch: 002/010 | Batch 300/469 | Cost: 0.1407
Epoch: 002/010 | Batch 350/469 | Cost: 0.2157
Epoch: 002/010 | Batch 400/469 | Cost: 0.1855
Epoch: 002/010 | Batch 450/469 | Cost: 0.1759
Epoch: 002/010 t

## Evaluation

In [7]:
# Gradients are not needed for inference; disabling autograd here saves memory
with torch.no_grad():
    test_accuracy = compute_accuracy(model, test_loader)  # accuracy on the test set
    print('测试集准确率: %.2f%%' % test_accuracy)

测试集准确率: 97.85%


In [8]:
%watermark -iv

numpy      : 1.26.4
torch      : 2.6.0+cu126
torchvision: 0.21.0+cu126

