In [4]:
%matplotlib inline
import torch
import numpy as np
import torch.nn as nn
from torch.nn import init
import matplotlib.pylab as plt
import sys
import d2l.torch as d2l


定义模型

In [7]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
num_inputs, num_outputs, num_hiddens = 784, 10, 256

net = nn.Sequential(
    nn.Flatten(),  # 使用PyTorch内置的Flatten层
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs)
)

for params in net.parameters():
    init.normal_(params, mean=0, std=0.01)

读取数据并训练模型

In [13]:
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

# 定义训练函数
def train_ch3(net, train_iter, test_iter, loss, num_epochs, optimizer):
    def evaluate_accuracy(data_iter, net):
        """计算数据集上的准确率"""
        net.eval()  # 设置为评估模式
        correct = 0
        total = 0
        with torch.no_grad():
            for X, y in data_iter:
                X = X.view(X.shape[0], -1)  # 使用 view 替代 reshape，并自动适配输入维度
                y_hat = net(X)
                correct += (y_hat.argmax(1) == y).sum().item()
                total += y.numel()
        net.train()  # 恢复为训练模式
        return correct / total

    # 训练过程
    for epoch in range(num_epochs):
        net.train()  # 设置为训练模式
        train_loss = 0.0
        train_acc = 0.0
        total_samples = 0
        
        for X, y in train_iter:
            X = X.view(X.shape[0], -1)  # 自动展平输入
            y_hat = net(X)
            l = loss(y_hat, y)
            
            # 反向传播和优化
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            
            # 累计损失和准确率
            train_loss += l.item()
            train_acc += (y_hat.argmax(1) == y).sum().item()
            total_samples += y.numel()
        
        # 计算测试准确率
        test_acc = evaluate_accuracy(test_iter, net)
        
        print(f'Epoch {epoch + 1}: '
              f'loss {train_loss:.4f}, '
              f'train acc {train_acc / total_samples:.3f}, '
              f'test acc {test_acc:.3f}')

# 调用训练函数
num_epochs, lr = 5, 100.0
train_ch3(net, train_iter, test_iter, loss, num_epochs, optimizer)

Epoch 1: loss 189.9274, train acc 0.700, test acc 0.757
Epoch 2: loss 115.1941, train acc 0.819, test acc 0.821
Epoch 3: loss 101.3269, train acc 0.840, test acc 0.799
Epoch 4: loss 92.6314, train acc 0.854, test acc 0.851
Epoch 5: loss 87.5811, train acc 0.861, test acc 0.789
