## 导入必要库

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, transforms

In [2]:
# Run configuration shared by the cells below.
SEED = 42  # fixed seed so weight initialisation and batch shuffling repeat on Restart & Run All
torch.manual_seed(SEED)

batch_size = 2048  # number of samples per mini-batch
device = torch.device('cpu')  # model and tensors are placed on the CPU

## 加载并预处理 MNIST 数据集


In [3]:
# Preprocessing shared by both splits: convert each image to a tensor, then
# normalise with the standard MNIST parameters (mean 0.1307, std 0.3081).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split: reshuffled every epoch so SGD sees the batches in a new order.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True, transform=mnist_transform),
    batch_size=batch_size,
    shuffle=True,
)

# Test split: evaluation only accumulates totals over the whole set, so the
# sample order is irrelevant and shuffling is turned off.
# download=True makes this loader self-sufficient instead of relying on the
# training dataset above having fetched the files first.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, download=True, transform=mnist_transform),
    batch_size=batch_size,
    shuffle=False,
)

## 定义 MLP 模型
模型为两层全连接神经网络，结构：输入层（784 维，28×28 展平）→ 隐藏层（128 维，ReLU 激活）→ 输出层（10 维，对应 10 个数字）。

In [4]:
class mlp(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        in_features: Length of each flattened input vector (default 784).
        hidden_features: Width of the hidden layer (default 128).
        out_features: Number of scores produced per sample (default 10).

    The defaults reproduce the original fixed 784 -> 128 -> 10 network, so
    ``mlp()`` builds exactly the same layers as before.
    """

    def __init__(self, in_features=784, hidden_features=128, out_features=10):
        super().__init__()
        # Attribute names l1/l2 are kept so existing state_dict keys stay valid.
        self.l1 = nn.Linear(in_features, hidden_features)
        self.l2 = nn.Linear(hidden_features, out_features)

    def forward(self, x):
        """Compute raw class scores for a batch of flattened inputs.

        Args:
            x: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor whose last dimension equals ``out_features``. No softmax
            is applied here; the values are unnormalised scores.
        """
        hidden = F.relu(self.l1(x))
        return self.l2(hidden)

# Instantiate the network, place it on the configured device, and attach a plain SGD optimizer.
model = mlp()
model = model.to(device)
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

## 设置训练参数、训练模型、测试模型

训练轮次：epochs=10

In [5]:
epochs = 10
for epoch in range(epochs):
    # ---- Training pass: one optimizer step per mini-batch ----
    model.train()
    for batch_idx, (x, y) in enumerate(train_loader):
        # Flatten every image into a single row (784 values) before the linear layers.
        x = x.view(x.size(0), -1).to(device)
        y = y.to(device)
        optimizer.zero_grad()
        output = model(x)
        loss = F.cross_entropy(output, y)
        loss.backward()
        optimizer.step()

    # ---- Evaluation pass over the test loader, with gradient tracking disabled ----
    model.eval()
    correct = 0
    test_loss = 0
    with torch.no_grad():
        for batch_idx, (x, y) in enumerate(test_loader):
            x = x.view(x.size(0), -1).to(device)
            y = y.to(device)
            output = model(x)
            # Sum (rather than average) per batch so the division below gives a per-sample mean.
            test_loss += F.cross_entropy(output, y, reduction='sum').item()
            # Predicted class = position of the largest score in each row.
            pred = output.max(dim=1, keepdim=True).indices
            # Count the predictions that match their labels.
            correct += (pred == y.view_as(pred)).sum().item()

    # Turn the accumulated totals into a per-sample test loss and an accuracy, then report.
    num_test_samples = len(test_loader.dataset)
    test_loss /= num_test_samples
    acc = correct / num_test_samples
    print('epoch:{}, loss:{:.4f}, acc:{:.4f}'.format(epoch, test_loss, acc))


epoch:0, loss:0.4949, acc:0.8696
epoch:1, loss:0.3721, acc:0.8972
epoch:2, loss:0.3268, acc:0.9073
epoch:3, loss:0.2955, acc:0.9172
epoch:4, loss:0.2766, acc:0.9193
epoch:5, loss:0.2597, acc:0.9262
epoch:6, loss:0.2504, acc:0.9283
epoch:7, loss:0.2365, acc:0.9326
epoch:8, loss:0.2260, acc:0.9357
epoch:9, loss:0.2157, acc:0.9372
