# 设计一个具有单隐层的多层感知器网络

我们首先定义一个多层感知器类 MLP，该类继承自 nn.Module。这个网络包含一个输入层（784 个节点，对应展平后的 28×28 图像）、一个使用 ReLU 激活的隐藏层（128 个节点）和一个输出层（10 个节点，对应 10 个分类）。

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# Define the multilayer perceptron network.
class MLP(nn.Module):
    """Multilayer perceptron with a single ReLU hidden layer.

    Args:
        input_size: Number of features per sample after flattening.
            The default 784 matches a 28x28 image.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_size: int = 784, hidden_size: int = 128,
                 num_classes: int = 10) -> None:
        super().__init__()
        self.hidden = nn.Linear(input_size, hidden_size)  # hidden layer
        self.relu = nn.ReLU()
        self.output = nn.Linear(hidden_size, num_classes)  # output layer

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Flatten ``x`` to ``(-1, input_size)`` and return the output-layer scores.

        No softmax is applied; the result is the raw output of the final
        linear layer.
        """
        # reshape (unlike view) also accepts non-contiguous tensors, e.g. the
        # result of a permute/transpose; it copies only when it has to.
        x = x.reshape(-1, self.hidden.in_features)
        x = self.hidden(x)
        x = self.relu(x)
        return self.output(x)

# Instantiate the model and print its layer structure.
model = MLP()
print(model)


MLP(
  (hidden): Linear(in_features=784, out_features=128, bias=True)
  (relu): ReLU()
  (output): Linear(in_features=128, out_features=10, bias=True)
)

# 构造多层感知器前向传播和后向传播程序

我们定义损失函数和优化器，并编写训练函数，该函数包括前向传播、损失计算、后向传播和参数更新。

In [None]:
# Define the loss function and the optimizer.
# The loss is cross-entropy; the optimizer is SGD over all model parameters
# with learning rate 0.01 and momentum 0.9.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Training loop: forward pass, loss computation, backward pass, parameter update.
def train(model, trainloader, criterion, optimizer, epochs=5, log_interval=200):
    """Train ``model`` on the batches yielded by ``trainloader``.

    Args:
        model: Network to train; it is switched to training mode.
        trainloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable computing the loss from ``(outputs, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        epochs: Number of passes over ``trainloader``.
        log_interval: Number of batches averaged into each printed loss value.

    Raises:
        ValueError: If ``log_interval`` is smaller than 1.
    """
    if log_interval < 1:
        raise ValueError("log_interval must be a positive integer")

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        pending = 0  # batches accumulated since the last report
        for i, (inputs, labels) in enumerate(trainloader):
            optimizer.zero_grad()  # gradients accumulate unless cleared
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            pending += 1
            if pending == log_interval:
                print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / pending:.3f}")
                running_loss = 0.0
                pending = 0
        # Report the trailing partial window so that short loaders (or batch
        # counts that are not a multiple of log_interval) are not silent.
        if pending:
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / pending:.3f}")
    print('Finished Training')


# 训练多层感知器

我们加载 MNIST 数据集，并使用上述的 train 函数训练模型。

In [None]:
# Load the MNIST dataset.
# Each image is converted to a tensor and then normalized with mean 0.5 and
# standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Training and test splits, downloaded into ./data when missing.
trainset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
testset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)

# Batches of 100 samples; only the training data is shuffled.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=100, shuffle=True, num_workers=2
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2
)

# Train the model.
train(model, trainloader, criterion, optimizer, epochs=5)


Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw

[1, 200] loss: 0.657
[1, 400] loss: 0.330
[1, 600] loss: 0.282
[2, 200] loss: 0.230
[2, 400] loss: 0.207
[2, 600] loss: 0.193
[3, 200] loss: 0.165
[3, 400] loss: 0.157
[3, 600] loss: 0.146
[4, 200] loss: 0.129
[4, 400] loss: 0.124
[4, 600] loss: 0.121
[5, 200] loss: 0.106
[5, 400] loss: 0.101
[5, 600] loss: 0.108
Finished Training

# 利用多层感知器实现 MNIST 手写体识别

我们定义一个测试函数，用于评估模型在测试集上的性能，并可视化一些测试结果。

In [None]:
import numpy as np

# 测试多层感知器
def test(model, testloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%')

# Evaluate the model on the test set.
test(model, testloader)

# Visualize some test results: take the first test batch and predict its labels.
dataiter = iter(testloader)
images, labels = next(dataiter)
# Inference only, so skip building the autograd graph.
with torch.no_grad():
    outputs = model(images)
_, predicted = torch.max(outputs, 1)

# Display an image tensor.
def imshow(img):
    """Show ``img`` with matplotlib after mapping it through ``img / 2 + 0.5``.

    The tensor is converted to a NumPy array and its axes are reordered
    from (0, 1, 2) to (1, 2, 0) before plotting.
    """
    unnormalized = img / 2 + 0.5  # undo the mean-0.5 / std-0.5 normalization
    pixels = unnormalized.numpy().transpose(1, 2, 0)  # move axis 0 to the end
    plt.imshow(pixels)
    plt.show()

# Show the first five test images with their true and predicted labels.
# Slices are used instead of indexing 0..4 so that a batch holding fewer than
# five samples does not raise an IndexError.
imshow(torchvision.utils.make_grid(images[:5]))
print('GroundTruth: ', ' '.join(str(label) for label in labels[:5].tolist()))
print('Predicted: ', ' '.join(str(label) for label in predicted[:5].tolist()))


Accuracy of the network on the 10000 test images: 96.88%

图片（见报告）

GroundTruth:  7 2 1 0 4
Predicted:  7 2 1 0 4