# 对LeNet进行适当修改
![image.png](attachment:image.png)
1. 将平均汇聚层替换为最大汇聚层。  
2. 调整卷积窗口大小。  
3. 调整输出通道的数量。  
4. 调整激活函数（如ReLU）。  
5. 调整卷积层的数量。  
6. 调整全连接层的数量。  
7. 调整学习率和其他训练细节（例如，初始化和轮数）。

In [1]:
import torch
from torch import nn
import torchvision
from torch.utils import data
from torchvision.datasets import FashionMNIST
from torchvision import transforms
# 1. Load the data
trans = transforms.ToTensor()
# download=True fetches Fashion-MNIST into ../data/ when it is missing, so the
# notebook also runs on a fresh checkout; an already downloaded copy is reused.
train_minist = FashionMNIST(
    root="../data/", transform=trans, train=True, download=True
)
test_minist = FashionMNIST(
    root="../data/", transform=trans, train=False, download=True
)
# Only the training batches are shuffled; the test order stays fixed.
train_iter = data.DataLoader(train_minist, shuffle=True, batch_size=256)
test_iter = data.DataLoader(test_minist, shuffle=False, batch_size=256)
# Peek at the first batch to check the tensor shapes.
for X, y in train_iter:
    print(X.shape, y.shape)
    break

torch.Size([256, 1, 28, 28]) torch.Size([256])


In [2]:
# 2. Accuracy metric
# Prefer the first CUDA device, but fall back to the CPU so that the notebook
# still runs on machines without a usable GPU.
gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def acc_val(data_iter, net, device=None):
    """Compute the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches. ``X`` is a tensor or
            a list of tensors; ``y`` holds the labels that are compared with
            the argmax of the network output along dim 1.
        net: Callable mapping ``X`` to per-class scores. An ``nn.Module`` is
            switched to evaluation mode and is left in that mode.
        device: Device the batches are moved to. When ``None`` and ``net`` is
            an ``nn.Module``, the device of its first parameter is used.

    Returns:
        The fraction of correctly predicted samples. The per-batch hit counts
        are accumulated as tensors, so the result is a 0-dim tensor.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no batch at all.
    """
    if isinstance(net, nn.Module):
        net.eval()
        # Compare with None explicitly: a falsy but valid device such as the
        # CUDA index 0 must not be silently replaced by the parameter device.
        if device is None:
            device = next(iter(net.parameters())).device
    acc, tot = 0.0, 0.0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(X, list):  # needed for BERT fine-tuning (introduced later)
                X = [x.to(device) for x in X]
            else:
                X = X.to(device)
            y = y.to(device)
            acc += (net(X).argmax(dim=1) == y).sum()
            tot += y.shape[0]
    return acc / tot
# 3. Training function
def train(net, train_iter, test_iter, loss, optim, device, epoch_num):
    """Train ``net`` and print the train/test accuracy after every epoch.

    Args:
        net: The ``nn.Module`` to train; it is moved to ``device`` in place.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches used only for evaluation.
        loss: Callable ``loss(net(X), y)`` returning a value with ``backward()``.
        optim: Optimizer providing ``zero_grad()`` and ``step()``.
        device: Device the model and the batches are moved to.
        epoch_num: Number of passes over ``train_iter``.

    Returns:
        The same ``net`` object after training.
    """
    net.to(device)
    for epoch in range(epoch_num):
        # acc_val() leaves the network in evaluation mode, so switch back to
        # training mode before every epoch; otherwise mode-dependent layers
        # (dropout, batch norm) would run in eval mode from epoch 2 on.
        net.train()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            l = loss(net(X), y)
            optim.zero_grad()
            l.backward()
            optim.step()
        # Evaluation needs no gradients.
        with torch.no_grad():
            tra = acc_val(train_iter, net)
            tea = acc_val(test_iter, net)
            print(f"In epoch{epoch+1}: train-acc={tra:.5f}, test-acc={tea:.5f}")
    return net
# 4. Model parameter initialization
def init_net(net):
    """Xavier-normal initialize the weight of a Linear or Conv2d module.

    Meant to be passed to ``nn.Module.apply``. Only modules whose exact type
    is ``nn.Linear`` or ``nn.Conv2d`` are touched (subclasses are skipped),
    and only their ``weight`` is re-initialized; biases keep their values.
    """
    if type(net) not in (nn.Linear, nn.Conv2d):
        return
    nn.init.xavier_normal_(net.weight)

In [10]:
# 5. Define the model, the loss and the optimizer. Changes compared with
#    LeNet: sigmoid is replaced by ReLU, and the fully connected part is a
#    single 400x10 layer.
lr = 0.03
loss = nn.CrossEntropyLoss()
net1 = nn.Sequential(
    # 1x28x28 -> 6x28x28 (padding keeps the spatial size)
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),
    nn.ReLU(),
    # -> 6x14x14
    nn.AvgPool2d(kernel_size=2),
    # -> 16x10x10
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
    nn.ReLU(),
    # -> 16x5x5
    nn.AvgPool2d(kernel_size=2),
    nn.Flatten(),
    nn.Linear(in_features=16 * 5 * 5, out_features=10),
)
optim1 = torch.optim.Adam(params=net1.parameters(), lr=lr)

In [11]:
# 6. Initialize the model parameters, then train
num_epoch = 10
net1.apply(init_net)
net1 = train(
    net1,
    train_iter,
    test_iter,
    loss,
    optim1,
    device=gpu,
    epoch_num=num_epoch,
)

In epoch1: train-acc=0.85278, test-acc=0.84650
In epoch2: train-acc=0.86535, test-acc=0.85410
In epoch3: train-acc=0.87578, test-acc=0.86140
In epoch4: train-acc=0.88555, test-acc=0.87140
In epoch5: train-acc=0.87522, test-acc=0.85710
In epoch6: train-acc=0.88800, test-acc=0.87500
In epoch7: train-acc=0.88865, test-acc=0.87250
In epoch8: train-acc=0.88178, test-acc=0.86740
In epoch9: train-acc=0.88590, test-acc=0.87120
In epoch10: train-acc=0.88795, test-acc=0.86920


In [3]:
# Try to simplify further: keep a single convolution block and enlarge the
# pooling window.
lr = 0.03
loss = nn.CrossEntropyLoss()
net2 = nn.Sequential(
    # 1x28x28 -> 16x28x28 (padding keeps the spatial size)
    nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, padding=2),
    nn.ReLU(),
    # -> 16x7x7
    nn.AvgPool2d(kernel_size=4),
    nn.Flatten(),
    nn.Linear(in_features=16 * 7 * 7, out_features=10),
)
optim2 = torch.optim.Adam(params=net2.parameters(), lr=lr)
num_epoch = 10
net2.apply(init_net)
net2 = train(
    net2,
    train_iter,
    test_iter,
    loss,
    optim2,
    device=gpu,
    epoch_num=num_epoch,
)

NameError: name 'loss' is not defined

### 做减法或许是一件好事：参数更少，可能带来更好的优化效果。