# LeNet代码练习

### （1）__LeNet__

#### 1.1 __LeNet5_model__

In [3]:
import torch.nn as nn

# 没有参数的层尽量使用torch.nn.functional函数,
# 比如激活F.relu(input)、池化F.max_pool2d(input),
# 但是只能用在前向传播函数中，因为需要传入被池化或激活的参数。
import torch.nn.functional as F

# Sequential接受OrderedDict,我们可以用它来命名传递给Sequential的每个模块,
# 方便给模型的每一层命名。
from collections import OrderedDict


# 这里的LeNet模型是针对CIFAR数据集（图像大小3X32X32），
# 所以不用像LeNet论文那样为了使用mnist数据集（图像大小1X28X28）要先将图像填充到32X32。
class LeNet(nn.Module):
    """LeNet-style CNN for 3x32x32 inputs that produces 10 output scores.

    The network is split into two named ``nn.Sequential`` stages:

    * ``self.conv`` -- two (5x5 conv -> ReLU -> 2x2 max-pool) stages followed
      by a flatten layer.
    * ``self.classifier`` -- three fully connected layers (800 -> 120 -> 84
      -> 10) with ReLU between them; the last layer has no activation.

    Every layer is registered under an explicit name, so the layer names show
    up in ``print(model)`` and in the ``state_dict`` keys.
    """

    def __init__(self):
        super().__init__()

        # Spatial size for a 32x32 input:
        # 32 -> conv 5x5 -> 28 -> pool -> 14 -> conv 5x5 -> 10 -> pool -> 5,
        # so the second pooling layer emits 32 feature maps of size 5x5.
        features = nn.Sequential()
        features.add_module("conv1", nn.Conv2d(3, 16, 5))
        features.add_module("relu1", nn.ReLU())
        features.add_module("max_pool1", nn.MaxPool2d(2, 2))
        features.add_module("conv2", nn.Conv2d(16, 32, 5))
        features.add_module("relu2", nn.ReLU())
        features.add_module("max_pool2", nn.MaxPool2d(2, 2))
        # nn.Flatten() collapses everything except the batch dimension, turning
        # (N, 32, 5, 5) into (N, 800) so the result can feed a Linear layer.
        # The same could be done in forward() with x.view(-1, 32 * 5 * 5),
        # where -1 lets the batch dimension be inferred.
        features.add_module("flatten", nn.Flatten())
        self.conv = features

        flat_features = 32 * 5 * 5  # must match the flattened conv output
        head = nn.Sequential()
        head.add_module("input_fc", nn.Linear(flat_features, 120))
        head.add_module("relu_fc1", nn.ReLU())
        head.add_module("hidden_fc", nn.Linear(120, 84))
        head.add_module("relu_fc2", nn.ReLU())
        head.add_module("output_fc", nn.Linear(84, 10))
        self.classifier = head

    def forward(self, x):
        """Run the conv stage and then the classifier on ``x``; return the result.

        No explicit reshape is needed here because ``self.conv`` already ends
        with ``nn.Flatten()``.
        """
        return self.classifier(self.conv(x))


"""
# 另一种写法
class LeNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5)
        self.max_pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)
        self.max_pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.input_fc = nn.Linear(in_features=32 * 5 * 5, out_features=120)
        self.hidden_fc = nn.Linear(in_features=120, out_features=84)
        self.output_fc = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.max_pool1(x)
        x = F.relu(self.conv2(x))
        x = self.max_pool2(x)

        x = x.view(-1, 32 * 5 * 5)
        x = F.relu(self.input_fc(x))
        x = F.relu(self.hidden_fc(x))
        x = self.output_fc(x)

        return x
"""

'\n# 另一种写法\nclass LeNet(nn.Module):\n    def __init__(self):\n        super().__init__()\n        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5)\n        self.max_pool1 = nn.MaxPool2d(kernel_size=2, stride=2)\n        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)\n        self.max_pool2 = nn.MaxPool2d(kernel_size=2, stride=2)\n        self.input_fc = nn.Linear(in_features=32 * 5 * 5, out_features=120)\n        self.hidden_fc = nn.Linear(in_features=120, out_features=84)\n        self.output_fc = nn.Linear(in_features=84, out_features=10)\n\n    def forward(self, x):\n        x = F.relu(self.conv1(x))\n        x = self.max_pool1(x)\n        x = F.relu(self.conv2(x))\n        x = self.max_pool2(x)\n\n        x = x.view(-1, 32 * 5 * 5)\n        x = F.relu(self.input_fc(x))\n        x = F.relu(self.hidden_fc(x))\n        x = self.output_fc(x)\n\n        return x\n'

In [5]:
import torch

# Instantiate the network defined above.
lenet = LeNet()
# print(lenet)

# Smoke test: push a batch of two random (3, 32, 32) tensors -- think of them
# as two images -- through the model and print the resulting scores.
x = torch.rand(2, 3, 32, 32)
y = lenet(x)
print(y)

tensor([[-0.0647, -0.0014,  0.0293, -0.0876,  0.0110, -0.0100,  0.1181, -0.0094,
          0.0158, -0.0840],
        [-0.0655,  0.0060,  0.0337, -0.0853,  0.0127, -0.0088,  0.1295, -0.0054,
          0.0132, -0.0820]], grad_fn=<AddmmBackward0>)


#### 1.2 __train__

In [None]:
# Training script, kept inside a triple-quoted string so that it is not executed.
# To run it, change download to True for both train_set and val_set, or load
# data that has already been downloaded yourself.
# The script uses a `lenet` model object that it does not create itself.

"""
import torch
import torch.nn as nn
import torch.utils
import torch.utils.data
import torchvision
import torch.optim as optim
import torchvision.transforms as transforms


transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

train_set = torchvision.datasets.CIFAR10(
    root="./data", train=True, download=False, transform=transform
)
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=36, shuffle=True, num_workers=0
)

val_set = torchvision.datasets.CIFAR10(
    root="./data",
    train=False,
    download=False,
    transform=transform,
)
val_loader = torch.utils.data.DataLoader(
    val_set, batch_size=5000, shuffle=False, num_workers=0
)

val_data_iter = iter(val_loader)
val_image, val_label = next(val_data_iter)

loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(lenet.parameters(), lr=1e-3)

for epoch in range(5):  # loop over the dataset multiple times

    running_loss = 0.0
    for step, data in enumerate(train_loader, start=0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = lenet(inputs)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if step % 500 == 499:  # 每500 mini-batches执行一次,因为下标从0开始所以这样写。
            with torch.no_grad():
                outputs = lenet(val_image)  # [batch, 10]
                predict_y = torch.max(outputs, dim=1)[1]
                accuracy = torch.eq(predict_y, val_label).sum().item() / val_label.size(
                    0
                )

                print(
                    "[%d, %5d] train_loss: %.3f  test_accuracy: %.3f"
                    % (epoch + 1, step + 1, running_loss / 500, accuracy)
                )
                running_loss = 0.0

print("Finished Training")

save_path = "./Lenet.pth"
torch.save(lenet.state_dict(), save_path)
"""

#### 1.3 __predict__

In [38]:
import torch
import torchvision.transforms as transforms
from PIL import Image

# The LeNet written in this notebook has a different structure from the LeNet
# in model.py, and the saved weight file Lenet.pth was produced by training
# the model.py version. Prediction therefore has to instantiate the model.py
# LeNet, otherwise the weights in Lenet.pth cannot be loaded correctly.
from model import LeNet as LeNet5

# Same normalization as used for training, plus a resize so that arbitrary
# input images match the 32x32 size the network expects.
transform = transforms.Compose(
    [
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# Class names, indexed by the model's output position.
classes = (
    "plane",
    "car",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)

lenet_predict = LeNet5()
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only
# machine. NOTE: torch.load unpickles the file, so only load trusted checkpoints.
lenet_predict.load_state_dict(torch.load("Lenet.pth", map_location="cpu"))
# Switch to inference mode so that any dropout / batch-norm layers in the
# loaded model behave deterministically.
lenet_predict.eval()

# Force three RGB channels: grayscale or RGBA files would otherwise break the
# 3-channel Normalize and the network's 3-channel input layer. The context
# manager closes the image file once the tensor has been produced.
with Image.open("./test_image/0_3.jpg") as img:
    image = transform(img.convert("RGB"))  # shape [C, H, W]
image = torch.unsqueeze(image, dim=0)  # shape [N=1, C, H, W]: add the batch dimension

# Disable gradient tracking for prediction; no backward pass is needed.
with torch.no_grad():
    output = lenet_predict(image)
    # Take the index of the largest score per row via the named `.indices`
    # field rather than positional [1] indexing of torch.max's result.
    predict = torch.max(output, dim=1).indices.numpy()

print(predict)
# predict is a 1-element array; index it explicitly, because converting an
# array with ndim > 0 to a Python int is deprecated in recent NumPy versions.
print("这张图片是：", classes[int(predict[0])])

[0]
这张图片是： plane
