In [1]:
import time
import torch
from torch import nn, optim
import torch.nn.functional as F

import d2lzh_pytorch as d2l

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

定义NiN块，一个自定义的卷积层 + 两个1x1卷积层

In [2]:
def nin_block(in_channels, out_channels, kernel_size, stride, padding):
    blk = nn.Sequential(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding),
                        nn.ReLU(),
                        nn.Conv2d(out_channels, out_channels, 1), 
                        nn.ReLU(),
                        nn.Conv2d(out_channels, out_channels, 1), 
                        nn.ReLU())
    return blk

NiN模型

使用全局平均池化代替全连接层（池化层核等于输入的高和宽，通道数为10）

作用：减少参数尺寸，缓解过拟合（有时会造成获得有效模型的训练时间的增加）

![NiN](./images/NiN.jpg)

In [3]:
# 定义全局平均池化
class GlobalAvgPool2d(nn.Module):
    def __init__(self):
        super(GlobalAvgPool2d, self).__init__()
    def forward(self, img):
        return F.avg_pool2d(img, kernel_size=img.size()[2:])  # 除开前面的batch和channel

In [4]:
class NiN(nn.Module):
    def __init__(self):
        super(NiN, self).__init__()
        self.conv_fc = nn.Sequential(
                        nin_block(1, 96, kernel_size=11, stride=4, padding=0),  # 96x54x54 （省略第一维的batch_size）
                        nn.MaxPool2d(kernel_size=3, stride=2),  # 96x26x26
                        nin_block(96, 256, kernel_size=5, stride=1, padding=2),  # 256x26x26 
                        nn.MaxPool2d(kernel_size=3, stride=2),  # 256x12x12
                        nin_block(256, 384, kernel_size=3, stride=1, padding=1),  # 384x12x12
                        nn.MaxPool2d(kernel_size=3, stride=2),  # 384x5x5
                        nn.Dropout(0.5), 
        
                        nin_block(384, 10, kernel_size=3, stride=1, padding=1),  # 10x5x5
                        GlobalAvgPool2d(),  # 经过全局平均池化后是 batch_size x 10(C) x 1(H) x 1(W)
                        d2l.FlattenLayer())  # 转换为 batch_size x 10
        
    def forward(self, img):
        return self.conv_fc(img)

In [5]:
net = NiN()

构建一个数据样本来查看每一层的输出形状

In [6]:
X = torch.rand(1, 1, 224, 224)
for name, blk in net.named_children(): 
    X = blk(X)
    print(name, 'output shape: ', X.shape)

conv_fc output shape:  torch.Size([1, 10])


训练模型

In [7]:
batch_size = 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

lr, num_epochs = 0.002, 5

optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on  cuda
epoch 1, loss 1.3034, train acc 0.537, test acc 0.668, time 143.0 sec
epoch 2, loss 0.8761, train acc 0.696, test acc 0.705, time 140.5 sec
epoch 3, loss 0.7987, train acc 0.718, test acc 0.731, time 130.8 sec
epoch 4, loss 0.7421, train acc 0.746, test acc 0.746, time 124.5 sec
epoch 5, loss 0.6925, train acc 0.763, test acc 0.755, time 124.7 sec


总结NiN

- 使用由卷积层和1×1卷积层构成的NiN块来构建深层网络。（1x1卷积层的作用类似于线性变换）
- NiN去除了容易造成过拟合的全连接输出层，而是将其替换成输出通道数等于标签类别数的NiN块和全局平均池化层。

这个acc好像不太行的样子...