In [16]:
import time
import torch
from torch import nn, optim
import torch.nn.functional as F

import sys
sys.path.append('..')
import d2lzh_pytorch as d2l

# Pick the compute device once: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

#### 定义NIN块

In [2]:
def nin_block(in_channels, out_channels, kernel_size, stride=1, padding=0):
    """Build one NiN block: a spatial convolution followed by two 1x1 convolutions.

    Every convolution is followed by a ReLU, so the returned container holds
    six sub-modules in the order conv, ReLU, conv, ReLU, conv, ReLU.

    Args:
        in_channels: Number of channels of the block's input.
        out_channels: Number of channels produced by all three convolutions.
        kernel_size: Kernel size of the first convolution only.
        stride: Stride of the first convolution (default 1).
        padding: Padding of the first convolution (default 0).

    Returns:
        An ``nn.Sequential`` wrapping the six layers.
    """
    # Only the leading convolution uses the caller-supplied geometry.
    layers = [
        nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding),
        nn.ReLU(),
    ]
    # The two trailing 1x1 convolutions keep the channel count unchanged.
    for _ in range(2):
        layers += [nn.Conv2d(out_channels, out_channels, kernel_size=1), nn.ReLU()]
    return nn.Sequential(*layers)

#### 定义全局平均池化层

In [17]:
class GlobalAvgPool2d(nn.Module):
    """Parameter-free layer that averages each channel over its full spatial extent.

    For a 4-D input of shape (batch, channels, height, width) the result has
    shape (batch, channels, 1, 1).
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Global average pooling is obtained by making the pooling window
        # exactly as large as the input's height and width.
        window = x.shape[2:]
        return F.avg_pool2d(x, kernel_size=window)

In [19]:
# NiN model: three NiN blocks, each followed by 3x3 / stride-2 max pooling,
# then dropout and a final NiN block whose 10 output channels are averaged
# globally per channel.
nin_layers = [
    nin_block(1, 96, kernel_size=11, stride=4, padding=0),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nin_block(96, 256, kernel_size=5, stride=1, padding=2),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nin_block(256, 384, kernel_size=3, stride=1, padding=1),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Dropout(0.5),
    nin_block(384, 10, kernel_size=3, stride=1, padding=1),
    GlobalAvgPool2d(),
    # Convert the 4-D output into a 2-D output of shape (batch_size, 10).
    d2l.FlattenLayer(),
]
# Unpacking a list keeps the automatic child names "0" .. "9".
net = nn.Sequential(*nin_layers)

In [20]:
# Shape check: feed one random 1-channel 224x224 sample through the network
# one top-level child at a time and report the output shape after each child.
X = torch.rand((1, 1, 224, 224))
for layer_name, layer in net.named_children():
    X = layer(X)
    print(layer_name, 'output shape: ', X.shape)

0 output shape:  torch.Size([1, 96, 54, 54])
1 output shape:  torch.Size([1, 96, 26, 26])
2 output shape:  torch.Size([1, 256, 26, 26])
3 output shape:  torch.Size([1, 256, 12, 12])
4 output shape:  torch.Size([1, 384, 12, 12])
5 output shape:  torch.Size([1, 384, 5, 5])
6 output shape:  torch.Size([1, 384, 5, 5])
7 output shape:  torch.Size([1, 10, 5, 5])
8 output shape:  torch.Size([1, 10, 1, 1])
9 output shape:  torch.Size([1, 10])


#### 训练模型

In [22]:
# Location of the Fashion-MNIST files and the mini-batch size used for both splits.
data_dir = './Datasets/FashionMNIST'
batch_size = 64
# resize=224 is forwarded to the d2l loader; presumably it rescales every image
# to 224x224, the input size used in the shape check above — confirm against d2l.
train_iter, test_iter = d2l.load_data_fashion_mnist(
    batch_size,
    resize=224,
    root=data_dir,
)

In [23]:
# Training hyperparameters: Adam learning rate and number of epochs.
lr = 0.002
num_epochs = 5
# Adam optimizes every parameter of the network.
optimizer = optim.Adam(params=net.parameters(), lr=lr)
# Training and per-epoch reporting are delegated to the d2l helper.
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on  cuda
epoch 1, loss 0.9077, train_acc 0.668, test acc 0.757, 80.9 sec
epoch 2, loss 0.2711, train_acc 0.803, test acc 0.814, 79.7 sec
epoch 3, loss 0.1588, train_acc 0.825, test acc 0.829, 79.5 sec
epoch 4, loss 0.1090, train_acc 0.840, test acc 0.843, 79.7 sec
epoch 5, loss 0.0815, train_acc 0.850, test acc 0.854, 79.7 sec
