In [2]:
import time
import torch
from torch import nn, optim

import sys
sys.path.append('..')
import d2lzh_pytorch as d2l

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#### 定义VGG块

In [3]:
def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block: `num_convs` conv+ReLU pairs followed by a max-pool.

    Args:
        num_convs: number of 3x3 convolution layers in the block.
        in_channels: channel count expected by the first convolution.
        out_channels: channel count produced by every convolution in the block.

    Returns:
        An `nn.Sequential` holding the conv/ReLU pairs and a final
        `nn.MaxPool2d(kernel_size=2, stride=2)`.
    """
    layers = []
    channels = in_channels
    for _ in range(num_convs):
        # 3x3 kernel with padding 1 keeps the spatial size unchanged.
        layers += [
            nn.Conv2d(channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
        # Every convolution after the first consumes the block's own output.
        channels = out_channels
    # The 2x2 pooling with stride 2 halves height and width.
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)

#### 定义VGG模型

In [7]:
def vgg(conv_arch, fc_features, fc_hidden_units=4096, num_classes=10):
    """Assemble a VGG-style network from a block specification.

    Args:
        conv_arch: iterable of `(num_convs, in_channels, out_channels)` tuples,
            one per VGG block, in forward order.
        fc_features: number of input features of the first fully connected
            layer, i.e. the flattened size of the last block's output.
        fc_hidden_units: width of the two hidden fully connected layers.
        num_classes: number of outputs of the final linear layer. Defaults to
            10, the value that was previously hard-coded.

    Returns:
        An `nn.Sequential` whose children are named `vgg_block1`,
        `vgg_block2`, ... followed by a classifier head named `fc`.
    """
    net = nn.Sequential()
    # Convolutional part: one named sub-module per entry of conv_arch.
    for i, (num_convs, in_channels, out_channels) in enumerate(conv_arch, start=1):
        net.add_module('vgg_block' + str(i),
                       vgg_block(num_convs, in_channels, out_channels))

    # Fully connected part: two hidden layers with ReLU and dropout (p=0.5),
    # then the output layer.
    net.add_module('fc', nn.Sequential(
        # presumably reshapes (batch, C, H, W) to (batch, C*H*W) — confirm
        # against d2l.FlattenLayer
        d2l.FlattenLayer(),
        nn.Linear(fc_features, fc_hidden_units),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_units, fc_hidden_units),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_units, num_classes),
    ))

    return net

In [15]:
def vgg_11():
    """Build the VGG-11 configuration: 8 convolution layers in five blocks.

    Returns:
        Whatever `vgg` returns for this architecture, with a first fully
        connected layer of 512 * 7 * 7 inputs and 4096 hidden units.
    """
    # One (num_convs, in_channels, out_channels) entry per block; the first
    # block takes a single input channel.
    block_specs = (
        (1, 1, 64),
        (1, 64, 128),
        (2, 128, 256),
        (2, 256, 512),
        (2, 512, 512),
    )
    # 512 channels times a 7x7 feature map after the last block.
    flattened_size = 512 * 7 * 7
    return vgg(block_specs, flattened_size, 4096)

In [16]:
# Build the full-size VGG-11 and print its module tree to inspect the layer layout.
net = vgg_11()
print(net)

Sequential(
  (vgg_block1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block4): Sequential(
    (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dil

- 为了加快训练，将各卷积层的通道数和全连接层的宽度都缩小为原来的 1/8

In [19]:
# Shrink every block's output channels and the fully connected widths by
# `ratio` so the network trains faster.
ratio = 8
fc_features = 512 * 7 * 7
fc_hidden_units = 4096

# Output channels of the five blocks after shrinking: 8, 16, 32, 64, 64.
small_widths = [width // ratio for width in (64, 128, 256, 512, 512)]
# (num_convs, in_channels, out_channels) per block: the first block keeps the
# single input channel, each later block consumes the previous block's output.
small_conv_arch = list(zip((1, 1, 2, 2, 2), [1] + small_widths[:-1], small_widths))

# This rebinds `net`, replacing the full-size model built earlier.
net = vgg(small_conv_arch, fc_features // ratio, fc_hidden_units // ratio)

#### 获取数据并训练模型

In [20]:
# Mini-batch size passed to both the data loader and the training helper.
batch_size = 64
# Directory handed to the loader as `root`.
data_dir = './Datasets/FashionMNIST'
# NOTE(review): resize=224 presumably rescales the images to 224x224, which is
# what a 7x7 final feature map after five stride-2 poolings implies — confirm
# against d2l.load_data_fashion_mnist.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224, root=data_dir)

# Adam over all parameters of the current `net`, learning rate 0.001, 5 epochs.
lr, num_epochs = 0.001, 5
optimizer = optim.Adam(net.parameters(), lr=lr)
# NOTE(review): `net` is never moved to `device` in this notebook; presumably
# d2l.train_ch5 does that itself — confirm.
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on  cuda
epoch 1, loss 0.5945, train_acc 0.777, test acc 0.880, 65.7 sec
epoch 2, loss 0.1602, train_acc 0.884, test acc 0.895, 63.3 sec
epoch 3, loss 0.0910, train_acc 0.901, test acc 0.908, 65.4 sec
epoch 4, loss 0.0610, train_acc 0.911, test acc 0.912, 63.8 sec
epoch 5, loss 0.0440, train_acc 0.921, test acc 0.919, 63.9 sec
