# VGG - 使用重复元素的非常深的网络

VGG 首次通过大量重复使用相同的结构，简洁地构建出很深的网络。

VGG的一个关键是：先连续使用若干个卷积核相对较小（$3 \times 3$）的卷积层，然后接上一个池化层，构成一个模块（我们将其称为 vgg_block）；之后再将这个模块重复很多次，构成网络的卷积部分（我们将其称为 vgg_stack）。

In [1]:
import mxnet as mx

from mxnet import nd
from mxnet import gluon
from mxnet import autograd

import utils
# Seed MXNet's random number generator with a fixed value.
mx.random.seed(1)

# Device context used for all data and parameters below: GPU device 0.
# NOTE(review): later allocations on this context presumably need a
# GPU-enabled MXNet build; switch to mx.cpu() otherwise.
ctx = mx.gpu(0)

In [2]:
# Mini-batch size shared by the data loaders and the training loop.
batch_size = 64

# Load MNIST through the project helper, asking for images resized to 96
# (the batch printed in the next cell has shape (64, 1, 96, 96)).
train_data, test_data = utils.load_dataset(
    batch_size,
    resize=96,
    data_type='mnist',
)

In [3]:
# Take only the first training batch, move it to `ctx` and show its shape.
# `data` is reused by the following cells as a sample input.
for first_batch, _ in train_data:
    data = first_batch.as_in_context(ctx)
    print(data.shape)
    break

(64, 1, 96, 96)


## 定义``vgg_block``

In [4]:
def vgg_block(num_convs, channels):
    """Build one VGG block: stacked 3x3 convolutions followed by max pooling.

    Args:
        num_convs: Number of convolution layers in the block.
        channels: Number of output channels of every convolution layer.

    Returns:
        A ``gluon.nn.Sequential`` holding ``num_convs`` ReLU-activated
        ``Conv2D`` layers (kernel 3, stride 1, padding 1) and one final
        ``MaxPool2D`` layer (pool size 2, stride 2).
    """
    block = gluon.nn.Sequential()
    conv_settings = dict(channels=channels, kernel_size=3, strides=1,
                         padding=1, activation="relu")
    for _ in range(num_convs):
        block.add(gluon.nn.Conv2D(**conv_settings))
    # The stride-2 pooling halves height and width (96 -> 48 in the demo below).
    block.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    return block

In [5]:
# Smoke test: a block with two 64-channel convolutions applied to the
# sample batch; the last expression displays the output shape.
net0 = vgg_block(num_convs=2, channels=64)
net0.initialize(ctx=ctx)
net0(data).shape

(64, 64, 48, 48)

In [6]:
# Display the parameters held by the block (weights and biases of both
# convolution layers, as shown in the cell output).
net0.collect_params()

sequential0_ (
  Parameter conv0_weight (shape=(64, 1, 3, 3), dtype=<class 'numpy.float32'>)
  Parameter conv0_bias (shape=(64,), dtype=<class 'numpy.float32'>)
  Parameter conv1_weight (shape=(64, 64, 3, 3), dtype=<class 'numpy.float32'>)
  Parameter conv1_bias (shape=(64,), dtype=<class 'numpy.float32'>)
)

## 定义``vgg_stack``

In [7]:
def vgg_stack(architecture):
    """Chain several VGG blocks into one sequential network.

    Args:
        architecture: Iterable of ``(num_convs, output_channel)`` pairs,
            one pair per block, in the order the blocks are applied.

    Returns:
        A ``gluon.nn.Sequential`` containing one ``vgg_block`` per pair.
    """
    stack = gluon.nn.Sequential()
    for block_spec in architecture:
        num_convs, output_channel = block_spec
        stack.add(vgg_block(num_convs, output_channel))
    return stack

In [8]:
# Smoke test: five blocks, each ending in a stride-2 pooling layer, applied
# to the sample batch; the last expression displays the output shape.
architecture = (
    (1, 64),
    (1, 128),
    (2, 256),
    (2, 512),
    (2, 512),
)
vgg = vgg_stack(architecture)
vgg.initialize(ctx=ctx)
vgg(data).shape

(64, 512, 3, 3)

## 定义模型

In [9]:
# Full model: a four-block VGG feature extractor followed by a dense
# classifier with one output per class.
num_outputs = 10
architecture = ((1, 64), (1, 128), (2, 256), (2, 512))

net = gluon.nn.Sequential()
with net.name_scope():
    # Layers are created inside the name scope, in the order they are applied.
    layers = (
        vgg_stack(architecture),
        gluon.nn.Flatten(),
        gluon.nn.Dense(512, activation='relu'),
        gluon.nn.Dropout(.5),
        gluon.nn.Dense(512, activation='relu'),
        gluon.nn.Dense(num_outputs),
    )
    for layer in layers:
        net.add(layer)

# Xavier initialization of every parameter, placed on the chosen context.
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

In [10]:
# Training loss: Gluon's softmax cross-entropy, applied to the network
# outputs and the labels in the training loop.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

In [11]:
# SGD optimizer over all parameters of `net` with a fixed learning rate.
learning_rate = 0.01
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': learning_rate},
)

## 训练

In [12]:
from time import time

epochs = 10

# Exponential moving average of the per-batch training loss.
niter = 0
moving_loss = .0
smoothing_constant = 0.1
# Bound up front so the epoch report still works if `train_data` yields
# no batches at all.
estimated_loss = .0

for epoch in range(epochs):
    start = time()
    for data, label in train_data:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Normalize the gradient by the number of samples actually in this
        # batch; the last batch of an epoch can be smaller than `batch_size`.
        trainer.step(data.shape[0])

        niter += 1
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss
        # Bias correction: the average starts at 0, so divide by
        # 1 - (1 - smoothing_constant)^niter to compensate during early steps.
        estimated_loss = moving_loss / (1 - (1 - smoothing_constant) ** niter)

    # The reported time covers training plus both accuracy evaluations.
    train_acc = utils.evaluate_accuracy_gluon(train_data, net, ctx)
    test_acc = utils.evaluate_accuracy_gluon(test_data, net, ctx)
    print("Epoch %d, Moving Train Avg loss %.5f, Train acc %.5f, Test acc %.5f, Time consume %.5f s."
          % (epoch, estimated_loss, train_acc, test_acc, time() - start))

Epoch 0, Moving Train Avg loss 0.36758, Train acc 0.93533, Test acc 0.93610, Time consume 86.57945 s.
Epoch 1, Moving Train Avg loss 0.11837, Train acc 0.97020, Test acc 0.97090, Time consume 85.61425 s.
Epoch 2, Moving Train Avg loss 0.11686, Train acc 0.98050, Test acc 0.98010, Time consume 85.54478 s.
Epoch 3, Moving Train Avg loss 0.06699, Train acc 0.98532, Test acc 0.98380, Time consume 85.53005 s.
Epoch 4, Moving Train Avg loss 0.07487, Train acc 0.98765, Test acc 0.98680, Time consume 85.65583 s.
Epoch 5, Moving Train Avg loss 0.05795, Train acc 0.98973, Test acc 0.98750, Time consume 85.58270 s.
Epoch 6, Moving Train Avg loss 0.04640, Train acc 0.99125, Test acc 0.98980, Time consume 85.67273 s.
Epoch 7, Moving Train Avg loss 0.04541, Train acc 0.99070, Test acc 0.99000, Time consume 85.64016 s.
Epoch 8, Moving Train Avg loss 0.04476, Train acc 0.99385, Test acc 0.99110, Time consume 85.27144 s.
Epoch 9, Moving Train Avg loss 0.04945, Train acc 0.99440, Test acc 0.99050, Time 

In [13]:
import os

filename = "models/vgg-mnist-0000.params"
# Create the target directory first so that a missing "models/" folder does
# not make the save fail after the long training run.
os.makedirs(os.path.dirname(filename), exist_ok=True)
# NOTE(review): newer MXNet releases deprecate `save_params` in favor of
# `save_parameters` — confirm against the installed version.
net.save_params(filename)