# 使用``gluon``实现卷积神经网络

使用``gluon``实现一个简单的CNN与从零实现其实并没有太多的区别，但是，使用``gluon``时计算会快很多，这是因为``gluon``会调用底层使用C++优化过的计算图。

## 加载数据

In [1]:
import mxnet as mx
import numpy as np

from mxnet import nd
from mxnet import gluon
from mxnet import autograd

import utils

ctx = mx.gpu()

In [2]:
num_examples = 60000
num_outputs = 10
num_inputs = 784

batch_size = 64
train_data, test_data = utils.load_dataset(batch_size, data_type='mnist')

## 定义模型

In [3]:
num_filter_conv1 = 20
num_filter_conv2 = 50
num_fc1 = 512
num_fc2 = num_outputs

def get_net():
    net = gluon.nn.Sequential()
    with net.name_scope():
        ############### Conv Layer ###############
        net.add(gluon.nn.Conv2D(channels=20, kernel_size=(3,3), strides=(1,1), activation='relu'))
        net.add(gluon.nn.MaxPool2D(pool_size=(2,2), strides=(2,2)))
        net.add(gluon.nn.Conv2D(channels=50, kernel_size=5, strides=(1,1), activation='relu'))
        net.add(gluon.nn.MaxPool2D(pool_size=(2,2), strides=(2,2)))
        ############### FC Layer ###############
        net.add(gluon.nn.Dense(num_fc1, activation='relu'))
        net.add(gluon.nn.Dense(num_fc2, activation='relu'))
    return net

In [4]:
net = get_net()
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

In [5]:
net.collect_params()

sequential0_ (
  Parameter sequential0_conv0_weight (shape=(20, 0, 3, 3), dtype=<class 'numpy.float32'>)
  Parameter sequential0_conv0_bias (shape=(20,), dtype=<class 'numpy.float32'>)
  Parameter sequential0_conv1_weight (shape=(50, 0, 5, 5), dtype=<class 'numpy.float32'>)
  Parameter sequential0_conv1_bias (shape=(50,), dtype=<class 'numpy.float32'>)
  Parameter sequential0_dense0_weight (shape=(512, 0), dtype=<class 'numpy.float32'>)
  Parameter sequential0_dense0_bias (shape=(512,), dtype=<class 'numpy.float32'>)
  Parameter sequential0_dense1_weight (shape=(10, 0), dtype=<class 'numpy.float32'>)
  Parameter sequential0_dense1_bias (shape=(10,), dtype=<class 'numpy.float32'>)
)

In [6]:
for data, _ in train_data:
    data = data.as_in_context(ctx)
    break
    
net(data).shape

(64, 10)

In [7]:
net.collect_params()

sequential0_ (
  Parameter sequential0_conv0_weight (shape=(20, 1, 3, 3), dtype=<class 'numpy.float32'>)
  Parameter sequential0_conv0_bias (shape=(20,), dtype=<class 'numpy.float32'>)
  Parameter sequential0_conv1_weight (shape=(50, 20, 5, 5), dtype=<class 'numpy.float32'>)
  Parameter sequential0_conv1_bias (shape=(50,), dtype=<class 'numpy.float32'>)
  Parameter sequential0_dense0_weight (shape=(512, 800), dtype=<class 'numpy.float32'>)
  Parameter sequential0_dense0_bias (shape=(512,), dtype=<class 'numpy.float32'>)
  Parameter sequential0_dense1_weight (shape=(10, 512), dtype=<class 'numpy.float32'>)
  Parameter sequential0_dense1_bias (shape=(10,), dtype=<class 'numpy.float32'>)
)

## 定义损失函数和优化器

In [8]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

In [9]:
learning_rate = 0.1
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': learning_rate})

## 训练

In [10]:
# 可以通过减少epochs的次数来设置early stop
# 由loss可知，在epoch 5的时候，就可以停止训练了
epochs = 10

niter = 0
moving_loss = .0
smoothing_constant = 0.1

for epoch in range(epochs):
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(batch_size)
        
        ## moving loss
        niter += 1
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss
        ## 偏差修正
        estimated_loss = moving_loss / (1- (1-smoothing_constant)**niter)
        
    train_acc = utils.evaluate_accuracy_gluon(train_data, net, ctx)
    test_acc = utils.evaluate_accuracy_gluon(test_data, net, ctx)
    print("Epoch %s, Train Moving Avg Loss %s, Train acc %s, Test acc %s"
         % (epoch, estimated_loss, train_acc, test_acc))

Epoch 0, Train Moving Avg Loss 0.317980416602, Train acc 0.87735, Test acc 0.8807
Epoch 1, Train Moving Avg Loss 0.0715107289863, Train acc 0.985216666667, Test acc 0.9835
Epoch 2, Train Moving Avg Loss 0.0510586454028, Train acc 0.98855, Test acc 0.9866
Epoch 3, Train Moving Avg Loss 0.041884068308, Train acc 0.992616666667, Test acc 0.9891
Epoch 4, Train Moving Avg Loss 0.0270648670209, Train acc 0.99345, Test acc 0.987
Epoch 5, Train Moving Avg Loss 0.0136126880306, Train acc 0.994833333333, Test acc 0.9889
Epoch 6, Train Moving Avg Loss 0.0283742334714, Train acc 0.994133333333, Test acc 0.9871
Epoch 7, Train Moving Avg Loss 0.0210650549476, Train acc 0.995766666667, Test acc 0.9881
Epoch 8, Train Moving Avg Loss 0.00841941742711, Train acc 0.9978, Test acc 0.9906
Epoch 9, Train Moving Avg Loss 0.00667158403, Train acc 0.997466666667, Test acc 0.9902
