# Densely Connected Convolutional Networks

# 稠密连接的卷积神经网络

<img src="../img/Chapter4-Convolutional-Neural-Networks/4-15.png" width="450">

## DenseNet在ImageNet上的网络参数

<img src="../img/Chapter4-Convolutional-Neural-Networks/4-16.png" width="700">

In [1]:
import mxnet as mx

from mxnet import nd
from mxnet import gluon
from mxnet import autograd

import sys
sys.path.append('..')
import utils

ctx = mx.gpu()

In [2]:
def BN_ReLU_CONV(channels):
    net = gluon.nn.Sequential()
    with net.name_scope():
        net.add(gluon.nn.BatchNorm(axis=1))
        net.add(gluon.nn.Activation('relu'))
        net.add(gluon.nn.Conv2D(channels, kernel_size=3, padding=1))
    return net

In [3]:
'''
layers : 该DenseBlock有几层
growth_rate : 每层有几个输出通道
'''
class DenseBlock(gluon.Block):
    def __init__(self, layers, growth_rate, **kwargs):
        super().__init__(**kwargs)
        self.net = gluon.nn.Sequential()
        for i in range(layers):
            self.net.add(BN_ReLU_CONV(growth_rate))
            
    def forward(self, X):
        for blk in self.net:
            out = blk(X)
            X = nd.concat(X, out, dim=1)
        return X

In [None]:
blk = DenseBlock(6, 12)
blk.initialize(ctx=ctx)
X = nd.random.uniform(shape=(1, 3, 32, 32), ctx=ctx)
blk(X).shape 

In [5]:
# 过渡层的作用是每次将通道数减半
def TransitionLayer(channels):
    net = gluon.nn.Sequential()
    with net.name_scope():
        net.add(gluon.nn.BatchNorm(axis=1))
        net.add(gluon.nn.Conv2D(channels, kernel_size=1))
        net.add(gluon.nn.AvgPool2D(pool_size=2))
    return net

In [6]:
layer = TransitionLayer(12)
layer.initialize(ctx=ctx)
Y = nd.random.uniform(shape=(1, 3, 56, 56), ctx=ctx)
layer(Y).shape

(1, 12, 28, 28)

In [7]:
'''
num_classes : 分类类别数
init_channels : 初始通道数
dense_channels : 每个DenseBlock的通道数，是一个list
verbose : 输出调试信息
'''
class DenseNet121(gluon.Block):
    def __init__(self, num_classes, dense_blk, init_channels, verbose=True, **kwargs):
        super().__init__(**kwargs)
        self.verbose = verbose 
        self.net = gluon.nn.Sequential()
        with self.name_scope():
            
            # Conv Layer 
            b1 = gluon.nn.Sequential()
            b1.add(
                gluon.nn.BatchNorm(axis=1),
                gluon.nn.Activation('relu'),
                gluon.nn.Conv2D(init_channels, kernel_size=7, strides=2, padding=3),
                gluon.nn.MaxPool2D(pool_size=3, strides=2, padding=1)
            )
            self.net.add(b1)
            
            # DenseNet Layer 
            for i in range(len(dense_blk)):
                blk = gluon.nn.Sequential()
                init_channels //= 2
                if i != len(dense_blk)-1:
                    blk.add(
                        DenseBlock(dense_blk[i], growth_rate=32),
                        TransitionLayer(init_channels)
                    )
                else:
                     blk.add(DenseBlock(dense_blk[i], growth_rate=32))
                self.net.add(blk)
                
            # classification layer 
            b2 = gluon.nn.Sequential()
            b2.add(
                gluon.nn.AvgPool2D(pool_size=7, strides=1),
                gluon.nn.Flatten(),
                gluon.nn.Dense(num_classes)
            )
            self.net.add(b2)
            
    def forward(self, X):
        out = X
        for i, blk in enumerate(self.net):
            out = blk(out)
            if self.verbose:
                print("blk %d : %s" % (i+1, out.shape))
        return out

In [8]:
densenet = DenseNet121(num_classes=10, dense_blk=[6, 12, 24, 16], init_channels=64, verbose=True)
densenet.initialize(ctx=ctx)
X = nd.random.uniform(shape=(32, 3, 224, 224), ctx=ctx)
y = densenet(X)

blk 1 : (32, 64, 56, 56)
blk 2 : (32, 32, 28, 28)
blk 3 : (32, 16, 14, 14)
blk 4 : (32, 8, 7, 7)
blk 5 : (32, 520, 7, 7)
blk 6 : (32, 10)


In [9]:
from time import time

batch_size = 32
train_data, test_data = utils.load_dataset(batch_size, resize=224, data_type='cifar10')

densenet = DenseNet121(num_classes=10, dense_blk=[6, 12, 24, 16], init_channels=64, verbose=False)
densenet.collect_params().initialize(mx.init.Xavier(), ctx=ctx, force_reinit=True)

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

learning_rate = 0.1
trainer = gluon.Trainer(densenet.collect_params(), 'sgd', {'learning_rate' : learning_rate})

epochs = 20

niter = 0
moving_loss = 0.0
smoothing_constant = 0.9

from time import time
for epoch in range(epochs):
    start = time()
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            output = densenet(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(batch_size)
        
        niter += 1
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = smoothing_constant * moving_loss + (1-smoothing_constant) * curr_loss
        estimated_loss = moving_loss / (1 - smoothing_constant**niter)
    
    if not (epoch+1)%5:
        learning_rate /= 10
    
    train_acc = utils.evaluate_accuracy_gluon(train_data, densenet, ctx)
    test_acc = utils.evaluate_accuracy_gluon(test_data, densenet, ctx)
    print("Epoch %d, Moving Train Avg loss %.5f, Train acc %.5f, Test acc %.5f, Time consume %.5f s."
         % (epoch, estimated_loss, train_acc, test_acc, time() - start))

Epoch 0, Moving Train Avg loss 1.39727, Train acc 0.45404, Test acc 0.45500, Time consume 363.58925 s.
Epoch 1, Moving Train Avg loss 1.12915, Train acc 0.60822, Test acc 0.59510, Time consume 362.88870 s.
Epoch 2, Moving Train Avg loss 0.91204, Train acc 0.67750, Test acc 0.66200, Time consume 363.14153 s.
Epoch 3, Moving Train Avg loss 0.86925, Train acc 0.73832, Test acc 0.70940, Time consume 362.99581 s.
Epoch 4, Moving Train Avg loss 0.73567, Train acc 0.81442, Test acc 0.76830, Time consume 363.15373 s.
Epoch 5, Moving Train Avg loss 0.61311, Train acc 0.80320, Test acc 0.75470, Time consume 363.25171 s.
Epoch 6, Moving Train Avg loss 0.51046, Train acc 0.84870, Test acc 0.78860, Time consume 363.21021 s.
Epoch 7, Moving Train Avg loss 0.52230, Train acc 0.84722, Test acc 0.78430, Time consume 362.94565 s.
Epoch 8, Moving Train Avg loss 0.38691, Train acc 0.86562, Test acc 0.79670, Time consume 362.78417 s.
Epoch 9, Moving Train Avg loss 0.41949, Train acc 0.87454, Test acc 0.802