# Densely Connected Convolutional Networks

## DenseNet实现

In [None]:
"""Contains a variant of the densenet model definition."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

# Short alias for the TF-Slim layer library used throughout this file.
slim = tf.contrib.slim

# Truncated-normal (truncated Gaussian) weight initializer.
def trunc_normal(stddev):
    """Returns a truncated-normal initializer with the given standard deviation.

    Args:
      stddev: Standard deviation forwarded to
        `tf.truncated_normal_initializer`.

    Returns:
      The initializer object created by `tf.truncated_normal_initializer`.
    """
    initializer = tf.truncated_normal_initializer(stddev=stddev)
    return initializer

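# Composite function used as a small non-linear transformation unit:
# BN (batch normalization) - ReLU (activation) - conv (convolution) - dropout.
# Used for the bottleneck described in the paper and everywhere else a convolution is applied.
# Args: current - the current output tensor; num_outputs - number of output channels;
#       kernel_size - convolution kernel size.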
def bn_act_conv_drp(current, num_outputs, kernel_size, scope='block'):
    """Applies batch norm, ReLU, a convolution and dropout, in that order.

    Args:
      current: Input tensor.
      num_outputs: Number of output channels of the convolution.
      kernel_size: Kernel size passed to `slim.conv2d`.
      scope: Name prefix; each layer's scope is this prefix plus `_bn`,
        `_conv` or `_dropout`.

    Returns:
      The tensor produced by the final dropout layer.
    """
    normalized = slim.batch_norm(current, scope=scope + '_bn')
    activated = tf.nn.relu(normalized)
    # Padding, stride and activation are whatever slim.conv2d defaults (or an
    # enclosing arg_scope) provide; none are overridden here.
    convolved = slim.conv2d(activated, num_outputs, kernel_size,
                            scope=scope + '_conv')
    return slim.dropout(convolved, scope=scope + '_dropout')

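# A DenseNet dense block: runs `layers` rounds of a 1x1 bottleneck followed by a 3x3 composite
# function; each round's result is concatenated onto that round's input and becomes the next input.
# Returns the block's final output `net`.
# Args: net - the current feature map; layers - number of times the small module is repeated;
#       growth - the growth rate (k in the paper), i.e. the number of channels each round adds.
# The block output has C0 + growth * layers channels, where C0 is the channel count of the input `net`.
# The concatenated input grows every round, but the 1x1 bottleneck first maps it to 4 * growth
# channels, so the 3x3 convolution's input width stays bounded.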
def block(net, layers, growth, scope='block'):
    """Builds one dense block by repeatedly appending new feature channels.

    Args:
      net: Input tensor; channels are concatenated along axis 3.
      layers: Number of bottleneck + 3x3 rounds to apply.
      growth: Number of channels each round appends to `net`.
      scope: Name prefix for the layers created in each round.

    Returns:
      `net` with the output of every round concatenated onto it.
    """
    for layer_index in range(layers):
        suffix = str(layer_index)
        # 1x1 bottleneck: map the accumulated features to 4 * growth channels.
        squeezed = bn_act_conv_drp(net, 4 * growth, [1, 1],
                                   scope=scope + '_conv1x1' + suffix)
        # 3x3 convolution producing the `growth` new channels for this round.
        new_features = bn_act_conv_drp(squeezed, growth, [3, 3],
                                       scope=scope + '_conv3x3' + suffix)
        net = tf.concat(values=[net, new_features], axis=3)
    return net


def densenet(images, num_classes=1001, is_training=False,
             dropout_keep_prob=0.8,
             scope='densenet'):
    """Creates a variant of the densenet model.

    The network is a 7x7 stride-2 convolution and a 3x3 stride-2 max pool,
    followed by four dense blocks of 6, 12, 24 and 16 layers. Consecutive
    blocks are separated by a transition (1x1 convolution that compresses the
    channel count by `compression_rate`, then 2x2 stride-2 average pooling).
    The last block is followed by global average pooling and a fully
    connected logits layer.

    Args:
      images: A batch of `Tensors` of size [batch_size, height, width, channels].
      num_classes: the number of classes in the dataset.
      is_training: specifies whether or not we're currently training the model.
        Forwarded to `bn_drp_scope`, which applies it to the batch norm and
        dropout layers.
      dropout_keep_prob: the percentage of activation values that are retained.
      scope: Optional variable_scope.

    Returns:
      logits: the pre-softmax activations, a tensor of size
        [batch_size, `num_classes`]
      end_points: a dictionary from components of the network to the corresponding
        activation.
    """
    growth = 24             # Growth rate (k in the paper); bounds the network width.
    compression_rate = 0.5  # Fraction of channels kept by each transition layer.
    block_layers = (6, 12, 24, 16)  # Number of layers in each dense block.

    def reduce_dim(input_feature):
        # Channel count of `input_feature` scaled by the compression rate.
        return int(int(input_feature.shape[-1]) * compression_rate)

    end_points = {}

    with tf.variable_scope(scope, 'DenseNet', [images, num_classes]):
        with slim.arg_scope(bn_drp_scope(is_training=is_training,
                                         keep_prob=dropout_keep_prob)):
            # Stem convolution: for 224x224x3 inputs the output is 112x112x48.
            end_point = 'Pre_Conv2d_7x7'
            current = slim.conv2d(images, 2 * growth, [7, 7], stride=2,
                                  padding='same', scope=end_point)
            end_points[end_point] = current
            # Max pool halves the spatial size again: 56x56x48.
            end_point = 'Pre_Pool_3x3'
            current = slim.max_pool2d(current, [3, 3], stride=2,
                                      padding='same', scope=end_point)
            end_points[end_point] = current

            last_block = len(block_layers)
            for index, layers in enumerate(block_layers, start=1):
                # Dense block: spatial size unchanged, channels grow by
                # growth * layers.
                end_point = 'Block%d' % index
                current = block(current, layers, growth, scope=end_point)
                end_points[end_point] = current

                if index < last_block:
                    # Transition: the 1x1 convolution compresses the channel
                    # count, then average pooling halves the spatial size.
                    end_point = 'Trans%d_Conv2d_1x1' % index
                    current = bn_act_conv_drp(current, reduce_dim(current),
                                              [1, 1], scope=end_point)
                    end_points[end_point] = current
                    end_point = 'Trans%d_avgPool_2x2' % index
                    current = slim.avg_pool2d(current, [2, 2], stride=2,
                                              padding='same', scope=end_point)
                    end_points[end_point] = current

            # Global average pooling over the spatial axes. A mean over axes
            # 1 and 2 is used instead of a fixed 7x7 pooling window because
            # slim.avg_pool2d defaults to stride 2, which with 'same' padding
            # leaves a 4x4 map rather than a single value per channel.
            end_point = 'Global_Pool_7x7'
            current = tf.reduce_mean(current, axis=[1, 2], name=end_point)
            end_points[end_point] = current

            # Flatten, then the fully connected classifier.
            end_point = 'Pre_Flatten'
            current = slim.flatten(current, scope=end_point)
            end_points[end_point] = current
            end_point = 'FC_Logits'
            logits = slim.fully_connected(current, num_classes,
                                          activation_fn=None, scope=end_point)
            end_points[end_point] = logits

            # Softmax class probabilities.
            end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')

    return logits, end_points

def bn_drp_scope(is_training=True, keep_prob=0.8):
    """Builds the argument scope holding batch-norm and dropout defaults.

    Args:
      is_training: Passed as `is_training` to both `slim.batch_norm` and
        `slim.dropout`.
      keep_prob: Dropout keep probability; replaced by 1 when `is_training`
        is false.

    Returns:
      The arg_scope captured while both sets of defaults are active.
    """
    if not is_training:
        keep_prob = 1
    with slim.arg_scope([slim.batch_norm],
                        scale=True,
                        is_training=is_training,
                        updates_collections=None), \
            slim.arg_scope([slim.dropout],
                           is_training=is_training,
                           keep_prob=keep_prob) as combined_scope:
        return combined_scope


def densenet_arg_scope(weight_decay=0.004):
    """Defines the default densenet argument scope.

    Args:
      weight_decay: The weight decay to use for regularizing the model. It is
        the scale of the L2 regularizer attached to the weights of
        `slim.conv2d` and `slim.fully_connected` layers.

    Returns:
      An `arg_scope` to use for the densenet model.
    """
    # Outer scope: L2 weight regularization for every layer type that owns
    # weights in the model.
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            weights_regularizer=slim.l2_regularizer(weight_decay)):
        # Inner scope: convolution defaults. The convolutions are linear and
        # bias-free because bn_act_conv_drp applies batch norm and ReLU
        # before each one.
        with slim.arg_scope(
                [slim.conv2d],
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(
                    factor=2.0, mode='FAN_IN', uniform=False),
                activation_fn=None, biases_initializer=None, padding='same',
                stride=1) as sc:
            return sc


# Record the default input resolution on the function object; the layer-size
# comments in `densenet` assume 224x224 inputs.
# NOTE(review): presumably read by callers to size input images — confirm.
densenet.default_image_size = 224