# Sparsely Connected Convolutional Networks


## 基本结构

<img src="https://github.com/titu1994/keras-SparseNet/blob/master/images/sparse_connectivity.PNG?raw=true" width="700">

## 与DenseNet的不同

<img src="https://github.com/titu1994/keras-SparseNet/raw/master/images/dense_vs_sparse.png?raw=true" width="700">

## 2018年1月30日前的版本

In [1]:
import mxnet as mx
import numpy as np

from mxnet import nd
from mxnet import gluon
from mxnet import autograd

import utils

In [2]:
def BN_ReLU_CONV(channels, kernel_size, strides=1, padding=0):
    """Build a BatchNorm -> ReLU -> Conv2D stack.

    Args:
        channels: Number of output channels of the convolution.
        kernel_size: Kernel size of the convolution.
        strides: Stride of the convolution.
        padding: Padding of the convolution.

    Returns:
        A ``gluon.nn.HybridSequential`` holding the three layers in order.
    """
    stack = gluon.nn.HybridSequential()
    with stack.name_scope():
        # Each layer is constructed while the stack's name scope is active.
        stack.add(gluon.nn.BatchNorm(axis=1))
        stack.add(gluon.nn.Activation('relu'))
        stack.add(gluon.nn.Conv2D(channels, kernel_size=kernel_size,
                                  strides=strides, padding=padding))
    return stack

In [131]:
class SparseUnit(gluon.HybridBlock):
    """Two chained BN-ReLU-Conv stages: a 1x1 convolution, then a 3x3 one.

    Both convolutions produce ``channels`` output channels; the 3x3 stage
    uses padding 1.
    """

    def __init__(self, channels, **kwargs):
        super().__init__(**kwargs)
        stages = gluon.nn.HybridSequential()
        # (kernel_size, padding) for each stage, in execution order.
        for kernel, pad in ((1, 0), (3, 1)):
            stages.add(BN_ReLU_CONV(channels, kernel_size=kernel, padding=pad))
        self.unit = stages

    def hybrid_forward(self, F, X):
        """Run ``X`` through both stages and return the result."""
        out = self.unit(X)
        return out
    
# Second approach: the same BN -> ReLU -> Conv pipeline written with explicit
# layer attributes instead of nested sequential stacks.
class SparseUnit(gluon.nn.HybridBlock):
    """BN-ReLU-Conv(1x1) followed by BN-ReLU-Conv(3x3, padding 1).

    Both convolutions produce ``channels`` output channels.
    """

    def __init__(self, channels, **kwargs):
        super().__init__(**kwargs)
        batch_norm = gluon.nn.BatchNorm
        conv = gluon.nn.Conv2D
        with self.name_scope():
            self.bn1 = batch_norm(axis=1)
            self.conv1 = conv(channels, kernel_size=1)
            self.bn2 = batch_norm(axis=1)
            self.conv2 = conv(channels, kernel_size=3, padding=1)

    def hybrid_forward(self, F, X):
        """Apply both stages to ``X`` and return the output of the 3x3 conv."""
        hidden = self.bn1(X)
        hidden = F.relu(hidden)
        hidden = self.conv1(hidden)
        hidden = self.bn2(hidden)
        hidden = F.relu(hidden)
        return self.conv2(hidden)

In [132]:
# Smoke test: push a random (1, 3, 32, 32) batch through a hybridized
# SparseUnit with 1000 output channels.
s = SparseUnit(1000)
s.initialize()
s.hybridize()
X = nd.random.normal(shape=(1,3,32,32))
# Final expression of the cell: the shape of the unit's output.
s(X).shape

(1, 1000, 32, 32)

In [133]:
def exponential_index_fetch(x_list):
    """Pick the elements 1, 2, 4, 8, ... positions back from the end of ``x_list``.

    Args:
        x_list: Sequence to sample from.

    Returns:
        A list of the selected elements, closest-to-the-end first. The list
        is empty when ``x_list`` is empty.
    """
    size = len(x_list)
    # 2**k <= size holds exactly for k in range(size.bit_length()).
    return [x_list[size - (1 << k)] for k in range(size.bit_length())]

In [163]:
class SparseBlock(gluon.nn.HybridBlock):
    """A chain of SparseUnits fed by exponentially spaced earlier outputs.

    After every unit, the block input and all unit outputs produced so far
    are sampled with ``exponential_index_fetch`` and concatenated along
    ``dim=1``. That concatenation is the input of the next unit and, after
    the last unit, the output of the block.

    Args:
        layers: Number of SparseUnits in the block.
        growth_rate: Output channels of every SparseUnit.
    """

    def __init__(self, layers, growth_rate, **kwargs):
        super().__init__(**kwargs)
        self.layers = layers
        self.net = gluon.nn.HybridSequential()
        for _ in range(layers):
            self.net.add(SparseUnit(growth_rate))

    def hybrid_forward(self, F, X):
        """Run the block on ``X`` and return the final concatenation."""
        history = [X]
        merged = X
        for unit in self.net:
            history.append(unit(merged))
            picked = exponential_index_fetch(history)
            # Fold the selected tensors together one concat at a time.
            merged = picked[0]
            for earlier in picked[1:]:
                merged = F.concat(merged, earlier, dim=1)
        return merged

In [153]:
# Smoke test: a SparseBlock with 12 units and growth rate 24 applied to a
# random (1, 16, 32, 32) batch.
blk = SparseBlock(12, 24)
blk.collect_params().initialize()
# Hybridization is left disabled for this check.
# blk.hybridize()
X = nd.random.normal(shape=(1,16,32,32))
print(blk(X).shape)

(1, 96, 32, 32)


In [140]:
def TransitionLayer(channels):
    """Build a transition stage: BN-ReLU-Conv(1x1) followed by 2x2 average pooling.

    Args:
        channels: Output channels of the 1x1 convolution.

    Returns:
        A ``gluon.nn.HybridSequential`` with the two stages.
    """
    transition = gluon.nn.HybridSequential()
    with transition.name_scope():
        transition.add(BN_ReLU_CONV(channels, kernel_size=1))
        transition.add(gluon.nn.AvgPool2D(pool_size=2))
    return transition

In [164]:
def BN_ReLU_CONV(channels, kernel_size, strides=1, padding=0):
    """Non-linear unit: BatchNorm, ReLU and Conv2D applied in that order.

    Args:
        channels: Number of output channels of the convolution.
        kernel_size: Kernel size of the convolution.
        strides: Stride of the convolution.
        padding: Padding of the convolution.

    Returns:
        A ``gluon.nn.HybridSequential`` containing the three layers.
    """
    unit = gluon.nn.HybridSequential()
    with unit.name_scope():
        # Build the layers in execution order while the name scope is active.
        layers = (
            gluon.nn.BatchNorm(axis=1),
            gluon.nn.Activation('relu'),
            gluon.nn.Conv2D(channels, kernel_size=kernel_size,
                            strides=strides, padding=padding),
        )
        unit.add(*layers)
    return unit


class SparseUnit(gluon.HybridBlock):
    """Sparse unit: BN-ReLU-Conv(1x1) followed by BN-ReLU-Conv(3x3, padding 1).

    Both convolutions produce ``channels`` output channels.
    """

    def __init__(self, channels, **kwargs):
        super().__init__(**kwargs)
        pipeline = gluon.nn.HybridSequential()
        pipeline.add(BN_ReLU_CONV(channels, kernel_size=1))
        pipeline.add(BN_ReLU_CONV(channels, kernel_size=3, padding=1))
        self.unit = pipeline

    def hybrid_forward(self, F, X):
        """Return the unit's output for ``X``."""
        result = self.unit(X)
        return result

def exponential_index_fetch(x_list):
    """Return the elements at offsets 1, 2, 4, 8, ... from the end of ``x_list``.

    Args:
        x_list: Sequence to sample from.

    Returns:
        The selected elements as a list, starting with the last element of
        ``x_list``. Empty input yields an empty list.
    """
    length = len(x_list)

    # Collect every power-of-two offset that still fits inside the sequence.
    offsets = []
    step = 1
    while step <= length:
        offsets.append(step)
        step += step

    return [x_list[length - off] for off in offsets]

class SparseBlock(gluon.HybridBlock):
    """Sparsely connected block built from a chain of SparseUnits.

    After each unit, the block input plus every unit output so far is
    sampled with ``exponential_index_fetch``; the sampled tensors are
    concatenated along ``dim=1`` and passed to the next unit. The last
    concatenation is the block's output.

    Args:
        layers: Number of SparseUnits in the block.
        growth_rate: Output channels of every SparseUnit.
    """

    def __init__(self, layers, growth_rate, **kwargs):
        super().__init__(**kwargs)
        self.layers = layers
        units = gluon.nn.HybridSequential()
        self.net = units
        for _ in range(layers):
            units.add(SparseUnit(growth_rate))

    def hybrid_forward(self, F, X):
        """Run the block on ``X`` and return the final concatenation."""
        outputs = [X]
        for unit in self.net:
            outputs.append(unit(X))
            # ``outputs`` has at least two entries here, so the fetch is non-empty.
            first, *others = exponential_index_fetch(outputs)
            X = first
            for other in others:
                X = F.concat(X, other, dim=1)
        return X

def TransitionLayer(channels):
    """Transition layer: BN-ReLU-Conv(1x1) followed by 2x2 average pooling.

    Args:
        channels: Output channels of the 1x1 convolution.

    Returns:
        A ``gluon.nn.HybridSequential`` with the two stages.
    """
    stage = gluon.nn.HybridSequential()
    with stage.name_scope():
        pointwise = BN_ReLU_CONV(channels, kernel_size=1)
        pooling = gluon.nn.AvgPool2D(pool_size=2)
        stage.add(pointwise, pooling)
    return stage

class SparseNet(gluon.HybridBlock):
    """SparseNet classifier assembled from SparseBlocks and transition layers.

    Args:
        num_classes: Number of classes predicted by the final Dense layer.
        num_sparseblk_count: Number of SparseBlocks in the network.
        num_layers: Number of SparseUnits inside each SparseBlock.
        growth_rate: Output channels of each SparseUnit.
        init_channels: Output channels of the initial convolution.
        verbose: Stored on the instance; the forward pass does not read it.
    """

    def __init__(self, num_classes, num_sparseblk_count, num_layers, growth_rate=12, 
                 init_channels=16, verbose=False, **kwargs):
        super().__init__(**kwargs)
        self.verbose = verbose
        self.net = gluon.nn.HybridSequential()  # the assembled network
        with self.name_scope():
            # Initial convolution stage.
            stem = gluon.nn.HybridSequential()
            stem.add(BN_ReLU_CONV(init_channels, kernel_size=3, padding=1))
            self.net.add(stem)

            # Sparsely connected stages.
            tracked_channels = init_channels
            last_idx = num_sparseblk_count - 1
            for idx in range(num_sparseblk_count):
                stage = gluon.nn.HybridSequential()
                stage.add(SparseBlock(num_layers, growth_rate))

                # Mirror the block's concatenation on channel counts to get
                # its output width, then halve it for the transition layer.
                widths = [tracked_channels] + [growth_rate] * num_layers
                tracked_channels = sum(exponential_index_fetch(widths)) // 2

                # There is one transition layer fewer than SparseBlocks:
                # the last block is not followed by one.
                if idx < last_idx:
                    stage.add(TransitionLayer(tracked_channels))
                self.net.add(stage)

            # Classification head.
            head = gluon.nn.HybridSequential()
            head.add(gluon.nn.GlobalAvgPool2D())
            head.add(gluon.nn.Dense(num_classes))
            self.net.add(head)

    def hybrid_forward(self, F, X):
        """Feed ``X`` through every stage in order and return the final output."""
        out = X
        for stage in self.net:
            out = stage(out)
        return out

In [167]:
# Smoke test: build a SparseNet with 3 blocks of 12 units each (growth rate
# 24, 16 initial channels, 10 classes), hybridize it, and run one random
# (1, 3, 32, 32) batch through it.
sparsenet = SparseNet(num_classes=10, num_sparseblk_count=3, num_layers=12, 
                      growth_rate=24, init_channels=16, verbose=True)
sparsenet.initialize()
sparsenet.hybridize()
X = nd.random.uniform(shape=(1,3,32,32))
y = sparsenet(X)

## 最新版本(2018年2月20日)

In [2]:
def BN_ReLU_CONV(channels, kernel_size, strides=1, padding=0, erase_relu=False):
    """Non-linear unit: BatchNorm, an optional ReLU, then Conv2D.

    Args:
        channels: Number of output channels of the convolution.
        kernel_size: Kernel size of the convolution.
        strides: Stride of the convolution.
        padding: Padding of the convolution.
        erase_relu: When true, the ReLU between BatchNorm and Conv2D is
            left out.

    Returns:
        A ``gluon.nn.HybridSequential`` containing the layers in order.
    """
    stack = gluon.nn.HybridSequential()
    with stack.name_scope():
        # Construct the layers in execution order inside the name scope.
        layers = [gluon.nn.BatchNorm(axis=1, epsilon=2e-5)]
        if not erase_relu:
            layers.append(gluon.nn.Activation('relu'))
        layers.append(gluon.nn.Conv2D(channels, kernel_size=kernel_size,
                                      strides=strides, padding=padding))
        for layer in layers:
            stack.add(layer)
    return stack


class SparseUnit(gluon.HybridBlock):
    """Sparse unit: a 3x3 BN-ReLU-Conv, optionally preceded by a 1x1 one.

    Args:
        channels: Output channels of every convolution in the unit.
        bottleneck: When true, a 1x1 BN-ReLU-Conv stage is placed before the
            3x3 stage.
    """

    def __init__(self, channels, bottleneck=False, **kwargs):
        super().__init__(**kwargs)
        self.bottleneck = bottleneck
        # (kernel_size, padding) of each stage, in execution order.
        stage_specs = [(3, 1)]
        if bottleneck:
            stage_specs.insert(0, (1, 0))
        with self.name_scope():
            self.unit = gluon.nn.HybridSequential()
            for kernel, pad in stage_specs:
                self.unit.add(BN_ReLU_CONV(channels, kernel_size=kernel, padding=pad))

    def hybrid_forward(self, F, X):
        """Return the unit's output for ``X``."""
        out = self.unit(X)
        return out

def fetch_exponential_idx(x_list):
    """Return the elements of ``x_list`` located 2**k positions from its end.

    The offsets 1, 2, 4, 8, ... are used for as long as they do not exceed
    the length of ``x_list``.

    Args:
        x_list: Sequence to sample from.

    Returns:
        The selected elements as a list, starting with the last element of
        ``x_list``. Empty input yields an empty list.
    """
    n = len(x_list)
    # n.bit_length() is the number of powers of two that are <= n.
    return [x_list[n - 2 ** k] for k in range(n.bit_length())]

class SparseBlock(gluon.HybridBlock):
    """Sparse block: a chain of conv units with exponentially spaced skip inputs.

    Every unit is a small sequential stack ending in a BatchNorm. After each
    unit, the block input and all unit outputs so far are sampled with
    ``fetch_exponential_idx`` and concatenated along ``dim=1``; that
    concatenation feeds the next unit and, after the last unit, is the
    block's output.

    Args:
        nDenseBlock: Number of units in the block.
        growth_rate: Output channels of every unit.
        bottleneck: When true, each unit starts with a 1x1 convolution to
            ``4 * growth_rate`` channels (built with ``erase_relu=True``)
            ahead of the 3x3 convolution.
        droprate: Dropout rate; a Dropout layer follows each convolution
            stage only when this is greater than zero.
    """

    def __init__(self, nDenseBlock, growth_rate, bottleneck=False, droprate=.0, **kwargs):
        super().__init__(**kwargs)
        self.nDenseBlock = nDenseBlock

        def add_dropout(stack):
            # Dropout layers are only created when a positive rate is requested.
            if droprate > .0:
                stack.add(gluon.nn.Dropout(droprate))

        with self.name_scope():
            net = self.net = gluon.nn.HybridSequential()
            for _ in range(nDenseBlock):
                blk = gluon.nn.HybridSequential()
                if bottleneck:
                    interchannels = 4 * growth_rate
                    blk.add(BN_ReLU_CONV(interchannels, kernel_size=1, erase_relu=True))
                    add_dropout(blk)
                blk.add(BN_ReLU_CONV(growth_rate, kernel_size=3, padding=1))
                add_dropout(blk)
                # Every unit ends with its own BatchNorm.
                blk.add(gluon.nn.BatchNorm(axis=1, epsilon=2e-5))
                net.add(blk)

    def hybrid_forward(self, F, X):
        """Run the block on ``X`` and return the final concatenation."""
        X_list = [X]
        for blk in self.net:
            X = blk(X)
            X_list.append(X)
            expidx_output = fetch_exponential_idx(X_list)

            # Concatenate the selected tensors one at a time along dim=1.
            tmp = expidx_output[0]
            for out in expidx_output[1:]:
                tmp = F.concat(tmp, out, dim=1)
            X = tmp

        # Debug output. Inputs seen while the block is hybridized may be
        # symbolic and not expose ``.shape``; skip the print in that case
        # instead of failing the forward pass.
        shape = getattr(X, 'shape', None)
        if shape is not None:
            print('SparseBlk - ', shape)
        return X

def TransitionLayer(channels, droprate=0.0):
    """Transition layer: BN-ReLU-Conv(1x1), optional Dropout, 2x2 average pooling.

    Args:
        channels: Output channels of the 1x1 convolution.
        droprate: Dropout rate; the Dropout layer is only inserted when this
            is greater than zero.

    Returns:
        A ``gluon.nn.HybridSequential`` with the stages in order.
    """
    transition = gluon.nn.HybridSequential()
    with transition.name_scope():
        stages = [BN_ReLU_CONV(channels, kernel_size=1)]
        if droprate > 0.0:
            stages.append(gluon.nn.Dropout(droprate))
        stages.append(gluon.nn.AvgPool2D(pool_size=2, strides=2))
        transition.add(*stages)
    return transition

class SparseNet(gluon.HybridBlock):
    """SparseNet classifier built from SparseBlocks and transition layers.

    Args:
        num_classes: Number of classes predicted by the final Dense layer.
        num_sparseblk_count: Number of SparseBlocks in the network.
        depth: Total depth setting; each SparseBlock gets
            ``(depth - 4) // num_sparseblk_count`` units, halved when
            ``bottleneck`` is true.
        reduction: Factor applied to a SparseBlock's output channel count to
            size the following transition layer. Forced to 1 when
            ``bottleneck`` is false.
        droprate: Dropout rate handed to the transition layers. The
            SparseBlocks are constructed without it.
        growth_rate: Output channels of each unit inside a SparseBlock; also
            used as the channel count of the initial convolution.
        bottleneck: Whether the SparseBlocks use bottleneck units.
        verbose: When true, the forward pass prints the output shape of each
            stage (when the output exposes one).
    """

    def __init__(self, num_classes, num_sparseblk_count, depth, reduction=.5, droprate=0.0, growth_rate=12, 
                 bottleneck=False, verbose=False, **kwargs):
        super().__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            net = self.net = gluon.nn.HybridSequential()  # the assembled network

            # Number of units per SparseBlock.
            nDenseBlock = (depth - 4) // num_sparseblk_count
            if bottleneck:
                # A bottleneck unit holds two convolutions, so use half as many.
                nDenseBlock //= 2
            else:
                reduction = 1

            # Initial convolution.
            init_channels = growth_rate
            net.add(gluon.nn.Conv2D(init_channels, kernel_size=3, padding=1))

            # Sparsely connected stages.
            sparse_output_channel = init_channels
            for idx in range(num_sparseblk_count):
                b2 = gluon.nn.HybridSequential()
                b2.add(SparseBlock(nDenseBlock, growth_rate, bottleneck))

                # Mirror the block's concatenation on channel counts to get
                # its output width, then scale by ``reduction``. Truncate to
                # int right away so the tracked value matches the channel
                # count actually given to the transition convolution rather
                # than carrying a fractional width into the next stage.
                sparse_output_filter = [sparse_output_channel] + [growth_rate] * nDenseBlock
                sparse_output_channel = int(
                    sum(fetch_exponential_idx(sparse_output_filter)) * reduction)

                # There is one transition layer fewer than SparseBlocks:
                # the last block is not followed by one.
                if idx != num_sparseblk_count - 1:
                    b2.add(TransitionLayer(sparse_output_channel, droprate))
                net.add(b2)

            # Final feature normalization and activation.
            net.add(gluon.nn.BatchNorm(axis=1))
            net.add(gluon.nn.Activation('relu'))

            # Classification head.
            b3 = gluon.nn.HybridSequential()
            b3.add(
                gluon.nn.GlobalAvgPool2D(),
                gluon.nn.Dense(num_classes)
            )
            net.add(b3)

    def hybrid_forward(self, F, X):
        """Feed ``X`` through every stage in order and return the final output."""
        out = X
        for i, blk in enumerate(self.net):
            out = blk(out)
            if self.verbose:
                # Outputs seen while the network is hybridized may be
                # symbolic and not expose ``.shape``; skip the report then
                # instead of failing the forward pass.
                shape = getattr(out, 'shape', None)
                if shape is not None:
                    print("blk %d : %s" % (i+1, shape))
        return out