# resnet

## residual模块
ResNet沿用了VGG那种全部使用3×3卷积的设计，但在卷积和池化层之间加入了批量归一化层来加速训练。每次跨层连接跨过两层卷积。这里我们定义一个这样的残差块。注意，如果输入的通道数和输出不一样（same_shape=False），我们使用一个额外的1×1卷积来做通道变换，同时使用strides=2来把长宽减半。

![](https://www.github.com/DragonFive/CVBasicOp/raw/master/1514013854016.jpg)

In [35]:
from mxnet.gluon import nn
from mxnet import nd
class Residual(nn.Block):
    """Residual block made of two 3x3 convolutions with batch normalization.

    Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN. The result is added
    to the shortcut branch and passed through a final ReLU.

    Parameters
    ----------
    channels : int
        Channel count passed to every convolution of the block.
    same_shape : bool, default True
        If true, every convolution uses stride 1 and the input itself is
        used as the shortcut. If false, the first convolution uses stride 2
        and the shortcut goes through a 1x1 stride-2 convolution followed by
        batch normalization, so both branches can be added.
    **kwargs
        Forwarded unchanged to ``nn.Block.__init__``.
    """

    def __init__(self, channels, same_shape=True, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.same_shape = same_shape
        first_stride = 2 if not same_shape else 1

        # Main path: only the first convolution may downsample.
        self.conv1 = nn.Conv2D(channels, kernel_size=3,
                               strides=first_stride, padding=1)
        self.bn1 = nn.BatchNorm()
        self.conv2 = nn.Conv2D(channels, kernel_size=3, strides=1, padding=1)
        self.bn2 = nn.BatchNorm()

        if same_shape:
            # Identity shortcut: no projection layers are created.
            return

        # Projection shortcut, strided like conv1 so the two branches match.
        self.conv3 = nn.Conv2D(channels, kernel_size=1, strides=first_stride)
        self.bn3 = nn.BatchNorm()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        main = self.conv1(x)
        main = nd.relu(self.bn1(main))
        main = self.bn2(self.conv2(main))
        shortcut = x if self.same_shape else self.bn3(self.conv3(x))
        return nd.relu(main + shortcut)


测试

In [36]:
blk = Residual(8,same_shape=False)  # downsampling variant: stride-2 first conv plus 1x1 projection shortcut
blk.initialize()
x = nd.random.normal(shape=(4,3,6,6))  # random batch of 4 with 3 channels and 6x6 spatial size
y = blk(x)
print(y.shape)  # recorded output below: 8 channels, spatial size halved to 3x3

(4L, 8L, 3L, 3L)


## resnet16结构

![image.png](https://www.github.com/DragonFive/CVBasicOp/raw/master/1514012389756.jpg)

In [50]:
class Resnet16(nn.Block):
    """ResNet-style classifier assembled from six sequential stages.

    Stage layout (in order):

    1. 7x7 stride-2 convolution with 64 channels, batch norm, ReLU.
    2. 3x3 stride-2 max pooling followed by two 64-channel ``Residual``
       blocks.
    3. to 5. One ``Residual(same_shape=False)`` block followed by one plain
       ``Residual`` block, at 128, 256 and 512 channels respectively.
    6. 4x4 average pooling, flatten, dense layer with ``num_output`` units.

    Parameters
    ----------
    num_output : int
        Number of units of the final dense layer.
    verbose : bool, default False
        If true, ``forward`` prints the output shape after every stage.
    **kwargs
        Forwarded unchanged to ``nn.Block.__init__``.
    """

    def __init__(self, num_output, verbose=False, **kwargs):
        super(Resnet16, self).__init__(**kwargs)

        # Stage 1: convolutional stem.
        stem = nn.Sequential()
        stem.add(nn.Conv2D(channels=64, kernel_size=7, strides=2, padding=3))
        stem.add(nn.BatchNorm())
        stem.add(nn.Activation(activation='relu'))

        # Stage 2: max pooling, then residual blocks that keep 64 channels.
        pooled = nn.Sequential()
        pooled.add(nn.MaxPool2D(pool_size=3, strides=2))
        pooled.add(Residual(64))
        pooled.add(Residual(64))

        stages = [stem, pooled]

        # Stages 3-5: a strided Residual block followed by a plain one.
        for width in (128, 256, 512):
            stage = nn.Sequential()
            stage.add(Residual(width, same_shape=False))
            stage.add(Residual(width))
            stages.append(stage)

        # Stage 6: pooling and the dense output layer.
        head = nn.Sequential()
        head.add(nn.AvgPool2D(pool_size=4))
        head.add(nn.Flatten())
        head.add(nn.Dense(num_output))
        stages.append(head)

        self.verbose = verbose
        self.net = nn.Sequential()
        self.net.add(*stages)

    def forward(self, x):
        """Run ``x`` through all stages, printing each shape when verbose."""
        out = x
        for index, stage in enumerate(self.net, 1):
            out = stage(out)
            if self.verbose:
                print("block %d output is %s" % (index, out.shape))
        return out

In [52]:
net = Resnet16(10,True)  # 10 output units; the second positional argument enables verbose shape printing
net.initialize()
x = nd.random.normal(shape=(4,3,112,112))  # a 112x112 input reaches block 5 as 4x4, matching AvgPool2D(pool_size=4)
net(x)

block 1 output is (4L, 64L, 56L, 56L)
block 2 output is (4L, 64L, 27L, 27L)
block 3 output is (4L, 128L, 14L, 14L)
block 4 output is (4L, 256L, 7L, 7L)
block 5 output is (4L, 512L, 4L, 4L)
block 6 output is (4L, 10L)



[[-4.530291   -0.5240102  15.62335    -8.492277   10.855072   15.0826435
  12.526077   -0.03772478  7.58306     3.8723946 ]
 [-4.027771   -2.7261052  14.776473   -7.7309847  10.998182   15.311948
  12.584428    0.45461935  7.239169    4.0754642 ]
 [-4.6466947  -1.0142665  16.139532   -8.402838   12.904835   14.8633175
  13.069327    0.5838256   8.8780575   3.416175  ]
 [-4.770364   -1.3705606  16.504946   -6.8040566  12.002779   16.16553
  12.887781    0.06493378  7.07369     2.933753  ]]
<NDArray 4x10 @cpu(0)>

In [None]:
import sys
sys.path.append('..')
import utils
from mxnet import gluon
from mxnet import init

train_data, test_data = utils.load_data_fashion_mnist(
    batch_size=128, resize=112)  # resize=112 presumably yields 112x112 images, the size used in the shape check above; confirm in utils

ctx = utils.try_gpu()  # device picked by the project helper; the recorded run below used gpu(0)
net = Resnet16(10)  # verbose keeps its default (False), so no per-block shapes are printed during training
net.initialize(ctx=ctx, init=init.Xavier())

loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(),
                        'sgd', {'learning_rate': 0.01})
utils.train(train_data, test_data, net, loss,
            trainer, ctx, num_epochs=5,show=True)  # NOTE(review): the meaning of show=True is defined in utils and not visible here

('Start training on ', gpu(0))
Epoch 0. Loss: 0.498, Train acc 0.83, Test acc 0.88, Time 46.3 sec
Epoch 1. Loss: 0.282, Train acc 0.90, Test acc 0.90, Time 46.4 sec
Epoch 2. Loss: 0.225, Train acc 0.92, Test acc 0.91, Time 46.7 sec
Epoch 3. Loss: 0.180, Train acc 0.94, Test acc 0.91, Time 46.5 sec
