In [34]:
from mxnet import gluon,init,nd
from mxnet.gluon import nn
import d2lzh as d2l

# 1. Residual block
---

In [35]:
class Residual(nn.Block):
    """Residual block: two 3x3 convolutions with batch norm plus a skip connection.

    The main path is conv1 -> bn1 -> relu -> conv2 -> bn2. Its result is added
    to the (optionally 1x1-convolved) input and passed through a final relu.

    Parameters
    ----------
    num_channels : int
        Number of output channels of every convolution in the block.
    use_1x1conv : bool, default False
        If True, the input is passed through a 1x1 convolution (using
        `strides`) before being added to the main path.
    strides : int, default 1
        Stride of the first 3x3 convolution and of the optional 1x1 convolution.
    **kwargs
        Forwarded unchanged to the parent `nn.Block` constructor.
    """
    def __init__(self,num_channels,use_1x1conv = False,strides = 1,**kwargs):
        super(Residual,self).__init__(**kwargs)
        
        # Main path: only the first convolution applies the stride.
        self.conv1 = nn.Conv2D(num_channels,kernel_size=3,padding=1,strides = strides)
        self.conv2 = nn.Conv2D(num_channels,kernel_size=3,padding=1)
        
        # Skip path: optional 1x1 convolution using the same channel count and stride.
        if use_1x1conv:
            self.conv3 = nn.Conv2D(num_channels,kernel_size=1,strides=strides)
        else:
            self.conv3 = None
        
        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()
        
    def forward(self,X):
        """Return relu(main_path(X) + skip(X)).

        Without the 1x1 convolution, X is added unchanged, so it has to be
        shape-compatible with the output of the main path.
        """
        Y = nd.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        # Explicit None check: do not rely on the truthiness of a layer object.
        if self.conv3 is not None:
            X = self.conv3(X)
        return nd.relu(Y+X)

In [36]:
# Case where the input and output shapes are the same:
# 3 input channels and Residual(3) with the default stride and no 1x1 conv.
blk = Residual(3)
blk.initialize()
X = nd.random.uniform(shape=(4,3,6,6))
blk(X).shape

(4, 3, 6, 6)

In [37]:
# Increase the number of output channels while halving the output height and width.
# Reuses the X created in the previous cell.
blk = Residual(6,use_1x1conv=True,strides=2)
blk.initialize()
blk(X).shape

(4, 6, 3, 3)

# 2. ResNet definition
---

In [38]:
net = nn.Sequential()
# The first two layers are the same as in GoogLeNet: a 7x7 stride-2 convolution
# and a 3x3 stride-2 max pooling; here the convolution is followed by
# batch normalization and a relu activation.
net.add(nn.Conv2D(64,kernel_size=7,strides=2,padding=3),
        nn.BatchNorm(),
        nn.Activation('relu'),
        nn.MaxPool2D(pool_size=3,strides=2,padding=1))

In [39]:
def resnet_block(num_channels,num_residual,first_block = False):
    """Stack `num_residual` Residual blocks into a single nn.Sequential.

    Unless `first_block` is true, the leading Residual is built with a 1x1
    convolution on the skip path and stride 2; every other Residual is built
    with only `num_channels`, i.e. its default arguments.
    """
    stage = nn.Sequential()
    for idx in range(num_residual):
        # Only the first unit of a non-first stage changes the shape.
        downsample = (idx == 0) and (not first_block)
        if downsample:
            unit = Residual(num_channels,use_1x1conv=True,strides=2)
        else:
            unit = Residual(num_channels)
        stage.add(unit)
    return stage

In [40]:
# Add the residual blocks to the ResNet; each module uses 2 residual blocks.
net.add(resnet_block(64,2,first_block=True),# except for this module, every other module starts with a 1x1-conv residual block
       resnet_block(128,2),
       resnet_block(256,2),
       resnet_block(512,2))

In [41]:
# Finally, as in GoogLeNet, add a global average pooling layer
# followed by a fully connected output layer (10 outputs).
net.add(nn.GlobalAvgPool2D(),
        nn.Dense(10))

In [42]:
# Inspect the network: push a random (1,1,224,224) input through it
# layer by layer and print the output shape after each top-level layer.
X = nd.random.uniform(shape=(1,1,224,224))
net.initialize()
for layer in net:
    # X is overwritten with each layer's output so it feeds the next layer.
    X = layer(X)
    print(layer.name,'output shape:\t',X.shape)

conv85 output shape:	 (1, 64, 112, 112)
batchnorm71 output shape:	 (1, 64, 112, 112)
relu3 output shape:	 (1, 64, 112, 112)
pool7 output shape:	 (1, 64, 56, 56)
sequential18 output shape:	 (1, 64, 56, 56)
sequential19 output shape:	 (1, 128, 28, 28)
sequential20 output shape:	 (1, 256, 14, 14)
sequential21 output shape:	 (1, 512, 7, 7)
pool8 output shape:	 (1, 512, 1, 1)
dense4 output shape:	 (1, 10)


In [None]:
# Training hyperparameters; ctx is whatever device d2l.try_gpu() selects.
lr,num_epochs,batch_size,ctx = 0.05,5,256,d2l.try_gpu()
# Re-initialize the parameters (the shape-check cell already initialized them)
# with Xavier init, and place them on ctx so they live on the same device
# that is handed to the training helper below.
net.initialize(force_reinit=True,ctx=ctx,init = init.Xavier())
trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':lr})
# Fashion-MNIST train/test iterators from d2l, requested with resize=96.
train_iter,test_iter = d2l.load_data_fashion_mnist(batch_size,resize=96)
d2l.train_ch5(net,train_iter,test_iter,
             batch_size,trainer,ctx,num_epochs)

training on cpu(0)
