In [1]:
from mxnet import gluon,init,nd,autograd
from mxnet.gluon import nn,data as gdata,loss as gloss

# 1.Res_block
---

In [2]:
class Residual(nn.Block):
    def __init__(self,num_channels,use_1x1conv = False,strides = 1,**kwargs):
        super(Residual,self).__init__(**kwargs)
        
        self.conv1 = nn.Conv2D(num_channels,kernel_size=3,padding=1,strides=strides)
        self.conv2 = nn.Conv2D(num_channels,kernel_size=3,padding=1)
        
        if use_1x1conv:
            self.conv3 = nn.Conv2D(num_channels,kernel_size=1,strides=strides)
        else:
            self.conv3 = None
            
        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()
    
    def forward(self,X):
        Y = nd.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return nd.relu(Y + X)

In [3]:
# 浅层测试
blk = Residual(3)
blk.initialize()
X = nd.random.uniform(shape=(8,3,12,12))
blk(X).shape

(8, 3, 12, 12)

In [4]:
# 深层测试
blk = Residual(6,use_1x1conv=True,strides=2)
blk.initialize()
blk(X).shape

(8, 6, 6, 6)

In [5]:
def resnet_block(num_channels,num_residual,first_block = False):
    blk = nn.Sequential()
    for i in range(num_residual):
        if i == 0 and not first_block:
            blk.add(Residual(num_channels,use_1x1conv=True,strides=2))
        else:
            blk.add(Residual(num_channels))
    return blk

In [6]:
net = nn.Sequential()
net.add(nn.Conv2D(64,kernel_size=7,strides=2,padding=3),
        nn.BatchNorm(),
        nn.Activation('relu'),
        nn.MaxPool2D(pool_size=3,strides=2,padding=1))

In [7]:
net.add(resnet_block(64,2,first_block=True), 
       resnet_block(128,2),
       resnet_block(256,2),
       resnet_block(512,2))

In [8]:
net.add(nn.GlobalAvgPool2D(),
        nn.Dense(10))

In [9]:
X = nd.random.uniform(shape=(1,1,224,224))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name,"output shape:\t",X.shape)

conv5 output shape:	 (1, 64, 112, 112)
batchnorm4 output shape:	 (1, 64, 112, 112)
relu0 output shape:	 (1, 64, 112, 112)
pool0 output shape:	 (1, 64, 56, 56)
sequential1 output shape:	 (1, 64, 56, 56)
sequential2 output shape:	 (1, 128, 28, 28)
sequential3 output shape:	 (1, 256, 14, 14)
sequential4 output shape:	 (1, 512, 7, 7)
pool1 output shape:	 (1, 512, 1, 1)
dense0 output shape:	 (1, 10)


In [10]:
batch_size=256
mnist_train = gdata.vision.FashionMNIST(train=True)
mnist_test = gdata.vision.FashionMNIST(train=False)

transformer = gdata.vision.transforms.ToTensor()
train_iter = gdata.DataLoader(mnist_train.transform_first(transformer),batch_size=batch_size,shuffle=True)
test_iter = gdata.DataLoader(mnist_test.transform_first(transformer),batch_size=batch_size,shuffle=False)

In [11]:
def evaluate_accuracy(net,data_iter):
    acc_sum,n = nd.array([0]),0
    for X,y in data_iter:
        y_hat = net(X)
        acc_sum += (y_hat.argmax(axis = 1) == y.astype('float32')).sum()
        print('evaluate_accuracy,acc_sum.shape:',acc_sum.shape)
        n += y.size
    return acc_sum/n

In [12]:
def train(net,train_iter,test_iter,
         batch_size,num_epochs,trainer,lr):
    l_sum,acc_sum ,n = 0.0,0.0,0
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        print('now is trainning...epoch %d' %epoch)
        for X,y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat,y).sum()
            l.backward()
            trainer.step(batch_size)
            
            acc_sum = (net(X).argmax(axis = 1) == y.astype('float32')).sum().asscalar()
            l_sum += l.asscalar()
            n += y.size
        test_acc = evaluate_accuracy(net,test_iter)
        print('epoch %d ==========' %epoch)
        print('train_acc : ',acc_sum/n)
        print('train_loss :',l_sum/n)
        print('\n test_acc :',test_acc)

In [None]:
num_epochs,lr, = 5,0.05
net.initialize(force_reinit=True,init = init.Xavier())
trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':lr})
train(net,train_iter,test_iter,
     batch_size,num_epochs,trainer,lr)

now is trainning...epoch 0
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_sum.shape: (1,)
evaluate_accuracy,acc_s