In [1]:
import d2lzh as d2l
import mxnet as mx
from mxnet import autograd,gluon,init,nd
from mxnet.gluon import loss as gloss,nn
import time

In [2]:
X = nd.random.normal(shape=(1,1,28,28))

batch_size = 256
train_iter,test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

In [3]:
# 注意！！用的全是sigmoid
net = nn.Sequential()
net.add(nn.Conv2D(channels=6,kernel_size=5,activation='sigmoid'),
        nn.MaxPool2D(pool_size=2,strides=2),
        nn.Conv2D(channels=16,kernel_size=5,activation='sigmoid'),
        nn.MaxPool2D(pool_size=2,strides=2),

        nn.Dense(120,activation='sigmoid'),
        nn.Dense(84,activation='sigmoid'),
        nn.Dense(10))
net.initialize()

In [4]:
for layer in net:# 可以这样看output信息！！！
    X=layer(X)
    print(layer.name,'output shape:\t',X.shape) 

conv0 output shape:	 (1, 6, 24, 24)
pool0 output shape:	 (1, 6, 12, 12)
conv1 output shape:	 (1, 16, 8, 8)
pool1 output shape:	 (1, 16, 4, 4)
dense0 output shape:	 (1, 120)
dense1 output shape:	 (1, 84)
dense2 output shape:	 (1, 10)


In [5]:
# 2 # 判断有没有gpu可以用！！=====================================================
def try_gpu():
    try:
        ctx = mx.gpu()
        _ = nd.zeros((1,),ctx=ctx) # 测试一下这个cpu是否可用，
    except mx.base.MXNetError:
        ctx = mx.cpu()
    return ctx

ctx = try_gpu()
print(ctx)

cpu(0)


In [6]:
# 3 # 开始操作 ==================================================================

def evaluate_accuracy(data_iter,net,ctx):
    acc_sum,n = nd.array([0],ctx=ctx),0
    for X,y in data_iter:
        # 如果ctx代表GPU及相应显存，将数据复制到显存上。、记得这个操作！！
        X,y = X.as_in_context(ctx),y.as_in_context(ctx).astype('float32')
        acc_sum = (net(X).argmax(axis=1) == y).sum() # argmax在mxnet中会返回浮点数
        n+=y.size
    return acc_sum.asscalar()/n # 这里转成标量

In [7]:
def train_LeNet(train_iter,test_iter,net,
              batch_size,num_epochs,trainer,ctx):
    print('training on',ctx)
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum,train_acc_sum,n,start= 0.0,0.0,0,time.time()
        for X,y in train_iter:
            X,y = X.as_in_context(ctx),y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat,y).sum()
            l.backward()
            trainer.step(batch_size)

            # 都转化成标量！！
            y = y.astype('float32')
            train_l_sum = l.asscalar()
            train_acc_sum = (y_hat.argmax(axis =1 ) ==y).sum().asscalar()
            n+=y.size
        test_acc = evaluate_accuracy(test_iter,net,ctx) # 每个epochs完之后都要计算下模型的正确率
        print('epoch %d,loss %.3f,train_acc %.4f,test_acc %.4f,time: %.1f sec'
              %(epoch+1,train_l_sum/n,train_acc_sum/n,test_acc,time.time()-start))

In [8]:
# 4 # 走你！===============================================================
lr,epoch = 0.1,5

# force_reinit : Whether to force re-initialization if parameter is already initialized.是否重加载参数
# init : Global default Initializer to be used when Parameter.init() is None. 
#         Otherwise, Parameter.init() takes precedence.
net.initialize(force_reinit=True,ctx=ctx,init=init.Xavier())

trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':lr})
train_LeNet(train_iter,test_iter,net,batch_size,epoch,trainer,ctx)

training on cpu(0)
epoch 1,loss 0.004,train_acc 0.0002,test_acc 0.0003,time: 326.9 sec
epoch 2,loss 0.004,train_acc 0.0002,test_acc 0.0001,time: 325.6 sec
epoch 3,loss 0.004,train_acc 0.0002,test_acc 0.0003,time: 332.4 sec
epoch 4,loss 0.004,train_acc 0.0002,test_acc 0.0001,time: 328.5 sec
epoch 5,loss 0.004,train_acc 0.0001,test_acc 0.0003,time: 330.2 sec
