In [1]:
# 5.5.1 LeNet模型
import d2lzh as d2l
import mxnet as mx
from mxnet import autograd,gluon,init,nd
from mxnet.gluon import loss as gloss,nn
import time

net=nn.Sequential()
net.add(nn.Conv2D(channels=6,kernel_size=5,activation='sigmoid'),# 卷积层
       nn.MaxPool2D(pool_size=2,strides=2),# 最大池化层
       nn.Conv2D(channels=6,kernel_size=5,activation='sigmoid'),
       nn.MaxPool2D(pool_size=2,strides=2),
       # Dense会默认将(批量⼤⼩, 通道, ⾼, 宽)形状的输⼊转换成(批量⼤⼩, 通道*⾼*宽)形状的输入
       nn.Dense(120,activation='sigmoid'),# 全连接层
       nn.Dense(84,activation='sigmoid'),
       nn.Dense(10)# 输出层
       )

In [2]:
X=nd.random.uniform(shape=(1,1,28,28))
net.initialize()
# 逐层进⾏前向计算来查看每个层的输出形状。
# 卷积层由于使⽤⾼和宽均为5的卷积核，从而将⾼和宽分别减小4，而池化层则将⾼和宽减半，但通道数则从1增加到16。
# 全连接层则逐层减少输出个数，直到变成图像的类别数10。 
for layer in net:
    X=layer(X)
    print(layer.name,'output shape:\t',X.shape)    

conv0 output shape:	 (1, 6, 24, 24)
pool0 output shape:	 (1, 6, 12, 12)
conv1 output shape:	 (1, 6, 8, 8)
pool1 output shape:	 (1, 6, 4, 4)
dense0 output shape:	 (1, 120)
dense1 output shape:	 (1, 84)
dense2 output shape:	 (1, 10)


In [3]:
# 5.5.2 获取数据和训练模型 
batch_size=256
train_iter,test_iter=d2l.load_data_fashion_mnist(batch_size=batch_size)

In [4]:
# 尝试在gpu(0)上 创建NDArray，如果成功则使⽤gpu(0)，否则仍然使⽤CPU。 
# 本函数已保存在d2lzh包中⽅便以后使⽤ 
def try_gpu():
    try:
        ctx=mx.gpu()
        _=nd.zeros((1,),ctx=ctx)# 尝试在gpu上创建NDArray
    except mx.base.MXNetError:
            ctx =mx.cpu()
    return ctx

In [8]:
# 本函数已保存在d2lzh包中⽅便以后使⽤。
def evaluate_accuracy(data_iter,net,ctx):
    acc_sum,n=nd.array([0],ctx=ctx),0
    for X,y in data_iter:
        # 如果ctx代表GPU及相应的显存，将数据复制到显存上
        # 如果ctx代表CPU及内存，则目标变量和源变量共享源变量的内存
        X,y=X.as_in_context(ctx),y.as_in_context(ctx).astype('float32')
        acc_sum+=( net(X).argmax(axis=1)==y ).sum()# argmax(axis=1)返回每行最大元素的索引
        n+=y.size
    return acc_sum.asscalar()/n

In [13]:
# 本函数已保存在d2lzh包中⽅便以后使⽤
def train_ch5(net,train_iter,test_iter,batch_size,trainer,ctx,num_epochs):
    print('training on',ctx)
    loss=gloss.SoftmaxCrossEntropyLoss()
    for epoch in range (num_epochs):
        train_l_sum,train_acc_sum,n,start=0.0,0.0,0,time.time()
        for X,y in train_iter:
            X,y=X.as_in_context(ctx),y.as_in_context(ctx)# 将数据复制到相应的设备上
            with autograd.record():
                y_hat=net(X)
                l=loss(y_hat,y).sum() #小批量样品的损失函数和
            l.backward()
            trainer.step(batch_size)
            y=y.astype('float32')
            train_l_sum+=l.asscalar()# 所有样品的损失函数的和
            train_acc_sum+=( y_hat.argmax(axis=1)==y ).sum().asscalar()# 所有样品预测准确的个数
            n+=y.size# 样本数
        test_acc=evaluate_accuracy(test_iter,net,ctx)
        print('epoch %d,loss %.4f,train acc %.3f,test acc %.3f,time %.lf sec'
              %(epoch+1,train_l_sum/n,train_acc_sum/n,test_acc,time.time()-start))

In [20]:
ctx=mx.gpu()
lr,num_epochs=0.9,5
# 重新将模型参数初始化到设备变量ctx之上，并使⽤Xavier随机初始化。
# 损失函数和训练算 法则依然使⽤交叉熵损失函数和小批量随机梯度下降。 
net.initialize(force_reinit=True,ctx=ctx,init=init.Xavier())
trainer=gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':lr})
train_ch5(net,train_iter,test_iter,batch_size,trainer,ctx,num_epochs)                      

training on gpu(0)
epoch 1,loss 2.2773,train acc 0.133,test acc 0.372,time 6 sec
epoch 2,loss 1.1670,train acc 0.540,test acc 0.671,time 5 sec
epoch 3,loss 0.8433,train acc 0.673,test acc 0.713,time 5 sec
epoch 4,loss 0.7180,train acc 0.721,test acc 0.730,time 5 sec
epoch 5,loss 0.6530,train acc 0.744,test acc 0.765,time 5 sec


In [5]:
net.name??

In [6]:
nn.MaxPool2D??

In [7]:
d2l.evaluate_accuracy??
autograd.record??

In [17]:
d2l.train_ch3??
gluon.Trainer??


Object `gluon.Optimize` not found.
