# 实现一个早期用来识别手写数字图像的卷积神经网络LeNet

## LeNet分为卷积层块和全连接层块两个部分
+ 其中卷积层块的基本单位是卷积层后接最大池化层
    - 卷积层用来识别图像里的空间模式
    - 最大池化层用来降低卷积层对位置的敏感性
+ 全连接层块会将小批量中的每个样本变平（flatten）。也就是说，全连接层的输入形状将变成二维，其中第一维为小批量中的样本，第二维为每个样本变平后的向量表示

## 利用Sequential类来实现 LeNet模型

In [1]:
import sys
sys.path.append('../')
import time

In [2]:
import gluonbook as gb
import mxnet as mx 
from mxnet.gluon import data as gdata,nn,loss as gloss
from mxnet import nd,autograd,init,gluon

  from ._conv import register_converters as _register_converters


In [3]:
# Create an empty sequential container; the layers are appended to it below.
LeNet = nn.Sequential()

## 搭建LeNet模型

In [4]:
# Convolutional part: two (convolution -> 2x2 max pooling) stages.
LeNet.add(
    # First convolution: 8 output channels, 5x5 kernel, ReLU activation.
    nn.Conv2D(channels=8, kernel_size=5, activation='relu'),
    # Max pooling makes the features less sensitive to exact spatial position.
    nn.MaxPool2D(pool_size=2, strides=2),
    # Second convolution: 20 output channels, 3x3 kernel, ReLU activation.
    nn.Conv2D(channels=20, kernel_size=3, activation='relu'),
    nn.MaxPool2D(pool_size=2, strides=2),
)

# Fully connected part. By default Dense flattens an input of shape
# (batch size, channels, height, width) into
# (batch size, channels * height * width).
LeNet.add(
    nn.Dense(240, activation='relu'),
    nn.Dense(120, activation='relu'),
    # Output layer: 10 units, no activation.
    nn.Dense(10),
)

## 观察每一层的输出

In [5]:
def print_output(X, net):
    """Feed ``X`` through ``net`` layer by layer, printing each output shape.

    Args:
        X: Input handed to the first layer. The result of every layer must
            expose a ``shape`` attribute.
        net: Iterable of callable layers, each with a ``name`` attribute.
    """
    out = X
    for block in net:
        # The output of one layer is the input of the next one.
        out = block(out)
        print(block.name, 'output shape:\t', out.shape)

In [6]:
# Random single-channel 28x28 input with batch size 1, created on the GPU.
# NOTE(review): mx.gpu() is hard-coded in this cell; presumably it fails on a
# machine without a usable GPU — confirm before running on CPU only.
X = nd.random.uniform(shape=(1,1,28,28),ctx=mx.gpu())
LeNet.initialize(ctx=mx.gpu())

# Run the dummy input through the network and print every layer's output shape.
print_output(X,LeNet)

conv0 output shape:	 (1, 8, 24, 24)
pool0 output shape:	 (1, 8, 12, 12)
conv1 output shape:	 (1, 20, 10, 10)
pool1 output shape:	 (1, 20, 5, 5)
dense0 output shape:	 (1, 240)
dense1 output shape:	 (1, 120)
dense2 output shape:	 (1, 10)


##  获取数据和训练

In [7]:
# Mini-batch size; it is passed to the data loader here and to the training
# function (and from there to trainer.step) later on.
batch_size  = 256
# gb.load_data_fashion_mnist returns the training and test iterators;
# root points at a local dataset directory.
train_iter,test_iter = gb.load_data_fashion_mnist(batch_size,root='../chapter1_baseKnowledge/FashionMNIST/')

In [8]:
def try_gpu4():
    """Return ``mx.gpu()`` if an array can be created on it, else ``mx.cpu()``."""
    try:
        device = mx.gpu()
        # Creating a tiny array on the device is the actual probe; an
        # MXNetError here means the GPU context cannot be used.
        probe = nd.zeros((1,), ctx=device)
    except mx.base.MXNetError:
        return mx.cpu()
    return device

In [9]:
# Select the device used for training: the GPU when usable, otherwise the CPU.
ctx = try_gpu4()
# Bare expression so that the notebook displays the selected context.
ctx

gpu(0)

In [10]:
# Evaluation helper that runs the forward pass on the given device (e.g. GPU).
def evaluate_accuracy(data_iter,net,ctx):
    """Compute the classification accuracy of ``net`` over ``data_iter``.

    Correct predictions are counted per sample and divided by the total
    number of samples. Averaging per-batch accuracies instead would give a
    smaller final batch the same weight as a full one and bias the result.

    Args:
        data_iter: Iterable yielding ``(X, y)`` mini-batches. It only needs
            to be iterable; ``len()`` is not required.
        net: Callable model; ``net(X)`` must return one score per class with
            the classes along axis 1.
        ctx: Device every batch is copied to before the forward pass.

    Returns:
        The fraction of correctly classified samples as a scalar.
    """
    # Keep the running count on the device so batches are not synchronised
    # with the host one by one.
    correct = nd.array([0], ctx=ctx)
    n = 0
    for X, y in data_iter:
        X, y = X.as_in_context(ctx), y.as_in_context(ctx)
        # Cast the labels to float32 so they can be compared element-wise
        # with the predicted class indices returned by argmax.
        y = y.astype('float32')
        correct += (net(X).argmax(axis=1) == y).sum()
        n += y.size
    return correct.asscalar() / n

In [11]:
def train_LeNet_on_GPU(net,train_iter,test_iter,batch_size,trainer,ctx,num_epochs):
    """Train ``net`` on ``ctx`` and print metrics after every epoch.

    The reported training loss and accuracy are averaged over the number of
    samples seen in the epoch. Averaging per-batch means instead would
    over-weight a final batch that is smaller than ``batch_size``.

    Args:
        net: Model to train; called as ``net(X)``.
        train_iter: Iterable of ``(X, y)`` training mini-batches.
        test_iter: Iterable of ``(X, y)`` test mini-batches, evaluated once
            per epoch with ``evaluate_accuracy``.
        batch_size: Value passed to ``trainer.step`` after every backward pass.
        trainer: Optimiser wrapper exposing ``step(batch_size)``.
        ctx: Device the batches are copied to.
        num_epochs: Number of passes over ``train_iter``.
    """
    print('training on ',ctx)
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(num_epochs):
        # Per-epoch accumulators: summed loss, number of correct predictions
        # and number of samples processed.
        train_l_sum, train_correct, n = 0.0, 0.0, 0
        start = time.time()
        for X, y in train_iter:
            X = X.as_in_context(ctx)
            y = y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            trainer.step(batch_size)
            # Accumulate sums (not means) so the epoch average is per sample.
            train_l_sum += l.sum().asscalar()
            # Labels are cast to float32 to be comparable with argmax output.
            hits = y_hat.argmax(axis=1) == y.astype('float32')
            train_correct += hits.sum().asscalar()
            n += y.size

        test_acc = evaluate_accuracy(test_iter, net, ctx)

        # The elapsed time includes the test-set evaluation above.
        print('epoch %d,loss %.4f,train acc %.3f,test acc %.3f,'
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_correct / n, test_acc,
                 time.time() - start))

In [14]:
# Optimisation hyper-parameters; num_epochs is reassigned before training runs.
lr = 0.1
num_epochs = 5
# Re-initialise every parameter on the selected device with Xavier
# initialisation, replacing the values from the earlier initialize() call.
LeNet.initialize(init=init.Xavier(), ctx=ctx, force_reinit=True)
# SGD trainer over all parameters of the network.
trainer = gluon.Trainer(LeNet.collect_params(), 'sgd', {'learning_rate': lr})

## GPU 训练速度大约是 CPU 的 5 倍；以 ReLU 为激活函数的 CNN，学习率要设置得小一点，否则不会收敛

In [15]:
# Override the epoch count set earlier, then run the training loop.
num_epochs = 30
train_LeNet_on_GPU(LeNet,train_iter,test_iter,batch_size,trainer,ctx,num_epochs)

training on  gpu(0)
epoch 1,loss 0.9342,train acc 0.652,test acc 0.785,time 5.3 sec
epoch 2,loss 0.5306,train acc 0.799,test acc 0.832,time 5.4 sec
epoch 3,loss 0.4572,train acc 0.830,test acc 0.855,time 5.3 sec
epoch 4,loss 0.4096,train acc 0.850,test acc 0.866,time 5.4 sec
epoch 5,loss 0.3776,train acc 0.861,test acc 0.874,time 5.4 sec
epoch 6,loss 0.3546,train acc 0.871,test acc 0.880,time 5.3 sec
epoch 7,loss 0.3394,train acc 0.875,test acc 0.882,time 5.3 sec
epoch 8,loss 0.3215,train acc 0.882,test acc 0.883,time 5.3 sec
epoch 9,loss 0.3096,train acc 0.888,test acc 0.887,time 5.3 sec
epoch 10,loss 0.2995,train acc 0.889,test acc 0.891,time 5.4 sec
epoch 11,loss 0.2918,train acc 0.894,test acc 0.887,time 5.3 sec
epoch 12,loss 0.2819,train acc 0.897,test acc 0.897,time 5.4 sec
epoch 13,loss 0.2706,train acc 0.899,test acc 0.896,time 5.4 sec
epoch 14,loss 0.2660,train acc 0.901,test acc 0.890,time 5.4 sec
epoch 15,loss 0.2567,train acc 0.905,test acc 0.901,time 5.3 sec
epoch 16,loss 