In [1]:
import keras
import numpy as np
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [2]:
from keras.datasets import mnist

# load_data() returns two (images, labels) pairs: the training split and the test split.
train_split, test_split = mnist.load_data()
train_image, train_label = train_split
test_image, test_label = test_split

# Show the shape of every array as a single tuple.
tuple(arr.shape for arr in (train_image, train_label, test_image, test_label))

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [3]:
# Check which container type the loader returned for the images.
type(train_image)

numpy.ndarray

# 数据预处理：多加一个维度

In [4]:
# Append a trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1), matching the
# input_shape=(28, 28, 1) given to the first Conv2D layer.
# Guarded on ndim so that re-running this cell is a no-op instead of stacking
# another axis onto an already-expanded array.
if train_image.ndim == 3:
    train_image = np.expand_dims(train_image, axis=-1)

In [6]:
# Inspect the training image array's shape after the channel axis was appended.
train_image.shape

(60000, 28, 28, 1)

In [7]:
# Append a trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1), the same layout
# as the training images.
# Guarded on ndim so that re-running this cell is a no-op instead of stacking
# another axis onto an already-expanded array.
if test_image.ndim == 3:
    test_image = np.expand_dims(test_image, axis=-1)

In [8]:
# Inspect the test image array's shape after the channel axis was appended.
test_image.shape

(10000, 28, 28, 1)

# 网络搭建

In [21]:
# Start from an empty Sequential model; layers are appended with model.add() in later cells.
# Gotcha: model.add() appends on every call, so re-running a layer-adding cell
# stacks duplicate layers. Re-run this cell first to reset the model.
model = keras.Sequential()

In [22]:
from keras import layers

In [23]:
# The first layer must be told the input shape. Only the last three dimensions
# (height, width, channels) are given; the leading batch dimension is left out.
conv_stack = [
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu',
                  input_shape=(28, 28, 1)),  # everything else is left at its default
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(),  # pooling layers are normally used with their defaults
]
for layer in conv_stack:
    model.add(layer)

In [24]:
model.summary()
# First conv layer, Param # = 640: 3*3*1*64 + 64 = 640 -- 64 kernels of size 3x3 over 1 input channel, plus one bias per kernel.
# Second conv layer, Param # = 36928: 3*3*64*64 + 64 = 36928 -- formula: kernel_size * kernel_size * input_channels * filters + filters (one bias per filter).

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 26, 26, 64)        640       
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 24, 24, 64)        36928     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 12, 12, 64)        0         
Total params: 37,568
Trainable params: 37,568
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Move on to the fully connected part of the network.
dense_head = [
    # Flatten (12, 12, 64) into 12*12*64 = 9216 values; every entry of that
    # 3-D volume is a feature.
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    # The network capacity is still fairly large, so apply dropout.
    layers.Dropout(0.5),
    # Final 10-way classification output; softmax is the multi-class activation.
    layers.Dense(10, activation='softmax'),
]
for layer in dense_head:
    model.add(layer)

In [26]:
# Print the layer table again now that Flatten / Dense / Dropout / Dense have been added.
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 26, 26, 64)        640       
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 24, 24, 64)        36928     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               2359552   
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                2570      

In [27]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss, accuracy metric.
model.compile( optimizer='adam',
               loss = 'sparse_categorical_crossentropy',  # labels are integer class ids (not one-hot), hence the sparse variant
               metrics=['acc']
)

In [28]:
# Train for 5 epochs with mini-batches of 512, evaluating on the test split after each epoch.
# NOTE(review): the test split doubles as validation data here, so the reported
# validation metrics are not an independent held-out estimate.
# NOTE(review): no pixel-rescaling step is visible in this notebook, so the images
# appear to be passed in as loaded -- confirm whether that is intended.
model.fit(train_image, train_label, epochs = 5, batch_size = 512, validation_data=(test_image, test_label) )


Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x28a65927f88>

# 如何进一步提高精度

根据前面所说，卷积层是特征的提取，池化层是非特征的丢弃。所以只需多加几组“卷积-池化层”即可。

理论上可以加很多组，但也别加太多：一是层数太多，计算会非常慢；二是可能出现梯度消失——反向传播到靠前的层时梯度已经接近 0，参数就更新不动了。

In [30]:
# For example, a larger network: two (conv, conv, max-pool) stages followed by
# the Flatten / Dense / Dropout / Dense classification head.
model = keras.Sequential([
    # Stage 1
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    # Stage 2
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    # Classification head
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax'),
])