In [1]:
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import SimpleRNN, Activation, Dense
from keras.optimizers import Adam

Using TensorFlow backend.


MNIST 中的图像分辨率是 28×28。为了使用 RNN，我们把每张图像看作一个序列：图像的每一行作为一个时间步的输入，因此每个时间步的输入维度 INPUT_SIZE = 28；按第 1 行、第 2 行、第 3 行、…、第 28 行的顺序依次输入，一张图片就是一个长度为 28 的序列，所以时间步数 TIME_STEPS = 28。

In [2]:
# --- sequence geometry: one image row is consumed per time step ---
TIME_STEPS, INPUT_SIZE = 28, 28   # (image height, image width)

# --- network dimensions ---
CELL_SIZE = 50      # units in the recurrent layer
OUTPUT_SIZE = 10    # width of the final Dense layer

# --- training hyper-parameters ---
LR = 1e-3           # learning rate passed to Adam
BATCH_SIZE = 50     # samples per train_on_batch call
BATCH_INDEX = 0     # running offset into the training set; advanced by the training loop

In [3]:
# Load MNIST as (images, integer labels) pairs for the train and test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Sanity check before transforming: every image must have a label.
assert X_train.shape[0] == y_train.shape[0], "train images/labels count mismatch"
assert X_test.shape[0] == y_test.shape[0], "test images/labels count mismatch"

# data pre-processing
# Shape the images as (samples, TIME_STEPS, INPUT_SIZE) using the config
# constants (instead of repeating the literal 28s) so the data layout always
# matches the batch_input_shape given to the RNN layer, then divide by 255.
# to normalize the pixel values.
X_train = X_train.reshape(-1, TIME_STEPS, INPUT_SIZE) / 255.      # normalize
X_test = X_test.reshape(-1, TIME_STEPS, INPUT_SIZE) / 255.        # normalize

# One-hot encode the labels; the number of classes is tied to OUTPUT_SIZE so
# the target width always equals the width of the model's final Dense layer.
y_train = np_utils.to_categorical(y_train, num_classes=OUTPUT_SIZE)
y_test = np_utils.to_categorical(y_test, num_classes=OUTPUT_SIZE)

In [6]:
# Build the RNN model: start from an empty Sequential container; the
# following statements append layers to it one at a time with model.add().
model = Sequential()

In [7]:
# Recurrent layer.
# The batch dimension of batch_input_shape is left as None: per the original
# author's note, with the TensorFlow backend a fixed batch size there makes
# model.evaluate() raise an error.
# (Alternative spelling: input_dim=INPUT_SIZE, input_length=TIME_STEPS.)
rnn_layer = SimpleRNN(
    units=CELL_SIZE,                                    # dimensionality of the output space
    batch_input_shape=(None, TIME_STEPS, INPUT_SIZE),
    unroll=True,
)
model.add(rnn_layer)

In [8]:
# Classification head: a Dense layer of width OUTPUT_SIZE followed by a
# softmax activation, appended to the model in that order.
for head_layer in (Dense(OUTPUT_SIZE), Activation('softmax')):
    model.add(head_layer)

In [9]:
# Optimizer: Adam configured with the learning rate from the config cell.
adam = Adam(LR)

# Configure training: categorical cross-entropy loss (the labels are one-hot
# vectors) and accuracy reported as an additional metric.
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=adam,
)

In [10]:
# Manual mini-batch training: 4001 single-batch updates, with a full test-set
# evaluation printed every 500 steps (including step 0, before much training).
n_train = X_train.shape[0]
for step in range(4001):
    # Take rows [BATCH_INDEX, BATCH_INDEX + BATCH_SIZE) along the sample axis;
    # the remaining axes (steps, inputs/outputs) are kept whole.
    rows = slice(BATCH_INDEX, BATCH_INDEX + BATCH_SIZE)
    X_batch, Y_batch = X_train[rows], y_train[rows]
    cost = model.train_on_batch(X_batch, Y_batch)

    # Advance the offset; wrap back to the start once it reaches the end of
    # the training set.
    BATCH_INDEX = rows.stop if rows.stop < n_train else 0

    if step % 500 != 0:
        continue

    # Evaluate on the whole test set in a single batch.
    cost, accuracy = model.evaluate(X_test, y_test, batch_size=y_test.shape[0], verbose=False)
    print('test cost: ', cost, 'test accuracy: ', accuracy)

test cost:  2.39386796951 test accuracy:  0.0806000009179
test cost:  0.583218753338 test accuracy:  0.821500003338
test cost:  0.384209364653 test accuracy:  0.887799978256
test cost:  0.320295006037 test accuracy:  0.905499994755
test cost:  0.296145021915 test accuracy:  0.912100017071
test cost:  0.250418365002 test accuracy:  0.930999994278
test cost:  0.229705572128 test accuracy:  0.936200022697
test cost:  0.214046314359 test accuracy:  0.94059997797
test cost:  0.23217612505 test accuracy:  0.935000002384
