In [1]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
# Read the MNIST data sets from ./MNIST_data with one-hot encoded labels.
mnist = input_data.read_data_sets(
    './MNIST_data',
    one_hot=True,
)

  from ._conv import register_converters as _register_converters


Extracting ./MNIST_data/train-images-idx3-ubyte.gz
Extracting ./MNIST_data/train-labels-idx1-ubyte.gz
Extracting ./MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ./MNIST_data/t10k-labels-idx1-ubyte.gz


# 构建模型

## 设置训练的超参数,分别设置学习率、训练次数和每轮训练的数据大小:

In [2]:
# Training hyperparameters.
lr = 1e-3                # learning rate handed to the optimizer
training_iters = 100000  # training stops once step * batch_size reaches this
batch_size = 64          # number of examples drawn per training step

#### 为了使用 RNN 来分类图片,我们把每张图片的行看成是一个像素序列(sequence)。因为MNIST 图片的大小是 28×28 像素,所以我们把每一个图像样本看成一行行的序列。因此,共有(28 个元素的序列)×(28 行),然后每一步输入的序列长度是 28,输入的步数是 28 步。下面定义 RNN 的参数

In [3]:
# Network dimensions.
n_steps = 28          # number of time steps fed to the RNN per example
n_input = 28          # number of input features per time step
n_hidden_units = 128  # number of units in the hidden layer
n_classes = 10        # number of output classes (digits 0-9)

### 定义输入数据及权重:

In [4]:
# Input placeholders; the leading None leaves the batch dimension unspecified.
# x holds the input sequences, y the one-hot class labels.
x = tf.placeholder(tf.float32, shape=[None, n_steps, n_input])
y = tf.placeholder(tf.float32, shape=[None, n_classes])

In [5]:
# Trainable parameters of the input and output projections.
# Weights start from a random normal draw, biases from the constant 0.1.
weights = {
    # input projection: (n_input, n_hidden_units) = (28, 128)
    'in': tf.Variable(
        tf.random_normal([n_input, n_hidden_units])
    ),
    # output projection: (n_hidden_units, n_classes) = (128, 10)
    'out': tf.Variable(
        tf.random_normal([n_hidden_units, n_classes])
    ),
}

biases = {
    # input projection bias: (128,)
    'in': tf.Variable(
        tf.constant(0.1, shape=[n_hidden_units])
    ),
    # output projection bias: (10,)
    'out': tf.Variable(
        tf.constant(0.1, shape=[n_classes])
    ),
}

## 定义 RNN 模型

In [8]:
def RNN(X, weights, biases):
    """Build the LSTM classifier graph and return its output logits.

    Each time step is projected to ``n_hidden_units`` features, the projected
    sequence is run through a single BasicLSTMCell, and the final hidden
    state is projected to ``n_classes`` scores.

    The initial LSTM state is sized from the runtime batch dimension of the
    input, so the returned tensor can be evaluated with any batch size (not
    only the global training ``batch_size``).

    Args:
        X: input tensor that can be reshaped to (batch, n_steps, n_input).
        weights: dict with an 'in' matrix (n_input, n_hidden_units) and an
            'out' matrix (n_hidden_units, n_classes).
        biases: dict with an 'in' vector (n_hidden_units,) and an 'out'
            vector (n_classes,).

    Returns:
        Tensor of shape (batch, n_classes) holding the unnormalized scores.
    """
    # Flatten to (batch * n_steps, n_input) so that a single matmul projects
    # every time step of every example.
    X = tf.reshape(X, [-1, n_input])
    # Input projection: (batch * n_steps, n_hidden_units).
    X_in = tf.matmul(X, weights['in']) + biases['in']
    # Restore the sequence layout: (batch, n_steps, n_hidden_units).
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])
    # Basic LSTM cell; its state is a (c_state, h_state) tuple.
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units, forget_bias=1.0,
                                             state_is_tuple=True)
    # No explicit initial_state: with dtype given, dynamic_rnn creates a zero
    # state whose batch size follows X_in instead of a fixed global constant.
    # time_major=False means X_in is laid out as (batch, steps, inputs).
    _, final_state = tf.nn.dynamic_rnn(lstm_cell, X_in, dtype=tf.float32,
                                       time_major=False)
    # final_state[1] is the hidden state h after the last time step.
    results = tf.matmul(final_state[1], weights['out']) + biases['out']
    return results

## 定义损失函数和优化器,优化器采用 AdamOptimizer

In [9]:
# Model output scores for the placeholder input.
pred = RNN(x, weights, biases)
# Mean softmax cross-entropy between the scores and the labels in y.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
    logits=pred,
    labels=y,
)
cost = tf.reduce_mean(per_example_loss)
# One Adam update step that minimizes the cost with learning rate lr.
train_op = tf.train.AdamOptimizer(lr).minimize(cost)

## 定义模型预测结果及准确率计算方法:

In [10]:
# A prediction is correct when the arg-max of the scores matches the arg-max
# of the one-hot label along axis 1.
predicted_label = tf.argmax(pred, 1)
true_label = tf.argmax(y, 1)
correct_pred = tf.equal(predicted_label, true_label)
# Accuracy is the mean of the correctness flags cast to float.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

## 训练数据及评估模型
#### 在一个会话中启动图,开始训练,每 20 次输出 1 次准确率的大小:

In [11]:
with tf.Session() as sess:
    # Initialize all variables before the first training step.
    sess.run(tf.global_variables_initializer())
    step = 0
    # Keep training until step * batch_size reaches training_iters.
    while step * batch_size < training_iters:
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Reshape the batch to (batch_size, n_steps, n_input) to match x.
        batch_xs = batch_xs.reshape([batch_size, n_steps, n_input])
        feed = {x: batch_xs, y: batch_ys}
        sess.run([train_op], feed_dict=feed)
        # Every 20th step, print the accuracy on the batch just trained on.
        if step % 20 == 0:
            print(sess.run(accuracy, feed_dict=feed))
        step += 1

0.203125
0.625
0.828125
0.78125
0.859375
0.875
0.859375
0.875
0.796875
0.84375
0.90625
0.96875
0.859375
0.859375
0.890625
0.953125
0.9375
0.953125
0.9375
0.953125
0.953125
0.953125
0.875
0.953125
0.953125
0.9375
0.921875
0.953125
0.921875
0.984375
0.9375
0.921875
0.890625
0.921875
0.984375
0.9375
0.953125
0.921875
1.0
0.984375
0.9375
0.96875
0.953125
0.984375
0.953125
0.984375
0.96875
0.921875
0.96875
0.953125
0.859375
0.96875
1.0
0.921875
1.0
0.984375
0.984375
1.0
0.96875
0.90625
0.984375
1.0
0.953125
0.953125
0.96875
0.953125
0.921875
0.953125
0.96875
0.96875
1.0
1.0
0.9375
1.0
0.984375
0.984375
0.96875
0.984375
1.0
