In [1]:
import warnings
warnings.filterwarnings('ignore')  # 不打印 warning 

import tensorflow as tf

sess = tf.Session()

import numpy as np

# 用tensorflow 导入数据
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('./data/MNIST_data', one_hot=True) 

# 看看咱们样本的数量
print(mnist.test.labels.shape)
print(mnist.train.labels.shape)

Extracting ./data/MNIST_data/train-images-idx3-ubyte.gz
Extracting ./data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting ./data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ./data/MNIST_data/t10k-labels-idx1-ubyte.gz
(10000, 10)
(55000, 10)


### 1.设置超参

In [2]:
lr = 1e-3
input_size = 28
timestep_size = 28
hidden_size = 256
layer_num = 2
class_num = 10
cell_type = 'lstm'

x_input = tf.placeholder(tf.float32, [None, 784])
y_input = tf.placeholder(tf.float32, [None, class_num])
batch_size = tf.placeholder(tf.int32, [])
keep_prob = tf.placeholder(tf.float32, [])

### 2.搭建模型

In [3]:
#转换输入尺寸
X = tf.reshape(x_input, [-1, 28, 28])

def lstm_cell(cell_type, num_nodes, keep_prob):
    assert(cell_type in ['lstm', 'block_lstm'], "wrong cell type")
    if cell_type == 'lstm':
        cell = tf.contrib.rnn.BasicLSTMCell(num_nodes)
    else:
        cell = tf.contrib.rnn.LSTMBlockCell(num_nodes)
    cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
    return cell

mlstm_cell = tf.contrib.rnn.MultiRNNCell([lstm_cell(cell_type, hidden_size, keep_prob) for _ in range(layer_num)], state_is_tuple=True)
#用全0初始化state
init_state = mlstm_cell.zero_state(batch_size, dtype=tf.float32)

#接下来有几种方法来运行构建好的网络
#方法一:调用dynamic_rnn()来让我们构建好的网络运行起来
#outputs, state = tf.nn.dynamic_rnn(mlstm_cell, inputs=X, init_state= init_state, time_major=False)
#h_state = state[-1][1]

#方法二:按时间步展开计算
outputs = list()
state = init_state
with tf.variable_scope('RNN'):
    for timestep in range(timestep_size):
        (cell_output, state) = mlstm_cell(X[:, timestep, :],state)
        outputs.append(cell_output)
h_state = outputs[-1]
print(h_state)

Tensor("RNN/RNN/multi_rnn_cell/cell_1_27/dropout/mul:0", shape=(?, 256), dtype=float32)


### 3.设置loss和优化器，展开训练&测试

In [5]:
import time
W =tf.Variable(tf.truncated_normal([hidden_size, class_num], stddev=0.1), dtype=tf.float32)
bias = tf.Variable(tf.constant(0.1, shape=[class_num]), dtype=tf.float32)
y_pre = tf.nn.softmax(tf.matmul(h_state, W) + bias)

#损失
cross_entropy = -tf.reduce_mean(y_input * tf.log(y_pre))
train_op = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(y_input, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

sess.run(tf.global_variables_initializer())
time0 = time.time()
for i in range(5000):
    _batch_size = 100
    X_batch, y_batch = mnist.train.next_batch(batch_size=_batch_size)
    cost, acc, _ = sess.run([cross_entropy, accuracy, train_op], feed_dict={x_input:X_batch, y_input:y_batch, keep_prob:0.5, batch_size:_batch_size})
    if (i + 1) % 500 == 0:
        test_acc = 0.0
        test_cost = 0.0
        N = 100
        for j in range(N):
            X_batch, y_batch = mnist.test.next_batch(batch_size=_batch_size)
            _cost, _acc, _ = sess.run([cross_entropy, accuracy, train_op], feed_dict={x_input:X_batch, y_input:y_batch, keep_prob:1.0, batch_size:_batch_size})
            test_acc += _acc
            test_cost += _cost
        print("step {}, train cost={:.6f}; test cost={:.6f}; test cost = {:.6f}, acc={:.6f}; pass {}s".format(i+1, cost, acc, test_cost/N, test_acc/N, time.time() - time0))

step 500, train cost=0.015462; test cost=0.940000; test cost = 0.012391, acc=0.961300; pass 125.01848340034485s
step 1000, train cost=0.004143; test cost=0.990000; test cost = 0.006958, acc=0.979000; pass 249.1424696445465s
step 1500, train cost=0.005402; test cost=0.990000; test cost = 0.005350, acc=0.983500; pass 373.2720148563385s
step 2000, train cost=0.005764; test cost=0.990000; test cost = 0.004549, acc=0.985400; pass 497.41186356544495s
step 2500, train cost=0.001697; test cost=1.000000; test cost = 0.003483, acc=0.988700; pass 621.5442481040955s
step 3000, train cost=0.001939; test cost=0.990000; test cost = 0.003256, acc=0.990100; pass 745.5632226467133s
step 3500, train cost=0.002817; test cost=0.990000; test cost = 0.002753, acc=0.991100; pass 869.6075649261475s
step 4000, train cost=0.008544; test cost=0.980000; test cost = 0.002511, acc=0.991800; pass 993.8090524673462s
step 4500, train cost=0.001372; test cost=0.990000; test cost = 0.001889, acc=0.993800; pass 1117.98355