In [17]:
import tensorflow as tf 
from tensorflow.examples.tutorials.mnist import input_data

# Start from a clean default graph, then seed it. The graph-level seed is
# stored on the current default graph, so it must be set AFTER the reset;
# seeding first and resetting afterwards throws the seed away.
tf.reset_default_graph()
tf.set_random_seed(1)

# data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# set hyperparameters
learn_rate = 0.001
training_iters = 100000  # training budget, compared against times * batch_size
batch_size = 128
n_inputs = 28  # MNIST data input (image shape 28 * 28): 28 pixels fed per step
n_steps = 28  # time steps: an image is fed 28 pixels at a time, so 28 steps
n_hidden_layers = 128  # width of the input projection and of the LSTM cell
n_classes = 10  # width of the output layer applied after the cell

# tf Graph Input
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])

# define weights
weights = {
    # input projection: 28 * 128
    'in': tf.Variable(tf.truncated_normal([n_inputs, n_hidden_layers])),
    # output projection: 128 * 10
    'out': tf.Variable(tf.truncated_normal([n_hidden_layers, n_classes])),
}

bias = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_layers])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes])),
}

def RNN(X, weights, bias):
    """Build the input projection -> LSTM -> output projection graph.

    Args:
        X: Input tensor. It is flattened to (-1, n_inputs) and later restored
            to (-1, n_steps, n_hidden_layers), so it is expected to be laid
            out as [batch, n_steps, n_inputs].
        weights: Dict with an 'in' matrix (n_inputs x n_hidden_layers) and an
            'out' matrix (n_hidden_layers x number of outputs).
        bias: Dict with 'in' and 'out' bias vectors matching ``weights``.

    Returns:
        The output-layer tensor computed from the LSTM output at the last
        time step, one row per batch element.
    """
    # hidden layer for input to cell
    ################################

    # tf.matmul needs 2-D operands here, so fold the batch and step axes
    # together: (batch * n_steps, n_inputs).
    flat_inputs = tf.reshape(X, [-1, n_inputs])

    # into hidden
    projected = tf.matmul(flat_inputs, weights['in']) + bias['in']

    # restore the step axis: (batch, n_steps, n_hidden_layers)
    X_in = tf.reshape(projected, [-1, n_steps, n_hidden_layers])

    # cell
    ################################
    cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_layers, forget_bias=1.0, state_is_tuple=True)

    # Size the zero state from the batch actually fed at run time instead of
    # the module-level batch_size constant, so the graph accepts any batch
    # size that the None-batch placeholder allows. Each batch element has its
    # own independent state.
    runtime_batch = tf.shape(X_in)[0]
    init_state = cell.zero_state(runtime_batch, dtype=tf.float32)

    # tf.nn.dynamic_rnn returns a pair (outputs, state):
    #   outputs: with time_major=False (the default) it is shaped
    #            [batch_size, max_time, cell.output_size]; with
    #            time_major=True it is [max_time, batch_size, cell.output_size].
    #   state:   the final state; for an LSTM cell it is a tuple that can be
    #            split into (c_state, h_state).
    outputs, final_state = tf.nn.dynamic_rnn(cell, X_in, initial_state=init_state, time_major=False)

    # hidden layer for output as the final results
    # Alternative noted by the original author: feed final_state[1] (the h
    # state) to the output layer instead of the last-step output.
    # outputs: shape=[batch_size, n_steps, n_hidden_layers]; take the last
    # time step directly, giving [batch_size, n_hidden_layers].
    last_output = outputs[:, -1, :]
    results = tf.matmul(last_output, weights['out']) + bias['out']

    return results

pred = RNN(x, weights, bias)
# pred is passed as the logits; the loss op below applies the softmax itself.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))

train_step = tf.train.AdamOptimizer(learn_rate).minimize(loss)

# Fraction of samples whose arg-max prediction matches the one-hot label.
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)), dtype=tf.float32))

init = tf.global_variables_initializer()

with tf.Session() as sess:

    sess.run(init)
    times = 0
    # Each iteration consumes batch_size samples; stop once training_iters
    # samples have been processed.
    while times * batch_size < training_iters:

        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # flat pixel rows -> [batch, n_steps, n_inputs] expected by x
        batch_x = batch_x.reshape([batch_size, n_steps, n_inputs])
        sess.run(train_step, feed_dict={x: batch_x, y: batch_y})

        if times % 20 == 0:
            # Fetch and reshape a test batch only on the iterations that
            # actually evaluate, instead of on every training step.
            test_x, test_y = mnist.test.next_batch(batch_size)
            test_x = test_x.reshape([batch_size, n_steps, n_inputs])
            print(sess.run(accuracy, feed_dict={x: test_x, y: test_y}))
        times += 1

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
0.0859375
0.5703125
0.78125
0.796875
0.8359375
0.8203125
0.84375
0.8515625
0.84375
0.9375
0.9375
0.890625
0.9453125
0.890625
0.9609375
0.9296875
0.9375
0.953125
0.8984375
0.9296875
0.9375
0.9609375
0.90625
0.9921875
0.953125
0.96875
0.96875
0.96875
0.96875
0.9765625
0.9453125
0.96875
0.953125
0.9375
0.9453125
0.984375
0.984375
0.9921875
0.9375
0.9609375
