# RNN循环神经网络

In [None]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
# Fix the graph-level random seed so that the results of separate
# calculations can be compared against each other.
tf.set_random_seed(1)

In [3]:
# Load the MNIST dataset from the "MNIST_data" directory, with the labels
# returned as one-hot vectors.
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data\train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [4]:
# Hyperparameters.

# Optimisation settings.
lr = 1e-3                 # learning rate
batch_size = 128          # number of images per training step
training_iters = 100000   # upper bound on the number of training samples consumed

# Model geometry: each 28x28 MNIST image is read as a sequence of rows.
n_inputs = 28             # pixels per image row; the RNN consumes one row per time step
n_steps = 28              # rows per image, i.e. the number of time steps
n_hidden_units = 128      # neurons in the hidden layer
n_classes = 10            # MNIST classes (digits 0-9)

In [5]:
# Graph inputs. The leading (batch) dimension is left unspecified.
#   x: images as row sequences, shape (batch, n_steps, n_inputs)
#   y: one-hot labels, shape (batch, n_classes)
x = tf.placeholder(dtype=tf.float32, shape=[None, n_steps, n_inputs])
y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes])

In [None]:
# Trainable parameters of the input and output projection layers.
# Entries are created in the same order as before ("in" first, then "out").

# Projection matrices, initialised from a normal distribution.
weights = {}
weights["in"] = tf.Variable(tf.random_normal([n_inputs, n_hidden_units]))    # (28, 128)
weights["out"] = tf.Variable(tf.random_normal([n_hidden_units, n_classes]))  # (128, 10)

# Bias vectors, initialised to a constant 0.1.
biases = {}
biases["in"] = tf.Variable(tf.constant(0.1, shape=[n_hidden_units]))  # (128,)
biases["out"] = tf.Variable(tf.constant(0.1, shape=[n_classes]))      # (10,)

In [None]:
def RNN(X, weights, biases):
    """Build the LSTM classifier graph and return its logits.

    Each image is treated as a sequence of ``n_steps`` rows of ``n_inputs``
    pixels. Every row is projected to ``n_hidden_units`` features, the
    resulting sequence is run through a ``BasicLSTMCell``, and the LSTM
    output at the last time step is projected to ``n_classes`` scores.

    Args:
        X: float tensor of shape (batch, n_steps, n_inputs).
        weights: dict holding the "in" (n_inputs, n_hidden_units) and
            "out" (n_hidden_units, n_classes) projection matrices.
        biases: dict holding the "in" (n_hidden_units,) and
            "out" (n_classes,) bias vectors.

    Returns:
        Tensor of shape (batch, n_classes) with unnormalised class scores.
    """
    # Hidden layer from the raw input to the cell.
    ####################################
    # Flatten (batch, steps, inputs) to (batch * steps, inputs) so that one
    # matmul projects every row of every image at once.
    X = tf.reshape(X, [-1, n_inputs])

    # X_in: (batch * steps, hidden)
    X_in = tf.matmul(X, weights["in"]) + biases["in"]
    # Restore the sequence layout: (batch, steps, hidden).
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])

    # Cell.
    ############################################
    cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    # The LSTM state has two parts (c_state, h_state). Size the zero state
    # from the batch that is actually fed instead of the global batch_size,
    # so the graph accepts any batch size the placeholders allow.
    init_state = cell.zero_state(tf.shape(X_in)[0], dtype=tf.float32)

    # dynamic_rnn accepts (batch, steps, inputs) when time_major=False and
    # (steps, batch, inputs) when time_major=True.
    outputs, _ = tf.nn.dynamic_rnn(cell, X_in, initial_state=init_state, time_major=False)

    # Hidden layer from the cell output to the final result.
    #############################################
    # outputs is (batch, steps, hidden); take the last time step directly
    # rather than transposing and unstacking every step.
    last_output = outputs[:, -1, :]
    results = tf.matmul(last_output, weights['out']) + biases['out']    # (batch, n_classes)
    return results

In [None]:
# Build the model: pred holds the class scores, shape (batch, n_classes).
pred = RNN(x, weights, biases)
# Loss: softmax over pred, cross-entropy against the one-hot labels, averaged
# over the batch. The _v2 op replaces the deprecated
# softmax_cross_entropy_with_logits; the labels come from a placeholder, so
# the loss and its gradients are the same.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y)
)
# Minimise the loss with the Adam optimiser.
train_op = tf.train.AdamOptimizer(lr).minimize(cost)
# A prediction is correct when the arg-max class of pred matches the arg-max
# of the one-hot label; accuracy is the mean of those matches over the batch.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [None]:
with tf.Session() as sess:
    # Variables must be initialised before the first training step.
    sess.run(tf.global_variables_initializer())

    step = 0
    # Keep training until roughly training_iters samples have been consumed.
    while step * batch_size < training_iters:
        images, labels = mnist.train.next_batch(batch_size)
        # Flat image vectors become (batch, n_steps, n_inputs) row sequences.
        feed = {
            x: images.reshape((batch_size, n_steps, n_inputs)),
            y: labels,
        }
        sess.run([train_op], feed_dict=feed)
        # Every 20 steps, report the accuracy on the batch just trained on.
        if step % 20 == 0:
            print(sess.run(accuracy, feed_dict=feed))
        step += 1

