In [1]:
import tensorflow as tf
import numpy as np
# Clear the default graph so that re-running the notebook starts from an empty graph.
tf.reset_default_graph()

In [None]:
np.random.seed(0)

# Create the input data from a standard normal distribution, shape (2, 4, 5):
# 2 samples per batch, 4 time steps, 5 features per time step.
X = np.random.randn(2,4,5)

# Zero out every time step after the first for the second sample, so its
# effective length is 1 (this matches seq_lengths below).
X[1,1:] = 0
# Valid length of each input sequence in the batch.
seq_lengths = [4,1]
print('X:\n',X)

# Build one LSTM cell and one GRU cell so their outputs and states can be
# compared; 3 is the number of hidden units.
cell = tf.contrib.rnn.BasicLSTMCell(num_units = 3,state_is_tuple = True)
gru = tf.contrib.rnn.GRUCell(3)

# No initial_state is passed, so a dtype must be given (float64 to match X).
outputs,last_states = tf.nn.dynamic_rnn(cell,X,seq_lengths,dtype =tf.float64 )
gruoutputs,grulast_states = tf.nn.dynamic_rnn(gru,X,seq_lengths,dtype =tf.float64 )

# NOTE(review): the InteractiveSession is never closed in this cell — confirm
# that later cells do not depend on it before replacing it with a `with` block.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

result,sta,gruout,grusta = sess.run([outputs,last_states,gruoutputs,grulast_states])

print('全序列:\n',result[0])
print('短序列:\n',result[1])

# state_is_tuple=True was set on BasicLSTMCell, so the LSTM state is a pair
# (cell state c, output h); sta[1] is h.
print('LSTM的状态:',len(sta),'\n',sta[1])  

print('GRU的全序列：\n',gruout[0])
print('GRU的短序列：\n',gruout[1])
# The GRU has no separate cell state: its state is its final output. The batch
# holds two samples, so the state has length 2.
print('GRU的状态:',len(grusta),'\n',grusta[1]) 




In [2]:
def single_layer_static_lstm(input_x, n_steps, n_hidden):
    """Build a single-layer static (unrolled) LSTM network.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps; axis 1 of input_x is split into this
            many per-step tensors.
        n_hidden: Number of units in the LSTM cell.

    Returns:
        The (outputs, state) pair returned by tf.contrib.rnn.static_rnn;
        outputs holds one entry per time step.
    """
    # static_rnn takes a Python list of per-step tensors, not one 3-D tensor,
    # so the time axis is unstacked first.
    step_inputs = tf.unstack(input_x, num=n_steps, axis=1)

    cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0)

    outputs, final_state = tf.contrib.rnn.static_rnn(
        cell=cell, inputs=step_inputs, dtype=tf.float32)
    return outputs, final_state

In [3]:
def single_layer_static_lstm(input_x, n_steps, n_hidden):
    """Build a single-layer static (unrolled) LSTM network.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps; axis 1 of input_x is split into this
            many per-step tensors.
        n_hidden: Number of units in the LSTM cell.

    Returns:
        The (outputs, state) pair returned by tf.contrib.rnn.static_rnn;
        outputs holds one entry per time step.
    """
    # static_rnn takes a Python list of per-step tensors, not one 3-D tensor.
    step_inputs = tf.unstack(input_x, num=n_steps, axis=1)
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0)
    outputs, final_state = tf.contrib.rnn.static_rnn(
        cell=cell, inputs=step_inputs, dtype=tf.float32)
    return outputs, final_state

In [4]:
def single_layer_static_gru(input_x, n_steps, n_hidden):
    """Build a single-layer static (unrolled) GRU network.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps; axis 1 of input_x is split into this
            many per-step tensors.
        n_hidden: Number of units in the GRU cell.

    Returns:
        The (outputs, state) pair returned by tf.contrib.rnn.static_rnn;
        outputs holds one entry per time step.
    """
    # static_rnn takes a Python list of per-step tensors, not one 3-D tensor,
    # so the time axis is unstacked first.
    step_inputs = tf.unstack(input_x, num=n_steps, axis=1)

    cell = tf.contrib.rnn.GRUCell(num_units=n_hidden)

    outputs, final_state = tf.contrib.rnn.static_rnn(
        cell=cell, inputs=step_inputs, dtype=tf.float32)
    return outputs, final_state

In [5]:
def single_layer_static_gru(input_x, n_steps, n_hidden):
    """Build a single-layer static (unrolled) GRU network.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps; axis 1 of input_x is split into this
            many per-step tensors.
        n_hidden: Number of units in the GRU cell.

    Returns:
        The (outputs, state) pair returned by tf.contrib.rnn.static_rnn;
        outputs holds one entry per time step.
    """
    # static_rnn takes a Python list of per-step tensors, not one 3-D tensor.
    step_inputs = tf.unstack(input_x, num=n_steps, axis=1)
    cell = tf.contrib.rnn.GRUCell(num_units=n_hidden)
    outputs, final_state = tf.contrib.rnn.static_rnn(
        cell=cell, inputs=step_inputs, dtype=tf.float32)
    return outputs, final_state

In [6]:
def single_layer_dynamic_lstm(input_x, n_steps, n_hidden):
    """Build a single-layer dynamic LSTM network with time-major outputs.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps. Not used by this builder; it is part of
            the signature shared by all builders in this notebook.
        n_hidden: Number of units in the LSTM cell.

    Returns:
        (outputs, state): the dynamic_rnn outputs transposed with permutation
        [1, 0, 2] (time axis first), and the state returned by dynamic_rnn.
    """
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0)

    # dynamic_rnn consumes the 3-D batch-major tensor directly.
    batch_major_outputs, final_state = tf.nn.dynamic_rnn(
        cell=cell, inputs=input_x, dtype=tf.float32)

    # Put the time axis first so that indexing [-1] selects the last time step,
    # as it does on the list returned by the static builders.
    time_major_outputs = tf.transpose(batch_major_outputs, [1, 0, 2])
    return time_major_outputs, final_state

In [7]:
def single_layer_dynamic_lstm(input_x, n_steps, n_hidden):
    """Build a single-layer dynamic LSTM network with time-major outputs.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps. Not used by this builder; it is part of
            the signature shared by all builders in this notebook.
        n_hidden: Number of units in the LSTM cell.

    Returns:
        (outputs, state): the dynamic_rnn outputs transposed with permutation
        [1, 0, 2] (time axis first), and the state returned by dynamic_rnn.
    """
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0)
    batch_major_outputs, final_state = tf.nn.dynamic_rnn(
        cell=cell, inputs=input_x, dtype=tf.float32)
    # Time axis first, so [-1] selects the last time step.
    time_major_outputs = tf.transpose(batch_major_outputs, [1, 0, 2])
    return time_major_outputs, final_state

In [8]:
def single_layer_dynamic_gru(input_x, n_steps, n_hidden):
    """Build a single-layer dynamic GRU network with time-major outputs.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps. Not used by this builder; it is part of
            the signature shared by all builders in this notebook.
        n_hidden: Number of units in the GRU cell.

    Returns:
        (outputs, state): the dynamic_rnn outputs transposed with permutation
        [1, 0, 2] (time axis first), and the state returned by dynamic_rnn.
    """
    cell = tf.contrib.rnn.GRUCell(num_units=n_hidden)

    # dynamic_rnn consumes the 3-D batch-major tensor directly.
    batch_major_outputs, final_state = tf.nn.dynamic_rnn(
        cell=cell, inputs=input_x, dtype=tf.float32)

    # Put the time axis first so that indexing [-1] selects the last time step,
    # as it does on the list returned by the static builders.
    time_major_outputs = tf.transpose(batch_major_outputs, [1, 0, 2])
    return time_major_outputs, final_state

In [9]:
def single_layer_dynamic_gru(input_x, n_steps, n_hidden):
    """Build a single-layer dynamic GRU network with time-major outputs.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps. Not used by this builder; it is part of
            the signature shared by all builders in this notebook.
        n_hidden: Number of units in the GRU cell.

    Returns:
        (outputs, state): the dynamic_rnn outputs transposed with permutation
        [1, 0, 2] (time axis first), and the state returned by dynamic_rnn.
    """
    cell = tf.contrib.rnn.GRUCell(num_units=n_hidden)
    batch_major_outputs, final_state = tf.nn.dynamic_rnn(
        cell=cell, inputs=input_x, dtype=tf.float32)
    # Time axis first, so [-1] selects the last time step.
    time_major_outputs = tf.transpose(batch_major_outputs, [1, 0, 2])
    return time_major_outputs, final_state

In [10]:
def multi_layer_static_lstm(input_x, n_steps, n_hidden, n_layers=3):
    """Build a multi-layer static (unrolled) LSTM network.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps; axis 1 of input_x is split into this
            many per-step tensors.
        n_hidden: Number of units in each LSTM cell.
        n_layers: Number of stacked LSTM layers. Defaults to 3, the depth that
            was previously hard-coded.

    Returns:
        The (outputs, state) pair returned by tf.contrib.rnn.static_rnn;
        outputs holds one entry per time step.
    """
    # static_rnn takes a Python list of per-step tensors, not one 3-D tensor,
    # so the time axis is unstacked first.
    step_inputs = tf.unstack(input_x, num=n_steps, axis=1)

    # One independent cell object per layer.
    layer_cells = [tf.contrib.rnn.LSTMCell(num_units=n_hidden) for _ in range(n_layers)]

    # MultiRNNCell chains the cells: each layer's output feeds the next layer.
    stacked_cell = tf.contrib.rnn.MultiRNNCell(cells=layer_cells)

    outputs, final_state = tf.contrib.rnn.static_rnn(
        cell=stacked_cell, inputs=step_inputs, dtype=tf.float32)
    return outputs, final_state

In [15]:
def multi_layer_static_lstm(input_x, n_steps, n_hidden, n_layers=3):
    """Build a multi-layer static (unrolled) LSTM network.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps; axis 1 of input_x is split into this
            many per-step tensors.
        n_hidden: Number of units in each LSTM cell.
        n_layers: Number of stacked LSTM layers. Defaults to 3, the depth that
            was previously hard-coded.

    Returns:
        The (outputs, state) pair returned by tf.contrib.rnn.static_rnn;
        outputs holds one entry per time step.
    """
    # static_rnn takes a Python list of per-step tensors, not one 3-D tensor.
    step_inputs = tf.unstack(input_x, num=n_steps, axis=1)
    # One independent cell object per layer, chained by MultiRNNCell.
    layer_cells = [tf.contrib.rnn.LSTMCell(num_units=n_hidden) for _ in range(n_layers)]
    stacked_cell = tf.contrib.rnn.MultiRNNCell(cells=layer_cells)
    outputs, final_state = tf.contrib.rnn.static_rnn(
        cell=stacked_cell, inputs=step_inputs, dtype=tf.float32)
    return outputs, final_state

In [16]:
def multi_layer_static_gru(input_x, n_steps, n_hidden, n_layers=3):
    """Build a multi-layer static (unrolled) GRU network.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps; axis 1 of input_x is split into this
            many per-step tensors.
        n_hidden: Number of units in each GRU cell.
        n_layers: Number of stacked GRU layers. Defaults to 3, the depth that
            was previously hard-coded.

    Returns:
        The (outputs, state) pair returned by tf.contrib.rnn.static_rnn;
        outputs holds one entry per time step.
    """
    # static_rnn takes a Python list of per-step tensors, not one 3-D tensor,
    # so the time axis is unstacked first.
    step_inputs = tf.unstack(input_x, num=n_steps, axis=1)

    # One independent cell object per layer.
    layer_cells = [tf.contrib.rnn.GRUCell(num_units=n_hidden) for _ in range(n_layers)]

    # MultiRNNCell chains the cells: each layer's output feeds the next layer.
    stacked_cell = tf.contrib.rnn.MultiRNNCell(cells=layer_cells)

    outputs, final_state = tf.contrib.rnn.static_rnn(
        cell=stacked_cell, inputs=step_inputs, dtype=tf.float32)
    return outputs, final_state


In [17]:
def multi_layer_static_gru(input_x, n_steps, n_hidden, n_layers=3):
    """Build a multi-layer static (unrolled) GRU network.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps; axis 1 of input_x is split into this
            many per-step tensors.
        n_hidden: Number of units in each GRU cell.
        n_layers: Number of stacked GRU layers. Defaults to 3, the depth that
            was previously hard-coded.

    Returns:
        The (outputs, state) pair returned by tf.contrib.rnn.static_rnn;
        outputs holds one entry per time step.
    """
    # static_rnn takes a Python list of per-step tensors, not one 3-D tensor.
    step_inputs = tf.unstack(input_x, num=n_steps, axis=1)
    # One independent cell object per layer, chained by MultiRNNCell.
    layer_cells = [tf.contrib.rnn.GRUCell(num_units=n_hidden) for _ in range(n_layers)]
    stacked_cell = tf.contrib.rnn.MultiRNNCell(cells=layer_cells)
    outputs, final_state = tf.contrib.rnn.static_rnn(
        cell=stacked_cell, inputs=step_inputs, dtype=tf.float32)
    return outputs, final_state

In [18]:
def multi_layer_static_mix(input_x, n_steps, n_hidden):
    """Build a two-layer static network: an LSTM layer followed by a GRU layer.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps; axis 1 of input_x is split into this
            many per-step tensors.
        n_hidden: Number of units in the LSTM layer; the GRU layer on top of it
            uses 2 * n_hidden units.

    Returns:
        The (outputs, state) pair returned by tf.contrib.rnn.static_rnn;
        outputs holds one entry per time step.
    """
    # static_rnn takes a Python list of per-step tensors, not one 3-D tensor,
    # so the time axis is unstacked first.
    step_inputs = tf.unstack(input_x, num=n_steps, axis=1)

    gru_layer = tf.contrib.rnn.GRUCell(num_units=n_hidden * 2)
    lstm_layer = tf.contrib.rnn.LSTMCell(num_units=n_hidden)

    # Order in the list is the order of application: LSTM first, then GRU.
    stacked_cell = tf.contrib.rnn.MultiRNNCell(cells=[lstm_layer, gru_layer])

    outputs, final_state = tf.contrib.rnn.static_rnn(
        cell=stacked_cell, inputs=step_inputs, dtype=tf.float32)
    return outputs, final_state

In [19]:
def multi_layer_static_mix(input_x, n_steps, n_hidden):
    """Build a two-layer static network: an LSTM layer followed by a GRU layer.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps; axis 1 of input_x is split into this
            many per-step tensors.
        n_hidden: Number of units in the LSTM layer; the GRU layer on top of it
            uses 2 * n_hidden units.

    Returns:
        The (outputs, state) pair returned by tf.contrib.rnn.static_rnn;
        outputs holds one entry per time step.
    """
    # static_rnn takes a Python list of per-step tensors, not one 3-D tensor.
    step_inputs = tf.unstack(input_x, num=n_steps, axis=1)
    gru_layer = tf.contrib.rnn.GRUCell(num_units=n_hidden * 2)
    lstm_layer = tf.contrib.rnn.LSTMCell(num_units=n_hidden)
    # Order in the list is the order of application: LSTM first, then GRU.
    stacked_cell = tf.contrib.rnn.MultiRNNCell(cells=[lstm_layer, gru_layer])
    outputs, final_state = tf.contrib.rnn.static_rnn(
        cell=stacked_cell, inputs=step_inputs, dtype=tf.float32)
    return outputs, final_state

In [20]:
def multi_layer_dynamic_lstm(input_x, n_steps, n_hidden, n_layers=3):
    """Build a multi-layer dynamic LSTM network with time-major outputs.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps. Not used by this builder; it is part of
            the signature shared by all builders in this notebook.
        n_hidden: Number of units in each LSTM cell.
        n_layers: Number of stacked LSTM layers. Defaults to 3, the depth that
            was previously hard-coded.

    Returns:
        (outputs, state): the dynamic_rnn outputs transposed with permutation
        [1, 0, 2] (time axis first), and the state returned by dynamic_rnn.
    """
    # One independent cell object per layer.
    layer_cells = [tf.contrib.rnn.LSTMCell(num_units=n_hidden) for _ in range(n_layers)]

    # MultiRNNCell chains the cells: each layer's output feeds the next layer.
    stacked_cell = tf.contrib.rnn.MultiRNNCell(cells=layer_cells)

    # dynamic_rnn consumes the 3-D batch-major tensor directly.
    batch_major_outputs, final_state = tf.nn.dynamic_rnn(
        cell=stacked_cell, inputs=input_x, dtype=tf.float32)

    # Put the time axis first so that indexing [-1] selects the last time step.
    time_major_outputs = tf.transpose(batch_major_outputs, [1, 0, 2])
    return time_major_outputs, final_state

In [21]:
def multi_layer_dynamic_lstm(input_x, n_steps, n_hidden, n_layers=3):
    """Build a multi-layer dynamic LSTM network with time-major outputs.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps. Not used by this builder; it is part of
            the signature shared by all builders in this notebook.
        n_hidden: Number of units in each LSTM cell.
        n_layers: Number of stacked LSTM layers. Defaults to 3, the depth that
            was previously hard-coded.

    Returns:
        (outputs, state): the dynamic_rnn outputs transposed with permutation
        [1, 0, 2] (time axis first), and the state returned by dynamic_rnn.
    """
    # One independent cell object per layer, chained by MultiRNNCell.
    layer_cells = [tf.contrib.rnn.LSTMCell(num_units=n_hidden) for _ in range(n_layers)]
    stacked_cell = tf.contrib.rnn.MultiRNNCell(cells=layer_cells)
    batch_major_outputs, final_state = tf.nn.dynamic_rnn(
        cell=stacked_cell, inputs=input_x, dtype=tf.float32)
    # Time axis first, so [-1] selects the last time step.
    time_major_outputs = tf.transpose(batch_major_outputs, [1, 0, 2])
    return time_major_outputs, final_state

In [22]:
def multi_layer_dynamic_gru(input_x, n_steps, n_hidden, n_layers=3):
    """Build a multi-layer dynamic GRU network with time-major outputs.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps. Not used by this builder; it is part of
            the signature shared by all builders in this notebook.
        n_hidden: Number of units in each GRU cell.
        n_layers: Number of stacked GRU layers. Defaults to 3, the depth that
            was previously hard-coded.

    Returns:
        (outputs, state): the dynamic_rnn outputs transposed with permutation
        [1, 0, 2] (time axis first), and the state returned by dynamic_rnn.
    """
    # One independent cell object per layer.
    layer_cells = [tf.contrib.rnn.GRUCell(num_units=n_hidden) for _ in range(n_layers)]

    # MultiRNNCell chains the cells: each layer's output feeds the next layer.
    stacked_cell = tf.contrib.rnn.MultiRNNCell(cells=layer_cells)

    # dynamic_rnn consumes the 3-D batch-major tensor directly.
    batch_major_outputs, final_state = tf.nn.dynamic_rnn(
        cell=stacked_cell, inputs=input_x, dtype=tf.float32)

    # Put the time axis first so that indexing [-1] selects the last time step.
    time_major_outputs = tf.transpose(batch_major_outputs, [1, 0, 2])
    return time_major_outputs, final_state

In [23]:
def multi_layer_dynamic_gru(input_x, n_steps, n_hidden, n_layers=3):
    """Build a multi-layer dynamic GRU network with time-major outputs.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps. Not used by this builder; it is part of
            the signature shared by all builders in this notebook.
        n_hidden: Number of units in each GRU cell.
        n_layers: Number of stacked GRU layers. Defaults to 3, the depth that
            was previously hard-coded.

    Returns:
        (outputs, state): the dynamic_rnn outputs transposed with permutation
        [1, 0, 2] (time axis first), and the state returned by dynamic_rnn.
    """
    # One independent cell object per layer, chained by MultiRNNCell.
    layer_cells = [tf.contrib.rnn.GRUCell(num_units=n_hidden) for _ in range(n_layers)]
    stacked_cell = tf.contrib.rnn.MultiRNNCell(cells=layer_cells)
    batch_major_outputs, final_state = tf.nn.dynamic_rnn(
        cell=stacked_cell, inputs=input_x, dtype=tf.float32)
    # Time axis first, so [-1] selects the last time step.
    time_major_outputs = tf.transpose(batch_major_outputs, [1, 0, 2])
    return time_major_outputs, final_state

In [24]:
def multi_layer_dynamic_mix(input_x, n_steps, n_hidden):
    """Build a two-layer dynamic network: an LSTM layer followed by a GRU layer.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps. Not used by this builder; it is part of
            the signature shared by all builders in this notebook.
        n_hidden: Number of units in the LSTM layer; the GRU layer on top of it
            uses 2 * n_hidden units.

    Returns:
        (outputs, state): the dynamic_rnn outputs transposed with permutation
        [1, 0, 2] (time axis first), and the state returned by dynamic_rnn.
    """
    gru_layer = tf.contrib.rnn.GRUCell(num_units=n_hidden * 2)
    lstm_layer = tf.contrib.rnn.LSTMCell(num_units=n_hidden)

    # Order in the list is the order of application: LSTM first, then GRU.
    stacked_cell = tf.contrib.rnn.MultiRNNCell(cells=[lstm_layer, gru_layer])

    # dynamic_rnn consumes the 3-D batch-major tensor directly.
    batch_major_outputs, final_state = tf.nn.dynamic_rnn(
        cell=stacked_cell, inputs=input_x, dtype=tf.float32)

    # Put the time axis first so that indexing [-1] selects the last time step.
    time_major_outputs = tf.transpose(batch_major_outputs, [1, 0, 2])
    return time_major_outputs, final_state


In [25]:
def multi_layer_dynamic_mix(input_x, n_steps, n_hidden):
    """Build a two-layer dynamic network: an LSTM layer followed by a GRU layer.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps. Not used by this builder; it is part of
            the signature shared by all builders in this notebook.
        n_hidden: Number of units in the LSTM layer; the GRU layer on top of it
            uses 2 * n_hidden units.

    Returns:
        (outputs, state): the dynamic_rnn outputs transposed with permutation
        [1, 0, 2] (time axis first), and the state returned by dynamic_rnn.
    """
    gru_layer = tf.contrib.rnn.GRUCell(num_units=n_hidden * 2)
    lstm_layer = tf.contrib.rnn.LSTMCell(num_units=n_hidden)
    # Order in the list is the order of application: LSTM first, then GRU.
    stacked_cell = tf.contrib.rnn.MultiRNNCell(cells=[lstm_layer, gru_layer])
    batch_major_outputs, final_state = tf.nn.dynamic_rnn(
        cell=stacked_cell, inputs=input_x, dtype=tf.float32)
    # Time axis first, so [-1] selects the last time step.
    time_major_outputs = tf.transpose(batch_major_outputs, [1, 0, 2])
    return time_major_outputs, final_state

In [26]:
def single_layer_static_bi_lstm(input_x, n_steps, n_hidden):
    """Build a single-layer static bidirectional LSTM network.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps; axis 1 of input_x is split into this
            many per-step tensors.
        n_hidden: Number of units in each of the two LSTM cells.

    Returns:
        (outputs, fw_state, bw_state) as returned by
        tf.contrib.rnn.static_bidirectional_rnn. outputs holds one entry per
        time step, each merging the forward and backward outputs.
    """
    # The static RNN functions take a Python list of per-step tensors, so the
    # time axis is unstacked first.
    step_inputs = tf.unstack(input_x, num=n_steps, axis=1)

    # Separate cells for the forward and the backward direction.
    forward_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0)
    backward_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0)

    outputs, forward_state, backward_state = tf.contrib.rnn.static_bidirectional_rnn(
        cell_fw=forward_cell,
        cell_bw=backward_cell,
        inputs=step_inputs,
        dtype=tf.float32)

    # Diagnostic print; with n_steps=28 and n_hidden=128 the original notebook
    # recorded: <class 'list'> 28 (?, 256) (?, 256)
    print('hiddens:\n', type(outputs), len(outputs), outputs[0].shape, outputs[1].shape)

    return outputs, forward_state, backward_state

In [27]:
def single_layer_static_bi_lstm(input_x, n_steps, n_hidden):
    """Build a single-layer static bidirectional LSTM network.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps; axis 1 of input_x is split into this
            many per-step tensors.
        n_hidden: Number of units in each of the two LSTM cells.

    Returns:
        (outputs, fw_state, bw_state) as returned by
        tf.contrib.rnn.static_bidirectional_rnn; outputs holds one entry per
        time step.
    """
    # The static RNN functions take a Python list of per-step tensors.
    step_inputs = tf.unstack(input_x, num=n_steps, axis=1)
    # Separate cells for the forward and the backward direction.
    forward_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0)
    backward_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0)
    outputs, forward_state, backward_state = tf.contrib.rnn.static_bidirectional_rnn(
        cell_fw=forward_cell,
        cell_bw=backward_cell,
        inputs=step_inputs,
        dtype=tf.float32)
    # Diagnostic print of the container type, step count and first two shapes.
    print('hiddens:\n', type(outputs), len(outputs), outputs[0].shape, outputs[1].shape)
    return outputs, forward_state, backward_state

In [28]:
def single_layer_dynamic_bi_lstm(input_x, n_steps, n_hidden):
    """Build a single-layer dynamic bidirectional LSTM with time-major outputs.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps. Not used by this builder; it is part of
            the signature shared by all builders in this notebook.
        n_hidden: Number of units in each of the two LSTM cells.

    Returns:
        (outputs, state): the forward and backward outputs concatenated on
        axis 2 and then transposed with permutation [1, 0, 2] (time axis
        first), and the state returned by tf.nn.bidirectional_dynamic_rnn.
    """
    # Separate cells for the two directions. The backward cell used to be
    # assigned to the forward cell's name, which left `lstm_bw_cell` undefined
    # and made every call fail with NameError.
    forward_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0)
    backward_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0)

    # bidirectional_dynamic_rnn takes the 3-D batch-major tensor and returns
    # the outputs as a (forward, backward) pair.
    direction_outputs, final_states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=forward_cell,
        cell_bw=backward_cell,
        inputs=input_x,
        dtype=tf.float32)

    # Diagnostic print; with n_steps=28 and n_hidden=128 the original notebook
    # recorded: <class 'tuple'> 2 (?, 28, 128) (?, 28, 128)
    print('hiddens:\n', type(direction_outputs), len(direction_outputs),
          direction_outputs[0].shape, direction_outputs[1].shape)

    # Merge the two directions along the feature axis, e.g. 128 + 128 -> 256.
    merged_outputs = tf.concat(direction_outputs, axis=2)

    # Put the time axis first so that indexing [-1] selects the last time step.
    time_major_outputs = tf.transpose(merged_outputs, [1, 0, 2])

    return time_major_outputs, final_states

In [29]:
def single_layer_dynamic_bi_lstm(input_x, n_steps, n_hidden):
    """Build a single-layer dynamic bidirectional LSTM with time-major outputs.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps. Not used by this builder; it is part of
            the signature shared by all builders in this notebook.
        n_hidden: Number of units in each of the two LSTM cells.

    Returns:
        (outputs, state): the forward and backward outputs concatenated on
        axis 2 and then transposed with permutation [1, 0, 2] (time axis
        first), and the state returned by tf.nn.bidirectional_dynamic_rnn.
    """
    # Separate cells for the two directions. The backward cell used to be
    # assigned to the forward cell's name, which left `lstm_bw_cell` undefined
    # and made every call fail with NameError.
    forward_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0)
    backward_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0)
    direction_outputs, final_states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=forward_cell,
        cell_bw=backward_cell,
        inputs=input_x,
        dtype=tf.float32)
    # Diagnostic print of the (forward, backward) output pair.
    print('hiddens:\n', type(direction_outputs), len(direction_outputs),
          direction_outputs[0].shape, direction_outputs[1].shape)
    # Merge both directions on the feature axis, then put the time axis first
    # so that indexing [-1] selects the last time step.
    merged_outputs = tf.concat(direction_outputs, axis=2)
    time_major_outputs = tf.transpose(merged_outputs, [1, 0, 2])
    return time_major_outputs, final_states

In [30]:
def multi_layer_static_bi_lstm(input_x, n_steps, n_hidden, n_layers=3):
    """Build a multi-layer static bidirectional LSTM network.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps; axis 1 of input_x is split into this
            many per-step tensors.
        n_hidden: Number of units in every LSTM cell.
        n_layers: Number of stacked bidirectional layers. Defaults to 3, the
            depth that was previously hard-coded.

    Returns:
        (outputs, fw_state, bw_state) as returned by
        tf.contrib.rnn.stack_bidirectional_rnn; outputs holds one entry per
        time step.
    """
    # The static RNN functions take a Python list of per-step tensors, so the
    # time axis is unstacked first.
    step_inputs = tf.unstack(input_x, num=n_steps, axis=1)

    # One forward and one backward cell per layer.
    forward_cells = []
    backward_cells = []
    for _ in range(n_layers):
        forward_cells.append(tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0))
        backward_cells.append(tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0))

    outputs, forward_state, backward_state = tf.contrib.rnn.stack_bidirectional_rnn(
        forward_cells, backward_cells, inputs=step_inputs, dtype=tf.float32)

    # Diagnostic print; with n_steps=28 and n_hidden=128 the original notebook
    # recorded: <class 'list'> 28 (?, 256) (?, 256)
    print('hiddens:\n', type(outputs), len(outputs), outputs[0].shape, outputs[1].shape)

    return outputs, forward_state, backward_state


In [31]:
def multi_layer_static_bi_lstm(input_x, n_steps, n_hidden, n_layers=3):
    """Build a multi-layer static bidirectional LSTM network.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps; axis 1 of input_x is split into this
            many per-step tensors.
        n_hidden: Number of units in every LSTM cell.
        n_layers: Number of stacked bidirectional layers. Defaults to 3, the
            depth that was previously hard-coded.

    Returns:
        (outputs, fw_state, bw_state) as returned by
        tf.contrib.rnn.stack_bidirectional_rnn; outputs holds one entry per
        time step.
    """
    # The static RNN functions take a Python list of per-step tensors.
    step_inputs = tf.unstack(input_x, num=n_steps, axis=1)
    # One forward and one backward cell per layer.
    forward_cells = []
    backward_cells = []
    for _ in range(n_layers):
        forward_cells.append(tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0))
        backward_cells.append(tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0))
    outputs, forward_state, backward_state = tf.contrib.rnn.stack_bidirectional_rnn(
        forward_cells, backward_cells, inputs=step_inputs, dtype=tf.float32)
    # Diagnostic print of the container type, step count and first two shapes.
    print('hiddens:\n', type(outputs), len(outputs), outputs[0].shape, outputs[1].shape)
    return outputs, forward_state, backward_state

In [32]:
def multi_layer_dynamic_bi_lstm(input_x, n_steps, n_hidden, n_layers=3):
    """Build a multi-layer dynamic bidirectional LSTM with time-major outputs.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps. Not used by this builder; it is part of
            the signature shared by all builders in this notebook.
        n_hidden: Number of units in every LSTM cell.
        n_layers: Number of stacked bidirectional layers. Defaults to 3, the
            depth that was previously hard-coded.

    Returns:
        (outputs, fw_state, bw_state): the outputs of
        tf.contrib.rnn.stack_bidirectional_dynamic_rnn transposed with
        permutation [1, 0, 2] (time axis first), plus the forward and backward
        states it returns.
    """
    # One forward and one backward cell per layer.
    forward_cells = []
    backward_cells = []
    for _ in range(n_layers):
        forward_cells.append(tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0))
        backward_cells.append(tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0))

    # The dynamic variant takes the 3-D batch-major tensor and returns a single
    # tensor whose last axis holds the merged forward and backward features.
    batch_major_outputs, forward_state, backward_state = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
        forward_cells, backward_cells, inputs=input_x, dtype=tf.float32)

    # Diagnostic print; with n_steps=28 and n_hidden=128 the original notebook
    # recorded: <class 'tensorflow.python.framework.ops.Tensor'> (?, 28, 256)
    print('hiddens:\n', type(batch_major_outputs), batch_major_outputs.shape)

    # Put the time axis first so that indexing [-1] selects the last time step.
    time_major_outputs = tf.transpose(batch_major_outputs, [1, 0, 2])

    return time_major_outputs, forward_state, backward_state

In [33]:
def multi_layer_dynamic_bi_lstm(input_x, n_steps, n_hidden, n_layers=3):
    """Build a multi-layer dynamic bidirectional LSTM with time-major outputs.

    Args:
        input_x: Input tensor; the caller in this notebook passes a placeholder
            of shape [batch_size, n_steps, n_input].
        n_steps: Number of time steps. Not used by this builder; it is part of
            the signature shared by all builders in this notebook.
        n_hidden: Number of units in every LSTM cell.
        n_layers: Number of stacked bidirectional layers. Defaults to 3, the
            depth that was previously hard-coded.

    Returns:
        (outputs, fw_state, bw_state): the outputs of
        tf.contrib.rnn.stack_bidirectional_dynamic_rnn transposed with
        permutation [1, 0, 2] (time axis first), plus the forward and backward
        states it returns.
    """
    # One forward and one backward cell per layer.
    forward_cells = []
    backward_cells = []
    for _ in range(n_layers):
        forward_cells.append(tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0))
        backward_cells.append(tf.contrib.rnn.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0))
    batch_major_outputs, forward_state, backward_state = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
        forward_cells, backward_cells, inputs=input_x, dtype=tf.float32)
    # Diagnostic print of the output tensor's type and shape.
    print('hiddens:\n', type(batch_major_outputs), batch_major_outputs.shape)
    # Time axis first, so [-1] selects the last time step.
    time_major_outputs = tf.transpose(batch_major_outputs, [1, 0, 2])
    return time_major_outputs, forward_state, backward_state

In [34]:
def mnist_rnn_classfication(flag):
    """Train one of the notebook's RNN variants on MNIST and print its accuracy.

    Each 28x28 image is fed as a sequence of 28 rows of 28 pixels. The output
    of the last time step goes through a softmax fully connected layer, the
    model is trained with Adam on a cross-entropy loss, and accuracy on
    mini-batches of the test set is printed at the end.

    Args:
        flag: Integer from 1 to 14 selecting the network builder:
            1 / 2    single-layer static LSTM / GRU
            3 / 4    single-layer dynamic LSTM / GRU
            5 / 6    multi-layer static LSTM / GRU
            7        multi-layer static LSTM + GRU mix
            8 / 9    multi-layer dynamic LSTM / GRU
            10       multi-layer dynamic LSTM + GRU mix
            11 / 12  single-layer static / dynamic bidirectional LSTM
            13 / 14  multi-layer static / dynamic bidirectional LSTM

    Raises:
        ValueError: If flag is not one of the values listed above.
    """
    # Fail fast on an unknown flag. Previously no network was built and the
    # function only crashed later on an unbound `hiddens`, after the data set
    # had already been loaded.
    if flag not in range(1, 15):
        raise ValueError('flag must be an integer from 1 to 14, got {!r}'.format(flag))

    # ---- 1. Load the data set ----
    tf.reset_default_graph()
    # Kept as a function-local import, as in the original cell.
    from tensorflow.examples.tutorials.mnist import input_data

    # one_hot=True encodes every label as a 10-dimensional one-hot vector.
    mnist = input_data.read_data_sets('MNIST-data',one_hot=True)

    print(type(mnist))

    print('Training data shape:',mnist.train.images.shape)
    print('Test data shape:',mnist.test.images.shape)
    print('Validation data shape:',mnist.validation.images.shape)
    print('Training label shape:',mnist.train.labels.shape)

    # ---- 2. Hyper-parameters and network structure ----
    n_input = 28             # values fed to the cell per time step (one image row)
    n_steps = 28             # sequence length (number of image rows)
    n_hidden = 128           # units per recurrent cell
    n_classes = 10           # number of digit classes
    batch_size = 128         # training mini-batch size
    training_step = 5000     # number of training iterations
    display_step  = 200      # print training metrics every this many iterations
    learning_rate = 1e-4     # Adam learning rate
    test_batches = 200       # number of test mini-batches evaluated at the end
    test_batch_size = 50     # samples per test mini-batch

    # Placeholders: [batch, time steps, values per step] and one-hot labels.
    input_x = tf.placeholder(dtype=tf.float32,shape=[None,n_steps,n_input])
    input_y = tf.placeholder(dtype=tf.float32,shape=[None,n_classes])

    # flag -> (message printed for the variant, builder function).
    networks = {
        1: ('單層靜態 LSTM 網路：', single_layer_static_lstm),
        2: ('單層靜態 gru 網路：', single_layer_static_gru),
        3: ('單層動態 LSTM 網路：', single_layer_dynamic_lstm),
        4: ('單層動態 gru 網路：', single_layer_dynamic_gru),
        5: ('多層靜態 LSTM 網路：', multi_layer_static_lstm),
        6: ('多層靜態 gru 網絡：', multi_layer_static_gru),
        7: ('多層靜態 LSTM 和 gru 混合網絡：', multi_layer_static_mix),
        8: ('多層動態 LSTM 網絡：', multi_layer_dynamic_lstm),
        9: ('多層動態 gru 網絡：', multi_layer_dynamic_gru),
        10: ('多層動態 LSTM 和 gru 混合網絡：', multi_layer_dynamic_mix),
        11: ('單層靜態雙向 LSTM 網絡：', single_layer_static_bi_lstm),
        12: ('單層動態雙向 LSTM 網絡：', single_layer_dynamic_bi_lstm),
        13: ('多層靜態雙向 LSTM 網絡：', multi_layer_static_bi_lstm),
        14: ('多層動態雙向 LSTM 網絡：', multi_layer_dynamic_bi_lstm),
    }
    label, build_network = networks[flag]
    print(label)
    # The builders return 2 or 3 values; only the first (the time-major
    # outputs) is used here.
    hiddens = build_network(input_x,n_steps,n_hidden)[0]

    print('hidden:',hiddens[-1].shape)

    # Feed the last time step's output through a softmax fully connected layer.
    output = tf.contrib.layers.fully_connected(inputs=hiddens[-1],num_outputs=n_classes,activation_fn = tf.nn.softmax)

    # ---- 3. Cross-entropy loss: J = -(sum y * log(a)) / n ----
    # Clip before the log: a softmax output that underflows to 0 would give
    # log(0) = -inf and 0 * -inf = NaN, which poisons the whole loss.
    safe_output = tf.clip_by_value(output, 1e-10, 1.0)
    cost = tf.reduce_mean(-tf.reduce_sum(input_y*tf.log(safe_output),axis=1))

    # ---- 4. Optimizer and evaluation ops ----
    train = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    # Compare the arg-max of the prediction with the arg-max of the one-hot label.
    correct = tf.equal(tf.argmax(output,1),tf.argmax(input_y,1))
    accuracy = tf.reduce_mean(tf.cast(correct,tf.float32))

    # Per-batch test results.
    test_accuracy_list = []
    test_cost_list = []

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Mini-batch training with Adam.
        for i in range(training_step):
            x_batch,y_batch = mnist.train.next_batch(batch_size = batch_size)
            # Reshape flat 784-vectors into 28 sequences of 28 elements.
            x_batch = x_batch.reshape([-1,n_steps,n_input])

            sess.run(train,feed_dict={input_x:x_batch,input_y:y_batch})
            if (i+1) % display_step == 0:
                # Metrics on the mini-batch that was just trained on.
                training_accuracy,training_cost = sess.run([accuracy,cost],feed_dict={input_x:x_batch,input_y:y_batch})
                print('Step {0}:Training set accuracy {1},cost {2}.'.format(i+1,training_accuracy,training_cost))

        # Evaluate in small mini-batches: running the whole test set at once
        # can run out of memory.
        for i in range(test_batches):
            x_batch,y_batch = mnist.test.next_batch(batch_size = test_batch_size)
            # Reshape flat 784-vectors into 28 sequences of 28 elements.
            x_batch = x_batch.reshape([-1,n_steps,n_input])
            test_accuracy,test_cost = sess.run([accuracy,cost],feed_dict={input_x:x_batch,input_y:y_batch})
            test_accuracy_list.append(test_accuracy)
            test_cost_list.append(test_cost)
            if (i+1)% 20 == 0:
                print('Step {0}:Test set accuracy {1},cost {2}.'.format(i+1,test_accuracy,test_cost))
        print('Test accuracy:',np.mean(test_accuracy_list))

In [35]:
# Train and evaluate network variant 1 (single-layer static LSTM).
mnist_rnn_classfication(1)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST-data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST-data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST-data/t10k-images-idx3-ubyte.gz
Extracting MNIST-data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
<class 'tensorflow.contrib.learn.python.learn.datasets.base.Datasets'>
Training data shape: (55000, 784)
Test data shape: (10000, 784)
Validation data shape: (5000, 784)
Training label shape: (55000, 10)
單層靜態 LSTM 網路：
hidden: (?, 128)
Step 200:Training set accuracy 0.6953125,cost 1.231407642364502.
S