In [2]:
import tensorflow as tf 
import tensorflow.keras as keras

In [3]:
# units=3: at every step the cell emits a 3-dimensional output / hidden state.
cell = keras.layers.SimpleRNNCell(units=3)
# Create the weights for inputs that carry 4 features; the batch size is left open (None).
cell.build((None, 4))

# As the sequence gets longer, a plain RNN tends to suffer from exploding or vanishing gradients.

In [4]:
cell.trainable_variables
# kernel:0 is W_xh, the matrix applied to the input (shape (4, 3) in the output below).
# recurrent_kernel:0 is W_hh, the matrix applied to the previous state (shape (3, 3)).
# bias:0 is the bias vector (shape (3,), all zeros right after build).

[<tf.Variable 'kernel:0' shape=(4, 3) dtype=float32, numpy=
 array([[-0.89848304, -0.8368476 , -0.1992647 ],
        [-0.09867746, -0.11240393, -0.26739293],
        [-0.80526453,  0.72694147, -0.46086255],
        [ 0.21163678, -0.3832984 ,  0.41287196]], dtype=float32)>,
 <tf.Variable 'recurrent_kernel:0' shape=(3, 3) dtype=float32, numpy=
 array([[ 0.17403531,  0.98466516, -0.01210808],
        [ 0.5858792 , -0.11341876, -0.80242246],
        [-0.7914906 ,  0.13255578, -0.5966336 ]], dtype=float32)>,
 <tf.Variable 'bias:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)>]

In [5]:
# Drive a single SimpleRNNCell by hand for one time step.
simp_cell = keras.layers.SimpleRNNCell(units=64)
simp_cell.build((None, 100))  # each time step carries 100 input features

# Input tensor of shape [8, 80, 100]; axis 1 is the one stepped through over time.
input_data = tf.random.normal(shape=(8, 80, 100))
xt0 = input_data[:, 0]  # slice for the first time step -> shape [8, 100]

# One step from an all-zero initial state. The cell hands back its output plus a
# list holding the new state (bound to the name xt1 here).
out, xt1 = simp_cell(xt0, [tf.zeros(shape=(8, 64))])

In [6]:
# Shapes of the step output and of the first entry of the returned state list.
out.shape,xt1[0].shape

(TensorShape([8, 64]), TensorShape([8, 64]))

In [7]:
# Inspect the weights created for the 64-unit cell built on 100 input features.
simp_cell.trainable_variables

[<tf.Variable 'kernel:0' shape=(100, 64) dtype=float32, numpy=
 array([[-0.10118189,  0.16297732, -0.13973466, ...,  0.18670829,
          0.01483352, -0.09996396],
        [-0.04824269, -0.11835521, -0.0110748 , ...,  0.01231547,
         -0.0790868 , -0.13209271],
        [ 0.10127021,  0.00553292,  0.14211215, ..., -0.01101975,
          0.14316942,  0.13125478],
        ...,
        [ 0.02747473, -0.02128161,  0.15769969, ...,  0.16052411,
         -0.05295689, -0.18953992],
        [-0.05982266,  0.1769282 ,  0.16905452, ...,  0.02833901,
         -0.189273  , -0.10281652],
        [-0.13563968,  0.10484405, -0.10137802, ...,  0.16961826,
         -0.06462876,  0.01164556]], dtype=float32)>,
 <tf.Variable 'recurrent_kernel:0' shape=(64, 64) dtype=float32, numpy=
 array([[-0.20373714, -0.06992456,  0.05466264, ...,  0.1458063 ,
         -0.17252831, -0.13206734],
        [ 0.13179272, -0.1348455 ,  0.19324532, ...,  0.01583637,
          0.08473799,  0.16714104],
        [ 0.001593

In [8]:
# Stacked (two-layer) RNN assembled from individual cells.
simp_rnn_cell0, simp_rnn_cell1 = (keras.layers.SimpleRNNCell(units=64) for _ in range(2))

input_data = tf.random.normal(shape=(8, 80, 100))
xt0 = input_data[:, 0]  # slice for the first time step -> shape [8, 100]

# Each layer keeps its own state; both start from zeros.
state0 = [tf.zeros(shape=(8, 64))]
state1 = [tf.zeros(shape=(8, 64))]

# A single time step: layer 0 consumes the input, layer 1 consumes layer 0's output.
# state0/state1 are rebound to the states returned by this step.
out0, state0 = simp_rnn_cell0(xt0, state0)
out1, state1 = simp_rnn_cell1(out0, state1)

In [9]:
# Run the two stacked cells over the whole batch of sentences, one time step at a time.
#
# Start from fresh zero states: state0/state1 were already advanced by the
# single-step demo in the previous cell, so reusing them would push time step 0
# through the network twice. It would also make this cell produce different
# results each time it is re-run, because it would continue from its own final state.
state0 = [tf.zeros([8, 64])]
state1 = [tf.zeros([8, 64])]

for words in tf.unstack(input_data, axis=1):  # iterate along axis 1 (time): one [8, 100] slice per step
    out0, state0 = simp_rnn_cell0(words, state0)  # layer 0 reads the input slice
    out1, state1 = simp_rnn_cell1(out0, state1)   # layer 1 reads layer 0's output
# After the loop, out1 is the top layer's output for the final time step.


In [10]:
units = 64  # output dimensionality of each RNN layer
# A simple way to assemble an RNN: let a Sequential container manage the layers.
rnn = keras.Sequential()
# return_sequences=True makes the layer emit its result at every time step, so the
# next layer receives a full sequence as input; without it a layer only returns
# the value computed at the last time step.
rnn.add(keras.layers.SimpleRNN(units, dropout=0.5, return_sequences=True, unroll=True))
rnn.add(keras.layers.SimpleRNN(units, dropout=0.5, unroll=True))
# Unlike SimpleRNNCell, SimpleRNN does not need us to slice the input along the
# time axis ourselves; the layer steps through the sequence on its own.
input_data = tf.random.normal(shape=(8, 80, 100))
out = rnn(input_data)
# out is the last layer's output at the final time step.

In [11]:
# Shape of the stacked model's result for the [8, 80, 100] input.
out.shape

TensorShape([8, 64])