In [1]:
import tensorflow as tf
from tensorflow import keras

### Embedding

In [2]:
# Build the integer codes 0..9 for 10 words and shuffle their order.
x = tf.random.shuffle(tf.range(10))
# Embedding layer for a 10-word vocabulary; each word is represented by a
# vector of length 4.
net = keras.layers.Embedding(input_dim=10, output_dim=4)
# Look up the word vectors for the shuffled codes.
out = net(x)
out

<tf.Tensor: id=19, shape=(10, 4), dtype=float32, numpy=
array([[ 0.04661176, -0.01172519, -0.01176087,  0.03441907],
       [-0.0439463 ,  0.04628224, -0.03346934,  0.02707321],
       [ 0.01686611,  0.01982513, -0.0047652 , -0.02863593],
       [-0.0164413 ,  0.0465979 , -0.03928905, -0.04807564],
       [-0.0037771 , -0.00973933,  0.04973311,  0.00934125],
       [ 0.03580567,  0.04993408,  0.0094985 ,  0.04075507],
       [ 0.04588017, -0.00286304,  0.00501217, -0.02724497],
       [ 0.0308351 ,  0.02274876, -0.02220624,  0.01495382],
       [ 0.01969836,  0.02583   , -0.00447027, -0.0243047 ],
       [-0.04000853, -0.00137558, -0.01037323, -0.0041633 ]],
      dtype=float32)>

In [3]:
# Inspect the lookup table held inside the Embedding layer
net.embeddings

<tf.Variable 'embedding/embeddings:0' shape=(10, 4) dtype=float32, numpy=
array([[-0.0164413 ,  0.0465979 , -0.03928905, -0.04807564],
       [ 0.01969836,  0.02583   , -0.00447027, -0.0243047 ],
       [-0.04000853, -0.00137558, -0.01037323, -0.0041633 ],
       [ 0.04661176, -0.01172519, -0.01176087,  0.03441907],
       [ 0.04588017, -0.00286304,  0.00501217, -0.02724497],
       [-0.0439463 ,  0.04628224, -0.03346934,  0.02707321],
       [ 0.03580567,  0.04993408,  0.0094985 ,  0.04075507],
       [-0.0037771 , -0.00973933,  0.04973311,  0.00934125],
       [ 0.01686611,  0.01982513, -0.0047652 , -0.02863593],
       [ 0.0308351 ,  0.02274876, -0.02220624,  0.01495382]],
      dtype=float32)>

### SimpleRNNCell
- 通过 SimpleRNNCell 层的使用,我们可以非常深入地理解循环神经网络前向运算的每个细节

In [4]:
# Create a SimpleRNNCell with 3 units and build its weights for inputs whose
# feature dimension is 4 (the batch dimension is left unspecified).
cell = keras.layers.SimpleRNNCell(3)
cell.build(input_shape=(None, 4))

In [5]:
# SimpleRNNCell maintains 3 variables internally:
# kernel: W(xh)
# recurrent_kernel: W(hh)
# bias: bias vector b
cell.trainable_variables

[<tf.Variable 'kernel:0' shape=(4, 3) dtype=float32, numpy=
 array([[ 0.2711134 ,  0.64594245, -0.79462874],
        [-0.08172673, -0.89395094, -0.46817762],
        [-0.5066167 ,  0.4430678 , -0.24764663],
        [-0.49155673,  0.3404919 , -0.4044373 ]], dtype=float32)>,
 <tf.Variable 'recurrent_kernel:0' shape=(3, 3) dtype=float32, numpy=
 array([[ 0.54365706,  0.8365746 , -0.06767614],
        [-0.66276455,  0.3784318 , -0.64616764],
        [-0.51495665,  0.39614686,  0.76018906]], dtype=float32)>,
 <tf.Variable 'bias:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)>]

In [6]:
# Example 2
# Input [b, s, n]: 4 sentences of length 80, each word a vector of length 100.
x = tf.random.normal([4, 80, 100])
# Build the cell with a state vector of length 64.
cell = keras.layers.SimpleRNNCell(64)
# Take the first word of every sentence: [b, n].
xt = x[:, 0, :]
# Initial state vector, wrapped in a list so the state format is uniform.
h0 = [tf.zeros([4, 64])]
# One forward step; the cell returns the output and the new state list.
out, h1 = cell(xt, h0)
print(out.shape, h1[0].shape)

(4, 64) (4, 64)


**可以看到经过一个时间戳的计算后,输出和状态张量的 shape 都为 [b, h]**

In [7]:
# The memory vector h is not maintained by SimpleRNNCell: the caller has to
# initialise h0 and carry the state h_t from one time step to the next.
h = h0
# Unstack the input along the sequence-length axis so that each xt is [b, n].
for xt in tf.unstack(x, axis=1):
    out, h = cell(xt, h)  # forward step; out and h are both overwritten
# After the loop, `out` holds the output of the last time step, which serves
# as the final output of the network.

### 多层 SimpleRNNCell 网络

In [8]:
# Input [b, s, n]: 4 sequences, 80 time steps, 100 features per step.
x = tf.random.normal([4, 80, 100])
# Two cells stacked on top of each other, each with its own state list.
cell0 = keras.layers.SimpleRNNCell(64)
cell1 = keras.layers.SimpleRNNCell(64)
h0 = [tf.zeros([4, 64])]
h1 = [tf.zeros([4, 64])]
# Loop over the time axis to run the whole network forward: at each time step
# the input xt first goes through the first layer to give out0, which then
# goes through the second layer to give out1.
for xt in tf.unstack(x, axis=1):
    # xt is the input of the first cell, producing out0
    out0, h0 = cell0(xt, h0)
    # the previous cell's output out0 is the input of this cell
    out1, h1 = cell1(out0, h1)
print(out1.shape)

(4, 64)


### SimpleRNN
- 实际使用中,为了简便,不希望手动参与循环神经网络内部的计算过程
- 通过 SimpleRNN 层这一高层接口,可以非常方便地实现此目的。

In [9]:
# Forward pass through a single recurrent layer.
# The SimpleRNN layer is created with a state vector of length 64.
layer = keras.layers.SimpleRNN(units=64)
x = tf.random.normal(shape=[4, 80, 100])
out = layer(x)
print(out.shape)

(4, 64)


In [10]:
# Pass return_sequences=True to get the outputs of all time steps back.
layer = keras.layers.SimpleRNN(units=64, return_sequences=True)
out = layer(x)
# The middle dimension (80) of the printed shape is the time-step dimension.
print(out.shape)

(4, 80, 64)


In [11]:
# Multi-layer SimpleRNN.
# Every layer needs the previous layer's state output at each time step, so
# all RNN layers except the last one must return the output of every time
# step, which is done by setting return_sequences=True.
net = keras.Sequential()
net.add(keras.layers.SimpleRNN(64, return_sequences=True))
net.add(keras.layers.SimpleRNN(64))
out = net(x)
out.shape

TensorShape([4, 64])

### RNN 情感分类问题实战

#### 1、加载数据

In [12]:
batchsz = 128 # batch size
total_words = 10000 # vocabulary size N_vocab
max_review_len = 80 # maximum sentence length s; longer sentences are truncated, shorter ones are padded
embedding_len = 100 # word-vector feature length n
# Load the IMDB dataset; the data is integer-encoded, one number per word
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)
# Print the input shape, the length of the first sample, and the label shape
print(x_train.shape, len(x_train[0]), y_train.shape)
print(x_test.shape, len(x_test[0]), y_test.shape)

(25000,) 218 (25000,)
(25000,) 68 (25000,)


In [20]:
# x_train:[b, 80]
# x_test: [b, 80]
# Truncate and pad the sentences to equal length; long sentences keep their
# trailing part, short sentences are padded at the front
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

In [21]:
# Training pipeline: slice into samples, shuffle, then batch. The last batch
# is dropped when it has fewer than batchsz samples.
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000)
db_train = db_train.batch(batchsz, drop_remainder=True)
# Test pipeline: same batching, no shuffling.
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
# Report the padded shapes and the label value range.
print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)

x_train shape: (25000, 80) tf.Tensor(1, shape=(), dtype=int64) tf.Tensor(0, shape=(), dtype=int64)
x_test shape: (25000, 80)


#### 2、网络模型

In [59]:
# Word vectors have length n = 100 (embedding_len); the RNN state vector
# length h is given by the `units` argument.
# The classifier solves a 2-class task, so it has a single output node.
class MyRNN(keras.Model):
    """Two-layer SimpleRNN sentiment classifier built from Keras layers.

    Pipeline: Embedding -> SimpleRNN x 2 -> Dense(32)/Dropout/ReLU/Dense(1)
    -> sigmoid. Reads the notebook-level constants `total_words`,
    `embedding_len` and `max_review_len`.
    """

    def __init__(self, units):
        """Create the sub-layers.

        Args:
            units: length of the state vector of each SimpleRNN layer.
        """
        super(MyRNN, self).__init__()
        # Word embedding: [b, 80] => [b, 80, 100]
        self.embedding = keras.layers.Embedding(total_words, embedding_len,
                                                input_length=max_review_len)
        # Recurrent stack; the first layer returns every time step so that
        # the second layer receives a full sequence as input.
        self.rnn = keras.Sequential([
            keras.layers.SimpleRNN(units, dropout=0.5, return_sequences=True),
            keras.layers.SimpleRNN(units, dropout=0.5)
        ])
        # Classification head applied to the RNN output features (2 classes):
        # [b, 80, 100] => [b, units] => [b, 1]
        self.outlayer = keras.Sequential([
            keras.layers.Dense(32),
            keras.layers.Dropout(rate=0.5),
            keras.layers.ReLU(),
            keras.layers.Dense(1)])

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: integer-encoded sentences, [b, 80].
            training: forwarded to the recurrent stack and the classification
                head, both of which contain dropout.

        Returns:
            Tensor [b, 1] with the sigmoid of the head's output, i.e.
            p(y is positive | x).
        """
        x = inputs  # [b, 80]
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(x)
        # recurrent stack: [b, 80, 100] => [b, units]
        x = self.rnn(x, training=training)
        # the last layer's final output feeds the classifier: [b, units] => [b, 1]
        x = self.outlayer(x, training=training)
        # p(y is pos|x)
        prob = tf.sigmoid(x)

        return prob

#### 3、训练与测试

In [60]:
units = 64  # RNN state vector length h
epochs = 10  # number of training epochs
model = MyRNN(units)  # create the model
# Compile: optimizer, loss and metric.
model.compile(
    optimizer=keras.optimizers.Adam(0.001),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)
# Train, validating on the test dataset.
model.fit(db_train, epochs=epochs, validation_data=db_test)
# Evaluate on the test dataset.
model.evaluate(db_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.5211831464217259, 0.81774837]

## 2、LSTM
### LSTMCell

In [44]:
# The LSTM state List holds two state variables, i.e. [h_t, c_t].
# They must be initialised separately: the first List element is h_t, the second is c_t.
x = tf.random.normal([2, 80, 100])
xt = x[:, 0, :] # input at the first time step
cell = keras.layers.LSTMCell(64)
# Initialise the state List [h, c]
state = [tf.zeros([2, 64]), tf.zeros([2, 64])]
# Forward step
out, state = cell(xt, state)
id(out), id(state[0]), id(state[1])
# The returned output `out` and the first List element h_t have the same id.
# As with the basic RNN, this keeps the return format uniform.

(139754918809272, 139754918809272, 139753464692576)

In [46]:
# Unstack along the sequence-length axis and feed each step to the LSTM cell

for xt in tf.unstack(x, axis=1):
    # Forward step; out and state are overwritten at every time step
    out, state = cell(xt, state)

### LSTM 层

In [47]:
x = tf.random.normal(shape=[2, 80, 100])
# Pass the sequence through an LSTM layer; by default it returns the output h
# of the last time step only.
layer = keras.layers.LSTM(units=64)
out = layer(x)
out.shape

TensorShape([2, 64])

In [48]:
# Create the LSTM layer so that it returns the output of every time step.
# `keras.layers` is used because only `tf` and `keras` are imported.
layer = keras.layers.LSTM(64, return_sequences=True)
out = layer(x)
out.shape

TensorShape([2, 80, 64])

In [49]:
# Multi-layer network: several LSTM layers wrapped in a Sequential container.
# Every layer except the last sets return_sequences=True, because a
# non-final LSTM layer needs the previous layer's output at all time steps
# as its input.
net = keras.Sequential()
net.add(keras.layers.LSTM(64, return_sequences=True))
net.add(keras.layers.LSTM(64))
out = net(x)
out.shape

TensorShape([2, 64])

#### LSTM情感分类实战

In [54]:
class MyLSTM(keras.Model):
    """Two-layer LSTM sentiment classifier built from Keras layers.

    Pipeline: Embedding -> LSTM x 2 -> Dense(32)/Dropout/ReLU/Dense(1)
    -> sigmoid. Reads the notebook-level constants `total_words`,
    `embedding_len` and `max_review_len`.
    """

    def __init__(self, units):
        """Create the sub-layers.

        Args:
            units: length of the state vector of each LSTM layer.
        """
        super(MyLSTM, self).__init__()
        # Word embedding: [b, 80] => [b, 80, 100]
        self.embedding = keras.layers.Embedding(total_words, embedding_len,
                                                input_length=max_review_len)
        # Recurrent stack; the first layer returns every time step so that
        # the second layer receives a full sequence as input.
        self.rnn = keras.Sequential([
            keras.layers.LSTM(units, dropout=0.5, return_sequences=True),
            keras.layers.LSTM(units, dropout=0.5)
        ])
        # Classification head applied to the LSTM output features (2 classes):
        # [b, 80, 100] => [b, units] => [b, 1]
        self.outlayer = keras.Sequential([
            keras.layers.Dense(32),
            keras.layers.Dropout(rate=0.5),
            keras.layers.ReLU(),
            keras.layers.Dense(1)])

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: integer-encoded sentences, [b, 80].
            training: forwarded to the recurrent stack and the classification
                head, both of which contain dropout.

        Returns:
            Tensor [b, 1] with the sigmoid of the head's output, i.e.
            p(y is positive | x).
        """
        x = inputs  # [b, 80]
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(x)
        # recurrent stack: [b, 80, 100] => [b, units]
        x = self.rnn(x, training=training)
        # the last layer's final output feeds the classifier: [b, units] => [b, 1]
        x = self.outlayer(x, training=training)
        # p(y is pos|x)
        prob = tf.sigmoid(x)

        return prob

**训练与测试**

In [55]:
units = 32  # length of the LSTM state vector
epochs = 10  # number of training epochs

model = MyLSTM(units)
# Compile: optimizer, loss and metric. The optimizer and loss are reached
# through `keras`, since only `tf` and `keras` are imported.
model.compile(optimizer=keras.optimizers.Adam(0.001),
              loss=keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])
# Train, validating on the test dataset
model.fit(db_train, epochs=epochs, validation_data=db_test)
# Evaluate on the test dataset
model.evaluate(db_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.8195503366299165, 0.816867]

## 3、GRU使用方法
### GRUCell 、GRU层

In [50]:
# Initialise the state vector; a GRU has only one.
h = [tf.zeros([2, 64])]
# New GRU cell with a state vector of length 64. `keras.layers` is used
# because only `tf` and `keras` are imported.
cell = keras.layers.GRUCell(64)
# Unstack along the time-step axis and pass each step through the cell.
for xt in tf.unstack(x, axis=1):
    out, h = cell(xt, h)

# Output shape
out.shape

TensorShape([2, 64])

In [51]:
# Two stacked GRU layers; the first returns every time step so that the
# second layer receives a full sequence as input. `keras.layers` is used
# because only `tf` and `keras` are imported.
net = keras.Sequential([
    keras.layers.GRU(64, return_sequences=True),
    keras.layers.GRU(64)
])
out = net(x)
out.shape

TensorShape([2, 64])

In [58]:
# Show the first training review as integer word codes
# (x_train was padded/truncated to max_review_len by pad_sequences earlier).
x_train[0]

array([  15,  256,    4,    2,    7, 3766,    5,  723,   36,   71,   43,
        530,  476,   26,  400,  317,   46,    7,    4,    2, 1029,   13,
        104,   88,    4,  381,   15,  297,   98,   32, 2071,   56,   26,
        141,    6,  194, 7486,   18,    4,  226,   22,   21,  134,  476,
         26,  480,    5,  144,   30, 5535,   18,   51,   36,   28,  224,
         92,   25,  104,    4,  226,   65,   16,   38, 1334,   88,   12,
         16,  283,    5,   16, 4472,  113,  103,   32,   15,   16, 5345,
         19,  178,   32], dtype=int32)