In [1]:
import tensorflow as tf
import numpy as np
import math
import os
import sys  # use the real module rather than the undocumented `os.sys` alias

# Put the grandparent of the working directory on the import path
# (presumably where the `dataset` package imported below lives — confirm).
# The membership check keeps re-running this cell from appending duplicates.
_project_root = os.path.dirname(os.path.abspath('..'))
if _project_root not in sys.path:
    sys.path.append(_project_root)

  from ._conv import register_converters as _register_converters


In [2]:
def get_default_params():
    """Build the default hyper-parameter set used throughout the notebook.

    Returns:
        tf.contrib.training.HParams: a new container holding the values below.
    """
    defaults = dict(
        emb_size=64,          # width of each embedding vector
        t_size=50,            # time steps per input sequence
        lstm_size=[32, 32],   # units of each stacked LSTM layer
        lstm_layers=2,        # number of stacked LSTM layers
        fc_size=32,           # units of the fully connected layer
        dropout_rate=0.5,     # drop probability applied while training
        att_size=32,          # width of the attention projections
        batch_size=64,
        grad_thresh=1.0,      # gradient threshold (global-norm clipping)
        lr=0.001,             # learning rate
        cnt_thresh=10,        # word frequency threshold
    )
    return tf.contrib.training.HParams(**defaults)


params = get_default_params()


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



## 数据载入

In [3]:
from dataset.dataset import load_news

# Load the train/test splits, taking every loader setting from `params`.
train_data, test_data = load_news(
    batch_size=params.batch_size,
    cnt_thresh=params.cnt_thresh,
    t_size=params.t_size,
)

(50000, 50) (50000,)
(10000, 50) (10000,)


## 网络搭建

In [4]:
# Clear the default graph first so re-running the notebook builds the model
# into an empty graph.
tf.reset_default_graph()

vocal_size = train_data.voc_size    # row count of the embedding table
unit_O = 10    # number of output units, i.e. number of classes

### 输入部分

In [5]:
# Graph inputs: integer id sequences, integer class labels and a train/eval switch.
X = tf.placeholder(dtype=tf.int32, shape=[None, params.t_size])
Y = tf.placeholder(dtype=tf.int64, shape=[None])
is_training = tf.placeholder(dtype=tf.bool)    # training flag

# Step counter: passed to the optimizer so it is advanced on every update,
# and excluded from the trainable variables.
global_step = tf.Variable(
    initial_value=tf.zeros(shape=[], dtype=tf.int32),
    trainable=False,
    name='global_step',
)

Instructions for updating:
Colocations handled automatically by placer.


### 嵌入部分

In [6]:
# Embedding table drawn uniformly from [-1, 1]; looking up X produces a
# tensor of shape (batch_size, t_size, emb_size).
emb_init = tf.random_uniform_initializer(minval=-1.0, maxval=1.0)
with tf.variable_scope('emb', initializer=emb_init):
    emb_lookup = tf.get_variable(name='embedding',
                                 shape=[vocal_size, params.emb_size],
                                 dtype=tf.float32)
    emb = tf.nn.embedding_lookup(emb_lookup, X)

### RNN

In [7]:
# Half-width of the uniform init range, computed from the embedding width and
# the last LSTM layer's size, then divided by 3.
xavier_scale = 1 / math.sqrt(params.emb_size + params.lstm_size[-1]) / 3
initializer = tf.random_uniform_initializer(-xavier_scale, xavier_scale)

with tf.variable_scope('RNN', initializer=initializer):
    # DropoutWrapper has no `training` argument, so the keep probability is
    # switched with tf.cond. The value is the same for every layer, so the op
    # is built once here instead of once per loop iteration, and both
    # branches return tensors of the same dtype.
    keep_prob = tf.cond(is_training,
                        lambda: tf.constant(1.0 - params.dropout_rate),
                        lambda: tf.constant(1.0))

    # One dropout-wrapped LSTM cell per configured layer.
    cells = [
        tf.nn.rnn_cell.DropoutWrapper(
            tf.nn.rnn_cell.LSTMCell(params.lstm_size[i]),
            output_keep_prob=keep_prob)
        for i in range(params.lstm_layers)
    ]
    lstm_layers = tf.nn.rnn_cell.MultiRNNCell(cells)

    # (None, t_size, output_size)
    lstm_outputs, _ = tf.nn.dynamic_rnn(lstm_layers,
                                        inputs=emb, dtype=tf.float32)

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


## Attention

In [11]:
# reference: https://www.tensorflow.org/beta/tutorials/text/nmt_with_attention#write_the_encoder_and_decoder_model
# Scores every time step against the final RNN output, then pools the
# RNN outputs with the resulting softmax weights.
with tf.variable_scope('Attention', initializer=tf.truncated_normal_initializer(stddev=0.1)):
    # Final time step, with a length-1 time axis re-inserted so it
    # broadcasts against the per-step projection below.
    last_state_with_time = tf.expand_dims(lstm_outputs[:, -1, :], 1)

    # The three dense layers are created in the same order as before so
    # their auto-generated variable names do not change.
    steps_proj = tf.layers.dense(lstm_outputs, params.att_size)
    last_proj = tf.layers.dense(last_state_with_time, params.att_size)
    score = tf.layers.dense(tf.nn.tanh(steps_proj + last_proj), 1)

    att_w = tf.nn.softmax(score, axis=1)    # softmax over the time axis
    context_vec = tf.reduce_sum(att_w*lstm_outputs, axis=1)    # sum over the time axis

In [13]:
# Classifier head: ReLU dense layer with dropout that is active only while
# `is_training` is fed as True.
with tf.name_scope('FC'):
    fc = tf.layers.dense(context_vec, params.fc_size, activation=tf.nn.relu)
    fc = tf.layers.dropout(fc, rate=params.dropout_rate, training=is_training)

logits = tf.layers.dense(fc, unit_O, activation=None)    # output layer, no activation

with tf.name_scope('Eval'):
    loss = tf.losses.sparse_softmax_cross_entropy(labels=Y, logits=logits)
    predict = tf.argmax(logits, 1)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predict, Y), tf.float32))

with tf.name_scope('train_op'):
    # Take the learning rate from the shared hyper-parameters instead of a
    # second hard-coded literal, so changing `params.lr` actually has an effect.
    lr = params.lr
    t_vars = tf.trainable_variables()    # trainable variables
    # Clip gradients by their global norm before applying them.
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, t_vars),
                                      params.grad_thresh)
    optimizer = tf.train.AdamOptimizer(lr)
    train_op = optimizer.apply_gradients(zip(grads, t_vars),
                                         global_step=global_step)

init = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True    # allocate GPU memory on demand

Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.


## 训练

In [14]:
import numpy as np

# Train for a fixed number of epochs, logging the current batch's metrics
# and evaluating on the test set at fixed batch intervals.
with tf.Session(config=config) as sess:
    sess.run(init)
    epochs = 10
    log_every = 1000     # batches between training log lines
    eval_every = 5000    # batches between test-set evaluations

    batch_cnt = 0
    for epoch in range(epochs):
        for batch_data, batch_labels in train_data.next_batch():
            batch_cnt += 1
            loss_val, acc_val, _ = sess.run([loss, accuracy, train_op],
                                            feed_dict={X: batch_data,
                                                       Y: batch_labels,
                                                       is_training: True})

            # batch_cnt is already the 1-based number of processed batches,
            # so test it directly; the former `batch_cnt+1` fired one batch early.
            if batch_cnt % log_every == 0:
                print('epoch: {}, batch_loss: {}, batch_acc: {}'
                      .format(epoch, loss_val, acc_val))

            if batch_cnt % eval_every == 0:
                # Weight each batch accuracy by its size so a smaller final
                # batch does not skew the overall test accuracy.
                weighted_acc_sum = 0.0
                sample_cnt = 0
                for test_batch_data, test_batch_labels in test_data.next_batch():
                    test_acc_val = sess.run(accuracy, feed_dict={X: test_batch_data,
                                                                 Y: test_batch_labels,
                                                                 is_training: False})
                    batch_len = len(test_batch_labels)
                    weighted_acc_sum += test_acc_val * batch_len
                    sample_cnt += batch_len
                # No test batches at all -> report NaN rather than divide by zero.
                test_acc = weighted_acc_sum / sample_cnt if sample_cnt else float('nan')
                print('epoch: {}, test_acc: {}'.format(epoch, test_acc))

epoch: 1, batch_loss: 0.6478109359741211, batch_acc: 0.8125
epoch: 2, batch_loss: 0.29187753796577454, batch_acc: 0.859375
epoch: 3, batch_loss: 0.4985269010066986, batch_acc: 0.84375
epoch: 5, batch_loss: 0.08525906503200531, batch_acc: 0.96875
epoch: 6, batch_loss: 0.10814020037651062, batch_acc: 0.953125
epoch: 6, test_acc: 0.8997395634651184
epoch: 7, batch_loss: 0.055949945002794266, batch_acc: 0.984375
epoch: 8, batch_loss: 0.021084854379296303, batch_acc: 1.0
epoch: 10, batch_loss: 0.08329428732395172, batch_acc: 0.984375
epoch: 11, batch_loss: 0.013094472698867321, batch_acc: 1.0
epoch: 12, batch_loss: 0.022063706070184708, batch_acc: 1.0
epoch: 12, test_acc: 0.9064503312110901
epoch: 14, batch_loss: 0.029317373409867287, batch_acc: 0.984375
epoch: 15, batch_loss: 0.04670435190200806, batch_acc: 1.0
epoch: 16, batch_loss: 0.050063278526067734, batch_acc: 0.96875
epoch: 17, batch_loss: 0.0317554697394371, batch_acc: 0.984375
epoch: 19, batch_loss: 0.014806315302848816, batch_acc

采用 attention mechanism 后收敛速度大大加快，表现也有所提高，但是过拟合仍然存在。