# Basic bidirectional LSTM model on Answer Selection task

In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Hyperparameters shared by the graph definition and the training loop below.
sentence_length = 100  # time steps per question / per answer (placeholder shape)
vector_size = 300  # width of each word vector (placeholder shape)
batch_size = 16  # examples per training step, passed to DataGenerator.init_epoch
hidden_units1 = 64  # units per direction in the first bidirectional LSTM layer
hidden_units2 = 32  # units per direction in the second bidirectional LSTM layer
learning_rate = 0.001  # step size handed to the Adam optimizer
drop_rate = 0.5  # fraction of units dropped by every dropout layer while training
augment_feature_num = 10  # width of the extra feature vector concatenated before the MLP
dense_units1 = 64  # units in the first dense layer
dense_units2 = 32  # units in the second dense layer
reg_coefficient = 0.001  # weight of the L2 penalty added to the MSE loss

## Data Generator

You can implement the generator however you want; just make sure it provides the *has_next* and *get_batch* functions with the corresponding output format.

In [3]:
class DataGenerator:
    '''Serve aligned mini-batches from the arrays stored in an ``.npz`` archive.

    Every array in the archive is loaded into memory, ordered by its key name
    (``sorted(npz.files)``). All arrays are indexed along their first axis with
    the same indices, so row ``i`` of each array belongs to the same example.

    Typical use::

        gen = DataGenerator('data.npz')
        gen.init_epoch(batch_size=16)
        while gen.has_next():
            batch = gen.get_batch()
    '''

    def __init__(self, data_dir):
        '''Load all arrays from the archive.

        data_dir: path (or file object) accepted by ``np.load``. Despite the
            name, this is the ``.npz`` file itself, not a directory.
        '''
        # NpzFile keeps the underlying file open until closed; the context
        # manager releases it once every array has been read into memory.
        with np.load(data_dir) as npz_data:
            self._data = [npz_data[name] for name in sorted(npz_data.files)]
        # The first array defines how many examples there are.
        self._num_examples = self._data[0].shape[0]
        # Default epoch state so has_next()/get_batch() are usable even if the
        # caller forgets init_epoch(): one unshuffled batch with everything.
        self._index_in_epoch = 0
        self.batch_size = self._num_examples

    def shuffle_data(self, idx):
        '''Reorder every stored array along axis 0 using the index array ``idx``.'''
        self._data = [array[idx] for array in self._data]

    def get_data(self, start, end):
        '''Return a tuple holding rows ``start:end`` of every stored array.'''
        return tuple(array[start:end] for array in self._data)

    def init_epoch(self, batch_size, shuffle=True):
        '''Rewind to the first example and set the batch size for a new epoch.

        batch_size: number of examples per batch; any value <= 0 means "the
            whole data set in a single batch".
        shuffle: when True, apply one random permutation (drawn from NumPy's
            global RNG) to all arrays so that they stay aligned.
        '''
        self._index_in_epoch = 0
        if shuffle:
            idx = np.arange(0, self._num_examples)
            np.random.shuffle(idx)
            self.shuffle_data(idx)
        # Without shuffling the order is left untouched; re-indexing with the
        # identity permutation would only copy every array.
        if batch_size <= 0:
            self.batch_size = self._num_examples
        else:
            self.batch_size = batch_size

    def has_next(self):
        '''return bool: whether there is a next batch'''
        return self._index_in_epoch < self._num_examples

    def get_batch(self):
        '''
        Return the next batch and advance the epoch cursor.

        The result is a tuple with one slice per array in the archive, in
        sorted-key order. The last batch of an epoch may hold fewer than
        ``batch_size`` rows.

        The training code in this notebook unpacks the tuple as
        (batch_input_q, batch_input_a, augmented_data, score_label), where
        batch_input_q: word2vec representation for the question in shape [batch_size, sentence_length, vector_size]
        batch_input_a: word2vec representation for the answer in shape [batch_size, sentence_length, vector_size]
        augmented_data: the extra data for MLP in shape [batch_size, augment_feature_num]
        score_label: ground truth semantic similarity score in shape [batch_size, 1]

        NOTE(review): earlier notes also listed a trailing ``comment_id``
        field; whether the archive stores it cannot be told from this class.
        The tuple length always equals the number of arrays in the file.
        '''
        start = self._index_in_epoch
        self._index_in_epoch += self.batch_size
        end = self._index_in_epoch
        return self.get_data(start, end)

## Model Definition

In [4]:
# Start from an empty default graph so re-running the cells below does not
# stack duplicate ops/variables on top of a previous definition.
tf.reset_default_graph()

In [5]:
# placeholders: graph inputs fed on every sess.run; the leading None is the batch dimension
batch_input_q = tf.placeholder(dtype=tf.float32, shape=(None, sentence_length, vector_size))
batch_input_a = tf.placeholder(dtype=tf.float32, shape=(None, sentence_length, vector_size))
augmented_data = tf.placeholder(dtype=tf.float32, shape=(None, augment_feature_num))
score_label = tf.placeholder(dtype=tf.float32, shape=(None, 1))
# Scalar switch: True while training (dropout active), False for evaluation.
enable_dropout = tf.placeholder(dtype=tf.bool, shape=())
# Question and answer are joined along the time axis (axis 1), so the LSTMs
# see one sequence of 2 * sentence_length steps per example.
batch_input = tf.concat(values=[batch_input_q, batch_input_a], axis=1)

In [6]:
# bidirectional lstms: two stacked layers over the concatenated question+answer sequence
cell_fw1 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units1, name='forward1')
cell_bw1 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units1, name='backward1')
outputs1, states1 = tf.nn.bidirectional_dynamic_rnn(
    cell_fw=cell_fw1, cell_bw=cell_bw1, inputs=batch_input, dtype=tf.float32)
# outputs1 is a (forward, backward) pair; concatenating on the feature axis
# gives 2 * hidden_units1 features per time step for the second layer.
batch_middle = tf.concat(values=outputs1, axis=2)
cell_fw2 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units2, name='forward2')
cell_bw2 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units2, name='backward2')
outputs2, states2 = tf.nn.bidirectional_dynamic_rnn(
    cell_fw=cell_fw2, cell_bw=cell_bw2, inputs=batch_middle, dtype=tf.float32)
output_fw2, output_bw2 = outputs2
# The two directions are summed here (not concatenated), so the feature
# width stays at hidden_units2.
bilstm_output = output_fw2 + output_bw2
# Flatten every time step into one vector per example. The factor 2 comes from
# the sequence length (question + answer = 2 * sentence_length steps), not
# from the two LSTM directions.
bilstm_flaten = tf.reshape(bilstm_output, (-1, hidden_units2 * sentence_length * 2))
# Append the hand-crafted features before the MLP.
mlp_batch_input = tf.concat(values=[bilstm_flaten, augmented_data], axis=1)

In [7]:
# multi-layers perceptrons
dropout1 = tf.layers.dropout(mlp_batch_input, rate=drop_rate, training=enable_dropout, name='dropout1')
dense1 = tf.layers.dense(dropout1, dense_units1, activation=tf.nn.relu, name='dense1')
dropout2 = tf.layers.dropout(dense1, rate=drop_rate, training=enable_dropout, name='dropout2')
dense2 = tf.layers.dense(dropout2, dense_units2, activation=tf.nn.relu, name='dense2')
dropout3 = tf.layers.dropout(dense2, rate=drop_rate, training=enable_dropout, name='dropout3')
logits = tf.layers.dense(dropout3, 1, name='final_output')
batch_loss = tf.losses.mean_squared_error(score_label, logits)
tf.summary.scalar('batch_mse_loss', batch_loss)

<tf.Tensor 'batch_mse_loss:0' shape=() dtype=string>

In [8]:
# regularization term
tv = tf.trainable_variables()
regularization = tf.reduce_sum([tf.nn.l2_loss(v) for v in tv])
tf.summary.scalar('regularization', regularization)
loss_with_reg = batch_loss + reg_coefficient * regularization
tf.summary.scalar('loss_with_regularization', loss_with_reg)

<tf.Tensor 'loss_with_regularization:0' shape=() dtype=string>

In [9]:
# optimizer
# optimizer: Adam on the regularised loss (MSE + weighted L2 penalty)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
# Running train_op performs one parameter update.
train_op = optimizer.minimize(loss_with_reg, name='train_op')

## Model training

In [10]:
# Output locations: the run directory name encodes the layer sizes and the
# regularisation weight so that different configurations do not overwrite
# each other.
root_dir = 'result_qa_basic_' + '_'.join(
    str(setting)
    for setting in (hidden_units1, hidden_units2, dense_units1, dense_units2, reg_coefficient))
log_dir = '/'.join((root_dir, 'log'))  # checkpoints
tensorboard_dir = '/'.join((root_dir, 'tensorboard'))  # summary event files

# Input archives (.npz files read by DataGenerator).
cQA_train_embedding_dir = 'cQA_train_embedding.npz'
cQA_test_embedding_dir = 'cQA_test_embedding.npz'

# Training schedule.
epoch_num = 10

# Checkpointing: restore from log_dir before training / save every N epochs.
load_model = False
save_model = True
save_model_epoch = 1

# Progress reporting: every N training batches / every N epochs on the test set.
print_train_info = True
print_test_info = True
print_train_batch = 50
print_test_epoch = 1

# Built after the whole graph exists so they cover all variables and summaries.
saver = tf.train.Saver()
summary_op = tf.summary.merge_all()

In [11]:
# Load the train and test archives fully into memory (see DataGenerator.__init__).
train_data_generator = DataGenerator(cQA_train_embedding_dir)
test_data_generator = DataGenerator(cQA_test_embedding_dir)

In [None]:
# training
# Runs epoch_num passes over the training set. Every print_train_batch steps
# the merged summaries are written to TensorBoard and the batch MSE is printed;
# every print_test_epoch epochs the whole test set is evaluated in one batch
# with dropout disabled; every save_model_epoch epochs a checkpoint is saved.
with tf.Session() as sess:
    writer = tf.summary.FileWriter(tensorboard_dir, sess.graph)
    # try/finally: the event file must be flushed and closed even when a step
    # raises or the cell is interrupted, otherwise the writer is leaked.
    try:
        if load_model:
            saver.restore(sess, log_dir + "/model.ckpt")
        else:
            sess.run(tf.global_variables_initializer())
        batch_i = 0  # global step counter across epochs (TensorBoard x-axis)
        for epoch_i in range(epoch_num):
            train_data_generator.init_epoch(batch_size)
            # Stays None if the generator yields no batch, so the summary line
            # below cannot hit an undefined name.
            current_batch_loss = None
            while train_data_generator.has_next():
                input_q, input_a, aug_data, labels = train_data_generator.get_batch()
                _, summary_log, current_batch_loss = sess.run([train_op, summary_op, batch_loss], feed_dict={
                    batch_input_q: input_q,
                    batch_input_a: input_a,
                    augmented_data: aug_data,
                    score_label: labels,
                    enable_dropout: True})
                batch_i += 1
                if print_train_info and batch_i % print_train_batch == 0:
                    writer.add_summary(summary_log, batch_i)
                    print('Epoch %d batch %d: training loss %f' % (epoch_i, batch_i, current_batch_loss.item()))
            if current_batch_loss is not None:
                # Note: this is the loss of the last batch only, not an epoch average.
                print('Epoch %d done: training loss %f' % (epoch_i, current_batch_loss.item()))
            if print_test_info and epoch_i % print_test_epoch == 0:
                # batch_size -1 => the whole test set as a single, unshuffled batch.
                test_data_generator.init_epoch(-1, False)
                input_q, input_a, aug_data, labels = test_data_generator.get_batch()
                test_batch_loss = sess.run([batch_loss], feed_dict={
                    batch_input_q: input_q,
                    batch_input_a: input_a,
                    augmented_data: aug_data,
                    score_label: labels,
                    enable_dropout: False})
                print('Epoch %d done: testing loss %f' % (epoch_i, test_batch_loss[0].item()))
            if save_model and epoch_i % save_model_epoch == 0:
                # Same prefix every time: the latest checkpoint overwrites the previous one.
                save_path = saver.save(sess, log_dir + "/model.ckpt")
    finally:
        writer.close()

Epoch 0 batch 50: training loss 0.277439
Epoch 0 batch 100: training loss 0.153597
Epoch 0 batch 150: training loss 0.234903
Epoch 0 batch 200: training loss 0.097543
Epoch 0 batch 250: training loss 0.116364
Epoch 0 batch 300: training loss 0.318267
Epoch 0 batch 350: training loss 0.231397
Epoch 0 batch 400: training loss 0.155965
Epoch 0 batch 450: training loss 0.171484
Epoch 0 batch 500: training loss 0.137690
Epoch 0 batch 550: training loss 0.182077
Epoch 0 batch 600: training loss 0.252278
Epoch 0 batch 650: training loss 0.247577
Epoch 0 batch 700: training loss 0.182637
Epoch 0 batch 750: training loss 0.204921
Epoch 0 batch 800: training loss 0.186273
Epoch 0 batch 850: training loss 0.217641
Epoch 0 done: training loss 0.127808
Epoch 0 done: testing loss 0.181902
Epoch 1 batch 900: training loss 0.189113
Epoch 1 batch 950: training loss 0.204325
Epoch 1 batch 1000: training loss 0.168804
Epoch 1 batch 1050: training loss 0.190128
Epoch 1 batch 1100: training loss 0.186275
E