# Optimized BLSTM model on Answer Selection task

In [1]:
import numpy as np
import tensorflow as tf

In [2]:
sentence_length = 100
vector_size = 300
batch_size = 16
hidden_units1 = 64
hidden_units2 = 64
learning_rate = 0.001
droprate_output = 0.8
droprate_state = 1.0
drop_rate = 0.5
augment_feature_num = 10
dense_units1 = 128
dense_units2 = 128
reg_coefficient = 0.001

## Data Generator

You can impelment whatever you want, just make sure to have the *has_next* and *get_batch* function with the corresponding output format.

In [3]:
class DataGenerator:
    def __init__(self, data_dir):
        '''Initialization'''
        npz_data = np.load(data_dir)
        names = sorted(npz_data.files)
        self._data = []
        for name in names:
            self._data.append(npz_data[name])
        self._num_examples = self._data[0].shape[0]

    def shuffle_data(self, idx):
        for i in range(len(self._data)):
            self._data[i]=self._data[i][idx]

    def get_data(self, start, end):
        res=[]
        for i in range(len(self._data)):
            res.append(self._data[i][start : end])
        return tuple(res)
    
    def init_epoch(self, batch_size, shuffle=True):
        self._index_in_epoch = 0
        idx = np.arange(0, self._num_examples)  # get all possible indexes
        if shuffle:
            np.random.shuffle(idx)  # shuffle indexes
        self.shuffle_data(idx)  # get list of `num` random samples
        if batch_size <= 0:
            self.batch_size = self._num_examples
        else:
            self.batch_size = batch_size

    def has_next(self):
        '''return bool: whether there is a next batch'''
        return self._index_in_epoch < self._num_examples

    def get_batch(self):
        '''
        return the next batch in the following tuple format:
        (batch_input_q, batch_input_a, augmented_data, score_label)
        Where
        batch_input_q: word2vec representation for the question in shape [batch_size, sentence_length, vector_size]
        batch_input_a: word2vec representation for the answer in shape [batch_size, sentence_length, vector_size]
        batch_q_len: [batch_size]
        batch_a_len: [batch_size]
        augmented_data: the extra data for MLP in shape [batch_size, augment_feature_num]
        score_label: ground truth semantic similarity score in shape [batch_size, 1]
        comment_id: the commet id
        '''
        start = self._index_in_epoch
        self._index_in_epoch += self.batch_size
        end = self._index_in_epoch
        return self.get_data(start, end)

## Model Definition

In [4]:
tf.reset_default_graph()

In [5]:
# placeholders
batch_input_q = tf.placeholder(dtype=tf.float32, shape=(None, sentence_length, vector_size))
batch_input_a = tf.placeholder(dtype=tf.float32, shape=(None, sentence_length, vector_size))
batch_input_q_len = tf.placeholder(dtype=tf.int32, shape=(None,))
batch_input_a_len = tf.placeholder(dtype=tf.int32, shape=(None,))
score_label = tf.placeholder(dtype=tf.float32, shape=(None, 1))
enable_dropout = tf.placeholder(dtype=tf.bool, shape=())

In [6]:
# 2 bidirectional lstm layers
def blstm_func(input_batch, sequence_length, init_states=((None, None), (None, None))):
    with tf.variable_scope("blstm1"):
        cell_fw1 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units1, name='forward')
        cell_fw1 = tf.nn.rnn_cell.DropoutWrapper(cell_fw1, output_keep_prob=droprate_output, state_keep_prob=droprate_state, variational_recurrent=True, dtype=tf.float32)
        cell_bw1 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units1, name='backward')
        cell_bw1 = tf.nn.rnn_cell.DropoutWrapper(cell_bw1, output_keep_prob=droprate_output, state_keep_prob=droprate_state, variational_recurrent=True, dtype=tf.float32)
        outputs1, states1 = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw1, cell_bw=cell_bw1, inputs=input_batch, sequence_length=sequence_length, \
            initial_state_fw=init_states[0][0], initial_state_bw=init_states[0][1], dtype=tf.float32)
        output_blstm1 = tf.concat(values=outputs1, axis=2)
    with tf.variable_scope("blstm2"):
        cell_fw2 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units2, name='forward')
        cell_fw2 = tf.nn.rnn_cell.DropoutWrapper(cell_fw2, output_keep_prob=droprate_output, state_keep_prob=droprate_state, variational_recurrent=True, dtype=tf.float32)
        cell_bw2 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units2, name='backward')
        cell_bw2 = tf.nn.rnn_cell.DropoutWrapper(cell_bw2, output_keep_prob=droprate_output, state_keep_prob=droprate_state, variational_recurrent=True, dtype=tf.float32)
        _, states2 = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw2, cell_bw=cell_bw2, inputs=output_blstm1, sequence_length=sequence_length, \
            initial_state_fw=init_states[1][0], initial_state_bw=init_states[1][1], dtype=tf.float32)
    return states1[0], states1[1], states2[0], states2[1]

In [7]:
# bidirectional lstms
# Q lstms
with tf.variable_scope('blstm_q'):
    q_s1f, q_s1b, q_s2f, q_s2b = blstm_func(batch_input_q, batch_input_q_len)
# A lstms
with tf.variable_scope('blstm_a'):
    a_s1f, a_s1b, a_s2f, a_s2b = blstm_func(batch_input_a, batch_input_a_len, ((q_s1f, q_s1b), (q_s2f, q_s2b)))
mlp_batch_input = tf.concat(values=[a_s2f.h, a_s2b.h], axis=1)

Instructions for updating:
seq_dim is deprecated, use seq_axis instead
Instructions for updating:
batch_dim is deprecated, use batch_axis instead


In [8]:
# multi-layers perceptrons
with tf.variable_scope('final_dense'):
    dropout0 = tf.layers.dropout(mlp_batch_input, rate=drop_rate, training=enable_dropout, name='dropout0')
    dense1 = tf.layers.dense(dropout0, dense_units1, activation=tf.nn.relu, name='dense1')
    dropout1 = tf.layers.dropout(dense1, rate=drop_rate, training=enable_dropout, name='dropout1')
    dense2 = tf.layers.dense(dropout1, dense_units2, activation=tf.nn.relu, name='dense2')
    dropout2 = tf.layers.dropout(dense2, rate=drop_rate, training=enable_dropout, name='dropout2')
    logits = tf.layers.dense(dropout2, 1, name='final_output')
    batch_loss = tf.losses.mean_squared_error(score_label, logits)
    tf.summary.scalar('batch_mse_loss', batch_loss)

In [9]:
# regularization term
tv = tf.trainable_variables()
regularization = tf.reduce_sum([tf.nn.l2_loss(v) for v in tv])
tf.summary.scalar('regularization', regularization)
loss_with_reg = batch_loss
#tf.summary.scalar('loss_with_regularization', loss_with_reg)

In [10]:
# optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_with_reg, name='train_op')

## Model training

In [11]:
root_dir = 'result_qa_wlen_conc_dropout_' + str(hidden_units1) + '_' + str(hidden_units2) + '_' +\
str(dense_units1) + '_' + str(dense_units2)
log_dir = root_dir + '/log'
tensorboard_dir = root_dir + '/tensorboard'
cQA_train_embedding_dir = 'cQA_train_embedding_wlen.npz'
cQA_test_embedding_dir = 'cQA_test_embedding_wlen.npz'
epoch_num = 10
load_model = False
save_model = True
print_train_info = True
print_test_info = True
print_train_batch = 50
print_test_epoch = 1
save_model_epoch = 1
saver = tf.train.Saver()
summary_op = tf.summary.merge_all()

In [12]:
train_data_generator = DataGenerator(cQA_train_embedding_dir)
test_data_generator = DataGenerator(cQA_test_embedding_dir)

# Validation Functions

We will run the validation after each training epoch, so we define the validation pre-processing functions here.

In [13]:
def predict_score(matrices, log_dir):
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, log_dir + "/model.ckpt")
        input_q, input_a, input_q_len, input_a_len = matrices
        scores = sess.run(logits, feed_dict={
                batch_input_q: input_q,
                batch_input_a: input_a,
                batch_input_q_len: input_q_len,
                batch_input_a_len: input_a_len,
                enable_dropout: False})
        return scores

In [14]:
def test_driver(data_generator, log_dir, thread_len=10):
    data_generator.init_epoch(-1, shuffle=False)
    input_q, input_a, input_q_len, input_a_len, aug_data, label_vec, cid_list = data_generator.get_batch()
    pred_vec = predict_score([input_q, input_a, input_q_len, input_a_len], log_dir)
    ans = []
    ans_human = []
    print(label_vec.shape, pred_vec.shape)
    for i in range(0, input_q.shape[0], thread_len):
        tmp_rank = []
        tmp_rank_human = []
        for j in range(thread_len):
            tmp_rank.append((float(pred_vec[i + j]), j, round(2 * float(label_vec[i + j]))))
            tmp_rank_human.append((float(pred_vec[i + j]), j, round(2 * float(label_vec[i + j])), str(cid_list[i + j][0])))
        tmp_rank.sort(reverse=True)
        tmp_rank_human.sort(reverse=True)
        ans.append(tmp_rank)
        ans_human.append(tmp_rank_human)
    return ans, ans_human

In [15]:
# MAP
class MeasurementCalculator:
    def __init__(self, resultFromModel, threshold=1):
        self.resultFromModel = resultFromModel
        self.binaryResult = self.toBinaryResult(threshold)
        
    def toBinaryResult(self, threshold):
        binaryResult = []
        for eachBatch in self.resultFromModel:
            eachBinaryResult = []
            for eachComment in eachBatch:
                
                # eachComment[0] -> score
                # eachComment[1] -> cid
                # eachComment[2] -> label
                currentLabel = eachComment[2]
                if currentLabel >= threshold:
                    eachBinaryResult.append(1)
                else:
                    eachBinaryResult.append(0)
                    
            if len(eachBinaryResult) != len(eachBatch):
                print('binary single batch length not equal to each bathch size...')
                return None
            binaryResult.append(eachBinaryResult)
        return binaryResult
        
    def precisionAtk(self, r, k):
        """Score is precision @ k
        Relevance is binary (nonzero is relevant).
        >>> r = [0, 0, 1]
        >>> precisionAtk(r, 1)
        0.0
        >>> precisionAtk(r, 2)
        0.0
        >>> precisionAtk(r, 3)
        0.33333333333333331
        >>> precisionAtk(r, 4)
        Traceback (most recent call last):
            File "<stdin>", line 1, in ?
        ValueError: Relevance score length < k
        Args:
            r: Relevance scores (list or numpy) in rank order
                (first element is the first item)
        Returns:
            Precision @ k
        Raises:
            ValueError: len(r) must be >= k
        """
        assert k >= 1
        r = np.asarray(r)[:k] != 0
    #     print('processing:')
    #     print(r)
    
        if r.size != k:
            raise ValueError('Relevance score length < k')
        return np.mean(r)

    def averagePrecision(self, r):
        """Score is average precision (area under PR curve)
        Relevance is binary (nonzero is relevant).
        >>> r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1]
        >>> delta_r = 1. / sum(r)
        >>> sum([sum(r[:x + 1]) / (x + 1.) * delta_r for x, y in enumerate(r) if y])
        0.7833333333333333
        >>> average_precision(r)
        0.78333333333333333
        Args:
            r: Relevance scores (list or numpy) in rank order
                (first element is the first item)
        Returns:
            Average precision
        """
        r = np.asarray(r) != 0
        out = [self.precisionAtk(r, k + 1) for k in range(r.size) if r[k]]
        if not out:
            return 0.
        return np.mean(out)

#     def meanAveragePrecision(self, rs):
    def meanAveragePrecision(self):
        """Score is mean average precision
        Relevance is binary (nonzero is relevant).
        >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1]]
        >>> mean_average_precision(rs)
        0.78333333333333333
        >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1], [0]]
        >>> mean_average_precision(rs)
        0.39166666666666666
        Args:
            rs: Iterator of relevance scores (list or numpy) in rank order
                (first element is the first item)
        Returns:
            Mean average precision
        """
        rs = self.binaryResult
        return np.mean([self.averagePrecision(r) for r in rs])
    
    # MRR
#     def meanReciprocalRank(rs):
    def meanReciprocalRank(self):
        # Eample
        """Score is reciprocal of the rank of the first relevant item
        First element is 'rank 1'.  Relevance is binary (nonzero is relevant).
        Example from http://en.wikipedia.org/wiki/Mean_reciprocal_rank
        >>> rs = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
        >>> mean_reciprocal_rank(rs)
        0.61111111111111105
        >>> rs = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0]])
        >>> mean_reciprocal_rank(rs)
        0.5
        >>> rs = [[0, 0, 0, 1], [1, 0, 0], [1, 0, 0]]
        >>> mean_reciprocal_rank(rs)
        0.75
        Args:
            rs: Iterator of relevance scores (list or numpy) in rank order
                (first element is the first item)
        Returns:
            Mean reciprocal rank
        """
        rs = self.binaryResult
        rs = (np.asarray(r).nonzero()[0] for r in rs)
        return np.mean([1. / (r[0] + 1) if r.size else 0. for r in rs])
    
    # AveRec
    def recallAtk(self, r, k):
        assert k >= 1
        totalOne = sum(r)
        retrivedOne = sum(r[:k])
        return retrivedOne / totalOne

    def averageRecallEach(self, r):
        #r = np.asarray(r) != 0
        #out = [self.recallAtk(r, k + 1) for k in range(r.size) if r[k]]
        out = [self.recallAtk(r, k + 1) for k in range(len(r)) if r[k]]
        if not out:
            return 0.
        return np.mean(out)

    def averageRecall(self):
        rs = self.binaryResult
        return np.mean([self.averageRecallEach(r) for r in rs])

# Run training

In [16]:
# training
with tf.Session() as sess:
    writer = tf.summary.FileWriter(tensorboard_dir, sess.graph)
    if load_model:
        saver.restore(sess, log_dir + "/model.ckpt")
    else:
        sess.run(tf.global_variables_initializer())
    batch_i = 0
    for epoch_i in range(epoch_num):
        train_data_generator.init_epoch(batch_size)
        while train_data_generator.has_next():
            input_q, input_a, input_q_len, input_a_len, aug_data, labels, _ = train_data_generator.get_batch()
            _, summary_log, current_batch_loss = sess.run([train_op, summary_op, batch_loss], feed_dict={
                batch_input_q: input_q,
                batch_input_a: input_a,
                batch_input_q_len: input_q_len,
                batch_input_a_len: input_a_len,
                score_label: labels,
                enable_dropout: True})
            batch_i += 1
            if print_train_info and batch_i % print_train_batch == 0:
                writer.add_summary(summary_log, batch_i)
                print('Epoch %d batch %d: training loss %f' % (epoch_i, batch_i, current_batch_loss.item()))
        print('Epoch %d done: training loss %f' % (epoch_i, current_batch_loss.item()))
        if print_test_info and epoch_i % print_test_epoch == 0:
            test_data_generator.init_epoch(-1, False)
            input_q, input_a, input_q_len, input_a_len, aug_data, labels, _ = test_data_generator.get_batch()
            test_batch_loss = sess.run([batch_loss], feed_dict={
                batch_input_q: input_q,
                batch_input_a: input_a,
                batch_input_q_len: input_q_len,
                batch_input_a_len: input_a_len,
                score_label: labels,
                enable_dropout: False})
            print('Epoch %d done: testing loss %f' % (epoch_i, test_batch_loss[0].item()))
        if save_model and epoch_i % save_model_epoch == 0:
            save_path = saver.save(sess, log_dir + "/model.ckpt")
            ranks, _ = test_driver(test_data_generator, log_dir)
            mc = MeasurementCalculator(ranks, 2)
            print('Epoch %d done: testing MAP %f, MPR %f' % (epoch_i, mc.meanAveragePrecision(), mc.meanReciprocalRank()))
    writer.close()

Epoch 0 batch 50: training loss 0.149885
Epoch 0 batch 100: training loss 0.269800
Epoch 0 batch 150: training loss 0.120062
Epoch 0 batch 200: training loss 0.187240
Epoch 0 batch 250: training loss 0.145395
Epoch 0 batch 300: training loss 0.124735
Epoch 0 batch 350: training loss 0.188034
Epoch 0 batch 400: training loss 0.264026
Epoch 0 batch 450: training loss 0.173756
Epoch 0 batch 500: training loss 0.104842
Epoch 0 batch 550: training loss 0.156751
Epoch 0 batch 600: training loss 0.126132
Epoch 0 batch 650: training loss 0.136384
Epoch 0 batch 700: training loss 0.166024
Epoch 0 batch 750: training loss 0.128984
Epoch 0 batch 800: training loss 0.217763
Epoch 0 batch 850: training loss 0.140879
Epoch 0 done: training loss 0.245111
Epoch 0 done: testing loss 0.170501
INFO:tensorflow:Restoring parameters from result_qa_wlen_conc_dropout_64_64_128_128/log/model.ckpt
(3270, 1) (3270, 1)
Epoch 0 done: testing MAP 0.712585, MPR 0.789946
Epoch 1 batch 900: training loss 0.213442
Epoc

Epoch 8 batch 7400: training loss 0.055139
Epoch 8 batch 7450: training loss 0.056004
Epoch 8 batch 7500: training loss 0.066110
Epoch 8 batch 7550: training loss 0.035958
Epoch 8 batch 7600: training loss 0.067032
Epoch 8 batch 7650: training loss 0.041874
Epoch 8 batch 7700: training loss 0.032068
Epoch 8 batch 7750: training loss 0.100234
Epoch 8 batch 7800: training loss 0.092916
Epoch 8 batch 7850: training loss 0.093039
Epoch 8 batch 7900: training loss 0.107577
Epoch 8 done: training loss 0.014886
Epoch 8 done: testing loss 0.179179
INFO:tensorflow:Restoring parameters from result_qa_wlen_conc_dropout_64_64_128_128/log/model.ckpt
(3270, 1) (3270, 1)
Epoch 8 done: testing MAP 0.727592, MPR 0.801897
Epoch 9 batch 7950: training loss 0.071338
Epoch 9 batch 8000: training loss 0.123138
Epoch 9 batch 8050: training loss 0.070864
Epoch 9 batch 8100: training loss 0.112731
Epoch 9 batch 8150: training loss 0.100497
Epoch 9 batch 8200: training loss 0.128364
Epoch 9 batch 8250: training

# Validation

During the running above, we did the validation after each epoch. We notice the highest validation performance achieved after the 2nd epoch, which is MAP: 74.52% and MRR: 82.15%.