In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Hyper-parameters shared by the model graph and the run-directory name below.
sentence_length = 100  # time steps per sentence (second dim of the question/answer placeholders)
vector_size = 300  # embedding width per token (last dim of the question/answer placeholders)
batch_size = 16  # not referenced in the visible cells; presumably used by the training loop - TODO confirm
hidden_units1 = 64  # units per direction in the first bidirectional LSTM layer
hidden_units2 = 32  # units per direction in the second bidirectional LSTM layer
learning_rate = 0.001  # step size passed to the Adam optimizer
drop_rate = 0.5  # drop probability for the three dropout layers of the MLP head
augment_feature_num = 1  # width of the extra feature vector concatenated to the LSTM output
dense_units1 = 64  # width of the first dense layer
dense_units2 = 32  # width of the second dense layer
reg_coefficient = 0.001  # weight of the L2 penalty added to the MSE loss

In [11]:
class DataGenerator:
    '''Serve the arrays stored in an ``.npz`` archive as sequential batches.

    Every array in the archive is loaded into memory, ordered by its key name
    (sorted alphabetically). All arrays are sliced along their first axis, so
    they are expected to share the same number of rows; the row count is taken
    from the first array.
    '''

    def __init__(self, data_dir):
        '''Load every array from the ``.npz`` file at ``data_dir``.

        Args:
            data_dir: path to the ``.npz`` archive (despite the name, a file).
        '''
        # np.load returns an NpzFile that holds the archive open; the context
        # manager closes it once all arrays have been materialised.
        with np.load(data_dir) as npz_data:
            self._data = [npz_data[name] for name in sorted(npz_data.files)]
        self._num_examples = self._data[0].shape[0]
        # Sensible defaults so has_next()/get_batch() work even when
        # init_epoch() has not been called yet (one batch with everything).
        self._index_in_epoch = 0
        self.batch_size = self._num_examples

    def shuffle_data(self, idx):
        '''Reorder every stored array along its first axis by ``idx``.'''
        self._data = [array[idx] for array in self._data]

    def get_data(self, start, end):
        '''Return ``[array[start:end] for each stored array]`` in key order.'''
        return [array[start:end] for array in self._data]

    def init_epoch(self, batch_size, shuffle=True):
        '''Rewind to the first example and optionally reshuffle the data.

        Args:
            batch_size: rows per batch; any value <= 0 means "one batch
                containing the whole data set".
            shuffle: when True the rows are permuted with ``np.random.shuffle``
                (the same permutation is applied to every array).
        '''
        self._index_in_epoch = 0
        idx = np.arange(0, self._num_examples)  # identity order
        if shuffle:
            np.random.shuffle(idx)  # in-place random permutation
        self.shuffle_data(idx)
        if batch_size <= 0:
            self.batch_size = self._num_examples
        else:
            self.batch_size = batch_size

    def has_next(self):
        '''return bool: whether there is a next batch'''
        return self._index_in_epoch < self._num_examples

    def get_batch(self):
        '''Return the next batch and advance the epoch cursor.

        The result is a list holding one slice per stored array, in sorted
        key order. The last batch of an epoch may be shorter than
        ``batch_size`` because slicing past the end is truncated.

        NOTE(review): the original notes describe the leading entries as
        (batch_input_q, batch_input_a, augmented_data, score_label) with shapes
        [batch, sentence_length, vector_size] for the first two,
        [batch, augment_feature_num] and [batch, 1]; the actual content and
        count depend on the keys present in the archive - confirm against the
        file that is loaded.
        '''
        start = self._index_in_epoch
        self._index_in_epoch += self.batch_size
        end = self._index_in_epoch
        return self.get_data(start, end)

#-----------------------Model definition--------------------------------

# Start from an empty default graph so the named layers below can be created
# again when this cell is re-run.
tf.reset_default_graph()
# Placeholders fed at run time; the leading None is the (variable) batch size.
batch_input_q = tf.placeholder(dtype=tf.float32, shape=(None, sentence_length, vector_size))
batch_input_a = tf.placeholder(dtype=tf.float32, shape=(None, sentence_length, vector_size))
augmented_data = tf.placeholder(dtype=tf.float32, shape=(None, augment_feature_num))
score_label = tf.placeholder(dtype=tf.float32, shape=(None, 1))
# Scalar switch: True while training, False at inference (see predict_score).
enable_dropout = tf.placeholder(dtype=tf.bool, shape=())
# Question and answer are joined along the time axis, giving one sequence of
# 2 * sentence_length steps per example.
batch_input = tf.concat(values=[batch_input_q, batch_input_a], axis=1)
# First bidirectional LSTM layer: forward and backward outputs are concatenated
# on the feature axis (2 * hidden_units1 features per step).
cell_fw1 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units1, name='forward1')
cell_bw1 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units1, name='backward1')
outputs1, states1 = tf.nn.bidirectional_dynamic_rnn(
    cell_fw=cell_fw1, cell_bw=cell_bw1, inputs=batch_input, dtype=tf.float32)
batch_middle = tf.concat(values=outputs1, axis=2)
# Second bidirectional LSTM layer: here the two directions are summed
# element-wise, so each step keeps hidden_units2 features.
cell_fw2 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units2, name='forward2')
cell_bw2 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units2, name='backward2')
outputs2, states2 = tf.nn.bidirectional_dynamic_rnn(
    cell_fw=cell_fw2, cell_bw=cell_bw2, inputs=batch_middle, dtype=tf.float32)
output_fw2, output_bw2 = outputs2
bilstm_output = output_fw2 + output_bw2
# Flatten all (2 * sentence_length) steps x hidden_units2 features into one
# vector per example, then append the hand-crafted extra features.
bilstm_flaten = tf.reshape(bilstm_output, (-1, hidden_units2 * sentence_length * 2))
mlp_batch_input = tf.concat(values=[bilstm_flaten, augmented_data], axis=1)
# MLP head: dropout -> dense(relu) twice, then dropout -> single linear unit.
dropout1 = tf.layers.dropout(mlp_batch_input, rate=drop_rate, training=enable_dropout, name='dropout1')
dense1 = tf.layers.dense(dropout1, dense_units1, activation=tf.nn.relu, name='dense1')
dropout2 = tf.layers.dropout(dense1, rate=drop_rate, training=enable_dropout, name='dropout2')
dense2 = tf.layers.dense(dropout2, dense_units2, activation=tf.nn.relu, name='dense2')
dropout3 = tf.layers.dropout(dense2, rate=drop_rate, training=enable_dropout, name='dropout3')
# `logits` is the predicted similarity score (no activation), trained with MSE.
logits = tf.layers.dense(dropout3, 1, name='final_output')
batch_loss = tf.losses.mean_squared_error(score_label, logits)
tf.summary.scalar('batch_mse_loss', batch_loss)
# L2 penalty summed over every trainable variable created so far
# (tf.trainable_variables() does not filter, so biases are included too).
tv = tf.trainable_variables()
regularization = tf.reduce_sum([tf.nn.l2_loss(v) for v in tv])
tf.summary.scalar('regularization', regularization)
loss_with_reg = batch_loss + reg_coefficient * regularization
tf.summary.scalar('loss_with_regularization', loss_with_reg)
# Adam minimises the regularised loss; 'train_op' is the op to run per step.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_with_reg, name='train_op')

In [12]:
def predict_score(matrices, log_dir):
    """Restore the checkpoint under ``log_dir`` and score the given inputs.

    Args:
        matrices: sequence of exactly three items
            ``(question_vectors, answer_vectors, augmented_features)`` that are
            fed to ``batch_input_q``, ``batch_input_a`` and ``augmented_data``.
        log_dir: directory containing ``model.ckpt``.

    Returns:
        The evaluated ``logits`` tensor for the fed inputs, with dropout
        disabled.
    """
    checkpoint_restorer = tf.train.Saver()
    with tf.Session() as session:
        checkpoint_restorer.restore(session, log_dir + "/model.ckpt")
        question_vecs, answer_vecs, extra_features = matrices
        # Inference only: dropout layers must act as identity.
        feeds = {
            batch_input_q: question_vecs,
            batch_input_a: answer_vecs,
            augmented_data: extra_features,
            enable_dropout: False,
        }
        return session.run(logits, feed_dict=feeds)

In [20]:
def test_driver(data_generator, log_dir, thread_len=10):
    data_generator.init_epoch(-1, shuffle=False)
    q_vec, a_vec, aug_data, label_vec, cid_list = data_generator.get_batch()
    pred_vec = predict_score([q_vec, a_vec, aug_data], log_dir)
    ans = []
    ans_human = []
    print(label_vec.shape, pred_vec.shape)
    for i in range(0, q_vec.shape[0], thread_len):
        tmp_rank = []
        tmp_rank_human = []
        for j in range(thread_len):
            tmp_rank.append((float(pred_vec[i + j]), j, round(2 * float(label_vec[i + j]))))
            tmp_rank_human.append((float(pred_vec[i + j]), j, round(2 * float(label_vec[i + j])), str(cid_list[i + j][0])))
        tmp_rank.sort(reverse=True)
        tmp_rank_human.sort(reverse=True)
        ans.append(tmp_rank)
        ans_human.append(tmp_rank_human)
    return ans, ans_human

In [21]:
# Each run lives in a directory named after its hyper-parameters:
# test_<hidden1>_<hidden2>_<dense1>_<dense2>_<reg_coefficient>
root_dir = 'test_' + '_'.join(map(str, (
    hidden_units1,
    hidden_units2,
    dense_units1,
    dense_units2,
    reg_coefficient,
)))
log_dir = root_dir + '/log'  # checkpoint location read by predict_score
tensorboard_dir = root_dir + '/tensorboard'
cQA_test_embedding_dir = 'cQA_test_embedding_id.npz'  # test-set archive (a file, not a directory)

In [22]:
# Evaluate the restored model on the test set: the whole archive is scored as
# one un-shuffled batch and ranked per thread by test_driver.
test_data_generator = DataGenerator(cQA_test_embedding_dir)
ranks, ranks_human = test_driver(test_data_generator, log_dir)

INFO:tensorflow:Restoring parameters from test_64_32_64_32_0.001/log/model.ckpt
(3270, 1) (3270, 1)


In [23]:
# Per-thread rankings, best score first. Each tuple is
# (predicted score, position in thread, round(2 * label), comment id).
ranks_human

[[(0.6669316291809082, 0, 2, 'Q318_R6_C1'),
  (0.6617441773414612, 8, 2, 'Q318_R6_C9'),
  (0.5226553678512573, 2, 2, 'Q318_R6_C3'),
  (0.48358386754989624, 3, 2, 'Q318_R6_C4'),
  (0.4303157329559326, 9, 2, 'Q318_R6_C10'),
  (0.33921319246292114, 6, 0, 'Q318_R6_C7'),
  (0.2234971523284912, 4, 2, 'Q318_R6_C5'),
  (0.2077908217906952, 1, 2, 'Q318_R6_C2'),
  (0.20249584317207336, 7, 1, 'Q318_R6_C8'),
  (0.1794426590204239, 5, 0, 'Q318_R6_C6')],
 [(0.542432963848114, 4, 0, 'Q318_R52_C5'),
  (0.45750096440315247, 9, 0, 'Q318_R52_C10'),
  (0.28412073850631714, 3, 1, 'Q318_R52_C4'),
  (0.2320551872253418, 7, 1, 'Q318_R52_C8'),
  (0.2224946916103363, 1, 1, 'Q318_R52_C2'),
  (0.22239631414413452, 0, 2, 'Q318_R52_C1'),
  (0.2157919555902481, 8, 2, 'Q318_R52_C9'),
  (0.1643580198287964, 6, 2, 'Q318_R52_C7'),
  (0.15652069449424744, 5, 0, 'Q318_R52_C6'),
  (0.13311785459518433, 2, 0, 'Q318_R52_C3')],
 [(0.774219810962677, 1, 2, 'Q319_R1_C2'),
  (0.7406054139137268, 0, 2, 'Q319_R1_C1'),
  (0.6908975

In [80]:
# Ranking metrics: MAP, MRR and average recall
class MeasurementCalculator:
    """Ranking metrics (MAP, MRR, average recall) over per-thread rankings.

    Args:
        resultFromModel: iterable of threads; each thread is a sequence of
            tuples already sorted in rank order (best first) whose element at
            index 2 is the gold label.
        threshold: a comment counts as relevant when its label is
            ``>= threshold``.

    Attributes:
        resultFromModel: the rankings passed to the constructor.
        binaryResult: one list of 0/1 relevance flags per thread, in rank
            order.
    """

    def __init__(self, resultFromModel, threshold=1):
        self.resultFromModel = resultFromModel
        self.binaryResult = self.toBinaryResult(threshold)

    def toBinaryResult(self, threshold):
        """Convert every ranked thread into a list of 0/1 relevance flags.

        Only index 2 of each tuple (the label) is read; index 0 is the score
        and index 1 the position of the comment inside its thread.

        Returns:
            List with one list of ints (1 = relevant, 0 = not) per thread.
        """
        return [
            [1 if eachComment[2] >= threshold else 0 for eachComment in eachBatch]
            for eachBatch in self.resultFromModel
        ]

    def precisionAtk(self, r, k):
        """Precision at cut-off ``k``; relevance is binary (nonzero is relevant).

        For ``r = [0, 0, 1]`` the result is 0.0 for k=1 and k=2, and 1/3 for
        k=3; k=4 raises ``ValueError``.

        Args:
            r: relevance flags (list or numpy array) in rank order.
            k: cut-off, must be >= 1.

        Returns:
            Fraction of the first ``k`` items that are relevant.

        Raises:
            ValueError: if ``len(r) < k``.
        """
        assert k >= 1
        r = np.asarray(r)[:k] != 0
        if r.size != k:
            raise ValueError('Relevance score length < k')
        return np.mean(r)

    def averagePrecision(self, r):
        """Average precision: mean of precision@k over the relevant ranks k.

        For ``r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1]`` the result is
        0.7833333333333333.

        Args:
            r: relevance flags (list or numpy array) in rank order.

        Returns:
            Average precision, or 0. when ``r`` has no relevant item.
        """
        r = np.asarray(r) != 0
        out = [self.precisionAtk(r, k + 1) for k in range(r.size) if r[k]]
        if not out:
            return 0.
        return np.mean(out)

    def meanAveragePrecision(self):
        """Mean of ``averagePrecision`` over all threads in ``binaryResult``.

        Threads without any relevant item contribute 0, e.g.
        ``[[1, 1, 0, 1, 0, 1, 0, 0, 0, 1], [0]]`` gives 0.39166666666666666.
        """
        rs = self.binaryResult
        return np.mean([self.averagePrecision(r) for r in rs])

    def meanReciprocalRank(self):
        """Mean reciprocal rank of the first relevant item of each thread.

        The first element is rank 1; a thread without relevant items
        contributes 0. Examples (see
        http://en.wikipedia.org/wiki/Mean_reciprocal_rank):
        ``[[0, 0, 1], [0, 1, 0], [1, 0, 0]]`` gives 0.611..., and
        ``[[0, 0, 0], [0, 1, 0], [1, 0, 0]]`` gives 0.5.
        """
        rs = self.binaryResult
        # Indices of the relevant items of each thread, in rank order.
        rs = (np.asarray(r).nonzero()[0] for r in rs)
        return np.mean([1. / (r[0] + 1) if r.size else 0. for r in rs])

    def recallAtk(self, r, k):
        """Recall at cut-off ``k``: relevant items in the top ``k`` over all relevant.

        Args:
            r: 0/1 relevance flags in rank order.
            k: cut-off, must be >= 1.

        Returns:
            The recall, or 0. when ``r`` contains no relevant item (instead of
            dividing by zero).
        """
        assert k >= 1
        totalOne = sum(r)
        if totalOne == 0:
            return 0.
        retrivedOne = sum(r[:k])
        return retrivedOne / totalOne

    def averageRecallEach(self, r):
        """Mean of recall@k taken at every rank k that holds a relevant item.

        Returns 0. when ``r`` has no relevant item.
        """
        out = [self.recallAtk(r, k + 1) for k in range(len(r)) if r[k]]
        if not out:
            return 0.
        return np.mean(out)

    def averageRecall(self):
        """Mean of ``averageRecallEach`` over all threads in ``binaryResult``."""
        rs = self.binaryResult
        return np.mean([self.averageRecallEach(r) for r in rs])

In [81]:
# threshold=2: a comment is relevant only when its rounded 2 * label is >= 2.
mc = MeasurementCalculator(ranks, 2)

In [82]:
# Printed in this order: mean average precision, average recall, mean reciprocal rank.
print(mc.meanAveragePrecision())
print(mc.averageRecall())
print(mc.meanReciprocalRank())

0.7473934349195562
0.6520235668171448
0.8233374593466336
