In [1]:
import numpy as np
import tensorflow as tf

In [3]:
# Hyperparameters shared by the model-definition and training cells below.
sentence_length = 100  # time steps per sentence (dim 1 of the question/answer placeholders)
vector_size = 300  # embedding width (last dim of the question/answer placeholders)
batch_size = 16  # mini-batch size passed to init_epoch() for training
hidden_units1 = 64  # units per direction in the first bidirectional LSTM layer
hidden_units2 = 32  # units per direction in the second bidirectional LSTM layer
learning_rate = 0.001  # learning rate given to the Adam optimizer
drop_rate = 0.5  # dropout rate applied in front of every dense layer
augment_feature_num = 1  # number of extra features concatenated to the flattened LSTM output
dense_units1 = 128  # width of the first dense layer
dense_units2 = 32  # width of the second dense layer

## Data Generator

You can implement whatever you want; just make sure to provide the *has_next* and *get_batch* functions with the corresponding output format.

In [4]:
class DataGenerator:
    '''Serve aligned mini-batches from the arrays stored in an ``.npz`` archive.

    Every array in the archive is loaded into memory, ordered by its key name
    (sorted alphabetically). The first axis of each array is treated as the
    example axis; the number of examples is taken from the first array.

    Typical use: call ``init_epoch`` once per epoch, then loop on ``has_next``
    and ``get_batch``.
    '''

    def __init__(self, data_dir):
        '''Load every array of the archive at ``data_dir`` into memory.

        Args:
            data_dir: path (or file object) accepted by ``np.load`` that
                points to an ``.npz`` archive.
        '''
        # Use the archive as a context manager so its file handle is released
        # as soon as the arrays have been materialised.
        with np.load(data_dir) as npz_data:
            self._data = [npz_data[name] for name in sorted(npz_data.files)]
        self._num_examples = self._data[0].shape[0]
        # Sensible defaults so has_next()/get_batch() work even if the caller
        # forgets init_epoch(): a single batch holding the whole data set.
        self._index_in_epoch = 0
        self.batch_size = self._num_examples

    def shuffle_data(self, idx):
        '''Reorder every array along its first axis with the same index array.'''
        self._data = [array[idx] for array in self._data]

    def get_data(self, start, end):
        '''Return the slice ``[start:end]`` of every array, as a tuple.'''
        return tuple(array[start:end] for array in self._data)

    def init_epoch(self, batch_size, shuffle=True):
        '''Rewind to the first example and configure batching for a new epoch.

        Args:
            batch_size: number of examples per batch; any value <= 0 means
                "the whole data set in one batch".
            shuffle: when True, apply one random permutation (drawn from the
                global NumPy RNG) to all arrays so they stay aligned.
        '''
        self._index_in_epoch = 0
        if shuffle:
            idx = np.arange(0, self._num_examples)
            np.random.shuffle(idx)
            self.shuffle_data(idx)
        # When shuffle is False nothing is re-indexed: indexing with the
        # identity permutation would only copy every array.
        if batch_size <= 0:
            self.batch_size = self._num_examples
        else:
            self.batch_size = batch_size

    def has_next(self):
        '''return bool: whether there is a next batch'''
        return self._index_in_epoch < self._num_examples

    def get_batch(self):
        '''
        return the next batch in the following tuple format:
        (batch_input_q, batch_input_a, augmented_data, score_label)
        Where
        batch_input_q: word2vec representation for the question in shape [batch_size, sentence_length, vector_size]
        batch_input_a: word2vec representation for the answer in shape [batch_size, sentence_length, vector_size]
        augmented_data: the extra data for MLP in shape [batch_size, augment_feature_num]
        score_label: ground truth semantic similarity score in shape [batch_size, 1]

        NOTE(review): the tuple order is the alphabetical order of the archive's
        key names; confirm the keys in the .npz files sort into the order above.
        The last batch of an epoch may hold fewer than batch_size examples.
        '''
        start = self._index_in_epoch
        self._index_in_epoch += self.batch_size
        end = self._index_in_epoch
        return self.get_data(start, end)

## Model Definition

In [5]:
# Start from an empty default graph so re-running the cells below does not
# stack a second copy of the model on top of an earlier one.
tf.reset_default_graph()

In [6]:
# placeholders
# Question and answer embeddings: (batch, sentence_length, vector_size) each.
batch_input_q = tf.placeholder(dtype=tf.float32, shape=(None, sentence_length, vector_size))
batch_input_a = tf.placeholder(dtype=tf.float32, shape=(None, sentence_length, vector_size))
# Extra per-example features fed straight into the MLP: (batch, augment_feature_num).
augmented_data = tf.placeholder(dtype=tf.float32, shape=(None, augment_feature_num))
# Regression target: (batch, 1).
score_label = tf.placeholder(dtype=tf.float32, shape=(None, 1))
# Scalar switch: feed True while training, False while evaluating.
enable_dropout = tf.placeholder(dtype=tf.bool, shape=())
# Question and answer are joined along the time axis, so the LSTMs see one
# sequence of 2 * sentence_length steps: (batch, 2 * sentence_length, vector_size).
batch_input = tf.concat(values=[batch_input_q, batch_input_a], axis=1)

In [7]:
# bidirectional lstms
cell_fw1 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units1, name='forward1')
cell_bw1 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units1, name='backward1')
outputs1, states1 = tf.nn.bidirectional_dynamic_rnn(
    cell_fw=cell_fw1, cell_bw=cell_bw1, inputs=batch_input, dtype=tf.float32)
batch_middle = tf.concat(values=outputs1, axis=2)
cell_fw2 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units2, name='forward2')
cell_bw2 = tf.nn.rnn_cell.LSTMCell(num_units=hidden_units2, name='backward2')
outputs2, states2 = tf.nn.bidirectional_dynamic_rnn(
    cell_fw=cell_fw2, cell_bw=cell_bw2, inputs=batch_middle, dtype=tf.float32)
output_fw2, output_bw2 = outputs2
bilstm_output = output_fw2 + output_bw2
bilstm_flaten = tf.reshape(bilstm_output, (-1, hidden_units2 * sentence_length * 2))
mlp_batch_input = tf.concat(values=[bilstm_flaten, augmented_data], axis=1)

In [8]:
# multi-layers perceptrons
dropout1 = tf.layers.dropout(mlp_batch_input, rate=drop_rate, training=enable_dropout, name='dropout1')
dense1 = tf.layers.dense(dropout1, dense_units1, activation=tf.nn.relu, name='dense1')
dropout2 = tf.layers.dropout(dense1, rate=drop_rate, training=enable_dropout, name='dropout2')
dense2 = tf.layers.dense(dropout2, dense_units2, activation=tf.nn.relu, name='dense2')
dropout3 = tf.layers.dropout(dense2, rate=drop_rate, training=enable_dropout, name='dropout3')
logits = tf.layers.dense(dropout3, 1, name='final_output')
batch_loss = tf.losses.mean_squared_error(score_label, logits)
tf.summary.scalar('batch_mse_loss', batch_loss)

<tf.Tensor 'batch_mse_loss:0' shape=() dtype=string>

In [9]:
# regularization term
# Raw L2 penalty: the sum of tf.nn.l2_loss over every trainable variable
# created so far. It is kept unscaled under the name `regularization` so the
# 'regularization' summary and any cell that evaluates it keep their meaning.
tv = tf.trainable_variables()
regularization = tf.reduce_sum([tf.nn.l2_loss(v) for v in tv])
tf.summary.scalar('regularization', regularization)
# Weight of the penalty in the training objective. The raw sum is several
# orders of magnitude larger than the MSE term (this notebook prints about
# 8.9e3 for it, against batch MSE values of about 0.14-0.25), so adding it
# unscaled makes the optimizer shrink the weights instead of fitting the
# scores. 1e-4 is a conventional starting value; tune it as a hyperparameter.
l2_reg_scale = 1e-4
loss_with_reg = batch_loss + l2_reg_scale * regularization
tf.summary.scalar('loss_with_regularization', loss_with_reg)

<tf.Tensor 'loss_with_regularization:0' shape=() dtype=string>

In [10]:
# optimizer
# Adam on the regularized objective; `train_op` is the op run once per training batch.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_with_reg, name='train_op')

## Model training

In [12]:
# Paths and switches for the restore / predict / train cells below.
log_dir = 'log_64_32_128_32'  # directory holding the checkpoint "model.ckpt"
tensorboard_dir = 'tensorboard'  # where the training cell writes summaries
cQA_train_embedding_dir = 'cQA_train_embedding.npz'  # training archive read by DataGenerator
cQA_test_embedding_dir = 'cQA_test_embedding.npz'  # test archive read by DataGenerator
epoch_num = 1  # number of passes over the training data
load_model = True  # True: restore the checkpoint before training; False: initialise variables from scratch
save_model = False  # True: write the checkpoint back every `save_model_epoch` epochs
print_train_info = True  # log the training loss (and write a summary) every `print_train_batch` batches
print_test_info = True  # evaluate on the test set every `print_test_epoch` epochs
print_train_batch = 20
print_test_epoch = 1
save_model_epoch = 1
# NOTE(review): the saver is created after the optimizer cell, so it presumably
# also covers the optimizer's own variables - confirm against the checkpoint.
saver = tf.train.Saver()
# One op that evaluates every tf.summary.scalar registered above.
summary_op = tf.summary.merge_all()

In [13]:
# Restore the saved weights and report the value of the L2 term for them.
with tf.Session() as sess:
    checkpoint_path = log_dir + "/model.ckpt"
    saver.restore(sess, checkpoint_path)
    res = sess.run(regularization)
print(res)

INFO:tensorflow:Restoring parameters from log_64_32_128_32/model.ckpt
8902.852


In [10]:
# Load the training archive first, then the test archive, into memory.
train_data_generator, test_data_generator = [
    DataGenerator(archive_path)
    for archive_path in (cQA_train_embedding_dir, cQA_test_embedding_dir)
]

In [11]:
# Sanity check: shape of the first array loaded from the training archive.
train_data_generator._data[0].shape

(14110, 100, 300)

In [None]:
# predicting
with tf.Session() as sess:
    saver.restore(sess, log_dir + "/model.ckpt")
    test_data_generator.init_epoch(-1, False)
    input_q, input_a, aug_data, labels = test_data_generator.get_batch()
    test_batch_loss = sess.run([batch_loss], feed_dict={
        batch_input_q: input_q,
        batch_input_a: input_a,
        augmented_data: aug_data,
        score_label: labels,
        enable_dropout: False})

In [12]:
# training
with tf.Session() as sess:
    writer = tf.summary.FileWriter(tensorboard_dir, sess.graph)
    if load_model:
        saver.restore(sess, log_dir + "/model.ckpt")
    else:
        sess.run(tf.global_variables_initializer())
    batch_i = 0
    for epoch_i in range(epoch_num):
        train_data_generator.init_epoch(batch_size)
        while train_data_generator.has_next():
            input_q, input_a, aug_data, labels = train_data_generator.get_batch()
            _, summary_log, current_batch_loss = sess.run([train_op, summary_op, batch_loss], feed_dict={
                batch_input_q: input_q,
                batch_input_a: input_a,
                augmented_data: aug_data,
                score_label: labels,
                enable_dropout: True})
            batch_i += 1
            if print_train_info and batch_i % print_train_batch == 0:
                writer.add_summary(summary_log, batch_i)
                print('Epoch %d batch %d: training loss %f' % (epoch_i, batch_i, current_batch_loss.item()))
        print('Epoch %d done: training loss %f' % (epoch_i, current_batch_loss.item()))
        if print_test_info and epoch_i % print_test_epoch == 0:
            test_data_generator.init_epoch(-1, False)
            input_q, input_a, aug_data, labels = test_data_generator.get_batch()
            test_batch_loss = sess.run([batch_loss], feed_dict={
                batch_input_q: input_q,
                batch_input_a: input_a,
                augmented_data: aug_data,
                score_label: labels,
                enable_dropout: False})
            print('Epoch %d done: testing loss %f' % (epoch_i, test_batch_loss[0].item()))
        if save_model and epoch_i % save_model_epoch == 0:
            save_path = saver.save(sess, log_dir + "/model.ckpt")
    writer.close()

Epoch 0 batch 20: training loss 0.139585
Epoch 0 batch 40: training loss 0.240153
Epoch 0 batch 60: training loss 0.188243
Epoch 0 batch 80: training loss 0.205591
Epoch 0 batch 100: training loss 0.171620
Epoch 0 batch 120: training loss 0.151209
Epoch 0 batch 140: training loss 0.216156
Epoch 0 batch 160: training loss 0.252703
Epoch 0 batch 180: training loss 0.155523
Epoch 0 batch 200: training loss 0.152540
Epoch 0 batch 220: training loss 0.175155
Epoch 0 batch 240: training loss 0.206150
Epoch 0 batch 260: training loss 0.166518
Epoch 0 batch 280: training loss 0.227060


KeyboardInterrupt: 

In [None]:
# NOTE(review): leftover probe. `sess` is whatever session the last executed
# `with tf.Session() as sess:` cell left bound, presumably already closed by
# that `with` block - consider deleting this cell.
sess