In [None]:
import os; os.environ['CUDA_VISIBLE_DEVICES'] = '2'
from glob import glob

import numpy as np
import tensorflow as tf

In [None]:
# ---- network sizes ----
feature_hidden_dim = 512       # width of the per-step feature projection
lstm_units = 512               # LSTM state size
classifier_hidden_dim = 256    # hidden width of the final two-layer scorer

# ---- input sizes ----
bow_dim = 2632                 # last dimension of `bow_batch`
visfeat_dim = 4096+8           # last dimension of `visfeat_batch`
                               # (presumably 4096 visual + 8 extra values -- TODO confirm)
T = 15                         # number of time steps per sequence

# ---- optimisation ----
weight_decay = 5e-4            # coefficient of the L2 penalty
max_epoches = 10000            # number of passes over the batch file list

# disabled class-balancing weights, kept for reference:
#pos_weight = 0.5 / 0.05
#neg_weight = 0.5 / 0.95
margin = 1                     # margin of the pairwise ranking hinge loss

# ---- data / output locations ----
data_filter = '../exp-txt-classifier/data/training_batches/*/*.npz'

exp_name = 'explanation_ranker3_different_class_no_flip'
save_dir = './tfmodel/{}/'.format(exp_name)   # checkpoints
log_dir = './tb/{}/'.format(exp_name)         # TensorBoard event files

# make sure both output directories exist before anything writes to them
for _output_dir in (save_dir, log_dir):
    os.makedirs(_output_dir, exist_ok=True)

In [None]:
def explanation_classification_model(bow_batch, visfeat_batch, bbox_score_batch, seq_length_batch,
    feature_hidden_dim, lstm_units, classifier_hidden_dim,
    scope='explanation_classifier', reuse=None):
    """Build the graph that assigns one scalar score to each input sequence.

    The three feature tensors are concatenated along their last axis, every
    time step is projected by a ReLU dense layer, the projected sequence is
    run through an LSTM, and the LSTM's final hidden state is scored by a
    two-layer network.

    Args:
        bow_batch, visfeat_batch, bbox_score_batch: time-major float tensors
            of shape [num_steps, batch, dim]; they must agree on the first
            two axes and have a statically known last dimension.
        seq_length_batch: int tensor of shape [batch], passed to
            `tf.nn.dynamic_rnn` as `sequence_length`.
        feature_hidden_dim: output width of the per-step dense projection.
        lstm_units: number of units of the `BasicLSTMCell`.
        classifier_hidden_dim: hidden width of the final scorer.
        scope, reuse: accepted for API compatibility but NOT used by the
            body -- variables are created under whatever variable scope is
            active at the call site, and calling the function twice creates
            a second set of variables.

    Returns:
        A 1-D float tensor with one score per sequence in the batch.
    """
    # concatenate all the features along the feature axis
    all_features = tf.concat([bow_batch, visfeat_batch, bbox_score_batch], axis=-1)
    static_shape = all_features.get_shape().as_list()
    feature_dim = static_shape[-1]

    # Number of time steps, taken from the inputs themselves rather than from
    # a notebook-level global, so the function also works outside this
    # notebook. Prefer the static value; fall back to the dynamic shape.
    num_steps = static_shape[0]
    if num_steps is None:
        num_steps = tf.shape(all_features)[0]

    # flatten (time, batch) so one dense layer maps every step at once,
    # then restore the time-major layout expected by the RNN
    all_features = tf.reshape(all_features, [-1, feature_dim])
    all_features_mapped = tf.layers.dense(all_features, feature_hidden_dim, activation=tf.nn.relu)
    all_features_mapped = tf.reshape(all_features_mapped, [num_steps, -1, feature_hidden_dim])

    # feed the features into a LSTM; only the final state is kept
    cell = tf.contrib.rnn.BasicLSTMCell(lstm_units)
    _, state = tf.nn.dynamic_rnn(cell, all_features_mapped, sequence_length=seq_length_batch,
                             dtype=tf.float32, time_major=True)

    # the final classifier: a two-layer network on the last hidden state
    embeddings = state.h
    embeddings_mapped = tf.layers.dense(embeddings, classifier_hidden_dim, activation=tf.nn.relu)
    scores = tf.layers.dense(embeddings_mapped, 1)
    # [batch, 1] -> [batch]
    scores = tf.reshape(scores, [-1])
    return scores

In [None]:
# Graph inputs for the model (all sequence features are time-major).
# number of valid time steps of each sequence in the batch
seq_length_batch = tf.placeholder(dtype=tf.int32, shape=[None])

# per-step features: [T, batch, feature_dim]
bow_batch = tf.placeholder(dtype=tf.float32, shape=[T, None, bow_dim])
visfeat_batch = tf.placeholder(dtype=tf.float32, shape=[T, None, visfeat_dim])
bbox_score_batch = tf.placeholder(dtype=tf.float32, shape=[T, None, 1])

In [None]:
# Build the scoring network on the placeholders; `scores` holds one value
# per sequence in the batch.
scores = explanation_classification_model(
    bow_batch=bow_batch,
    visfeat_batch=visfeat_batch,
    bbox_score_batch=bbox_score_batch,
    seq_length_batch=seq_length_batch,
    feature_hidden_dim=feature_hidden_dim,
    lstm_units=lstm_units,
    classifier_hidden_dim=classifier_hidden_dim)

In [None]:
# Pairwise ranking (hinge) loss.
# The batch is laid out as consecutive (positive, negative) pairs, so the
# flat score vector is viewed as (N/2, 2): column 0 is the positive
# explanation, column 1 the negative one.
scores_reshaped = tf.reshape(scores, [-1, 2])
scores_pos = scores_reshaped[:, 0]
scores_neg = scores_reshaped[:, 1]
# penalise every pair whose negative is not at least `margin` below its positive
pairwise_hinge = tf.nn.relu(scores_neg - scores_pos + margin)
loss_rank = tf.reduce_mean(pairwise_hinge)

# L2 weight decay on every trainable weight matrix (biases are left out:
# only variables whose name ends in 'kernel' or 'weights' are selected).
regularization_vars = [
    var for var in tf.trainable_variables()
    if var.op.name.endswith(('kernel', 'weights'))
]
print('variables for regularization:')
for var in regularization_vars:
    print(var.op.name)
l2_terms = [tf.nn.l2_loss(var) for var in regularization_vars]
loss_reg = weight_decay*tf.add_n(l2_terms)

# Adam with default hyper-parameters on the combined objective
total_loss = loss_rank + loss_reg
train_op = tf.train.AdamOptimizer().minimize(total_loss)

In [None]:
# Checkpoint writer for all variables in the graph. max_to_keep=None disables
# the automatic deletion of older checkpoints, so every snapshot written by
# the training loop stays on disk.
saver = tf.train.Saver(max_to_keep=None)

In [None]:
# TensorBoard logging: event files (including the graph built so far) go to
# `log_dir`; the ranking loss is the only scalar that is tracked.
log_writer = tf.summary.FileWriter(logdir=log_dir, graph=tf.get_default_graph())
tf.summary.scalar(name="loss_rank", tensor=loss_rank)
# single op that evaluates every summary registered in the graph
log_op = tf.summary.merge_all()

In [None]:
# Let TensorFlow grow its GPU memory on demand instead of reserving the
# whole device up front.
gpu_options = tf.GPUOptions(allow_growth=True)
session_config = tf.ConfigProto(gpu_options=gpu_options)
sess = tf.Session(config=session_config)

# give every variable its initial value
sess.run(tf.global_variables_initializer())

In [None]:
# load data list
# glob returns matches in arbitrary, filesystem-dependent order; sorting first
# makes the shuffled order depend only on NumPy's random state.
filelist = sorted(glob(data_filter))
if not filelist:
    # Fail fast: with an empty list the training loop would run zero
    # iterations per epoch and only write identical checkpoints.
    raise FileNotFoundError('no training batch files matched %r' % data_filter)
np.random.shuffle(filelist)

# one .npz file == one training batch
num_batch = len(filelist)

In [None]:
# Save the freshly initialised weights as snapshot 0, before any training.
save_path = os.path.join(save_dir, '%08d' % 0)
saver.save(sess, save_path)
print('Model saved to %s' % save_path)

# running counts of positive / all examples actually fed to the model
total_pos = 0
total_all = 0
for n_epoch in range(max_epoches):
    for n_batch in range(num_batch):
        # global iteration index, used as the step of the TensorBoard summary
        n_iter = n_epoch*num_batch + n_batch

        # Read every array eagerly and close the .npz archive right away;
        # the object returned by np.load keeps its file open until closed.
        with np.load(filelist[n_batch]) as npz_file:
            batch = dict(npz_file)

        # Build (positive, negative) pairs by randomly sub-sampling one class.
        # Cast to bool so that `~` is a logical NOT whatever dtype was stored
        # (on integer arrays `~` is a bitwise NOT and never yields 0 for 0/1).
        labels = batch['label_batch'].astype(bool)
        labels_pos = np.nonzero(labels)[0]
        labels_neg = np.nonzero(~labels)[0]
        num_pairs = min(len(labels_pos), len(labels_neg))
        if num_pairs == 0:
            # No pair can be formed; feeding an empty batch would only log a
            # NaN loss (mean over zero pairs), so skip this file.
            print('epoch = %d, batch = %d / %d skipped: no positive/negative pair'
                  % (n_epoch, n_batch, num_batch))
            continue
        if len(labels_pos) > num_pairs:
            # fewer negatives than positives: drop surplus positives instead
            # of letting the sampling below raise
            labels_pos = np.random.choice(
                labels_pos, size=num_pairs, replace=False)
        labels_neg = np.random.choice(
            labels_neg, size=num_pairs, replace=False)
        # interleave as pos, neg, pos, neg, ... -- the layout the ranking
        # loss relies on when it reshapes the scores to (N/2, 2)
        labels_new = np.concatenate((
                labels_pos.reshape((-1, 1)),
                labels_neg.reshape((-1, 1))), axis=-1).reshape(-1)
        batch['seq_length_batch'] = batch['seq_length_batch'][labels_new]
        batch['label_batch'] = labels[labels_new]
        # the feature arrays are time-major: the batch axis is axis 1
        for key in ('bow_batch', 'visfeat_batch', 'bbox_score_batch'):
            batch[key] = batch[key][:, labels_new, :]

        print('batch size:', len(batch['seq_length_batch']))
        total_pos += np.sum(batch['label_batch'])
        total_all += batch['label_batch'].size
        print('pos ratio: %f = %d / %d' % (total_pos / total_all, total_pos, total_all))

        # sanity check of the pair layout: column 0 positive, column 1 negative
        pair_labels = batch['label_batch'].reshape((-1, 2))
        assert np.all(pair_labels[..., 0])
        assert np.all(~pair_labels[..., 1])

        # one optimisation step plus the summary of the loss for this batch
        _, summary = sess.run((train_op, log_op),
                              {seq_length_batch: batch['seq_length_batch'],
                               bow_batch: batch['bow_batch'],
                               visfeat_batch: batch['visfeat_batch'],
                               bbox_score_batch: batch['bbox_score_batch']})
        print('epoch = %d, batch = %d / %d' % (n_epoch, n_batch, num_batch))
        log_writer.add_summary(summary, n_iter)

    # save snapshot at the end of every epoch (file name = 1-based epoch index)
    save_path = os.path.join(save_dir, '%08d' % (n_epoch+1))
    saver.save(sess, save_path)
    print('Model saved to %s' % save_path)