In [145]:
%load_ext autoreload
%autoreload 2
from os import listdir, mkdir
from os.path import join, exists
from time import time
import json


import tensorflow as tf
import numpy as np


import skimage
from skimage.transform import resize
from scipy.misc import imread
from collections import Counter
from time import time

from utils import load_dataset, load_vocab,read_features, Dataset, load_emb_matrix

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [173]:
class VisBOW(object):
    """Bag-of-words visual question answering graph (pre-1.0 TensorFlow API).

    Word embeddings of the question are summed over the sequence axis,
    concatenated with the image feature vector, and fed through a single
    affine layer that produces one logit per answer class.
    """

    def __init__(self, hyperparams):
        """Store the hyper-parameters and create the model variables.

        ``hyperparams`` must contain the keys ``dq`` (embedding width),
        ``da`` (stored but not used by the code in this class), ``di``
        (image feature width), ``Nq`` (rows of the embedding table),
        ``Na`` (number of answer logits) and ``trainable_embeddings``.
        """
        for key in ('dq', 'da', 'di', 'Nq', 'Na', 'trainable_embeddings'):
            setattr(self, key, hyperparams[key])

        # The embedding table is explicitly placed on the CPU.
        with tf.device('/cpu:0'):
            embedding_init = tf.random_uniform([self.Nq, self.dq], -0.1, 0.1)
            self.qemb_W = tf.get_variable('qemb_w',
                                          initializer=embedding_init,
                                          trainable=self.trainable_embeddings)

        # Affine output layer over [image features ; summed question embedding].
        input_width = self.dq + self.di
        self.W = tf.get_variable('W',
                                 shape=[input_width, self.Na],
                                 initializer=tf.contrib.layers.xavier_initializer())
        self.b = tf.get_variable('b',
                                 initializer=tf.zeros_initializer([self.Na]))

    def build_model(self):
        """Build placeholders, loss, optimizer and summaries.

        Returns a dict with the keys ``train_op``, ``loss``, ``loss_s``,
        ``question``, ``answer``, ``acc``, ``acc_s``, ``answer_pred`` and
        ``image`` mapping to the corresponding graph nodes.
        """
        image_in = tf.placeholder(tf.float32, [None, self.di], name="p_image")
        question_in = tf.placeholder(tf.int32, [None, None], name="p_question")
        answer_in = tf.placeholder(tf.float32, [None, self.Na], name="p_answer")

        # Look up every word index and sum along axis 1 (the word axis).
        with tf.device('/cpu:0'):
            word_vectors = tf.nn.embedding_lookup(self.qemb_W, question_in)
            question_bow = tf.reduce_sum(word_vectors, reduction_indices=[1])

        features = tf.concat(1, [image_in, question_bow])
        logits = tf.nn.xw_plus_b(features, self.W, self.b)
        # One cross-entropy value per example in the batch.
        per_example_xent = tf.nn.softmax_cross_entropy_with_logits(logits, answer_in)

        predicted_answer = tf.argmax(logits, 1)
        mean_loss = tf.reduce_mean(per_example_xent)
        loss_summary = tf.scalar_summary('train loss', mean_loss, name='train_loss')
        optimize = tf.train.AdamOptimizer().minimize(mean_loss)

        # Accuracy is computed outside the graph and fed in only to be logged.
        accuracy_in = tf.placeholder(tf.float32, name='acc', shape=None)
        accuracy_summary = tf.scalar_summary("test acc", accuracy_in, name="test_acc")

        return dict(train_op=optimize,
                    loss=mean_loss,
                    loss_s=loss_summary,
                    question=question_in,
                    answer=answer_in,
                    acc=accuracy_in,
                    acc_s=accuracy_summary,
                    answer_pred=predicted_answer,
                    image=image_in)
    

In [3]:
# Positional arguments handed to Dataset for each split; judging by the file
# names these are image features, image ids, question indices and answer
# indices (TODO confirm against utils.Dataset).
train_paths = ("/home/hbenyounes/vqa/datasets/coco/train/images.feat",
               "/home/hbenyounes/vqa/datasets/coco/train/img_ids.txt",
               "/home/hbenyounes/vqa/datasets/coco/train/questions.idxs",
               "/home/hbenyounes/vqa/datasets/coco/train/answers.idxs")
test_paths = ("/home/hbenyounes/vqa/datasets/coco/test/images.feat",
              "/home/hbenyounes/vqa/datasets/coco/test/img_ids.txt",
              "/home/hbenyounes/vqa/datasets/coco/test/questions.idxs",
              "/home/hbenyounes/vqa/datasets/coco/test/answers.idxs")

train_set = Dataset(*train_paths)
test_set = Dataset(*test_paths)

# NOTE(review): the vocabulary paths are relative to the working directory,
# unlike the absolute dataset paths above.
q_i2w, q_w2i = load_vocab('datasets/coco/train/questions.vocab')
a_i2w, a_w2i = load_vocab('datasets/coco/train/answers.vocab')

Parse features file
Parse questions file
Parse answers file
Parse features file
Parse questions file
Parse answers file


In [174]:
# Build the graph, optionally load pretrained word vectors, open the session
# and initialise all variables. Results are stored under root_path/model_name.
print("Graph initialization")
model_name = "model8"
root_path = "/home/hbenyounes/vqa/visbow/"
embedding_path = '/home/hbenyounes/vqa/GoogleNews.model'
if not exists(join(root_path, model_name)):
    mkdir(join(root_path, model_name))
vector_size = 600
max_q = train_set.max_q
H = {"dq":vector_size,
     # Floor division: under Python 3 `vector_size/2` would yield the float
     # 300.0, which is not a valid layer dimension.
     "da":vector_size//2,
     "di":4096,
     "Nq":len(q_i2w),
     "Na":len(a_i2w),
     "batch_size":200,
     "trainable_embeddings":True,
     "word2vec":False}
tf.reset_default_graph()
model = VisBOW(H)
M = model.build_model()
if H['word2vec']:
    q_i2w, q_w2i = load_vocab('datasets/coco/train/questions.vocab')
    print("Load word2Vec")
    embeddings = {}
    # Each line is parsed as "<word> <float> <float> ..."; the context manager
    # guarantees the (potentially very large) file is closed afterwards.
    with open(embedding_path, encoding='utf-8') as emb_file:
        for line in emb_file:
            fields = line.strip().split()
            if not fields:
                # Skip blank lines instead of failing on fields[0].
                continue
            embeddings[fields[0]] = [float(x) for x in fields[1:]]
    emb,c = load_emb_matrix(q_i2w, embeddings, vector_size)
    del embeddings

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options, 
                                                   intra_op_parallelism_threads=1))

saver = tf.train.Saver(max_to_keep=100)
writer = tf.train.SummaryWriter(join(root_path,model_name,'tf_log'), sess.graph)

init = tf.initialize_all_variables()
sess.run(init)

if H['word2vec']:
    # Overwrite the randomly initialised embedding table with the loaded matrix.
    sess.run(model.qemb_W.assign(emb))
# Total number of scalar parameters across all trainable variables.
n_parameters = sum( [np.prod(v.get_shape(),dtype='int') for v in tf.trainable_variables()])
H['n_parameters'] = n_parameters

Graph initialization


In [175]:
def create_feed_dict(batch,Na,batch_size,max_q=None):
    """Pack a list of (image, question, answer) examples into dense arrays.

    Parameters
    ----------
    batch : sequence of (im, s, a) triples
        ``im`` is an image feature vector, ``s`` a sequence of word indices
        and ``a`` the answer index used to set the one-hot entry.
    Na : int
        Width of the one-hot answer rows.
    batch_size : int
        Number of rows allocated in every returned array. Rows beyond
        ``len(batch)`` are left all-zero.
    max_q : int or None
        Width of the question array. Defaults to the longest question in
        ``batch``. Questions longer than ``max_q`` are truncated to their
        first ``max_q`` indices (previously this raised ValueError from
        ``np.pad`` because of a negative pad width).

    Returns
    -------
    V : float32 array, shape (batch_size, len(batch[0][0]))
    Q : int32 array, shape (batch_size, max_q), right-padded with zeros
    ans : int32 array, shape (batch_size, Na), one-hot answers
    """
    if max_q is None:
        max_q = max([len(b[1]) for b in batch])
    V = np.zeros((batch_size, len(batch[0][0])), 'float32')
    Q = np.zeros((batch_size, max_q), 'int32')
    ans = np.zeros((batch_size,Na),'int32')

    for i,(im,s,a) in enumerate(batch):
        V[i] = im
        # Q is already zero-filled, so writing the (possibly truncated)
        # prefix is equivalent to right-padding with zeros.
        s = s[:max_q]
        Q[i,:len(s)] = s
        ans[i,a] = 1
    return V,Q,ans

def test(verbose=None):
    """Run the model over ``test_set`` and return per-example correctness.

    Uses the notebook globals ``test_set``, ``H``, ``M`` and ``sess``.

    Parameters
    ----------
    verbose : truthy or None
        When truthy, print the running accuracy every 20 batches.

    Returns
    -------
    list of 0/1 values, one per test example, where 1 means the predicted
    answer index equals the argmax of the one-hot target.
    """
    acc = []
    test_batches = test_set.batch_gen(H['batch_size'])
    for idx,batch in enumerate(test_batches):    
        if verbose:
            if idx%20==0:
                # np.mean([]) emits a RuntimeWarning; report nan explicitly
                # while nothing has been evaluated yet.
                running_acc = np.mean(acc) if acc else float('nan')
                print("%d - accuracy = %1.3f"%(idx, running_acc))
        V,Q,ans = create_feed_dict(batch,H['Na'],H['batch_size'])
        a_pred = sess.run(M['answer_pred'], 
                          feed_dict={M['question']:Q,
                                     M['answer']:ans,
                                     M['image']:V})
        equals = 1*np.equal(ans.argmax(axis=1),a_pred)
        # create_feed_dict always allocates batch_size rows; keep only the
        # rows that correspond to real examples of this batch.
        equals = list(equals[:len(batch)])
        acc += equals
    return acc

In [176]:
# Training loop: one pass over train_set per epoch, evaluation on test_set
# after every epoch, checkpointing on improvement, and early stopping.
break_all = False
ct = 0
toc = time()
with tf.device('/gpu:0'):
    n_epochs = 100
    max_test_acc = -np.inf
    patience = 0
    for epoch in range(0,n_epochs):
        epoch_loss = []
        times = 0.
        times_fd = 0.
        n_batches = train_set.N // H['batch_size']
        # Log about ten times per epoch; max() prevents a modulo-by-zero
        # when an epoch has fewer than 10 batches.
        log_every = max(1, n_batches // 10)
        # Defined up front so the summary written after the epoch still has
        # a step value if the batch generator yields nothing.
        step = epoch*n_batches
        train_batches = train_set.batch_gen(H['batch_size'])
        for idx,batch in enumerate(train_batches):
            step = idx + epoch*n_batches
            tic = time()
            if idx % log_every == 0:
                ct += 1
                # np.mean([]) emits a RuntimeWarning; at the first batch of
                # an epoch there is no loss yet, so report nan explicitly.
                running_loss = np.mean(epoch_loss) if epoch_loss else float('nan')
                print("Epoch %d - %d/%d : loss = %1.4f - elapsed = %1.3fs "%(epoch,idx,
                                       n_batches,
                                       running_loss,
                                       times))
                print("time sess.run = %1.3fs" % times_fd)
                times_fd = 0.
            V,Q,ans = create_feed_dict(batch,H['Na'],H['batch_size'])
            tic_fd = time()
            _,l,l_s = sess.run([M['train_op'],
                                M['loss'],
                                M['loss_s']], 
                                   feed_dict={M['question']:Q,
                                      M['answer']:ans,
                                      M['image']:V})
            toc_fd = time()
            times_fd += toc_fd - tic_fd
            writer.add_summary(l_s,step)
            
            # A nan loss aborts the whole training run, not just this epoch.
            if np.isnan(l):
                break_all = True
            epoch_loss.append(l)
            toc = time()
            times += toc - tic
            if break_all:
                print("Loss is nan at iteration %d" % (idx+n_batches*epoch))
                break
        if break_all:
            break
        with tf.device('/cpu:0'):
            acc = np.mean(test())
            test_acc_s = sess.run(M['acc_s'], feed_dict={M['acc']:acc})
            writer.add_summary(test_acc_s,step)
            print("Epoch %d - Test accuracy = %1.3f" % (epoch+1, acc))
        # NOTE(review): patience grows by 3 on every improvement rather than
        # being reset to epoch + 3, so the allowed number of non-improving
        # epochs accumulates over time - confirm this is intended.
        if acc > max_test_acc:
            patience += 3
            saver.save(sess, join(root_path,model_name,'model'), global_step=epoch)
        max_test_acc = max(acc, max_test_acc)
        if epoch >= patience:
            print("EARLY STOPPING")
            break
            
# Persist the hyper-parameters together with the best test accuracy.
H['max_test_acc'] = max_test_acc
with open(join(root_path, model_name, 'hyperparams'),'w') as f:
    for h in H:
        f.write("%s = %s\n" % (h, str(H[h])))
    f.write('\n\nMaximal test accuracy = %1.4f\n' % max_test_acc)



Epoch 0 - 0/393 : loss = nan - elapsed = 0.000s 
time sess.run = 0.000s
Epoch 0 - 39/393 : loss = 4.9059 - elapsed = 5.841s 
time sess.run = 3.948s
Epoch 0 - 78/393 : loss = 4.2566 - elapsed = 11.491s 
time sess.run = 3.848s
Epoch 0 - 117/393 : loss = 3.9164 - elapsed = 17.333s 
time sess.run = 3.859s
Epoch 0 - 156/393 : loss = 3.6988 - elapsed = 23.133s 
time sess.run = 3.972s
Epoch 0 - 195/393 : loss = 3.5620 - elapsed = 28.963s 
time sess.run = 3.897s
Epoch 0 - 234/393 : loss = 3.4541 - elapsed = 35.031s 
time sess.run = 4.129s
Epoch 0 - 273/393 : loss = 3.3617 - elapsed = 41.243s 
time sess.run = 4.235s
Epoch 0 - 312/393 : loss = 3.2812 - elapsed = 47.451s 
time sess.run = 4.263s
Epoch 0 - 351/393 : loss = 3.2175 - elapsed = 53.727s 
time sess.run = 4.323s
Epoch 0 - 390/393 : loss = 3.1640 - elapsed = 59.938s 
time sess.run = 4.260s
Epoch 1 - Test accuracy = 0.442
Epoch 1 - 0/393 : loss = nan - elapsed = 0.000s 
time sess.run = 0.000s
Epoch 1 - 39/393 : loss = 1.3328 - elapsed = 5.

In [169]:
# Record the best test accuracy in H, then dump every hyper-parameter as a
# "key = value" line followed by a summary line with that accuracy.
H['max_test_acc'] = max_test_acc
hyperparams_path = join(root_path, model_name, 'hyperparams')
with open(hyperparams_path, 'w') as out:
    out.writelines("%s = %s\n" % (key, str(value)) for key, value in H.items())
    out.write('\n\nMaximal test accuracy = %1.4f\n' % max_test_acc)