In [1]:
from os import listdir, mkdir
from os.path import join, exists
from time import time
import json


import tensorflow as tf
import numpy as np


import skimage
from skimage.transform import resize
from scipy.misc import imread
from collections import Counter
from time import time

from utils import load_dataset, load_vocab,read_features, Dataset, load_emb_matrix

In [3]:
class VisBOW(object):
    def __init__(self, hyperparams):
        self.dq = hyperparams['dq']
        self.da = hyperparams['da']
        self.di = hyperparams['di']
        self.Nq = hyperparams['Nq']
        self.Na = hyperparams['Na']
        self.trainable_embeddings = hyperparams['trainable_embeddings']
        
        with tf.device('/cpu:0'):
            self.qemb_W = tf.get_variable('qemb_w',
                                          initializer=tf.random_uniform([self.Nq, self.dq], -0.1, 0.1),
                                          trainable = self.trainable_embeddings)
        
        self.W = tf.get_variable('W',
                                 initializer=tf.random_uniform([self.dq + self.di, self.Na], -0.1, 0.1))
        self.b = tf.get_variable('b',
                                 initializer=tf.random_uniform([self.Na], 
                                                               -0.1, 0.1))
    def build_model(self):
        
        p_image = tf.placeholder(tf.float32,
                                [None, self.di],
                                 name="p_image")        
        p_question = tf.placeholder(tf.int32, 
                                    [None, None],
                                    name="p_question")
        p_answer = tf.placeholder(tf.float32, 
                                  [None,self.Na],
                                  name="p_answer")
        with tf.device('/cpu:0'):
            question_seq = tf.nn.embedding_lookup(self.qemb_W, p_question)
            question_emb = tf.reduce_mean(question_seq, reduction_indices=[1])
        concat = tf.concat(1, [p_image, question_emb])
        answer_logits = tf.nn.xw_plus_b(concat,self.W,self.b)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(answer_logits, p_answer) # (batch_size, )
        
        answer_pred = tf.argmax(answer_logits,1)
        loss = tf.reduce_mean(cross_entropy)
        train_op = tf.train.AdamOptimizer().minimize(loss)
        output = {'train_op':train_op,
                 'loss':loss,
                 'question':p_question,
                 'answer':p_answer,
                 'answer_pred':answer_pred,
                 'image':p_image}
        return output
    

In [4]:
train_set = Dataset("/home/hbenyounes/vqa/datasets/coco/train/images.feat",
                    "/home/hbenyounes/vqa/datasets/coco/train/img_ids.txt",
                    "/home/hbenyounes/vqa/datasets/coco/train/questions.idxs",
                    "/home/hbenyounes/vqa/datasets/coco/train/answers.idxs", n_max=100)


test_set = Dataset("/home/hbenyounes/vqa/datasets/coco/test/images.feat",
                    "/home/hbenyounes/vqa/datasets/coco/test/img_ids.txt",
                    "/home/hbenyounes/vqa/datasets/coco/test/questions.idxs",
                    "/home/hbenyounes/vqa/datasets/coco/test/answers.idxs", n_max=100)

q_i2w, q_w2i = load_vocab('datasets/coco/train/questions.vocab')
a_i2w, a_w2i = load_vocab('datasets/coco/train/answers.vocab')

Parse features file
Parse questions file
Parse answers file
Parse features file
Parse questions file
Parse answers file


In [29]:
print("Graph initialization")
model_name = "model1"
root_path = "/home/hbenyounes/vqa/visbow/"
if not exists(join(root_path, model_name)):
    mkdir(join(root_path, model_name))
vector_size = 300
max_q = train_set.max_q
H = {"dq":vector_size,
               "da":200, 
               "di":4096,
               "Nq":len(q_i2w),
               "Na":len(a_i2w),
               "batch_size":5,
               "keep_prob":0.5,
               "trainable_embeddings":True,
               "word2vec":False}
tf.reset_default_graph()
model = VisBOW(H)
M = model.build_model()
if H['word2vec']:
    q_i2w, q_w2i = load_vocab('datasets/coco/train/questions.vocab')
    print("Load word2Vec")
    embeddings = {}
    for n,l in enumerate(open(embedding_path,encoding='utf-8')):
        l = l.strip().split()
        w = l[0]
        vec = [float(x) for x in l[1:]]
        embeddings[w] = vec
    emb,c = load_emb_matrix(q_i2w, embeddings)
    del embeddings
    
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options, 
                                                   intra_op_parallelism_threads=1))

saver = tf.train.Saver(max_to_keep=100)
writer = tf.train.SummaryWriter(join(root_path,model_name,'tf_log'), sess.graph)

init = tf.initialize_all_variables()
sess.run(init)

Graph initialization


In [30]:
def create_feed_dict(batch,Na,batch_size,max_q=None):
    if max_q is None:
        max_q = max([len(b[1]) for b in batch])
    V = np.zeros((batch_size, len(batch[0][0])), 'float32')
    Q = np.zeros((batch_size, max_q), 'int32')
    ans = np.zeros((batch_size,Na),'int32')
    
    for i,(im,s,a) in enumerate(batch):
        V[i] = im
        Q[i] = np.pad(s, (0,max_q-len(s)), 'constant')
        ans[i,a] = 1
    return V,Q,ans

def test(verbose=None):
    acc = []
    test_batches = test_set.batch_gen(H['batch_size'])
    for idx,batch in enumerate(test_batches):    
        if verbose:
            if idx%20==0:
                print("%d - accuracy = %1.3f"%(idx, np.mean(acc)))
        V,Q,ans = create_feed_dict(batch,H['Na'],H['batch_size'])
        a_pred = sess.run(M['answer_pred'], 
                          feed_dict={M['question']:Q,
                                     M['answer']:ans,
                                     M['image']:V})
        equals = 1*np.equal(ans.argmax(axis=1),a_pred)
        equals = list(equals[:len(batch)])
        acc += equals
    return np.mean(acc)

In [31]:
break_all = False
with tf.device('/gpu:0'):
    n_epochs = 50
    max_test_acc = -np.Inf
    patience = 3
    for epoch in range(n_epochs):
        epoch_loss = []
        times = 0.
        n_batches = train_set.N // H['batch_size']
        train_batches = train_set.batch_gen(H['batch_size'])
        for idx,batch in enumerate(train_batches):
            step = idx + epoch*n_batches
            tic = time()
            if idx%(n_batches//10)==0:
                print("Epoch %d - %d/%d : loss = %1.4f - time = %1.3fs"%(epoch,idx,
                                                                         n_batches,np.mean(epoch_loss),
                                                                         times))
            V,Q,ans = create_feed_dict(batch,H['Na'],H['batch_size'])
            _,l = sess.run([M['train_op'],
                            M['loss']], 
                           feed_dict={M['question']:Q,
                                      M['answer']:ans,
                                      M['image']:V})
            l_s = tf.scalar_summary('train loss', l, name='train_loss')
            l_s = sess.run([l_s])
            writer.add_summary(l_s,step)
            
            if np.isnan(l):
                break_all = True
            epoch_loss.append(l)
            times += time() - tic
            if break_all:
                print("Loss is nan at iteration %d" % (idx+n_batches*epoch))
                break
        if break_all:
            break
        with tf.device('/cpu:0'):
            test_acc = test()
            test_acc_s = tf.scalar_summary("test acc",test_acc,name="test acc")
            test_acc_s = sess.run([test_acc_s])
            writer.add_summary(test_acc_s,step)
            print("Epoch %d - Test accuracy = %1.3f" % (epoch+1, test_acc))
        if test_acc > max_test_acc:
            patience += 3
            saver.save(sess, join(root_path,model_name,'model'), global_step=epoch)
        max_test_acc = max(test_acc, max_test_acc)
        if epoch >= patience:
            print("EARLY STOPPING")
            break

Epoch 0 - 0/36 : loss = nan - time = 0.000s




InvalidArgumentError: Cannot assign a device to node 'train_loss': Could not satisfy explicit device specification '/device:GPU:0' because no supported kernel for GPU devices is available.
	 [[Node: train_loss = ScalarSummary[T=DT_FLOAT, _device="/device:GPU:0"](train_loss/tags, train_loss/values)]]
Caused by op 'train_loss', defined at:
  File "/usr/lib/python3.4/runpy.py", line 170, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.4/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.4/dist-packages/traitlets/config/application.py", line 596, in launch_instance
    app.start()
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelapp.py", line 442, in start
    ioloop.IOLoop.instance().start()
  File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "/usr/local/lib/python3.4/dist-packages/tornado/ioloop.py", line 883, in start
    handler_func(fd_obj, events)
  File "/usr/local/lib/python3.4/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python3.4/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 391, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/ipkernel.py", line 199, in do_execute
    shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2723, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2825, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2885, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-31-774641ec7895>", line 24, in <module>
    l_s = tf.scalar_summary('train loss', l, name='train_loss')
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/logging_ops.py", line 316, in scalar_summary
    val = gen_logging_ops._scalar_summary(tags=tags, values=values, name=scope)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/gen_logging_ops.py", line 242, in _scalar_summary
    name=name)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/op_def_library.py", line 704, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 2260, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 1230, in __init__
    self._traceback = _extract_stack()


In [36]:
l_s = tf.scalar_summary('train loss', l, name='train_loss')
l_s = sess.run(l_s)
writer.add_summary(l_s,step)

TypeError: Parameter to MergeFrom() must be instance of same class: expected Summary got list. for field Event.summary