In [28]:
from os import listdir
from os.path import join
from time import time
import json


import tensorflow as tf
import numpy as np


import skimage
from skimage.transform import resize
from scipy.misc import imread
from collections import Counter
from time import time

from utils import load_dataset, load_vocab, get_batch

def load_image(path):
    # load image
    img = imread(path,mode='RGB')
    img = img / 255.0
    assert (0 <= img).all() and (img <= 1.0).all()
    #print "Original Image Shape: ", img.shape
    # we crop image from center
    short_edge = min(img.shape[:2])
    yy = int((img.shape[0] - short_edge) / 2)
    xx = int((img.shape[1] - short_edge) / 2)
    crop_img = img[yy : yy + short_edge, xx : xx + short_edge]
    # resize to 224, 224
    resized_img = resize(crop_img, (224, 224))
    return resized_img

def extract_features(ids, path, output_path, extractor, batch_size=64):
    images_names = dict()
    for p in listdir(path):
        image_id = int(p.split('_')[-1].split('.')[0])
        if image_id in ids:
            images_names[image_id] = p
    batch,names = [],[]
    with open(output_path,'w') as output_file:
        for idx,n in enumerate(images_names):
            p = join(path, images_names[n])
            batch.append(load_image(p))
            names.append(n)
            if len(batch)==batch_size:
                batch = np.stack(batch)
                feed_dict = {images: batch}
                with tf.device('/gpu:0'):
                    features = sess.run(extractor, feed_dict=feed_dict)
                for n,f in zip(names,features):
                    output_file.write("%s;%s\n" % (n, " ".join(str(x) for x in f)))
                print("%d/%d" % (idx,len(images_names)))
                batch, names = [],[]
                output_file.flush()
                

Check if all the COCOQA images are in train/val MSCOCO

In [2]:
train_ids = set([int(j) for j in open('datasets/coco/train/img_ids.txt','r')])

source = '/home/hbenyounes/vqa/datasets/train2014'
image_ids = set()
for p in listdir(source):
    image_id = int(p.split('_')[-1].split('.')[0])
    image_ids.add(image_id)

print("Source: %s" % source)
print("Train : %d\nTrain absent from source : %d" % (len(train_ids), len(train_ids-image_ids)))


test_ids = set([int(j) for j in open('datasets/coco/test/img_ids.txt','r')])
source = '/home/hbenyounes/vqa/datasets/val2014'
image_ids = set()
for p in listdir(source):
    image_id = int(p.split('_')[-1].split('.')[0])
    image_ids.add(image_id)

print("Source: %s" % source)    
print("Test : %d\nTest absent from source : %d" % (len(test_ids), len(test_ids-image_ids)))

Source: /home/hbenyounes/vqa/datasets/train2014
Train : 46293
Train absent from source : 0
Source: /home/hbenyounes/vqa/datasets/val2014
Test : 22879
Test absent from source : 0


## Extract all the visual embeddings

In [3]:
with open("tensorflow-vgg16/vgg16.tfmodel", mode='rb') as f:
    fileContent = f.read()

with tf.device('/gpu:0'):
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(fileContent)

    images = tf.placeholder("float", [None, 224, 224, 3])

    tf.import_graph_def(graph_def, input_map={ "images": images })

    graph = tf.get_default_graph()
    out_tensor = graph.get_tensor_by_name("import/Relu_1:0")

In [4]:
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)

sess = tf.Session(config=tf.ConfigProto(log_device_placement=True,
                                        gpu_options=gpu_options))
init = tf.initialize_all_variables()
sess.run(init)

In [30]:
def extract_features(ids, path, output_path, extractor, batch_size=64):
    images_names = dict()
    for p in listdir(path):
        image_id = int(p.split('_')[-1].split('.')[0])
        if image_id in ids:
            images_names[image_id] = p
    batch,names = [],[]
    with open(output_path,'w') as output_file:
        for idx,n in enumerate(images_names):
            p = join(path, images_names[n])
            if len(batch)<batch_size:
                batch.append(load_image(p))
                names.append(n)
            else:
                batch = np.stack(batch)
                feed_dict = {images: batch}
                with tf.device('/gpu:0'):
                    features = sess.run(extractor, feed_dict=feed_dict)
                for n,f in zip(names,features):
                    output_file.write("%s;%s\n" % (n, " ".join(str(x) for x in f)))
                print("%d/%d" % (idx,len(images_names)))
                batch, names = [],[]
                output_file.flush()
                
                
extract_features(train_ids,'/home/hbenyounes/vqa/datasets/train2014', 
                 "/home/hbenyounes/vqa/datasets/coco/train/images.feat", out_tensor)

64/46293
129/46293
194/46293


KeyboardInterrupt: 

In [29]:
ids = train_ids 
path = '/home/hbenyounes/vqa/datasets/train2014'
output_path = "/home/hbenyounes/vqa/datasets/coco/train/images.feat" 
extractor = out_tensor
batch_size = 64
images_names = dict()
for p in listdir(path):
    image_id = int(p.split('_')[-1].split('.')[0])
    if image_id in ids:
        images_names[image_id] = p
batch,names = [],[]
with open(output_path,'w') as output_file:
    for idx,n in enumerate(images_names):
        p = join(path, images_names[n])
        if len(batch)<batch_size:
            im = load_image(p)
            if len(im.shape) == 2:
                print("IMSHAPE = 2")
                break
            batch.append(im)
            names.append(n)
        else:
            batch = np.stack(batch)
            feed_dict = {images: batch}
            with tf.device('/gpu:0'):
                features = sess.run(extractor, feed_dict=feed_dict)
            for n,f in zip(names,features):
                output_file.write("%s;%s\n" % (n, " ".join(str(x) for x in f)))
            print("%d/%d" % (idx,len(images_names)))
            batch, names = [],[]
            output_file.flush()

64/46293
129/46293
194/46293
259/46293
324/46293
389/46293


TypeError: unsupported operand type(s) for /: 'JpegImageFile' and 'float'

In [1]:
from os import listdir
from os.path import join
from time import time
import json


import tensorflow as tf
import numpy as np


import skimage
from skimage.transform import resize
from scipy.misc import imread
from collections import Counter
from time import time

from utils import load_dataset, load_vocab, get_batch

def read_features(path,n_max = np.Inf):
    feats = {}
    for n,l in enumerate(open(path,encoding='latin1')):
        l = l.strip().split(";")
        idx = l[0]
        feat = [float(x) for x in l[1].split()]
        feats[idx] = feat
        if n>n_max:
            break
    return feats

In [13]:
class Dataset(object):
    def __init__(self, f_path, i_path, q_path, a_path):
        self.f_path = f_path
        self.i_path = i_path
        self.q_path = q_path
        self.a_path = a_path
        print('Parse features file')
        f_data = read_features(self.f_path,n_max=50)
        print('Parse questions file')
        q_data = load_dataset(self.q_path)
        self.max_q = len(max(q_data, key=lambda x:len(x)))
        print('Parse answers file')
        a_data = load_dataset(self.a_path)
        self.data = []
        for q_id,q,a in zip(open(i_path),q_data,a_data):
            q_id = q_id.strip()
            try:
                f = f_data[q_id]
            except:
                continue
            datum = (f_data[q_id],q,a)
            self.data.append(datum)
        del f_data,q_data,a_data
        self.N = len(self.data)
        self.indexes = np.arange(self.N)
    
    def __iter__(self):
        return self
    
    def batch_gen(self,batch_size=64):
        batch = []
        np.random.shuffle(self.indexes)
        for idx in self.indexes:
            batch.append(self.data[self.indexes[idx]])
            if len(batch) == batch_size:
                yield batch
                batch = []
        if len(batch)>0:
            yield batch
        

f_path = "/home/hbenyounes/vqa/datasets/coco/test/images.feat"
i_path = "/home/hbenyounes/vqa/datasets/coco/test/img_ids.txt"
q_path = "/home/hbenyounes/vqa/datasets/coco/test/questions.idxs"
a_path = "/home/hbenyounes/vqa/datasets/coco/test/answers.idxs"

dataset = Dataset(f_path, i_path, q_path, a_path)

Parse features file
Parse questions file
Parse answers file


In [23]:
def create_feed_dict(batch,max_q,Na):
    batch_size = len(batch)
    V = np.zeros((batch_size, len(batch[0][0])), 'float32')
    Q = np.zeros((batch_size, max_q), 'int32')
    mask = np.zeros((max_q,batch_size), 'int32')
    ans = np.zeros((batch_size,Na),'int32')
    
    for i,(im,s,a) in enumerate(batch):
        V[i] = im
        Q[i] = np.pad(s, (0,max_q-len(s)), 'constant')
        mask[len(s)-1,i] = 1
        ans[i,a] = 1
    mask = mask[:,:,None]
    return V,Q,mask,ans

In [16]:
b_gen = dataset.batch_gen(64)

In [17]:
q_i2w, q_w2i = load_vocab('datasets/coco/train/questions.vocab')
a_i2w, a_w2i = load_vocab('datasets/coco/train/answers.vocab')
Nq = len(q_i2w)
Na = len(a_i2w)

In [19]:
for batch in b_gen:
    V,Q,mask,ans = create_feed_dict(batch,dataset.max_q,Na)

ValueError: cannot copy sequence with size 4096 to array axis with dimension 3