In [1]:
import os
import numpy as np
from glob import glob

In [2]:
# Vocabulary file; it is read line by line further down to build the word index
vocab_file = './data/vocab.txt'

# Annotation archive; its 'im_name2ann' entry is loaded into ann_dict
ann_file = './data/image_ann_dict_ranker_all.npz'
# Directory holding one explanation-feature .npz per image (image name with .jpg -> .npz)
explanation_feat_dir = './data/explanation_feat/'
# Output directory for the per-image training batch .npz files
save_dir = './data/training_batches/'

# Size of the leading (time) axis of the batch arrays. Each sequence occupies one slot for
# the whole sentence plus one slot per noun phrase, so T must be >= 1 + max noun phrases.
T = 15

In [3]:
# 'im_name2ann' is unwrapped with [()] and then used as a dict, i.e. it is stored as a
# pickled Python object. NumPy >= 1.16.3 refuses to unpickle unless allow_pickle=True.
# NOTE(security): unpickling can execute arbitrary code -- only load trusted .npz files.
# The `with` block closes the underlying archive once the entry has been read.
with np.load(ann_file, allow_pickle=True) as ann_npz:
    ann_dict = ann_npz['im_name2ann'][()]

In [4]:
# Longest noun-phrase list over all annotations of all images
# (this is what the LSTM sequence length T has to accommodate)
max_noun_phrase_num = max(
    (len(info['noun_phrases']) for anns in ann_dict.values() for info in anns),
    default=0,
)

print('maximum number of noun phrases:', max_noun_phrase_num)

maximum number of noun phrases: 13


In [5]:
# Read the vocabulary: one lower-cased, whitespace-trimmed entry per line of the file
with open(vocab_file) as f:
    vocab_list = [line.lower().strip() for line in f]
num_vocab = len(vocab_list)
# Map every entry to its position in vocab_list (a repeated entry keeps its last position)
word2vocab_idx = dict(zip(vocab_list, range(num_vocab)))
print('number of words in vocab_list:', num_vocab)

number of words in vocab_list: 2632


In [6]:
import re

# Capturing group: runs of non-word characters are kept as separate pieces by split()
SENTENCE_SPLIT_REGEX = re.compile(r'(\W+)')
def tokenize(sentence):
    '''
    Lower-case a sentence and split it into word and punctuation tokens.

    The sentence is split on runs of non-word characters; each piece (the
    separators included) is whitespace-stripped and empty pieces are dropped.
    Returns a list of strings.
    '''
    pieces = (piece.strip() for piece in SENTENCE_SPLIT_REGEX.split(sentence.lower()))
    return [piece for piece in pieces if piece]

def noun_phrases_to_bow(phrase, vocab_list):
    '''
    Build a bag-of-words count vector for a phrase.

    phrase: text to tokenize with tokenize().
    vocab_list: only its length is used here (it sets the vector size); word
        indices come from the module-level word2vocab_idx, so vocab_list must
        be the list that mapping was built from.

    Returns a float32 array of length len(vocab_list) holding per-word counts;
    tokens missing from word2vocab_idx are ignored.
    '''
    counts = np.zeros(len(vocab_list), np.float32)
    for token in tokenize(phrase):
        idx = word2vocab_idx.get(token)
        if idx is not None:
            counts[idx] += 1
    return counts

In [7]:
# Build and save one training batch per image. Every array is time-major: axis 0 is the
# sequence position (slot 0 = whole sentence, slots 1.. = noun phrases), axis 1 indexes
# the N explanations annotated for the image.
for im_name, ann in ann_dict.items():
    N = len(ann)

    seq_length_batch = np.zeros(N, np.int32)
    # np.bool was removed in NumPy 1.24; np.bool_ yields the same boolean dtype
    label_batch = np.zeros(N, np.bool_)

    bow_batch = np.zeros((T, N, num_vocab), np.float32)
    # NOTE(review): 4096 + 8 is presumably a 4096-d visual feature plus 8 extra
    # (e.g. spatial) values -- confirm against the feature extraction code
    visfeat_batch = np.zeros((T, N, 4096 + 8), np.float32)
    bbox_score_batch = np.zeros((T, N, 1), np.float32)

    explanation_feat_file = os.path.join(explanation_feat_dir, im_name.replace('.jpg', '.npz'))
    # we've ensured that all files exist in image_ann_dict_ranker_all.npz
    # The entry is a pickled dict, so allow_pickle=True is required on NumPy >= 1.16.3.
    # NOTE(security): only load feature files from a trusted source.
    # The `with` block closes each archive instead of leaking one file handle per image.
    with np.load(explanation_feat_file, allow_pickle=True) as feat_npz:
        explanation_feat = feat_npz['explanation_feat'][()]

    query_dict = explanation_feat['query_dict']
    for n, explanation_info in enumerate(ann):
        noun_phrases = explanation_info['noun_phrases']
        seq_length = 1 + len(noun_phrases)
        # Fail early with a clear message instead of an IndexError midway through filling
        if seq_length > T:
            raise ValueError(
                'sequence length %d exceeds T=%d for image %s' % (seq_length, T, im_name))

        label_batch[n] = explanation_info['label']
        seq_length_batch[n] = seq_length

        # Put the whole sentence feature at the beginning
        bow_batch[0, n] = noun_phrases_to_bow(explanation_info['explanation'], vocab_list)
        visfeat_batch[0, n] = explanation_feat['im_visfeat']
        bbox_score_batch[0, n] = 0  # assign 0 as scores of every sentence

        # Noun phrases follow in order, starting at time step 1
        for t, p in enumerate(noun_phrases, start=1):
            phrase = p['phrase']
            phrase_feat = query_dict[phrase]
            bow_batch[t, n] = noun_phrases_to_bow(phrase, vocab_list)
            visfeat_batch[t, n] = phrase_feat['visfeat']
            bbox_score_batch[t, n] = phrase_feat['score']

    save_file = os.path.join(save_dir, im_name.replace('.jpg', '.npz'))
    # im_name may contain sub-directories, so create the target directory per file
    os.makedirs(os.path.dirname(save_file), exist_ok=True)

    np.savez(save_file,
             seq_length_batch=seq_length_batch,
             label_batch=label_batch,
             bow_batch=bow_batch,
             visfeat_batch=visfeat_batch,
             bbox_score_batch=bbox_score_batch)