In [1]:
%pylab inline
from sklearn.datasets import load_wine
import pandas as pd
import os
import json

Populating the interactive namespace from numpy and matplotlib


In [2]:
def load_json(path):
    with open(path) as f:
        return json.load(f)

## Get all image ids

In [3]:
img_path = os.path.join('data', 'images')
img_names = next(os.walk(img_path))[2]
img_ids = [int(img_name.split('.')[0].split('_')[2])
           for img_name in img_names]
img_ids.sort()
len(img_ids)

2809

## Get questions

In [89]:
question_path = os.path.join('data', 'coco_questions.json')
with open(question_path) as f:
    questions_data = json.load(f)
    questions = questions_data['questions']

## Filter questions by having images

In [90]:
questions.sort(key=lambda x: x['image_id'])
filtered_questions = list(filter(lambda x: x['image_id'] in img_ids, questions))

question_ids = [question['question_id'] for question in filtered_questions]
len(filtered_questions), len(img_ids)

(14869, 2809)

## Get answers

In [3]:
answer_path = os.path.join('data', 'coco_answers.json')
with open(answer_path) as f:
    answers_data = json.load(f)
answers_data.keys()

dict_keys(['info', 'license', 'data_subtype', 'annotations', 'data_type'])

In [9]:
x = {a['answer_type'] for a in answers_data['annotations']}

In [10]:
x

{'number', 'other', 'yes/no'}

In [11]:
bool_answers = [answer
    for answer in answers_data['annotations']
    if answer['multiple_choice_answer'] in {'yes', 'no'}
]

In [13]:
bool_answers[0]

{'answer_type': 'yes/no',
 'answers': [{'answer': 'yes', 'answer_confidence': 'yes', 'answer_id': 1},
  {'answer': 'yes', 'answer_confidence': 'yes', 'answer_id': 2},
  {'answer': 'yes', 'answer_confidence': 'maybe', 'answer_id': 3},
  {'answer': 'yes', 'answer_confidence': 'yes', 'answer_id': 4},
  {'answer': 'yes', 'answer_confidence': 'maybe', 'answer_id': 5},
  {'answer': 'no', 'answer_confidence': 'maybe', 'answer_id': 6},
  {'answer': 'yes', 'answer_confidence': 'yes', 'answer_id': 7},
  {'answer': 'yes', 'answer_confidence': 'yes', 'answer_id': 8},
  {'answer': 'yes', 'answer_confidence': 'yes', 'answer_id': 9},
  {'answer': 'yes', 'answer_confidence': 'maybe', 'answer_id': 10}],
 'image_id': 458752,
 'multiple_choice_answer': 'yes',
 'question_id': 458752003,
 'question_type': 'is this'}

In [None]:
res = []
for a in bool_answers:
    a[]

In [77]:
filtered_answers = list(filter(lambda x: x['image_id'] in img_ids, answers_data['annotations']))
filtered_bool_answers = [answer
    for answer in filtered_answers
    if answer['multiple_choice_answer'] in {'yes', 'no'}
]
len(bool_answers)

5485

## Captions

In [4]:
captions = load_json('data/captions_train2014.json')
filtered_captions = list(filter(lambda x: x['image_id'] in img_ids, captions['annotations']))
filtered_captions.sort(key=lambda x: x['image_id'])

In [17]:
len(filtered_captions)

14053

In [83]:
caption_df = pd.DataFrame(captions['annotations'])

In [84]:
joint_captions = caption_df.groupby('image_id').caption.apply(lambda x: ''.join(x.values))
joint_captions.apply(len).median()

259.0

In [88]:
joint_captions.shape

(82783,)

## To dataframe

In [91]:
drop_columns = ['answer_type', 'answers', 'question_type']

bool_answers_df = pd.DataFrame(bool_answers).drop(drop_columns, axis=1)
bool_answers_df.sort_values('image_id')

questions_df = pd.DataFrame(questions)

q_indexed = questions_df.set_index('question_id')
ba_indexed = bool_answers_df.set_index('question_id')

In [92]:
joint = q_indexed[['question']].join(ba_indexed[['multiple_choice_answer']])
joint = joint.dropna()

joint = joint.join(ba_indexed[['image_id']])
joint = joint.reset_index().set_index('image_id')
joint = joint.join(joint_captions)

joint.rename({'multiple_choice_answer': 'is_yes'}, axis=1, inplace=True)
joint.is_yes = joint.is_yes.apply(lambda x: x == 'yes')
print(joint.shape)
joint.reset_index().head(5)

(167494, 4)


Unnamed: 0,image_id,question_id,question,is_yes,caption
0,25,25002,Could this photo be from a zoo?,True,A giraffe eating food from the top of the tree...
1,25,25003,Are the animals eating?,True,A giraffe eating food from the top of the tree...
2,25,25005,Is there a zebra?,False,A giraffe eating food from the top of the tree...
3,25,25007,Is the giraffe eating the tree?,True,A giraffe eating food from the top of the tree...
4,25,25008,Are both giraffes standing?,False,A giraffe eating food from the top of the tree...


## Save data

In [93]:
joint.to_csv(os.path.join('data', 'joint.csv'))

In [9]:
with open(os.path.join('data', 'filtered_questions.json'), 'w') as f:
    json.dump(filtered_questions, f)

with open(os.path.join('data', 'filtered_answers.json'), 'w') as f:
    json.dump(filtered_answers, f)

joint_captions.to_csv(os.path.join('data', 'filtered_joint_captions.csv'))

  import sys
