# Exploring Visual Genome

In [1]:
! ln -s /usr/local/courses/lt2318/data/ data

In [6]:
! ls data/visual_genome/

relationships.json  relationships.json.zip


### Load python libraries

In [4]:
import json
from collections import Counter, defaultdict

### Load/read the dataset

In [41]:
# read from file
relationships_from_file = json.load(open('data/visual_genome/relationships.json'))

# name/names correction for reading content of nodes in the dataset
name_extract = lambda x: x['names'][0].lower() if 'names' in x and len(x['names']) else x['name'].lower() if 'name' in x else '' 

In [43]:
print('number of images:', len(relationships_from_file))

number of images: 108077


In [48]:
image_data_from_file = json.load(open('data/visual_genome/image_data.json'))
image_id_to_size = {
    image_data['image_id']: (image_data['width'], image_data['height'])
    for image_data in image_data_from_file
}

In [49]:
image_id_to_size

{1: (800, 600),
 2: (800, 600),
 3: (640, 480),
 4: (640, 480),
 5: (800, 600),
 6: (800, 600),
 7: (800, 600),
 8: (800, 600),
 9: (640, 480),
 10: (800, 600),
 11: (800, 600),
 12: (800, 600),
 13: (800, 600),
 14: (800, 600),
 15: (800, 600),
 16: (800, 600),
 17: (800, 640),
 18: (800, 600),
 19: (800, 600),
 20: (800, 600),
 21: (256, 256),
 22: (800, 600),
 23: (800, 600),
 24: (800, 600),
 25: (800, 600),
 26: (800, 600),
 27: (800, 600),
 28: (800, 600),
 29: (800, 600),
 30: (800, 600),
 31: (800, 600),
 32: (800, 600),
 33: (800, 600),
 34: (800, 600),
 35: (800, 600),
 36: (800, 600),
 37: (800, 600),
 38: (782, 800),
 39: (800, 534),
 40: (560, 800),
 41: (800, 600),
 42: (800, 600),
 43: (800, 600),
 44: (800, 600),
 45: (800, 600),
 46: (800, 600),
 47: (800, 600),
 48: (800, 600),
 49: (600, 800),
 50: (800, 600),
 51: (800, 533),
 52: (800, 600),
 53: (800, 534),
 54: (800, 600),
 55: (800, 600),
 56: (800, 600),
 57: (800, 600),
 58: (800, 600),
 59: (800, 600),
 60: (

In [57]:
def get_description(relations_data):
    triplet = (
        name_extract(relation_data['subject']),
        relation_data['predicate'].lower(), # synset?
        name_extract(relation_data['object']),
    )
    
    return triplet

def get_bboxes(relations_data, size):
    w0, h0 = size
    def normalize(bbox):
        x, y, w, h = bbox
        return (x/w0, y/h0, w/w0, h/h0, )
    
    bboxes = (
        normalize([relation_data['subject'][d] for d in ['x', 'y', 'w', 'h', ]]),
        normalize([relation_data['object'][d] for d in ['x', 'y', 'w', 'h', ]]),
    )
    
    return bboxes

In [63]:
for img_relations in relationships_from_file:
    for relation_data in img_relations['relationships']:
        print(get_description(relation_data))
        bbox1, bbox2 = get_bboxes(relation_data, image_id_to_size[img_relations['image_id']])
        print("{0:.2f} {1:.2f} {2:.2f} {3:.2f}".format(*bbox1))
        print("{0:.2f} {1:.2f} {2:.2f} {3:.2f}".format(*bbox2))
        
    break

('shade', 'on', 'sidewalk')
0.15 0.56 0.34 0.32
0.10 0.51 0.90 0.48
('man', 'wears', 'shoes')
0.30 0.41 0.07 0.44
0.48 0.81 0.06 0.05
('car', 'has', 'headlight')
0.60 0.53 0.09 0.16
0.64 0.61 0.03 0.03
('sign', 'on', 'building')
0.15 0.02 0.11 0.30
0.00 0.00 0.27 0.89
('tree trunk', 'on', 'sidewalk')
0.78 0.39 0.11 0.55
0.10 0.55 0.90 0.44
('man', 'has', 'shirt')
0.30 0.41 0.07 0.44
0.30 0.48 0.07 0.17
('sidewalk', 'next to', 'street')
0.10 0.55 0.90 0.44
0.45 0.47 0.55 0.39
('car', 'has', 'back')
0.89 0.57 0.11 0.29
0.90 0.56 0.08 0.28
('man', 'has', 'glasses')
0.30 0.41 0.07 0.44
0.34 0.45 0.03 0.02
('parking meter', 'on', 'sidewalk')
0.72 0.55 0.04 0.24
0.10 0.55 0.90 0.44
('man', 'wears', 'shoes')
0.30 0.41 0.07 0.44
0.48 0.81 0.06 0.05
('man', 'has', 'shoes')
0.47 0.44 0.09 0.42
0.49 0.80 0.06 0.06
('man', 'has', 'shirt')
0.30 0.41 0.07 0.44
0.30 0.48 0.07 0.17
('man', 'wears', 'pants')
0.30 0.41 0.07 0.44
0.31 0.64 0.05 0.20
('man', 'has', 'jacket')
0.47 0.44 0.09 0.42
0.45 0.49 

### Ramisa et al. (2015) test

http://www.aclweb.org/anthology/D/D15/D15-1022.pdf

http://www.aclweb.org/anthology/attachments/D/D15/D15-1022.Attachment.pdf


In [64]:
bbox1, bbox2

([0.00125, 0.0033333333333333335, 0.2725, 0.8933333333333333],
 [0.7525, 0.006666666666666667, 0.21625, 0.24666666666666667])