# COCO data preprocessing

This code will download the caption annotations for COCO and preprocess them into an hdf5 file and a json file. 

These will then be read by the COCO data loader in Lua and trained on.

In [1]:
# let's download the caption annotations (see http://mscoco.org/dataset/#download)
import os

# Only fetch the archive (~19MB) when it is not already on disk, so that
# re-running this cell does not download a second copy. Delete the zip to
# force a fresh download. The cell evaluates to 0 when the download is
# skipped, otherwise to the status returned by os.system for wget.
0 if os.path.exists('captions_train-val2014.zip') else os.system('wget http://msvocds.blob.core.windows.net/annotations-1-0-3/captions_train-val2014.zip')

0

In [2]:
# extract the archive; -o overwrites files from a previous run without
# prompting, so the cell can be re-executed non-interactively
os.system('unzip -o captions_train-val2014.zip')

0

In [1]:
import json

# Parse both caption annotation files. The `with` blocks close each file
# handle as soon as its contents have been parsed.
with open('annotations/captions_val2014.json', 'r') as f:
    val = json.load(f)
with open('annotations/captions_train2014.json', 'r') as f:
    train = json.load(f)

In [13]:
# top-level sections of the validation annotations file
val.keys()

dict_keys(['info', 'images', 'licenses', 'annotations'])

In [14]:
# metadata describing the dataset release (the 'info' section)
val['info']

{'contributor': 'Microsoft COCO group',
 'date_created': '2015-01-27 09:11:52.357475',
 'description': 'This is stable 1.0 version of the 2014 MS COCO dataset.',
 'url': 'http://mscoco.org',
 'version': '1.0',
 'year': 2014}

In [16]:
# how many image records the validation split has
len(val['images'])

40504

In [17]:
# how many caption annotations the validation split has
len(val['annotations'])

202654

In [9]:
# what a single image record looks like in the training split
train['images'][0]

{'coco_url': 'http://mscoco.org/images/57870',
 'date_captured': '2013-11-14 16:28:13',
 'file_name': 'COCO_train2014_000000057870.jpg',
 'flickr_url': 'http://farm4.staticflickr.com/3153/2970773875_164f0c0b83_z.jpg',
 'height': 480,
 'id': 57870,
 'license': 5,
 'width': 640}

In [18]:
# what a single image record looks like in the validation split
val['images'][0]

{'coco_url': 'http://mscoco.org/images/391895',
 'date_captured': '2013-11-14 11:18:45',
 'file_name': 'COCO_val2014_000000391895.jpg',
 'flickr_url': 'http://farm9.staticflickr.com/8186/8119368305_4e622c8349_z.jpg',
 'height': 360,
 'id': 391895,
 'license': 3,
 'width': 640}

In [15]:
# first four annotation records of the validation split
# (each holds the caption text, an annotation id and the image id)
val['annotations'][:4]

[{'caption': 'A bicycle replica with a clock as the front wheel.',
  'id': 37,
  'image_id': 203564},
 {'caption': 'A black Honda motorcycle parked in front of a garage.',
  'id': 38,
  'image_id': 179765},
 {'caption': 'A room with blue walls and a white sink and door.',
  'id': 49,
  'image_id': 322141},
 {'caption': 'A car that seems to be parked illegally behind a legally parked car',
  'id': 89,
  'image_id': 16977}]

In [11]:
import json
import os

# combine all images and annotations together
imgs = val['images'] + train['images']
annots = val['annotations'] + train['annotations']

# for efficiency lets group annotations by image:
# itoa maps an image id to the list of all annotation dicts for that image
itoa = {}
for a in annots:
    itoa.setdefault(a['image_id'], []).append(a)

# create the json blob: one record per image with its path, id and captions
out = []
for img in imgs:
    imgid = img['id']

    # coco specific here, they store train/val images separately
    loc = 'train2014' if 'train' in img['file_name'] else 'val2014'

    out.append({
        'file_path': os.path.join(loc, img['file_name']),
        'id': imgid,
        # itoa[imgid] raises KeyError for an image without any annotation
        'captions': [a['caption'] for a in itoa[imgid]],
    })

# write through a context manager so the file is flushed and closed
# before anything else tries to read coco_raw.json
with open('coco_raw.json', 'w') as f:
    json.dump(out, f)

In [14]:
# peek at the first two image ids and their grouped annotations only;
# displaying the whole dict would print an entry for every image
dict(list(itoa.items())[:2])

{203564: [{'caption': 'A bicycle replica with a clock as the front wheel.',
   'id': 37,
   'image_id': 203564},
  {'caption': 'The bike has a clock as a tire.',
   'id': 181,
   'image_id': 203564},
  {'caption': 'A black metal bicycle with a clock inside the front wheel.',
   'id': 478,
   'image_id': 203564},
  {'caption': 'A bicycle figurine in which the front wheel is replaced with a clock\n',
   'id': 6637,
   'image_id': 203564},
  {'caption': 'A clock with the appearance of the wheel of a bicycle ',
   'id': 6802,
   'image_id': 203564}],
 179765: [{'caption': 'A black Honda motorcycle parked in front of a garage.',
   'id': 38,
   'image_id': 179765},
  {'caption': 'A Honda motorcycle parked in a grass driveway',
   'id': 182,
   'image_id': 179765},
  {'caption': 'A black Honda motorcycle with a dark burgundy seat.',
   'id': 479,
   'image_id': 179765},
  {'caption': 'Ma motorcycle parked on the gravel in front of a garage',
   'id': 6638,
   'image_id': 179765},
  {'caption

In [13]:
# let's see what the assembled records look like: the first entry of `out`
# (a dict with 'file_path', 'id' and 'captions')
out[0]

{'captions': ['A man with a red helmet on a small moped on a dirt road. ',
  'Man riding a motor bike on a dirt road on the countryside.',
  'A man riding on the back of a motorcycle.',
  'A dirt path with a young person on a motor bike rests to the foreground of a verdant area with a bridge and a background of cloud-wreathed mountains. ',
  'A man in a red shirt and a red hat is on a motorcycle on a hill side.'],
 'file_path': 'val2014/COCO_val2014_000000391895.jpg',
 'id': 391895}