In [7]:
import os
import struct
import numpy as np
from IPython.core.debugger import Tracer
import json
import time
from collections import defaultdict

In [17]:
# Input: COCO instance-annotation JSON that is handed to the COCO helper below.
annFile = 'coco/annotations/instances_train2014.json'
# Directory that is joined with each image's file name when records are built.
data_path = 'coco/train2014'
# Output: text file that receives one line per converted image.
out_path = 'coco_voc_train.txt'

In [18]:
# VOC class name -> integer label id. The id is simply the position of the
# name in the list below, so the order of the list is significant.
voc_classes_num = {
    class_name: class_id
    for class_id, class_name in enumerate([
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
    ])
}

In [19]:
# COCO category name -> VOC class name, restricted to the 20 classes the two
# datasets share. Most names are identical; the renamed ones are marked.
coco_to_voc = {
    'airplane': 'aeroplane',        # renamed
    'bicycle': 'bicycle',
    'bird': 'bird',
    'boat': 'boat',
    'bottle': 'bottle',
    'bus': 'bus',
    'car': 'car',
    'cat': 'cat',
    'chair': 'chair',
    'cow': 'cow',
    'dining table': 'diningtable',  # renamed
    'dog': 'dog',
    'horse': 'horse',
    'motorcycle': 'motorbike',      # renamed
    'person': 'person',
    'potted plant': 'pottedplant',  # renamed
    'sheep': 'sheep',
    'couch': 'sofa',                # renamed
    'train': 'train',
    'tv': 'tvmonitor',              # renamed
}
# Sanity check: the keys are COCO names, so the VOC spelling must not be a key.
print(type(coco_to_voc))
print('aeroplane' in coco_to_voc)

<class 'dict'>
False


In [20]:
class COCO:
    """Minimal reader for Microsoft COCO annotation files.

    After construction (or after calling ``createIndex``) the instance exposes:

    * ``dataset``   - the raw decoded JSON dictionary
    * ``anns``      - annotation id -> annotation dict
    * ``imgs``      - image id -> image dict
    * ``cats``      - category id -> category dict
    * ``imgToAnns`` - image id -> list of annotation dicts (defaultdict)
    * ``catToImgs`` - category id -> list of image ids (defaultdict)
    """

    def __init__(self, annotation_file=None):
        """
        Constructor of Microsoft COCO helper class for reading annotations.
        :param annotation_file (str): location of annotation file. When None,
            an empty helper is created and no index is built.
        :raises AssertionError: if the top-level JSON value is not a dict.
        :return:
        """
        # start from empty containers so every attribute exists even when no
        # annotation file is supplied
        self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict()
        self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
        if annotation_file is not None:
            print('loading annotations into memory...')
            tic = time.time()
            # the context manager closes the handle even if JSON parsing fails
            with open(annotation_file, 'r') as ann_fh:
                dataset = json.load(ann_fh)
            assert isinstance(dataset, dict), 'annotation file format {} not supported'.format(type(dataset))
            print('Done (t={:0.2f}s)'.format(time.time()- tic))
            self.dataset = dataset
            self.createIndex()

    def createIndex(self):
        """Rebuild all lookup tables from ``self.dataset``.

        Sections missing from the dataset ('annotations', 'images',
        'categories') simply leave the corresponding table empty.
        ``catToImgs`` is only filled when both 'annotations' and 'categories'
        are present.
        """
        # create index
        print('creating index...')
        anns, cats, imgs = {}, {}, {}
        imgToAnns,catToImgs = defaultdict(list),defaultdict(list)

        has_categories = 'categories' in self.dataset
        if 'annotations' in self.dataset:
            # single pass over the (potentially very long) annotation list
            for ann in self.dataset['annotations']:
                imgToAnns[ann['image_id']].append(ann)
                anns[ann['id']] = ann
                if has_categories:
                    catToImgs[ann['category_id']].append(ann['image_id'])

        if 'images' in self.dataset:
            for img in self.dataset['images']:
                imgs[img['id']] = img

        if has_categories:
            for cat in self.dataset['categories']:
                cats[cat['id']] = cat

        print('index created!')

        # create class members
        self.anns = anns
        self.imgToAnns = imgToAnns
        self.catToImgs = catToImgs
        self.imgs = imgs
        self.cats = cats

In [21]:
# Parse the annotation file and build the lookup indices
# (the recorded run below took roughly 11 s for the load step).
coco = COCO(annotation_file=annFile)

loading annotations into memory...
Done (t=11.35s)
creating index...
index created!


In [22]:
def parse_coco_ann(coco, Idx, data_path, name_map=None, class_ids=None):
    """Collect the boxes of one COCO image whose category maps to a target class.

    Args:
        coco: object exposing the ``imgToAnns``, ``cats`` and ``imgs`` mappings
            (as built by ``COCO.createIndex``).
        Idx: image id; a key of ``coco.imgToAnns`` and ``coco.imgs``.
        data_path (str): directory that is joined with the image file name.
        name_map (dict, optional): source category name -> target class name.
            Defaults to the module-level ``coco_to_voc``.
        class_ids (dict, optional): target class name -> integer label id.
            Defaults to the module-level ``voc_classes_num``.

    Returns:
        tuple: ``(image_path, labels)`` where ``labels`` is a list of
        ``[xmin, ymin, xmax, ymax, class_id]`` with coordinates rounded to the
        nearest integer. ``labels`` is empty when the image has no annotation
        of a mapped category.

    Raises:
        KeyError: if ``Idx`` is not in ``coco.imgs`` or an annotation refers
            to a category id missing from ``coco.cats``.
    """
    if name_map is None:
        name_map = coco_to_voc
    if class_ids is None:
        class_ids = voc_classes_num

    labels = []
    # .get() instead of [] so that looking up an unknown image id does not
    # insert an empty list into the defaultdict index as a side effect
    for ann in coco.imgToAnns.get(Idx, []):
        catNm = coco.cats[ann['category_id']]['name']
        if catNm not in name_map:
            continue
        # COCO stores boxes as [x, y, width, height]; emit corner coordinates
        xmin, ymin, w, h = ann['bbox']
        xmax = xmin + w
        ymax = ymin + h
        labels.append([round(xmin), round(ymin), round(xmax), round(ymax),
                       class_ids[name_map[catNm]]])

    # imgToAnns is keyed by image id, so Idx already is the image id; reading
    # it back from the first annotation failed for images without annotations
    image_file = coco.imgs[Idx]['file_name']
    image_path = os.path.join(data_path, image_file)
    return image_path, labels

In [23]:
def convert_to_string(image_path, labels):
    """Serialize one image record as a single line of text.

    The line starts with image_path, followed by every value of every label
    (in order, each converted with str), separated by single spaces and
    terminated by a newline.

    Returns:
    string
    """
    fields = [image_path]
    for label in labels:
        fields.extend(str(value) for value in label)
    return ' '.join(fields) + '\n'

In [24]:
def process_json(coco, data_path, out_path):
    """Write one text line per image that has at least one mapped label.

    Every image id in ``coco.imgToAnns`` is parsed with ``parse_coco_ann``;
    images that yield a non-empty label list are serialized with
    ``convert_to_string`` and written to ``out_path`` (the file is created or
    truncated).

    Parsing is best-effort: an image whose annotations cannot be parsed is
    skipped, and the number of skipped images (with the first error seen) is
    printed at the end instead of being silently discarded. Errors while
    writing the output file are not suppressed.

    Args:
        coco: indexed COCO helper (needs ``imgToAnns``, ``cats``, ``imgs``).
        data_path (str): directory joined with each image file name.
        out_path (str): destination text file.
    """
    skipped = 0
    first_error = None

    # the context manager guarantees the file is flushed and closed even if
    # an unexpected error escapes the loop
    with open(out_path, 'w') as out_file:
        for Idx in coco.imgToAnns.keys():
            try:
                image_path, labels = parse_coco_ann(coco, Idx, data_path)
            except Exception as err:
                # keep going on malformed records, but remember that it happened
                skipped += 1
                if first_error is None:
                    first_error = (Idx, err)
                continue
            if len(labels) > 0:
                out_file.write(convert_to_string(image_path, labels))

    if skipped:
        print('skipped {} image(s) that could not be parsed; first failure: image {} -> {!r}'.format(
            skipped, first_error[0], first_error[1]))

In [25]:
# Convert the indexed annotations into the text label file, then signal completion.
process_json(coco, data_path=data_path, out_path=out_path)
print('done')

done
