In [1]:
import lmdb
import json

In [2]:
class PhotoData(object):
    """Read-only accessor for image bytes stored in an LMDB environment.

    Records are looked up by the ASCII encoding of ``str(index)``, so an
    integer id ``42`` maps to the key ``b'42'``.
    """

    def __init__(self, path):
        """Open the LMDB environment at ``path`` read-only and without locking.

        ``map_size`` is set to 2**36 bytes (64 GiB).
        """
        self.env = lmdb.open(
            path, map_size=2**36, readonly=True, lock=False
        )

    def __iter__(self):
        """Yield every ``(key, value)`` pair produced by a cursor over the store."""
        with self.env.begin() as t:
            with t.cursor() as c:
                for key, value in c:
                    yield key, value

    def __getitem__(self, index):
        """Return the stored bytes for ``index``, or ``None`` if absent or empty."""
        key = str(index).encode('ascii')
        with self.env.begin() as t:
            data = t.get(key)
        if not data:
            return None
        return data

    def __len__(self):
        """Return the ``'entries'`` count reported by the environment's ``stat()``."""
        return self.env.stat()['entries']

    def save_img(self, id, path):
        """Write the bytes stored under ``id`` to the file at ``path``.

        Raises:
            KeyError: if there is no data for ``id``. The lookup happens
                before the file is opened, so a missing record never
                creates or truncates ``path``.
        """
        data = self[id]
        if data is None:
            raise KeyError(f'no image data stored for id {id!r}')
        with open(path, 'wb') as f:
            f.write(data)


In [3]:
photos_db = PhotoData('./photos.lmdb')
# The validation split does not come annotated, so only the training
# annotations are loaded.
# val_annos = json.load(open('./val/modanet2018_instances_val.json'))
# Use a context manager so the annotation file handle is closed after parsing.
with open('./train/modanet2018_instances_train.json') as annos_file:
    train_annos = json.load(annos_file)

```
{
'info' : info, 'images' : [image], 'annotations' : [annotation], 'licenses' : [license],'year': year, 'categories': [category], 'type': type
}

info{
'version' : str, 'description' : str, 'contributor' : str, 'date_created' : datetime,
}

image{
'id' : int, 'width' : int, 'height' : int, 'file_name' : str, 'license' : int
}

license{
'id' : int, 'name' : str, 'url' : str,
}

annotation{
  'area': int, 
  'bbox': [x,y,width,height],
  'segmentation': [polygon],
  'image_id': int,
  'id': int,
  'category_id': int,
  'iscrowd': int
}
category{
  'supercategory': str, 'id': int, 'name': str,
}
```

In [4]:
# Uncomment to write every training image from photos_db into train/<file_name>.
# for image in train_annos['images']:
#     photos_db.save_img(image['id'], f'train/{image["file_name"]}')

In [5]:
# Make the label file smaller by dropping top-level metadata and per-entry
# fields, then write it out as compact JSON.
# pop(key, None) instead of pop(key): re-running this cell after the keys have
# already been removed is a no-op rather than a KeyError.
for key in ('info', 'licenses', 'type', 'year'):
    train_annos.pop(key, None)
for image in train_annos['images']:
    image.pop('license', None)
for cat in train_annos['categories']:
    cat.pop('supercategory', None)
# separators=(',', ':') removes the spaces json.dump emits by default.
with open('modanet_instances_all.json', 'w') as f:
    json.dump(train_annos, f, separators=(',', ':'))