In [118]:
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
import json
from pycocotools.coco import COCO
from pathlib import Path
import requests

In [20]:
COCO_DIR = "coco"

In [110]:
def download_annotations(coco_dir,year=2017,annotation_type="instances"):
    if isinstance(coco_dir,str):
        coco_dir = Path(coco_dir)
     
    year = str(year)
    
    print("checking if annotations already exist...")
    
    annotation_dir = coco_dir/'annotations'
    if not annotation_dir.is_dir():
        annotation_dir.mkdir()
        print(f"{annotation_dir} directory not found, created it.")
    
    train_pth_exists = [s for s in annotation_dir.iterdir() if annotation_type+"_train"+year in str(s)]
    val_pth_exists = [s for s in annotation_dir.iterdir() if annotation_type+"_val"+year in str(s)]
    
    if len(train_pth_exists)>0 and len(val_pth_exists)>0:
        print(f"found annotations for type '{annotation_type}' from {year} COCO dataset on disk")
        print("Done.")
        return train_pth_exists[0],val_pth_exists[0]
    
    print(f"downloading annotations for type '{annotation_type}' from {year} COCO dataset...")
    
    resp = urlopen(f'http://images.cocodataset.org/annotations/annotations_trainval{year}.zip')
    zipfile = ZipFile(BytesIO(resp.read()))
    
    for filename in zipfile.namelist():
        if annotation_type in filename:
            filepath = coco_dir/filename
            print(f"saving annotations to {filepath}")
            
            if "train" in filename:
                train_pth = filepath
            elif "val" in filename:
                val_pth = filepath
                
            with zipfile.open(filename) as f:
                data = f.read()
                d = json.loads(data)
                with open(filepath, "w") as fout:
                    fout.write(json.dumps(d))
    
    print("Done.")
    return train_pth, val_pth

In [112]:
download_annotations(COCO_DIR,2014)

checking if annotations already exist...
downloading annotations for type 'instances' from 2014 COCO dataset...
saving annotations to coco/annotations/instances_train2014.json
saving annotations to coco/annotations/instances_val2014.json
Done.


(PosixPath('coco/annotations/instances_train2014.json'),
 PosixPath('coco/annotations/instances_val2014.json'))

In [113]:
# instantiate COCO specifying the annotations json path
coco = COCO('coco/annotations/instances_val2017.json')

loading annotations into memory...
Done (t=2.47s)
creating index...
index created!


In [121]:
# Specify a list of category names of interest
catIds = coco.getCatIds(catNms=['person','train','horse'])

In [122]:
catIds

[1, 7, 19]

In [123]:
# Get the corresponding image ids and images using loadImgs
imgIds = coco.getImgIds(catIds=catIds)
images = coco.loadImgs(imgIds)

In [124]:
images

[{'license': 3,
  'file_name': '000000016228.jpg',
  'coco_url': 'http://images.cocodataset.org/val2017/000000016228.jpg',
  'height': 440,
  'width': 640,
  'date_captured': '2013-11-19 00:09:53',
  'flickr_url': 'http://farm4.staticflickr.com/3737/10031812195_372ae7538f_z.jpg',
  'id': 16228},
 {'license': 3,
  'file_name': '000000148999.jpg',
  'coco_url': 'http://images.cocodataset.org/val2017/000000148999.jpg',
  'height': 427,
  'width': 640,
  'date_captured': '2013-11-17 08:17:32',
  'flickr_url': 'http://farm3.staticflickr.com/2848/9343952414_29967b3cc4_z.jpg',
  'id': 148999}]

In [119]:
# Save the images into a local folder
for im in images:
    img_data = requests.get(im['coco_url']).content
    with open('...path_saved_ims/coco_person/' + im['file_name'], 'wb') as handler:
        handler.write(img_data)

FileNotFoundError: [Errno 2] No such file or directory: '...path_saved_ims/coco_person/000000148999.jpg'

In [None]:
from pycocotools.coco import COCO
import requests

# instantiate COCO specifying the annotations json path
coco = COCO('...path_to_annotations/instances_train2014.json')
# Specify a list of category names of interest
catIds = coco.getCatIds(catNms=['person'])

In [None]:
dataset