### 00-COCO-Dataset

Get 100 images of each of these categories: person, bird

In [10]:
import os
from pycocotools.coco import COCO
import requests

from pathlib import Path
import zipfile

In [11]:
# URL of the COCO 2017 train/val annotations archive
# Taken from: https://cocodataset.org/#download
annotations_URL = 'http://images.cocodataset.org/annotations/annotations_trainval2017.zip'
# The archive name is the last path segment of the URL
annotations_file_name = annotations_URL.rsplit('/', 1)[-1]
local_annotations_dir = '../datasets/annotations'
annotation_file_to_use = 'annotations/instances_train2017.json' # The file from downloaded annotations to use

# COCO categories to look up images for.
# Run the notebook once per category (e.g. ['person'], then ['bird']).
category_names = ['person']
# Upper bound on how many images to download from the COCO dataset
images_to_download = 200

# Directory the downloaded images are written to
local_images_path = '../datasets/coco_images'

In [12]:
# Download file from web
def download_file(url, local_filename, timeout=60):
    """Download `url` and save the response body to `local_filename`.

    Args:
        url: Address to fetch with an HTTP GET request.
        local_filename: Path of the local file to (over)write with the body.
        timeout: Seconds passed to `requests.get` as its `timeout` argument,
            so a stalled connection fails instead of blocking forever.

    Returns:
        The downloaded body as bytes (the same bytes written to disk).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Nothing is written to `local_filename` in that case.
    """
    # Send a HTTP request to the URL
    response = requests.get(url, timeout=timeout)

    # Fail loudly on an error status instead of saving the error page as the file
    response.raise_for_status()

    payload = response.content

    # Write the content of the response (the file) to a local file
    with open(local_filename, 'wb') as f:
        f.write(payload)

    return payload

# Locations of the downloaded archive, the folder it is extracted into,
# and the annotation JSON that is loaded from that folder
annotations_zip_path = os.path.join(local_annotations_dir, annotations_file_name)
annotations_extract_dir = os.path.join(local_annotations_dir, annotations_file_name.replace('.zip',''))
annotation_json_path = os.path.join(annotations_extract_dir, annotation_file_to_use)

# Only download/unzip if the annotation file itself is missing. Checking the
# file (not just the directory) lets a run that failed half-way be repeated.
if not Path(annotation_json_path).exists():
    # Create directory
    os.makedirs(local_annotations_dir, exist_ok=True)
    print('Local annotations directory created')

    # Download annotation file, unless a valid archive is already on disk
    if not zipfile.is_zipfile(annotations_zip_path):
        print('Started downloading annotations')
        download_file(annotations_URL, annotations_zip_path)
        print('Annotations downloaded')

    # Unzip the file; the context manager closes the archive handle afterwards
    with zipfile.ZipFile(annotations_zip_path) as annotations_archive:
        annotations_archive.extractall(annotations_extract_dir)
    print('Annotations unzipped')

else:
    print('Local annotations directory already exists')

    
# Build the path to the annotation JSON inside the unzipped archive folder,
# then hand it to the COCO api
annotations_json_file = os.path.join(
    local_annotations_dir,
    annotations_file_name.replace('.zip',''),
    annotation_file_to_use,
)
coco = COCO(annotations_json_file)
print('COCO API initialised')

# -------------
# display COCO categories and supercategories
cats = coco.loadCats(coco.getCatIds())
nms=[cat['name'] for cat in cats]
print('COCO categories: \n{}\n'.format(' '.join(nms)))

# Unique supercategory names; sorted when printed so the output is the same
# on every run (iteration order of a set of strings can vary between sessions)
nms = {cat['supercategory'] for cat in cats}
print('COCO supercategories: \n{}'.format(' '.join(sorted(nms))))
# -------------

# Look up the ids of the requested categories, then the ids of the images
# returned for those categories
catIds = coco.getCatIds(catNms=category_names)
imgIds = coco.getImgIds(catIds=catIds)
print('Found {} images for the categories {}'.format(len(imgIds), category_names))

# Cap the requested amount at the number of images that were found
images_to_download = min(images_to_download, len(imgIds))

# Load image information
img_info_list = coco.loadImgs(imgIds)

# Create directory
os.makedirs(local_images_path, exist_ok=True)
print('Local images directory created')

# Keep track of how many images are handled (downloaded or already present)
image_counter = 0
# Of those, how many were already on disk and therefore not fetched again
skipped_counter = 0

for img_info in img_info_list:

    # Check the limit before doing any work, so a limit of 0 downloads nothing
    if image_counter >= images_to_download:
        break

    # Get the coco_url
    coco_url = img_info['coco_url']

    # The local file name is the last path segment of the URL
    coco_filename = coco_url.rsplit('/', 1)[-1]
    local_image_file = os.path.join(local_images_path, coco_filename)

    if Path(local_image_file).exists():
        # Already fetched by an earlier run; do not download it again
        skipped_counter += 1
    else:
        # Save locally (for easy viewing)
        binary_data = download_file(coco_url, local_image_file)

    image_counter += 1

print(f'Downloaded {image_counter - skipped_counter} images')
if skipped_counter:
    print(f'Skipped {skipped_counter} images that already exist locally')

Local annotations directory already exists
loading annotations into memory...
Done (t=18.19s)
creating index...
index created!
COCO API initialised
COCO categories: 
person bicycle car motorcycle airplane bus train truck boat traffic light fire hydrant stop sign parking meter bench bird cat dog horse sheep cow elephant bear zebra giraffe backpack umbrella handbag tie suitcase frisbee skis snowboard sports ball kite baseball bat baseball glove skateboard surfboard tennis racket bottle wine glass cup fork knife spoon bowl banana apple sandwich orange broccoli carrot hot dog pizza donut cake chair couch potted plant bed dining table toilet tv laptop mouse remote keyboard cell phone microwave oven toaster sink refrigerator book clock vase scissors teddy bear hair drier toothbrush

COCO supercategories: 
person vehicle food sports indoor outdoor appliance electronic accessory kitchen animal furniture
Found 64115 images for the categories ['person']
Local images directory created
Downloaded 