# Binary Masks to COCO Annotation Format

by _Tobias Reaper_

The goal of this notebook is to use the binary masks created in the [previous notebook](03-png-to-binary-mask.ipynb) to create an annotated object detection dataset in the COCO format.

Resources:

- [Create your own COCO-style datasets](https://patrickwasp.com/create-your-own-coco-style-dataset/)

Note:

> Although the image dataset used in the notebooks in this repo is not huge, it is too large to include in this shared repository. The dataset, both before and after conversion into the COCO format, is available at [tobias-fyi/neurecycle](https://github.com/tobias-fyi/neurecycle/tree/master/pycoco).

---

In [1]:
# === Imports === #
import datetime
import json
import os
import re
import fnmatch
from PIL import Image
import numpy as np
from pycococreatortools import pycococreatortools

In [2]:
# === Directories === #
# Name of the dataset split; used as the image sub-directory name.
DATASET_NAME = "items_train2019"
# Root of the training data. Defaults to the author's local checkout; set the
# NEURECYCLE_ROOT_DIR environment variable to run the notebook on another machine.
ROOT_DIR = os.environ.get(
    "NEURECYCLE_ROOT_DIR",
    "/Users/Tobias/workshop/buildbox/neurecycle/pycoco/items/train",
)
# Source images live in <ROOT_DIR>/<DATASET_NAME>, binary masks in <ROOT_DIR>/annotations.
IMAGE_DIR = os.path.join(ROOT_DIR, DATASET_NAME)
ANNOTATION_DIR = os.path.join(ROOT_DIR, "annotations")

In [3]:
# === MetaDataset === #
# Static metadata sections of the COCO file: dataset info, licenses, categories.

INFO = {
    "description": "Trash Panda Foreground Detection Sample Data",
    "url": "https://github.com/tobias-fyi/neurecycle/tree/master/pycoco/items/train",
    "version": "0.1.0",
    "year": 2019,
    "contributor": "tobias-fyi",
    # Naive UTC timestamp, "YYYY-MM-DD HH:MM:SS[.ffffff]". Built from an aware
    # "now" because datetime.utcnow() is deprecated since Python 3.12; tzinfo is
    # dropped afterwards so the string carries no "+00:00" suffix.
    "date_created": datetime.datetime.now(datetime.timezone.utc)
    .replace(tzinfo=None)
    .isoformat(" "),
}

LICENSES = [
    {
        "id": 1,
        "name": "Attribution-NonCommercial-ShareAlike License",
        "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
    }
]

# One entry per item class; ids are consecutive and start at 1.
CATEGORIES = [
    {"id": 1, "name": "cardboard", "supercategory": "item"},
    {"id": 2, "name": "glass_container", "supercategory": "item"},
    {"id": 3, "name": "metal_cans", "supercategory": "item"},
    {"id": 4, "name": "mixed_paper", "supercategory": "item"},
    {"id": 5, "name": "paper_treated", "supercategory": "item"},
    {"id": 6, "name": "plastic_bottle", "supercategory": "item"},
    {"id": 7, "name": "plastic_bubblewrap", "supercategory": "item"},
    {"id": 8, "name": "plastic_container_food", "supercategory": "item"},
    {"id": 9, "name": "plastic_film", "supercategory": "item"},
    {"id": 10, "name": "wrapper", "supercategory": "item"},
]

In [4]:
# === Helper functions === #
def filter_for_jpeg(root, files):
    """Keep only the JPEG files of a directory listing.

    Each name in ``files`` is joined onto ``root``; the joined paths ending in
    ``.jpeg`` or ``.jpg`` (case-sensitive) are returned in their original order.
    """
    jpeg_globs = ("*.jpeg", "*.jpg")
    joined_paths = (os.path.join(root, name) for name in files)

    return [
        path
        for path in joined_paths
        if any(fnmatch.fnmatchcase(path, glob) for glob in jpeg_globs)
    ]


def filter_for_png(root, files):
    """Keep only the PNG files of a directory listing.

    Each name in ``files`` is joined onto ``root``; the joined paths ending in
    ``.png`` (case-sensitive) are returned in their original order.
    """
    png_matcher = re.compile(fnmatch.translate("*.png"))

    png_paths = []
    for name in files:
        candidate = os.path.join(root, name)
        if png_matcher.match(candidate):
            png_paths.append(candidate)

    return png_paths


def filter_for_annotations(root, files, image_filename):
    """Return the .png mask files that belong to ``image_filename``.

    A file belongs to the image when it ends in ``.png`` (case-sensitive) and
    its base name starts with the image's base name (extension stripped).

    Args:
        root: Directory the entries in ``files`` live in.
        files: File names found in ``root``.
        image_filename: Path (or name) of the image to find masks for.

    Returns:
        List of ``os.path.join(root, name)`` paths, in the order of ``files``.
    """
    png_matcher = re.compile(fnmatch.translate("*.png"))
    image_stem = os.path.splitext(os.path.basename(image_filename))[0]

    matches = []
    for name in files:
        path = os.path.join(root, name)
        if not png_matcher.match(path):
            continue
        annotation_stem = os.path.splitext(os.path.basename(path))[0]
        # Literal prefix comparison: the image name must not be interpreted as
        # a regular expression, otherwise characters such as ".", "+" or "("
        # in a file name select the wrong masks (or none at all).
        if annotation_stem.startswith(image_stem):
            matches.append(path)

    return matches

In [5]:
def create_annotations():
    """Build the COCO annotation file for the dataset and write it to disk.

    Walks ``IMAGE_DIR`` for ``.png`` images. For each image, the ``.png`` masks
    under ``ANNOTATION_DIR`` selected by ``filter_for_annotations`` are turned
    into annotations with ``pycococreatortools.create_annotation_info``; masks
    for which that call returns ``None`` are skipped. The collected structure
    is dumped as JSON to ``{ROOT_DIR}/instances_{DATASET_NAME}.json``.

    Image ids and annotation ids both start at 1. The annotation id advances
    for every mask processed, including skipped ones.

    Raises:
        IndexError: If no category name from ``CATEGORIES`` occurs in a mask's
            file name or path.
    """
    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": [],
    }

    image_id = 1
    segmentation_id = 1

    # List the annotation tree once up front: the listing is the same for every
    # image, so re-walking it inside the image loop is wasted work.
    annotation_listing = [
        (annotation_root, annotation_names)
        for annotation_root, _, annotation_names in os.walk(ANNOTATION_DIR)
    ]

    # Filter for png images
    for image_root, _, image_names in os.walk(IMAGE_DIR):
        image_files = filter_for_png(image_root, image_names)

        # Loop through each image
        for image_filename in image_files:
            # Only the dimensions are needed; close the file handle right away.
            with Image.open(image_filename) as image:
                image_size = image.size

            image_info = pycococreatortools.create_image_info(
                image_id, os.path.basename(image_filename), image_size
            )
            coco_output["images"].append(image_info)

            # Filter for associated png annotations
            for annotation_root, annotation_names in annotation_listing:
                annotation_files = filter_for_annotations(
                    annotation_root, annotation_names, image_filename
                )

                # Go through each associated annotation
                for annotation_filename in annotation_files:

                    print(annotation_filename)

                    # Prefer a category named in the file name itself, so that a
                    # category word appearing in a parent directory cannot win.
                    # Fall back to the whole path to keep supporting layouts
                    # that encode the category in a directory name.
                    annotation_basename = os.path.basename(annotation_filename)
                    matching_ids = [
                        x["id"] for x in CATEGORIES if x["name"] in annotation_basename
                    ] or [x["id"] for x in CATEGORIES if x["name"] in annotation_filename]
                    if not matching_ids:
                        raise IndexError(
                            "No category from CATEGORIES found in annotation "
                            f"file name: {annotation_filename}"
                        )
                    class_id = matching_ids[0]

                    category_info = {
                        "id": class_id,
                        "is_crowd": "crowd" in image_filename,
                    }

                    # convert() returns a fully loaded copy, so the mask file
                    # can be closed before the array is used.
                    with Image.open(annotation_filename) as mask_image:
                        binary_mask = np.asarray(mask_image.convert("1")).astype(
                            np.uint8
                        )

                    annotation_info = pycococreatortools.create_annotation_info(
                        segmentation_id,
                        image_id,
                        category_info,
                        binary_mask,
                        image_size,
                        tolerance=2,
                    )

                    if annotation_info is not None:
                        coco_output["annotations"].append(annotation_info)

                    segmentation_id += 1

            image_id += 1

    with open(f"{ROOT_DIR}/instances_{DATASET_NAME}.json", "w") as output_json_file:
        json.dump(coco_output, output_json_file)

In [6]:
# === Send it! === #
create_annotations()

cardboard_0233.png
/Users/Tobias/workshop/buildbox/neurecycle/pycoco/items/train/annotations/0237_cardboard_0237.png
/Users/Tobias/workshop/buildbox/neurecycle/pycoco/items/train/annotations/0223_cardboard_0223.png
/Users/Tobias/workshop/buildbox/neurecycle/pycoco/items/train/annotations/0009_metal_cans_0009.png
/Users/Tobias/workshop/buildbox/neurecycle/pycoco/items/train/annotations/0035_metal_cans_0035.png
/Users/Tobias/workshop/buildbox/neurecycle/pycoco/items/train/annotations/0021_metal_cans_0021.png
/Users/Tobias/workshop/buildbox/neurecycle/pycoco/items/train/annotations/0182_glass_container_0182.png
/Users/Tobias/workshop/buildbox/neurecycle/pycoco/items/train/annotations/0196_glass_container_0196.png
/Users/Tobias/workshop/buildbox/neurecycle/pycoco/items/train/annotations/0169_glass_container_0169.png
/Users/Tobias/workshop/buildbox/neurecycle/pycoco/items/train/annotations/0141_wrapper_0141.png
/Users/Tobias/workshop/buildbox/neurecycle/pycoco/items/train/annotations/0155_w