In [1]:
# Copyright 2022 Sony Semiconductor Solutions Corp. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Convert Annotation from VoTT Format to COCO Format

With this notebook, you can convert an annotation file tagged and exported with VoTT into COCO format.

## Imports

In [1]:
import datetime
import glob
import json
import os
import pathlib
from collections import OrderedDict, defaultdict

import ipywidgets as widgets
import pycocotools._mask as _mask
from IPython.display import display

## Set input VoTT annotation file

Run the following code cell and select a VoTT json file you want to convert from the drop-down list.<br>

An annotation file exported by VoTT is named as follows, and only files matching this name pattern appear in the drop-down list.<br>
`[VoTT-project-name]-export.json`

In [None]:
# Search the devcontainer workspace recursively for VoTT export files.
# glob returns matches in arbitrary order, so sort them to keep the
# drop-down list (and its default selection) stable between runs.
files = sorted(glob.glob("/workspaces/**/*-export.json", recursive=True))
if not files:
    raise FileNotFoundError(
        "A VoTT Json file is not found. "
        "Please store the file in the devcontainer file system."
    )

# The selected value is read by the following cells via select_input_file.value.
select_input_file = widgets.Dropdown(options=files)
display(select_input_file)

## Set output file for COCO annotation file

Here, set the path of the file to which the annotations converted to COCO format will be written.<br>
A path to the output JSON file is required. If you want to write to the same file as the last time you converted with this notebook, you can get its path by running the following code cell. Enter the obtained path in the text box displayed by the second code cell. No path information remains after restarting the container.

In [None]:
# Display the path of the last converted file
%store -r
if "file_path" in locals():
    print("the last converted file path : ", file_path)
else:
    print("No path information remains for the last converted file.")

Run the following second code cell to set the output file.<br>
After executing the code cell, enter the output json file path and filename in the text box and click "OK".<br>
If the entered file does not exist, it will be newly created.<br>
Example : **`/workspaces/test_dataset/test_annotation.json`**<br>
<br>

> **NOTE** 
> 
> If the newly created file does not appear in the Explorer, follow these steps to refresh the Explorer.
> 1. Hold the pointer over the workspace root in the Explorer and four icons appear to the right of Explorer.
> 2. Click the third icon, a circular arrow.

In [None]:
try:
    # Propose a timestamped JSON file next to the selected VoTT export.
    default_path = os.path.dirname(select_input_file.value)
    date_now = datetime.datetime.now()
    text_output_file = widgets.Text(
        os.path.join(default_path, date_now.strftime("%Y%m%d%H%M%S") + ".json")
    )
    button_output_file = widgets.Button(description="OK")
    output_output_file = widgets.Output(layout={"border": "1px solid black"})

    @output_output_file.capture()
    def on_click_callback_input(clicked_button: widgets.Button) -> None:
        """Validate the entered output path and create an empty file if needed.

        Args:
            clicked_button: The button that triggered the callback (unused).

        Raises:
            ValueError: If the text box is empty or does not end with ".json".
        """
        output_path = text_output_file.value
        if output_path == "" or not output_path.endswith(".json"):
            raise ValueError(
                output_path
                + " is not a json file path. Please enter a json file path."
            )

        if os.path.isfile(output_path):
            print(output_path, "is found.")
            return

        # Create every missing parent directory. A bare file name has an
        # empty dirname, which must not be passed to the directory creation.
        dirname = os.path.dirname(output_path)
        if dirname:
            os.makedirs(dirname, exist_ok=True)

        # Create the (empty) output file so that it shows up in the Explorer.
        p_new = pathlib.Path(output_path)
        with p_new.open(mode="w") as f:
            f.write("")

        print(output_path, "is created.")

    button_output_file.on_click(on_click_callback_input)

    print("Enter output coco annotation json file path:")
    display(text_output_file, button_output_file, output_output_file)
except (TypeError, NameError) as err:
    # select_input_file is missing (NameError) or has no value (TypeError).
    raise ValueError(
        "The input VoTT annotation file is not set. Please set the file."
    ) from err

## Convert

Run the following code cells to convert the VoTT format annotation file to COCO format.

In [None]:
def create_coco_format(info, images, annotations, categories):
    """Assemble the top-level COCO dictionary from its sections.

    The given objects are stored as-is under the keys "info", "images",
    "annotations" and "categories"; "licenses" is always an empty list.
    """
    return dict(
        info=info,
        images=images,
        annotations=annotations,
        categories=categories,
        licenses=[],
    )


def create_info():
    """Build the COCO "info" section stamped with today's date.

    "year" is today's year and "date_created" is today's date formatted as
    YYYY/MM/DD; the remaining fields are fixed placeholder values.
    """
    today = datetime.date.today()
    return dict(
        year=today.year,
        version="1.0",
        description="COCO format dataset",
        contributor="",
        url="",
        date_created=today.strftime("%Y/%m/%d"),
    )


def create_image(id, width, height, file_name):
    """Build one entry of the COCO "images" list.

    The four arguments are stored under the keys of the same name; the
    "license", "flickr_url", "coco_url" and "date_captured" fields are
    set to None.
    """
    image = {"id": id, "width": width, "height": height, "file_name": file_name}
    # Fields this converter has no data for are present but left empty.
    image.update(
        dict.fromkeys(("license", "flickr_url", "coco_url", "date_captured"))
    )
    return image


def create_annotation(id, image_id, category_id, segmentation, area, bbox):
    """Build one entry of the COCO "annotations" list.

    All arguments are stored unchanged under the keys of the same name, and
    "iscrowd" is always 0.
    """
    return dict(
        id=id,
        image_id=image_id,
        category_id=category_id,
        segmentation=segmentation,
        area=area,
        bbox=bbox,
        iscrowd=0,
    )


def create_category(id, name):
    """Build one entry of the COCO "categories" list.

    "supercategory" is always an empty string.
    """
    return {"id": id, "name": name, "supercategory": ""}

In [None]:
# Load VoTT json file:
def get_category_id(categories_list, category_name):
    """Return the "id" of the first category whose "name" matches.

    Defined once, ahead of the conversion loop, rather than being re-created
    for every region.

    Raises:
        KeyError: If no entry of categories_list has the given name.
    """
    for category in categories_list:
        if category["name"] == category_name:
            return category["id"]
    raise KeyError("Category ID not found")


try:
    vott_json_file = select_input_file.value
    # Read the export as UTF-8 explicitly instead of relying on the locale
    # default encoding of the kernel.
    with open(vott_json_file, "r", encoding="utf-8") as f:
        vott_json = json.load(f)

    # Create info
    info = create_info()

    # Create categories: the category id is the position of the tag in the
    # VoTT "tags" list.
    tags = vott_json["tags"]
    coco_categories = [
        create_category(id=i, name=tag["name"]) for i, tag in enumerate(tags)
    ]

    assets = OrderedDict(vott_json["assets"])
    images = []
    annotations = []
    ann_id = 0
    # The image id is the position of the asset in the "assets" mapping.
    for image_id, image_info in enumerate(assets):
        asset = assets[image_info]["asset"]
        h = asset["size"]["height"]
        w = asset["size"]["width"]

        # Create images
        images.append(
            create_image(id=image_id, width=w, height=h, file_name=asset["name"])
        )

        for region in assets[image_info]["regions"]:
            # Create bbox as [left, top, width, height]
            bounding_box = region["boundingBox"]
            bbox = [
                bounding_box["left"],
                bounding_box["top"],
                bounding_box["width"],
                bounding_box["height"],
            ]

            # Create segmentation: a flat [x0, y0, x1, y1, ...] list built
            # from the region points, wrapped in a one-element list.
            segmentation = []
            for point in region["points"]:
                segmentation.append(point["x"])
                segmentation.append(point["y"])
            segmentations = [segmentation]

            # Create area: sum of the areas pycocotools reports for the
            # polygon encoded against the image height and width.
            R = _mask.frPoly(segmentations, h, w)
            np_area = _mask.area(R)
            areas = sum(np_area).item()

            # Create annotations: one annotation per tag attached to the region
            for region_tag in region["tags"]:
                annotation = create_annotation(
                    id=ann_id,
                    image_id=image_id,
                    category_id=get_category_id(coco_categories, region_tag),
                    segmentation=segmentations,
                    area=areas,
                    bbox=bbox,
                )
                annotations.append(annotation)
                ann_id += 1

    # Create coco format
    coco_format = create_coco_format(info, images, annotations, coco_categories)
except (TypeError, NameError, KeyError) as err:
    # Clean up the empty placeholder created by the output-path cell. Skip
    # this when that cell has not been run, and never delete a file that
    # already has content (e.g. the result of a previous conversion).
    if "text_output_file" in globals():
        placeholder = text_output_file.value
        if os.path.isfile(placeholder) and os.path.getsize(placeholder) == 0:
            os.remove(placeholder)
    if isinstance(err, KeyError):
        # A missing key means the file was loaded but is not a usable VoTT
        # export; the chained KeyError names the missing key.
        raise ValueError(
            "The input VoTT annotation file does not have the expected structure. "
            "Please check the file."
        ) from err
    raise ValueError(
        "The input VoTT annotation file is not set. Please set the file."
    ) from err

In [None]:
# Output the coco format json file
try:
    if text_output_file.value != "" and text_output_file.value.endswith(".json"):
        with open(text_output_file.value, "w") as f:
            json.dump(coco_format, f, ensure_ascii=False, indent=4)
            print("Format conversion is complete.")

        # Save output path
        file_path = text_output_file.value
        %store file_path
    else:
        raise ValueError(
            text_output_file.value
            + " is not a json file path. Set a json file path as the output file."
        )
except (NameError, FileNotFoundError):
    raise ValueError("The output path is not set. Please set the path.")