# Script for converting COCO object detection labels to a JSONL file

# In [2]:
import json
import os

# In [16]:
# Input and output locations used by the conversion.
# Image directory (a local directory or an Azure data asset path).
data_dir = os.path.join("data", "odFridgeObjects")
# COCO annotation file that is read.
coco_file = os.path.join(data_dir, "odFridgeObjects_coco.json")
# JSONL annotation file that is written.
annotation_file = os.path.join(data_dir, "annotations_coco.jsonl")

# In [17]:
class CocoToJSONLinesConverter:
    """Base interface for converters from COCO data to JSON Lines records."""

    def convert(self):
        """Return the converted records.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError


class BoundingBoxConverter(CocoToJSONLinesConverter):
    """Convert COCO object detection data into one JSONL record per image.

    Each record is a dict with the keys ``image_url`` (the COCO
    ``file_name``), ``image_details`` (``format``, ``width``, ``height``) and
    ``label``: a list of boxes holding ``label``, ``topX``, ``topY``,
    ``bottomX``, ``bottomY`` and, when the annotation has ``iscrowd``,
    ``isCrowd``.
    """

    def __init__(self, coco_data):
        """Create one empty record per image and the category lookup.

        Args:
            coco_data: Parsed COCO dictionary. Its ``images`` and
                ``categories`` lists are read here; ``annotations`` is read
                by ``convert``.
        """
        self.coco_data = coco_data
        # Maps a COCO image id to the position of its record in
        # ``json_lines_data``; filled while converting.
        self.image_id_to_data_index = {}
        self.json_lines_data = [
            {"image_url": "", "image_details": {}, "label": []}
            for _ in coco_data["images"]
        ]
        # Maps a COCO category id to its name.
        self.categories = {
            category["id"]: category["name"]
            for category in coco_data["categories"]
        }

    def _populate_image_url(self, index, coco_image):
        """Store the image file name and remember which record owns the image id."""
        self.json_lines_data[index]["image_url"] = coco_image["file_name"]
        self.image_id_to_data_index[coco_image["id"]] = index

    def _populate_image_details(self, index, coco_image):
        """Fill ``format``, ``width`` and ``height`` for the record at ``index``."""
        details = self.json_lines_data[index]["image_details"]
        # splitext only looks at the final path component, so a file name
        # without an extension yields "" instead of the whole name.
        extension = os.path.splitext(coco_image["file_name"])[1]
        details["format"] = extension[1:]
        details["width"] = coco_image["width"]
        details["height"] = coco_image["height"]

    def _populate_bbox_in_label(self, label, annotation, image_details):
        """Write the box corners into ``label``, scaled by the image size.

        The COCO ``bbox`` is read as ``[x, y, width, height]``. The corners
        are divided by the image width and height unless the box already
        looks normalized.
        """
        x, y, box_width, box_height = annotation["bbox"][:4]
        # Heuristic: if every bbox value is below 1.5 the box is treated as
        # already normalized and is left unscaled.
        if max(annotation["bbox"]) < 1.5:
            width = 1
            height = 1
        else:
            width = image_details["width"]
            height = image_details["height"]
        label["topX"] = x / width
        label["topY"] = y / height
        label["bottomX"] = (x + box_width) / width
        label["bottomY"] = (y + box_height) / height

    def _populate_label(self, annotation):
        """Append the label built from ``annotation`` to its image's record.

        Raises:
            KeyError: If the annotation refers to an unknown image id or
                category id.
        """
        index = self.image_id_to_data_index[annotation["image_id"]]
        record = self.json_lines_data[index]
        label = {"label": self.categories[annotation["category_id"]]}
        self._populate_bbox_in_label(label, annotation, record["image_details"])
        self._populate_isCrowd(label, annotation)
        record["label"].append(label)

    def _populate_isCrowd(self, label, annotation):
        """Copy ``iscrowd`` to ``isCrowd`` when the annotation provides it."""
        if "iscrowd" in annotation:
            label["isCrowd"] = annotation["iscrowd"]

    def convert(self):
        """Build and return the list of per-image records.

        Returns:
            The list stored in ``self.json_lines_data``, in the same order as
            ``coco_data["images"]``.
        """
        # Start from empty label lists so that calling convert() more than
        # once does not append every annotation again.
        for record in self.json_lines_data:
            record["label"] = []
        for index, coco_image in enumerate(self.coco_data["images"]):
            self._populate_image_url(index, coco_image)
            self._populate_image_details(index, coco_image)
        for annotation in self.coco_data["annotations"]:
            self._populate_label(annotation)
        return self.json_lines_data

# In [20]:
def coco2jsonl(data_dir, annotation_file, coco_file):
    """Convert a COCO object detection file into a JSON Lines annotation file.

    One JSON object is written per image, one object per line.

    Args:
        data_dir: Base location joined in front of every image file name;
            pass ``None`` to keep the file names unchanged.
        annotation_file: Path of the JSONL file to write. An existing file
            is overwritten.
        coco_file: Path of the COCO JSON file to read.
    """
    # Read and close the input before the output file is opened.
    with open(coco_file, "r", encoding="utf-8") as coco_f:
        coco_data = json.load(coco_f)

    json_lines_data = BoundingBoxConverter(coco_data).convert()

    with open(annotation_file, "w", encoding="utf-8") as annotation_f:
        for json_line in json_lines_data:
            if data_dir is not None:
                json_line["image_url"] = os.path.join(
                    data_dir, json_line["image_url"]
                )
            json.dump(json_line, annotation_f)
            annotation_f.write("\n")

# In [21]:
# Run the conversion with the module-level paths.
coco2jsonl(
    data_dir=data_dir,
    annotation_file=annotation_file,
    coco_file=coco_file,
)