# Script for converting COCO Instance Segmentation labels to jsonl file

# In[8]:
import json
import os

# In[9]:
# Dataset locations used by this script.
# data_dir is the dataset root (local dir or azure data asset).
data_dir = os.path.join("data", "taco")
# COCO-format annotation file that is read as input
coco_file = os.path.join(data_dir, "taco.json")
# JSONL annotation file that is written as output
annotation_file = os.path.join(data_dir, "annotations_coco.jsonl")

# In[16]:
def generate_jsonl_annotations(source, target_path, annotation_file):
    """Convert COCO instance-segmentation annotations to a JSONL file.

    Exactly one JSON object is written per line, one line per image that has
    at least one annotation in ``source``. Images without any annotation are
    skipped. Each record has the form::

        {"image_url": "<target_path>/<file_name>",
         "image_details": {"format": ..., "width": ..., "height": ...},
         "label": [{"label": <category name>,
                    "isCrowd": 0,
                    "polygon": [[x0, y0, x1, y1, ...], ...]}, ...]}

    Polygon coordinates are normalized: x values are divided by the image
    width and y values by the image height.

    Args:
        source: Parsed COCO dictionary containing the "categories", "images"
            and "annotations" lists.
        target_path: Prefix that is joined with "/" to every image file name
            to build "image_url".
        annotation_file: Path of the JSONL file to write. An existing file is
            overwritten.

    Raises:
        KeyError: If a required COCO field is missing, or an annotation
            references a category id not listed in ``source["categories"]``.
    """
    category_names = {
        category["id"]: category["name"] for category in source["categories"]
    }

    # Group the annotations by image once, instead of re-scanning the whole
    # annotation list for every image.
    annotations_by_image = {}
    for annotation in source["annotations"]:
        annotations_by_image.setdefault(annotation["image_id"], []).append(annotation)

    # Mode "w" truncates an existing file, so no explicit delete is needed.
    with open(annotation_file, "w", encoding="utf-8") as annotation_f:
        for image in source["images"]:
            image_annotations = annotations_by_image.get(image["id"])
            if not image_annotations:
                # nothing to label for this image
                continue

            width = image["width"]
            height = image["height"]
            file_name = image["file_name"]

            image_dict = {
                "image_url": target_path + "/" + file_name,
                "image_details": {
                    "format": file_name.split(".")[-1].lower(),
                    "width": width,
                    "height": height,
                },
                "label": [],
            }

            for annotation in image_annotations:
                # A missing "iscrowd" flag is treated as a regular annotation.
                iscrowd = annotation.get("iscrowd", 0)
                if iscrowd != 0:
                    # TODO: process iscrowd annotations
                    continue

                # Each segmentation is a flat [x0, y0, x1, y1, ...] list:
                # even positions are x (scaled by width), odd positions are
                # y (scaled by height). Usually there is a single polygon.
                polygons = [
                    [
                        value / (width if position % 2 == 0 else height)
                        for position, value in enumerate(segmentation)
                    ]
                    for segmentation in annotation["segmentation"]
                ]

                image_dict["label"].append({
                    "label": category_names[annotation["category_id"]],
                    "isCrowd": iscrowd,
                    "polygon": polygons,
                })

            # Write the record once per image, after all of its labels have
            # been collected, so the file has no duplicate or partial lines.
            json.dump(image_dict, annotation_f)
            annotation_f.write("\n")

# In[17]:
# Load the COCO annotations and convert them to JSONL. Image URLs are built
# relative to the "images" sub-folder of the dataset directory.
# JSON text is UTF-8, so decode it explicitly rather than relying on the
# platform's locale encoding.
with open(coco_file, 'r', encoding='utf-8') as f:
    source = json.load(f)
generate_jsonl_annotations(source, os.path.join(data_dir, "images"), annotation_file)