In [None]:
import json

# input and output paths
# (change as needed based on your project folder structure)
input_path = 'data/train_annotations.json'
output_path = 'data/train_annotations_coco.json'

# category mapping (class name in the source annotations -> COCO category ID);
# the COCO "categories" list is derived from this so the two cannot drift apart
category_map = {
    "individual_tree": 1,
    "group_of_trees": 2,
}


def polygon_bbox(segmentation):
    """Return the COCO-style ``[x, y, width, height]`` box around a polygon.

    Args:
        segmentation: Flat coordinate list ``[x0, y0, x1, y1, ...]``.

    Returns:
        A four-element list: minimum x, minimum y, x extent, y extent.
    """
    xs = segmentation[::2]
    ys = segmentation[1::2]
    x_min, y_min = min(xs), min(ys)
    return [x_min, y_min, max(xs) - x_min, max(ys) - y_min]


def polygon_area(segmentation):
    """Return the unsigned area of a polygon using the shoelace formula.

    Args:
        segmentation: Flat coordinate list ``[x0, y0, x1, y1, ...]``.

    Returns:
        The enclosed area as a float, independent of vertex winding order.
    """
    # zip() pairs x with y and drops an unpaired trailing coordinate, if any
    points = list(zip(segmentation[::2], segmentation[1::2]))
    # pair every vertex with its successor, wrapping the last back to the first
    twice_area = sum(
        x0 * y1 - x1 * y0
        for (x0, y0), (x1, y1) in zip(points, points[1:] + points[:1])
    )
    return abs(twice_area) / 2


def build_coco_data(train_annotations):
    """Convert the loaded train annotations into a COCO-format dictionary.

    Image IDs and annotation IDs are assigned sequentially starting at 1.
    Annotations whose segmentation has fewer than 3 points (6 coordinates)
    are skipped and do not consume an annotation ID.

    Args:
        train_annotations: Dict with an ``"images"`` list. Each image has
            ``"file_name"``, ``"width"``, ``"height"`` and, optionally,
            ``"annotations"``; each annotation has ``"class"``, a flat
            ``"segmentation"`` list and, optionally, ``"confidence_score"``.

    Returns:
        Dict with ``"images"``, ``"annotations"`` and ``"categories"`` lists.

    Raises:
        KeyError: If a required key is missing or an annotation's class is
            not present in ``category_map``.
    """
    # initialize COCO data structure
    coco_data = {
        "images": [],
        "annotations": [],
        "categories": [
            {"id": category_id, "name": name, "supercategory": "tree"}
            for name, category_id in category_map.items()
        ],
    }

    # annotation IDs are unique across the whole dataset, not per image
    annotation_id = 1

    # for each image...
    for image_id, image in enumerate(train_annotations["images"], start=1):

        # add image metadata
        coco_data["images"].append(
            {
                "id": image_id,
                "file_name": image["file_name"],
                "width": image["width"],
                "height": image["height"],
            }
        )

        # for each annotation in this image
        for ann in image.get("annotations", []):
            # extract segmentation polygon
            segmentation = ann["segmentation"]

            # skip if fewer than 3 points (expected to cause errors later)
            if len(segmentation) < 6:
                continue

            # append annotation
            coco_data["annotations"].append(
                {
                    "id": annotation_id,
                    "image_id": image_id,
                    "category_id": category_map[ann["class"]],
                    # COCO stores polygons as a list of flat coordinate lists
                    "segmentation": [segmentation],
                    # real polygon area: a constant 0 is mis-bucketed or
                    # rejected by tools that use this field
                    "area": polygon_area(segmentation),
                    "bbox": polygon_bbox(segmentation),
                    # is-crowded (not used but setting anyway)
                    "iscrowd": 0,
                    # confidence score (nonsense for ground-truth but setting anyway)
                    "score": ann.get("confidence_score", 1.0),
                }
            )

            # increment annotation ID counter
            annotation_id += 1

    return coco_data


# True both in a notebook kernel and when run as a script, so the conversion
# still executes there; merely importing this code no longer touches the disk.
if __name__ == "__main__":
    # read train annotations data
    with open(input_path, encoding="utf-8") as f:
        train_annotations = json.load(f)

    # convert to COCO format
    coco_data = build_coco_data(train_annotations)

    # save output
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(coco_data, f, indent=2)