<a href="https://colab.research.google.com/github/AvellinaLeong/NHM-Nannofossil-Segmentation-Project/blob/main/01_JSONCOCO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Convert JSON Annotation to COCO JSON Format

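Output: one COCO-format annotation file per split (`coco_train.json`, `coco_val.json`, `coco_test.json`), saved next to that split's images and source annotation JSON under `data/train`, `data/val` and `data/test`.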

In [None]:
# Mount Google Drive into the Colab filesystem so the Drive paths used below resolve.
from google.colab import drive
drive.mount('/content/drive/')

# Set script location to own development space:
# this becomes the working directory for the shell commands below and for any relative paths.
MY_DEVELOPMENT_SPACE = '/content/drive/MyDrive/development/avellina/'
import os
os.chdir(MY_DEVELOPMENT_SPACE)
# Sanity check: print the current working directory and list its contents.
!pwd
!ls

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
/content/drive/MyDrive/development/avellina
detectron2  Detectron2	Mask-RCNN


In [None]:
# Dataset root on Drive; the conversion loop reads and writes under <project_dir>/data/<split>/.
project_dir = "/content/drive/MyDrive/data/species_53"

## Convert JSON to COCO JSON

In [None]:
import json
import os
import cv2
import numpy as np

In [None]:
def _polygon_area(xs, ys):
    """Return the absolute area of the polygon with vertices (xs[i], ys[i]) via the shoelace formula."""
    next_xs = xs[1:] + xs[:1]
    next_ys = ys[1:] + ys[:1]
    twice_area = sum(
        x0 * y1 - x1 * y0 for x0, y0, x1, y1 in zip(xs, ys, next_xs, next_ys)
    )
    return abs(twice_area) / 2.0


def convert_to_coco(json_file, output_file, category_id=1, category_name="t_orionatus"):
    """Convert a polygon annotation JSON file into a single-category COCO JSON file.

    The input is expected to be a dict mapping an arbitrary key to an entry with a
    ``"filename"`` and a ``"regions"`` collection, where each region carries
    ``shape_attributes.all_points_x`` / ``all_points_y`` (this looks like the
    VGG Image Annotator export layout -- confirm against the annotation tool used).
    Images are looked up in the same directory as ``json_file``.

    Args:
        json_file: Path to the source annotation JSON.
        output_file: Path the COCO-format JSON is written to.
        category_id: Id used for the single category and for every annotation.
        category_name: Name of the single category (the nannofossil species).

    Returns:
        None. The result is written to ``output_file`` and a confirmation is printed.

    Notes:
        * Images that cannot be read are reported and skipped (no image entry, no
          annotations); image ids still follow the entry's position in the input.
        * Regions without points, with mismatched x/y lists, or with fewer than
          three vertices are reported and skipped, since they are not valid polygons.
        * ``area`` is the polygon area (shoelace formula), not the bounding-box area.
    """
    with open(json_file) as f:
        data = json.load(f)

    coco_format = {
        "images": [],
        "annotations": [],
        "categories": [{
            "id": category_id,
            "name": category_name,
        }]
    }

    image_dir = os.path.dirname(json_file)
    annotation_id = 1
    for image_id, info in enumerate(data.values(), start=1):
        filename = info["filename"]
        full_path = os.path.join(image_dir, filename)

        # The image is decoded only to obtain its dimensions.
        image = cv2.imread(full_path)
        if image is None:
            print(f"Error reading image {full_path}")
            continue
        image_height, image_width = image.shape[:2]

        coco_format["images"].append({
            "id": image_id,
            "file_name": filename,
            "height": image_height,
            "width": image_width,
        })

        # Accept both a list of regions and a dict of regions keyed by index.
        regions = info["regions"]
        if isinstance(regions, dict):
            regions = list(regions.values())

        for region in regions:
            shape_attributes = region.get("shape_attributes", {})
            all_points_x = shape_attributes.get("all_points_x", [])
            all_points_y = shape_attributes.get("all_points_y", [])

            if not all_points_x or not all_points_y:
                print(f"No points found for region in {filename}")
                continue
            if len(all_points_x) != len(all_points_y) or len(all_points_x) < 3:
                # A polygon needs at least three (x, y) vertices to be a valid mask.
                print(f"Skipping invalid polygon in {filename}")
                continue

            # Bounding box in [x, y, width, height] form.
            x_min = min(all_points_x)
            y_min = min(all_points_y)
            box_width = max(all_points_x) - x_min
            box_height = max(all_points_y) - y_min

            # Flat [x0, y0, x1, y1, ...] polygon as used by COCO segmentations.
            polygon = [
                coord
                for point in zip(all_points_x, all_points_y)
                for coord in point
            ]

            coco_format["annotations"].append({
                "id": annotation_id,
                "image_id": image_id,
                "category_id": category_id,
                "bbox": [x_min, y_min, box_width, box_height],
                # 1 is the value of Detectron2's BoxMode.XYWH_ABS, matching the bbox layout above.
                "bbox_mode": 1,
                "area": _polygon_area(all_points_x, all_points_y),
                "segmentation": [polygon],
                "iscrowd": 0,
            })
            annotation_id += 1

    with open(output_file, 'w') as f:
        json.dump(coco_format, f, indent=4)
    print(f"Saved COCO format JSON to {output_file}")

# Run the conversion once per dataset split.
# Each row is (split sub-directory, source annotation file, COCO output file).
conversion_jobs = [
    ("train", "t_orionatus_label_train_json.json", "coco_train.json"),
    ("val", "t_orionatus_label_val_json.json", "coco_val.json"),
    ("test", "t_orionatus_label_test_json.json", "coco_test.json"),
]

# Column-wise lists of the table above, kept available as separate names.
sub_dirs, file_names, output_names = (list(column) for column in zip(*conversion_jobs))

for sub_dir, file_name, output_name in conversion_jobs:
    # Input and output both live in <project_dir>/data/<split>/.
    split_dir = os.path.join(project_dir, "data", sub_dir)
    json_file = os.path.join(split_dir, file_name)
    output_file = os.path.join(split_dir, output_name)
    convert_to_coco(json_file, output_file)


Saved COCO format JSON to /content/drive/MyDrive/data/species_53/data/train/coco_train.json
Saved COCO format JSON to /content/drive/MyDrive/data/species_53/data/val/coco_val.json
Saved COCO format JSON to /content/drive/MyDrive/data/species_53/data/test/coco_test.json
