In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from pycocotools import mask, coco
import matplotlib.pyplot as plt
import json

%matplotlib inline

In [2]:
# Load the sample table; later cells read its "Heart" and "Image Index" columns.
df = pd.read_csv(filepath_or_buffer="sample_heart.csv")

In [3]:
# Preview the first five rows (5 is the default for DataFrame.head).
df.head(n=5)

Unnamed: 0,Image Index,Dice RCA (Mean),Dice RCA (Max),Landmarks,Heart,Height,Width
0,00000013_005.png,0.795482,0.837427,"387,199,349,205,312,225,280,253,254,286,238,32...",448990 16 450012 34 451034 38 452055 44 453077...,1024,1024
1,00000013_026.png,0.817105,0.875244,"400,184,363,195,330,227,297,265,270,308,252,35...",504418 33 505419 71 506435 81 507450 92 508466...,1024,1024
2,00000017_001.png,0.833868,0.885746,"356,167,320,174,284,198,252,230,228,267,209,30...",541143 22 542153 75 543169 85 544190 90 545211...,1024,1024
3,00000030_001.png,0.868048,0.917175,"400,151,360,156,321,175,284,202,255,236,233,27...",512542 5 513562 17 514583 27 515603 38 516624 ...,1024,1024
4,00000032_001.png,0.772204,0.851568,"341,73,314,82,285,103,256,129,232,159,213,188,...",359918 35 360917 76 361929 90 362941 104 36395...,1024,1024


In [4]:
# Pull the two columns used below out of the frame as plain Python lists:
# the RLE-encoded heart masks and the image file names, in row order.
heart = df["Heart"].to_list()
index = df["Image Index"].to_list()

In [5]:
def get_mask_from_RLE(rle, height, width):
    """Decode a run-length-encoded string into a 2-D binary mask.

    Parameters
    ----------
    rle : str
        Whitespace-separated integers alternating ``start length start
        length ...``. Each ``start`` is a 1-based position into the
        flattened mask. A trailing unpaired token is ignored.
    height, width : int
        Shape of the output mask.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(height, width)`` holding 255 inside the
        runs and 0 elsewhere. The flat buffer is reshaped with NumPy's
        default (row-major) order.
    """
    tokens = np.array([int(token) for token in rle.split()])

    flat = np.zeros((height * width), dtype=np.uint8)

    # Even positions are run starts, odd positions are run lengths.
    for begin, count in zip(tokens[::2], tokens[1::2]):
        first = begin - 1  # convert the 1-based start to a 0-based offset
        flat[first:first + count] = 255

    return flat.reshape((height, width))

In [6]:
# Aliases used by the conversion loop (same list objects, no copy):
#   rle_masks - RLE-encoded masks
#   image_ids - image IDs corresponding to the RLE masks, in the same order
rle_masks, image_ids = heart, index

In [7]:
# Sanity check: the text before the first "." is what the conversion loop uses
# as the COCO image id; show it next to the full file name.
first_file_name = image_ids[0]
first_file_name.split(".")[0], first_file_name

('00000013_005', '00000013_005.png')

In [8]:
# Skeleton of the COCO-style document. "images" and "annotations" start empty
# and are filled by the conversion loop; there is a single category, "heart".
coco_annotations = dict(
    info=dict(description="ChestX-Ray Dataset", version="1.0", year=2024),
    images=[],
    annotations=[],
    categories=[dict(id=1, name="heart", supercategory="none")],
)

In [9]:
# Every mask is decoded at this fixed size.
# NOTE(review): the rows previewed by df.head() are all 1024x1024; confirm that
# holds for the whole table, or read the Height/Width columns per row instead.
height, width = 1024, 1024

# Start from empty lists so that re-running this cell does not append
# duplicate images/annotations (which would also duplicate annotation ids).
coco_annotations["images"].clear()
coco_annotations["annotations"].clear()

for idx, rle_mask in enumerate(rle_masks):
    file_name = image_ids[idx]
    # The image id is the file name up to its first "." (e.g. "00000013_005").
    image_id = file_name.split(".")[0]

    # Image information
    image_info = {
        "id": image_id,
        "width": width,
        "height": height,
        "file_name": file_name,
    }
    coco_annotations["images"].append(image_info)

    # RLE to COCO annotation.
    # Decode THIS image's mask (the previous code always decoded heart[0], so
    # every annotation carried the first image's segmentation, area and bbox).
    mask_array = get_mask_from_RLE(rle_mask, height, width)
    # pycocotools' encoder expects a Fortran-ordered (column-major) uint8 array.
    mask_array = np.asfortranarray(mask_array)
    coco_mask = mask.encode(mask_array)
    # "counts" comes back as bytes, which json cannot serialise; store it as str.
    coco_mask["counts"] = coco_mask["counts"].decode('utf-8')
    annotation = {
        "id": int(idx + 1),  # annotation ids are 1-based
        "image_id": image_id,
        "category_id": int(1),
        "segmentation": coco_mask,
        "area": float(mask.area(coco_mask)),
        "bbox": mask.toBbox(coco_mask).tolist(),
        "iscrowd": int(0),  # Set to 0 for non-crowd objects
    }
    coco_annotations["annotations"].append(annotation)

In [10]:
# Serialise the assembled COCO document and write it to disk in one go.
with open("ChestX-Ray_annotations.json", "w") as out_file:
    out_file.write(json.dumps(coco_annotations))