In [5]:
import os
import numpy as np
from skimage.measure import label, regionprops
import matplotlib.pyplot as plt
import cv2
import seaborn
import json

In [3]:
# Load the three arrays of the dataset fold: per-image labels, the images, and the masks.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so it should
# only be used on trusted files — confirm whether these arrays actually need it.
labels = np.load(
    '../input/cancer-inst-segmentation-and-classification/Part 1/Images/types.npy'
)
images = np.load(
    '../input/cancer-inst-segmentation-and-classification/Part 1/Images/images.npy',
    allow_pickle=True,
)
masks = np.load(
    '../input/cancer-inst-segmentation-and-classification/Part 1/Masks/masks.npy',
    allow_pickle=True,
)

# Sanity check: print the shape of each array, one per line.
print(labels.shape, images.shape, masks.shape, sep='\n')

(2656,)
(2656, 256, 256, 3)
(2656, 256, 256, 6)


In [23]:
# Nucleus categories. A name's position in this list is used downstream both as the
# mask channel index and as the COCO category id.
nuclei_names = [
    "neoplastic",
    "inflammatory",
    "softtissue",
    "dead",
    "epithelial",
]

# Distinct values present in `labels`, in the order returned by np.unique.
tissue_names = [*np.unique(labels)]

In [34]:
  def create_bboxes(image_id, mask_id):
      """Return the bounding boxes of all connected components in one mask channel.

      The channel is read from the module-level ``masks`` array as
      ``masks[image_id, :, :, mask_id]``.

      Parameters
      ----------
      image_id : int
          Index of the image along the first axis of ``masks``.
      mask_id : int
          Index of the channel along the last axis of ``masks``.

      Returns
      -------
      list of list
          One ``[min_col, min_row, width, height]`` entry per labelled region,
          i.e. boxes in ``[x, y, w, h]`` order.
      """
      # Pick the single 2-D channel of interest out of the global mask stack.
      channel = masks[image_id, :, :, mask_id]

      # Give every connected component its own integer label.
      components = label(channel)

      # regionprops reports bbox as (min_row, min_col, max_row, max_col);
      # reorder to x/y first and turn the far corner into a width/height.
      return [
          [left, top, right - left, bottom - top]
          for top, left, bottom, right in (
              props.bbox for props in regionprops(components)
          )
      ]

In [28]:
def iscrowd_from_object(bbox, area):
    """Derive the ``iscrowd`` flag for an annotation from its box and area.

    Parameters
    ----------
    bbox : sequence
        Box whose first two entries are compared against the position limit.
    area : number
        Area of the object.

    Returns
    -------
    int
        ``0`` when ``area`` is below 100 or either of the first two ``bbox``
        entries exceeds 500; ``1`` otherwise.

    NOTE(review): every object with area >= 100 ends up flagged as crowd. This
    looks inverted relative to the usual COCO meaning of ``iscrowd`` — confirm
    the intent before training on the generated file.
    """
    min_area = 100
    max_offset = 500

    # Small objects are never treated as crowd.
    if area < min_area:
        return 0
    # Neither are objects whose first or second bbox entry lies beyond the limit.
    if bbox[0] > max_offset:
        return 0
    if bbox[1] > max_offset:
        return 0
    return 1

In [51]:
# Output location of the COCO-style annotation JSON file
file_path = "pan-nuclei-cancer.json"

def _bboxes_from_mask(mask):
    """Return one ``[x, y, width, height]`` box per connected component of a 2-D mask."""
    # regionprops reports bbox as (min_row, min_col, max_row, max_col).
    return [
        [min_col, min_row, max_col - min_col, max_row - min_row]
        for min_row, min_col, max_row, max_col in (
            region.bbox for region in regionprops(label(mask))
        )
    ]


def get_coco_json(file_path,images,labels,masks,nuclei_names):
    """Write a COCO-style detection annotation file for the given arrays.

    Bounding boxes are extracted from the ``masks`` argument itself. The
    previous implementation ignored this parameter and silently read the
    module-level ``masks`` array instead, so calling it with a different
    array produced annotations for the wrong data.

    Parameters
    ----------
    file_path : str
        Destination of the JSON file; it is overwritten if it exists.
    images : sequence
        One entry per image; only the number of entries is used.
    labels : sequence
        ``labels[i]`` is stored as the ``"file_name"`` of image ``i``.
    masks : array-like
        Indexed as ``masks[image_id, :, :, category_id]`` to obtain the 2-D
        mask of one category in one image.
    nuclei_names : sequence of str
        Category names; a name's position is both its category id and the
        mask channel it is read from.

    Returns
    -------
    None
        The result is only written to ``file_path``.

    Notes
    -----
    ``"area"`` is the area of the bounding box (width * height), not of the
    mask region, and ``"iscrowd"`` is delegated to ``iscrowd_from_object``.
    """
    # Key order is kept stable so the produced JSON layout does not change.
    coco_data = {
        "images": [],
        "annotations": [],
        "categories": []
    }

    annotation_id = 0  # unique across the whole file, not per image
    for image_id, _ in enumerate(images):
        coco_data["images"].append({
            "id": image_id,
            "file_name": labels[image_id]
        })

        for category_id, _ in enumerate(nuclei_names):
            # Use the array passed by the caller rather than a global.
            category_mask = masks[image_id, :, :, category_id]
            for bbox in _bboxes_from_mask(category_mask):
                area = bbox[2] * bbox[3]  # width * height of the box
                coco_data["annotations"].append({
                    "id": annotation_id,
                    "image_id": image_id,
                    "category_id": category_id,
                    "bbox": bbox,
                    "area": area,
                    "iscrowd": iscrowd_from_object(bbox, area)
                })
                annotation_id += 1

    coco_data["categories"].extend(
        {"id": category_id, "name": name}
        for category_id, name in enumerate(nuclei_names)
    )

    with open(file_path, 'w') as f:
        json.dump(coco_data, f)

In [52]:
# Generate the annotation file from the arrays loaded earlier in the notebook.
get_coco_json(
    file_path=file_path,
    images=images,
    labels=labels,
    masks=masks,
    nuclei_names=nuclei_names,
)