In [1]:

from os import listdir
from os.path import isfile, isdir, join
import shutil
import cv2
import numpy as np
import pycocotools.mask as mask  
from tqdm import tqdm
from pycocotools.coco import COCO
import base64
from detectron2.structures import BoxMode


def decode_base64_rles(coco):
    for ann in coco.dataset['annotations']:
        segm = ann['segmentation']
        if type(segm) != list and type(segm['counts']) != list:
            segm['counts'] = base64.b64decode(segm['counts'])

inpath = "./dataset/train/"  # the train folder download from kaggle
outpath = "./train/"  # the folder putting all nuclei image

images_name = listdir(inpath)
cocoformat = {"licenses":[], "info":[], "images":[], "annotations":[], "categories":[]}


In [2]:

cat = {"id": 1, 
       "name": "nucleus", 
       "supercategory": "nucleus",
      }
cocoformat["categories"].append(cat)

In [3]:
from PIL import Image # (pip install Pillow)

def create_sub_masks(mask_image):
    width, height = mask_image.size

    # Initialize a dictionary of sub-masks indexed by RGB colors
    # sub_masks = {}
    sub_mask = Image.new('L', (width+2, height+2))
    for x in range(width):
        for y in range(height):
            # Get the RGB values of the pixel
            pixel = mask_image.getpixel((x,y))# [:3]

            # If the pixel is not black...
            if pixel != 0:
                # Set the pixel value to 1 (default is 0), accounting for padding
                sub_mask.putpixel((x+1, y+1), 1)

    return sub_mask

In [4]:
import numpy as np                                 # (pip install numpy)
from skimage import measure                        # (pip install scikit-image)
from shapely.geometry import Polygon, MultiPolygon # (pip install Shapely)

def create_sub_mask_annotation(sub_mask, image_id, category_id, annotation_id, is_crowd):
    # Find contours (boundary lines) around each sub-mask
    # Note: there could be multiple contours if the object
    # is partially occluded. (E.g. an elephant behind a tree)
    # print(sub_mask)
    contours = measure.find_contours(sub_mask, 0.5, positive_orientation='low')

    segmentations = []
    polygons = []
    for contour in contours:
        # Flip from (row, col) representation to (x, y)
        # and subtract the padding pixel
        for i in range(len(contour)):
            row, col = contour[i]
            contour[i] = (col - 1, row - 1)

        # Make a polygon and simplify it
        poly = Polygon(contour)
        poly = poly.simplify(1.0, preserve_topology=False)
        polygons.append(poly)
        segmentation = np.array(poly.exterior.coords).ravel().tolist()
        segmentations.append(segmentation)

    # Combine the polygons to calculate the bounding box and area
    multi_poly = MultiPolygon(polygons)
    x, y, max_x, max_y = multi_poly.bounds
    width = max_x - x
    height = max_y - y
    bbox = (x, y, width, height)
    area = multi_poly.area

    annotation = {
        'segmentation': segmentations,
        'iscrowd': is_crowd,
        'image_id': image_id,
        'category_id': category_id,
        'id': annotation_id,
        'bbox': bbox,
        'bbox_mode': BoxMode.XYWH_ABS, 
        'area': area
    }

    return annotation

In [None]:
mask_id = 1
is_crowd = 0
category_id = 1
for i, im_name in tqdm(enumerate(images_name)):
    t_image = cv2.imread(inpath + im_name + "/images/" + im_name + ".png")
    mask_folder = listdir(inpath + im_name + "/masks/")
    im = {"id": int(i+1), 
          "width": int(t_image.shape[1]), 
          "height": int(t_image.shape[0]), 
          "file_name": im_name + ".png",
         }
    cocoformat["images"].append(im)
    for mask_img in mask_folder:
        # t_image = Image.open(inpath + im_name + "/masks/" + mask_img)
        t_image = cv2.imread(inpath + im_name + "/masks/" + mask_img, 0)
        annotation = create_sub_mask_annotation(t_image, int(i+1), category_id, mask_id, is_crowd)
        cocoformat["annotations"].append(annotation)
        mask_id += 1   

In [None]:
import numpy
mask_id = 1
is_crowd = 0
category_id = 1
for i, im_name in enumerate(images_name):
    t_image = cv2.imread(inpath + im_name + "/images/" + im_name + ".png")
    mask_folder = listdir(inpath + im_name + "/masks/")
    im = {"id": int(i+1), 
          "width": int(t_image.shape[1]), 
          "height": int(t_image.shape[0]), 
          "file_name": im_name + ".png",
         }
    cocoformat["images"].append(im)
    for mask_img in tqdm(mask_folder):
        t_image = Image.open(inpath + im_name + "/masks/" + mask_img)
        # t_image = cv2.imread(inpath + im_name + "/masks/" + mask_img, 0)
        # print(t_image)
        sub_mask = create_sub_masks(t_image)
        open_cv_image = numpy.array(sub_mask) 
        # print(sub_mask)
        category_id = 1
        annotation = create_sub_mask_annotation(open_cv_image, int(i+1), category_id, mask_id, is_crowd)
        # annotation = create_sub_mask_annotation(t_image, int(i+1), category_id, mask_id, is_crowd)
        cocoformat["annotations"].append(annotation)
        mask_id += 1 

In [5]:
import numpy
mask_id = 1
is_crowd = 0
category_id = 1
for i, im_name in enumerate(images_name):
    t_image = cv2.imread(inpath + im_name + "/images/" + im_name + ".png")
    mask_folder = listdir(inpath + im_name + "/masks/")
    im = {"id": int(i+1), 
          "width": int(t_image.shape[1]), 
          "height": int(t_image.shape[0]), 
          "file_name": im_name + ".png",
         }
    cocoformat["images"].append(im)
    for mask_img in tqdm(mask_folder):
        t_image = Image.open(inpath + im_name + "/masks/" + mask_img)
        ground_truth_binary_mask = numpy.array(t_image)
        fortran_ground_truth_binary_mask = np.asfortranarray(ground_truth_binary_mask)
        encoded_ground_truth = mask.encode(fortran_ground_truth_binary_mask)
        ground_truth_area = mask.area(encoded_ground_truth)
        ground_truth_bounding_box = mask.toBbox(encoded_ground_truth)
        contours = measure.find_contours(ground_truth_binary_mask, 0.5)

        annotation = {
                "segmentation": [],
                "area": ground_truth_area.tolist(),
                "iscrowd": 0,
                "image_id": int(i+1),
                "bbox": ground_truth_bounding_box.tolist(),
                "bbox_mode": BoxMode.XYWH_ABS,
                "category_id": 1,
                "id": mask_id
        }

        for contour in contours:
            contour = np.flip(contour, axis=1)
            segmentation = contour.ravel().tolist()
            annotation["segmentation"].append(segmentation)
            # cocoformat["annotations"].append(segmentation)
        
        cocoformat["annotations"].append(annotation)
        
        mask_id += 1 

100%|██████████| 480/480 [00:08<00:00, 58.41it/s]
100%|██████████| 398/398 [00:05<00:00, 67.16it/s]
100%|██████████| 440/440 [00:06<00:00, 64.59it/s]
100%|██████████| 424/424 [00:06<00:00, 64.40it/s]
100%|██████████| 557/557 [00:08<00:00, 67.44it/s]
100%|██████████| 356/356 [00:05<00:00, 68.43it/s]
100%|██████████| 432/432 [00:06<00:00, 66.33it/s]
100%|██████████| 405/405 [00:06<00:00, 59.56it/s]
100%|██████████| 357/357 [00:05<00:00, 63.41it/s]
100%|██████████| 359/359 [00:06<00:00, 59.39it/s]
100%|██████████| 342/342 [00:05<00:00, 65.24it/s]
100%|██████████| 294/294 [00:04<00:00, 59.11it/s]
100%|██████████| 342/342 [00:05<00:00, 63.19it/s]
100%|██████████| 378/378 [00:06<00:00, 58.92it/s]
100%|██████████| 328/328 [00:04<00:00, 67.65it/s]
100%|██████████| 442/442 [00:06<00:00, 65.93it/s]
100%|██████████| 472/472 [00:05<00:00, 84.18it/s]
100%|██████████| 354/354 [00:04<00:00, 83.23it/s]
100%|██████████| 1073/1073 [00:11<00:00, 89.86it/s]
100%|██████████| 1584/1584 [00:18<00:00, 87.23it

In [6]:
import json
with open("nucleus_cocoformat_poly2.json", "w") as f:
    json.dump(cocoformat, f)

json_obj = json.dumps(cocoformat, indent=4)
with open("test_poly2.json", "w") as outfile:
     outfile.write(json_obj)

In [None]:
# copy image to another folder
for f in images_name:
    image = listdir(inpath + f + "/images/")
    shutil.copyfile(inpath + f + "/images/" + image[0], outpath + image[0])