In [1]:
import os

## Move data from drive to colab instance

In [11]:
import shutil
source_folder = '/content/drive/MyDrive/Projects/DermViz/Synthetic_data/superimposed_segmentation'  # path to your source folder
destination_folder = '/content/superimposed_segmentation'  # destination folder

shutil.copytree(source_folder, destination_folder)


'/content/superimposed_segmentation'

## Split data to train, test & val

In [14]:
source_folder = "/content/superimposed_segmentation"
destination_folder = "/content/segmentation/train"
files = os.listdir("/content/superimposed_segmentation")
length = len(files)
train_len = round(0.8*length)
test_len = round(0.1 * length)
val_len = length - train_len - test_len
for i in range(train_len):
  source_path = os.path.join(source_folder, files[i])
  destination_path = os.path.join(destination_folder, files[i])
  shutil.copy(source_path, destination_path)

destination_folder = "/content/segmentation/test"
for i in range(train_len,(train_len+test_len)):
  source_path = os.path.join(source_folder, files[i])
  destination_path = os.path.join(destination_folder, files[i])
  shutil.copy(source_path, destination_path)

destination_folder = "/content/segmentation/val"
for i in range((train_len+test_len),length):
  source_path = os.path.join(source_folder, files[i])
  destination_path = os.path.join(destination_folder, files[i])
  shutil.copy(source_path, destination_path)

## Convert data to COCO format

In [21]:
import glob
import json
import os
import cv2
import numpy as np
# Label ids of the dataset
category_ids = {
    "lesion": 1
}
# Function to find contours in a binary mask
def find_contours(mask_image):
    gray = cv2.cvtColor(mask_image, cv2.COLOR_BGR2GRAY)
    _, binary_mask = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return contours
# Create a single annotation for a contour
def create_annotation_format(contour, image_id, category_id, annotation_id):
    segmentation = [int(point) for point in contour.flatten()]
    area = cv2.contourArea(contour)
    bbox = cv2.boundingRect(contour)
    annotation = {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_id,
        "segmentation": [segmentation],
        "bbox": bbox,
        "area": area,
        "iscrowd": 0
    }
    return annotation
# Create category annotation
def create_category_annotation(category_ids):
    categories = [{"id": id, "name": name} for name, id in category_ids.items()]
    return categories
# Create image annotation
def create_image_annotation(file_name, width, height):
    image_id = file_name.split('.')[0]
    image_annotation = {
        "id": image_id,
        "file_name": file_name,
        "width": width,
        "height": height
    }
    return image_annotation
# Get COCO JSON format
def get_coco_json_format():
    coco_format = {
        "info": {},
        "licenses": [],
        "images": [],
        "annotations": [],
        "categories": []
    }
    return coco_format
# Get "images" and "annotations" info
def images_annotations_info(maskpath):
    annotation_id = 0
    annotations = []
    images = []
    for category in category_ids.keys():
        for mask_image_path in glob.glob(os.path.join(maskpath, f'*.{MASK_EXT}')):
            original_file_name = os.path.basename(mask_image_path).replace('_segmented', '')  # Remove "_segmented"
            mask_image_open = cv2.imread(mask_image_path)
            height, width, c = mask_image_open.shape
            image = create_image_annotation(file_name=original_file_name, width=width, height=height)
            images.append(image)
            contours = find_contours(mask_image_open)
            for contour in contours:
                annotation = create_annotation_format(contour, image['id'], category_ids[category], annotation_id)
                if annotation['area'] > 0:
                    annotations.append(annotation)
                    annotation_id += 1
    return images, annotations, annotation_id
if __name__ == "__main__":
    MASK_EXT = 'jpg'
    ORIGINAL_EXT = 'jpg'
    coco_format = get_coco_json_format()
    for keyword in ["train","test","val"]:
      mask_path = f"/content/segmentation/{keyword}/"
      output_path = f"/content/output/{keyword}.json"
      coco_format["categories"] = create_category_annotation(category_ids)
      coco_format["images"], coco_format["annotations"], annotation_cnt = images_annotations_info(mask_path)
      with open(output_path, "w+") as outfile:
          json.dump(coco_format, outfile, sort_keys=True, indent=4)
      print(f"Created {annotation_cnt} annotations for images in folder: {mask_path}")
      print(f"Saved COCO data to: {output_path}")
      # Print the list of mask images being processed
      mask_images = glob.glob(os.path.join(mask_path, f'*.{MASK_EXT}'))
      print(f"Mask images found in folder: {mask_images}")

Created 90488 annotations for images in folder: /content/segmentation/train/
Saved COCO data to: /content/output/train.json
Mask images found in folder: ['/content/segmentation/train/U16_2009_01_12_scaled_1_4_segmented.jpg', '/content/segmentation/train/05_2008_flipped_5_segmented.jpg', '/content/segmentation/train/F16_2010_10_11_scaled_0_2_segmented.jpg', '/content/segmentation/train/U16_2009_01_12_Regist_scaled_1_0_segmented.jpg', '/content/segmentation/train/U16_2005_10_31_Regist_flipped_5_segmented.jpg', '/content/segmentation/train/H14_2009_12_09_Regist_flipped_0_segmented.jpg', '/content/segmentation/train/21_2009_flipped_4_segmented.jpg', '/content/segmentation/train/N16_2006_04_17_Regist_2_segmented.jpg', '/content/segmentation/train/05_2003_scaled_0_1_segmented.jpg', '/content/segmentation/train/20_2008_scaled_0_3_segmented.jpg', '/content/segmentation/train/O14_2015_10_12_scaled_0_2_segmented.jpg', '/content/segmentation/train/N16_2010_07_09_3_segmented.jpg', '/content/segmen