<a href="https://colab.research.google.com/github/NischalSuresh/lesion_detection/blob/main/mask_to_coco.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os

## Move data from drive to colab instance

In [2]:
import shutil
source_folder = '/content/drive/MyDrive/Projects/DermViz/Synthetic_data/superimposed_back'  # path to your source folder
destination_folder = '/content/superimposed_back'  # destination folder
shutil.copytree(source_folder, destination_folder)

'/content/superimposed_back'

In [None]:
!zip -r /content/images.zip /content/superimposed_back

In [5]:
import shutil
source_folder = '/content/drive/MyDrive/Projects/DermViz/Synthetic_data/superimposed_segmentation'  # path to your source folder
destination_folder = '/content/superimposed_segmentation'  # destination folder
shutil.copytree(source_folder, destination_folder)

'/content/superimposed_segmentation'

In [None]:
!zip -r /content/masks.zip /content/superimposed_segmentation

## move zip files to drive

In [7]:
source_path = '/content/images.zip'
destination_path = '/content/drive/MyDrive/Projects/DermViz/Synthetic_data/full_back_syn/'
shutil.copy(source_path, destination_path)

'/content/drive/MyDrive/Projects/DermViz/Synthetic_data/full_back_syn/images.zip'

In [14]:
source_path = '/content/masks.zip'
destination_path = '/content/drive/MyDrive/Projects/DermViz/Synthetic_data/full_back_syn/'
shutil.copy(source_path, destination_path)

'/content/drive/MyDrive/Projects/DermViz/Synthetic_data/full_back_syn/masks.zip'

## renamed the segmentation images to remove "_segmented"

In [12]:
dir = "/content/superimposed_segmentation"
files_dir = os.listdir(dir)
for old_filename in files_dir:
    if old_filename.endswith('_segmented.jpg'):
        new_filename = old_filename.replace('_segmented', '')
        old_path = os.path.join(dir, old_filename)
        new_path = os.path.join(dir, new_filename)
        os.rename(old_path, new_path)

## sanity check if masks and images folder are the same

In [15]:
source_folder_images = "/content/superimposed_back"
source_folder_masks = "/content/superimposed_segmentation"
files_images = sorted(os.listdir(source_folder_images))
files_masks = sorted(os.listdir(source_folder_masks))
if files_images == files_masks:
    print("Great! The lists are identical")
else:
    print("Check input!! The lists are NOT identical")

Great! The lists are identical


## Split data to train, test & val

In [16]:
source_folder_images = "/content/superimposed_back"
source_folder_masks = "/content/superimposed_segmentation"
files = sorted(os.listdir(source_folder_images))
length = len(files)
train_len = round(0.8*length)
test_len = round(0.1 * length)
val_len = length - train_len - test_len

# train split
destination_folder_images = "/content/images/train"
destination_folder_masks = '/content/masks/train'
for i in range(train_len):
  #images
  source_path = os.path.join(source_folder_images, files[i])
  destination_path = os.path.join(destination_folder_images, files[i])
  shutil.copy(source_path, destination_path)
  #masks
  source_path = os.path.join(source_folder_masks, files[i])
  destination_path = os.path.join(destination_folder_masks, files[i])
  shutil.copy(source_path, destination_path)

# test split
destination_folder_images = "/content/images/test"
destination_folder_masks = '/content/masks/test'
for i in range(train_len,(train_len+test_len)):
  #images
  source_path = os.path.join(source_folder_images, files[i])
  destination_path = os.path.join(destination_folder_images, files[i])
  shutil.copy(source_path, destination_path)
  #masks
  source_path = os.path.join(source_folder_masks, files[i])
  destination_path = os.path.join(destination_folder_masks, files[i])
  shutil.copy(source_path, destination_path)

# val split
destination_folder_images = "/content/images/val"
destination_folder_masks = '/content/masks/val'
for i in range((train_len+test_len),length):
  #images
  source_path = os.path.join(source_folder_images, files[i])
  destination_path = os.path.join(destination_folder_images, files[i])
  shutil.copy(source_path, destination_path)
  #masks
  source_path = os.path.join(source_folder_masks, files[i])
  destination_path = os.path.join(destination_folder_masks, files[i])
  shutil.copy(source_path, destination_path)

## sanity check if masks and images folder are the same in train test and val splits

In [18]:
for val in {'train','test','val'}:
  source_folder_images = "/content/images/" + val
  source_folder_masks = "/content/masks/" + val
  files_images = sorted(os.listdir(source_folder_images))
  files_masks = sorted(os.listdir(source_folder_masks))
  if files_images == files_masks:
      print("Great! The lists are identical")
  else:
      print("Check input!! The lists are NOT identical")

Great! The lists are identical
Great! The lists are identical
Great! The lists are identical


## zip move masks and images to drive

In [None]:
!zip -r /content/drive/MyDrive/Projects/DermViz/Synthetic_data/full_back_syn/masks_split.zip /content/masks
!zip -r /content/drive/MyDrive/Projects/DermViz/Synthetic_data/full_back_syn/images_split.zip /content/images

## Convert data to COCO format

In [20]:
import glob
import json
import os
import cv2
import numpy as np
# Label ids of the dataset
category_ids = {
    "lesion": 1
}
# Function to find contours in a binary mask
def find_contours(mask_image):
    gray = cv2.cvtColor(mask_image, cv2.COLOR_BGR2GRAY)
    _, binary_mask = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return contours
# Create a single annotation for a contour
def create_annotation_format(contour, image_id, category_id, annotation_id):
    segmentation = [int(point) for point in contour.flatten()]
    area = cv2.contourArea(contour)
    bbox = cv2.boundingRect(contour)
    annotation = {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_id,
        "segmentation": [segmentation],
        "bbox": bbox,
        "area": area,
        "iscrowd": 0
    }
    return annotation
# Create category annotation
def create_category_annotation(category_ids):
    categories = [{"id": id, "name": name} for name, id in category_ids.items()]
    return categories
# Create image annotation
def create_image_annotation(file_name, width, height):
    image_id = file_name.split('.')[0]
    image_annotation = {
        "id": image_id,
        "file_name": file_name,
        "width": width,
        "height": height
    }
    return image_annotation
# Get COCO JSON format
def get_coco_json_format():
    coco_format = {
        "info": {},
        "licenses": [],
        "images": [],
        "annotations": [],
        "categories": []
    }
    return coco_format
# Get "images" and "annotations" info
def images_annotations_info(maskpath):
    annotation_id = 0
    annotations = []
    images = []
    for category in category_ids.keys():
        for mask_image_path in glob.glob(os.path.join(maskpath, f'*.{MASK_EXT}')):
            original_file_name = os.path.basename(mask_image_path) #.replace('_segmented', '')  # Remove "_segmented"
            mask_image_open = cv2.imread(mask_image_path)
            height, width, c = mask_image_open.shape
            image = create_image_annotation(file_name=original_file_name, width=width, height=height)
            images.append(image)
            contours = find_contours(mask_image_open)
            for contour in contours:
                annotation = create_annotation_format(contour, image['id'], category_ids[category], annotation_id)
                if annotation['area'] > 0:
                    annotations.append(annotation)
                    annotation_id += 1
    return images, annotations, annotation_id
if __name__ == "__main__":
    MASK_EXT = 'jpg'
    ORIGINAL_EXT = 'jpg'
    coco_format = get_coco_json_format()
    for keyword in ["train","test","val"]:
      mask_path = f"/content/masks/{keyword}/"
      output_path = f"/content/jsons_{keyword}.json"
      coco_format["categories"] = create_category_annotation(category_ids)
      coco_format["images"], coco_format["annotations"], annotation_cnt = images_annotations_info(mask_path)
      with open(output_path, "w+") as outfile:
          json.dump(coco_format, outfile, sort_keys=True, indent=4)
      print(f"Created {annotation_cnt} annotations for images in folder: {mask_path}")
      print(f"Saved COCO data to: {output_path}")
      # Print the list of mask images being processed
      mask_images = glob.glob(os.path.join(mask_path, f'*.{MASK_EXT}'))
      print(f"Mask images found in folder: {mask_images}")

Created 89701 annotations for images in folder: /content/masks/train/
Saved COCO data to: /content/jsons_train.json
Mask images found in folder: ['/content/masks/train/07_2003_flipped_3.jpg', '/content/masks/train/H14_2009_12_09_Regist_flipped_5.jpg', '/content/masks/train/O14_2010_03_08_Regist_1.jpg', '/content/masks/train/22_2011_flipped_9.jpg', '/content/masks/train/20_2008_scaled_0_8.jpg', '/content/masks/train/18_2008_9.jpg', '/content/masks/train/08_2012_PT_scaled_0_4.jpg', '/content/masks/train/G16_2010_07_12_scaled_0_7.jpg', '/content/masks/train/24_2009_scaled_0_6.jpg', '/content/masks/train/21_2009_scaled_1_0.jpg', '/content/masks/train/Q16_2009_01_26_Regist_flipped_2.jpg', '/content/masks/train/19_2008_scaled_0_4.jpg', '/content/masks/train/19_2010png_flipped_1.jpg', '/content/masks/train/21_2011_scaled_0_2.jpg', '/content/masks/train/N16_2010_07_09_flipped_0.jpg', '/content/masks/train/J16_2009_05_04_scaled_1_1.jpg', '/content/masks/train/F14_2016_09_09_scaled_1_9.jpg', '/c

## Move json files to drive

In [23]:
shutil.copy("/content/jsons_train.json", "/content/drive/MyDrive/Projects/DermViz/Synthetic_data/full_back_syn/json_files/")
shutil.copy("/content/jsons_test.json", "/content/drive/MyDrive/Projects/DermViz/Synthetic_data/full_back_syn/json_files/")
shutil.copy("/content/jsons_val.json", "/content/drive/MyDrive/Projects/DermViz/Synthetic_data/full_back_syn/json_files/")

'/content/drive/MyDrive/Projects/DermViz/Synthetic_data/full_back_syn/json_files/jsons_val.json'