# MoNuSAC to COCO

In [None]:
import os
import cv2
import numpy as np
import shutil
import json

from PIL import Image
from pycococreatortools import pycococreatortools
from tqdm import tqdm

In [None]:
# Re-arrange the MoNuSAC label masks into one folder per image:
#   source: <masks_src_dir>/<patient>/<image>/<category>/<label-mask file>
#   target: <monusac_dir>/image_<n>/images/image_<n>.jpg
#           <monusac_dir>/image_<n>/masks/<category>/mask_<k>.png
# Every positive label value found in a label-mask file becomes its own
# binary (0/255) mask image.
masks_src_dir = '../data/interim/MoNuSAC/MoNuSAC_masks'
img_dir = '../data/raw/MoNuSAC/MoNuSAC_images_and_annotations'
counter = 0
monusac_dir = '../data/interim/MoNuSAC_precoco'

# Listings are sorted so the image_<n> / mask_<k> numbering is reproducible
# from one run (and one filesystem) to the next.
for patient_dir in sorted(os.listdir(masks_src_dir)):
    f = os.path.join(masks_src_dir, patient_dir)
    for image_dir in sorted(os.listdir(f)):
        # Read the source image first: cv2.imread returns None instead of
        # raising, so fail here with a clear message before writing anything.
        img_path = os.path.join(img_dir, patient_dir, image_dir + '.tif')
        img = cv2.imread(img_path)
        if img is None:
            raise OSError('Could not read image (missing or unsupported): ' + img_path)

        sample_dir = os.path.join(monusac_dir, 'image_' + str(counter))
        dir_create = os.path.join(sample_dir, 'images')
        os.makedirs(dir_create, exist_ok=True)
        masks_dir = os.path.join(sample_dir, 'masks')
        os.makedirs(masks_dir, exist_ok=True)

        for categorie in sorted(os.listdir(os.path.join(f, image_dir))):
            categorie_src = os.path.join(f, image_dir, categorie)
            categorie_dst = os.path.join(masks_dir, categorie)
            os.makedirs(categorie_dst, exist_ok=True)

            # Numbering restarts for every category of every image.
            mask_counter = 0
            for mask_name in sorted(os.listdir(categorie_src)):
                mask_path = os.path.join(categorie_src, mask_name)
                label_img = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
                if label_img is None:
                    raise OSError('Could not read mask (missing or unsupported): ' + mask_path)

                labels = np.unique(label_img)
                # Label 0 is treated as background; every other value is one instance.
                for label in labels[labels > 0]:
                    binary_mask = (label_img == label).astype(np.uint8) * 255
                    # PNG is lossless; JPEG compression would smear the 0/255
                    # border of the mask into intermediate gray values.
                    cv2.imwrite(
                        os.path.join(categorie_dst, 'mask_' + str(mask_counter) + '.png'),
                        binary_mask)
                    mask_counter += 1

        # The image itself stays a .jpg: later cells look it up by that extension.
        dst_image_path = os.path.join(dir_create, 'image_' + str(counter) + '.jpg')
        cv2.imwrite(dst_image_path, img)
        counter += 1

In [None]:
# Gather all images into a single flat folder, as desired by Detectron2.
def final_directory_structure(input_path, output_path):
    """Copy each per-image JPEG from ``input_path`` into one flat folder.

    For every entry ``<name>`` listed in ``input_path`` the file
    ``<input_path>/<name>/images/<name>.jpg`` is copied to
    ``<output_path>/<name>.jpg``.

    Args:
        input_path: Directory holding one sub-directory per image.
            A trailing path separator is optional.
        output_path: Destination directory; created if it does not exist.
            A trailing path separator is optional.
    """
    print(input_path)
    images = os.listdir(input_path)

    os.makedirs(output_path, exist_ok=True)

    for img in tqdm(images):
        # os.path.join keeps the paths valid whether or not the caller
        # passed directories with a trailing slash.
        shutil.copyfile(os.path.join(input_path, img, 'images', img + '.jpg'),
                        os.path.join(output_path, img + '.jpg'))

In [None]:
def convert_to_coco_format(image_path,
                           output_path,
                           categories,
                           dataset_name):
    """Build a COCO-style annotation file from per-image mask folders.

    Expected layout under ``image_path`` (one folder per image)::

        <name>/images/<name>.jpg
        <name>/masks/<category>/<mask file>

    Each mask file is converted to a binary array and turned into one
    annotation via ``pycococreatortools.create_annotation_info``. Masks for
    which that call returns ``None`` are skipped. Image ids and category ids
    start at 1; category ids follow the order of ``categories``.

    Args:
        image_path: Directory containing the per-image folders. A trailing
            path separator is optional.
        output_path: Path of the JSON file to write; its parent directory is
            created if needed.
        categories: List of category names; every non-empty category folder
            found under ``masks`` must appear in it.
        dataset_name: Stored as the ``supercategory`` of every category.

    Raises:
        ValueError: If a category folder containing masks is not listed in
            ``categories``.
    """
    cocoformat = {"images": [], "annotations": [], "categories": []}

    for category_id, category_name in enumerate(categories, start=1):
        cocoformat["categories"].append(
            {"id": category_id, "name": category_name, "supercategory": dataset_name})

    m_id = 1

    # Sorted so that image/annotation ids are reproducible between runs.
    images_name = sorted(os.listdir(image_path))

    # tqdm wraps the list (not the enumerate object) so it knows the total.
    for image_id, img in enumerate(tqdm(images_name), start=1):
        sample_dir = os.path.join(image_path, img)

        # Only the size is needed; close the file handle right away.
        with Image.open(os.path.join(sample_dir, "images", img + ".jpg")) as image:
            image_size = image.size

        image_info = pycococreatortools.create_image_info(image_id,
                                                          img + ".jpg",
                                                          image_size)
        cocoformat["images"].append(image_info)

        masks_root = os.path.join(sample_dir, "masks")
        for c in sorted(os.listdir(masks_root)):
            category_dir = os.path.join(masks_root, c)
            masks = sorted(os.listdir(category_dir))
            if not masks:
                continue

            # Raises ValueError for a category that is not in `categories`.
            category_id = categories.index(c) + 1

            for msk in masks:
                category_info = {'id': category_id, 'is_crowd': False}
                with Image.open(os.path.join(category_dir, msk)) as mask_file:
                    m_image = np.asarray(mask_file.convert('1')).astype(np.uint8)
                annotation_info = pycococreatortools.create_annotation_info(
                    m_id, image_id, category_info, m_image,
                    image_size, tolerance=2)
                # The id advances even when no annotation was produced.
                m_id += 1

                if annotation_info is not None:
                    cocoformat["annotations"].append(annotation_info)

    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_path, "w") as f:
        json.dump(cocoformat, f)

In [None]:
# Copy every converted training image into the flat COCO image folder.
train_input_path = "../data/interim/MoNuSAC_precoco/"
train_output_path = "../data/processed/MoNuSAC_coco/train/"

final_directory_structure(input_path=train_input_path,
                          output_path=train_output_path)

In [None]:
# Write the COCO annotation file for the training split.
train_path = "../data/interim/MoNuSAC_precoco/"
train_output_path = "../data/processed/MoNuSAC_coco/train.json"

convert_to_coco_format(
    image_path=train_path,
    output_path=train_output_path,
    categories=["Epithelial", "Lymphocyte", "Macrophage", "Neutrophil"],
    dataset_name="MoNuSAC",
)

In [None]:
# Shell escape: recursively delete the intermediate MoNuSAC_precoco tree.
!rm -r ../data/interim/MoNuSAC_precoco