In [5]:
import json
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Dataset locations, relative to the notebook's working directory.
# For every split, *_BASE_DIR is the source directory that holds the raw
# images together with the annotation file, and *_OUT_DIR is the directory
# that receives the generated "images" and "masks" sub-directories.
TRAIN_BASE_DIR = "DATASET/Brain Tumor Image/train"
TRAIN_OUT_DIR = "DATASET/OUT/train"
TEST_BASE_DIR = "DATASET/Brain Tumor Image/test"
TEST_OUT_DIR = "DATASET/OUT/test"
VALID_BASE_DIR = "DATASET/Brain Tumor Image/valid"
VALID_OUT_DIR = "DATASET/OUT/valid"
# Name of the annotation file; it is looked up inside each split's *_BASE_DIR.
JSON_FILE = "_annotations.coco.json"

In [3]:
def reload_images(BASE_DIR, IMAGE_OUT_DIR, image_datas):
    """Re-export every listed image as ``<id>.png`` inside ``IMAGE_OUT_DIR``.

    Args:
        BASE_DIR: Directory that contains the source image files.
        IMAGE_OUT_DIR: Destination directory; created (with parents) when it
            does not exist yet.
        image_datas: Iterable of dicts providing ``'file_name'`` (joined onto
            ``BASE_DIR`` to locate the source file) and ``'id'`` (used as the
            stem of the output file name).

    Source files that OpenCV cannot decode are reported and skipped so that a
    single missing or corrupt file does not abort the whole export. A failed
    write is reported as well.
    """
    os.makedirs(IMAGE_OUT_DIR, exist_ok=True)
    for image_data in image_datas:
        filename = image_data['file_name']
        identity = image_data['id']
        source_path = os.path.join(BASE_DIR, filename)
        # cv2.imread reports failure by returning None instead of raising;
        # handing that None to cv2.imwrite would raise an opaque OpenCV error.
        image = cv2.imread(source_path)
        if image is None:
            print("skip unreadable image", source_path)
            continue
        target_path = os.path.join(IMAGE_OUT_DIR, f"{identity}.png")
        # cv2.imwrite returns False (no exception) when the file is not saved.
        if not cv2.imwrite(target_path, image):
            print("failed to write", target_path)

In [4]:
def reload_masks(IMAGE_OUT_DIR, ANNOTAION_OUT_DIR, annotation_datas):
    """Write one binary mask ``<image_id>.png`` per annotated image.

    Args:
        IMAGE_OUT_DIR: Directory holding the exported ``<image_id>.png``
            images; each image is read only to obtain the mask's height and
            width.
        ANNOTAION_OUT_DIR: Destination directory for the masks; created (with
            parents) when it does not exist yet.
        annotation_datas: Iterable of dicts providing ``'image_id'`` and
            ``'segmentation'`` (polygon coordinates ``x1, y1, x2, y2, ...``,
            either as one flat list or as a list of such lists).

    All polygons of all annotations that share an ``image_id`` are drawn into
    the same mask (pixel value 255 inside, 0 outside), so an image with several
    annotations keeps every region instead of only the last one written.
    Images that cannot be read are reported and skipped.
    """
    os.makedirs(ANNOTAION_OUT_DIR, exist_ok=True)

    # Collect the polygons per image first; dict order keeps first-seen order.
    polygons_by_image = {}
    for annotation_data in annotation_datas:
        polygons = _segmentation_polygons(annotation_data.get('segmentation'))
        polygons_by_image.setdefault(annotation_data['image_id'], []).extend(polygons)

    for identity, polygons in polygons_by_image.items():
        image_path = os.path.join(IMAGE_OUT_DIR, f"{identity}.png")
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for a missing or undecodable file.
            print("skip mask, unreadable image", image_path)
            continue
        mask = np.zeros(image.shape[:2], dtype=np.uint8)
        # Fill polygons one at a time: passing several contours in a single
        # fillPoly call can leave overlapping areas unfilled.
        for polygon in polygons:
            mask = cv2.fillPoly(mask, [polygon], 255)
        cv2.imwrite(os.path.join(ANNOTAION_OUT_DIR, f"{identity}.png"), mask)


def _segmentation_polygons(segmentation):
    """Return the polygons of a segmentation as a list of (N, 2) int32 arrays.

    Non-list values (for example a run-length-encoded dict or ``None``) and
    empty lists yield no polygons. Coordinate lists with an odd length or with
    fewer than three points are ignored because they do not describe an area.
    """
    if not isinstance(segmentation, (list, tuple)) or len(segmentation) == 0:
        return []
    # Accept a single flat coordinate list as well as a list of polygons.
    if not isinstance(segmentation[0], (list, tuple)):
        segmentation = [segmentation]
    polygons = []
    for coordinates in segmentation:
        if len(coordinates) < 6 or len(coordinates) % 2:
            continue
        polygons.append(np.array(coordinates, np.int32).reshape((-1, 2)))
    return polygons
        

In [5]:
def data_organize(BASE_DIR, OUT_DIR, json_file):
    """Export the images and masks of one dataset split.

    Loads the annotation file ``json_file`` from ``BASE_DIR``, then passes its
    ``'images'`` records to ``reload_images`` and its ``'annotations'`` records
    to ``reload_masks``. Images go to ``OUT_DIR/images`` and masks to
    ``OUT_DIR/masks``.
    """
    annotation_path = os.path.join(BASE_DIR, json_file)
    with open(annotation_path, 'r') as handle:
        coco = json.load(handle)

    # Look up both sections before any export starts.
    image_records, annotation_records = coco['images'], coco['annotations']

    images_dir = os.path.join(OUT_DIR, "images")
    masks_dir = os.path.join(OUT_DIR, "masks")
    reload_images(BASE_DIR, images_dir, image_records)
    reload_masks(images_dir, masks_dir, annotation_records)

In [6]:
# Export images and masks for the train, test and valid splits, in that order.
for split_base_dir, split_out_dir in (
    (TRAIN_BASE_DIR, TRAIN_OUT_DIR),
    (TEST_BASE_DIR, TEST_OUT_DIR),
    (VALID_BASE_DIR, VALID_OUT_DIR),
):
    data_organize(split_base_dir, split_out_dir, JSON_FILE)

In [6]:
def remove_duplicate_images(IMAGE_OUT_DIR, ANNOTAION_OUT_DIR):
    """Delete files that exist in only one of the two directories.

    Despite the name, nothing is de-duplicated: an image file without a mask
    of the same name is removed from ``IMAGE_OUT_DIR``, and a mask file without
    an image of the same name is removed from ``ANNOTAION_OUT_DIR``. Each
    removal is printed as ``remove <file name>``.

    Only regular files are considered. Sub-directories (for example the
    ``.ipynb_checkpoints`` folder Jupyter may create) are left alone, because
    ``os.remove`` raises when asked to delete a directory.
    """
    def _regular_files(directory):
        # Keep os.listdir order so removals are reported in a stable order.
        return [
            name for name in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, name))
        ]

    images = _regular_files(IMAGE_OUT_DIR)
    masks = _regular_files(ANNOTAION_OUT_DIR)
    # Sets make each membership test O(1) instead of scanning a list.
    image_names = set(images)
    mask_names = set(masks)

    for file in images:
        if file not in mask_names:
            os.remove(os.path.join(IMAGE_OUT_DIR, file))
            print("remove", file)
    for file in masks:
        if file not in image_names:
            os.remove(os.path.join(ANNOTAION_OUT_DIR, file))
            print("remove", file)

In [8]:
# Drop unpaired images/masks for the train, test and valid splits, in that order.
for split_out_dir in (TRAIN_OUT_DIR, TEST_OUT_DIR, VALID_OUT_DIR):
    remove_duplicate_images(
        os.path.join(split_out_dir, "images"),
        os.path.join(split_out_dir, "masks"),
    )