In [10]:
import os
import json
import glob
from PIL import Image


def convert_to_coco_format(
    root_dir,
    filename,
    target_width=400,
    target_height=400,
    category_names=None,
):
    """Merge per-image JSON label files under ``root_dir`` into one COCO file.

    For every category name, all ``*.json`` files found recursively under
    ``<root_dir>/<category_name>/`` are read. Each file contributes one image
    entry and one annotation entry; its bounding box is rescaled from the
    original image size to ``target_width`` x ``target_height``.

    Each label file is expected to provide:
      * ``data["label"]["label_bbox"]``            -> x1, y1, x2, y2
      * ``data["images"]["meta"]["width_height"]`` -> width, height
      * ``data["images"]["meta"]["file_name"]``    -> image file name

    Args:
        root_dir: Directory containing one sub-directory per category.
        filename: Path of the COCO JSON file to write.
        target_width: Width recorded for every image and used to scale
            x-coordinates.
        target_height: Height recorded for every image and used to scale
            y-coordinates.
        category_names: Ordered category names; category ids are assigned
            1..N in this order. Defaults to the six built-in class names.

    Raises:
        KeyError: If a label file lacks one of the expected keys.
        ValueError: If the bbox or size entry has the wrong number of values
            or a value cannot be converted to float.
        ZeroDivisionError: If a label file reports a width or height of 0.
    """
    if category_names is None:
        category_names = ["증상없음", "각막궤양", "각막부골편", "결막염", "비궤양성각막염", "안검염"]

    coco_data = {"images": [], "annotations": [], "categories": []}

    # One running counter shared by images and annotations: every kept label
    # file yields exactly one of each, and skipped files leave no id gaps.
    next_id = 1

    for category_id, category_name in enumerate(category_names, start=1):
        coco_data["categories"].append({"id": category_id, "name": category_name})

        # Sorted so that id assignment does not depend on filesystem order.
        json_files = sorted(
            glob.glob(
                os.path.join(root_dir, category_name, "**", "*.json"),
                recursive=True,
            )
        )

        for json_file in json_files:
            # Explicit UTF-8: do not rely on the platform default encoding.
            with open(json_file, "r", encoding="utf-8") as f:
                data = json.load(f)

            meta = data["images"]["meta"]
            x1, y1, x2, y2 = map(float, data["label"]["label_bbox"])
            width, height = map(float, meta["width_height"])

            width_scale = target_width / width
            height_scale = target_height / height

            # COCO bbox layout: [x, y, width, height] in target-image pixels.
            bbox = [
                round(x1 * width_scale, 2),
                round(y1 * height_scale, 2),
                round((x2 - x1) * width_scale, 2),
                round((y2 - y1) * height_scale, 2),
            ]

            # Skip boxes whose corners are inverted (negative width or height).
            if bbox[2] < 0 or bbox[3] < 0:
                continue

            coco_data["images"].append(
                {
                    "id": next_id,
                    "file_name": meta["file_name"],
                    "width": target_width,
                    "height": target_height,
                }
            )
            coco_data["annotations"].append(
                {
                    "id": next_id,
                    "image_id": next_id,
                    "category_id": category_id,
                    "bbox": bbox,
                    "area": bbox[2] * bbox[3],
                    "iscrowd": 0,
                }
            )
            next_id += 1

    with open(filename, "w", encoding="utf-8") as f:
        json.dump(coco_data, f, indent=4)

In [16]:
# Each entry pairs a dataset split directory with the COCO file generated
# from it; the output files are written relative to the working directory.
conversion_jobs = [
    (
        "/Users/nehcream/Documents/Workspace/kdt_hackerton/datasets/cats_eyes/Training",
        "cats_eyes_train.json",
    ),
    (
        "/Users/nehcream/Documents/Workspace/kdt_hackerton/datasets/cats_eyes/Validation",
        "cats_eyes_val.json",
    ),
]

for root_dir, coco_filename in conversion_jobs:
    convert_to_coco_format(root_dir, coco_filename)

In [18]:
train_coco = '/Users/nehcream/Documents/Workspace/kdt_hackerton/datasets/annotations/cats_eyes_train.json'
val_coco = '/Users/nehcream/Documents/Workspace/kdt_hackerton/datasets/annotations/cats_eyes_val.json'


def _load_and_reindex_coco(coco_path):
    """Load a COCO JSON file and renumber its image/annotation ids from 1.

    Images and annotations are renumbered by position, so the i-th annotation
    is linked to the i-th image. That is only meaningful when the two lists
    are the same length and in matching order, so a length mismatch is
    rejected instead of silently producing wrong links.

    Args:
        coco_path: Path of the COCO JSON file to read.

    Returns:
        The loaded dict with ``id`` / ``image_id`` fields rewritten in place.

    Raises:
        ValueError: If the number of images and annotations differ.
    """
    # Context manager so the file handle is closed deterministically.
    with open(coco_path, encoding='utf-8') as f:
        coco = json.load(f)

    images = coco['images']
    annotations = coco['annotations']
    if len(images) != len(annotations):
        raise ValueError(
            f"{coco_path}: {len(images)} images but {len(annotations)} annotations; "
            "positional re-indexing requires exactly one annotation per image"
        )

    for new_id, (img, annotation) in enumerate(zip(images, annotations), start=1):
        img['id'] = new_id
        annotation['id'] = new_id
        annotation['image_id'] = new_id

    return coco


dict_train = _load_and_reindex_coco(train_coco)
dict_val = _load_and_reindex_coco(val_coco)

# Re-indexed copies are written relative to the current working directory.
with open('train_coco.json', 'w', encoding='utf-8') as f:
    json.dump(dict_train, f, indent=4)

with open('val_coco.json', 'w', encoding='utf-8') as f:
    json.dump(dict_val, f, indent=4)