In [1]:
import datetime
import json
import os
import re
import fnmatch
from PIL import Image
import numpy as np
from pycococreatortools import pycococreatortools
from tqdm import tqdm

In [27]:
# Dataset root. The default is a machine-specific absolute path; set the
# DOORS_DATASET_ROOT environment variable to run the notebook elsewhere
# without editing this cell.
ROOT_DIR = os.environ.get(
    'DOORS_DATASET_ROOT', '/mnt/data2/datasets/detr_doors_99_real_notDoor')
# Source images and the directory holding their mask files.
IMAGE_DIR = os.path.join(ROOT_DIR, "val2017")
ANNOTATION_DIR = os.path.join(ROOT_DIR, "val_masks")

In [28]:
# Dataset-level metadata stored under the "info" key of the generated JSON.
INFO = {
    "description": "Door_object_panoptic_segmentation",
    "url": "cab.se",
    "version": "0.1.0",
    "year": 2020,
    "contributor": "Cab",
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop tzinfo so the string keeps its offset-free format.
    "date_created": (
        datetime.datetime.now(datetime.timezone.utc)
        .replace(tzinfo=None)
        .isoformat(' ')
    ),
}

In [29]:
# License table for the "licenses" key of the generated JSON; a single
# placeholder entry.
LICENSES = [
    dict(
        id=1,
        name="Fake License",
        url="http://creativefakeys.org/licenses/by-nc-sa/2.0/",
    ),
]

In [30]:
# Category table for the "categories" key of the generated JSON. Ids are
# assigned in listing order starting at 0; each 'name' is also the substring
# looked for in mask file names to pick the class.
CATEGORIES = [
    {'id': category_id, 'name': category_name, 'supercategory': parent}
    for category_id, (category_name, parent) in enumerate([
        ('front_door', 'door'),
        ('rear_door', 'door'),
        ('not_door', 'background'),
    ])
]

In [31]:
def filter_for_png(root, files):
    """Return the image files among ``files`` as paths joined onto ``root``.

    Despite the name, PNG, JPG and JPEG files are all kept. Matching is
    case-sensitive, so e.g. ``.PNG`` is not accepted. Input order is preserved.

    Args:
        root: Directory the file names belong to.
        files: Iterable of file names inside ``root``.

    Returns:
        List of ``os.path.join(root, name)`` for every accepted name.
    """
    image_globs = ('*.png', '*.jpg', '*.jpeg')
    # One alternation regex built from the glob patterns.
    image_pattern = re.compile('|'.join(fnmatch.translate(glob) for glob in image_globs))

    kept = []
    for name in files:
        full_path = os.path.join(root, name)
        if image_pattern.match(full_path):
            kept.append(full_path)
    return kept

def filter_for_similar(root, files, image_filename):
    """Return the image files in ``files`` whose stem starts with the stem of ``image_filename``.

    The stem is the base name without its last extension. The comparison is a
    literal prefix test: the stem is not interpreted as a regular expression,
    so names containing characters such as ``.``, ``+`` or ``(`` match
    themselves only.

    Because this is a prefix test, an image named ``1.png`` also pairs with
    ``10_front_door_0.png``; use fixed-width image names to avoid collisions.

    Args:
        root: Directory the file names belong to.
        files: Iterable of file names inside ``root``.
        image_filename: Path or name of the image to find companions for.

    Returns:
        List of ``os.path.join(root, name)`` for every PNG/JPG/JPEG file whose
        stem has the image stem as a prefix, in input order.
    """
    file_types = ['*.png', '*.jpg', '*.jpeg']
    type_pattern = re.compile(r'|'.join(fnmatch.translate(x) for x in file_types))
    image_stem = os.path.splitext(os.path.basename(image_filename))[0]

    matches = []
    for name in files:
        path = os.path.join(root, name)
        if not type_pattern.match(path):
            continue
        candidate_stem = os.path.splitext(os.path.basename(path))[0]
        # Literal prefix comparison; a regex built from the raw stem would
        # treat metacharacters in file names as pattern syntax.
        if candidate_stem.startswith(image_stem):
            matches.append(path)

    return matches

In [32]:
# Sanity check: list the mask files that get paired with one image name.
image_filename = "0.png"
for root, _, files in os.walk(ANNOTATION_DIR):
    print(root + ' ' + str(len(files)))
    # Use filter_for_similar, the helper defined in this notebook. The name
    # filter_for_annotations called here before is not defined in any visible
    # cell, so the cell raised NameError on a fresh kernel.
    annotation_files = filter_for_similar(root, files, image_filename)
    print(str(len(annotation_files)))

    # go through each associated annotation
    for annotation_filename in annotation_files:
        print(annotation_filename)

/mnt/data2/datasets/detr_doors_99_real_notDoor/val_masks 297
297
/mnt/data2/datasets/detr_doors_99_real_notDoor/val_masks/038_front_door_0.jpg
/mnt/data2/datasets/detr_doors_99_real_notDoor/val_masks/001_rear_door_0.jpg
/mnt/data2/datasets/detr_doors_99_real_notDoor/val_masks/088_not_door_0.jpg
/mnt/data2/datasets/detr_doors_99_real_notDoor/val_masks/083_front_door_0.jpg
/mnt/data2/datasets/detr_doors_99_real_notDoor/val_masks/030_not_door_0.jpg
/mnt/data2/datasets/detr_doors_99_real_notDoor/val_masks/037_not_door_0.jpg
/mnt/data2/datasets/detr_doors_99_real_notDoor/val_masks/054_rear_door_0.jpg
/mnt/data2/datasets/detr_doors_99_real_notDoor/val_masks/082_front_door_0.jpg
/mnt/data2/datasets/detr_doors_99_real_notDoor/val_masks/038_not_door_0.jpg
/mnt/data2/datasets/detr_doors_99_real_notDoor/val_masks/094_front_door_0.jpg
/mnt/data2/datasets/detr_doors_99_real_notDoor/val_masks/015_front_door_0.jpg
/mnt/data2/datasets/detr_doors_99_real_notDoor/val_masks/011_front_door_0.jpg
/mnt/data

In [33]:
def run(output_path=None):
    """Build a COCO-style annotation file from IMAGE_DIR and ANNOTATION_DIR.

    Every image found under IMAGE_DIR is added to ``images``. Every mask under
    ANNOTATION_DIR that ``filter_for_similar`` pairs with that image is turned
    into an annotation through pycococreatortools and added to ``annotations``
    when one is returned. The result is written as JSON.

    Args:
        output_path: Destination of the JSON file. Defaults to
            ``<ROOT_DIR>/custom_train.json``.

    Raises:
        IndexError: If a mask's file name contains none of the CATEGORIES names.
    """
    if output_path is None:
        # NOTE(review): the default name says "train" while the configured
        # directories may point at a validation split - confirm, or pass
        # output_path explicitly.
        output_path = '{}/custom_train.json'.format(ROOT_DIR)

    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }

    image_id = 1
    segmentation_id = 1

    # The annotation tree is the same for every image, so walk it once here
    # instead of once per image.
    annotation_dirs = [(ann_root, ann_files)
                       for ann_root, _, ann_files in os.walk(ANNOTATION_DIR)]

    # filter for image files (png / jpg / jpeg)
    for image_root, _, image_names in os.walk(IMAGE_DIR):
        image_files = filter_for_png(image_root, image_names)

        # go through each image
        for image_filename in tqdm(image_files):
            # Only the size is needed; close the file handle right away.
            with Image.open(image_filename) as image:
                image_size = image.size
            image_info = pycococreatortools.create_image_info(
                image_id, os.path.basename(image_filename), image_size)
            coco_output["images"].append(image_info)

            # filter for associated mask annotations
            for ann_root, ann_files in annotation_dirs:
                annotation_files = filter_for_similar(ann_root, ann_files, image_filename)

                # go through each associated annotation
                for annotation_filename in annotation_files:
                    # Match on the file name only, so a category name that
                    # happens to occur in a directory name cannot pick the class.
                    mask_name = os.path.basename(annotation_filename)
                    class_id = [x['id'] for x in CATEGORIES if x['name'] in mask_name][0]

                    category_info = {'id': class_id, 'is_crowd': 'crowd' in image_filename}
                    # NOTE(review): convert('1') dithers by default; for lossy
                    # JPEG masks a plain threshold may be what is intended - confirm.
                    with Image.open(annotation_filename) as mask_image:
                        binary_mask = np.asarray(mask_image.convert('1')).astype(np.uint8)

                    annotation_info = pycococreatortools.create_annotation_info(
                        segmentation_id, image_id, category_info, binary_mask,
                        image_size, tolerance=2)

                    if annotation_info is not None:
                        coco_output["annotations"].append(annotation_info)

                    # Ids advance even when no annotation was produced.
                    segmentation_id = segmentation_id + 1

            image_id = image_id + 1

    with open(output_path, 'w') as output_json_file:
        json.dump(coco_output, output_json_file)


In [34]:
# Generate the COCO annotation JSON; run() writes it to custom_train.json under ROOT_DIR.
run()

100%|██████████| 99/99 [00:09<00:00, 10.71it/s]
