In [None]:
# Copyright (c) 2021  IBM Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

This notebook merges the JSON annotation files from CVAT into a single file in a format suitable for training a Detectron2 object detector (i.e., COCO format).

For training, we suggest setting `ignore_categories = ["block", "planar sign"]`,
while for testing, we suggest setting `ignore_categories = ["planar sign"]`,
since we largely evaluate whether texts and symbols are grouped with the correct arrows and directional texts.

In [None]:
import json
import os

In [None]:
def merge_json_anno(path_anno_folder, path_json_save_to, ignore_categories):
    """
    Function to merge json with ignored category and save to a new json file

    Every regular file in path_anno_folder whose name ends with '.json' is
    read in sorted name order, so the renumbered ids are reproducible between
    runs. 'licenses', 'info' and 'categories' are taken from the first file
    only; 'images' and 'annotations' of all files are concatenated. Image ids,
    annotation ids and the ids of the kept categories are renumbered
    consecutively starting from 1.

    NOTE: the category ids of every file are translated with the table built
    from the first file, i.e. all files are assumed to share the same
    'categories' list.

    INPUT
    ------
    path_anno_folder   : string            - path to folder of json files
    path_json_save_to  : string or None    - path to save json or None for not to save
    ignore_categories  : list of strings   - categories to not include 

    OUTPUT
    ------
    universal_json     : dictionary        - dictionary of output merged data
                                             (empty if the folder has no json file)

    RAISES
    ------
    KeyError - if a kept annotation refers to an image id that is not listed
               in 'images' of its own file, or a file lacks an expected key
    """

    img_counter = 1
    anno_counter = 1

    # explicit flag: img_counter stays at 1 when the first file has no image,
    # so it cannot be used to detect the first file reliably
    is_first_file = True

    universal_json = {}

    # dict from category id of the json files to the renumbered category id
    dict_categories = {}

    # os.listdir returns names in arbitrary order; sort for reproducible ids
    for name_json_file in sorted(os.listdir(path_anno_folder)):

        # path of the json file
        dir_json_file = os.path.join(path_anno_folder, name_json_file)

        # disregard anything that is not a '*.json' file
        # (e.g. 'x.json.bak', 'x.jsonl' or a directory)
        if not name_json_file.endswith('.json') or not os.path.isfile(dir_json_file):
            continue

        print("Processing: {}".format(dir_json_file))

        # read json file (the with-statement closes it)
        with open(dir_json_file) as json_file:
            json_obj = json.load(json_file)

        # if first file, copy the ['licenses', 'info'], keep the categories
        # that are not ignored and create empty list for ['images', 'annotations']
        if is_first_file:
            is_first_file = False

            for key in ['licenses', 'info']:
                universal_json[key] = json_obj[key]

            universal_json['categories'] = []

            for key in ['images', 'annotations']:
                universal_json[key] = []

            for cat in json_obj['categories']:

                # skip categories in ignore_categories
                if cat['name'] in ignore_categories:
                    continue

                # kept categories are renumbered 1, 2, 3, ...
                new_cat_id = len(universal_json['categories']) + 1
                dict_categories[cat['id']] = new_cat_id
                cat['id'] = new_cat_id
                universal_json['categories'].append(cat)

        # dict from image id of this json to the universal image id
        dict_imgid = {}

        # replace the image id with the universal image id
        for it_img in json_obj['images']:
            if it_img['id'] not in dict_imgid:
                dict_imgid[it_img['id']] = img_counter
                img_counter += 1
            it_img['id'] = dict_imgid[it_img['id']]

        # keep the annotations of valid categories and renumber their ids
        valid_ann = []
        for it_anno in json_obj['annotations']:

            # drop annotations of ignored (or unknown) categories
            if it_anno['category_id'] not in dict_categories:
                continue

            it_anno['category_id'] = dict_categories[it_anno['category_id']] # change category
            it_anno['id'] = anno_counter
            anno_counter += 1
            it_anno['image_id'] = dict_imgid[it_anno['image_id']]
            valid_ann.append(it_anno)

        # concatenate to the universal_json
        universal_json['images'] += json_obj['images']
        universal_json['annotations'] += valid_ann

    if path_json_save_to is not None:
        with open(path_json_save_to, 'w') as json_file:
            json.dump(universal_json, json_file)

    return universal_json

In [None]:
# Build the annotation file used to train the detectron2 object detector.
# [block] objects are left out: detectron2 is not trained to detect them.
# (Use an empty list for ignore_categories to keep every category.)
path_anno_folder = 'dataset/raw/annotations/'
path_json_save_to = 'dataset/annotations.json'
ignore_categories = ["block", "planar sign"]

universal_json = merge_json_anno(
    path_anno_folder=path_anno_folder,
    path_json_save_to=path_json_save_to,
    ignore_categories=ignore_categories,
)

In [None]:
# Build the annotation file that is turned into training data for grouping signs.
# Only the [planar sign] category is left out here.
# (Use an empty list for ignore_categories to keep every category.)
path_anno_folder = 'dataset/raw/annotations/'
path_json_save_to = 'dataset/annotations_gt.json'
ignore_categories = ["planar sign"]

universal_json = merge_json_anno(
    path_anno_folder=path_anno_folder,
    path_json_save_to=path_json_save_to,
    ignore_categories=ignore_categories,
)

# Done
The remaining cells only display the merged variables for inspection.

In [None]:
# Display the top-level keys of the dictionary returned by the most recent
# merge_json_anno call (when the cells are run in order).
universal_json.keys()

In [None]:
# Display the categories kept in the merged dictionary, i.e. those not listed
# in ignore_categories, with their renumbered ids.
universal_json['categories']