In [1]:
import os
import json
import random
from tqdm import tqdm
from glob import glob

In [None]:
def bdd100k_to_coco(split_name, annotation_files, output_coco_json,
                    image_width=1280, image_height=720, image_suffix=".jpg"):
    """Convert per-image BDD100K label files into one COCO-style detection JSON.

    Each file in ``annotation_files`` is read as JSON and must provide a
    ``name`` key and a ``frames`` list whose first entry has an ``objects``
    list. Only objects that carry a ``box2d`` entry and whose ``category`` is
    one of the six classes listed below are exported; everything else is
    skipped.

    Args:
        split_name: Label of the split. It is used in the progress messages
            and as the prefix of the output file name.
        annotation_files: Sequence of paths to the per-image label JSON files.
        output_coco_json: File-name suffix; the result is written to
            ``f"{split_name}_{output_coco_json}"``.
        image_width: Width stored for every image entry (default 1280).
        image_height: Height stored for every image entry (default 720).
        image_suffix: Extension appended to ``name`` to form ``file_name``.

    Returns:
        None. The COCO dictionary is written to disk as indented JSON.

    Raises:
        OSError: If a label file cannot be read or the output cannot be written.
        KeyError, IndexError: If a label file lacks the expected structure.
    """
    classes_2_idx = {'traffic sign': 0, 'traffic light': 1, 'person': 2, 'car': 3, 'truck': 4, 'bus': 5}

    coco_data = {
        "images": [],
        "annotations": [],
        # Categories are declared exactly once, up front. Appending one entry
        # per annotation would duplicate ids and drop classes that happen to
        # be absent from the processed files.
        "categories": [
            {"id": class_idx, "name": class_name}
            for class_name, class_idx in classes_2_idx.items()
        ],
    }

    annotation_id = 1

    print(f'--> {split_name} split COCO construction')
    progress = tqdm(annotation_files, total=len(annotation_files))
    # Image ids are 1-based and follow the order of `annotation_files`.
    for image_id, annot_file in enumerate(progress, start=1):
        with open(annot_file, "r", encoding="utf-8") as f:
            bdd_data = json.load(f)

        image_name = bdd_data['name']
        image_objects = bdd_data['frames'][0]['objects']

        # Add image metadata
        coco_data["images"].append({
            "id": image_id,
            "file_name": image_name + image_suffix,
            "width": image_width,
            "height": image_height
        })

        for image_object in image_objects:
            # Objects without a 2D box cannot become bbox annotations.
            if 'box2d' not in image_object:
                continue

            object_name = image_object['category']
            if object_name not in classes_2_idx:
                continue

            box = image_object["box2d"]
            x1, y1, x2, y2 = box["x1"], box["y1"], box["x2"], box["y2"]
            # COCO stores boxes as [x, y, width, height] rather than corners.
            width_bbox, height_bbox = x2 - x1, y2 - y1

            coco_data["annotations"].append({
                "id": annotation_id,
                "image_id": image_id,
                "category_id": classes_2_idx[object_name],
                "bbox": [x1, y1, width_bbox, height_bbox],
                "area": width_bbox * height_bbox,
                "iscrowd": 0
            })
            annotation_id += 1

    save_coco_path = split_name + '_' + output_coco_json
    with open(save_coco_path, "w") as f:
        json.dump(coco_data, f, indent=4)

    print(f"BDD100K annotations in {split_name} split converted to COCO format -> {save_coco_path}")
    print('-*-'*20)


In [3]:
## Dataset Definitions
# Root folder holding one sub-folder of label JSON files per split.
# Set the BDD100K_LABELS_DIR environment variable to point at another location;
# the original hard-coded path stays as the default.
ds_main_path = os.environ.get('BDD100K_LABELS_DIR', r'C:\BDD100K\100k_labels')
ds_splits = ['train', 'val', 'test']

In [None]:
# Compute resources are limited, so only a fixed-size random subset of each
# split is converted. The seed makes the subset identical on every run.
SUBSET_SIZE = 100
SUBSET_SEED = 42

for ds_split in ds_splits:
    split_path = os.path.join(ds_main_path, ds_split)

    # Keep label files only, and sort them: os.listdir returns entries in
    # arbitrary order, which would defeat the seeded sampling below.
    json_files_paths = sorted(
        os.path.join(split_path, file_name)
        for file_name in os.listdir(split_path)
        if file_name.lower().endswith('.json')
    )

    # A dedicated RNG leaves the global `random` state untouched.
    subset_rng = random.Random(SUBSET_SEED)
    subset_paths = subset_rng.sample(
        json_files_paths, min(SUBSET_SIZE, len(json_files_paths))
    )

    bdd100k_to_coco(ds_split, subset_paths, 'subset_COCO_Format.json')


['C:\\BDD100K\\100k_labels\\train\\7a4cba5c-7f79912f.json', 'C:\\BDD100K\\100k_labels\\train\\51cad596-5813fa03.json', 'C:\\BDD100K\\100k_labels\\train\\17719f6d-8337aae5.json']
