# Convert all BDD labels into COCO format

To run this file, place it in the root folder of the bdd100k toolkit, alongside `setup.py`.


## Data Structure

The structure of BDD100K box-tracking dataset looks like this:

- bdd100k
  - labels
    - box_track_20
      - train
      - val
  - images
    - track
      - train
      - val
      - test

The required format of the zipfile to be uploaded to BML is:

- xxx.zip
  - Images
    - 1.jpg
    - 2.jpg
  - Annotations
    - coco_info.json

![upload requirements](upload_requirements.png)

## Import packages & Set directories

The dataset type `set_type` can be either `'train'` or `'val'`.

`start` can be `0`, meaning the loop starts from the first available directory, or any other index into the alphabetically sorted list of available directories at which to start.

`end` can be `-1`, meaning the script will convert until the very last directory available, or can be any number indexing the directory you want to end with, sorted alphabetically.


In [17]:
import os
import shutil
import subprocess
import sys
import zipfile

# Which split to convert: 'train' or 'val'.
set_type = 'train'

# Every backslash is doubled on purpose: a lone '\X' is an invalid escape
# sequence and triggers a warning on current Python versions.
img_home_dir = f'E:\\dataset\\bdd100k\\bdd100k\\images\\track\\{set_type}\\'
label_dir = f'E:\\dataset\\bdd100k\\bdd100k\\labels\\box_track_20\\{set_type}\\'
output_dir = f'C:\\Users\\XingkuanYu\\Desktop\\bdd100k\\{set_type}\\'

# Range of directories (indices into the sorted list of available
# directories) handled by the main loop.
start = 0
end = 1


## Load directories

This function checks whether each directory has both an image folder and a label file, and adds it to `available_dirs` if so.


In [18]:
def load_dir(img_home_dir, label_dir):
    """Return the sorted names of sequences that have both labels and images.

    A sequence is available when ``label_dir`` (searched recursively)
    contains a ``<name>.json`` file and ``img_home_dir`` (searched
    recursively) contains a directory called ``<name>``.

    Args:
        img_home_dir: Root directory holding the per-sequence image folders.
        label_dir: Root directory holding the per-sequence ``.json`` labels.

    Returns:
        Alphabetically sorted list of names found in both trees; empty when
        either tree is missing or nothing matches.
    """
    label_names = set()
    for _, _, filenames in os.walk(label_dir):
        for filename in filenames:
            # str.strip('.json') would remove any of the characters
            # '.', 'j', 's', 'o', 'n' from both ends and mangle names, so
            # split off the extension instead.
            stem, ext = os.path.splitext(filename)
            if ext == '.json':
                label_names.add(stem)

    # Take the directory names straight from os.walk instead of splitting
    # paths on '\\', which only works with Windows separators.
    image_names = set()
    for _, dirnames, _ in os.walk(img_home_dir):
        image_names.update(dirnames)

    return sorted(label_names & image_names)


## mkdir()

A small helper that avoids repeating `try...except` statements when creating directories.


In [19]:
def mkdir(path):
    """Create the directory ``path`` if it does not already exist.

    Missing parent directories are created as well, so the call also works
    for a fresh output location. Calling it on an existing directory is a
    no-op.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)


## Main loop

In [21]:
# Convert every available sequence into a BML-ready zip archive:
# copy its images, convert its labels to COCO, flatten the image paths in the
# resulting json, zip the folder and delete the unzipped copy.
available_dirs = load_dir(img_home_dir, label_dir)
print(f'[+]{len(available_dirs)} dirs available')

# `end = -1` means "convert through the very last directory", but
# `available_dirs[:-1]` would silently drop that last one, so map -1 to an
# open-ended slice.
stop = None if end == -1 else end

# The output root is the same for every sequence; create it once.
mkdir(output_dir)

# Loop over all image folders in available dirs
for i, seq_name in enumerate(available_dirs[:stop]):
    if i < start:  # not using slicing to maintain index consistency in output
        continue
    print(f'\n[+]Converting {seq_name} ({i+1}/{len(available_dirs)})...')

    # Generate paths
    image_dir = os.path.join(img_home_dir, seq_name)
    label_path = os.path.join(label_dir, f'{seq_name}.json')
    output_folder_dir = os.path.join(output_dir, seq_name)
    annotations_dir = os.path.join(output_folder_dir, 'Annotations')
    temp_label_path = os.path.join(annotations_dir, f'{seq_name}.json')
    coco_info_path = os.path.join(annotations_dir, 'coco_info.json')

    # Make dirs
    mkdir(output_folder_dir)
    mkdir(annotations_dir)

    # Copy images; an 'Images' folder left over from an earlier run is reused
    try:
        shutil.copytree(image_dir,
                        os.path.join(output_folder_dir, 'Images'))
    except FileExistsError:
        pass

    # Convert bdd labels into coco format using the built-in converter.
    # An argument list (no shell) keeps paths containing spaces intact, and
    # check=True stops the run instead of carrying on without a label file.
    subprocess.run(
        [sys.executable, '-m', 'bdd100k.label.to_coco',
         '-m', 'box_track', '-i', label_path, '-o', temp_label_path],
        check=True,
    )

    # Trim the json file: remove the 'xxx\\' prefix so that BML can process it
    # e.g., '123456\\123456-1.jpg' ==> '123456-1.jpg'
    # Read the whole file, not just the first line, so multi-line json
    # output is not truncated.
    with open(temp_label_path, 'r') as temp:
        coco_text = temp.read()
    with open(coco_info_path, 'w') as label_file:
        label_file.write(coco_text.replace(f'{seq_name}\\\\', ''))

    # Remove temp label file
    os.remove(temp_label_path)

    # Compress the folder; archive names are relative to the sequence folder
    # so the zip contains 'Images/...' and 'Annotations/...' at its root.
    zip_path = output_folder_dir + '.zip'
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
        for dirname, _, filenames in os.walk(output_folder_dir):
            for filename in filenames:
                full_path = os.path.join(dirname, filename)
                archive.write(full_path,
                              os.path.relpath(full_path, output_folder_dir))

    # Remove unzipped folders
    shutil.rmtree(output_folder_dir)


[+]200 dirs available

[+]Converting 0000f77c-6257be58 (1/200)...


CalledProcessError: Command 'python -m bdd100k.label.to_coco -m box_track         -i E:\dataset\bdd100k\bdd100k\labels\box_track_20\train\0000f77c-6257be58.json -o C:\Users\XingkuanYu\Desktop\bdd100k\train\0000f77c-6257be58\Annotations\0000f77c-6257be58.json' returned non-zero exit status 1.