In [3]:
import torch

print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

Setup complete. Using torch 1.8.0 _CudaDeviceProperties(name='NVIDIA GeForce GTX 960', major=5, minor=2, total_memory=4028MB, multi_processor_count=8)


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
!mkdir labels/valids/
!mkdir labels/trains/

In [24]:
import os
import json
import argparse
from tqdm import tqdm



def bdd2coco_detection(id_dict, labeled_images, fn):
    images = list()
    annotations = list()
    counter = 0
    for i in tqdm(labeled_images):
        counter += 1
        image = dict()
        image['file_name'] = i['name']
        image['height'] = 720
        image['width'] = 1280
        image['id'] = counter

        empty_image = True

        # Ensure 'labels' key exists
        if 'labels' in i:
            for label in i['labels']:
                annotation = dict()
                category = label['category']
                if category == "traffic light":
                    color = label['attributes']['trafficLightColor']
                    category = "tl_" + color
                if category in id_dict.keys():
                    empty_image = False
                    annotation["iscrowd"] = 0
                    annotation["image_id"] = image['id']
                    x1 = label['box2d']['x1']
                    y1 = label['box2d']['y1']
                    x2 = label['box2d']['x2']
                    y2 = label['box2d']['y2']
                    annotation['bbox'] = [x1, y1, x2-x1, y2-y1]
                    annotation['area'] = float((x2 - x1) * (y2 - y1))
                    annotation['category_id'] = id_dict[category]
                    annotation['ignore'] = 0
                    annotation['id'] = label['id']
                    annotation['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
                    annotations.append(annotation)

        if empty_image:
            continue

        images.append(image)

    attr_dict["images"] = images
    attr_dict["annotations"] = annotations
    attr_dict["type"] = "instances"

    print('saving...')
    json_string = json.dumps(attr_dict)
    with open(fn, "w") as file:
        file.write(json_string)


if __name__ == '__main__':

    label_dir="data/bdd100k/labels/det_20/"
    save_path="labels/"


    attr_dict = dict()
    attr_dict["categories"] = [
        {"supercategory": "none", "id": 1, "name": "pedestrian"},
        {"supercategory": "none", "id": 2, "name": "rider"},
        {"supercategory": "none", "id": 3, "name": "car"},
        {"supercategory": "none", "id": 4, "name": "bus"},
        {"supercategory": "none", "id": 5, "name": "truck"},
        {"supercategory": "none", "id": 6, "name": "bicycle"},
        {"supercategory": "none", "id": 7, "name": "motorcycle"},
        {"supercategory": "none", "id": 8, "name": "tl_G"},
        {"supercategory": "none", "id": 9, "name": "tl_R"},
        {"supercategory": "none", "id": 10, "name": "tl_Y"},
        {"supercategory": "none", "id": 11, "name": "tl_none"},
        {"supercategory": "none", "id": 12, "name": "traffic sign"},
        {"supercategory": "none", "id": 13, "name": "train"}
    ]

    attr_id_dict = {i['name']: i['id'] for i in attr_dict['categories']}

    # create BDD training set detections in COCO format
    print('Loading training set...')
    with open(os.path.join(label_dir,
                           'det_train.json')) as f:
        train_labels = json.load(f)
    print('Converting training set...')

    out_fn = os.path.join(save_path,
                          'bdd100k_labels_images_det_coco_train.json')
    bdd2coco_detection(attr_id_dict, train_labels, out_fn)

    print('Loading validation set...')
    # create BDD validation set detections in COCO format
    with open(os.path.join(label_dir,
                           'det_val.json')) as f:
        val_labels = json.load(f)
    print('Converting validation set...')

    out_fn = os.path.join(save_path,
                          'bdd100k_labels_images_det_coco_val.json')
    bdd2coco_detection(attr_id_dict, val_labels, out_fn)

Loading training set...
Converting training set...


100%|███████████████████████████████████| 69863/69863 [00:07<00:00, 9401.59it/s]


saving...
Loading validation set...
Converting validation set...


100%|███████████████████████████████████| 10000/10000 [00:02<00:00, 3930.95it/s]


saving...


In [28]:
import os
import json
import numpy as np
from pathlib import Path
from collections import defaultdict
from tqdm import tqdm



def coco_to_yolo(coco_json_path, cls_list_path, output_path, img_path, img_type):
    # Load class names
    with open(cls_list_path, "r") as file:
        cls_names = file.read().strip().split('\n')
    cls_to_id = {name: idx for idx, name in enumerate(cls_names)}
    
    # Load COCO JSON
    with open(coco_json_path) as f:
        coco_data = json.load(f)
    
    # Create image and annotation dictionaries
    images = {img["id"]: img for img in coco_data["images"]}
    annotations = defaultdict(list)
    for ann in coco_data["annotations"]:
        annotations[ann["image_id"]].append(ann)

    # Create output directory
    Path(output_path).mkdir(parents=True, exist_ok=True)
    
    for img_id, anns in tqdm(annotations.items(), desc="Processing images"):
        img = images[img_id]
        img_name = img["file_name"]
        img_w, img_h = img["width"], img["height"]
        
        # Create YOLO label file
        label_file_path = Path(output_path) / (Path(img_name).with_suffix(".txt"))
        with open(label_file_path, "w") as label_file:
            for ann in anns:
                if ann["iscrowd"]:
                    continue
                
                # Convert bounding box
                x, y, w, h = ann["bbox"]
                x_center = (x + w / 2) / img_w
                y_center = (y + h / 2) / img_h
                width = w / img_w
                height = h / img_h
                
                cls_id = ann["category_id"]  # Assuming category_id starts from 1
                
                # Write to file
                label_file.write(f"{cls_id} {x_center} {y_center} {width} {height}\n")
    
    print("Conversion completed.")

def main(config):
    if config["datasets"] == "COCO":
        coco_to_yolo(
            coco_json_path=config["label"],
            cls_list_path=config["cls_list"],
            output_path=config["output_path"],
            img_path=config["img_path"],
            img_type=config["img_type"]
        )
    else:
        print("Unknown datasets")

if __name__ == '__main__':
    
#     config ={
#         "datasets": "COCO",
#         "img_path": "data/bdd100k/images/100k/train",
#         "label": "labels/bdd100k_labels_images_det_coco_train.json",
#         "img_type": ".jpg",
#         "manipast_path": "./",
#         "output_path": "labels/trains/",
#         "cls_list": "data/bdd100k.names",
#         }
    config = {
        "datasets": "COCO",
        "img_path": "data/bdd100k/images/100k/val",
        "label": "labels/bdd100k_labels_images_det_coco_val.json",
        "img_type": ".jpg",
        "output_path": "labels/valids/",
        "cls_list": "data/bdd100k.names",
    }

    main(config)

Processing images: 100%|████████████████| 10000/10000 [00:01<00:00, 6377.57it/s]


Conversion completed.


In [23]:
 import glob, os, shutil

'''
Sometimes your image data set might not match with your label data set.
This code does the folowing
(1) Go through your image data set
(2) Search if the corresponding label file exist in the label data set. 
(3) If not, remove current image
'''


def copy_filter(label_dir,image_dir,target_dir_images,target_dir_labels):
    for image in os.listdir(image_dir):
        if image.endswith('jpg'):
            image_name = os.path.splitext(image)[0]

            # Corresponding label file name
            label_name = image_name + '.txt'
            image_path = image_dir + '/' + image_name + '.jpg'
            if os.path.isfile(label_dir + '/' + label_name) == False:
                print(" -- DELETE IMAGE [Label file not found -- ]")
                
                print(image_path)
#                 os.remove(image_path)
#             else:
                target_images=target_dir_images+ '/' + image_name + '.jpg'
                shutil.copy(image_path,target_dir_images )
                print(" --COPY IMAGE "+target_images)


    for label in os.listdir(label_dir):
        if label.endswith('.txt'):
            label_name = os.path.splitext(label)[0]

            # Corresponding label file name
            image_name = label_name + '.jpg'
            label_path = label_dir + '/' + label_name + '.txt'
            if os.path.isfile(image_dir + '/' + image_name) == False:
                print(" -- DELETE LABEL [Image file not found -- ]")
                print(label_path)
#                 os.remove(label_path)
#             else:
                target_labels=target_dir_labels+ '/' + label_name + '.txt'
                shutil.copy(label_path,target_labels )
                print(" --COPY lABELS "+target_labels)

In [None]:
label_dir = '/labels/trains'
image_dir = 'data/bdd100k/images/100k/train/'
target_dir_images="bdd100k/images/trains"
target_dir_labels="bdd100k/labels/trains"
copy_filter(label_dir,image_dir,target_dir_images,target_dir_labels)