In [5]:
import os
import json
from PIL import Image
from collections import OrderedDict
from tqdm import tqdm
from typing import Tuple, Dict, List, Optional
# Configuration parameters.
# All paths are relative, so they resolve against the current working directory.
# Source annotation JSON files for the train and val splits.
TRAIN_LABELS_PATH: str = "original_bdd100k_det/det_train.json"
VAL_LABELS_PATH: str = "original_bdd100k_det/det_val.json"
# Directories holding the images that the annotations refer to.
TRAIN_IMAGES_DIR: str = "datasets/bdd100k_det/images/train"
VAL_IMAGES_DIR: str = "datasets/bdd100k_det/images/val"
# Directories that receive the generated YOLO-format .txt label files.
OUTPUT_TRAIN_LABELS_DIR: str = "datasets/bdd100k_det/labels/train"
OUTPUT_VAL_LABELS_DIR: str = "datasets/bdd100k_det/labels/val"
# Destination of the generated dataset YAML (train/val paths, nc, names).
CUSTOM_YAML_PATH: str = "datasets/bdd100k_det/bdd100K_det_yolo11.yaml"

In [6]:
# Global mapping of category name -> integer class ID. It is an ordered dict so
# that categories keep the order in which they were first encountered; each new
# category receives the current size of the mapping as its ID.
class_mapping: OrderedDict = OrderedDict()


def convert_bbox(size: Tuple[int, int], box: Dict[str, float]) -> Tuple[float, float, float, float]:
    """
    Convert a pixel-space corner box into a normalized center/size box.

    The corners are first put in order (so a swapped x1/x2 or y1/y2 pair cannot
    produce a negative width or height) and clipped to the image rectangle, which
    keeps every returned value inside [0, 1]. Boxes that already lie inside the
    image with x1 <= x2 and y1 <= y2 are converted exactly as before.

    Args:
        size (Tuple[int, int]): Image size as (width, height) in pixels.
        box (Dict[str, float]): Corner coordinates in pixels, with the keys
            'x1', 'y1', 'x2' and 'y2'.

    Returns:
        Tuple[float, float, float, float]: (x_center, y_center, width, height),
        where the x values are scaled by 1/width and the y values by 1/height.

    Raises:
        ZeroDivisionError: If either image dimension is 0.
        KeyError: If one of the corner keys is missing from ``box``.
    """
    img_w, img_h = size[0], size[1]
    dw: float = 1.0 / img_w
    dh: float = 1.0 / img_h

    x1, x2 = box['x1'], box['x2']
    y1, y2 = box['y1'], box['y2']

    # Order the corners, then clip each edge to the image so that annotations
    # extending past the border do not yield coordinates outside [0, 1].
    left = min(max(min(x1, x2), 0.0), img_w)
    right = min(max(max(x1, x2), 0.0), img_w)
    top = min(max(min(y1, y2), 0.0), img_h)
    bottom = min(max(max(y1, y2), 0.0), img_h)

    x_center: float = (left + right) / 2.0
    y_center: float = (top + bottom) / 2.0
    width: float = right - left
    height: float = bottom - top

    x_center *= dw
    width *= dw
    y_center *= dh
    height *= dh

    return x_center, y_center, width, height


def process_annotations(json_file: str, images_dir: str, output_dir: str) -> None:
    """
    Convert one JSON annotation file into per-image YOLO label files.

    The JSON is expected to be a sequence of per-image records. For every record
    whose image exists and can be opened, a ``<image stem>.txt`` file is written to
    ``output_dir`` with one ``<class_id> <x_center> <y_center> <width> <height>``
    line per usable object. A record without labels still gets an empty label file.

    Class IDs come from the module-level ``class_mapping`` and are assigned in
    first-seen order. A category is registered only after its bounding box has been
    converted successfully, so objects that end up skipped never allocate an ID.

    Problems are reported with ``print`` and the affected file/record/object is
    skipped; the function does not raise for them.

    Args:
        json_file (str): Path to the annotation JSON file.
        images_dir (str): Directory containing the images named in the JSON.
        output_dir (str): Directory that receives the YOLO label files; created
            if it does not exist.
    """
    # exist_ok avoids the race between a separate existence check and the creation.
    os.makedirs(output_dir, exist_ok=True)

    # Load the annotations; decode explicitly as UTF-8 instead of the locale default.
    try:
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        print(f"无法解析 JSON 文件 {json_file}: {e}")
        return
    except FileNotFoundError:
        print(f"JSON 文件未找到: {json_file}")
        return

    # tqdm renders a progress bar over the records.
    for item in tqdm(data, desc=f"Processing {os.path.basename(json_file)}"):
        image_name: Optional[str] = item.get('name')
        if image_name is None:
            print("缺少 'name' 键，跳过此项。")
            continue

        image_path: str = os.path.join(images_dir, image_name)

        # Records whose image is missing are skipped entirely (no label file).
        if not os.path.exists(image_path):
            print(f"图片不存在: {image_path}")
            continue

        # The image size is needed to normalize the box coordinates.
        try:
            with Image.open(image_path) as img:
                width, height = img.size
        except Exception as e:
            print(f"无法打开图像 {image_path}: {e}")
            continue

        # Label file shares the image's base name, with a .txt extension.
        label_filename: str = os.path.splitext(image_name)[0] + '.txt'
        label_path: str = os.path.join(output_dir, label_filename)

        # 'labels' may be absent or None for images without annotations.
        labels: Optional[List[Dict]] = item.get('labels')

        # The file is opened unconditionally so unlabeled images get an empty file.
        try:
            with open(label_path, 'w', encoding='utf-8') as label_file:
                for obj in labels or []:
                    category: Optional[str] = obj.get('category')
                    if category is None:
                        print("缺少 'category' 键的对象，跳过。")
                        continue

                    bbox: Optional[Dict[str, float]] = obj.get('box2d')
                    if bbox is None:
                        print("缺少 'box2d' 键的对象，跳过。")
                        continue

                    try:
                        x_center, y_center, w, h = convert_bbox((width, height), bbox)
                    except KeyError as e:
                        print(f"边界框数据不完整 {bbox}: 缺少 {e} 键，跳过。")
                        continue

                    # Register the category only now that the object is known to
                    # be writable. class_mapping is mutated in place, so no
                    # `global` declaration is required.
                    class_id: int = class_mapping.setdefault(category, len(class_mapping))

                    # Line format: <class_id> <x_center> <y_center> <width> <height>
                    label_file.write(f"{class_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")
        except Exception as e:
            print(f"无法写入标签文件 {label_path}: {e}")


def generate_custom_yaml(train_img_dir: str, val_img_dir: str, num_classes: int,
                        class_names: List[str], yaml_path: str) -> None:
    """
    Write the dataset YAML file used for YOLO training.

    The file contains the absolute train/val image directories, the class count
    (``nc``) and the class names (``names``). Strings are emitted as JSON-style
    double-quoted scalars, which are also valid YAML, so paths or class names
    containing quotes, backslashes, ``#`` or ``: `` cannot corrupt the file.

    Args:
        train_img_dir (str): Training image directory (made absolute in the output).
        val_img_dir (str): Validation image directory (made absolute in the output).
        num_classes (int): Number of classes, written as ``nc``.
        class_names (List[str]): Class names in class-ID order, written as ``names``.
        yaml_path (str): Path of the YAML file to write; its parent directory is
            created if missing.
    """
    def _quoted(value) -> str:
        # JSON output is a valid YAML flow value; keep non-ASCII text readable.
        return json.dumps(value, ensure_ascii=False)

    yaml_content: str = (
        f"train: {_quoted(os.path.abspath(train_img_dir))}\n"
        f"val: {_quoted(os.path.abspath(val_img_dir))}\n"
        "\n"
        f"nc: {num_classes}\n"
        f"names: {_quoted(list(class_names))}\n"
    )

    # Make sure the destination directory exists before writing.
    parent_dir: str = os.path.dirname(yaml_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Write as UTF-8 so the result does not depend on the locale encoding.
    with open(yaml_path, 'w', encoding='utf-8') as f:
        f.write(yaml_content)

    print(f"{yaml_path} 已生成.")

In [7]:
def main() -> None:
    """
    Entry point: convert the train and val annotations to YOLO label files,
    write the dataset YAML, then print the resulting class mapping.
    """
    # Train is processed before val; class IDs follow first-seen order.
    splits = (
        (TRAIN_LABELS_PATH, TRAIN_IMAGES_DIR, OUTPUT_TRAIN_LABELS_DIR),
        (VAL_LABELS_PATH, VAL_IMAGES_DIR, OUTPUT_VAL_LABELS_DIR),
    )
    for labels_json, images_dir, labels_out_dir in splits:
        process_annotations(labels_json, images_dir, labels_out_dir)

    # Build the dataset YAML from the categories collected above.
    # Note: the dataset is assumed to have the same categories in train and val.
    names: List[str] = [name for name in class_mapping]
    generate_custom_yaml(
        train_img_dir=TRAIN_IMAGES_DIR,
        val_img_dir=VAL_IMAGES_DIR,
        num_classes=len(names),
        class_names=names,
        yaml_path=CUSTOM_YAML_PATH,
    )

    print("转换完成。")
    print("类别映射如下：")
    for name in class_mapping:
        print(f"{class_mapping[name]}: {name}")

In [8]:
# Run the conversion only when this code is executed directly (not when imported).
if __name__ == "__main__":
    main()

Processing det_train.json: 100%|██████████| 69863/69863 [00:20<00:00, 3442.46it/s]
Processing det_val.json: 100%|██████████| 10000/10000 [00:02<00:00, 3436.23it/s]


datasets/bdd100k_det/bdd100K_det_yolo11.yaml 已生成.
转换完成。
类别映射如下：
0: traffic light
1: traffic sign
2: car
3: pedestrian
4: bus
5: truck
6: rider
7: bicycle
8: motorcycle
9: train
10: other vehicle
11: other person
12: trailer
