In [None]:
# Import required libraries
import json
import os
import shutil
from pathlib import Path
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import yaml

# Where the raw BDD100K download lives (images and label JSON files)
dataset_dir = Path("../../Datasets/ObjectDetectionSet/bdd100k")
images_dir = dataset_dir.joinpath("bdd100k/bdd100k/images/100k")
annotations_dir = dataset_dir.joinpath("bdd100k_labels_release/bdd100k/labels")

# Root of the converted dataset; created without parents, so dataset_dir must already exist
output_dir = dataset_dir.joinpath("yolo_format")
output_dir.mkdir(exist_ok=True)

# Create images/<split> and labels/<split> under the output root for every split
for split in ('train', 'val', 'test'):
    for kind in ('images', 'labels'):
        (output_dir / kind / split).mkdir(parents=True, exist_ok=True)

# Echo the resolved locations so a wrong relative path is easy to spot
print(f"📁 Dataset directory: {dataset_dir}")
print(f"📁 Images directory: {images_dir}")
print(f"📁 Annotations directory: {annotations_dir}")
print(f"📁 Output directory: {output_dir}")


In [None]:
# YOLO class names in class-id order: the position in this list is the class id
class_names = [
    'pedestrian',
    'rider',
    'car',
    'truck',
    'bus',
    'train',
    'motorcycle',
    'bicycle',
    'traffic light',
    'traffic sign',
]

# Class name -> integer class id, derived from the ordering above
class_mapping = {name: class_id for class_id, name in enumerate(class_names)}

# BDD100K JSON category -> class name used in class_mapping
# (only 'person' and 'bike' are renamed; the rest map to themselves)
bdd_to_yolo = dict([
    ('person', 'pedestrian'),
    ('rider', 'rider'),
    ('car', 'car'),
    ('truck', 'truck'),
    ('bus', 'bus'),
    ('train', 'train'),
    ('motorcycle', 'motorcycle'),
    ('bike', 'bicycle'),
    ('traffic light', 'traffic light'),
    ('traffic sign', 'traffic sign'),
])

print(f"📋 Number of classes: {len(class_names)}")
print(f"📋 Classes: {class_names}")
print(f"📋 Class mapping: {class_mapping}")


In [None]:
# Load annotation files
train_annotations_file = annotations_dir / "bdd100k_labels_images_train.json"
val_annotations_file = annotations_dir / "bdd100k_labels_images_val.json"

# Open the JSON files as UTF-8 explicitly instead of relying on the
# platform-default text encoding, which differs between systems.
print(f"📋 Loading train annotations from: {train_annotations_file}")
with open(train_annotations_file, 'r', encoding='utf-8') as f:
    train_annotations = json.load(f)

print(f"📋 Loading validation annotations from: {val_annotations_file}")
with open(val_annotations_file, 'r', encoding='utf-8') as f:
    val_annotations = json.load(f)

print(f"📊 Train annotations: {len(train_annotations)}")
print(f"📊 Validation annotations: {len(val_annotations)}")

# Analyze annotation structure by peeking at the first training record
sample_annotation = train_annotations[0]
print(f"\n📋 Sample annotation structure:")
print(f"  Image name: {sample_annotation['name']}")
print(f"  Number of labels: {len(sample_annotation['labels'])}")
# 'attributes' is only displayed, so a record without it prints None instead of failing
print(f"  Attributes: {sample_annotation.get('attributes')}")

if sample_annotation['labels']:
    sample_label = sample_annotation['labels'][0]
    print(f"  Sample label category: {sample_label['category']}")
    # A label is not guaranteed to carry a 'box2d' entry; print None rather
    # than raising KeyError in what is only an inspection step.
    print(f"  Sample label box2d: {sample_label.get('box2d')}")


In [None]:
def convert_bbox_to_yolo(box2d, img_width=1280, img_height=720):
    """
    Convert BDD100K bounding box format to YOLO format.

    The corner coordinates are first put in min/max order and clipped to the
    image rectangle, so every returned value lies in [0, 1] and the width and
    height are never negative. Boxes that are already well-formed and inside
    the image produce exactly the same result as a plain conversion.

    Args:
        box2d: Dictionary with x1, y1, x2, y2 coordinates (pixels)
        img_width: Image width (default: 1280 for BDD100K)
        img_height: Image height (default: 720 for BDD100K)

    Returns:
        Tuple of (center_x, center_y, width, height) normalized to [0,1]

    Raises:
        KeyError: If box2d lacks any of the x1, y1, x2, y2 keys.
        ZeroDivisionError: If img_width or img_height is 0.
    """
    # Order the corners so a box given as (bottom-right, top-left) still
    # yields a positive width and height.
    x1, x2 = sorted((box2d['x1'], box2d['x2']))
    y1, y2 = sorted((box2d['y1'], box2d['y2']))

    # Clip to the image so annotations that spill past the border cannot
    # produce normalized values outside [0, 1].
    x1, x2 = min(max(x1, 0), img_width), min(max(x2, 0), img_width)
    y1, y2 = min(max(y1, 0), img_height), min(max(y2, 0), img_height)

    # Calculate center coordinates
    center_x = (x1 + x2) / 2
    center_y = (y1 + y2) / 2

    # Calculate width and height
    width = x2 - x1
    height = y2 - y1

    # Normalize to [0, 1]
    center_x_norm = center_x / img_width
    center_y_norm = center_y / img_height
    width_norm = width / img_width
    height_norm = height / img_height

    return center_x_norm, center_y_norm, width_norm, height_norm

# Test the conversion function on the first sample label that has a 2D box.
# Labels are not guaranteed to carry 'box2d', so indexing labels[0]['box2d']
# directly could raise KeyError; fall back to None when no label has a box.
sample_box2d = next(
    (label['box2d'] for label in sample_annotation['labels'] if 'box2d' in label),
    None,
)
if sample_box2d is not None:
    yolo_coords = convert_bbox_to_yolo(sample_box2d)
    print(f"📐 Original box2d: {sample_box2d}")
    print(f"📐 YOLO format: {yolo_coords}")

    # Verify conversion by showing the unpacked centre and size
    center_x, center_y, width, height = yolo_coords
    print(f"📐 Center: ({center_x:.3f}, {center_y:.3f})")
    print(f"📐 Size: {width:.3f} x {height:.3f}")


In [None]:
def process_annotations(annotations, split_name):
    """
    Process annotations and convert to YOLO format.

    Each label whose category appears in ``bdd_to_yolo`` and that carries a
    ``box2d`` entry becomes one text line
    ``"<class_id> <center_x> <center_y> <width> <height>"`` with six decimal
    places. Labels with an unmapped or missing category, or without a box,
    are skipped. Images that end up with no lines are left out of the result.

    Args:
        annotations: List of annotation dictionaries
        split_name: Name of the split (train/val/test); used only for the
            progress-bar description

    Returns:
        Dictionary mapping image names to YOLO labels (a list of lines per image)

    Raises:
        KeyError: If an annotation record has no 'name' entry.
    """
    yolo_labels = {}

    for annotation in tqdm(annotations, desc=f"Processing {split_name} annotations"):
        image_name = annotation['name']
        yolo_lines = []

        # A record may come without a 'labels' entry (or with a null one);
        # treat that as "no objects" instead of raising.
        for label in annotation.get('labels') or ():
            # Map BDD100K category to our class; unmapped categories give None
            yolo_class = bdd_to_yolo.get(label.get('category'))
            box2d = label.get('box2d')

            # Skip labels outside the detection classes or without a 2D box
            if yolo_class is None or not box2d:
                continue

            class_id = class_mapping[yolo_class]
            center_x, center_y, width, height = convert_bbox_to_yolo(box2d)

            # Create YOLO format line
            yolo_lines.append(
                f"{class_id} {center_x:.6f} {center_y:.6f} {width:.6f} {height:.6f}"
            )

        if yolo_lines:
            yolo_labels[image_name] = yolo_lines

    return yolo_labels

# Convert the two annotated splits: train first, then validation
print("🔄 Processing train annotations...")
train_yolo_labels = process_annotations(annotations=train_annotations, split_name="train")

print("🔄 Processing validation annotations...")
val_yolo_labels = process_annotations(annotations=val_annotations, split_name="val")

# Report how many images produced at least one YOLO label line
print(f"📊 Train images with labels: {len(train_yolo_labels)}")
print(f"📊 Validation images with labels: {len(val_yolo_labels)}")


In [None]:
def copy_images_and_create_labels(split_name, yolo_labels):
    """
    Copy images and create YOLO label files for a split.

    For every image named in ``yolo_labels`` that exists under
    ``images_dir / split_name``, the image is copied to
    ``output_dir / 'images' / split_name`` and a text file with the same stem
    and a ``.txt`` extension is written to ``output_dir / 'labels' / split_name``.
    Images missing from the source directory are skipped silently and get no
    label file.

    Args:
        split_name: Name of the split (train/val/test)
        yolo_labels: Dictionary of YOLO labels (image name -> list of lines)

    Returns:
        Tuple of (copied_count, label_count): images copied and label files written
    """
    images_source_dir = images_dir / split_name
    images_dest_dir = output_dir / 'images' / split_name
    labels_dest_dir = output_dir / 'labels' / split_name

    copied_count = 0
    label_count = 0

    for image_name, yolo_lines in tqdm(yolo_labels.items(), desc=f"Processing {split_name}"):
        source_image = images_source_dir / image_name

        # Nothing to copy and nothing to label when the image is not on disk
        if not source_image.exists():
            continue

        # Copy image
        shutil.copy2(source_image, images_dest_dir / image_name)
        copied_count += 1

        # Create label file. Only the final extension is swapped for '.txt':
        # str.replace('.jpg', '.txt') would rewrite every '.jpg' occurrence in
        # the name and leave non-'.jpg' names (e.g. '.png', '.JPG') unchanged,
        # giving the label file the image's own filename.
        label_file = labels_dest_dir / Path(image_name).with_suffix('.txt')

        with open(label_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(yolo_lines))

        label_count += 1

    return copied_count, label_count

# Write the train split to disk (image copies plus label files)
print("📁 Processing train split...")
train_images, train_labels = copy_images_and_create_labels(
    split_name="train",
    yolo_labels=train_yolo_labels,
)

# Write the validation split to disk
print("📁 Processing validation split...")
val_images, val_labels = copy_images_and_create_labels(
    split_name="val",
    yolo_labels=val_yolo_labels,
)

# Summarise what was actually written for each split
print(f"✅ Train: {train_images} images, {train_labels} labels")
print(f"✅ Validation: {val_images} images, {val_labels} labels")


In [None]:
# Describe the converted dataset; key order here is the order written to the file
yolo_config = {
    # Dataset root directory (absolute form of output_dir)
    'path': str(output_dir.absolute()),
    # Image folders for each split, relative to 'path'
    'train': 'images/train',
    'val': 'images/val',
    'test': 'images/test',
    # Number of classes and their names
    'nc': len(class_names),
    'names': class_names,
}

# Serialise to block-style YAML (insertion order kept) and store it as data.yaml
config_file = output_dir / 'data.yaml'
config_text = yaml.dump(yolo_config, default_flow_style=False, sort_keys=False)
config_file.write_text(config_text)

print(f"📝 YOLO configuration saved to: {config_file}")
print(f"📋 Configuration:")
for field, setting in yolo_config.items():
    print(f"  {field}: {setting}")

# Final summary of where the results were written
print(f"\n✅ YOLO format conversion completed!")
print(f"📁 Output directory: {output_dir}")
print(f"📝 Configuration file: {config_file}")
print(f"🚀 Ready for YOLO training!")
