In [1]:
import json
import os
from PIL import Image, UnidentifiedImageError
from tqdm import tqdm

import logging


In [2]:
# Category name -> YOLO class id.
# Each tuple lists every label spelling that shares one class id; the class id
# is the tuple's position in the sequence, so aliases are merged by grouping.
category_mapping = {
    label_name: class_id
    for class_id, label_names in enumerate((
        ("person", "pedestrian"),   # 0: pedestrian is merged into person
        ("rider",),                 # 1
        ("car",),                   # 2
        ("truck",),                 # 3
        ("bus",),                   # 4
        ("train",),                 # 5
        ("motor", "motorcycle"),    # 6: motorcycle is merged into motor
        ("bike", "bicycle"),        # 7: bicycle is merged into bike
        ("traffic light",),         # 8
        ("traffic sign",),          # 9
        ("trailer",),               # 10
        ("other person",),          # 11
        ("other vehicle",),         # 12
    ))
    for label_name in label_names
}

In [3]:

# Convert the BDD100K *train* JSON annotations into one YOLO-format .txt label
# file per image (one "class x_center y_center width height" line per box,
# with all four geometry values divided by the image width/height).

# Setup logging
log_dir = os.path.abspath(os.path.join(r"C:\Users\sathish\Downloads\FL_ModelForAV\logs\data_100k"))
os.makedirs(log_dir, exist_ok=True)
log_file = os.path.join(log_dir, r"train_yolo_conversion_json_to_yolov5_100k.log")

# logging.basicConfig() silently does nothing when the root logger already has
# handlers (e.g. this cell is re-run, or logging was configured earlier in the
# kernel). Detach and close any existing handlers so that this run really
# writes to `log_file`.
root_logger = logging.getLogger()
for existing_handler in list(root_logger.handlers):
    root_logger.removeHandler(existing_handler)
    existing_handler.close()

logging.basicConfig(filename=log_file, level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Paths (Update these as needed)
bdd_annotations_path_train = r"C:\Users\sathish\Downloads\FL_ModelForAV\data\bdd100k\label_json\bdd100k_labels_images_train.json"
image_folder_path = r"C:\Users\sathish\Downloads\FL_ModelForAV\data\bdd100k\images\train"
output_label_folder = r"C:\Users\sathish\Downloads\FL_ModelForAV\data\bdd100k\labels\train"

# Create output folder
os.makedirs(output_label_folder, exist_ok=True)

# Load JSON annotations with error handling.
# The encoding is given explicitly so the result does not depend on the
# platform's default text encoding.
try:
    with open(bdd_annotations_path_train, 'r', encoding='utf-8') as file:
        annotations = json.load(file)
except (FileNotFoundError, json.JSONDecodeError) as e:
    logging.error(f"Error loading JSON file: {e}")
    raise SystemExit(f"Error loading JSON file: {e}")

# Track statistics
skipped_labels = 0                 # labels dropped because they carry no 'box2d'
unrecognized_categories = set()    # distinct category names absent from category_mapping
processed_images = 0               # images whose size could be read
successful_conversions = 0         # individual boxes written to label files
error_images = 0                   # images that could not be opened or whose label file failed to write
missing_images = 0                 # annotated images that do not exist on disk

logging.info("Starting YOLO conversion process...")
for annotation in tqdm(annotations, desc="Converting to YOLO format"):
    image_name = annotation['name']
    # `or []` also covers an explicit null, not just a missing key.
    labels = annotation.get('labels') or []
    image_path = os.path.join(image_folder_path, image_name)

    if not os.path.exists(image_path):
        missing_images += 1
        warning_msg = f"Warning: Image {image_name} not found. Skipping."
        logging.warning(warning_msg)
        continue

    # The image dimensions are needed to normalise the box coordinates.
    try:
        with Image.open(image_path) as img:
            image_width, image_height = img.size
    except (UnidentifiedImageError, OSError) as e:
        # OSError covers other open failures (e.g. permission denied) that
        # would otherwise abort the whole conversion run.
        error_images += 1
        error_msg = f"Error opening image {image_name}: {e}. Skipping."
        logging.error(error_msg)
        continue

    processed_images += 1
    label_file_path = os.path.join(output_label_folder, os.path.splitext(image_name)[0] + '.txt')
    try:
        # The label file is created even when no label is written to it.
        with open(label_file_path, 'w') as label_file:
            for label in labels:
                # `or ''` keeps a null category from crashing on .strip().
                category = (label.get('category') or '').strip().lower()

                # Skip unrecognized categories but still create a label file
                if category not in category_mapping:
                    unrecognized_categories.add(category)
                    warning_msg = f"Warning: Unrecognized category '{category}' in image {image_name}. Skipping label."
                    logging.warning(warning_msg)
                    continue

                class_id = category_mapping[category]

                # Treat both a missing key and an explicit null as "no box".
                box2d = label.get('box2d')
                if not box2d:
                    skipped_labels += 1
                    warning_msg = f"Warning: 'box2d' missing for image {image_name}. Skipping label."
                    logging.warning(warning_msg)
                    continue

                x1, y1 = box2d['x1'], box2d['y1']
                x2, y2 = box2d['x2'], box2d['y2']

                # Corner coordinates -> centre/size, divided by the image dimensions.
                x_center = ((x1 + x2) / 2) / image_width
                y_center = ((y1 + y2) / 2) / image_height
                width = (x2 - x1) / image_width
                height = (y2 - y1) / image_height

                label_file.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")
                successful_conversions += 1
    except OSError as e:
        error_images += 1
        error_msg = f"Error writing label file {label_file_path}: {e}"
        logging.error(error_msg)
        continue

    logging.info(f"Processed image: {image_name}")

completion_msg = f"Conversion complete! YOLOv5 labels saved in {output_label_folder}"
logging.info(completion_msg)

stats_msg = (
    f"Total processed images: {processed_images}\n"
    f"Total successful conversions: {successful_conversions}\n"
    f"Total skipped labels due to missing 'box2d': {skipped_labels}\n"
    f"Total unrecognized categories: {len(unrecognized_categories)}\n"
    f"Total missing images: {missing_images}\n"
    f"Total error images (unreadable): {error_images}"
)
logging.info(stats_msg)

if unrecognized_categories:
    unrecognized_msg = f"Unrecognized categories found: {unrecognized_categories}"
    logging.warning(unrecognized_msg)


Converting to YOLO format: 100%|██████████| 69863/69863 [04:31<00:00, 256.93it/s]


In [4]:
# Convert the BDD100K *val* JSON annotations into one YOLO-format .txt label
# file per image (one "class x_center y_center width height" line per box,
# with all four geometry values divided by the image width/height).

# Setup logging
log_dir = os.path.abspath(os.path.join(r"C:\Users\sathish\Downloads\FL_ModelForAV\logs\data_100k"))
os.makedirs(log_dir, exist_ok=True)
log_file = os.path.join(log_dir, r"val_yolo_conversion_json_to_yolov5_100k.log")

# logging.basicConfig() does nothing once the root logger has handlers, which
# is the case here after the train-conversion cell has configured logging.
# Without this reset the val log file is never created and every message of
# this cell is appended to the previously configured log instead.
root_logger = logging.getLogger()
for existing_handler in list(root_logger.handlers):
    root_logger.removeHandler(existing_handler)
    existing_handler.close()

logging.basicConfig(filename=log_file, level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')


# Paths (Update these as needed)
bdd_annotations_path_val = r"C:\Users\sathish\Downloads\FL_ModelForAV\data\bdd100k\label_json\bdd100k_labels_images_val.json"
image_folder_path_val = r"C:\Users\sathish\Downloads\FL_ModelForAV\data\bdd100k\images\val"
output_label_folder_val = r"C:\Users\sathish\Downloads\FL_ModelForAV\data\bdd100k\labels\yolo_val"

# Create output folder
os.makedirs(output_label_folder_val, exist_ok=True)

# Load JSON annotations with error handling.
# The encoding is given explicitly so the result does not depend on the
# platform's default text encoding.
try:
    with open(bdd_annotations_path_val, 'r', encoding='utf-8') as file:
        annotations = json.load(file)
except (FileNotFoundError, json.JSONDecodeError) as e:
    logging.error(f"Error loading JSON file: {e}")
    raise SystemExit(f"Error loading JSON file: {e}")

# Track statistics
skipped_labels = 0                 # labels dropped because they carry no 'box2d'
unrecognized_categories = set()    # distinct category names absent from category_mapping
processed_images = 0               # images whose size could be read
successful_conversions = 0         # individual boxes written to label files
error_images = 0                   # images not found, unreadable, or whose label file failed to write

logging.info("Starting YOLO conversion process...")
for annotation in tqdm(annotations, desc="Converting to YOLO format"):
    image_name = annotation['name']
    # `or []` also covers an explicit null, not just a missing key.
    labels = annotation.get('labels') or []
    image_path = os.path.join(image_folder_path_val, image_name)

    # In this cell a missing image is counted together with unreadable ones.
    if not os.path.exists(image_path):
        error_images += 1
        warning_msg = f"Warning: Image {image_name} not found. Skipping."
        logging.warning(warning_msg)
        continue

    # The image dimensions are needed to normalise the box coordinates.
    try:
        with Image.open(image_path) as img:
            image_width, image_height = img.size
    except (UnidentifiedImageError, OSError) as e:
        # OSError covers other open failures (e.g. permission denied) that
        # would otherwise abort the whole conversion run.
        error_images += 1
        error_msg = f"Error opening image {image_name}: {e}. Skipping."
        logging.error(error_msg)
        continue

    processed_images += 1
    label_file_path = os.path.join(output_label_folder_val, os.path.splitext(image_name)[0] + '.txt')
    try:
        # The label file is created even when no label is written to it.
        with open(label_file_path, 'w') as label_file:
            for label in labels:
                # `or ''` keeps a null category from crashing on .strip().
                category = (label.get('category') or '').strip().lower()

                # Skip labels whose category has no class id in category_mapping.
                if category not in category_mapping:
                    unrecognized_categories.add(category)
                    warning_msg = f"Warning: Unrecognized category '{category}' in image {image_name}. Skipping label."
                    logging.warning(warning_msg)
                    continue

                class_id = category_mapping[category]

                # Treat both a missing key and an explicit null as "no box".
                box2d = label.get('box2d')
                if not box2d:
                    skipped_labels += 1
                    warning_msg = f"Warning: 'box2d' missing for image {image_name}. Skipping label."
                    logging.warning(warning_msg)
                    continue

                x1, y1 = box2d['x1'], box2d['y1']
                x2, y2 = box2d['x2'], box2d['y2']

                # Corner coordinates -> centre/size, divided by the image dimensions.
                x_center = ((x1 + x2) / 2) / image_width
                y_center = ((y1 + y2) / 2) / image_height
                width = (x2 - x1) / image_width
                height = (y2 - y1) / image_height

                label_file.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")
                successful_conversions += 1
    except OSError as e:
        error_images += 1
        error_msg = f"Error writing label file {label_file_path}: {e}"
        logging.error(error_msg)
        continue

    logging.info(f"Processed image: {image_name}")

completion_msg = f"Conversion complete! YOLOv5 labels saved in {output_label_folder_val}"
logging.info(completion_msg)

stats_msg = (
    f"Total processed images: {processed_images}\n"
    f"Total successful conversions: {successful_conversions}\n"
    f"Total skipped labels due to missing 'box2d': {skipped_labels}\n"
    f"Total unrecognized categories: {len(unrecognized_categories)}\n"
    f"Total error images (not found or unreadable): {error_images}"
)
logging.info(stats_msg)

if unrecognized_categories:
    unrecognized_msg = f"Unrecognized categories found: {unrecognized_categories}"
    logging.warning(unrecognized_msg)


Converting to YOLO format: 100%|██████████| 10000/10000 [00:54<00:00, 183.04it/s]
