In [3]:
import json
import os
from tqdm import tqdm

# Paths to your dataset
train_json_path = r"C:\Users\Mahesh\OneDrive\Desktop\BDD100K\labels\bdd100k_labels_images_train.json"
val_json_path = r"C:\Users\Mahesh\OneDrive\Desktop\BDD100K\labels\bdd100k_labels_images_val.json"

train_images_path = r"C:\Users\Mahesh\OneDrive\Desktop\BDD100K\100k\train\trainA"
val_images_path = r"C:\Users\Mahesh\OneDrive\Desktop\BDD100K\100k\val"

yolo_train_labels_path = r"C:\Users\Mahesh\OneDrive\Desktop\BDD100K\yolo_labels\train"
yolo_val_labels_path = r"C:\Users\Mahesh\OneDrive\Desktop\BDD100K\yolo_labels\val"

# BDD100K class names
bdd100k_classes = ['car', 'bus', 'truck', 'pedestrian', 'traffic sign', 'rider', 'bicycle', 'motorcycle', 'train', 'traffic light']
class_to_id = {cls: idx for idx, cls in enumerate(bdd100k_classes)}

# Create YOLO label directories
os.makedirs(yolo_train_labels_path, exist_ok=True)
os.makedirs(yolo_val_labels_path, exist_ok=True)

# Function to convert BDD100K JSON to YOLO format
def convert_bdd100k_to_yolo(json_path, images_path, output_labels_path):
    with open(json_path, 'r') as f:
        data = json.load(f)

    for item in tqdm(data, desc=f"Converting {json_path}"):
        img_name = item["name"]
        img_w, img_h = 1280, 720  # BDD100K image size

        label_file = os.path.join(output_labels_path, img_name.replace(".jpg", ".txt"))

        with open(label_file, 'w') as label_f:
            for label in item["labels"]:
                category = label["category"]
                if category not in class_to_id:
                    continue  # Skip unknown categories

                cls_id = class_to_id[category]

                # Bounding box (normalized)
                x1, y1, x2, y2 = label["box2d"].values()
                x_center = (x1 + x2) / 2 / img_w
                y_center = (y1 + y2) / 2 / img_h
                width = (x2 - x1) / img_w
                height = (y2 - y1) / img_h

                label_f.write(f"{cls_id} {x_center} {y_center} {width} {height}\n")

# Convert train and val datasets
convert_bdd100k_to_yolo(train_json_path, train_images_path, yolo_train_labels_path)
convert_bdd100k_to_yolo(val_json_path, val_images_path, yolo_val_labels_path)


Converting C:\Users\Mahesh\OneDrive\Desktop\BDD100K\labels\bdd100k_labels_images_train.json: 100%|██████████| 69863/69863 [02:14<00:00, 518.03it/s]
Converting C:\Users\Mahesh\OneDrive\Desktop\BDD100K\labels\bdd100k_labels_images_val.json: 100%|██████████| 10000/10000 [00:13<00:00, 730.89it/s]


In [5]:
import os

# Define paths
image_folder = "C:/Users/Mahesh/OneDrive/Desktop/BDD100K/100k/train/trainA"
label_folder = "C:/Users/Mahesh/OneDrive/Desktop/BDD100K/yolo_labels/train/"

# Get image and label filenames (without extensions)
image_files = {f.split(".")[0] for f in os.listdir(image_folder) if f.endswith((".jpg", ".png"))}
label_files = {f.split(".")[0] for f in os.listdir(label_folder) if f.endswith(".txt")}

# Find images without labels
missing_labels = image_files - label_files
print(f"Total missing labels: {len(missing_labels)}")
print("Missing image names:", missing_labels)


Total missing labels: 137
Missing image names: {'765f15fb-ef824308', '6e0fa2a3-a8f2be95', '5e464562-65962983', '6032f7e2-a7062c48', '7809356e-8e74fe0a', '647e8e1c-0141c492', '5d178102-9d5e50e2', '69061757-1459064f', '6f0cc882-8b3e2238', '7aca5bc0-23859d6f', '6cf7a093-29129f20', '58208859-5316743a', '68ce94b4-855f26a3', '54ea83be-ce461fd7', '68aca905-2c504ffd', '659a7a5e-92995bc3', '641d1337-af06cf4e', '6051b591-f7ec53c8', '6188d9b2-08ac3765', '62bd3830-aa6cb431', '7764bde0-03b1070a', '66fab43d-46c5b8f2', '65551742-016ff56a', '7a53506f-9c59128b', '612affad-42702404', '573809a9-e5a1b3b8', '7a36201f-1b550e67', '5b4727ac-d3fd8666', '6a76c075-d995ef0a', '70cc96e1-1802a164', '5ee6af4d-b34992f0', '76b72431-735ebc8f', '6d3eab65-1a1f9a3b', '6b1f5022-48037f59', '5d1905d8-875b2482', '75417b66-b26413f4', '749161f9-5281a0e0', '7adaeab3-10ec73b5', '67c665d7-662c6e08', '66ed92d6-a7529a59', '6a90b276-de74a29c', '6fcbd17b-9cd6c3a2', '5f9e9266-78dac9cc', '5ea6db6f-fa6b86fb', '6a42c0ab-5a1df402', '5de4c7

In [1]:
import torch
print(torch.cuda.is_available())  # Should return True
print(torch.version.cuda)  # Should match installed CUDA


True
12.6


In [7]:
import os

# Define paths
image_folder = "C:/Users/Mahesh/OneDrive/Desktop/BDD100K/100k/train/trainA"
label_folder = "C:/Users/Mahesh/OneDrive/Desktop/BDD100K/yolo_labels/train/"

# Get image and label filenames (without extensions)
image_files = {f for f in os.listdir(image_folder) if f.endswith((".jpg", ".png"))}
label_files = {f.split(".")[0] for f in os.listdir(label_folder) if f.endswith(".txt")}

# Find images without labels
for img in image_files:
    img_name = os.path.splitext(img)[0]  # Remove extension
    if img_name not in label_files:
        os.remove(os.path.join(image_folder, img))  # Delete image

print("Deleted all images without labels.")


Deleted all images without labels.


In [7]:
import torch
torch.cuda.empty_cache()

In [9]:
import os

def check_yolo_label_matching(image_dir, label_dir, image_exts=('jpg', 'png', 'jpeg')):
    """
    Checks if all images in the dataset have corresponding YOLO labels and vice versa.
    
    :param image_dir: Path to the image directory
    :param label_dir: Path to the YOLO label directory
    :param image_exts: Tuple of valid image file extensions
    """
    # Get all image and label filenames without extensions
    image_files = {os.path.splitext(f)[0] for f in os.listdir(image_dir) if f.lower().endswith(image_exts)}
    label_files = {os.path.splitext(f)[0] for f in os.listdir(label_dir) if f.endswith('.txt')}
    
    # Find mismatches
    images_without_labels = image_files - label_files
    labels_without_images = label_files - image_files
    
    # Print results
    if images_without_labels:
        print(f"⚠️ {len(images_without_labels)} images without labels:")
        print(sorted(images_without_labels)[:10])  # Print first 10 mismatches
    else:
        print("✅ All images have corresponding labels!")
    
    if labels_without_images:
        print(f"⚠️ {len(labels_without_images)} labels without images:")
        print(sorted(labels_without_images)[:10])  # Print first 10 mismatches
    else:
        print("✅ All labels have corresponding images!")
    
    return images_without_labels, labels_without_images

# Example usage
image_dir = r"C:/Users/Mahesh/OneDrive/Desktop/BDD100K/100k/train/trainA"
label_dir = r"C:\Users\Mahesh\OneDrive\Desktop\BDD100K\yolo_labels\train"

images_without_labels, labels_without_images = check_yolo_label_matching(image_dir, label_dir)


✅ All images have corresponding labels!
✅ All labels have corresponding images!


In [None]:
# Original dataset paths
train_img_path = r"C:/Users/Mahesh/OneDrive/Desktop/BDD100K/100k/train/trainA"
train_label_path = r"C:\Users\Mahesh\OneDrive\Desktop\BDD100K\yolo_labels\train"

val_img_path = r"C:/Users/Mahesh/OneDrive/Desktop/BDD100K/100k/val"
val_label_path = r"C:\Users\Mahesh\OneDrive\Desktop\BDD100K\yolo_labels\val"

test_img_path = "C:/Users/Mahesh/OneDrive/Desktop/BDD100K/100k/test"
# test_label_path = "C:/Users/Mahesh/OneDrive/Desktop/BDD100K/labels/test"

# Subset paths
subset_train_img = "C:/Users/Mahesh/OneDrive/Desktop/BDD100K/100k/train_subset"
subset_train_label = "C:/Users/Mahesh/OneDrive/Desktop/BDD100K/yolo_labels/train_subset"

subset_val_img = "C:/Users/Mahesh/OneDrive/Desktop/BDD100K/100k/val_subset"
subset_val_label = "C:/Users/Mahesh/OneDrive/Desktop/BDD100K/yolo_labels/val_subset"

subset_test_img = "C:/Users/Mahesh/OneDrive/Desktop/BDD100K/100k/test_subset"
subset_test_label = "C:/Users/Mahesh/OneDrive/Desktop/BDD100K/labels/test_subset"

# # Generate subsets
# train_subset_img, train_subset_label = create_subset(train_img_path, train_label_path, subset_train_img, subset_train_label)
# val_subset_img, val_subset_label = create_subset(val_img_path, val_label_path, subset_val_img, subset_val_label)
# test_subset_img, test_subset_label = create_subset(test_img_path, test_label_path, subset_test_img, subset_test_label)


In [None]:
# Step 1: Import Required Libraries
from ultralytics import YOLO
import torch
import os
import random

# Step 2: Check GPU Availability
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using Device: {device}")

# Step 3: Define Dataset Paths
dataset_root = r"C:/Users/Mahesh/OneDrive/Desktop/BDD100K"
full_yaml_path = r"C:/Users/Mahesh/OneDrive/Desktop/BDD100K/data.yaml"

# Define image and label paths for train and val
train_img_path = os.path.join(dataset_root, "100k/train/trainA")
train_label_path = os.path.join(dataset_root, "yolo_labels/train")
val_img_path = os.path.join(dataset_root, "100k/val")
val_label_path = os.path.join(dataset_root, "yolo_labels/val")

# Step 4: Choose Between Full Dataset and 10% Subset
use_subset = True  # Set to False to use the full dataset

if use_subset:
    subset_yaml_path = os.path.join(dataset_root, "data_subset.yaml")  # Subset YAML file
    subset_root = os.path.join(dataset_root, "subset")  # Directory for subset data

    # Define subset paths
    subset_train_img = os.path.join(subset_root, "train/images")
    subset_train_label = os.path.join(subset_root, "train/labels")
    subset_val_img = os.path.join(subset_root, "val/images")
    subset_val_label = os.path.join(subset_root, "val/labels")

    def create_subset(original_img_path, original_label_path, subset_img_path, subset_label_path, percentage=0.5):
        """
        Create a subset of images and labels for YOLO training.
        
        - original_img_path: Path to original images
        - original_label_path: Path to original YOLO label files
        - subset_img_path: Destination for subset images
        - subset_label_path: Destination for subset labels
        - percentage: Fraction of data to use
        """
        if not os.path.exists(subset_img_path):
            os.makedirs(subset_img_path)
        if not os.path.exists(subset_label_path):
            os.makedirs(subset_label_path)

        all_images = os.listdir(original_img_path)
        sampled_images = random.sample(all_images, int(len(all_images) * percentage))

        for image_file in sampled_images:
            image_src = os.path.join(original_img_path, image_file)
            image_dst = os.path.join(subset_img_path, image_file)

            # Copy image
            if not os.path.exists(image_dst):
                os.link(image_src, image_dst)  # Hard link for efficiency

            # Find corresponding label file (.txt)
            label_file = os.path.splitext(image_file)[0] + ".txt"
            label_src = os.path.join(original_label_path, label_file)
            label_dst = os.path.join(subset_label_path, label_file)

            # Copy label if it exists
            if os.path.exists(label_src):
                if not os.path.exists(label_dst):
                    os.link(label_src, label_dst)  # Hard link for labels too

        return subset_img_path, subset_label_path

    # Create train and validation subsets
    train_subset_img, train_subset_label = create_subset(train_img_path, train_label_path, subset_train_img, subset_train_label)
    val_subset_img, val_subset_label = create_subset(val_img_path, val_label_path, subset_val_img, subset_val_label)

    # Create a new data.yaml for the subset
    with open(subset_yaml_path, "w") as f:
        f.write(f"train: {train_subset_img}\n")
        f.write(f"val: {val_subset_img}\n")
        f.write("nc: 10\n")
        f.write("names: ['car', 'pedestrian', 'truck', 'bicycle', 'motorcycle', 'bus', 'rider', 'traffic light', 'traffic sign', 'train']\n")

    yaml_path = subset_yaml_path  # Use subset for training
    print(f"✅ Using 10% subset dataset at: {subset_yaml_path}")
else:
    yaml_path = full_yaml_path  # Use full dataset
    print(f"✅ Using full dataset at: {full_yaml_path}")

# Step 5: Load YOLOv8 Model (Nano Version for Faster Training)
model = YOLO("yolov8n.pt")

# Step 6: Train Model
results = model.train(
    data=yaml_path,   # Use subset or full dataset
    epochs=2,        # Increase if needed
    imgsz=640,        # Lower resolution for MX450
    batch=2,          # Reduce batch size for GPU memory
    device=device,
    workers=2,        # Reduce CPU usage
    project="BDD100K_YOLOv8",
    name="yolov8n_train"
)

# Step 7: Validate Model (Using Validation Set Instead of Test)
val_results = model.val()
print("\n🔹 Validation Accuracy Metrics:")
print(f"   - mAP@50: {val_results.box.map:.4f}")
print(f"   - mAP@50-95: {val_results.box.maps.mean():.4f}")  


Using Device: cuda
✅ Using 10% subset dataset at: C:/Users/Mahesh/OneDrive/Desktop/BDD100K\data_subset.yaml
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:01<00:00, 5.82MB/s]


Ultralytics 8.3.78  Python-3.11.5 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce MX450, 2048MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=C:/Users/Mahesh/OneDrive/Desktop/BDD100K\data_subset.yaml, epochs=2, time=None, patience=100, batch=2, imgsz=640, save=True, save_period=-1, cache=False, device=cuda, workers=2, project=BDD100K_YOLOv8, name=yolov8n_train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf

100%|██████████| 5.35M/5.35M [00:01<00:00, 5.40MB/s]


[34m[1mAMP: [0mchecks passed 


[34m[1mtrain: [0mScanning C:\Users\Mahesh\OneDrive\Desktop\BDD100K\subset\train\labels... 34931 images, 9 backgrounds, 0 corrupt: 100%|██████████| 34931/34931 [05:48<00:00, 100.19it/s]


[34m[1mtrain: [0mNew cache created: C:\Users\Mahesh\OneDrive\Desktop\BDD100K\subset\train\labels.cache


[34m[1mval: [0mScanning C:\Users\Mahesh\OneDrive\Desktop\BDD100K\subset\val\labels... 5000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:58<00:00, 86.14it/s]


[34m[1mval: [0mNew cache created: C:\Users\Mahesh\OneDrive\Desktop\BDD100K\subset\val\labels.cache
Plotting labels to BDD100K_YOLOv8\yolov8n_train\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mBDD100K_YOLOv8\yolov8n_train[0m
Starting training for 2 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/2     0.541G      1.539      1.355      1.074         41        640: 100%|██████████| 17466/17466 [1:48:46<00:00,  2.68it/s]  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1250/1250 [04:11<00:00,  4.97it/s]


                   all       5000      85165       0.42      0.285       0.28      0.158


In [None]:
import torch
from ultralytics import YOLO

# Load the trained model
model = YOLO("runs/train/exp/weights/best.pt")  # Update the path if needed

# Define input video and output path
input_video = "input.mp4"  # Change this to the path of your video
output_video = "output.mp4"

# Run object detection on the video
results = model.predict(source=input_video, save=True, save_txt=True, conf=0.25)

print("Detection completed. Check the output folder for results.")
