In [None]:
%pip uninstall torch torchvision torchaudio ultralytics -y

In [None]:
%pip install gdown ultralytics tqdm pycocotools opencv-python
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [None]:
import os
import shutil
import random
import json
from tqdm import tqdm
from ultralytics import YOLO
import torch

MERGED_PATH = 'merged_dataset'
COCO_PATH = 'coco2017'
RDD_PATH = 'RDD_SPLIT'

CLASSES = [
    'car', 'pedestrian', 'bicycle', 'truck',
    'longitudinal crack', 'alligator crack', 'transverse crack', 'other corruption', 'pothole'
]

COCO_CATEGORY_MAPPING = {
    3: 0,  # car
    1: 1,  # pedestrian
    2: 2,  # bicycle
    8: 3   # truck
}

In [None]:
os.makedirs(f"{MERGED_PATH}/images", exist_ok=True)
os.makedirs(f"{MERGED_PATH}/labels", exist_ok=True)

image_paths = []

# Converting COCO to YOLO format
print("Processing COCO")
coco_images_dir = os.path.join(COCO_PATH, 'train2017')
coco_annotations_file = os.path.join(COCO_PATH, 'annotations', 'instances_train2017.json')

with open(coco_annotations_file, 'r') as f:
    coco_data = json.load(f)

images = {img['id']: img for img in coco_data['images']}
annotations = coco_data['annotations']

image_to_labels = {}

for ann in annotations:
    image_id = ann['image_id']
    category_id = ann['category_id']

    if category_id not in COCO_CATEGORY_MAPPING:
        continue

    mapped_class = COCO_CATEGORY_MAPPING[category_id]

    bbox = ann['bbox']
    img_info = images[image_id]
    img_width = img_info['width']
    img_height = img_info['height']

    x_center = (bbox[0] + bbox[2] / 2) / img_width
    y_center = (bbox[1] + bbox[3] / 2) / img_height
    width = bbox[2] / img_width
    height = bbox[3] / img_height

    label = f"{mapped_class} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"

    filename = img_info['file_name']
    if filename not in image_to_labels:
        image_to_labels[filename] = []
    image_to_labels[filename].append(label)

for filename, labels in tqdm(image_to_labels.items(), desc="Copying COCO images"):
    src_img = os.path.join(coco_images_dir, filename)
    dest_img = os.path.join(MERGED_PATH, 'images', filename)
    dest_lbl = os.path.join(MERGED_PATH, 'labels', filename.replace('.jpg', '.txt'))

    if not os.path.exists(src_img):
        continue

    shutil.copyfile(src_img, dest_img)
    with open(dest_lbl, 'w') as f:
        f.write('\n'.join(labels))

    image_paths.append(dest_img)

print("Processing RDD2022")
for split in ['train', 'test']:
    rdd_images_dir = os.path.join(RDD_PATH, split, 'images')
    rdd_labels_dir = os.path.join(RDD_PATH, split, 'labels')

    if not os.path.exists(rdd_images_dir):
        continue

    for img_file in tqdm(os.listdir(rdd_images_dir), desc=f"Copying {split} images"):
        if not img_file.endswith('.jpg'):
            continue
        src_img = os.path.join(rdd_images_dir, img_file)
        src_lbl = os.path.join(rdd_labels_dir, img_file.replace('.jpg', '.txt'))

        if not os.path.exists(src_lbl):
            continue

        dest_img = os.path.join(MERGED_PATH, 'images', img_file)
        dest_lbl = os.path.join(MERGED_PATH, 'labels', img_file.replace('.jpg', '.txt'))

        shutil.copyfile(src_img, dest_img)

        with open(src_lbl, 'r') as f_in, open(dest_lbl, 'w') as f_out:
            for line in f_in:
                cls_id, x_center, y_center, width, height = map(float, line.strip().split())
                new_class_id = int(cls_id) + 4
                f_out.write(f"{new_class_id} {x_center} {y_center} {width} {height}\n")

        image_paths.append(dest_img)

# Splitting dataset
random.shuffle(image_paths)
train_split = int(0.7 * len(image_paths))
val_split = int(0.9 * len(image_paths))

with open(f"{MERGED_PATH}/train.txt", 'w') as f:
    f.write('\n'.join(image_paths[:train_split]))
with open(f"{MERGED_PATH}/val.txt", 'w') as f:
    f.write('\n'.join(image_paths[train_split:val_split]))
with open(f"{MERGED_PATH}/test.txt", 'w') as f:
    f.write('\n'.join(image_paths[val_split:]))

print("Merged dataset ready at:", MERGED_PATH)

Processing COCO


Copying COCO images: 100%|██████████| 69260/69260 [03:43<00:00, 310.54it/s]


Processing RDD2022


Copying train images: 100%|██████████| 26869/26869 [03:23<00:00, 131.75it/s]
Copying test images: 100%|██████████| 5758/5758 [00:44<00:00, 127.99it/s]


Splitting dataset
Merged dataset ready at: merged_dataset


In [None]:
import os

train_txt_path = 'merged_dataset/train.txt'
balanced_txt_path = 'merged_dataset/train_balanced.txt'

coco_lines = []
rdd_lines = []

with open(train_txt_path, 'r') as f:
    for line in f:
        img_path = line.strip()
        label_path = img_path.replace('images', 'labels').replace('.jpg', '.txt').replace('\\', '/')

        if not os.path.exists(label_path):
            continue

        try:
            with open(label_path, 'r') as label_file:
                labels = label_file.readlines()
                if not labels:
                    continue
                # Parse class IDs from label file
                first_class = int(labels[0].split()[0])
                if first_class < 4:
                    coco_lines.append(line)
                else:
                    rdd_lines.append(line)
        except Exception as e:
            print(f"Error reading label file {label_path}: {e}")
            continue

print(f"Found {len(coco_lines)} COCO images")
print(f"Found {len(rdd_lines)} RDD images")

# Oversample RDD images
rdd_oversampled = rdd_lines * 3

# Combine and shuffle
import random
all_lines = coco_lines + rdd_oversampled
random.shuffle(all_lines)

with open(balanced_txt_path, 'w') as f:
    f.writelines(all_lines)

print(f"Balanced training file created: {balanced_txt_path}")
print(f"Total training samples: {len(all_lines)}")

Found 48461 COCO images
Found 15978 RDD images
Balanced training file created: merged_dataset/train_balanced.txt
Total training samples: 96395


In [None]:
import os
import random
from collections import defaultdict

# balance dataset since there is an imbalance

MERGED_PATH = "merged_dataset"
labels_dir = os.path.join(MERGED_PATH, 'labels')
images_dir = os.path.join(MERGED_PATH, 'images')

train_small = os.path.join(MERGED_PATH, 'train_small.txt')
val_small = os.path.join(MERGED_PATH, 'val_small.txt')
test_small = os.path.join(MERGED_PATH, 'test_small.txt')

target_per_class_train = 1000
target_per_class_val = 300
target_per_class_test = 300
val_ratio = 0.15
test_ratio = 0.15

class_to_images = defaultdict(set)
all_images = []

for label_file in os.listdir(labels_dir):
    if not label_file.endswith('.txt'):
        continue

    label_path = os.path.join(labels_dir, label_file)
    image_name = os.path.splitext(label_file)[0] + '.jpg'
    image_path = os.path.join(images_dir, image_name)

    if not os.path.exists(image_path):
        continue

    all_images.append(image_path)

    with open(label_path, 'r') as f:
        for line in f:
            if line.strip():
                cls = int(line.split()[0])
                class_to_images[cls].add(image_path)

def sample_subset(class_to_images, target_per_class):
    selected = set()
    for cls, images in class_to_images.items():
        selected.update(random.sample(list(images), min(target_per_class, len(images))))
    return list(selected)

balanced_pool = sample_subset(class_to_images, target_per_class_train + target_per_class_val + target_per_class_test)
random.shuffle(balanced_pool)

val_count = int(len(balanced_pool) * val_ratio)
test_count = int(len(balanced_pool) * test_ratio)

val_images = balanced_pool[:val_count]
test_images = balanced_pool[val_count:val_count + test_count]
train_images = balanced_pool[val_count + test_count:]

def write_list(path, images):
    with open(path, 'w') as f:
        f.writelines([img + '\n' for img in sorted(images)])

write_list(train_small, train_images)
write_list(val_small, val_images)
write_list(test_small, test_images)

print(f"Train: {len(train_images)}\nVal: {len(val_images)}\nTest: {len(test_images)}")

✅ Done!
Train: 9340
Val: 2001
Test: 2001


In [None]:
# ------------------ YAML FILE ------------------
yaml_content = f"""
path: C:/Users/tommy/Desktop/CMPE 258/RoadObjectDetection/merged_dataset
train: train_small.txt
val: val_small.txt
test: test_small.txt

nc: 9
names: ['car', 'pedestrian', 'bicycle', 'truck', 'longitudinal crack', 'alligator crack', 'transverse crack', 'other corruption', 'pothole']

"""

with open('dataset.yaml', 'w') as f:
    f.write(yaml_content)
print("dataset.yaml written.")

dataset.yaml written.


In [2]:
import os
import shutil
import random
import json
from tqdm import tqdm
from ultralytics import YOLO
import torch

In [None]:
import torch.nn as nn
from ultralytics import YOLO
from ultralytics.nn.modules import C2f, Detect

# Training
device = 'cuda'

model = YOLO('yolov8_custom.yaml')

model.train(
    data='dataset.yaml',
    epochs=100,
    imgsz=480,  
    batch=8,
    optimizer='AdamW',
    lr0=0.001,
    weight_decay=0.0005,
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    translate=0.05,
    scale=0.4,
    shear=0.4,
    perspective=0.0003,
    fliplr=0.3,
    mosaic=0.25,
    device='cuda',
    project='experiments',
    name='Custom_Model'
)
print("Training complete.")

In [None]:
model = YOLO("C:/Users/tommy/Desktop/CMPE 258/RoadObjectDetection/experiments/Custom_Model3/weights/best.pt")

metrics = model.val(data='dataset.yaml', split='test', device = 'cpu')


print(f"mAP@0.5: {metrics.box.map50:.4f}")
print(f"mAP@0.5:0.95: {metrics.box.map:.4f}")

Ultralytics 8.3.126  Python-3.10.2 torch-2.7.0+cu118 CPU (Intel Core(TM) i7-10700F 2.90GHz)
YOLOv8_custom summary (fused): 90 layers, 18,674,915 parameters, 0 gradients, 45.8 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.30.1 ms, read: 12.05.0 MB/s, size: 79.4 KB)


[34m[1mval: [0mScanning merged_dataset\labels... 2001 images, 0 backgrounds, 0 corrupt: 100%|██████████| 2001/2001 [00:03<00:00, 590.37it/s]


[34m[1mval: [0mNew cache created: merged_dataset\labels.cache


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 126/126 [03:53<00:00,  1.86s/it]


                   all       2001       9274      0.563       0.45      0.463      0.242
                   car        440       1651      0.647      0.434      0.477       0.27
            pedestrian        675       3508      0.692      0.467      0.523       0.28
               bicycle        258        580      0.525      0.378      0.386      0.215
                 truck        337        530      0.532      0.394      0.409      0.248
    longitudinal crack        479        925      0.511       0.38      0.382      0.181
       alligator crack        375        574      0.491      0.423      0.415      0.176
      transverse crack        419        545      0.547      0.523      0.524      0.269
      other corruption        375        522      0.572      0.655      0.623      0.358
               pothole        267        439      0.549      0.394      0.431      0.183
Speed: 0.6ms preprocess, 105.3ms inference, 0.0ms loss, 0.7ms postprocess per image
Results saved to [1mc:\Us

In [None]:
model = YOLO("C:/Users/tommy/Desktop/CMPE 258/RoadObjectDetection/experiments/AdamW_V2/weights/best.pt")
results = model.predict(
    source='C:/Users/tommy/Desktop/CMPE 258/RoadObjectDetection/RDD_SPLIT/test/images/Japan_006154.jpg',
    save=True,
    device='cpu'
)


image 1/1 C:\Users\tommy\Desktop\CMPE 258\RoadObjectDetection\RDD_SPLIT\test\images\Japan_006154.jpg: 480x480 2 trucks, 58.4ms
Speed: 1.7ms preprocess, 58.4ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 480)
Results saved to [1mc:\Users\tommy\runs\detect\predict7[0m
