In [1]:
import os, random, shutil, glob, yaml


In [2]:
# Project root. Defaults to the original workstation location; set the
# ROAD_DAMAGE_ROOT environment variable to run the notebook from another
# checkout without editing every path below.
PROJECT_ROOT = os.environ.get(
    "ROAD_DAMAGE_ROOT", r"/home/memad/Desktop/Road Damage Detection"
)

# Input dataset (images and YOLO-format label files live under <root>/data)
DATA_DIR = "data"
IM_DIR = os.path.join(PROJECT_ROOT, DATA_DIR, "images")
LBL_DIR = os.path.join(PROJECT_ROOT, DATA_DIR, "labels")
# Output dirs
MODEL_DIR = os.path.join(PROJECT_ROOT, "models")
VIS_DIR = os.path.join(PROJECT_ROOT, "visuals")
NOTEBOOKS_DIR = os.path.join(PROJECT_ROOT, "notebooks")

# Make sure every output directory exists before later cells write into it.
for out_dir in (MODEL_DIR, VIS_DIR, NOTEBOOKS_DIR):
    os.makedirs(out_dir, exist_ok=True)

# Split parameters
VAL_SPLIT = 0.1  # fraction of images held out for validation
SEED = 42        # seed for the train/val shuffle
random.seed(SEED)


In [3]:
# List every file with an extension in the image folder. The directory part
# is escaped so glob metacharacters in the path ("[", "*", "?") are literal.
images = sorted(glob.glob(os.path.join(glob.escape(IM_DIR), "*.*")))
print("Total images:", len(images))
if not images:
    # Fail here with a clear message instead of later, inside training.
    raise FileNotFoundError(f"No images found in {IM_DIR!r}")

# Shuffle with a dedicated RNG seeded in this cell. It yields the same
# permutation as random.seed(SEED) followed by random.shuffle(), but the split
# no longer depends on how often (or whether) the config cell was re-run.
random.Random(SEED).shuffle(images)
val_count = max(1, int(len(images) * VAL_SPLIT))  # keep at least one val image
val_images = images[:val_count]
train_images = images[val_count:]

# Create split folders in YOLO format: <split_base>/<split>/{images,labels}
# NOTE(review): existing folders are reused (exist_ok=True) and never emptied,
# so files from an earlier run with a different SEED/VAL_SPLIT would remain
# alongside the new split - confirm the dataset folder is clean before reuse.
split_base = r"/home/memad/Desktop/Road Damage Detection/dataset"
for split in ["train", "val"]:
    for sub in ["images", "labels"]:
        os.makedirs(os.path.join(split_base, split, sub), exist_ok=True)

def copy_files(img_list, split, lbl_dir=None, out_base=None):
    """Copy images and their matching label files into one dataset split.

    For each image, the label is looked up as ``<lbl_dir>/<image stem>.txt``.
    Images are always copied; a label is copied only when that file exists.

    Args:
        img_list: Iterable of image file paths.
        split: Split folder name, e.g. ``"train"`` or ``"val"``.
        lbl_dir: Directory holding the label files. Defaults to the
            module-level ``LBL_DIR``.
        out_base: Dataset root that contains the split folders. Defaults to
            the module-level ``split_base``.

    Returns:
        List of image paths (in input order) for which no label file was
        found, so the caller can inspect them instead of the gap going
        unnoticed.
    """
    lbl_dir = LBL_DIR if lbl_dir is None else lbl_dir
    out_base = split_base if out_base is None else out_base

    img_out = os.path.join(out_base, split, "images")
    lbl_out = os.path.join(out_base, split, "labels")
    # Create the targets here so the function does not depend on another cell
    # having prepared the folder layout.
    os.makedirs(img_out, exist_ok=True)
    os.makedirs(lbl_out, exist_ok=True)

    unlabeled = []
    for img_path in img_list:
        fname = os.path.basename(img_path)
        stem, _ = os.path.splitext(fname)
        lbl_path = os.path.join(lbl_dir, stem + ".txt")
        # copy image
        shutil.copy(img_path, os.path.join(img_out, fname))
        # copy label if it exists as a regular file (a directory with that
        # name would make shutil.copy raise)
        if os.path.isfile(lbl_path):
            shutil.copy(lbl_path, os.path.join(lbl_out, stem + ".txt"))
        else:
            unlabeled.append(img_path)
    return unlabeled

# Populate the train split first, then the val split.
for split_name, split_images in (("train", train_images), ("val", val_images)):
    copy_files(split_images, split_name)

# Report the size of each split.
print("Train images:", len(train_images))
print("Val images:", len(val_images))


Total images: 2009
Train images: 1809
Val images: 200


In [4]:
# Dataset descriptor handed to the trainer: absolute dataset root, the image
# folder of each split, and the class-index -> class-name mapping.
dataset_yaml = dict(
    path=os.path.abspath(split_base),
    train="train/images",
    val="val/images",
    names=dict(enumerate(["Pothole", "Crack", "Manhole"])),
)

# Serialize the descriptor next to the split folders.
yaml_path = os.path.join(split_base, "dataset.yaml")
with open(yaml_path, "w") as f:
    f.write(yaml.dump(dataset_yaml))

print("YAML saved to", yaml_path)


YAML saved to /home/memad/Desktop/Road Damage Detection/dataset/dataset.yaml


In [5]:
from ultralytics import YOLO

# Start from the pretrained YOLOv8n checkpoint
model = YOLO("yolov8n.pt")

# Training settings, gathered in one place so they are easy to tweak
train_args = dict(
    data=yaml_path,       # dataset descriptor written in the previous cell
    epochs=30,
    imgsz=640,
    project=MODEL_DIR,    # run output goes under MODEL_DIR/<name>
    name="road_damage",
    plots=True,
)

# Train
results = model.train(**train_args)


Ultralytics 8.3.195 🚀 Python-3.10.12 torch-2.8.0+cu128 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 5938MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/home/memad/Desktop/Road Damage Detection/dataset/dataset.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=road_damage, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overl

In [6]:
# Ultralytics already saves plots in the run directory (results.save_dir).
# Copy the PNG files from there to VIS_DIR as required.
exp_dir = results.save_dir  # e.g. /models/road_damage

# Escape the directory part so glob metacharacters in the path are treated
# literally; os.fspath() accepts both str and path-like save_dir values.
png_paths = sorted(glob.glob(os.path.join(glob.escape(os.fspath(exp_dir)), "*.png")))
for f in png_paths:
    shutil.copy(f, VIS_DIR)

# Only claim success when something was actually copied.
if png_paths:
    print("Copied training curves to", VIS_DIR)
else:
    print("No PNG plots found in", exp_dir)


Copied training curves to /home/memad/Desktop/Road Damage Detection/visuals
