In [8]:
# Install the required packages (e.g. from a terminal) before running the rest of this notebook:
# pip install ultralytics lxml scikit-learn

In [9]:
# ## Step 1: Import Necessary Libraries
# In this cell, we import the required libraries, including `os`, `json`, `KFold` from `sklearn`, and `YOLO` from `ultralytics`.

import os
import json
from sklearn.model_selection import KFold
from ultralytics import YOLO
import shutil

In [10]:
# ## Step 2: Load Configuration from `config.json`
# Parse `config.json` from the current working directory. Later cells read
# `base_path` (the dataset root) and `train_dirs` (training folders relative
# to that root); per the original note the file also lists the test folders.

with open("config.json", "r") as config_file:
    config = json.loads(config_file.read())

# Root directory that every relative dataset path is joined onto.
base_path = config["base_path"]

In [11]:
# ## Step 3: Collect Training Images and Labels
# Walk every directory listed in config["train_dirs"] and keep only the images
# that have a matching `.txt` label file, so that `all_train_images` and
# `all_train_labels` stay index-aligned.
# NOTE(review): labels are looked up as `.txt` files inside `annotations/xmls`;
# presumably the XML annotations were converted to YOLO text format beforehand
# -- confirm.


def _collect_image_label_pairs(images_dir, annotations_dir):
    """Return parallel lists of image paths and label paths found on disk.

    Args:
        images_dir: Directory scanned for files whose name ends in ".jpg".
        annotations_dir: Directory expected to hold "<image stem>.txt" labels.

    Returns:
        A tuple ``(images, labels)`` of two equally long lists; entry ``i`` of
        each list belongs to the same sample. Images without a label file are
        reported on stdout and skipped.
    """
    images, labels = [], []
    # os.listdir() returns names in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded K-fold split) identical between runs.
    for file_name in sorted(os.listdir(images_dir)):
        if not file_name.endswith(".jpg"):
            continue
        image_path = os.path.join(images_dir, file_name)
        # splitext swaps only the real extension, whereas str.replace would
        # also rewrite a ".jpg" that appears in the middle of a file name.
        stem = os.path.splitext(file_name)[0]
        label_path = os.path.join(annotations_dir, stem + ".txt")
        if os.path.exists(label_path):  # Only keep images that have a label
            images.append(image_path)
            labels.append(label_path)
        else:
            print(f"Missing label for image: {image_path}")
    return images, labels


# Re-created on every run so that re-executing the cell never duplicates samples.
all_train_images = []
all_train_labels = []

for train_dir in config["train_dirs"]:
    print("Processing directory:", train_dir)
    images_dir = os.path.join(base_path, train_dir, "images")
    annotations_dir = os.path.join(base_path, train_dir, "annotations", "xmls")

    print("Images directory:", images_dir)
    print("Annotations directory:", annotations_dir)

    # Skip (with a warning) any dataset whose folders are missing
    if not os.path.exists(images_dir):
        print(f"Warning: Images directory does not exist: {images_dir}")
        continue
    if not os.path.exists(annotations_dir):
        print(f"Warning: Annotations directory does not exist: {annotations_dir}")
        continue

    valid_images, valid_labels = _collect_image_label_pairs(images_dir, annotations_dir)
    all_train_images.extend(valid_images)
    all_train_labels.extend(valid_labels)

# Final check on collected images and labels
print("All collected training images:", len(all_train_images))
print("All collected training labels:", len(all_train_labels))


Processing directory: China_Drone/train
Images directory: E:/Honours Project/RDD2022\China_Drone/train\images
Annotations directory: E:/Honours Project/RDD2022\China_Drone/train\annotations\xmls
Processing directory: China_MotorBike/train
Images directory: E:/Honours Project/RDD2022\China_MotorBike/train\images
Annotations directory: E:/Honours Project/RDD2022\China_MotorBike/train\annotations\xmls
Processing directory: Czech/train
Images directory: E:/Honours Project/RDD2022\Czech/train\images
Annotations directory: E:/Honours Project/RDD2022\Czech/train\annotations\xmls
Processing directory: India/train
Images directory: E:/Honours Project/RDD2022\India/train\images
Annotations directory: E:/Honours Project/RDD2022\India/train\annotations\xmls
Processing directory: Japan/train
Images directory: E:/Honours Project/RDD2022\Japan/train\images
Annotations directory: E:/Honours Project/RDD2022\Japan/train\annotations\xmls
Processing directory: Norway/train
Images directory: E:/Honours Pro

In [12]:
# ## Step 4: Define Cross-Validation Parameters
# Settings shared by every fold, the list that collects one training result
# per fold, and the splitter that produces the train/validation indices.

results = []     # filled by the cross-validation loop
img_size = 640   # forwarded to YOLO training as `imgsz`
epochs = 50      # training epochs per fold
k = 5            # number of folds

# Shuffle before splitting; the fixed seed keeps the split repeatable.
kf = KFold(
    n_splits=k,
    shuffle=True,
    random_state=42,
)

In [None]:
# ## Step 5: Perform K-Fold Cross-Validation
# For each fold: write the fold's image lists and dataset YAML into
# temp_data/fold_<n>/, then train a YOLOv8 model built from "yolov8n.yaml".
#
# The dataset YAML points at per-fold image-list files. Pointing it at the
# directory of the first image (as before) ignores the fold indices, so every
# fold would train and validate on the same whole directory.


def _write_image_list(list_path, image_paths):
    """Write one absolute image path per line and return ``list_path``."""
    with open(list_path, "w", encoding="utf-8") as list_file:
        list_file.writelines(os.path.abspath(p) + "\n" for p in image_paths)
    return list_path


def _mirror_labels(image_paths, label_paths):
    """Copy each label file to where Ultralytics looks for it.

    Ultralytics derives a label path from an image path by swapping the last
    ``/images/`` path segment for ``/labels/`` and the extension for ``.txt``.
    The labels collected earlier live elsewhere, so each one is copied to the
    derived location once. Existing targets are left untouched, which makes
    the call safe to repeat.

    Args:
        image_paths: Image files, index-aligned with ``label_paths``.
        label_paths: Existing YOLO ``.txt`` label files.
    """
    images_segment = f"{os.sep}images{os.sep}"
    labels_segment = f"{os.sep}labels{os.sep}"
    for image_path, label_path in zip(image_paths, label_paths):
        # abspath also normalises mixed "/" and "\\" separators.
        image_path = os.path.abspath(image_path)
        head, found, tail = image_path.rpartition(images_segment)
        if not found:
            print(f"Warning: no 'images' directory in path, label not mirrored: {image_path}")
            continue
        target = head + labels_segment + os.path.splitext(tail)[0] + ".txt"
        if os.path.exists(target) or os.path.abspath(label_path) == target:
            continue
        os.makedirs(os.path.dirname(target), exist_ok=True)
        shutil.copyfile(label_path, target)


# Start from an empty list so re-running this cell does not accumulate
# results from earlier runs.
results = []

_mirror_labels(all_train_images, all_train_labels)

for fold, (train_index, val_index) in enumerate(kf.split(all_train_images)):
    print(f"Starting fold {fold + 1} of {k}...")

    # Image subsets selected by this fold's indices
    train_images = [all_train_images[i] for i in train_index]
    val_images = [all_train_images[i] for i in val_index]

    # Per-fold working directory for the image lists and the YAML file
    fold_dir = os.path.abspath(f"temp_data/fold_{fold}")
    os.makedirs(fold_dir, exist_ok=True)

    train_list = _write_image_list(os.path.join(fold_dir, "train.txt"), train_images)
    val_list = _write_image_list(os.path.join(fold_dir, "val.txt"), val_images)

    # json.dumps yields a double-quoted string that is also a valid YAML
    # scalar, so paths containing spaces or backslashes survive parsing.
    yaml_content = (
        f"train: {json.dumps(train_list)}\n"
        f"val: {json.dumps(val_list)}\n"
        "nc: 4\n"
        "names: ['D00', 'D10', 'D20', 'D40']\n"
    )
    yaml_file = os.path.join(fold_dir, "road_damage.yaml")
    with open(yaml_file, "w", encoding="utf-8") as f:
        f.write(yaml_content)

    # Train YOLOv8 on this fold; a new model object is created per fold so no
    # weights carry over from the previous one.
    model = YOLO("yolov8n.yaml")
    fold_results = model.train(data=yaml_file, epochs=epochs, imgsz=img_size)
    results.append(fold_results)

Starting fold 1 of 5...


FileNotFoundError: [Errno 2] No such file or directory: 'e:\\Honours Project\\RoadDamageDetection\\temp_data\\fold_0\\road_damage.yaml'

In [None]:
# ## Step 6: Calculate Average Performance Metrics
# After completing all folds, average precision, recall and mAP@0.5 over the
# fold results collected in `results`.


def _fold_metric(fold_result, key):
    """Return one scalar metric of a fold as a float.

    The object returned by Ultralytics training exposes its scalar metrics
    through the ``results_dict`` mapping; a plain mapping is accepted as well.
    """
    metrics = getattr(fold_result, "results_dict", fold_result)
    return float(metrics[key])


if not results:
    raise RuntimeError("No fold results available - run the cross-validation cell first.")

# Average over the folds that actually finished rather than the configured k,
# so an interrupted run is not silently under-weighted.
n_folds = len(results)

# Metric keys as named by Ultralytics detection metrics ("(B)" = box metrics).
avg_precision = sum(_fold_metric(r, "metrics/precision(B)") for r in results) / n_folds
avg_recall = sum(_fold_metric(r, "metrics/recall(B)") for r in results) / n_folds
avg_map = sum(_fold_metric(r, "metrics/mAP50(B)") for r in results) / n_folds

print(f"Average Precision: {avg_precision:.4f}")
print(f"Average Recall: {avg_recall:.4f}")
print(f"Average mAP@0.5: {avg_map:.4f}")

Testing the model

In [None]:
# # Load the YOLOv8 model (or create a new one)
# model = YOLO("yolov8n.yaml")  # using the 'nano' model variant; adjust if needed

# # Train the model
# model.train(data="Honours Project/roadDamage.yaml", epochs=50, imgsz=640)

In [None]:
# # Evaluate on the validation or test set
# results = model.val(data="C:/Users/alexa/Desktop/Honours Project/road_damage.yaml", imgsz=640)
# print(results)