**UXO Detection using YOLOv8:**

This notebook outlines the code used to develop a detection model with YOLOv8. Detection is performed on labeled underwater images of unexploded ordnance (UXO). The full project is available in this GitHub repository: https://github.com/MansourSaliba/UXO_detection_model

***Initial steps:***

In [None]:
# Mount Google Drive at /content/drive; every dataset and MLflow path used by
# the later cells lives under /content/drive/MyDrive.

from google.colab import drive

drive.mount('/content/drive')

In [None]:
# Import libraries
# Install packages before this step if not installed (e.g. !pip install ultralytics mlflow)

import os
import json
import mlflow
import torch
import yaml
from pathlib import Path
from ultralytics import YOLO
import shutil
import random
from tqdm import tqdm

In [None]:
# Dataset locations on the mounted Drive (edit the root to match your layout)

# Root folder: holds the raw data and receives the train/val/test split folders
OUTPUT_DIR = "/content/drive/MyDrive/UXO_project/UXO_dataset/processed/UXO"
IMAGE_DIR = f"{OUTPUT_DIR}/gopro"    # Source images
LABEL_DIR = f"{OUTPUT_DIR}/labels"   # Source JSON labels

In [None]:
# Configure MLflow: file-based tracking store on Drive plus the experiment name

MLFLOW_TRACKING_URI = "file:/content/drive/MyDrive/UXO_project/mlruns"
MLFLOW_EXPERIMENT_NAME = "yolo_detection"

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

In [None]:
# Report whether PyTorch sees a CUDA GPU in this runtime

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

***Preparing dataset:***

In [None]:
# Splitting data and saving in new directories

import os
import shutil
from sklearn.model_selection import train_test_split

def reorganize_yolo_dataset(base_path):
    """Split the image/label pairs under *base_path* into train/val/test folders.

    Reads ``.jpg`` images from ``<base_path>/gopro`` and the same-named
    ``.json`` labels from ``<base_path>/labels``, then copies them into
    ``<base_path>/{train,val,test}/{images,labels}`` using an 80/10/10 split.
    Source files are copied, not moved.

    Args:
        base_path: Dataset root containing the ``gopro`` and ``labels`` folders.

    Note:
        Files already present in the split folders are left in place, so
        re-running after the source data changed can leave stale files from
        an earlier split behind.
    """
    # Define paths
    source_images = os.path.join(base_path, 'gopro')
    source_labels = os.path.join(base_path, 'labels')

    # Create new directory structure
    for split in ('train', 'val', 'test'):
        for folder in ('images', 'labels'):
            os.makedirs(os.path.join(base_path, split, folder), exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sort them so the fixed
    # random_state below really produces the same split on every run.
    image_files = sorted(f for f in os.listdir(source_images) if f.endswith('.jpg'))

    # Split data (80% train, 10% val, 10% test)
    train_files, temp_files = train_test_split(image_files, test_size=0.2, random_state=42)
    val_files, test_files = train_test_split(temp_files, test_size=0.5, random_state=42)

    def copy_files(files, split):
        """Copy images and their labels into *split*; return images lacking a label."""
        missing_labels = []
        for file in files:
            # Copy image
            src_img = os.path.join(source_images, file)
            dst_img = os.path.join(base_path, split, 'images', file)
            shutil.copy2(src_img, dst_img)

            # Copy corresponding label. splitext only swaps the final
            # extension, whereas str.replace would rewrite every '.jpg'
            # occurring anywhere in the file name.
            label_file = os.path.splitext(file)[0] + '.json'
            src_label = os.path.join(source_labels, label_file)
            dst_label = os.path.join(base_path, split, 'labels', label_file)
            if os.path.exists(src_label):
                shutil.copy2(src_label, dst_label)
            else:
                missing_labels.append(file)
        return missing_labels

    # Copy files to respective directories
    unlabeled = []
    for split, files in (('train', train_files), ('val', val_files), ('test', test_files)):
        unlabeled.extend(copy_files(files, split))

    print("Dataset reorganization complete!")
    print(f"Train set: {len(train_files)} images")
    print(f"Validation set: {len(val_files)} images")
    print(f"Test set: {len(test_files)} images")
    if unlabeled:
        # Surface images that were copied without a label instead of skipping silently
        print(f"Warning: {len(unlabeled)} images have no matching JSON label")

# Call the function with your base path
# (the value below is the same dataset root as OUTPUT_DIR defined earlier;
# keep the two in sync when adjusting paths)
base_path = '/content/drive/MyDrive/UXO_project/UXO_dataset/processed/UXO'
reorganize_yolo_dataset(base_path)

In [None]:
# Convert JSON labels to YOLO format

import os
import json
import yaml
from tqdm import tqdm

# Define paths
# Dataset root containing the train/val/test folders; the functions in this
# cell read it as a module-level constant rather than taking it as an argument.
BASE_PATH = '/content/drive/MyDrive/UXO_project/UXO_dataset/processed/UXO'

def _bbox_to_yolo_line(label_data, img_width, img_height, class_id=0):
    """Format one pixel-space bounding box as a YOLO label line.

    Args:
        label_data: Mapping with ``x_min``, ``x_max``, ``y_min`` and ``y_max``.
        img_width: Image width used to normalise the x values.
        img_height: Image height used to normalise the y values.
        class_id: Class index written at the start of the line.

    Returns:
        ``"<class_id> <x_center> <y_center> <width> <height>"`` where the four
        box values are divided by the image size and printed with six decimals.
    """
    width = label_data['x_max'] - label_data['x_min']
    height = label_data['y_max'] - label_data['y_min']
    x_center = (label_data['x_min'] + width / 2) / img_width
    y_center = (label_data['y_min'] + height / 2) / img_height
    width_norm = width / img_width
    height_norm = height / img_height
    return f"{class_id} {x_center:.6f} {y_center:.6f} {width_norm:.6f} {height_norm:.6f}"


def convert_json_to_yolo_format(img_width=1920, img_height=1080):
    """Convert JSON labels to YOLO format for all splits.

    For every ``.json`` file in ``BASE_PATH/<split>/labels`` a ``.txt`` file
    with the same stem is written next to it, holding a single line for class
    id 0 ("Unexploded Ordnance").

    Args:
        img_width: Width in pixels assumed for every image (default 1920).
        img_height: Height in pixels assumed for every image (default 1080).

    Raises:
        ValueError: If ``img_width`` or ``img_height`` is not positive.

    Note:
        The same size is applied to all images; pass the real dimensions if
        the dataset is not 1920x1080, otherwise the normalised values are wrong.
    """
    if img_width <= 0 or img_height <= 0:
        raise ValueError("img_width and img_height must be positive")

    for split in ['train', 'val', 'test']:
        print(f"\nProcessing {split} set...")

        # Labels are converted in place inside the split's labels folder
        labels_dir = os.path.join(BASE_PATH, split, 'labels')

        # Get all JSON label files
        json_files = [f for f in os.listdir(labels_dir) if f.endswith('.json')]

        for json_file in tqdm(json_files, desc=f"Converting {split} labels"):
            # Read JSON label
            with open(os.path.join(labels_dir, json_file), 'r') as f:
                label_data = json.load(f)

            # Format: class_id x_center y_center width height
            yolo_line = _bbox_to_yolo_line(label_data, img_width, img_height)

            # Save YOLO format label; splitext swaps only the final extension
            yolo_file = os.path.splitext(json_file)[0] + '.txt'
            with open(os.path.join(labels_dir, yolo_file), 'w') as f:
                f.write(yolo_line)

def create_dataset_yaml():
    """Write ``dataset.yaml`` into BASE_PATH describing the three splits.

    The file records BASE_PATH as the dataset root, the image folder of each
    split relative to that root, and the single class name ``UXO`` (id 0).
    """
    config_file = os.path.join(BASE_PATH, 'dataset.yaml')

    dataset_config = dict(
        path=BASE_PATH,          # Base directory
        train='train/images',    # Split folders are relative to the base path
        val='val/images',
        test='test/images',
        names={0: 'UXO'},        # Class id -> class name
    )

    with open(config_file, 'w') as handle:
        yaml.dump(dataset_config, handle)

    print(f"\nCreated dataset.yaml at: {config_file}")

# Run the conversion (writes a .txt label next to every .json label)
print("Converting JSON labels to YOLO format...")
convert_json_to_yolo_format()

# Create YAML configuration (dataset.yaml inside BASE_PATH)
print("\nCreating dataset configuration...")
create_dataset_yaml()

# Verify the conversion
def verify_conversion():
    """Print, per split, how many ``.txt`` labels exist and show one of them."""
    for subset in ('train', 'val', 'test'):
        subset_labels = os.path.join(BASE_PATH, subset, 'labels')

        yolo_files = []
        for entry in os.listdir(subset_labels):
            if entry.endswith('.txt'):
                yolo_files.append(entry)

        print(f"\n{subset.upper()} SET:")
        print(f"Converted {len(yolo_files)} labels to YOLO format")

        # Nothing to sample when the split holds no converted labels
        if not yolo_files:
            continue

        # Show the first listed label as a sample
        with open(os.path.join(subset_labels, yolo_files[0]), 'r') as handle:
            print(f"Sample YOLO label format: {handle.read().strip()}")

# Sanity check: print the per-split label counts and one sample label each
print("\nVerifying conversion...")
verify_conversion()

***Training:***

In [None]:
# Initialize YOLO model
model = YOLO('yolov8n.pt')  # Using YOLOv8 nano model

# Training configuration; every key is passed to model.train() as a keyword
# argument and logged to MLflow as a parameter
training_config = {
    'data': os.path.join(OUTPUT_DIR, 'dataset.yaml'),
    'epochs': 50,
    'imgsz': 640,
    'batch': 16,
    'patience': 10,
    'device': 0 if torch.cuda.is_available() else 'cpu'
}

# Start MLflow run
with mlflow.start_run(run_name="yolov8_training") as run:
    # Log parameters
    mlflow.log_params(training_config)

    # Train model
    results = model.train(**training_config)

    # Log metrics
    final_metrics = results.results_dict
    mlflow.log_metrics({
        'mAP50': final_metrics['metrics/mAP50(B)'],
        'mAP50-95': final_metrics['metrics/mAP50-95(B)'],
        'precision': final_metrics['metrics/precision(B)'],
        'recall': final_metrics['metrics/recall(B)']
    })

    # Save model: nothing else in this cell writes model_path, so copy the
    # checkpoint produced by the trainer there before logging it as an
    # artifact (falls back to the last checkpoint if no best one exists).
    model_path = os.path.join(OUTPUT_DIR, 'YOLO_uxo.pt')
    trainer = model.trainer
    trained_weights = trainer.best if os.path.exists(trainer.best) else trainer.last
    shutil.copy2(trained_weights, model_path)

    model.export(format='onnx')  # Export to ONNX format
    mlflow.log_artifact(model_path)
    mlflow.log_artifact(os.path.join(OUTPUT_DIR, 'dataset.yaml'))

***Testing:***

In [None]:
# Checking metrics on test data

def evaluate_on_test(model_path=None, model=None, data_yaml=None):
    """Evaluate a model on the test split and log the results to MLflow.

    Any MLflow run that is still active is ended first; the evaluation then
    runs once inside its own run named ``yolov8_test_evaluation``.

    Args:
        model_path (str): Path to .pt model file (either this or 'model' must be provided)
        model (YOLO): Loaded YOLO model object (alternative to model_path)
        data_yaml (str): Path to dataset.yaml (defaults to OUTPUT_DIR/dataset.yaml)

    Returns:
        dict: The logged metrics, keyed ``test_mAP50``, ``test_mAP50-95``,
        ``test_precision``, ``test_recall``, ``test_f1`` and
        ``test_inference_time``.

    Raises:
        ValueError: If neither ``model_path`` nor ``model`` is given.
        Exception: Any error raised during evaluation or logging is printed
            and re-raised.
    """
    from ultralytics import YOLO
    import os
    import mlflow

    # Ensure any existing run is ended
    if mlflow.active_run():
        mlflow.end_run()

    # Handle input paths
    if data_yaml is None:
        data_yaml = os.path.join(OUTPUT_DIR, 'dataset.yaml')

    # Load model if path provided
    if model is None:
        if model_path is None:
            raise ValueError("Either model_path or model object must be provided")
        model = YOLO(model_path)

    # One run wraps the whole evaluation, whether the model was passed in or
    # loaded from disk, so metrics and artifacts land in the run whose id is
    # printed below. The context manager also closes the run on failure.
    with mlflow.start_run(run_name="yolov8_test_evaluation") as run:
        try:
            test_results = model.val(
                data=data_yaml,
                split='test',
                plots=True,  # Generate confusion matrix and F1 curve
                save_json=True  # Save metrics to JSON
            )

            precision = test_results.box.mp  # Mean precision
            recall = test_results.box.mr     # Mean recall

            # Extract all available metrics
            metrics = {
                'test_mAP50': test_results.box.map50,  # mAP@0.5
                'test_mAP50-95': test_results.box.map,  # mAP@0.5:0.95
                'test_precision': precision,
                'test_recall': recall,
                # F1 computed manually; the epsilon avoids division by zero
                'test_f1': 2 * (precision * recall) / (precision + recall + 1e-16),
                'test_inference_time': test_results.speed['inference'],
            }

            mlflow.log_metrics(metrics)
            mlflow.log_artifacts(test_results.save_dir)

            print(f"\n✅ Evaluation complete! Metrics logged to MLflow run: {run.info.run_id}")
            return metrics

        except Exception as e:
            print(f"❌ Evaluation failed: {str(e)}")
            raise

# Evaluate the saved weights on the test split.
# NOTE(review): this path is under UXO_project/models, whereas the training
# cell builds its model path under OUTPUT_DIR — confirm the weights are here.
evaluate_on_test(model_path='/content/drive/MyDrive/UXO_project/models/YOLO_uxo.pt')

***Sample prediction:***

In [None]:
from ultralytics import YOLO
import matplotlib.pyplot as plt

# Load the trained weights and run inference on a single test image
model = YOLO('/content/drive/MyDrive/UXO_project/models/YOLO_uxo.pt')
results = model.predict('/content/drive/MyDrive/UXO_project/UXO_dataset/processed/UXO/test/images/6.jpg')

# Display with matplotlib
# Results.plot() returns a BGR numpy array (OpenCV channel order) while
# plt.imshow() expects RGB, so reverse the channel axis to get correct colours.
annotated_img = results[0].plot()[..., ::-1]
plt.imshow(annotated_img)
plt.axis('off')
plt.show()