# GPR Hyperbola Detection with YOLOv8 (Colab-ready)

This notebook trains and evaluates a YOLOv8 detector for hyperbola detection in Ground Penetrating Radar (GPR) images.

**Setup Instructions:**
- Enable GPU: Runtime → Change runtime type → Hardware accelerator → GPU
- Uses Colab's built-in PyTorch (no torch install needed)
- Pipeline: dataset → conversion → train → evaluate mAP → predict & visualize

## 1. Setup and Installation

In [None]:
# GPU info (optional)
# IPython shell escape: run nvidia-smi to show the GPU attached to this runtime.
# The output is piped through `cat` -- presumably to get plain, unpaged text
# in the cell output (TODO confirm; the cell works without the pipe as well).
!nvidia-smi | cat

In [None]:
# Setup
# Install the third-party packages quietly (-q). torch is intentionally absent
# from the list: the notebook introduction states that Colab's built-in
# PyTorch is used.
!pip -q install ultralytics opencv-python numpy tqdm matplotlib

# Import only after the install above so a fresh runtime can resolve ultralytics.
from ultralytics import YOLO
import torch
# Printing the torch version doubles as a check that both imports succeeded.
print("Ultralytics ready. Torch:", torch.__version__)

## 2. Get Dataset

In [None]:
# Get dataset
# Create the data directory; -p makes this a no-op when it already exists.
!mkdir -p /content/data
# Clone only when the checkout directory is missing (`test -d ... ||`), so
# re-running the cell does not fail on an existing clone.
!test -d /content/data/gpr-data-classifier || git clone https://github.com/irenexychen/gpr-data-classifier /content/data/gpr-data-classifier
# List the first 40 lines of the checkout as a quick visual check.
!ls -la /content/data/gpr-data-classifier | head -n 40

## 3. Convert Dataset from VOC to YOLO Format

In [None]:
# Convert VOC XMLs to YOLO and create dataset YAML
import os, random, shutil, xml.etree.ElementTree as ET
from pathlib import Path

def read_label_map(label_map_path: Path):
    """Return the class names listed in a label-map text file.

    Every line that, after stripping whitespace, starts with ``name:``
    contributes one class name: the text after the first colon with
    surrounding whitespace and quote characters removed. Names are returned
    in file order.

    Args:
        label_map_path: Location of the label-map file.

    Returns:
        The list of class names, or ``["hyperbola"]`` when the file does not
        exist or contains no ``name:`` line.
    """
    fallback = ["hyperbola"]
    if not label_map_path.exists():
        return fallback

    stripped_lines = (raw.strip() for raw in label_map_path.read_text().splitlines())
    found = [
        entry.split(":", 1)[1].strip().strip('\'"\'')
        for entry in stripped_lines
        if entry.startswith("name:")
    ]
    return found if found else fallback

def convert_voc_box_to_yolo(bbox, img_w, img_h):
    """Turn a corner-style box into a normalised centre/size box.

    Each edge is shifted down by one pixel (presumably because VOC pixel
    coordinates are 1-based -- confirm against the annotation tool) and
    clamped to the image, then the centre and extent are divided by the image
    dimension.

    Args:
        bbox: ``(xmin, ymin, xmax, ymax)`` in pixels.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        ``(cx, cy, w, h)`` as fractions of the image size. The values for an
        axis whose image dimension is not positive are ``0.0``. ``w`` or ``h``
        can be zero or negative for degenerate input boxes.
    """

    def _normalised_axis(low, high, extent):
        # Same shift-and-clamp on both axes: low edge floors at 0, high edge
        # caps at the last pixel index.
        low = max(0, low - 1)
        high = min(extent - 1, high - 1)
        span = high - low
        centre = low + span / 2.0
        if extent > 0:
            return centre / extent, span / extent
        return 0.0, 0.0

    left, top, right, bottom = bbox
    cx, box_w = _normalised_axis(left, right, img_w)
    cy, box_h = _normalised_axis(top, bottom, img_h)
    return (cx, cy, box_w, box_h)

def parse_voc_xml(xml_path: Path):
    """Read image size and labelled boxes from one VOC-style XML annotation.

    All numeric fields (image width/height and the four box edges) are parsed
    the same way: via ``float`` and then truncated to ``int``, so decimal
    strings such as ``"640.0"`` are accepted everywhere.

    Args:
        xml_path: Path of the annotation file.

    Returns:
        ``(img_w, img_h, objects)`` where ``objects`` is a list of
        ``(name, (xmin, ymin, xmax, ymax))`` tuples in document order.

    Raises:
        xml.etree.ElementTree.ParseError: The file is not well-formed XML.
        AttributeError: A required element (``size``, ``width``, ``height``,
            ``name``, ``bndbox`` or a box edge) is missing.
        ValueError, TypeError: A numeric field is empty or not a number.
    """

    def _int_text(node):
        # float() first so "640.0" parses; int() then truncates the fraction.
        return int(float(node.text))

    root = ET.parse(str(xml_path)).getroot()
    size = root.find("size")
    img_w = _int_text(size.find("width"))
    img_h = _int_text(size.find("height"))

    objects = []
    for obj in root.findall("object"):
        name = obj.find("name").text
        bnd = obj.find("bndbox")
        box = tuple(_int_text(bnd.find(tag)) for tag in ("xmin", "ymin", "xmax", "ymax"))
        objects.append((name, box))
    return img_w, img_h, objects

def ensure_dirs(paths):
    """Create each directory in *paths*, including missing parents.

    Directories that already exist are left untouched.

    Args:
        paths: Iterable of path-like values accepted by ``Path``.
    """
    for directory in map(Path, paths):
        directory.mkdir(parents=True, exist_ok=True)

def write_dataset_yaml(output_root: Path, dataset_name: str, names: list):
    """Write ``<dataset_name>.yaml`` into *output_root* and return its path.

    The file holds four keys: ``path`` (the output root), ``train`` and
    ``val`` (image sub-directories relative to ``path``) and ``names`` (the
    class list, written using its Python ``str()`` form).

    Args:
        output_root: Dataset root directory; must already exist.
        dataset_name: File name stem for the YAML file.
        names: Class names in class-id order.

    Returns:
        Path of the YAML file that was written.
    """
    target = output_root / f"{dataset_name}.yaml"
    content = f"path: {output_root}\ntrain: images/train\nval: images/val\nnames: {names}\n"
    target.write_text(content)
    return target

In [None]:
def convert_dataset(source_root, output_root, val_split=0.2, seed=42, dataset_name="gpr"):
    """Convert a VOC-style dataset into a YOLO train/val directory layout.

    Expects ``<source_root>/images`` and ``<source_root>/annotations/xmls``;
    class names come from ``annotations/label_map.pbtxt`` via
    ``read_label_map``. Each XML is paired with an image of the same stem
    (``.jpg``, ``.jpeg`` or ``.png``) or, failing that, with the file named in
    the XML's ``<filename>`` element. Pairs are shuffled with the given seed,
    split, and written to ``images/{train,val}`` and ``labels/{train,val}``
    under *output_root*, followed by a dataset YAML.

    Samples that fail to parse, or that yield no box with a known class and
    positive size, are skipped and produce no output files.

    Args:
        source_root: Root of the VOC-style dataset.
        output_root: Destination root for the YOLO dataset (created if needed).
        val_split: Fraction of the paired samples assigned to validation; at
            least one sample always goes to validation.
        seed: Seed passed to ``random.seed`` before shuffling.
        dataset_name: Stem of the dataset YAML file.

    Returns:
        Path of the written dataset YAML, as a string.

    Raises:
        AssertionError: The image or XML directory is missing, or no
            image/XML pair was found.
    """
    random.seed(seed)
    source_root = Path(source_root)
    images_dir = source_root / "images"
    xml_dir = source_root / "annotations" / "xmls"
    label_map_path = source_root / "annotations" / "label_map.pbtxt"

    assert images_dir.exists() and xml_dir.exists(), f"Missing {images_dir} or {xml_dir}"
    class_names = read_label_map(label_map_path)
    name_to_id = {n: i for i, n in enumerate(class_names)}

    output_root = Path(output_root)
    images_train = output_root / "images" / "train"
    images_val = output_root / "images" / "val"
    labels_train = output_root / "labels" / "train"
    labels_val = output_root / "labels" / "val"
    ensure_dirs([images_train, images_val, labels_train, labels_val])
    # subset name -> (image directory, label directory)
    subset_dirs = {
        "train": (images_train, labels_train),
        "val": (images_val, labels_val),
    }

    def find_image(xml_path):
        """Return the image belonging to *xml_path*, or None when there is none."""
        for ext in (".jpg", ".jpeg", ".png"):
            cand = images_dir / f"{xml_path.stem}{ext}"
            # is_file() rather than exists(): a directory with an image-like
            # name must not be picked up and later handed to shutil.copy2.
            if cand.is_file():
                return cand
        # Fallback: trust the <filename> element inside the annotation.
        try:
            filename_node = ET.parse(str(xml_path)).getroot().find("filename")
            if filename_node is not None and filename_node.text:
                cand = images_dir / filename_node.text
                if cand.is_file():
                    return cand
        except (ET.ParseError, OSError, ValueError) as e:
            # Still best-effort (the sample is dropped), but no longer silent.
            print("Image lookup failed, skip:", xml_path, e)
        return None

    samples = []
    for xml_path in sorted(xml_dir.glob("*.xml")):
        img_path = find_image(xml_path)
        if img_path is not None:
            samples.append((img_path, xml_path))

    assert len(samples) > 0, "No image-xml pairs found."

    random.shuffle(samples)
    val_count = max(1, int(len(samples) * val_split))
    val_samples = set(samples[:val_count])

    def convert_and_copy(sample, subset):
        """Write the image copy and YOLO label file for one sample."""
        img_path, xml_path = sample
        try:
            img_w, img_h, objects = parse_voc_xml(xml_path)
        except Exception as e:
            # Deliberately broad: any malformed annotation is reported and
            # skipped instead of aborting the whole conversion.
            print("Parse error, skip:", xml_path, e)
            return

        lines = []
        for name, bbox in objects:
            if name not in name_to_id:
                continue
            cls = name_to_id[name]
            x, y, w, h = convert_voc_box_to_yolo(bbox, img_w, img_h)
            if w <= 0 or h <= 0:
                continue
            lines.append(f"{cls} {x:.6f} {y:.6f} {w:.6f} {h:.6f}")
        if not lines:
            return

        label_name = img_path.stem + ".txt"
        dst_img_dir, dst_lbl_dir = subset_dirs[subset]
        other_img_dir, other_lbl_dir = subset_dirs["val" if subset == "train" else "train"]
        # A previous run with another seed/val_split may have put this sample
        # in the other subset; remove that copy so train and val stay disjoint.
        for stale in (other_img_dir / img_path.name, other_lbl_dir / label_name):
            if stale.exists():
                stale.unlink()

        shutil.copy2(str(img_path), str(dst_img_dir / img_path.name))
        (dst_lbl_dir / label_name).write_text("\n".join(lines) + "\n")

    for sample in samples:
        subset = "val" if sample in val_samples else "train"
        convert_and_copy(sample, subset)

    yaml_path = write_dataset_yaml(output_root, dataset_name, class_names)
    print("Done. YAML at:", yaml_path)
    print("Train images:", len(list(images_train.glob('*'))), "Val images:", len(list(images_val.glob('*'))))
    return str(yaml_path)

In [None]:
# Run conversion
# Gather the conversion settings in one place, then convert the cloned
# dataset and keep the resulting YAML path for the training cell.
conversion_args = dict(
    source_root="/content/data/gpr-data-classifier/hyperbola-classifier",
    output_root="/content/data/gpr_yolo",
    val_split=0.2,
    seed=42,
    dataset_name="gpr",
)
data_yaml = convert_dataset(**conversion_args)
print("Using data yaml:", data_yaml)

## 4. Train YOLOv8 Model

In [None]:
# Train YOLOv8 (strong settings for T4)
from ultralytics import YOLO

# Set here as well so this cell can run without re-running the conversion cell.
data_yaml = "/content/data/gpr_yolo/gpr.yaml"
project = "/content/Results/yolo_runs"
name = "gpr_yolov8m_e150_i960_aug"

model = YOLO("yolov8m.pt")  # try 'yolov8l.pt' if VRAM permits

results = model.train(
    data=data_yaml,
    epochs=150,
    imgsz=960,
    batch=8,
    patience=30,  # early-stopping patience in epochs
    lr0=0.01,
    lrf=0.01,
    weight_decay=0.0005,
    momentum=0.937,
    warmup_epochs=3.0,
    # Augmentation: colour jitter, translate/scale, horizontal flip only.
    hsv_h=0.015, hsv_s=0.7, hsv_v=0.4,
    degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0,
    flipud=0.0, fliplr=0.5,
    mosaic=0.7, mixup=0.1, copy_paste=0.0,
    erasing=0.4,
    project=project,
    name=name,
)
print(results)

# Ultralytics appends a numeric suffix to the run directory when
# project/name already exists (exist_ok defaults to False), so on a re-run
# f"{project}/{name}" would point at the weights of the FIRST run. Ask the
# trainer for the checkpoint it actually wrote; fall back to the conventional
# path only if no trainer is attached to the model.
trainer = getattr(model, "trainer", None)
if trainer is not None:
    best_weights = str(trainer.best)
else:
    best_weights = f"{project}/{name}/weights/best.pt"
print("Best weights:", best_weights)

## 5. Evaluate Model Performance

In [None]:
# Evaluate mAP on val
from ultralytics import YOLO

# Validation settings: same image size and batch as training, a very low
# confidence threshold, and plots saved under the given project/name.
eval_settings = dict(
    data=data_yaml,
    split="val",
    imgsz=960,
    batch=8,
    conf=0.001,
    iou=0.6,
    plots=True,
    project="/content/Results/yolo_val",
    name="gpr_eval",
)

# Reload from the checkpoint produced by the training cell.
model = YOLO(best_weights)
metrics = model.val(**eval_settings)

box_metrics = metrics.box
print(f"mAP50-95: {box_metrics.map:.4f}")
print(f"mAP50:    {box_metrics.map50:.4f}")
print(f"mAP75:    {box_metrics.map75:.4f}")

## 6. Predict and Visualize Results

In [None]:
# Predict and visualize
from ultralytics import YOLO
from IPython.display import Image, display
import glob

model = YOLO(best_weights)
_ = model.predict(
    source="/content/data/gpr_yolo/images/val",
    imgsz=960,
    save=True,  # write annotated images to the run directory
    project="/content/Results/yolo_preds",
    name="preds"
)

# Ultralytics adds a numeric suffix to the run directory when it already
# exists, so after a re-run the fixed ".../preds" folder would show images
# from the first run. Use the directory the predictor actually wrote to, and
# fall back to the conventional path only if no predictor is attached.
predictor = getattr(model, "predictor", None)
if predictor is not None:
    pred_dir = str(predictor.save_dir)
else:
    pred_dir = "/content/Results/yolo_preds/preds"

# Saved files may keep the source image's extension, so do not assume .jpg.
pred_imgs = sorted(
    path
    for ext in ("jpg", "jpeg", "png")
    for path in glob.glob(f"{pred_dir}/*.{ext}")
)[:12]
if not pred_imgs:
    print("No prediction images found in", pred_dir)
# Show at most the first 12 annotated images inline.
for p in pred_imgs:
    display(Image(filename=p))