# FloodWatch YOLO - Maximum-Accuracy Training (YOLOv8l)

End-to-end training pipeline for the highest-accuracy flood detection model.

| Setting | Value |
|---------|-------|
| Model | YOLOv8l (43.7M params) |
| Resolution | 832px |
| Epochs | 280 (patience=60) |
| Optimizer | AdamW (lr=0.0015, wd=0.0005) |
| Box loss | 8.5 (CIoU) |
| Target | mAP50 >= 0.83-0.88 |

## 1. Install Dependencies

In [None]:
!pip install -q ultralytics roboflow opencv-python pillow numpy tqdm pyyaml

In [None]:
import os
import glob
import shutil
import yaml
import numpy as np
from pathlib import Path
from collections import Counter, defaultdict
from tqdm import tqdm
from PIL import Image, ImageOps
from ultralytics import YOLO
import torch

# Report the runtime up front so a missing GPU is noticed before the long
# training run starts.
print("PyTorch:", torch.__version__)
if torch.cuda.is_available():
    # The device-properties field is `total_memory` (in bytes). The previous
    # `total_mem` does not exist and raised AttributeError on GPU machines.
    vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    gpu_name = torch.cuda.get_device_name(0)
    print("GPU:", gpu_name, "(", round(vram_gb, 1), "GB VRAM)")
else:
    print("WARNING: No GPU detected!")
print("Done.")

## 2. Configuration

In [None]:
# ==============================================================
# CONFIGURE THESE VALUES
# ==============================================================
import os

# Roboflow settings
# SECURITY NOTE(review): an API key is committed in this file. Supply the key
# through the ROBOFLOW_API_KEY environment variable instead and rotate the
# committed one; the literal below is kept only as a fallback so existing
# runs keep working.
ROBOFLOW_API_KEY = os.environ.get("ROBOFLOW_API_KEY") or "yUaG6RQfZ6ZrAFlwj6w7"  # <-- ENTER YOUR API KEY
ROBOFLOW_WORKSPACE = "modellabel"  # Add workspace name if known, else leave empty
ROBOFLOW_PROJECT = "yolo-floods-relief"
ROBOFLOW_VERSION = 1

# Extra flood datasets (public Roboflow projects), given as "workspace/project"
EXTRA_PROJECTS = [
    "roboflow-universe-projects/flood-detection",
    "roboflow-universe-projects/flood-area-detection",
]

# Paths
BASE_DIR = "./flood_dataset"   # primary dataset download location
MERGED_DIR = "./flood_merged"  # merged dataset that training reads

# Classes (a label's class id is the index into this list)
CLASS_NAMES = ["person", "car", "bicycle", "motorcycle", "bus", "truck"]
NUM_CLASSES = len(CLASS_NAMES)
VALID_CLASS_IDS = set(range(NUM_CLASSES))

# Training
MODEL_NAME = "yolov8l.pt"
IMG_SIZE = 832
EPOCHS = 280
PATIENCE = 60

# Label cleaning
MIN_BOX_AREA = 0.0005   # boxes with a smaller normalized area (w * h) are dropped
DUP_IOU_THRESH = 0.95   # same-class boxes overlapping more than this are duplicates
TIGHTEN = 0.94  # shrink boxes 3% each side

print("Config loaded.")
print("  Model:", MODEL_NAME)
print("  Resolution:", IMG_SIZE)
print("  Epochs:", EPOCHS, "(patience", str(PATIENCE) + ")")
print("  Classes:", CLASS_NAMES)


## 3. Download Dataset from Roboflow

In [None]:
from roboflow import Roboflow

# Authenticate, then walk workspace -> project -> version for the primary dataset.
rf = Roboflow(api_key=ROBOFLOW_API_KEY)
# With an empty workspace name, rf.workspace() is called without an argument.
workspace = rf.workspace(ROBOFLOW_WORKSPACE) if ROBOFLOW_WORKSPACE else rf.workspace()
project = workspace.project(ROBOFLOW_PROJECT)
version = project.version(ROBOFLOW_VERSION)
ds = version.download("yolov8", location=BASE_DIR)

print(f"Primary dataset downloaded to: {BASE_DIR}")
# Per-split image counts, for the splits that exist on disk.
for split in ("train", "valid", "test"):
    img_dir = os.path.join(BASE_DIR, split, "images")
    if not os.path.isdir(img_dir):
        continue
    n = sum(
        1
        for entry in os.listdir(img_dir)
        if entry.lower().endswith((".jpg", ".jpeg", ".png"))
    )
    print(f"  {split}: {n} images")

In [None]:
# Download extra datasets
from roboflow import Roboflow
rf = Roboflow(api_key=ROBOFLOW_API_KEY)

# Local directories of the extra datasets that downloaded successfully,
# in EXTRA_PROJECTS order (failed projects are simply absent).
extra_dirs = []

for i, proj_path in enumerate(EXTRA_PROJECTS):
    loc = f"./extra_flood_{i}"
    try:
        proj = rf.project(proj_path)

        # Probe version numbers 1-9 and take the first one that loads.
        version = None
        for v in range(1, 10):
            try:
                version = proj.version(v)
                break
            except Exception:
                # Was a bare `except:`, which also swallowed KeyboardInterrupt
                # and SystemExit and made the probe impossible to interrupt.
                continue

        if version is None:
            raise RuntimeError("No accessible version found")

        version.download("yolov8", location=loc)
        extra_dirs.append(loc)

        print(f"Downloaded: {proj_path} -> {loc}")

    except Exception as e:
        # Best effort: a failing extra dataset is reported and skipped so the
        # remaining datasets and the primary pipeline still run.
        print(f"Failed: {proj_path} -> {e}")

print("Extra datasets downloaded:", len(extra_dirs))


## 4. Normalize Labels (Polygon to BBox)

Roboflow may export segmentation polygons. YOLO detect needs: `class xc yc w h`

In [None]:
def polygon_to_bbox(parts):
    """Collapse a polygon label row into a YOLO bounding-box row.

    ``parts`` is the whitespace-split row: the class id first, then x and y
    values alternating. The polygon's extremes are clamped to [0, 1] before
    the enclosing box is measured.

    Returns the string ``"cls xc yc w h"`` with six decimals per float, or
    ``None`` when the row has fewer than five tokens, does not parse, has an
    odd number of coordinates, or collapses to zero width or height.
    """
    if len(parts) < 5:
        return None
    try:
        cls_id = int(parts[0])
        coords = [float(tok) for tok in parts[1:]]
    except (ValueError, IndexError):
        return None
    if len(coords) % 2:
        return None

    xs, ys = coords[0::2], coords[1::2]
    if not xs or not ys:
        return None

    left, right = max(0.0, min(xs)), min(1.0, max(xs))
    top, bottom = max(0.0, min(ys)), min(1.0, max(ys))
    width, height = right - left, bottom - top
    if width <= 0 or height <= 0:
        return None

    xc = left + width / 2.0
    yc = top + height / 2.0
    return f"{cls_id} {xc:.6f} {yc:.6f} {width:.6f} {height:.6f}"


def normalize_labels(dataset_dir):
    """Rewrite polygon label rows as bounding boxes, editing files in place.

    Looks in ``<dataset_dir>/<split>/labels`` for the splits train, valid,
    test and val (missing ones are skipped) and visits every ``.txt`` file in
    sorted order. A file is rewritten only when at least one of its rows was a
    polygon that converted successfully; the rewrite keeps the existing
    5-token rows plus the converted rows and drops everything else.

    Returns ``(polygons_converted, bbox_rows_seen, files_rewritten)``.
    """
    n_poly = n_bbox = n_rewritten = 0
    for split in ("train", "valid", "test", "val"):
        label_root = os.path.join(dataset_dir, split, "labels")
        if not os.path.isdir(label_root):
            continue
        for name in sorted(os.listdir(label_root)):
            if not name.endswith(".txt"):
                continue
            label_path = os.path.join(label_root, name)
            with open(label_path, "r") as src:
                rows = [raw.strip() for raw in src]

            kept = []
            saw_polygon = False
            for row in rows:
                tokens = row.split()
                if len(tokens) < 5:
                    # Blank or too short: neither counted nor kept.
                    continue
                if len(tokens) == 5:
                    kept.append(row)
                    n_bbox += 1
                    continue
                box = polygon_to_bbox(tokens)
                if box is None:
                    continue
                kept.append(box)
                n_poly += 1
                saw_polygon = True

            if saw_polygon:
                with open(label_path, "w") as dst:
                    dst.write("\n".join(kept) + "\n")
                n_rewritten += 1
    return n_poly, n_bbox, n_rewritten


# Base dataset first; it is the only one whose full statistics are printed.
print("Normalizing base dataset...")
polys, bboxes, fixed = normalize_labels(BASE_DIR)
for caption, value in (
    ("Polygons converted:", polys),
    ("Already bbox:", bboxes),
    ("Files fixed:", fixed),
):
    print("  " + caption, value)

# Extra datasets: only the polygon count is reported.
for d in extra_dirs:
    print(f"Normalizing {d} ...")
    p, b, f2 = normalize_labels(d)
    print(f"  Polygons converted: {p}")

print("All labels normalized.")

## 5. Clean Labels and Tighten Bboxes

In [None]:
def calc_iou(b1, b2):
    """Return the intersection-over-union of two ``(cls, xc, yc, w, h)`` boxes.

    The class entry is ignored. Boxes that do not overlap (or only touch), and
    pairs whose union area is not positive, give 0.0.
    """
    _, cx_a, cy_a, w_a, h_a = b1
    _, cx_b, cy_b, w_b, h_b = b2

    # The overlap rectangle is bounded by the innermost edge on each side.
    left = max(cx_a - w_a / 2.0, cx_b - w_b / 2.0)
    top = max(cy_a - h_a / 2.0, cy_b - h_b / 2.0)
    right = min(cx_a + w_a / 2.0, cx_b + w_b / 2.0)
    bottom = min(cy_a + h_a / 2.0, cy_b + h_b / 2.0)
    if right <= left or bottom <= top:
        return 0.0

    overlap = (right - left) * (bottom - top)
    union = w_a * h_a + w_b * h_b - overlap
    return overlap / union if union > 0 else 0.0


def clean_and_tighten(dataset_dir):
    """Clean and tighten every YOLO label file under ``dataset_dir``, in place.

    For each of the splits train/valid/test/val that has a ``labels`` folder,
    every ``.txt`` file is parsed and rewritten. Per annotation row:

    * blank rows are ignored; rows that are not exactly ``cls xc yc w h`` or
      whose values do not parse are dropped;
    * rows whose class id is not in ``VALID_CLASS_IDS`` are dropped;
    * the box is clipped to the image ([0, 1] on both axes) and its centre
      and size are recomputed from the clipped edges;
    * boxes that are degenerate or smaller than ``MIN_BOX_AREA`` are dropped;
    * width and height are scaled by ``TIGHTEN`` (with a floor of 0.005);
    * a box is dropped when an already kept box of the same class overlaps it
      with IoU above ``DUP_IOU_THRESH``.

    Files left without annotations are kept as empty files.

    NOTE: the tighten factor is applied on every call, so running this twice
    over the same directory shrinks the boxes twice.

    Args:
        dataset_dir: dataset root containing ``<split>/labels`` folders.

    Returns:
        dict mapping counter name to count. Keys appear only when non-zero:
        ``total_files``, ``bad_format``, ``parse_error``, ``bad_class``,
        ``tiny``, ``duplicate``, ``empty``, ``clean_annotations``.
    """
    stats = defaultdict(int)
    for split in ["train", "valid", "test", "val"]:
        lbl_dir = os.path.join(dataset_dir, split, "labels")
        if not os.path.isdir(lbl_dir):
            continue
        label_files = sorted(f for f in os.listdir(lbl_dir) if f.endswith(".txt"))
        for fname in tqdm(label_files, desc="Cleaning " + split):
            fpath = os.path.join(lbl_dir, fname)
            with open(fpath, "r") as f:
                lines = f.readlines()
            stats["total_files"] += 1

            anns = []
            for line in lines:
                parts = line.strip().split()
                if not parts:
                    # A blank line is not a malformed annotation.
                    continue
                if len(parts) != 5:
                    stats["bad_format"] += 1
                    continue
                try:
                    cls = int(parts[0])
                    xc = float(parts[1])
                    yc = float(parts[2])
                    w = float(parts[3])
                    h = float(parts[4])
                except ValueError:
                    stats["parse_error"] += 1
                    continue
                if cls not in VALID_CLASS_IDS:
                    stats["bad_class"] += 1
                    continue

                # Clip the box edges to the image and rebuild centre/size from
                # them. Shrinking symmetrically around the old centre (the
                # previous behaviour) threw away the in-frame part of boxes
                # that merely crossed the border.
                x1 = max(0.0, xc - w / 2.0)
                y1 = max(0.0, yc - h / 2.0)
                x2 = min(1.0, xc + w / 2.0)
                y2 = min(1.0, yc + h / 2.0)
                w = x2 - x1
                h = y2 - y1
                # Non-positive sizes are rejected explicitly: two negative
                # sizes would otherwise multiply to a positive "area".
                if w <= 0 or h <= 0 or w * h < MIN_BOX_AREA:
                    stats["tiny"] += 1
                    continue
                xc = (x1 + x2) / 2.0
                yc = (y1 + y2) / 2.0

                # Tighten around the (clipped) centre, never below 0.005.
                w = max(w * TIGHTEN, 0.005)
                h = max(h * TIGHTEN, 0.005)
                anns.append((cls, xc, yc, w, h))

            # Keep the first box of each near-identical same-class group.
            final = []
            for a in anns:
                if any(a[0] == e[0] and calc_iou(a, e) > DUP_IOU_THRESH for e in final):
                    stats["duplicate"] += 1
                else:
                    final.append(a)

            if final:
                stats["clean_annotations"] += len(final)
            else:
                # File is kept but empty (hard negative).
                stats["empty"] += 1
            with open(fpath, "w") as f:
                f.writelines(
                    "{} {:.6f} {:.6f} {:.6f} {:.6f}\n".format(cls, xc, yc, w, h)
                    for cls, xc, yc, w, h in final
                )
    return dict(stats)


# Base dataset: clean it and print the full per-counter report.
print("Cleaning base dataset...")
report = clean_and_tighten(BASE_DIR)
print("\nCleaning report:")
for key, value in sorted(report.items()):
    print(f"  {key}: {value}")

# Extra datasets get the same treatment; their reports are discarded.
for d in extra_dirs:
    print(f"Cleaning {d} ...")
    clean_and_tighten(d)

print("Labels cleaned and tightened.")

## 6. Merge Datasets

In [None]:
def merge_into(src_dir, dst_dir, prefix, class_map=None):
    """Copy images and their labels from ``src_dir`` into ``dst_dir``.

    Source splits train/valid/test/val are read from ``<src_dir>/<split>``; a
    source ``val`` split is written into the destination ``valid`` split.
    Only ``.jpg``/``.jpeg``/``.png`` files (any letter case) are treated as
    images. Existing destination files are never overwritten.

    Args:
        src_dir: root of the dataset to copy from.
        dst_dir: root of the merged dataset (created as needed).
        prefix: string prepended to every copied file name.
        class_map: optional ``{old_class_id: new_class_id}``. When given, only
            5-token label rows whose class id is a key with a non-None value
            are kept, rewritten with the new id. When ``None``, label files
            are copied verbatim.

    Returns:
        Number of source images visited (also counted when their destination
        files already existed).
    """
    import shutil  # local import so the function works without cell-level imports

    image_exts = (".jpg", ".jpeg", ".png")
    copied = 0
    for split in ["train", "valid", "test", "val"]:
        dst_split = "valid" if split == "val" else split
        src_img_dir = os.path.join(src_dir, split, "images")
        src_lbl_dir = os.path.join(src_dir, split, "labels")
        dst_img_dir = os.path.join(dst_dir, dst_split, "images")
        dst_lbl_dir = os.path.join(dst_dir, dst_split, "labels")
        if not os.path.isdir(src_img_dir):
            continue
        os.makedirs(dst_img_dir, exist_ok=True)
        os.makedirs(dst_lbl_dir, exist_ok=True)

        for fname in os.listdir(src_img_dir):
            if not fname.lower().endswith(image_exts):
                continue
            dst_fname = prefix + fname

            # Image
            src_path = os.path.join(src_img_dir, fname)
            dst_path = os.path.join(dst_img_dir, dst_fname)
            if not os.path.exists(dst_path):
                shutil.copy2(src_path, dst_path)

            # Label
            base_name = os.path.splitext(fname)[0]
            dst_base = os.path.splitext(dst_fname)[0]
            src_lbl = os.path.join(src_lbl_dir, base_name + ".txt")
            dst_lbl = os.path.join(dst_lbl_dir, dst_base + ".txt")

            if not os.path.exists(dst_lbl):
                if not os.path.isfile(src_lbl):
                    # No source label: create an empty one (background image).
                    with open(dst_lbl, "w"):
                        pass
                elif class_map is None:
                    shutil.copy2(src_lbl, dst_lbl)
                else:
                    mapped_lines = []
                    with open(src_lbl, "r") as f:
                        for line in f:
                            p = line.split()
                            if len(p) != 5:
                                continue
                            try:
                                old_id = int(p[0])
                            except ValueError:
                                # A malformed class id drops the row instead
                                # of aborting the whole merge.
                                continue
                            new_id = class_map.get(old_id)
                            if new_id is not None:
                                mapped_lines.append(" ".join([str(new_id)] + p[1:]))
                    # One row per line. With nothing mapped the file is truly
                    # empty (previously a lone "\n" was written).
                    with open(dst_lbl, "w") as f:
                        f.writelines(row + "\n" for row in mapped_lines)
            copied += 1
    return copied

import shutil
import os
import glob

# Start from a clean slate so files from an earlier run cannot linger.
if os.path.isdir(MERGED_DIR):
    shutil.rmtree(MERGED_DIR)

# The base dataset goes in unprefixed and with its class ids untouched.
print("Merging base dataset...")
n = merge_into(BASE_DIR, MERGED_DIR, "")
print(f"  Base: {n} files")

# Class-id maps that translate each extra dataset to the FloodWatch schema:
# 0: person, 1: car, 2: bicycle, 3: motorcycle, 4: bus, 5: truck
# Ids missing from a map are dropped by merge_into.
# NOTE(review): both maps are identity maps, i.e. they assume every extra
# dataset already uses the FloodWatch ids - confirm against each dataset's own
# class list.
# NOTE(review): x_maps is indexed by position in extra_dirs, which only holds
# datasets that downloaded successfully; a failed download shifts the pairing.
x_maps = [
    {cid: cid for cid in range(6)},
    {cid: cid for cid in range(6)},
]

# Extras are prefixed "x<i>_" so their file names cannot collide with the base set.
for i, d in enumerate(extra_dirs):
    prefix = f"x{i}_"
    cmap = None if i >= len(x_maps) else x_maps[i]
    n = merge_into(d, MERGED_DIR, prefix, class_map=cmap)
    print(f"  Extra {i} : {n} files")

# Per-split and overall image counts of the merged dataset.
total_images = 0
for split in ("train", "valid", "test"):
    img_dir = os.path.join(MERGED_DIR, split, "images")
    if not os.path.isdir(img_dir):
        continue
    count = sum(
        1
        for entry in os.listdir(img_dir)
        if entry.lower().endswith((".jpg", ".jpeg", ".png"))
    )
    total_images += count
    print(f"  {split}: {count} images")
print(f"  Total: {total_images} images merged.")



## 7. Balance Classes

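Oversample minority classes toward ~70% of the majority class's annotation count by adding horizontally flipped copies of the training images that contain them (at most 3 copies per source image).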

In [None]:
lbl_dir = os.path.join(MERGED_DIR, "train", "labels")
img_dir = os.path.join(MERGED_DIR, "train", "images")

# Tally annotations per class and remember which label files mention each class.
counts = Counter()
cls_files = defaultdict(set)
for fname in os.listdir(lbl_dir):
    if not fname.endswith(".txt"):
        continue
    fpath = os.path.join(lbl_dir, fname)
    with open(fpath, "r") as f:
        rows = [raw.split() for raw in f]
    for parts in rows:
        if len(parts) != 5:
            continue
        cls = int(parts[0])
        counts[cls] += 1
        cls_files[cls].add(fname)

print("Before balancing:")
for c in sorted(counts):
    # Ids beyond CLASS_NAMES are shown as the bare number.
    name = CLASS_NAMES[c] if c < len(CLASS_NAMES) else str(c)
    n_imgs = len(cls_files[c])
    print(f"  Class {c} ({name}): {counts[c]} annotations, {n_imgs} images")

# Oversample under-represented classes by adding mirrored copies of training
# images that contain them. Runs only when at least one annotation was counted.
if len(counts) > 0:
    max_count = max(counts.values())
    # Goal per class: 70% of the largest class's annotation count.
    target = int(max_count * 0.7)
    total_added = 0
    for cls_id in sorted(counts.keys()):
        # Classes already at or above the goal are left alone.
        if counts[cls_id] >= target:
            continue
        files = sorted(list(cls_files.get(cls_id, set())))
        if len(files) == 0:
            continue
        # `needed` is measured in annotations but spent one per duplicated
        # file, and each file is duplicated at most 3 times.
        # NOTE(review): a duplicated file carries all of its rows (possibly
        # several of this class, plus other classes), so the final counts can
        # differ from `target` - check the recount printed afterwards.
        needed = target - counts[cls_id]
        max_dups = min(needed, len(files) * 3)
        for i in range(max_dups):
            # Cycle through the source files round-robin.
            src_lbl_name = files[i % len(files)]
            base = os.path.splitext(src_lbl_name)[0]
            dup_base = base + "_bal" + str(i)
            dst_lbl_path = os.path.join(lbl_dir, dup_base + ".txt")
            # Never overwrite a duplicate that is already on disk.
            if os.path.exists(dst_lbl_path):
                continue
            # Copy the label first; it is rewritten below once the flip succeeds.
            src_lbl_path = os.path.join(lbl_dir, src_lbl_name)
            shutil.copy2(src_lbl_path, dst_lbl_path)
            # Find the image (first matching extension) and write a mirrored copy.
            # NOTE(review): the extension match is case-sensitive here, so an
            # image named e.g. "foo.JPG" is not found - confirm that is intended.
            for ext in [".jpg", ".jpeg", ".png"]:
                src_img_path = os.path.join(img_dir, base + ext)
                if os.path.isfile(src_img_path):
                    dst_img_path = os.path.join(img_dir, dup_base + ext)
                    try:
                        img = Image.open(src_img_path)
                        flipped = ImageOps.mirror(img)
                        flipped.save(dst_img_path)
                        # Mirror the labels to match: x centre becomes 1 - x;
                        # class, y centre, width and height are unchanged.
                        with open(dst_lbl_path, "r") as f:
                            lbl_lines = f.readlines()
                        new_lines = []
                        for ll in lbl_lines:
                            pp = ll.strip().split()
                            if len(pp) == 5:
                                flipped_x = 1.0 - float(pp[1])
                                new_line = pp[0] + " " + "{:.6f}".format(flipped_x) + " " + pp[2] + " " + pp[3] + " " + pp[4]
                                new_lines.append(new_line)
                        with open(dst_lbl_path, "w") as f:
                            f.write("\n".join(new_lines) + "\n")
                    except Exception as ex:
                        # Fallback on any failure above: copy the image
                        # unflipped (replacing a flipped file if one was saved).
                        shutil.copy2(src_img_path, dst_img_path)
                    break
            # Counted per duplicated label, even when no matching image was found.
            total_added += 1
    print("")
    print("Oversampled", total_added, "images")

# Recount annotations per class now that the duplicates are on disk.
new_counts = Counter()
for fname in (entry for entry in os.listdir(lbl_dir) if entry.endswith(".txt")):
    with open(os.path.join(lbl_dir, fname), "r") as f:
        new_counts.update(
            int(pp[0]) for pp in (line.split() for line in f) if len(pp) == 5
        )

print("\nAfter balancing:")
for c in sorted(new_counts):
    # Ids beyond CLASS_NAMES are shown as the bare number.
    name = CLASS_NAMES[c] if c < len(CLASS_NAMES) else str(c)
    print(f"  Class {c} ({name}): {new_counts[c]} annotations")

print("Classes balanced.")

## 8. Generate data.yaml

In [None]:
abs_merged = os.path.abspath(MERGED_DIR)
test_dir = os.path.join(MERGED_DIR, "test", "images")

# Key order is preserved in the written file (sort_keys=False below).
data_config = dict(
    path=abs_merged,
    train="train/images",
    val="valid/images",
    nc=NUM_CLASSES,
    names=CLASS_NAMES,
)
# The test split is listed only when its image folder exists.
if os.path.isdir(test_dir):
    data_config["test"] = "test/images"

yaml_path = os.path.join(MERGED_DIR, "data.yaml")
with open(yaml_path, "w") as f:
    yaml.dump(data_config, f, default_flow_style=False, sort_keys=False)

# Echo the file back so the written config can be checked by eye.
print(f"Generated data.yaml at: {yaml_path}\n")
with open(yaml_path, "r") as f:
    print(f.read())

## 9. Train YOLOv8l - Maximum Accuracy

Training with:
- AdamW optimizer (lr=0.0015, cosine schedule)
- Box loss weight 8.5 (CIoU localization emphasis)
- Full augmentations: mosaic, mixup, HSV, scale, perspective, flips, copy-paste, erasing

In [None]:
model = YOLO(MODEL_NAME)

print("Starting training...")
for label, value in (
    ("Model:", MODEL_NAME),
    ("Data:", yaml_path),
    ("Image size:", IMG_SIZE),
    ("Epochs:", EPOCHS),
):
    print("  " + label, value)

# Training arguments grouped by concern; all are passed to model.train() as
# keyword arguments.
run_args = dict(
    data=yaml_path,
    epochs=EPOCHS,
    imgsz=IMG_SIZE,
    batch=-1,
    patience=PATIENCE,
    device=0,
    project="./runs",
    name="flood_maxacc_l",
    exist_ok=True,
    pretrained=True,
)
optimizer_args = dict(
    optimizer="AdamW",
    lr0=0.0015,
    lrf=0.01,
    weight_decay=0.0005,
    warmup_epochs=5,
    warmup_momentum=0.8,
    cos_lr=True,
)
augment_args = dict(
    mosaic=1.0,
    mixup=0.1,
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    scale=0.5,
    translate=0.1,
    perspective=0.0005,
    flipud=0.2,
    fliplr=0.5,
    degrees=5.0,
    shear=2.0,
    copy_paste=0.1,
    erasing=0.1,
)
loss_args = dict(box=8.5, cls=0.5, dfl=1.5)
output_args = dict(
    save=True,
    save_period=25,
    plots=True,
    val=True,
    verbose=True,
)

results = model.train(
    **run_args,
    **optimizer_args,
    **augment_args,
    **loss_args,
    **output_args,
)

print("Training complete!")

## 10. Evaluate and Compare

In [None]:
BASELINE_MAP50 = 0.69

# Fall back to an empty mapping when the training result has no results_dict.
metrics = getattr(results, "results_dict", {})

precision = metrics.get("metrics/precision(B)", None)
recall = metrics.get("metrics/recall(B)", None)
map50 = metrics.get("metrics/mAP50(B)", None)
map50_95 = metrics.get("metrics/mAP50-95(B)", None)

rule = "=" * 55
print(rule)
print("  FloodWatch YOLO - Results")
print(rule)
print("  Model:     ", MODEL_NAME)
print("  Resolution:", IMG_SIZE, "px")
print("  Epochs:    ", EPOCHS)
print("")
for caption, value in (
    ("  Precision: ", precision),
    ("  Recall:    ", recall),
    ("  mAP50:     ", map50),
    ("  mAP50-95:  ", map50_95),
):
    print(caption, value)
print("")

# Compare against the baseline only when mAP50 was reported.
if map50 is not None:
    delta = map50 - BASELINE_MAP50
    status = "IMPROVED" if delta > 0 else "REGRESSION"
    print("  Baseline mAP50:", BASELINE_MAP50)
    print("  Delta:         ", round(delta, 4), "(", status, ")")

    verdict = None
    if map50 >= 0.83:
        verdict = "  TARGET ACHIEVED! mAP50 >= 0.83"
    elif map50 >= 0.80:
        verdict = "  Near target: mAP50 >= 0.80"
    if verdict is not None:
        print("")
        print(verdict)

print(rule)

## 11. Export Model

In [None]:
weights_dir = "./runs/flood_maxacc_l/weights"
best_pt = weights_dir + "/best.pt"
last_pt = weights_dir + "/last.pt"
export_name = "yolov8_flood_highacc.pt"

# Prefer the best checkpoint; fall back to the last one; otherwise warn.
if os.path.isfile(best_pt):
    shutil.copy2(best_pt, export_name)
    size_mb = os.path.getsize(export_name) / 1e6
    print(f"Exported: {export_name} ( {round(size_mb, 1)} MB)")
elif os.path.isfile(last_pt):
    shutil.copy2(last_pt, export_name)
    print(f"Using last.pt as {export_name}")
else:
    print("WARNING: No weights found!")

print("\nPlace this file at: flood-watch-ai/models/yolov8_flood_highacc.pt")

In [None]:
# Display whichever of the expected training plots exist in the run folder.
from IPython.display import Image as IPImage, display

plots_dir = "./runs/flood_maxacc_l"

for plot_name in ("results.png", "confusion_matrix.png", "PR_curve.png", "F1_curve.png"):
    plot_path = os.path.join(plots_dir, plot_name)
    if not os.path.isfile(plot_path):
        continue
    print(f"{plot_name}:")
    display(IPImage(filename=plot_path, width=800))
    print("")

In [None]:
# Offer the exported weights as a browser download when running in Colab.
try:
    from google.colab import files
    if os.path.isfile(export_name):
        files.download(export_name)
        print(f"Downloading {export_name} ...")
except ImportError:
    # google.colab could not be imported: tell the user to copy the file by hand.
    print(f"Not running in Colab. Copy {export_name} manually.")