# FloodWatch YOLO — Max-Accuracy Training (YOLOv8m)

Train YOLOv8m at 832px for 200 epochs on the normalized flood dataset.

| Setting | Value |
|---------|-------|
| Model | YOLOv8m (25.9M params); switches to YOLOv8l when the GPU has ≥ 24 GB VRAM |
| Resolution | 832px |
| Epochs | 200 (patience=40) |
| Optimizer | AdamW lr=0.002 |
| Box loss | 7.5 (CIoU) |
| Target | mAP50 ≥ 0.80–0.85 |

## 1️⃣ Setup & Install

In [None]:
!pip install -q ultralytics roboflow Pillow

import os, glob, shutil, json
from collections import Counter, defaultdict
from ultralytics import YOLO

import torch
# Report the detected GPU and its memory so an undersized runtime is noticed
# before the long training cell is started.
if torch.cuda.is_available():
    # The device-properties attribute is `total_memory` (in bytes);
    # `total_mem` does not exist and raises AttributeError.
    vram = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f'GPU: {torch.cuda.get_device_name(0)} ({vram:.1f} GB)')
else:
    print('WARNING: No GPU detected!')

## 2️⃣ Download Flood Dataset

In [None]:
from roboflow import Roboflow

# Authenticate against Roboflow; substitute a real key for the placeholder.
rf = Roboflow(api_key="YOUR_API_KEY")
workspace = rf.workspace()
project = workspace.project("flood-detection-oelzf")
version = project.version(1)
# Request the "yolov8" export and store it under ./flood_dataset.
dataset = version.download("yolov8", location="./flood_dataset")
print('Dataset downloaded!')

# Count the files found under each split's images directory.
for split in ('train', 'valid', 'test'):
    pattern = f'./flood_dataset/{split}/images/*'
    imgs = glob.glob(pattern)
    print(f'  {split}: {len(imgs)} images')

## 3️⃣ Normalize Labels (Polygon → BBox)

**Critical:** Roboflow exports segmentation polygons. YOLO detect needs `class xc yc w h`.

In [None]:
def polygon_to_bbox(parts):
    """Collapse one polygon label row into a ``class xc yc w h`` string.

    ``parts`` is the whitespace-split row: a class id followed by
    alternating x/y coordinates. The extent of the points is clamped to
    [0, 1] on both axes before the centre and size are computed.

    Returns ``None`` when the row has fewer than five fields, a field does
    not parse as a number, the coordinate count is odd, or the clamped
    extent has no positive width or height.
    """
    if len(parts) < 5:
        return None

    try:
        cls_id = int(parts[0])
        coords = [float(v) for v in parts[1:]]
    except (ValueError, IndexError):
        return None

    if len(coords) % 2:
        return None

    # Even positions are x values, odd positions are y values.
    xs = coords[0::2]
    ys = coords[1::2]
    if not (xs and ys):
        return None

    x_min = max(0, min(xs))
    x_max = min(1, max(xs))
    y_min = max(0, min(ys))
    y_max = min(1, max(ys))

    w = x_max - x_min
    h = y_max - y_min
    if w <= 0 or h <= 0:
        return None

    xc = x_min + w / 2
    yc = y_min + h / 2
    return f'{cls_id} {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}'

poly_count = 0
bbox_count = 0
files_fixed = 0

for split in ['train', 'valid', 'test']:
    lbl_dir = f'./flood_dataset/{split}/labels'
    if not os.path.isdir(lbl_dir): continue
    for fname in sorted(os.listdir(lbl_dir)):
        if not fname.endswith('.txt'): continue
        fpath = os.path.join(lbl_dir, fname)
        with open(fpath) as f:
            lines = f.readlines()
        converted = []
        had_poly = False
        for line in lines:
            parts = line.strip().split()
            if len(parts) == 5:
                converted.append(line.strip())
                bbox_count += 1
            elif len(parts) > 5:
                b = polygon_to_bbox(parts)
                if b:
                    converted.append(b)
                    poly_count += 1
                    had_poly = True
        if had_poly:
            with open(fpath, 'w') as f:
                f.write('\n'.join(converted) + '\n')
            files_fixed += 1

print(f'Polygons converted: {poly_count}')
print(f'Already bbox: {bbox_count}')
print(f'Files fixed: {files_fixed}')
print('✅ All labels normalized to detection format!')

## 4️⃣ Audit & Clean Labels

In [None]:
# Class ids the label audit accepts; rows with any other id are discarded.
VALID_CLS = {0, 1, 2, 3, 4, 5}
# Smallest normalized box area (w * h) the label audit keeps.
MIN_AREA = 0.001
# Same-class boxes whose IoU exceeds this value are treated as duplicates.
DUP_IOU = 0.95


def iou(b1, b2):
    """Return the intersection-over-union of two ``(cls, xc, yc, w, h)`` boxes.

    The class field is ignored. Boxes that do not overlap, or that only
    touch along an edge, give ``0.0``.
    """
    _, cx_a, cy_a, w_a, h_a = b1
    _, cx_b, cy_b, w_b, h_b = b2

    # Centre/size form -> corner form.
    left_a, top_a = cx_a - w_a / 2, cy_a - h_a / 2
    right_a, bottom_a = cx_a + w_a / 2, cy_a + h_a / 2
    left_b, top_b = cx_b - w_b / 2, cy_b - h_b / 2
    right_b, bottom_b = cx_b + w_b / 2, cy_b + h_b / 2

    ov_left = max(left_a, left_b)
    ov_top = max(top_a, top_b)
    ov_right = min(right_a, right_b)
    ov_bottom = min(bottom_a, bottom_b)
    if ov_right <= ov_left or ov_bottom <= ov_top:
        return 0.0

    inter = (ov_right - ov_left) * (ov_bottom - ov_top)
    union = w_a * h_a + w_b * h_b - inter
    if union > 0:
        return inter / union
    return 0.0

# Audit every label file: drop rows that are malformed, use an unknown class,
# are too small or duplicate an earlier same-class box; clamp box centres to
# [0, 1]; delete label/image pairs that end up with no annotations.
stats = defaultdict(int)
for split in ['train', 'valid', 'test']:
    lbl_dir = f'./flood_dataset/{split}/labels'
    split_img_dir = f'./flood_dataset/{split}/images'
    if not os.path.isdir(lbl_dir):
        continue
    for fname in sorted(os.listdir(lbl_dir)):
        if not fname.endswith('.txt'):
            continue
        fpath = os.path.join(lbl_dir, fname)
        with open(fpath) as f:
            lines = f.readlines()

        anns = []
        modified = False  # True as soon as the file content must change
        for line in lines:
            p = line.strip().split()
            if not p:
                modified = True  # blank line is removed on rewrite
                continue
            if len(p) != 5:
                # Not a `class xc yc w h` row. Reading the first five fields
                # of a longer row (e.g. a polygon) would invent a bogus box.
                stats['malformed'] += 1
                modified = True
                continue
            try:
                cls = int(p[0])
                xc, yc, w, h = (float(v) for v in p[1:5])
            except ValueError:
                stats['malformed'] += 1
                modified = True
                continue
            if cls not in VALID_CLS:
                stats['invalid_class'] += 1
                modified = True
                continue
            if w <= 0 or h <= 0 or w * h < MIN_AREA:
                stats['tiny'] += 1
                modified = True
                continue
            cx = max(0, min(1, xc))
            cy = max(0, min(1, yc))
            if cx != xc or cy != yc:
                # A clamped centre must be written back even when no row is
                # dropped; a row-count comparison alone would miss it.
                stats['clamped'] += 1
                modified = True
            anns.append((cls, cx, cy, w, h))

        final = []
        for a in anns:
            if any(a[0] == e[0] and iou(a, e) > DUP_IOU for e in final):
                stats['duplicate'] += 1
                modified = True
            else:
                final.append(a)

        if not final:
            stats['empty'] += 1
            os.remove(fpath)
            # Build the image path from the file stem instead of string
            # replacement on the whole path, which depends on the path
            # separator and replaces every '.txt' it finds.
            stem = os.path.splitext(fname)[0]
            for ext in ('.jpg', '.jpeg', '.png'):
                img = os.path.join(split_img_dir, stem + ext)
                if os.path.isfile(img):
                    os.remove(img)
                    break
            continue

        if modified:
            with open(fpath, 'w') as f:
                f.writelines(
                    f'{c} {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}\n'
                    for c, xc, yc, w, h in final
                )
            stats['fixed'] += 1

print('Label Audit:')
for k, v in sorted(stats.items()):
    print(f'  {k}: {v}')
print('✅ Labels cleaned!')

## 5️⃣ Balance Class Distribution

In [None]:
from PIL import Image, ImageOps

# Display names for the class ids, used when printing the distribution below.
NAMES = {0:'person', 1:'car', 2:'bicycle', 3:'motorcycle', 4:'bus', 5:'truck'}

counts = Counter()              # class id -> number of annotations
cls_files = defaultdict(set)    # class id -> label files containing that class
lbl_dir = './flood_dataset/train/labels'
img_dir = './flood_dataset/train/images'

# Tally the training split only.
label_names = (name for name in os.listdir(lbl_dir) if name.endswith('.txt'))
for fname in label_names:
    with open(os.path.join(lbl_dir, fname)) as f:
        rows = [raw.strip().split() for raw in f]
    for p in rows:
        if len(p) < 5:
            continue
        c = int(p[0])
        counts[c] += 1
        cls_files[c].add(fname)

print('Class distribution (before):')
for c in sorted(counts):
    print(f'  {c} ({NAMES.get(c,"?"):12s}): {counts[c]:5d} ann, {len(cls_files[c]):4d} imgs')

def _flip_rows_horizontally(rows):
    """Mirror ``class xc yc w h`` rows about the vertical axis (xc -> 1 - xc).

    Rows that do not have exactly five fields are left out of the result.
    Raises ``ValueError`` when an x-centre field is not a number.
    """
    flipped = []
    for row in rows:
        p = row.strip().split()
        if len(p) == 5:
            flipped.append(f'{p[0]} {1-float(p[1]):.6f} {p[2]} {p[3]} {p[4]}')
    return flipped


def _find_image(directory, stem):
    """Return ``(path, ext)`` of the first existing image for ``stem``.

    Tries ``.jpg``, ``.jpeg`` and ``.png`` in that order; returns
    ``(None, None)`` when none of them exists.
    """
    for ext in ('.jpg', '.jpeg', '.png'):
        candidate = os.path.join(directory, stem + ext)
        if os.path.isfile(candidate):
            return candidate, ext
    return None, None


# Oversample classes holding fewer than 70% of the largest class's annotation
# count by adding horizontally mirrored copies of images that contain them.
if counts:
    max_count = max(counts.values())
    target = int(max_count * 0.7)
    dup = 0
    for cls_id, cnt in counts.items():
        if cnt >= target:
            continue
        # Sorted so the same files are duplicated on every run; set
        # iteration order is not stable between interpreter sessions.
        files = sorted(cls_files.get(cls_id, []))
        if not files:
            continue
        needed = target - cnt
        for i in range(min(needed, len(files) * 3)):
            src = files[i % len(files)]
            base = os.path.splitext(src)[0]
            dst_lbl = os.path.join(lbl_dir, f'{base}_dup{i}.txt')
            if os.path.exists(dst_lbl):
                continue
            # Locate the image first, so a label copy is never left
            # without an image and is never counted as a duplicate.
            src_img, ext = _find_image(img_dir, base)
            if src_img is None:
                continue
            src_lbl = os.path.join(lbl_dir, src)
            dst_img = os.path.join(img_dir, f'{base}_dup{i}{ext}')
            try:
                with open(src_lbl) as f:
                    flipped = _flip_rows_horizontally(f.readlines())
                # Context manager closes the source image file handle.
                with Image.open(src_img) as img:
                    ImageOps.mirror(img).save(dst_img)
            except (OSError, ValueError) as err:
                # Mirroring failed: fall back to an unflipped image/label
                # pair so the two still match.
                print(f'  mirror failed for {src_img} ({err}); copying unflipped')
                shutil.copy2(src_img, dst_img)
                shutil.copy2(src_lbl, dst_lbl)
            else:
                with open(dst_lbl, 'w') as f:
                    f.write('\n'.join(flipped)+'\n')
            dup += 1
    print(f'\nOversampled {dup} images for minority classes')
print('✅ Classes balanced!')

## 6️⃣ Tighten Bounding Boxes

Shrink every box's width and height by 6% (3% per side) to remove auto-annotation padding. Run this cell only once; re-running it shrinks the boxes again.

In [None]:
tightened = 0
for split in ['train', 'valid', 'test']:
    d = f'./flood_dataset/{split}/labels'
    if not os.path.isdir(d): continue
    for fname in os.listdir(d):
        if not fname.endswith('.txt'): continue
        fpath = os.path.join(d, fname)
        with open(fpath) as f:
            lines = f.readlines()
        out = []
        for line in lines:
            p = line.strip().split()
            if len(p) != 5: out.append(line.strip()); continue
            try:
                c = int(p[0])
                xc, yc, w, h = float(p[1]), float(p[2]), float(p[3]), float(p[4])
                w *= 0.94; h *= 0.94
                w = max(w, 0.01); h = max(h, 0.01)
                xc = max(w/2, min(1-w/2, xc))
                yc = max(h/2, min(1-h/2, yc))
                out.append(f'{c} {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}')
                tightened += 1
            except:
                out.append(line.strip())
        with open(fpath, 'w') as f:
            f.write('\n'.join(out) + '\n')

print(f'Tightened {tightened} bboxes')
print('✅ Bboxes tightened!')

## 7️⃣ Train YOLOv8m — Max-Accuracy Config

In [None]:
# Training configuration shared by the training, evaluation and export cells.
MODEL_NAME = 'yolov8m.pt'
IMG_SIZE = 832
EPOCHS = 200
PATIENCE = 40
BATCH = -1  # auto-fit GPU

# Use yolov8l if enough VRAM
if torch.cuda.is_available():
    # `total_memory` (bytes) is the real attribute name; `total_mem` raises
    # AttributeError and would abort this cell on every GPU runtime.
    vram = torch.cuda.get_device_properties(0).total_memory / 1e9
    if vram >= 24:
        MODEL_NAME = 'yolov8l.pt'
        print(f'Large VRAM detected, using {MODEL_NAME}')

print(f'Training: {MODEL_NAME} @ {IMG_SIZE}px for {EPOCHS} epochs')
model = YOLO(MODEL_NAME)

# The keyword arguments are grouped by purpose and unpacked into a single
# model.train() call below.

# Optimizer
optim_args = dict(
    optimizer='AdamW',
    lr0=0.002,
    lrf=0.01,
    weight_decay=0.0005,
    warmup_epochs=5,
    warmup_momentum=0.8,
    cos_lr=True,
)

# Augmentations
augment_args = dict(
    mosaic=1.0,
    mixup=0.1,
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    scale=0.5,
    translate=0.1,
    perspective=0.0005,
    flipud=0.2,
    fliplr=0.5,
    degrees=5.0,
    shear=2.0,
    copy_paste=0.1,
    erasing=0.1,
)

# Loss weights (localization emphasis)
loss_args = dict(box=7.5, cls=0.5, dfl=1.5)

# Output
output_args = dict(
    save=True,
    save_period=25,
    plots=True,
    val=True,
    verbose=True,
)

results = model.train(
    data='./flood_dataset/data.yaml',
    epochs=EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH,
    patience=PATIENCE,
    device=0,
    project='./runs',
    name='flood_maxacc',
    exist_ok=True,
    pretrained=True,
    **optim_args,
    **augment_args,
    **loss_args,
    **output_args,
)

print('✅ Training complete!')

## 8️⃣ Evaluate & Compare with Baseline

In [None]:
# mAP50 that this run is compared against.
BASELINE_MAP50 = 0.69

# Fall back to an empty mapping when the training result carries no
# `results_dict`; every metric below is then reported as None.
m = getattr(results, 'results_dict', {})
p, r, m50, m95 = (
    m.get(key)
    for key in (
        'metrics/precision(B)',
        'metrics/recall(B)',
        'metrics/mAP50(B)',
        'metrics/mAP50-95(B)',
    )
)

rule = '=' * 55
print(rule)
print('  FloodWatch YOLO — Max-Accuracy Results')
print(rule)
print(f'  Model:      {MODEL_NAME}')
print(f'  Resolution: {IMG_SIZE}px')
print(f'  Epochs:     {EPOCHS}')
print()
print(f'  Precision:  {p}')
print(f'  Recall:     {r}')
print(f'  mAP50:      {m50}')
print(f'  mAP50-95:   {m95}')
print()
# The baseline comparison is printed only when an mAP50 value is available.
if m50 is not None:
    delta = m50 - BASELINE_MAP50
    if delta > 0:
        status = 'IMPROVED'
    else:
        status = 'REGRESSION'
    print(f'  Baseline mAP50: {BASELINE_MAP50:.4f}')
    print(f'  Delta:          {delta:+.4f} ({status})')
print(rule)

## 9️⃣ Export Production Model

In [None]:
import shutil

# Location of the best checkpoint from the training run, and the file name
# it is copied to.
best_pt = './runs/flood_maxacc/weights/best.pt'
export_name = 'yolov8_flood_highacc.pt'

if not os.path.isfile(best_pt):
    print(f'best.pt not found at {best_pt}')
else:
    shutil.copy2(best_pt, export_name)
    size_mb = os.path.getsize(export_name) / 1e6
    print(f'Exported: {export_name} ({size_mb:.1f} MB)')
    print('Download this file and place at: models/yolov8_flood_highacc.pt')

# Also export ONNX for deployment
# Best-effort: any failure (including a missing checkpoint) is reported
# instead of aborting the cell.
try:
    best_model = YOLO(best_pt)
    best_model.export(format='onnx', imgsz=IMG_SIZE)
except Exception as e:
    print(f'ONNX export failed: {e}')
else:
    print('ONNX exported!')