In [None]:
# Triangles: scout lines plotted N px up from each triangle tip

In [None]:
#!/usr/bin/env python3
# Ultra-fast, batched pipeline with threaded I/O + per-frame timing & counts
# Now renders ALL masks + triangles coloured by the mask sampled N px above the tip
# + Scout lines: per-pixel coloured line from tip to sample point (render-only)

import os, glob, sys, time
import cv2, torch, numpy as np
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
from ultralytics import YOLO

# =======================
# Config
# =======================
# Paths: model weights, input frames, and the directory overlays are written to.
home       = os.path.expanduser("~")
weights    = f"{home}/models/jakes-loped/jakes-finder-mk1/1/weights.pt"
frames_dir = Path(home) / "Documents" / "GitHub" / "Ai-plays-SubwaySurfers" / "frames"
out_dir    = Path(home) / "Documents" / "GitHub" / "Ai-plays-SubwaySurfers" / "out_overlays2"
out_dir.mkdir(parents=True, exist_ok=True)  # import-time side effect: creates the output directory

RAIL_ID    = 9            # class id whose masks are treated as rails in process_frame_post
IMG_SIZE   = 512          # passed as imgsz to model.predict; also the warmup frame size
CONF, IOU  = 0.30, 0.45   # passed as conf / iou to model.predict
MAX_DET    = 30           # passed as max_det to model.predict

# Color/region filter
TARGET_COLORS_RGB  = [(119,104,67), (81,42,45)]  # reference colours (RGB); converted to BGR before use
TOLERANCE          = 20.0   # max Euclidean colour distance to a target (compared squared, as TOL2)
MIN_REGION_SIZE    = 30     # min area (px) of a colour-matched connected component
MIN_REGION_HEIGHT  = 150    # min bounding-box height (px) of a colour-matched connected component

# Heat/triangle
HEAT_BLUR_KSIZE     = 51     # side of the square box-blur kernel used for the red/green score
RED_SCORE_THRESH    = 220    # score (0..255) at or above which a pixel counts as "dark red"
EXCLUDE_TOP_FRAC    = 0.40   # fraction of frame height cleared at the top before component search
EXCLUDE_BOTTOM_FRAC = 0.15   # fraction of frame height cleared at the bottom
MIN_DARK_RED_AREA   = 1200   # min area (px) for a dark-red component to yield a triangle
MIN_DARK_FRACTION   = 0.15   # min share of all dark-red pixels a component must hold
TRI_SIZE_PX         = 18     # half-width of the drawn triangle base; height is 1.2x this

# Triangle mask scan distance (N pixels above tip)
SAMPLE_UP_PX        = 65

# Colours (BGR)
COLOR_GREEN  = (0, 255, 0)
COLOR_PINK   = (203, 192, 255)  # readable pink
COLOR_YELLOW = (0, 255, 255)
COLOR_RED    = (0, 0, 255)

# Runtime
BATCH               = 1      # number of frames per model.predict call
THREADS_IO          = max(2, (os.cpu_count() or 4) // 2)  # worker threads used to read images
SHOW_FIRST_N        = None   # None → all frames
RENDER_FIRST_N      = 150     # render overlays for first N frames only

# =======================
# System/Backends
# =======================
# Turn on OpenCV's optimized code and cap its thread count at (CPU count - 1), minimum 1.
cv2.setUseOptimized(True)
try: cv2.setNumThreads(max(1, (os.cpu_count() or 1) - 1))
except Exception: pass  # best-effort: a failure to set the thread count is ignored

# Choose the inference device and precision:
#   CUDA available -> device 0, half precision
#   MPS available  -> "mps", full precision
#   otherwise      -> "cpu", full precision
if torch.cuda.is_available():
    device, half = 0, True
    torch.backends.cudnn.benchmark = True
    try: torch.set_float32_matmul_precision('high')
    except Exception: pass  # best-effort: ignored if the call fails
elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
    device, half = "mps", False
else:
    device, half = "cpu", False

# =======================
# Model
# =======================
# Load the YOLO model from `weights`; fusing is attempted and any failure is ignored.
model = YOLO(weights)
try: model.fuse()
except Exception: pass

# Warmup: one prediction on an all-black IMG_SIZE x IMG_SIZE frame using the same
# inference settings as the main loop (presumably to keep first-call overhead out
# of the per-frame timings). The result is discarded.
_dummy = np.zeros((IMG_SIZE, IMG_SIZE, 3), np.uint8)
_ = model.predict(_dummy, task="segment", imgsz=IMG_SIZE, device=device,
                  conf=CONF, iou=IOU, verbose=False, half=half, max_det=MAX_DET)

# =======================
# Precomputed
# =======================
# Target colours reordered from RGB to BGR (OpenCV channel order), one float32 row per colour.
TARGETS_BGR_F32 = np.array([(r,g,b)[::-1] for (r,g,b) in TARGET_COLORS_RGB], dtype=np.float32)
# Squared tolerance, so colour distances can be compared without a square root.
TOL2            = TOLERANCE * TOLERANCE

# Per-class overlay colour (BGR), keyed by class id.
CLASS_COLOURS = {
    0:(255,255,0),1:(192,192,192),2:(0,128,255),3:(0,255,0),
    4:(255,0,255),5:(0,255,255),6:(255,128,0),7:(128,0,255),
    8:(0,0,128),9:(0,0,255),10:(128,128,0),11:(255,255,102)
}
# Per-class label text drawn on the overlay, keyed by class id.
LABELS = {
    0:"BOOTS",1:"GREYTRAIN",2:"HIGHBARRIER1",3:"JUMP",4:"LOWBARRIER1",
    5:"LOWBARRIER2",6:"ORANGETRAIN",7:"PILLAR",8:"RAMP",9:"RAILS",
    10:"SIDEWALK",11:"YELLOWTRAIN"
}

SAFE_GREEN = {9, 10}          # rails or sidewalk or no mask -> green
# Train classes (1, 6, 11) and PILLAR (7) are in neither set, so they map to red.
WARN_YELLOW = {2,3,4,5,8}   # barrier/jump/ramp -> yellow

# =======================
# Helpers
# =======================
def load_image_with_time(path: str):
    """Read the image at *path* and report how long the read took.

    Returns an ``(image, elapsed_ms)`` pair: ``image`` is whatever
    ``cv2.imread`` returns for the path in colour mode, and ``elapsed_ms`` is
    the wall-clock duration of that call in milliseconds.
    """
    started = time.perf_counter()
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    finished = time.perf_counter()
    elapsed_ms = (finished - started) * 1000.0
    return image, elapsed_ms

def chunked(iterable, n):
    """Yield consecutive slices of at most *n* items from a sliceable sequence.

    ``iterable`` must support ``len()`` and slicing; the final slice may be
    shorter than *n*.
    """
    starts = range(0, len(iterable), n)
    yield from (iterable[begin:begin + n] for begin in starts)

def highlight_rails_mask_only_fast(img_bgr, rail_mask):
    """Return a full-frame boolean mask of rail pixels matching the target colours.

    Work is restricted to the bounding box of ``rail_mask``. A pixel is a
    candidate when it is set in ``rail_mask`` and its squared BGR distance to
    any row of ``TARGETS_BGR_F32`` is at most ``TOL2``. Candidates are grouped
    into 8-connected components; only components with area >=
    ``MIN_REGION_SIZE`` and bounding-box height >= ``MIN_REGION_HEIGHT`` are
    kept. An all-False mask is returned when nothing qualifies.
    """
    frame_h, frame_w = img_bgr.shape[:2]
    result = np.zeros((frame_h, frame_w), dtype=bool)
    if not rail_mask.any():
        return result

    # Bounding box of the rail mask.
    rows, cols = np.where(rail_mask)
    top, bottom = rows.min(), rows.max() + 1
    left, right = cols.min(), cols.max() + 1
    box = (slice(top, bottom), slice(left, right))

    # Squared distance from every ROI pixel to every target colour: [h, w, n_targets].
    pixels = img_bgr[box].astype(np.float32)
    delta = pixels[:, :, None, :] - TARGETS_BGR_F32[None, None, :, :]
    sq_dist = np.sum(delta * delta, axis=-1)
    near_target = np.any(sq_dist <= TOL2, axis=-1)

    candidates = np.logical_and(near_target, rail_mask[box])
    count, labels, stats, _ = cv2.connectedComponentsWithStats(candidates.astype(np.uint8), 8)
    if count <= 1:
        return result

    # stats row 0 is the background, so shift positions back to label ids.
    big_enough = stats[1:, cv2.CC_STAT_AREA] >= MIN_REGION_SIZE
    tall_enough = stats[1:, cv2.CC_STAT_HEIGHT] >= MIN_REGION_HEIGHT
    kept_labels = np.where(big_enough & tall_enough)[0] + 1

    result[box] = np.isin(labels, kept_labels)
    return result

def red_vs_green_score(red_mask, green_mask):
    """Build a uint8 heat map comparing local red density against green density.

    Both masks are box-blurred with a ``HEAT_BLUR_KSIZE`` square kernel. The
    difference (red - green) is scaled by twice its largest magnitude and
    shifted so that 0 maps to mid-scale, then stretched to 0..255: high values
    mean red dominates locally, low values mean green dominates.
    """
    kernel = (HEAT_BLUR_KSIZE, HEAT_BLUR_KSIZE)
    red_density = cv2.blur(red_mask.astype(np.float32), kernel)
    green_density = cv2.blur(green_mask.astype(np.float32), kernel)

    balance = red_density - green_density
    # Small epsilon avoids dividing by zero when both masks are empty.
    peak = float(np.max(np.abs(balance))) + 1e-6
    centred = balance / (2.0 * peak) + 0.5
    return np.clip(centred * 255.0, 0, 255.0).astype(np.uint8)

def purple_triangles(score, H):
    """Locate the top tip of every sufficiently large high-score region.

    Pixels with ``score >= RED_SCORE_THRESH`` are kept, the top
    ``EXCLUDE_TOP_FRAC`` and bottom ``EXCLUDE_BOTTOM_FRAC`` of the frame
    height ``H`` are cleared, and a 5x9 rectangular morphological opening is
    applied. Each 8-connected component whose area is at least
    ``MIN_DARK_RED_AREA`` and at least ``MIN_DARK_FRACTION`` of all remaining
    pixels contributes one tip: its topmost row and the mean column of the
    component's pixels on that row.

    Returns ``(tips, best)`` where ``tips`` is a list of ``(x, y)`` ints and
    ``best`` is the tip with the smallest ``y``; ``([], None)`` if none.
    """
    hot = (score >= RED_SCORE_THRESH).astype(np.uint8)

    skip_top = int(H * EXCLUDE_TOP_FRAC)
    skip_bottom = int(H * EXCLUDE_BOTTOM_FRAC)
    if skip_top:
        hot[:skip_top, :] = 0
    if skip_bottom:
        hot[-skip_bottom:, :] = 0

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 9))
    hot = cv2.morphologyEx(hot, cv2.MORPH_OPEN, kernel, iterations=1)

    hot_pixels = int(hot.sum())
    if hot_pixels == 0:
        return [], None

    min_share = int(np.ceil(MIN_DARK_FRACTION * hot_pixels))
    count, labels, stats, _ = cv2.connectedComponentsWithStats(hot, 8)
    if count <= 1:
        return [], None

    tips = []
    for label in range(1, count):  # label 0 is the background
        area = stats[label, cv2.CC_STAT_AREA]
        if area < MIN_DARK_RED_AREA or area < min_share:
            continue
        rows, cols = np.where(labels == label)
        if rows.size == 0:
            continue
        tip_y = rows.min()
        tip_x = int(cols[rows == tip_y].mean())
        tips.append((int(tip_x), int(tip_y)))

    if not tips:
        return [], None

    highest = min(tips, key=lambda tip: tip[1])
    return tips, highest

# ---- Triangle classification by sampling masks N px above tip (no resizes) ----
def classify_triangles_at_sample(tri_positions, masks_np, classes_np, frame_H, frame_W, sample_up=SAMPLE_UP_PX):
    """Colour each triangle by the mask class found ``sample_up`` px above its tip.

    For every ``(x, y)`` in ``tri_positions`` the point ``(x, max(0, y - sample_up))``
    is looked up through ``_colour_for_point``, so triangles and scout lines
    always share one class-to-colour mapping:

    * no mask at the point, or class in ``SAFE_GREEN``  -> ``COLOR_GREEN``
    * class 0                                          -> ``COLOR_PINK``
    * class in ``WARN_YELLOW``                         -> ``COLOR_YELLOW``
    * any other class                                  -> ``COLOR_RED``

    Frame coordinates are scaled into the mask grid; masks are never resized.

    Returns a list of BGR tuples, one per triangle, or ``[]`` when
    ``masks_np``/``classes_np`` is ``None`` or there are no triangles.
    """
    if masks_np is None or classes_np is None or len(tri_positions) == 0:
        return []

    return [
        _colour_for_point(x, max(0, y - sample_up), masks_np, classes_np, frame_H, frame_W)
        for (x, y) in tri_positions
    ]

# --- NEW (render-only): colour for any frame-point via masks (no resizes) ---
def _colour_for_point(x, y, masks_np, classes_np, frame_H, frame_W):
    """Return the display colour (BGR) for frame point ``(x, y)``.

    The point is scaled from frame coordinates into the mask grid and clamped
    to valid indices. The first mask (in array order) whose value there
    exceeds 0.5 decides the class:

    * no mask data, no hit, or class in ``SAFE_GREEN`` -> ``COLOR_GREEN``
    * class 0                                         -> ``COLOR_PINK``
    * class in ``WARN_YELLOW``                        -> ``COLOR_YELLOW``
    * anything else                                   -> ``COLOR_RED``
    """
    if masks_np is None or classes_np is None or masks_np.size == 0:
        return COLOR_GREEN

    grid_h, grid_w = masks_np.shape[1], masks_np.shape[2]
    scale_x = (grid_w - 1) / max(1, (frame_W - 1))
    scale_y = (grid_h - 1) / max(1, (frame_H - 1))

    # Scale into the mask grid, then clamp to [0, size - 1].
    col = min(max(int(round(x * scale_x)), 0), grid_w - 1)
    row = min(max(int(round(y * scale_y)), 0), grid_h - 1)

    hit_class = next(
        (int(cls_id) for mask, cls_id in zip(masks_np, classes_np) if mask[row, col] > 0.5),
        None,
    )

    if hit_class is None or hit_class in SAFE_GREEN:
        return COLOR_GREEN
    if hit_class == 0:
        return COLOR_PINK
    return COLOR_YELLOW if hit_class in WARN_YELLOW else COLOR_RED

# Returns: (tri_best_xy, tri_count, mask_count, to_cpu_ms, post_ms, masks_np, classes_np, rail_mask, green, tri_positions, tri_colours)
def process_frame_post(frame_bgr, yolo_res):
    """Post-process one prediction result into triangle tips and their colours.

    Returns an 11-tuple:
    ``(tri_best, tri_count, mask_count, to_cpu_ms, post_ms, masks_np,
    classes_np, rail_mask, green, tri_positions, tri_colours)``.

    * ``to_cpu_ms`` times only the tensor -> NumPy conversion; ``post_ms``
      times the rail/heat/triangle work. Both are 0.0 when that stage is skipped.
    * When the result has no masks, or no mask of class ``RAIL_ID``, the
      triangle fields are ``None``/``0``/``[]`` and ``rail_mask``/``green``
      are ``None``.
    """
    H, W = frame_bgr.shape[:2]
    if yolo_res.masks is None:
        return None, 0, 0, 0.0, 0.0, None, None, None, None, [], []

    # --- timed section 1: move masks and class ids to NumPy ---
    t0_to_cpu = time.perf_counter()
    masks_np = yolo_res.masks.data.cpu().numpy()  # [n,h,w]
    mask_count = int(masks_np.shape[0])
    # Prefer class ids attached to the masks object; otherwise fall back to the boxes.
    if hasattr(yolo_res.masks, "cls") and yolo_res.masks.cls is not None:
        classes_np = yolo_res.masks.cls.cpu().numpy().astype(int)
    else:
        classes_np = yolo_res.boxes.cls.cpu().numpy().astype(int)
    t1_to_cpu = time.perf_counter()
    to_cpu_ms = (t1_to_cpu - t0_to_cpu) * 1000.0

    if mask_count == 0 or classes_np.size == 0:
        return None, 0, mask_count, to_cpu_ms, 0.0, masks_np, classes_np, None, None, [], []

    rail_sel = (classes_np == RAIL_ID)
    if not np.any(rail_sel):
        return None, 0, mask_count, to_cpu_ms, 0.0, masks_np, classes_np, None, None, [], []

    # --- timed section 2: rail union -> green/red split -> heat score -> triangles ---
    t0_post = time.perf_counter()
    rail_masks = masks_np[rail_sel].astype(bool)        # [k,h,w]
    union = np.any(rail_masks, axis=0).astype(np.uint8) # [h,w]
    # Only the merged rail mask is resized to frame resolution (nearest neighbour).
    rail_mask = cv2.resize(union, (W, H), interpolation=cv2.INTER_NEAREST).astype(bool)

    # green = rail pixels matching the target colours; red = the remaining rail pixels.
    green = highlight_rails_mask_only_fast(frame_bgr, rail_mask)
    red   = np.logical_and(rail_mask, np.logical_not(green))
    score = red_vs_green_score(red, green)
    tri_positions, tri_best = purple_triangles(score, H)

    # classify triangles by sampling masks above tip
    tri_colours = classify_triangles_at_sample(tri_positions, masks_np, classes_np, H, W, SAMPLE_UP_PX)

    t1_post = time.perf_counter()
    post_ms = (t1_post - t0_post) * 1000.0

    return tri_best, len(tri_positions), mask_count, to_cpu_ms, post_ms, masks_np, classes_np, rail_mask, green, tri_positions, tri_colours

# --- rendering (excluded from timing) ---
def draw_triangle(img, x, y, size=TRI_SIZE_PX, colour=COLOR_RED):
    """Draw a filled triangle on ``img`` (in place) with its apex at ``(x, y)``.

    The base lies ``int(size * 1.2)`` px below the apex and spans ``size`` px
    to either side of ``x``. The fill uses ``colour``; a 1px anti-aliased
    black outline is drawn on top.
    """
    drop = int(size * 1.2)
    base_y = y + drop
    corners = np.array([[x, y], [x - size, base_y], [x + size, base_y]], np.int32)
    cv2.fillConvexPoly(img, corners, colour)
    outline = corners.reshape(-1, 1, 2)
    cv2.polylines(img, [outline], True, (0, 0, 0), 1, cv2.LINE_AA)

def render_overlays(frame_bgr, masks_np, classes_np, rail_mask, green_mask, tri_positions, tri_colours):
    """Draw every overlay on a copy of ``frame_bgr`` and return the copy.

    Layers, in drawing order:
      1. each mask blended with its ``CLASS_COLOURS`` entry, plus a text label
         near the mask centroid,
      2. a red tint over ``rail_mask`` and solid green over ``green_mask``,
      3. scout lines: one pixel per row from each triangle tip up to
         ``SAMPLE_UP_PX`` rows above it, coloured by ``_colour_for_point``,
      4. the triangles themselves, paired with ``tri_colours``.

    ``masks_np``/``classes_np``/``rail_mask``/``green_mask`` may be ``None``;
    the corresponding layer is then skipped.
    """
    out = frame_bgr.copy()
    H, W = out.shape[:2]
    alpha = 0.45

    if masks_np is not None and classes_np is not None and masks_np.size:
        for m, c in zip(masks_np, classes_np):
            # Normalise to a boolean mask on BOTH paths. Previously a mask that
            # already had frame shape was used as-is, and a non-boolean array
            # is not a valid mask index for `out[...]`.
            m_u8 = m.astype(np.uint8)
            if m_u8.shape != (H, W):
                m_u8 = cv2.resize(m_u8, (W, H), interpolation=cv2.INTER_NEAREST)
            m_full = m_u8.astype(bool)

            cls_id = int(c)
            color = CLASS_COLOURS.get(cls_id, (255,255,255))
            out[m_full] = (np.array(color, dtype=np.uint8) * alpha + out[m_full] * (1 - alpha)).astype(np.uint8)

            ys, xs = np.where(m_full)
            if xs.size:
                xc, yc = int(xs.mean()), int(ys.mean())
                label = LABELS.get(cls_id, f"C{cls_id}")
                org = (max(5, xc-40), max(20, yc))
                # Black stroke under white text keeps the label readable on any background.
                cv2.putText(out, label, org,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,0,0), 2, cv2.LINE_AA)
                cv2.putText(out, label, org,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,255), 1, cv2.LINE_AA)

    if rail_mask is not None:
        tint = out.copy()
        tint[rail_mask] = (0, 0, 255)  # red tint for rails
        out = cv2.addWeighted(tint, 0.30, out, 0.70, 0)
    if green_mask is not None:
        out[green_mask] = (0, 255, 0)

    # --- Scout lines (render-only): per-pixel coloured vertical line up to sample point ---
    for (x, y) in tri_positions:
        if not (0 <= x < W):
            continue  # column outside the frame: nothing to draw
        y_start = min(y, H - 1)  # never index below the last row
        y_end = max(0, y - SAMPLE_UP_PX)
        for yy in range(y_start, y_end - 1, -1):
            out[yy, x] = _colour_for_point(x, yy, masks_np, classes_np, H, W)

    # Draw triangles with per-triangle colours (after scout lines so tips are visible)
    for (x, y), col in zip(tri_positions, tri_colours):
        draw_triangle(out, x, y, colour=col)

    return out

# =======================
# Batched execution with prints; overlays saved for first N
# =======================
def run_pipeline_with_prints_and_overlays():
    """Run detection + post-processing over every frame and save overlays.

    Collects ``*.jpg``/``*.png`` files from ``frames_dir`` (sorted, limited to
    ``SHOW_FIRST_N`` when set), reads them on a thread pool in batches of
    ``BATCH``, runs ``model.predict`` per batch, post-processes each frame,
    prints one timing line per frame, and writes an overlay image to
    ``out_dir`` for frames whose 1-based position is <= ``RENDER_FIRST_N``.

    Returns a list aligned with the sorted path list: entry ``i`` is the best
    triangle tip ``(x, y)`` for path ``i``, or ``None`` when there is none or
    the file could not be read.

    Raises:
        FileNotFoundError: if no image files are found in ``frames_dir``.
    """
    # "frame_*.jpg"/"frame_*.png" are subsets of these two patterns.
    paths = sorted({
        p
        for pattern in ("*.jpg", "*.png")
        for p in glob.glob(str(frames_dir / pattern))
    })
    if not paths:
        raise FileNotFoundError(f"No images in: {frames_dir}")
    if SHOW_FIRST_N is not None:
        paths = paths[:SHOW_FIRST_N]

    N = len(paths)
    results_triangle_xy = [None] * N

    def load_batch(pool, batch_paths):
        """Read ``batch_paths`` concurrently.

        Returns ``(slot, path, image, read_ms)`` for every readable file, where
        ``slot`` is the file's position inside the requested batch. Keeping the
        slot lets results stay aligned with ``paths`` even when a read fails.
        """
        futures = [pool.submit(load_image_with_time, p) for p in batch_paths]
        loaded = []
        for slot, (path, fut) in enumerate(zip(batch_paths, futures)):
            img, r_ms = fut.result()
            if img is None:
                print(f"[skip] unreadable image: {path}", file=sys.stderr)
                continue
            loaded.append((slot, path, img, r_ms))
        return loaded

    idx_global = 0
    # One pool for the whole run instead of a new pool per batch.
    with ThreadPoolExecutor(max_workers=THREADS_IO) as pool:
        for batch_paths in chunked(paths, BATCH):
            requested = len(batch_paths)
            loaded = load_batch(pool, batch_paths)
            if not loaded:
                idx_global += requested
                continue

            imgs_bgr = [item[2] for item in loaded]
            B = len(imgs_bgr)

            t0_inf = time.perf_counter()
            res_list = model.predict(
                imgs_bgr, task="segment", imgsz=IMG_SIZE, device=device,
                conf=CONF, iou=IOU, verbose=False, half=half, max_det=MAX_DET,
                batch=B
            )
            # Wait for the accelerator before stopping the clock (best-effort).
            try:
                if device == 0 and torch.cuda.is_available():
                    torch.cuda.synchronize()
                elif device == "mps" and getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
                    torch.mps.synchronize()
            except Exception:
                pass
            t1_inf = time.perf_counter()
            infer_ms_share = ((t1_inf - t0_inf) * 1000.0) / B

            for (slot, path, img, read_ms), yres in zip(loaded, res_list):
                (tri_best_xy, tri_count, mask_count, to_cpu_ms, post_ms,
                 masks_np, classes_np, rail_mask, green_mask, tri_positions, tri_colours) = process_frame_post(img, yres)

                # Index by the file's position in `paths`, not by how many files loaded.
                result_idx = idx_global + slot
                results_triangle_xy[result_idx] = tri_best_xy
                proc_ms = infer_ms_share + to_cpu_ms + post_ms
                fname = os.path.basename(path)
                frame_idx = result_idx + 1

                print(f"[{frame_idx}/{N}] {fname}  "
                      f"read {read_ms:.1f} | infer {infer_ms_share:.1f} | "
                      f"to_cpu {to_cpu_ms:.1f} | post {post_ms:.1f} | "
                      f"masks {mask_count} | triangles {tri_count} "
                      f"=> proc {proc_ms:.1f} ms")

                # --- RENDERING (EXCLUDED from timing) ---
                if frame_idx <= RENDER_FIRST_N:
                    overlay = render_overlays(img, masks_np, classes_np, rail_mask, green_mask, tri_positions, tri_colours)
                    out_path = out_dir / f"overlay_{frame_idx:04d}_{fname}"
                    cv2.imwrite(str(out_path), overlay)

            # Advance by the number of files requested so skipped files keep their slot.
            idx_global += requested

    return results_triangle_xy

# =======================
# Entry
# =======================
# Script entry point: run the whole pipeline; the returned list of best triangle tips is discarded.
if __name__ == "__main__":
    _ = run_pipeline_with_prints_and_overlays()


In [2]:
'''
Nice—this fits cleanly into what you already compute from the heatmap (the dark connected components). I tweaked your pipeline so we:
tag each triangle with its component label during purple_triangles, look up which component contains the lane probe point (mocked “Jake lane” = 0), and

draw a pink outline around the triangle whose component matches that lane’s hot region.
This adds essentially no overhead: we’re already building lbls and iterating labels; we just carry the label for each triangle and do a single pixel lookup.
'''

'\nNice—this fits cleanly into what you already compute from the heatmap (the dark connected components). I tweaked your pipeline so we:\ntag each triangle with its component label during purple_triangles, look up which component contains the lane probe point (mocked “Jake lane” = 0), and\n\ndraw a pink outline around the triangle whose component matches that lane’s hot region.\nThis adds essentially no overhead: we’re already building lbls and iterating labels; we just carry the label for each triangle and do a single pixel lookup.\n'

In [22]:
#!/usr/bin/env python3
# Ultra-fast, batched pipeline with threaded I/O + per-frame timing & counts
# Now renders ALL masks + triangles coloured by the mask sampled N px above the tip
# + Scout lines: per-pixel coloured line from tip to sample point (render-only)
# + NEW: starburst lines from each triangle tip to Jake's middle-lane point

import os, glob, sys, time
import cv2, torch, numpy as np
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
from ultralytics import YOLO
import os, glob, sys, time, math


# =======================
# Config
# =======================
# Paths: model weights and input frames.
home       = os.path.expanduser("~")
weights    = f"{home}/models/jakes-loped/jakes-finder-mk1/1/weights.pt"
frames_dir = Path(home) / "Documents" / "GitHub" / "Ai-plays-SubwaySurfers" / "frames"

# SAVE HERE (changed to overlays3)
out_dir    = Path(home) / "Documents" / "GitHub" / "Ai-plays-SubwaySurfers" / "out_overlays3"
out_dir.mkdir(parents=True, exist_ok=True)  # import-time side effect: creates the output directory

RAIL_ID    = 9            # class id whose masks are treated as rails in process_frame_post
IMG_SIZE   = 512          # passed as imgsz to model.predict; also the warmup frame size
CONF, IOU  = 0.30, 0.45   # passed as conf / iou to model.predict
MAX_DET    = 30           # passed as max_det to model.predict

# Color/region filter
TARGET_COLORS_RGB  = [(119,104,67), (81,42,45)]  # reference colours (RGB); converted to BGR before use
TOLERANCE          = 20.0   # max Euclidean colour distance to a target (compared squared, as TOL2)
MIN_REGION_SIZE    = 30     # min area (px) of a colour-matched connected component
MIN_REGION_HEIGHT  = 150    # min bounding-box height (px) of a colour-matched connected component

# Heat/triangle
HEAT_BLUR_KSIZE     = 51     # side of the square box-blur kernel used for the red/green score
RED_SCORE_THRESH    = 220    # score (0..255) at or above which a pixel counts as "dark red"
EXCLUDE_TOP_FRAC    = 0.40   # fraction of frame height cleared at the top before component search
EXCLUDE_BOTTOM_FRAC = 0.15   # fraction of frame height cleared at the bottom
MIN_DARK_RED_AREA   = 1200   # min area (px) for a dark-red component to yield a triangle
MIN_DARK_FRACTION   = 0.15   # min share of all dark-red pixels a component must hold
TRI_SIZE_PX         = 18     # half-width of the drawn triangle base; height is 1.2x this

# Triangle mask scan distance (N pixels above tip)
SAMPLE_UP_PX        = 65

# Colours (BGR)
COLOR_GREEN  = (0, 255, 0)
COLOR_PINK   = (203, 192, 255)  # readable pink
COLOR_YELLOW = (0, 255, 255)
COLOR_RED    = (0, 0, 255)
COLOR_WHITE  = (255, 255, 255)  # used for the lines drawn from triangle tips to JAKE_POINT

# Runtime
BATCH               = 1      # number of frames per model.predict call
THREADS_IO          = max(2, (os.cpu_count() or 4) // 2)  # worker threads used to read images
SHOW_FIRST_N        = None   # None → all frames
RENDER_FIRST_N      = 150     # render overlays for first N frames only

# =======================
# Jake lane points (hardcode middle for now)
# =======================
# Lane probe points as (x, y) in frame pixel coordinates, all on the same row.
LANE_LEFT   = (300, 1340)
LANE_MID    = (490, 1340)
LANE_RIGHT  = (680, 1340)
# Hardcoded to the middle lane. This previously pointed at LANE_RIGHT, which
# contradicted its own comment and the "middle-lane" wording used elsewhere.
JAKE_POINT  = LANE_MID

# =======================
# System/Backends
# =======================
# Turn on OpenCV's optimized code and cap its thread count at (CPU count - 1), minimum 1.
cv2.setUseOptimized(True)
try: cv2.setNumThreads(max(1, (os.cpu_count() or 1) - 1))
except Exception: pass  # best-effort: a failure to set the thread count is ignored

# Choose the inference device and precision:
#   CUDA available -> device 0, half precision
#   MPS available  -> "mps", full precision
#   otherwise      -> "cpu", full precision
if torch.cuda.is_available():
    device, half = 0, True
    torch.backends.cudnn.benchmark = True
    try: torch.set_float32_matmul_precision('high')
    except Exception: pass  # best-effort: ignored if the call fails
elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
    device, half = "mps", False
else:
    device, half = "cpu", False

# =======================
# Model
# =======================
# Load the YOLO model from `weights`; fusing is attempted and any failure is ignored.
model = YOLO(weights)
try: model.fuse()
except Exception: pass

# Warmup: one prediction on an all-black IMG_SIZE x IMG_SIZE frame using the same
# inference settings as the main loop (presumably to keep first-call overhead out
# of the per-frame timings). The result is discarded.
_dummy = np.zeros((IMG_SIZE, IMG_SIZE, 3), np.uint8)
_ = model.predict(_dummy, task="segment", imgsz=IMG_SIZE, device=device,
                  conf=CONF, iou=IOU, verbose=False, half=half, max_det=MAX_DET)

# =======================
# Precomputed
# =======================
# Target colours reordered from RGB to BGR (OpenCV channel order), one float32 row per colour.
TARGETS_BGR_F32 = np.array([(r,g,b)[::-1] for (r,g,b) in TARGET_COLORS_RGB], dtype=np.float32)
# Squared tolerance, so colour distances can be compared without a square root.
TOL2            = TOLERANCE * TOLERANCE

# Per-class overlay colour (BGR), keyed by class id.
CLASS_COLOURS = {
    0:(255,255,0),1:(192,192,192),2:(0,128,255),3:(0,255,0),
    4:(255,0,255),5:(0,255,255),6:(255,128,0),7:(128,0,255),
    8:(0,0,128),9:(0,0,255),10:(128,128,0),11:(255,255,102)
}
# Per-class label text drawn on the overlay, keyed by class id.
LABELS = {
    0:"BOOTS",1:"GREYTRAIN",2:"HIGHBARRIER1",3:"JUMP",4:"LOWBARRIER1",
    5:"LOWBARRIER2",6:"ORANGETRAIN",7:"PILLAR",8:"RAMP",9:"RAILS",
    10:"SIDEWALK",11:"YELLOWTRAIN"
}

SAFE_GREEN = {9, 10}          # rails or sidewalk or no mask -> green
# Train classes (1, 6, 11) and PILLAR (7) are in neither set, so they map to red.
WARN_YELLOW = {2,3,4,5,8}     # barrier/jump/ramp -> yellow

# =======================
# Helpers
# =======================
def load_image_with_time(path: str):
    """Read the image at *path* and report how long the read took.

    Returns an ``(image, elapsed_ms)`` pair: ``image`` is whatever
    ``cv2.imread`` returns for the path in colour mode, and ``elapsed_ms`` is
    the wall-clock duration of that call in milliseconds.
    """
    started = time.perf_counter()
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    finished = time.perf_counter()
    elapsed_ms = (finished - started) * 1000.0
    return image, elapsed_ms

def chunked(iterable, n):
    """Yield consecutive slices of at most *n* items from a sliceable sequence.

    ``iterable`` must support ``len()`` and slicing; the final slice may be
    shorter than *n*.
    """
    starts = range(0, len(iterable), n)
    yield from (iterable[begin:begin + n] for begin in starts)

def highlight_rails_mask_only_fast(img_bgr, rail_mask):
    """Return a full-frame boolean mask of rail pixels matching the target colours.

    Work is restricted to the bounding box of ``rail_mask``. A pixel is a
    candidate when it is set in ``rail_mask`` and its squared BGR distance to
    any row of ``TARGETS_BGR_F32`` is at most ``TOL2``. Candidates are grouped
    into 8-connected components; only components with area >=
    ``MIN_REGION_SIZE`` and bounding-box height >= ``MIN_REGION_HEIGHT`` are
    kept. An all-False mask is returned when nothing qualifies.
    """
    frame_h, frame_w = img_bgr.shape[:2]
    result = np.zeros((frame_h, frame_w), dtype=bool)
    if not rail_mask.any():
        return result

    # Bounding box of the rail mask.
    rows, cols = np.where(rail_mask)
    top, bottom = rows.min(), rows.max() + 1
    left, right = cols.min(), cols.max() + 1
    box = (slice(top, bottom), slice(left, right))

    # Squared distance from every ROI pixel to every target colour: [h, w, n_targets].
    pixels = img_bgr[box].astype(np.float32)
    delta = pixels[:, :, None, :] - TARGETS_BGR_F32[None, None, :, :]
    sq_dist = np.sum(delta * delta, axis=-1)
    near_target = np.any(sq_dist <= TOL2, axis=-1)

    candidates = np.logical_and(near_target, rail_mask[box])
    count, labels, stats, _ = cv2.connectedComponentsWithStats(candidates.astype(np.uint8), 8)
    if count <= 1:
        return result

    # stats row 0 is the background, so shift positions back to label ids.
    big_enough = stats[1:, cv2.CC_STAT_AREA] >= MIN_REGION_SIZE
    tall_enough = stats[1:, cv2.CC_STAT_HEIGHT] >= MIN_REGION_HEIGHT
    kept_labels = np.where(big_enough & tall_enough)[0] + 1

    result[box] = np.isin(labels, kept_labels)
    return result

def red_vs_green_score(red_mask, green_mask):
    """Build a uint8 heat map comparing local red density against green density.

    Both masks are box-blurred with a ``HEAT_BLUR_KSIZE`` square kernel. The
    difference (red - green) is scaled by twice its largest magnitude and
    shifted so that 0 maps to mid-scale, then stretched to 0..255: high values
    mean red dominates locally, low values mean green dominates.
    """
    kernel = (HEAT_BLUR_KSIZE, HEAT_BLUR_KSIZE)
    red_density = cv2.blur(red_mask.astype(np.float32), kernel)
    green_density = cv2.blur(green_mask.astype(np.float32), kernel)

    balance = red_density - green_density
    # Small epsilon avoids dividing by zero when both masks are empty.
    peak = float(np.max(np.abs(balance))) + 1e-6
    centred = balance / (2.0 * peak) + 0.5
    return np.clip(centred * 255.0, 0, 255.0).astype(np.uint8)

def purple_triangles(score, H):
    """Locate the top tip of every sufficiently large high-score region.

    Pixels with ``score >= RED_SCORE_THRESH`` are kept, the top
    ``EXCLUDE_TOP_FRAC`` and bottom ``EXCLUDE_BOTTOM_FRAC`` of the frame
    height ``H`` are cleared, and a 5x9 rectangular morphological opening is
    applied. Each 8-connected component whose area is at least
    ``MIN_DARK_RED_AREA`` and at least ``MIN_DARK_FRACTION`` of all remaining
    pixels contributes one tip: its topmost row and the mean column of the
    component's pixels on that row.

    Returns ``(tips, best)`` where ``tips`` is a list of ``(x, y)`` ints and
    ``best`` is the tip with the smallest ``y``; ``([], None)`` if none.
    """
    hot = (score >= RED_SCORE_THRESH).astype(np.uint8)

    skip_top = int(H * EXCLUDE_TOP_FRAC)
    skip_bottom = int(H * EXCLUDE_BOTTOM_FRAC)
    if skip_top:
        hot[:skip_top, :] = 0
    if skip_bottom:
        hot[-skip_bottom:, :] = 0

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 9))
    hot = cv2.morphologyEx(hot, cv2.MORPH_OPEN, kernel, iterations=1)

    hot_pixels = int(hot.sum())
    if hot_pixels == 0:
        return [], None

    min_share = int(np.ceil(MIN_DARK_FRACTION * hot_pixels))
    count, labels, stats, _ = cv2.connectedComponentsWithStats(hot, 8)
    if count <= 1:
        return [], None

    tips = []
    for label in range(1, count):  # label 0 is the background
        area = stats[label, cv2.CC_STAT_AREA]
        if area < MIN_DARK_RED_AREA or area < min_share:
            continue
        rows, cols = np.where(labels == label)
        if rows.size == 0:
            continue
        tip_y = rows.min()
        tip_x = int(cols[rows == tip_y].mean())
        tips.append((int(tip_x), int(tip_y)))

    if not tips:
        return [], None

    highest = min(tips, key=lambda tip: tip[1])
    return tips, highest

# ---- Triangle classification by sampling masks N px above tip (no resizes) ----
def classify_triangles_at_sample(tri_positions, masks_np, classes_np, frame_H, frame_W, sample_up=SAMPLE_UP_PX):
    """Colour each triangle by the mask class found ``sample_up`` px above its tip.

    For every ``(x, y)`` in ``tri_positions`` the point ``(x, max(0, y - sample_up))``
    is looked up through ``_colour_for_point``:

    * no mask at the point, or class in ``SAFE_GREEN``  -> ``COLOR_GREEN``
    * class 0                                          -> ``COLOR_PINK``
    * class in ``WARN_YELLOW``                         -> ``COLOR_YELLOW``
    * any other class                                  -> ``COLOR_RED``

    Returns a list of BGR tuples, one per triangle, or ``[]`` when
    ``masks_np``/``classes_np`` is ``None`` or there are no triangles.
    """
    if masks_np is None or classes_np is None or len(tri_positions) == 0:
        return []

    return [
        _colour_for_point(x, max(0, y - sample_up), masks_np, classes_np, frame_H, frame_W)
        for (x, y) in tri_positions
    ]

# --- colour for any frame-point via masks (no resizes) ---
def _colour_for_point(x, y, masks_np, classes_np, frame_H, frame_W):
    """Return the display colour (BGR) for frame point ``(x, y)``.

    The point is scaled from frame coordinates into the mask grid and clamped
    to valid indices. The first mask (in array order) whose value there
    exceeds 0.5 decides the class:

    * no mask data, no hit, or class in ``SAFE_GREEN`` -> ``COLOR_GREEN``
    * class 0                                         -> ``COLOR_PINK``
    * class in ``WARN_YELLOW``                        -> ``COLOR_YELLOW``
    * anything else                                   -> ``COLOR_RED``
    """
    if masks_np is None or classes_np is None or masks_np.size == 0:
        return COLOR_GREEN

    grid_h, grid_w = masks_np.shape[1], masks_np.shape[2]
    scale_x = (grid_w - 1) / max(1, (frame_W - 1))
    scale_y = (grid_h - 1) / max(1, (frame_H - 1))

    # Scale into the mask grid, then clamp to [0, size - 1].
    col = min(max(int(round(x * scale_x)), 0), grid_w - 1)
    row = min(max(int(round(y * scale_y)), 0), grid_h - 1)

    hit_class = next(
        (int(cls_id) for mask, cls_id in zip(masks_np, classes_np) if mask[row, col] > 0.5),
        None,
    )

    if hit_class is None or hit_class in SAFE_GREEN:
        return COLOR_GREEN
    if hit_class == 0:
        return COLOR_PINK
    return COLOR_YELLOW if hit_class in WARN_YELLOW else COLOR_RED

# Returns: (tri_best_xy, tri_count, mask_count, to_cpu_ms, post_ms, masks_np, classes_np, rail_mask, green, tri_positions, tri_colours)
def process_frame_post(frame_bgr, yolo_res):
    """Post-process one prediction result into triangle tips and their colours.

    Returns an 11-tuple:
    ``(tri_best, tri_count, mask_count, to_cpu_ms, post_ms, masks_np,
    classes_np, rail_mask, green, tri_positions, tri_colours)``.

    * ``to_cpu_ms`` times only the tensor -> NumPy conversion; ``post_ms``
      times the rail/heat/triangle work. Both are 0.0 when that stage is skipped.
    * When the result has no masks, or no mask of class ``RAIL_ID``, the
      triangle fields are ``None``/``0``/``[]`` and ``rail_mask``/``green``
      are ``None``.
    """
    H, W = frame_bgr.shape[:2]
    if yolo_res.masks is None:
        return None, 0, 0, 0.0, 0.0, None, None, None, None, [], []

    # --- timed section 1: move masks and class ids to NumPy ---
    t0_to_cpu = time.perf_counter()
    masks_np = yolo_res.masks.data.cpu().numpy()  # [n,h,w]
    mask_count = int(masks_np.shape[0])
    # Prefer class ids attached to the masks object; otherwise fall back to the boxes.
    if hasattr(yolo_res.masks, "cls") and yolo_res.masks.cls is not None:
        classes_np = yolo_res.masks.cls.cpu().numpy().astype(int)
    else:
        classes_np = yolo_res.boxes.cls.cpu().numpy().astype(int)
    t1_to_cpu = time.perf_counter()
    to_cpu_ms = (t1_to_cpu - t0_to_cpu) * 1000.0

    if mask_count == 0 or classes_np.size == 0:
        return None, 0, mask_count, to_cpu_ms, 0.0, masks_np, classes_np, None, None, [], []

    rail_sel = (classes_np == RAIL_ID)
    if not np.any(rail_sel):
        return None, 0, mask_count, to_cpu_ms, 0.0, masks_np, classes_np, None, None, [], []

    # --- timed section 2: rail union -> green/red split -> heat score -> triangles ---
    t0_post = time.perf_counter()
    rail_masks = masks_np[rail_sel].astype(bool)        # [k,h,w]
    union = np.any(rail_masks, axis=0).astype(np.uint8) # [h,w]
    # Only the merged rail mask is resized to frame resolution (nearest neighbour).
    rail_mask = cv2.resize(union, (W, H), interpolation=cv2.INTER_NEAREST).astype(bool)

    # green = rail pixels matching the target colours; red = the remaining rail pixels.
    green = highlight_rails_mask_only_fast(frame_bgr, rail_mask)
    red   = np.logical_and(rail_mask, np.logical_not(green))
    score = red_vs_green_score(red, green)
    tri_positions, tri_best = purple_triangles(score, H)

    # classify triangles by sampling masks above tip
    tri_colours = classify_triangles_at_sample(tri_positions, masks_np, classes_np, H, W, SAMPLE_UP_PX)

    t1_post = time.perf_counter()
    post_ms = (t1_post - t0_post) * 1000.0

    return tri_best, len(tri_positions), mask_count, to_cpu_ms, post_ms, masks_np, classes_np, rail_mask, green, tri_positions, tri_colours

# --- rendering (excluded from timing) ---
def draw_triangle(img, x, y, size=TRI_SIZE_PX, colour=COLOR_RED):
    """Draw a filled triangle on ``img`` (in place) with its apex at ``(x, y)``.

    The base lies ``int(size * 1.2)`` px below the apex and spans ``size`` px
    to either side of ``x``. The fill uses ``colour``; a 1px anti-aliased
    black outline is drawn on top.
    """
    drop = int(size * 1.2)
    base_y = y + drop
    corners = np.array([[x, y], [x - size, base_y], [x + size, base_y]], np.int32)
    cv2.fillConvexPoly(img, corners, colour)
    outline = corners.reshape(-1, 1, 2)
    cv2.polylines(img, [outline], True, (0, 0, 0), 1, cv2.LINE_AA)

def render_overlays(frame_bgr, masks_np, classes_np, rail_mask, green_mask, tri_positions, tri_colours):
    """Draw every overlay on a copy of ``frame_bgr`` and return the copy.

    Layers, in drawing order:
      1. each mask blended with its ``CLASS_COLOURS`` entry, plus a text label
         near the mask centroid,
      2. a red tint over ``rail_mask`` and solid green over ``green_mask``,
      3. scout lines: one pixel per row from each triangle tip up to
         ``SAMPLE_UP_PX`` rows above it, coloured by ``_colour_for_point``,
      4. starburst: a white line from ``JAKE_POINT`` to each triangle tip,
         labelled at its midpoint with the line's angle from vertical,
      5. the triangles themselves, paired with ``tri_colours``.

    ``masks_np``/``classes_np``/``rail_mask``/``green_mask`` may be ``None``;
    the corresponding layer is then skipped.
    """
    out = frame_bgr.copy()
    H, W = out.shape[:2]
    alpha = 0.45

    if masks_np is not None and classes_np is not None and masks_np.size:
        for m, c in zip(masks_np, classes_np):
            # Normalise to a boolean mask on BOTH paths. Previously a mask that
            # already had frame shape was used as-is, and a non-boolean array
            # is not a valid mask index for `out[...]`.
            m_u8 = m.astype(np.uint8)
            if m_u8.shape != (H, W):
                m_u8 = cv2.resize(m_u8, (W, H), interpolation=cv2.INTER_NEAREST)
            m_full = m_u8.astype(bool)

            cls_id = int(c)
            color = CLASS_COLOURS.get(cls_id, (255,255,255))
            out[m_full] = (np.array(color, dtype=np.uint8) * alpha + out[m_full] * (1 - alpha)).astype(np.uint8)

            ys, xs = np.where(m_full)
            if xs.size:
                xc, yc = int(xs.mean()), int(ys.mean())
                label = LABELS.get(cls_id, f"C{cls_id}")
                org = (max(5, xc-40), max(20, yc))
                # Black stroke under white text keeps the label readable on any background.
                cv2.putText(out, label, org,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,0,0), 2, cv2.LINE_AA)
                cv2.putText(out, label, org,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,255), 1, cv2.LINE_AA)

    if rail_mask is not None:
        tint = out.copy()
        tint[rail_mask] = (0, 0, 255)  # red tint for rails
        out = cv2.addWeighted(tint, 0.30, out, 0.70, 0)
    if green_mask is not None:
        out[green_mask] = (0, 255, 0)

    # Scout lines (render-only): tiny, cheap
    for (x, y) in tri_positions:
        if not (0 <= x < W):
            continue  # column outside the frame: nothing to draw
        y_start = min(y, H - 1)  # never index below the last row
        y_end = max(0, y - SAMPLE_UP_PX)
        for yy in range(y_start, y_end - 1, -1):
            out[yy, x] = _colour_for_point(x, yy, masks_np, classes_np, H, W)

    # Starburst lines from JAKE_POINT to each triangle tip
    xj, yj = JAKE_POINT
    for (xt, yt) in tri_positions:
        # clamp target to frame bounds just in case
        xt = max(0, min(W-1, int(xt)))
        yt = max(0, min(H-1, int(yt)))

        dx = xt - xj
        dy = yt - yj

        # Degrees FROM VERTICAL (0° = perfectly vertical line)
        deg_from_vertical = 90.0 if dy == 0 else math.degrees(math.atan2(abs(dx), abs(dy)))

        # Draw the line
        cv2.line(out, (xj, yj), (xt, yt), COLOR_WHITE, 2, cv2.LINE_AA)

        # Label near the midpoint of the line
        mid_x = int((xj + xt) / 2)
        mid_y = int((yj + yt) / 2)
        label = f"{deg_from_vertical:.1f}°"

        # outlined text for readability
        cv2.putText(out, label, (mid_x, mid_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0,0,0), 2, cv2.LINE_AA)
        cv2.putText(out, label, (mid_x, mid_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255,255,255), 1, cv2.LINE_AA)

    # Triangles (after lines so tips stay visible)
    for (x, y), col in zip(tri_positions, tri_colours):
        draw_triangle(out, x, y, colour=col)

    return out

# =======================
# Batched execution with prints; overlays saved for first N
# =======================
def run_pipeline_with_prints_and_overlays():
    """Run inference + post-processing over the frames found in ``frames_dir``.

    Frames are discovered by glob (``*.jpg`` / ``*.png``), de-duplicated,
    sorted, optionally truncated to ``SHOW_FIRST_N`` and processed in chunks
    of ``BATCH``.  A timing line is printed for every readable frame, and an
    overlay image is written to ``out_dir`` for frames whose 1-based position
    is at most ``RENDER_FIRST_N``.

    Returns:
        list: one slot per discovered path (sorted order) holding the first
        value returned by ``process_frame_post`` for that frame.  Slots of
        frames that could not be read stay ``None``.

    Raises:
        FileNotFoundError: if no image files are found in ``frames_dir``.
    """
    paths = (
        glob.glob(str(frames_dir/"frame_*.jpg")) +
        glob.glob(str(frames_dir/"frame_*.png")) +
        glob.glob(str(frames_dir/"*.jpg")) +
        glob.glob(str(frames_dir/"*.png"))
    )
    # The patterns overlap, so de-duplicate before sorting.
    paths = sorted(set(paths))
    if not paths:
        raise FileNotFoundError(f"No images in: {frames_dir}")
    if SHOW_FIRST_N is not None:
        paths = paths[:SHOW_FIRST_N]

    N = len(paths)
    results_triangle_xy = [None] * N

    def load_batch(batch_paths):
        """Read a chunk concurrently and keep only the readable images.

        Returns four parallel lists: offset of each surviving image within
        ``batch_paths``, its path, the image, and the read time in ms.
        """
        imgs = [None] * len(batch_paths)
        read_ms = [0.0] * len(batch_paths)
        with ThreadPoolExecutor(max_workers=THREADS_IO) as ex:
            fut2idx = {ex.submit(load_image_with_time, p): i for i, p in enumerate(batch_paths)}
            for fut in as_completed(fut2idx):
                i = fut2idx[fut]
                imgs[i], read_ms[i] = fut.result()

        offsets, b_paths, b_imgs, b_read = [], [], [], []
        for i, (p, im, rm) in enumerate(zip(batch_paths, imgs, read_ms)):
            if im is None:
                # Make skipped frames visible instead of silently dropping them.
                print(f"[skip] unreadable image: {p}", file=sys.stderr)
                continue
            offsets.append(i)
            b_paths.append(p)
            b_imgs.append(im)
            b_read.append(rm)
        return offsets, b_paths, b_imgs, b_read

    idx_global = 0
    for chunk_paths in chunked(paths, BATCH):
        # Remember the chunk size BEFORE filtering: the global index must
        # advance by the number of paths consumed, not by the number of images
        # that happened to load, otherwise later results drift to wrong slots.
        chunk_len = len(chunk_paths)
        offsets, batch_paths, imgs_bgr, read_ms_list = load_batch(chunk_paths)
        B = len(imgs_bgr)
        if B == 0:
            idx_global += chunk_len
            continue

        t0_inf = time.perf_counter()
        res_list = model.predict(
            imgs_bgr, task="segment", imgsz=IMG_SIZE, device=device,
            conf=CONF, iou=IOU, verbose=False, half=half, max_det=MAX_DET,
            batch=B
        )
        # Best-effort device sync so the inference timing covers queued work.
        try:
            if device == 0 and torch.cuda.is_available():
                torch.cuda.synchronize()
            elif device == "mps" and getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
                torch.mps.synchronize()
        except Exception:
            pass
        t1_inf = time.perf_counter()
        # Inference time is split evenly across the images of the batch.
        infer_ms_share = ((t1_inf - t0_inf) * 1000.0) / B

        for j, (img, yres, read_ms) in enumerate(zip(imgs_bgr, res_list, read_ms_list)):
            (tri_best_xy, tri_count, mask_count, to_cpu_ms, post_ms,
             masks_np, classes_np, rail_mask, green_mask, tri_positions, tri_colours) = process_frame_post(img, yres)

            # Position of this frame in ``paths`` (robust to skipped files).
            pos = idx_global + offsets[j]
            results_triangle_xy[pos] = tri_best_xy
            proc_ms = infer_ms_share + to_cpu_ms + post_ms
            fname = os.path.basename(batch_paths[j])
            frame_idx = pos + 1

            print(f"[{frame_idx}/{N}] {fname}  "
                  f"read {read_ms:.1f} | infer {infer_ms_share:.1f} | "
                  f"to_cpu {to_cpu_ms:.1f} | post {post_ms:.1f} | "
                  f"masks {mask_count} | triangles {tri_count} "
                  f"=> proc {proc_ms:.1f} ms")

            # --- RENDERING (EXCLUDED from timing) ---
            if frame_idx <= RENDER_FIRST_N:
                overlay = render_overlays(img, masks_np, classes_np, rail_mask, green_mask, tri_positions, tri_colours)
                out_path = out_dir / f"overlay_{frame_idx:04d}_{fname}"
                cv2.imwrite(str(out_path), overlay)

        idx_global += chunk_len

    return results_triangle_xy

# =======================
# Entry
# =======================
if __name__ == "__main__":
    # Run the full pipeline; the returned per-frame triangle list is discarded.
    _ = run_pipeline_with_prints_and_overlays()


YOLO11n-seg summary (fused): 113 layers, 2,836,908 parameters, 0 gradients, 10.2 GFLOPs
[1/144] frame_00000.png  read 43.7 | infer 101.9 | to_cpu 5.5 | post 53.8 | masks 2 | triangles 1 => proc 161.3 ms
[2/144] frame_00001.png  read 37.1 | infer 42.8 | to_cpu 1.2 | post 75.1 | masks 3 | triangles 1 => proc 119.1 ms
[3/144] frame_00002.png  read 36.0 | infer 45.8 | to_cpu 0.8 | post 95.8 | masks 3 | triangles 1 => proc 142.4 ms
[4/144] frame_00003.png  read 39.4 | infer 51.2 | to_cpu 1.2 | post 104.9 | masks 2 | triangles 1 => proc 157.3 ms
[5/144] frame_00004.png  read 61.8 | infer 87.9 | to_cpu 0.8 | post 169.5 | masks 5 | triangles 1 => proc 258.3 ms
[6/144] frame_00005.png  read 37.4 | infer 43.4 | to_cpu 0.9 | post 166.9 | masks 5 | triangles 1 => proc 211.2 ms
[7/144] frame_00006.png  read 40.9 | infer 55.0 | to_cpu 0.9 | post 160.1 | masks 6 | triangles 2 => proc 215.9 ms
[8/144] frame_00007.png  read 39.7 | infer 37.7 | to_cpu 1.0 | post 311.2 | masks 4 | triangles 2 => proc 349

KeyboardInterrupt: 

In [23]:
# Jake's triangle finder (final)

In [28]:
#!/usr/bin/env python3
# Ultra-fast, batched pipeline with threaded I/O + per-frame timing & counts
# Renders ALL masks + triangles coloured by the mask sampled N px above the tip
# + Scout lines (render only)
# + Starburst lines from each triangle tip to Jake's lane point
# + NEW: Lane-bearing selector: choose triangle whose signed degrees-from-vertical
#       is closest to the lane's target (left=-10.7°, mid=+1.5°, right=+15.0°)

import os, glob, sys, time, math
import cv2, torch, numpy as np
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
from ultralytics import YOLO

# =======================
# Config
# =======================
# Filesystem locations: model weights, input frames and overlay output folder.
home       = os.path.expanduser("~")
weights    = f"{home}/models/jakes-loped/jakes-finder-mk1/1/weights.pt"
frames_dir = Path(home) / "Documents" / "GitHub" / "Ai-plays-SubwaySurfers" / "frames"

# SAVE HERE
out_dir    = Path(home) / "Documents" / "GitHub" / "Ai-plays-SubwaySurfers" / "out_overlays4"
# Side effect at import/run time: the output folder is created if missing.
out_dir.mkdir(parents=True, exist_ok=True)

# Class id 9 is labelled "RAILS" in LABELS below.
RAIL_ID    = 9
# Inference settings passed to model.predict (imgsz / conf / iou / max_det).
IMG_SIZE   = 512
CONF, IOU  = 0.30, 0.45
MAX_DET    = 30

# Color/region filter
# Target colours are given as RGB and converted to BGR in TARGETS_BGR_F32.
TARGET_COLORS_RGB  = [(119,104,67), (81,42,45)]
# Max Euclidean colour distance for a pixel to count as a hit (compared squared).
TOLERANCE          = 20.0
# Connected components must reach both this area and this height to be kept.
MIN_REGION_SIZE    = 30
MIN_REGION_HEIGHT  = 150

# Heat/triangle
# Side length of the square box-blur kernel used for the red/green heat maps.
HEAT_BLUR_KSIZE     = 51
# Score (0..255) at or above which a pixel counts as "dark red".
RED_SCORE_THRESH    = 220
# Fractions of the frame height zeroed out at the top / bottom before labelling.
EXCLUDE_TOP_FRAC    = 0.40
EXCLUDE_BOTTOM_FRAC = 0.15
# A dark-red component needs this absolute area AND this share of all dark pixels.
MIN_DARK_RED_AREA   = 1200
MIN_DARK_FRACTION   = 0.15
# Half-width of a drawn triangle; its height is int(1.2 * TRI_SIZE_PX).
TRI_SIZE_PX         = 18

# Triangle mask scan distance (N pixels above tip)
SAMPLE_UP_PX        = 65

# Colours (BGR)
COLOR_GREEN  = (0, 255, 0)
COLOR_PINK   = (203, 192, 255)
COLOR_YELLOW = (0, 255, 255)
COLOR_RED    = (0, 0, 255)
COLOR_WHITE  = (255, 255, 255)
COLOR_CYAN   = (255, 255, 0)
COLOR_BLACK  = (0, 0, 0)

# Runtime
# Number of frames per model.predict call.
BATCH               = 1
# Worker threads used for concurrent image reads.
THREADS_IO          = max(2, (os.cpu_count() or 4) // 2)
SHOW_FIRST_N        = None   # None → all frames
RENDER_FIRST_N      = 150    # render overlays for first N frames only

# =======================
# Jake lane points (you can switch which one by changing JAKE_POINT)
# =======================
# (x, y) pixel positions of Jake for each lane; JAKE_POINT is unpacked as
# (xj, yj) and used as the origin of the starburst lines and bearings.
LANE_LEFT   = (300, 1340)
LANE_MID    = (490, 1340)
LANE_RIGHT  = (680, 1340)
JAKE_POINT  = LANE_RIGHT  # pick one: LANE_LEFT / LANE_MID / LANE_RIGHT

# Target bearing (degrees from vertical) per lane, keyed by the lane Jake is in.
# The sign follows signed_degrees_from_vertical: a triangle to the LEFT of Jake
# has a positive bearing, one to the RIGHT a negative bearing. The key names
# are lane names, not bearing directions.
LANE_TARGET_DEG = {
    "left":  -10.7,
    "mid":   +1.5,
    "right": +15.0,
}

def lane_name_from_point(p):
    """Map a lane anchor point to its lane name ("left" / "mid" / "right").

    Points that match none of the three lane anchors fall back to "mid".
    """
    known_lanes = (
        ("left", LANE_LEFT),
        ("mid", LANE_MID),
        ("right", LANE_RIGHT),
    )
    for lane, anchor in known_lanes:
        if p == anchor:
            return lane
    # unmatched point: treat as the middle lane
    return "mid"

# =======================
# System/Backends
# =======================
# Enable OpenCV's optimized code paths and leave one CPU core free for
# everything else; failure to set the thread count is deliberately ignored.
cv2.setUseOptimized(True)
try: cv2.setNumThreads(max(1, (os.cpu_count() or 1) - 1))
except Exception: pass

# Pick the inference device and whether to request half precision:
#   CUDA -> device index 0 with half=True, MPS -> "mps", otherwise CPU.
# `device` and `half` are later passed straight to model.predict.
if torch.cuda.is_available():
    device, half = 0, True
    torch.backends.cudnn.benchmark = True
    # Best-effort: ignored if this torch build does not provide the setter.
    try: torch.set_float32_matmul_precision('high')
    except Exception: pass
elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
    device, half = "mps", False
else:
    device, half = "cpu", False

# =======================
# Model
# =======================
# Load the segmentation model from `weights`; fusing is best-effort and any
# failure is ignored.
model = YOLO(weights)
try: model.fuse()
except Exception: pass

# Warmup
# One throw-away prediction on a black IMG_SIZE x IMG_SIZE frame, using the
# same arguments as the real calls, so first-call setup cost is paid here
# rather than on the first real frame.
_dummy = np.zeros((IMG_SIZE, IMG_SIZE, 3), np.uint8)
_ = model.predict(_dummy, task="segment", imgsz=IMG_SIZE, device=device,
                  conf=CONF, iou=IOU, verbose=False, half=half, max_det=MAX_DET)

# =======================
# Precomputed
# =======================
# Target colours reversed from RGB to BGR order, as float32 rows, plus the
# squared tolerance so colour matching can compare squared distances.
TARGETS_BGR_F32 = np.array([(r,g,b)[::-1] for (r,g,b) in TARGET_COLORS_RGB], dtype=np.float32)
TOL2            = TOLERANCE * TOLERANCE

# Overlay colour per class id; render_overlays blends these directly into the
# frame it draws on.
CLASS_COLOURS = {
    0:(255,255,0),1:(192,192,192),2:(0,128,255),3:(0,255,0),
    4:(255,0,255),5:(0,255,255),6:(255,128,0),7:(128,0,255),
    8:(0,0,128),9:(0,0,255),10:(128,128,0),11:(255,255,102)
}
# Human-readable name per class id, used for the mask labels.
LABELS = {
    0:"BOOTS",1:"GREYTRAIN",2:"HIGHBARRIER1",3:"JUMP",4:"LOWBARRIER1",
    5:"LOWBARRIER2",6:"ORANGETRAIN",7:"PILLAR",8:"RAMP",9:"RAILS",
    10:"SIDEWALK",11:"YELLOWTRAIN"
}

# Class ids coloured green (RAILS, SIDEWALK) and yellow (barriers, JUMP, RAMP)
# by the colour lookups; class 0 maps to pink and everything else to red.
SAFE_GREEN = {9, 10}
WARN_YELLOW = {2,3,4,5,8}

# =======================
# Helpers
# =======================
def load_image_with_time(path: str):
    """Read ``path`` with ``cv2.imread`` (colour mode) and time the read.

    Returns:
        tuple: ``(image, elapsed_ms)`` where ``image`` is whatever
        ``cv2.imread`` returned and ``elapsed_ms`` is the wall-clock read
        time in milliseconds.
    """
    started = time.perf_counter()
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    finished = time.perf_counter()
    elapsed_ms = (finished - started) * 1000.0
    return image, elapsed_ms

def chunked(iterable, n):
    """Yield consecutive slices of ``iterable`` holding up to ``n`` items each.

    ``iterable`` must support ``len()`` and slicing; the final slice may be
    shorter than ``n``.
    """
    yield from (iterable[start:start + n] for start in range(0, len(iterable), n))

def highlight_rails_mask_only_fast(img_bgr, rail_mask):
    """Return a full-frame boolean mask of target-coloured pixels inside the rails.

    Work is restricted to the bounding box of ``rail_mask``.  A pixel is a hit
    when it lies in the rail mask and is within ``TOLERANCE`` (Euclidean, BGR)
    of any target colour.  Hits are grouped into 8-connected components and
    only components with area >= ``MIN_REGION_SIZE`` and height >=
    ``MIN_REGION_HEIGHT`` survive.
    """
    frame_h, frame_w = img_bgr.shape[:2]
    if not rail_mask.any():
        return np.zeros((frame_h, frame_w), dtype=bool)

    # Bounding box of the rail mask
    rows, cols = np.nonzero(rail_mask)
    top, bottom = rows.min(), rows.max() + 1
    left, right = cols.min(), cols.max() + 1
    roi_pixels = img_bgr[top:bottom, left:right].astype(np.float32)
    roi_mask = rail_mask[top:bottom, left:right]

    # Squared distance from every ROI pixel to every target colour
    delta = roi_pixels[:, :, None, :] - TARGETS_BGR_F32[None, None, :, :]
    sq_dist = np.sum(delta * delta, axis=-1)
    near_target = np.any(sq_dist <= TOL2, axis=-1)

    hits = np.logical_and(near_target, roi_mask)
    n_labels, label_img, stats, _ = cv2.connectedComponentsWithStats(hits.astype(np.uint8), 8)
    if n_labels <= 1:
        return np.zeros((frame_h, frame_w), dtype=bool)

    # Label 0 is the background, hence the [1:] slices and the +1 offset.
    big_enough = stats[1:, cv2.CC_STAT_AREA] >= MIN_REGION_SIZE
    tall_enough = stats[1:, cv2.CC_STAT_HEIGHT] >= MIN_REGION_HEIGHT
    kept_labels = np.flatnonzero(big_enough & tall_enough) + 1
    kept = np.isin(label_img, kept_labels)

    result = np.zeros((frame_h, frame_w), dtype=bool)
    result[top:bottom, left:right] = kept
    return result

def red_vs_green_score(red_mask, green_mask):
    """Build a uint8 score map contrasting blurred red and green masks.

    Both masks are box-blurred with a ``HEAT_BLUR_KSIZE`` square kernel; the
    difference (red - green) is scaled by its peak magnitude so that 0 maps to
    the mid value, then stretched to 0..255.  Higher scores mean "more red".
    """
    kernel = (HEAT_BLUR_KSIZE, HEAT_BLUR_KSIZE)
    red_heat = cv2.blur(red_mask.astype(np.float32), kernel)
    green_heat = cv2.blur(green_mask.astype(np.float32), kernel)

    balance = red_heat - green_heat
    # Small epsilon avoids dividing by zero when both maps are identical.
    peak = float(np.max(np.abs(balance))) + 1e-6
    centred = (balance / (2.0 * peak) + 0.5)
    scaled = np.clip(centred * 255.0, 0, 255.0)
    return scaled.astype(np.uint8)

def purple_triangles(score, H):
    """Locate triangle tips at the top of each large high-score region.

    Args:
        score: uint8 score map (see ``red_vs_green_score``).
        H: frame height, used to size the excluded top/bottom bands.

    Returns:
        tuple: ``(tips, best)`` where ``tips`` is a list of ``(x, y)`` tip
        positions (x is the mean column of the region's topmost row) and
        ``best`` is the tip with the smallest y.  ``([], None)`` when nothing
        qualifies.
    """
    skip_top = int(H * EXCLUDE_TOP_FRAC)
    skip_bottom = int(H * EXCLUDE_BOTTOM_FRAC)

    hot = (score >= RED_SCORE_THRESH).astype(np.uint8)
    if skip_top:
        hot[:skip_top, :] = 0
    if skip_bottom:
        hot[-skip_bottom:, :] = 0

    # Morphological opening with a 5x9 rectangle removes small specks.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 9))
    hot = cv2.morphologyEx(hot, cv2.MORPH_OPEN, kernel, iterations=1)

    hot_total = int(hot.sum())
    if hot_total == 0:
        return [], None

    min_share = int(np.ceil(MIN_DARK_FRACTION * hot_total))
    n_labels, label_img, stats, _ = cv2.connectedComponentsWithStats(hot, 8)
    if n_labels <= 1:
        return [], None

    tips = []
    for label in range(1, n_labels):
        area = stats[label, cv2.CC_STAT_AREA]
        # A region must be large in absolute terms AND relative to all hot pixels.
        if area < MIN_DARK_RED_AREA or area < min_share:
            continue
        rows, cols = np.where(label_img == label)
        if rows.size == 0:
            continue
        top_row = rows.min()
        tip_x = int(cols[rows == top_row].mean())
        tips.append((int(tip_x), int(top_row)))

    if not tips:
        return [], None

    topmost = min(tips, key=lambda tip: tip[1])
    return tips, topmost

def classify_triangles_at_sample(tri_positions, masks_np, classes_np, frame_H, frame_W, sample_up=SAMPLE_UP_PX):
    """Colour each triangle by the mask class found ``sample_up`` px above its tip.

    Args:
        tri_positions: sequence of ``(x, y)`` tip positions in frame pixels.
        masks_np: stack of masks indexed ``[mask, row, col]`` or ``None``.
        classes_np: class id per mask (parallel to ``masks_np``) or ``None``.
        frame_H, frame_W: frame size, used to scale frame pixels to mask pixels.
        sample_up: how many pixels above the tip to sample (clamped at row 0).

    Returns:
        list: one BGR colour per triangle, or ``[]`` when either mask input is
        ``None`` or there are no triangles.

    The per-point lookup is delegated to ``_colour_for_point`` so both paths
    share one implementation, including its guard for an empty mask array
    (which yields green instead of failing on the shape lookup).
    """
    if masks_np is None or classes_np is None or len(tri_positions) == 0:
        return []
    return [
        _colour_for_point(x, max(0, y - sample_up), masks_np, classes_np, frame_H, frame_W)
        for (x, y) in tri_positions
    ]

def _colour_for_point(x, y, masks_np, classes_np, frame_H, frame_W):
    """Return the BGR colour for frame pixel ``(x, y)`` based on the mask covering it.

    The point is scaled from frame to mask resolution and clamped to the mask
    bounds.  The first mask whose value there exceeds 0.5 decides the class:
    no mask or a SAFE_GREEN class -> green, class 0 -> pink, WARN_YELLOW ->
    yellow, anything else -> red.  Missing or empty masks yield green.
    """
    if masks_np is None or classes_np is None or masks_np.size == 0:
        return COLOR_GREEN

    mask_h, mask_w = masks_np.shape[1], masks_np.shape[2]
    scale_x = (mask_w - 1) / max(1, (frame_W - 1))
    scale_y = (mask_h - 1) / max(1, (frame_H - 1))

    col = int(round(x * scale_x))
    row = int(round(y * scale_y))
    # clamp into the mask
    if col < 0:
        col = 0
    elif col >= mask_w:
        col = mask_w - 1
    if row < 0:
        row = 0
    elif row >= mask_h:
        row = mask_h - 1

    hit_class = next(
        (int(cls) for mask, cls in zip(masks_np, classes_np) if mask[row, col] > 0.5),
        None,
    )

    if hit_class is None or hit_class in SAFE_GREEN:
        return COLOR_GREEN
    if hit_class == 0:
        return COLOR_PINK
    if hit_class in WARN_YELLOW:
        return COLOR_YELLOW
    return COLOR_RED

def draw_triangle(img, x, y, size=TRI_SIZE_PX, colour=COLOR_RED):
    """Draw a filled, black-outlined triangle on ``img`` with its tip at ``(x, y)``.

    The base lies ``int(size * 1.2)`` pixels below the tip and spans ``size``
    pixels to either side.  ``img`` is modified in place.
    """
    drop = int(size * 1.2)
    corners = np.array(
        [[x, y], [x - size, y + drop], [x + size, y + drop]],
        np.int32,
    )
    cv2.fillConvexPoly(img, corners, colour)
    outline = corners.reshape(-1, 1, 2)
    cv2.polylines(img, [outline], True, COLOR_BLACK, 1, cv2.LINE_AA)

def triangle_pts(x, y, size=TRI_SIZE_PX):
    """Return the int32 corner array (tip, bottom-left, bottom-right) of a triangle.

    The tip is at ``(x, y)``; the base is ``int(size * 1.2)`` pixels below it
    and extends ``size`` pixels to each side.
    """
    drop = int(size * 1.2)
    tip = [x, y]
    bottom_left = [x - size, y + drop]
    bottom_right = [x + size, y + drop]
    return np.array([tip, bottom_left, bottom_right], np.int32)

# ===== Bearing-based Jake triangle selection =====
def signed_degrees_from_vertical(dx, dy):
    """
    Signed angle (degrees) between the upward vertical and the offset (dx, dy).

    Image coordinates are assumed (y grows downwards), so a target above the
    origin has dy < 0.
    Conventions:
      - left of Jake (dx < 0) => +degrees
      - right of Jake (dx > 0) => -degrees
      - straight ahead (vertical) => 0°
    Implementation: angle = -atan2(dx, -dy)  (since -dy>0 for triangles above)

    Returns 0.0 for a zero offset.  The result is normalised so that an exactly
    vertical bearing is +0.0 rather than -0.0, which would otherwise show up
    as "-0.0°" in formatted labels.
    """
    # Explicit guard: with float zeros, atan2(0.0, -0.0) would yield 180°.
    if dx == 0 and dy == 0:
        return 0.0
    # Adding 0.0 turns IEEE negative zero into positive zero and leaves every
    # other value unchanged.
    return -math.degrees(math.atan2(dx, -dy)) + 0.0

def select_triangle_by_bearing(tri_positions, jx, jy, target_deg, min_dy=6):
    """
    Choose the triangle whose signed bearing from (jx, jy) best matches target_deg.

    Only triangles strictly more than min_dy pixels above Jake (yt < jy - min_dy)
    are considered.  Ties go to the earliest triangle in tri_positions.
    Returns (best_index, best_deg, best_err), or (-1, None, None) when no
    triangle is far enough ahead.
    """
    candidates = []
    for index, (tx, ty) in enumerate(tri_positions):
        rise = ty - jy
        if rise >= -min_dy:
            # level with or below Jake (or too close): not "ahead"
            continue
        bearing = signed_degrees_from_vertical(tx - jx, rise)
        candidates.append((abs(bearing - target_deg), index, bearing))

    if not candidates:
        return -1, None, None

    # min() keeps the first entry among equal errors, matching a strict "<" scan.
    miss, index, bearing = min(candidates, key=lambda entry: entry[0])
    return index, bearing, miss

# --- rendering (excluded from timing) ---
def render_overlays(frame_bgr, masks_np, classes_np, rail_mask, green_mask, tri_positions, tri_colours):
    """Compose the debug overlay for one frame and return it as a new image.

    Drawing order (later layers paint over earlier ones):
      1. every mask, alpha-blended in its class colour, with a class label;
      2. a 30 % red tint over ``rail_mask`` and solid green over ``green_mask``;
      3. one-pixel-wide "scout" columns from each triangle tip upwards;
      4. starburst lines from ``JAKE_POINT`` to each tip with the signed
         bearing as a label, the selected one in cyan;
      5. the triangles themselves, then a cyan outline and tag on the selected one.

    Args:
        frame_bgr: source frame; it is copied, not modified.
        masks_np, classes_np: parallel masks / class ids, or ``None``.
        rail_mask, green_mask: index masks applied to the frame, or ``None``.
        tri_positions: ``(x, y)`` triangle tips in frame pixels.
        tri_colours: colour per triangle, paired with ``tri_positions`` by zip.

    Returns:
        The annotated copy of ``frame_bgr``.
    """
    out = frame_bgr.copy()
    H, W = out.shape[:2]
    alpha = 0.45

    # masks
    if masks_np is not None and classes_np is not None and masks_np.size:
        for m, c in zip(masks_np, classes_np):
            m_full = m
            # Masks at a different resolution are upscaled to the frame size.
            # NOTE(review): when the shapes already match, `m` is used as an
            # index as-is — presumably a boolean array; confirm against caller.
            if m.shape != (H, W):
                m_full = cv2.resize(m.astype(np.uint8), (W, H), interpolation=cv2.INTER_NEAREST).astype(bool)
            color = CLASS_COLOURS.get(int(c), (255,255,255))
            # Blend the class colour into the masked pixels.
            out[m_full] = (np.array(color, dtype=np.uint8) * alpha + out[m_full] * (1 - alpha)).astype(np.uint8)
            ys, xs = np.where(m_full)
            if xs.size:
                # Label near the mask centroid; black outline under white text.
                xc, yc = int(xs.mean()), int(ys.mean())
                label = LABELS.get(int(c), f"C{int(c)}")
                cv2.putText(out, label, (max(5, xc-40), max(20, yc)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, COLOR_BLACK, 2, cv2.LINE_AA)
                cv2.putText(out, label, (max(5, xc-40), max(20, yc)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,255), 1, cv2.LINE_AA)

    if rail_mask is not None:
        # 30 % red tint over the rail pixels.
        tint = out.copy()
        tint[rail_mask] = (0, 0, 255)
        out = cv2.addWeighted(tint, 0.30, out, 0.70, 0)
    if green_mask is not None:
        # Solid green, no blending.
        out[green_mask] = (0, 255, 0)

    # Scout lines
    # From each tip up to SAMPLE_UP_PX rows above it (clamped at row 0), each
    # pixel is coloured by the mask class found at that pixel.
    if tri_positions:
        for (x, y) in tri_positions:
            y_end = max(0, y - SAMPLE_UP_PX)
            for yy in range(y, y_end - 1, -1):
                col = _colour_for_point(x, yy, masks_np, classes_np, H, W)
                out[yy, x] = col

    # Prepare lane target
    lane_name = lane_name_from_point(JAKE_POINT)
    target_deg = LANE_TARGET_DEG[lane_name]

    # Select best triangle by bearing
    # best_idx is -1 when no triangle is far enough above Jake.
    xj, yj = JAKE_POINT
    best_idx, best_deg, _ = select_triangle_by_bearing(tri_positions, xj, yj, target_deg, min_dy=6)

    # Starburst lines + degree labels; highlight best
    for idx, (xt, yt) in enumerate(tri_positions):
        # Clamp the line end to the frame bounds.
        xt = max(0, min(W-1, int(xt)))
        yt = max(0, min(H-1, int(yt)))

        dx = xt - xj
        dy = yt - yj
        # degrees from vertical (signed as defined)
        deg_signed = signed_degrees_from_vertical(dx, dy)
        label = f"{deg_signed:.1f}°"

        line_color = COLOR_CYAN if idx == best_idx else COLOR_WHITE
        cv2.line(out, (xj, yj), (xt, yt), line_color, 2, cv2.LINE_AA)

        # Bearing label at the midpoint of the line, outlined for readability.
        mid_x = int((xj + xt) / 2)
        mid_y = int((yj + yt) / 2)
        cv2.putText(out, label, (mid_x, mid_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, COLOR_BLACK, 2, cv2.LINE_AA)
        cv2.putText(out, label, (mid_x, mid_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255,255,255), 1, cv2.LINE_AA)

    # Triangles (draw after lines)
    # zip stops at the shorter input, so triangles without a colour are not drawn.
    for (x, y), col in zip(tri_positions, tri_colours):
        draw_triangle(out, x, y, colour=col)

    # Emphasize selected triangle + tag
    # The range check also rejects the -1 "nothing selected" sentinel.
    if best_idx is not None and 0 <= best_idx < len(tri_positions):
        xt, yt = tri_positions[best_idx]
        pts = triangle_pts(int(xt), int(yt), size=TRI_SIZE_PX)
        cv2.polylines(out, [pts.reshape(-1,1,2)], True, COLOR_CYAN, 3, cv2.LINE_AA)
        tag = f"JAKE_TRI ({lane_name}: target {target_deg:.1f}°)"
        cv2.putText(out, tag, (max(5, int(xt)-70), max(20, int(yt)-16)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, COLOR_BLACK, 2, cv2.LINE_AA)
        cv2.putText(out, tag, (max(5, int(xt)-70), max(20, int(yt)-16)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255,255,255), 1, cv2.LINE_AA)

    return out

# =======================
# Batched execution with prints; overlays saved for first N
# =======================
def run_pipeline_with_prints_and_overlays():
    """Run inference + post-processing over the frames found in ``frames_dir``.

    Frames are discovered by glob (``*.jpg`` / ``*.png``), de-duplicated,
    sorted, optionally truncated to ``SHOW_FIRST_N`` and processed in chunks
    of ``BATCH``.  A timing line is printed for every readable frame, and an
    overlay image is written to ``out_dir`` for frames whose 1-based position
    is at most ``RENDER_FIRST_N``.

    Returns:
        list: one slot per discovered path (sorted order) holding the first
        value returned by ``process_frame_post`` for that frame.  Slots of
        frames that could not be read stay ``None``.

    Raises:
        FileNotFoundError: if no image files are found in ``frames_dir``.
    """
    paths = (
        glob.glob(str(frames_dir/"frame_*.jpg")) +
        glob.glob(str(frames_dir/"frame_*.png")) +
        glob.glob(str(frames_dir/"*.jpg")) +
        glob.glob(str(frames_dir/"*.png"))
    )
    # The patterns overlap, so de-duplicate before sorting.
    paths = sorted(set(paths))
    if not paths:
        raise FileNotFoundError(f"No images in: {frames_dir}")
    if SHOW_FIRST_N is not None:
        paths = paths[:SHOW_FIRST_N]

    N = len(paths)
    results_triangle_xy = [None] * N

    def load_batch(batch_paths):
        """Read a chunk concurrently and keep only the readable images.

        Returns four parallel lists: offset of each surviving image within
        ``batch_paths``, its path, the image, and the read time in ms.
        """
        imgs = [None] * len(batch_paths)
        read_ms = [0.0] * len(batch_paths)
        with ThreadPoolExecutor(max_workers=THREADS_IO) as ex:
            fut2idx = {ex.submit(load_image_with_time, p): i for i, p in enumerate(batch_paths)}
            for fut in as_completed(fut2idx):
                i = fut2idx[fut]
                imgs[i], read_ms[i] = fut.result()

        offsets, b_paths, b_imgs, b_read = [], [], [], []
        for i, (p, im, rm) in enumerate(zip(batch_paths, imgs, read_ms)):
            if im is None:
                # Make skipped frames visible instead of silently dropping them.
                print(f"[skip] unreadable image: {p}", file=sys.stderr)
                continue
            offsets.append(i)
            b_paths.append(p)
            b_imgs.append(im)
            b_read.append(rm)
        return offsets, b_paths, b_imgs, b_read

    idx_global = 0
    for chunk_paths in chunked(paths, BATCH):
        # Remember the chunk size BEFORE filtering: the global index must
        # advance by the number of paths consumed, not by the number of images
        # that happened to load, otherwise later results drift to wrong slots.
        chunk_len = len(chunk_paths)
        offsets, batch_paths, imgs_bgr, read_ms_list = load_batch(chunk_paths)
        B = len(imgs_bgr)
        if B == 0:
            idx_global += chunk_len
            continue

        t0_inf = time.perf_counter()
        res_list = model.predict(
            imgs_bgr, task="segment", imgsz=IMG_SIZE, device=device,
            conf=CONF, iou=IOU, verbose=False, half=half, max_det=MAX_DET,
            batch=B
        )
        # Best-effort device sync so the inference timing covers queued work.
        try:
            if device == 0 and torch.cuda.is_available():
                torch.cuda.synchronize()
            elif device == "mps" and getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
                torch.mps.synchronize()
        except Exception:
            pass
        t1_inf = time.perf_counter()
        # Inference time is split evenly across the images of the batch.
        infer_ms_share = ((t1_inf - t0_inf) * 1000.0) / B

        for j, (img, yres, read_ms) in enumerate(zip(imgs_bgr, res_list, read_ms_list)):
            (tri_best_xy, tri_count, mask_count, to_cpu_ms, post_ms,
             masks_np, classes_np, rail_mask, green_mask, tri_positions, tri_colours) = process_frame_post(img, yres)

            # Position of this frame in ``paths`` (robust to skipped files).
            pos = idx_global + offsets[j]
            results_triangle_xy[pos] = tri_best_xy
            proc_ms = infer_ms_share + to_cpu_ms + post_ms
            fname = os.path.basename(batch_paths[j])
            frame_idx = pos + 1

            print(f"[{frame_idx}/{N}] {fname}  "
                  f"read {read_ms:.1f} | infer {infer_ms_share:.1f} | "
                  f"to_cpu {to_cpu_ms:.1f} | post {post_ms:.1f} | "
                  f"masks {mask_count} | triangles {tri_count} "
                  f"=> proc {proc_ms:.1f} ms")

            # Rendering happens after the timing figures are computed.
            if frame_idx <= RENDER_FIRST_N:
                overlay = render_overlays(img, masks_np, classes_np, rail_mask, green_mask, tri_positions, tri_colours)
                out_path = out_dir / f"overlay_{frame_idx:04d}_{fname}"
                cv2.imwrite(str(out_path), overlay)

        idx_global += chunk_len

    return results_triangle_xy

# =======================
# Entry
# =======================
if __name__ == "__main__":
    # Run the full pipeline; the returned per-frame triangle list is discarded.
    _ = run_pipeline_with_prints_and_overlays()


YOLO11n-seg summary (fused): 113 layers, 2,836,908 parameters, 0 gradients, 10.2 GFLOPs
[1/144] frame_00000.png  read 38.0 | infer 97.0 | to_cpu 0.8 | post 63.5 | masks 2 | triangles 1 => proc 161.3 ms
[2/144] frame_00001.png  read 37.0 | infer 38.9 | to_cpu 0.9 | post 91.6 | masks 3 | triangles 1 => proc 131.4 ms
[3/144] frame_00002.png  read 36.0 | infer 42.3 | to_cpu 0.7 | post 147.6 | masks 3 | triangles 1 => proc 190.6 ms
[4/144] frame_00003.png  read 82.6 | infer 43.9 | to_cpu 0.7 | post 102.4 | masks 2 | triangles 1 => proc 147.0 ms
[5/144] frame_00004.png  read 52.6 | infer 45.5 | to_cpu 0.9 | post 165.1 | masks 5 | triangles 1 => proc 211.5 ms
[6/144] frame_00005.png  read 41.3 | infer 40.0 | to_cpu 0.8 | post 156.1 | masks 5 | triangles 1 => proc 196.8 ms
[7/144] frame_00006.png  read 43.9 | infer 323.7 | to_cpu 1.5 | post 172.1 | masks 6 | triangles 2 => proc 497.2 ms
[8/144] frame_00007.png  read 37.3 | infer 44.7 | to_cpu 0.8 | post 179.9 | masks 4 | triangles 2 => proc 22