In [None]:
import os
import math
import time
from pathlib import Path
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional

import cv2
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import torch
from ultralytics import YOLO

# Project root. The notebook may be started either from <root> itself or from
# <root>/notebooks/. In the second case step up one level, so that data/,
# best.pt and outputs/ are resolved against the project root and not against
# the notebooks folder.
_CWD = Path.cwd().resolve()
ROOT = _CWD.parent if (_CWD.name == "notebooks" and not (_CWD / "data").exists()) else _CWD

DATA_DIR   = ROOT / "data"
VIDEOS_DIR = DATA_DIR / "videos_compressed"

# IMPORTANT: use your 48-class weights
YOLO_WEIGHTS = ROOT / "best.pt"   # <-- your 48-class model

# Everything written by this notebook lives under outputs/notebook5/
OUT_DIR = ROOT / "outputs" / "notebook5"
TIMELINE_DIR = OUT_DIR / "timelines"
OUT_DIR.mkdir(parents=True, exist_ok=True)
TIMELINE_DIR.mkdir(parents=True, exist_ok=True)

# Print the resolved locations so a wrong working directory is visible immediately
print("ROOT:", ROOT)
print("DATA_DIR exists:", DATA_DIR.exists(), DATA_DIR)
print("VIDEOS_DIR exists:", VIDEOS_DIR.exists(), VIDEOS_DIR)
print("YOLO_WEIGHTS exists:", YOLO_WEIGHTS.exists(), YOLO_WEIGHTS)

# DEVICE0 falls back to the CPU; DEVICE1 is None unless a second CUDA device is present
DEVICE0 = "cuda:0" if torch.cuda.is_available() else "cpu"
DEVICE1 = "cuda:1" if (torch.cuda.is_available() and torch.cuda.device_count() > 1) else None
print("CUDA available:", torch.cuda.is_available(), "num_gpus:", torch.cuda.device_count())
print("DEVICE0:", DEVICE0, "DEVICE1:", DEVICE1)

# Sorted so that the processing order (and the debug video) is deterministic
videos = sorted(VIDEOS_DIR.glob("*.mp4"))
print("Found videos:", len(videos))
print("Example:", videos[0] if videos else None)

ROOT: /data/wesleyferreiramaia/workzone
DATA_DIR exists: True /data/wesleyferreiramaia/workzone/data
VIDEOS_DIR exists: True /data/wesleyferreiramaia/workzone/data/videos_compressed
YOLO_WEIGHTS exists: True /data/wesleyferreiramaia/workzone/best.pt
CUDA available: True num_gpus: 1
DEVICE0: cuda:0 DEVICE1: None
Found videos: 406
Example: /data/wesleyferreiramaia/workzone/data/videos_compressed/boston_042e1caf93114d3286c11ba14ddaa759_000001_02790_snippet.mp4


In [None]:
# Class groups (name-based)

def normalize_name(s: str) -> str:
    """Coerce ``s`` to ``str``, trim surrounding whitespace and lower-case it."""
    text = str(s)
    trimmed = text.strip()
    return trimmed.lower()

def build_groups_from_model(model: YOLO) -> Dict[str, List[int]]:
    """
    Map each semantic group to the sorted class ids whose name contains a keyword.

    ``model.names`` is read as a mapping ``class id -> class name``. Names are
    normalized with ``normalize_name`` before matching, and a class belongs to a
    group as soon as any of the group's keywords occurs as a substring of its
    normalized name. A class may therefore end up in more than one group.

    Returns a dict with the keys "channelization", "workers", "vehicles",
    "ttc" and "msg" (in that order).
    """
    normalized = {cid: normalize_name(label) for cid, label in model.names.items()}

    def ids_matching(substrs: List[str]) -> List[int]:
        # Set comprehension removes duplicates; sorting gives a stable id order
        hits = {
            cid
            for cid, label in normalized.items()
            if any(ss in label for ss in substrs)
        }
        return sorted(hits)

    # group name -> substrings looked for in the normalized class names
    keyword_table = {
        "channelization": ["cone", "drum", "barricade", "barrier", "vertical panel", "tubular marker", "fence"],
        "workers": ["worker", "police officer", "flagger"],
        "vehicles": ["work vehicle", "police vehicle"],
        "ttc": ["temporary traffic control sign"],
        "msg": ["message board", "arrow board"],
    }
    return {group: ids_matching(words) for group, words in keyword_table.items()}

print("Ready.")


Ready.


In [None]:
# Scoring config

@dataclass
class ScoreConfig:
    """
    Tunable parameters for frame sampling, scoring and the work-zone state machine.

    Raises:
        ValueError: on construction, if a value would break the sampling loop or
            the hysteresis logic (see ``__post_init__``).
    """
    # video sampling
    stride: int = 3              # process every N frames
    imgsz: int = 960
    conf: float = 0.25
    iou: float = 0.7

    # EMA smoothing
    ema_alpha: float = 0.20      # higher = less lag, more jitter; lower = smoother
    # Hysteresis thresholds on EMA score
    enter_th: float = 0.55
    exit_th: float = 0.45
    # Require k consecutive frames above/below to toggle
    k_enter: int = 6
    k_exit: int = 10

    # Core weights (you will tune after summary stats)
    w_channel: float = 0.35
    w_workers: float = 0.35
    w_vehicles: float = 0.15
    w_ttc: float = 0.40
    w_msg: float = 0.40

    # Proximity proxies (reduce "far cone" false alarms)
    # bottom_half_count: objects whose bbox center is in bottom 50% of frame
    w_bottom: float = 0.45
    # near_proxy: large bbox area fraction (rough depth)
    w_near: float = 0.35

    # Bias (shifts score down so you need persistent evidence)
    bias: float = -0.65

    def __post_init__(self) -> None:
        """Reject settings that would fail later or make the state machine meaningless."""
        # stride is used as a modulus when sampling frames
        if self.stride < 1:
            raise ValueError(f"stride must be >= 1, got {self.stride}")
        # alpha == 0 would freeze the EMA at its initial value
        if not (0.0 < self.ema_alpha <= 1.0):
            raise ValueError(f"ema_alpha must be in (0, 1], got {self.ema_alpha}")
        # the exit threshold must not sit above the enter threshold (no hysteresis band)
        if self.exit_th > self.enter_th:
            raise ValueError(
                f"exit_th ({self.exit_th}) must not exceed enter_th ({self.enter_th})"
            )
        # a count of 0 would toggle the state regardless of the score
        if self.k_enter < 1 or self.k_exit < 1:
            raise ValueError(
                f"k_enter and k_exit must be >= 1, got {self.k_enter} and {self.k_exit}"
            )

CFG = ScoreConfig()
print(CFG)


ScoreConfig(stride=3, imgsz=960, conf=0.25, iou=0.7, ema_alpha=0.2, enter_th=0.55, exit_th=0.45, k_enter=6, k_exit=10, w_channel=0.35, w_workers=0.35, w_vehicles=0.15, w_ttc=0.4, w_msg=0.4, w_bottom=0.45, w_near=0.35, bias=-0.65)


In [None]:
# Load model + groups

model0 = YOLO(str(YOLO_WEIGHTS))
try:
    model0.to(DEVICE0)
except Exception as exc:
    # Best effort only: inference later passes device= explicitly to predict(),
    # so a failed move is reported instead of aborting the notebook.
    print(f"WARNING: could not move model to {DEVICE0} ({type(exc).__name__}: {exc})")
model0.eval()

# Class-id groups derived from the class names stored in the weights
groups0 = build_groups_from_model(model0)
print("Loaded:", YOLO_WEIGHTS)
print("Num classes:", len(model0.names))
print("Group sizes:", {k: len(v) for k, v in groups0.items()})
print("Example names:", list(model0.names.items())[:8])


Loaded: /data/wesleyferreiramaia/workzone/best.pt
Num classes: 48
Group sizes: {'channelization': 7, 'workers': 4, 'vehicles': 3, 'ttc': 33, 'msg': 2}
Example names: [(0, 'Police Officer'), (1, 'Police Vehicle'), (2, 'Cone'), (3, 'Fence'), (4, 'Drum'), (5, 'Barricade'), (6, 'Barrier'), (7, 'Work Vehicle')]


In [None]:
# Feature extraction helpers

def bbox_area_norm(xyxy: np.ndarray, w: int, h: int) -> float:
    """
    Return the area of a box as a fraction of the frame area.

    Args:
        xyxy: four values (x1, y1, x2, y2); a box with x2 < x1 or y2 < y1
            contributes zero area.
        w, h: frame width and height, in the same units as the box.

    Returns:
        ``box_area / (w * h)``, or 0.0 when the frame area is not positive
        (a frame of unknown size cannot give a meaningful fraction).
    """
    frame_area = float(w) * float(h)
    if frame_area <= 0.0:
        return 0.0
    x1, y1, x2, y2 = xyxy
    area = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    return float(area / frame_area)

def compute_frame_features(result, w: int, h: int, groups: Dict[str, List[int]]) -> Dict[str, float]:
    """
    Count detections per class group for one frame and derive two proximity proxies.

    Args:
        result: detection result exposing ``boxes.xyxy`` and ``boxes.cls`` as
            tensor-like objects supporting ``.detach().cpu().numpy()``.
        w, h: frame width and height used for the proximity proxies
            (assumed to be in the same pixel space as ``boxes.xyxy`` - TODO confirm).
        groups: mapping with the keys "channelization", "workers", "vehicles",
            "ttc" and "msg", each holding a list of class ids.

    Returns:
        Dict with
          - "channel", "workers", "vehicles", "ttc", "msg": detections per group
            (a class listed in several groups is counted in each of them),
          - "bottom_half": hazard detections whose box center lies at or below h/2,
          - "near_proxy": summed normalized box area of hazard detections,
            clipped to [0, 1],
          - "total": number of detections in the frame.
        "Hazard" means channelization, workers, ttc or msg; vehicles are excluded.
    """
    feats = {
        "channel": 0,
        "workers": 0,
        "vehicles": 0,
        "ttc": 0,
        "msg": 0,
        "bottom_half": 0,
        "near_proxy": 0.0,
        "total": 0,
    }

    if result.boxes is None or len(result.boxes) == 0:
        return feats

    xyxy = result.boxes.xyxy.detach().cpu().numpy()
    cls = result.boxes.cls.detach().cpu().numpy().astype(int)

    feats["total"] = int(len(cls))

    # Build the id sets once per frame so membership tests do not scan lists per detection
    channel_ids = set(groups["channelization"])
    worker_ids = set(groups["workers"])
    vehicle_ids = set(groups["vehicles"])
    ttc_ids = set(groups["ttc"])
    msg_ids = set(groups["msg"])
    hazard_ids = channel_ids | worker_ids | ttc_ids | msg_ids

    half_height = 0.5 * h

    for raw_cid, box in zip(cls, xyxy):
        cid = int(raw_cid)

        # Independent checks: group membership is not assumed to be exclusive
        if cid in channel_ids:
            feats["channel"] += 1
        if cid in worker_ids:
            feats["workers"] += 1
        if cid in vehicle_ids:
            feats["vehicles"] += 1
        if cid in ttc_ids:
            feats["ttc"] += 1
        if cid in msg_ids:
            feats["msg"] += 1

        if cid in hazard_ids:
            # proximity proxy: vertical box center in the bottom half of the frame
            cy = 0.5 * (box[1] + box[3])
            if cy >= half_height:
                feats["bottom_half"] += 1

            # proximity proxy: bbox area
            feats["near_proxy"] += bbox_area_norm(box, w, h)

    # clip near_proxy to avoid huge boxes dominating
    feats["near_proxy"] = float(np.clip(feats["near_proxy"], 0.0, 1.0))
    return feats

def sigmoid(x: float) -> float:
    """
    Logistic function ``1 / (1 + e**-x)``, evaluated without overflow.

    ``math.exp`` raises OverflowError for large positive arguments, so the
    exponential is always taken of a non-positive number: for negative ``x`` the
    algebraically equivalent form ``e**x / (1 + e**x)`` is used.
    """
    if x >= 0:
        return 1.0 / (1.0 + math.exp(-x))
    z = math.exp(x)
    return z / (1.0 + z)


In [None]:
# Scoring + state machine

def score_from_feats(feats: Dict[str, float], cfg: ScoreConfig) -> float:
    """
    Turn per-frame features into a score in [0, 1].

    The score is the logistic of ``cfg.bias`` plus a weighted sum of the group
    counts, the bottom-half count and the near proxy (the latter multiplied by
    10 before weighting).
    """
    weighted_terms = (
        (cfg.w_channel, feats["channel"]),
        (cfg.w_workers, feats["workers"]),
        (cfg.w_vehicles, feats["vehicles"]),
        (cfg.w_ttc, feats["ttc"]),
        (cfg.w_msg, feats["msg"]),
        (cfg.w_bottom, feats["bottom_half"]),
        # near_proxy lives in 0..1, so it is scaled up before weighting
        (cfg.w_near, 10.0 * feats["near_proxy"]),
    )

    logit = cfg.bias
    for weight, value in weighted_terms:
        logit += weight * value

    return float(sigmoid(logit))

def run_hysteresis(scores: List[float], cfg: ScoreConfig) -> Tuple[List[float], List[int]]:
    """
    Smooth raw scores with an EMA and derive a 0/1 work-zone timeline from it.

    The EMA starts from 0.0, so the first smoothed values are pulled toward
    zero. The state starts at 0 and switches to 1 after ``cfg.k_enter``
    consecutive samples with EMA >= ``cfg.enter_th``; it switches back to 0
    after ``cfg.k_exit`` consecutive samples with EMA <= ``cfg.exit_th``.
    The state appended for a sample already reflects a switch made on it.

    Returns:
        (ema_scores, is_workzone), two lists of the same length as ``scores``.
    """
    alpha = cfg.ema_alpha
    smoothed: List[float] = []
    timeline: List[int] = []

    in_zone = 0
    run_above = 0   # consecutive samples at/above enter_th while outside
    run_below = 0   # consecutive samples at/below exit_th while inside
    level = 0.0

    for raw_score in scores:
        level = alpha * raw_score + (1.0 - alpha) * level
        smoothed.append(float(level))

        if in_zone:
            run_below = run_below + 1 if level <= cfg.exit_th else 0
            if run_below >= cfg.k_exit:
                in_zone, run_below = 0, 0
        else:
            run_above = run_above + 1 if level >= cfg.enter_th else 0
            if run_above >= cfg.k_enter:
                in_zone, run_above = 1, 0

        timeline.append(int(in_zone))

    return smoothed, timeline


In [None]:
# Process one video (debug)

def process_video_timeline(video_path: Path, model: YOLO, groups: Dict[str, List[int]], cfg: ScoreConfig, device: str) -> pd.DataFrame:
    """
    Run the detector on every ``cfg.stride``-th frame of a video and build its timeline.

    Args:
        video_path: video file opened with ``cv2.VideoCapture``.
        model: detector; ``model.predict(frame, ...)[0]`` is used per sampled frame.
        groups: class-id groups passed on to ``compute_frame_features``.
        cfg: sampling, inference and scoring parameters.
        device: device string forwarded to ``model.predict``.

    Returns:
        DataFrame with one row per sampled frame: the original frame index,
        ``time_sec`` (= frame / fps), raw and EMA scores, the 0/1 work-zone
        state and the per-frame features. ``df.attrs`` holds "fps",
        "total_frames", "w" and "h".

    Raises:
        RuntimeError: if the video cannot be opened.
    """
    feature_cols = ("channel", "workers", "vehicles", "ttc", "msg", "bottom_half", "near_proxy", "total")

    frames = []
    scores = []
    feats_rows = []

    # Guard the modulus below against a zero or negative stride
    stride = max(1, int(cfg.stride))

    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Could not open video: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        if not math.isfinite(fps) or fps <= 0:
            fps = 30.0  # container did not report a usable frame rate
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        fidx = -1
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            fidx += 1

            if fidx % stride != 0:
                continue

            # Some containers report 0 for the frame size; take it from the decoded
            # frame so the bottom-half and area proxies are not computed against 0.
            if w <= 0 or h <= 0:
                h, w = (int(v) for v in frame.shape[:2])

            # YOLO inference
            res = model.predict(frame, imgsz=cfg.imgsz, conf=cfg.conf, iou=cfg.iou, verbose=False, device=device)[0]

            feats = compute_frame_features(res, w=w, h=h, groups=groups)

            frames.append(fidx)
            scores.append(score_from_feats(feats, cfg))
            feats_rows.append(feats)
    finally:
        # Release the capture even when decoding or inference raises
        cap.release()

    ema, is_wz = run_hysteresis(scores, cfg)

    columns = {
        "frame": frames,
        "time_sec": [fr / float(fps) for fr in frames],
        "score_raw": scores,
        "score_ema": ema,
        "is_workzone": is_wz,
    }
    for col in feature_cols:
        columns[col] = [row[col] for row in feats_rows]

    df = pd.DataFrame(columns)
    df.attrs["fps"] = fps
    df.attrs["total_frames"] = total_frames
    df.attrs["w"] = w
    df.attrs["h"] = h
    return df

# Debug on 1 video
if not videos:
    # Fail with a readable message instead of an IndexError on videos[0]
    raise FileNotFoundError(f"No .mp4 videos found in {VIDEOS_DIR}")
VIDEO_PATH = videos[0]
df_debug = process_video_timeline(VIDEO_PATH, model0, groups0, CFG, device=DEVICE0)
print("Video:", VIDEO_PATH.name, "rows:", len(df_debug))
display(df_debug.head())
display(df_debug[["score_raw", "score_ema", "is_workzone"]].describe())


Video: boston_042e1caf93114d3286c11ba14ddaa759_000001_02790_snippet.mp4 rows: 300


Unnamed: 0,frame,time_sec,score_raw,score_ema,is_workzone,channel,workers,vehicles,ttc,msg,bottom_half,near_proxy,total
0,0,0.0,0.34299,0.068598,0,0,0,0,0,0,0,0.0,0
1,3,0.1,0.34299,0.123476,0,0,0,0,0,0,0,0.0,0
2,6,0.2,0.34299,0.167379,0,0,0,0,0,0,0,0.0,0
3,9,0.3,0.34299,0.202501,0,0,0,0,0,0,0,0.0,0
4,12,0.4,0.34299,0.230599,0,0,0,0,0,0,0,0.0,0


Unnamed: 0,score_raw,score_ema,is_workzone
count,300.0,300.0,300.0
mean,0.637335,0.628761,0.48
std,0.279274,0.275491,0.500435
min,0.34299,0.068598,0.0
25%,0.34299,0.367208,0.0
50%,0.562612,0.533154,0.0
75%,0.97104,0.946046,1.0
max,0.998838,0.994501,1.0


In [None]:
# Metrics extraction

def summarize_timeline(df: pd.DataFrame, video_name: str) -> Dict[str, float]:
    """
    Reduce one video timeline to a row of summary metrics.

    Args:
        df: timeline with the columns "time_sec", "is_workzone" and "score_ema".
        video_name: stored as-is under the "video" key.

    Returns:
        Dict with duration, mean/max EMA, first enter time and last exit time
        (NaN when there is no such transition), time spent in the work-zone
        state (number of active samples times the median sample spacing),
        number of state toggles, toggles per minute, and the two flags
        "early_enter_flag" and "never_enter_flag".
    """
    t = df["time_sec"].to_numpy(dtype=float)
    wz = df["is_workzone"].to_numpy().astype(int)
    ema = df["score_ema"].to_numpy(dtype=float)
    n = len(t)

    duration = float(t[-1] - t[0]) if n > 1 else 0.0
    toggles = int(np.sum(np.abs(np.diff(wz))))
    flicker_per_min = float(toggles / (max(duration, 1e-6) / 60.0))

    # enter / exit times: index i marks a transition between sample i and i + 1
    enter_idx = np.where((wz[:-1] == 0) & (wz[1:] == 1))[0]
    exit_idx  = np.where((wz[:-1] == 1) & (wz[1:] == 0))[0]

    enter_time = float(t[enter_idx[0] + 1]) if len(enter_idx) else np.nan
    exit_time  = float(t[exit_idx[-1] + 1]) if len(exit_idx) else np.nan

    # Two samples are enough to know the sample spacing
    time_in_wz = float(np.sum(wz) * np.median(np.diff(t))) if n > 1 else 0.0

    # Fraction of samples in the work-zone state; 0 for an empty timeline
    wz_fraction = float(np.mean(wz)) if n else 0.0

    # early trigger proxy: enter happens very early AND then stays mostly off afterwards
    # (You will refine this later using metadata / text)
    early_enter = int((not np.isnan(enter_time)) and (enter_time < 2.0) and (wz_fraction < 0.2))

    never_enter = int(np.sum(wz) == 0)

    return {
        "video": video_name,
        "duration_sec": duration,
        "mean_ema": float(np.mean(ema)) if len(ema) else 0.0,
        "max_ema": float(np.max(ema)) if len(ema) else 0.0,
        "enter_time_sec": enter_time,
        "exit_time_sec": exit_time,
        "time_in_workzone_sec": time_in_wz,
        "toggle_count": toggles,
        "flicker_per_min": flicker_per_min,
        "early_enter_flag": early_enter,
        "never_enter_flag": never_enter,
    }

print("Ready.")


Ready.


In [31]:
# Multi-GPU dataset processing

from concurrent.futures import ProcessPoolExecutor, as_completed

def worker_process(video_paths: List[str], weights_path: str, device: str, out_dir: str, cfg: ScoreConfig) -> str:
    """
    Process a list of videos on one device and write their timelines and a partial summary.

    May be called directly or submitted to a process pool; it loads its own
    model from ``weights_path`` instead of sharing one with the caller.

    Writes per-video timelines into out_dir/timelines/<stem>_timeline.csv
    Writes partial summary into out_dir/summary_part_<device>.csv
    (":" in the device string is replaced by "_").

    A video that raises is recorded as a row holding only "video" and "error";
    the remaining videos are still processed.

    Returns:
        Path of the partial summary CSV, as a string.
    """
    out_dir = Path(out_dir)
    timelines_dir = out_dir / "timelines"
    timelines_dir.mkdir(parents=True, exist_ok=True)

    model = YOLO(weights_path)
    try:
        model.to(device)
    except Exception as exc:
        # Best effort only: predict() receives device= explicitly for every frame,
        # so report the failure instead of hiding it or aborting the whole run.
        print(f"WARNING: could not move model to {device} ({type(exc).__name__}: {exc})")
    model.eval()
    groups = build_groups_from_model(model)

    rows = []
    for vp in tqdm(video_paths, desc=f"GPU {device}"):
        vpath = Path(vp)
        try:
            df = process_video_timeline(vpath, model, groups, cfg, device=device)
            # save timeline
            out_csv = timelines_dir / f"{vpath.stem}_timeline.csv"
            df.to_csv(out_csv, index=False)
            rows.append(summarize_timeline(df, vpath.name))
        except Exception as e:
            # Prefix the exception type so the message is never empty: an empty
            # string is read back from CSV as NaN and would look like "no error".
            rows.append({"video": vpath.name, "error": f"{type(e).__name__}: {e}"})

    part_path = out_dir / f"summary_part_{device.replace(':','_')}.csv"
    pd.DataFrame(rows).to_csv(part_path, index=False)
    return str(part_path)

# Split videos across 1 or 2 GPUs
video_strs = [str(v) for v in videos]

# Remove partial summaries left over from earlier runs. The merge step globs
# every summary_part_*.csv in OUT_DIR, so a stale part written for another
# device layout would be merged in and duplicate videos.
for stale_part in OUT_DIR.glob("summary_part_*.csv"):
    stale_part.unlink()

if DEVICE1 is None:
    print("Only 1 GPU detected. Running single process on", DEVICE0)
    part0 = worker_process(video_strs, str(YOLO_WEIGHTS), DEVICE0, str(OUT_DIR), CFG)
    parts = [part0]
else:
    # 2 GPUs: first half of the list on DEVICE0, second half on DEVICE1
    # NOTE(review): worker_process is defined in the notebook and CUDA has already
    # been initialised in this kernel; whether the worker processes can start and
    # use CUDA depends on the multiprocessing start method - confirm on a 2-GPU box.
    mid = len(video_strs) // 2
    splits = [(video_strs[:mid], DEVICE0), (video_strs[mid:], DEVICE1)]
    # Do not start a worker (and load a model) for an empty share
    splits = [(vlist, dev) for vlist, dev in splits if vlist]
    parts = []
    with ProcessPoolExecutor(max_workers=2) as ex:
        futs = [
            ex.submit(worker_process, vlist, str(YOLO_WEIGHTS), dev, str(OUT_DIR), CFG)
            for vlist, dev in splits
        ]
        for f in as_completed(futs):
            parts.append(f.result())

print("Parts written:", parts)


Only 1 GPU detected. Running single process on cuda:0


GPU cuda:0: 100%|█████████████████████████████| 406/406 [38:15<00:00,  5.65s/it]

Parts written: ['/data/wesleyferreiramaia/workzone/outputs/notebook5/summary_part_cuda_0.csv']





In [32]:
# Merge summaries + hard cases

part_files = sorted(OUT_DIR.glob("summary_part_*.csv"))
assert part_files, "No summary parts found. Did Cell 9 run?"

df_parts = [pd.read_csv(p) for p in part_files]
df_sum = pd.concat(df_parts, ignore_index=True)

# Rows written for a failed video carry only 'video' and 'error'
if "error" in df_sum.columns:
    failed = df_sum["error"].notna()
    err = df_sum[failed]
    # Drop the (all-NaN) error column so it does not leak into the saved summaries
    ok = df_sum[~failed].drop(columns="error")
else:
    err = pd.DataFrame()
    ok = df_sum.copy()

summary_path = OUT_DIR / "video_summary.csv"
ok.to_csv(summary_path, index=False)
print("Saved:", summary_path, "rows:", len(ok))
if len(err):
    print("Errors:", len(err))
    display(err.head())

# If every video failed, the metric columns used below do not exist
if ok.empty:
    raise RuntimeError("No video was summarized successfully; inspect the error rows.")

# Hard cases (top 50 each)
# early: flagged videos first, then most flicker, then earliest enter
hard_early  = ok.sort_values(["early_enter_flag", "flicker_per_min", "enter_time_sec"], ascending=[False, False, True]).head(50)
# never: videos that never entered, closest misses (highest max EMA) first
hard_never  = ok[ok["never_enter_flag"] == 1].sort_values(["max_ema"], ascending=False).head(50)
# flicker: most state toggles per minute first
hard_flick  = ok.sort_values("flicker_per_min", ascending=False).head(50)

early_path = OUT_DIR / "hardcases_early_enter.csv"
never_path = OUT_DIR / "hardcases_never_enter.csv"
flick_path = OUT_DIR / "hardcases_flicker.csv"

hard_early.to_csv(early_path, index=False)
hard_never.to_csv(never_path, index=False)
hard_flick.to_csv(flick_path, index=False)

print("Saved:", early_path)
print("Saved:", never_path)
print("Saved:", flick_path)

display(ok.describe(include="all").T.head(25))


Saved: /data/wesleyferreiramaia/workzone/outputs/notebook5/video_summary.csv rows: 406
Saved: /data/wesleyferreiramaia/workzone/outputs/notebook5/hardcases_early_enter.csv
Saved: /data/wesleyferreiramaia/workzone/outputs/notebook5/hardcases_never_enter.csv
Saved: /data/wesleyferreiramaia/workzone/outputs/notebook5/hardcases_flicker.csv


Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
video,406.0,406.0,seattle_83d7f710e54b48f5a7730576be90ddfc_00000...,1.0,,,,,,,
duration_sec,406.0,,,,29.839975,1.167349,15.8,29.9,30.0,30.0,30.03
mean_ema,406.0,,,,0.754444,0.152998,0.400202,0.640552,0.773251,0.880723,0.986702
max_ema,406.0,,,,0.986388,0.035814,0.671718,0.992917,0.999419,0.999984,1.0
enter_time_sec,406.0,,,,3.383346,3.73502,0.8,0.8,1.1,4.875,15.6
exit_time_sec,239.0,,,,21.703706,6.240772,2.6,18.8,22.4,26.25,30.0
time_in_workzone_sec,406.0,,,,22.538295,6.660429,3.5,17.725,24.4,28.9,29.3
toggle_count,406.0,,,,2.083744,1.216302,1.0,1.0,2.0,3.0,7.0
flicker_per_min,406.0,,,,4.190793,2.436917,1.998002,2.006689,4.0,6.0,14.0
early_enter_flag,406.0,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
# Inspect a hard case timeline quickly

def load_timeline_for_video(video_name: str, timelines_dir: Optional[Path] = None) -> pd.DataFrame:
    """
    Load the saved timeline CSV of one video.

    Args:
        video_name: video file name or path; only its stem is used.
        timelines_dir: directory holding ``<stem>_timeline.csv``. Defaults to
            the notebook-level ``TIMELINE_DIR``; pass another directory to read
            timelines written for a different output folder.

    Raises:
        FileNotFoundError: if the timeline CSV does not exist.
    """
    base_dir = TIMELINE_DIR if timelines_dir is None else Path(timelines_dir)
    p = base_dir / f"{Path(video_name).stem}_timeline.csv"
    if not p.exists():
        raise FileNotFoundError(p)
    return pd.read_csv(p)

# Inspect the top-ranked early-enter hard case; if that table is empty,
# fall back to the first successfully summarized video.
if len(hard_early):
    example_video = hard_early.iloc[0]["video"]
else:
    example_video = ok.iloc[0]["video"]

df_case = load_timeline_for_video(example_video)
print("Example:", example_video, "rows:", len(df_case))

# First rows, then the distribution of the score / state columns
display(df_case.head())
score_cols = ["score_raw", "score_ema", "is_workzone"]
display(df_case[score_cols].describe())


Example: jacksonville_9a92764c4c4144e8b47b673df783c906_000003_02940_snippet.mp4 rows: 301


Unnamed: 0,frame,time_sec,score_raw,score_ema,is_workzone,channel,workers,vehicles,ttc,msg,bottom_half,near_proxy,total
0,0,0.0,0.539375,0.107875,0,0,1,0,0,0,1,0.002236,1
1,3,0.1,0.539344,0.194169,0,0,1,0,0,0,1,0.002201,1
2,6,0.2,0.884146,0.332164,0,2,1,0,0,0,3,0.080655,3
3,9,0.3,0.875869,0.440905,0,2,1,0,0,0,3,0.058251,3
4,12,0.4,0.781236,0.508971,0,1,1,0,0,0,2,0.092253,2


Unnamed: 0,score_raw,score_ema,is_workzone
count,301.0,301.0,301.0
mean,0.651028,0.64273,0.740864
std,0.218473,0.187049,0.43889
min,0.34299,0.107875,0.0
25%,0.537706,0.509125,0.0
50%,0.609087,0.626191,1.0
75%,0.792673,0.776948,1.0
max,0.999749,0.993782,1.0
