In [14]:
from pathlib import Path
import os
import math
import time
import json
from dataclasses import dataclass

import cv2
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import torch
from ultralytics import YOLO

# ROOT is the directory the kernel is currently running in; every other path
# is derived from it, so start the notebook where data/ and best.pt live.
ROOT = Path("").resolve()

DATA_DIR = ROOT.joinpath("data")
VIDEOS_DIR = DATA_DIR.joinpath("videos_compressed")

# Detector checkpoint (48-class weights); adjust if your repo stores it elsewhere.
YOLO_WEIGHTS = ROOT.joinpath("best.pt")

# All artefacts produced by this notebook land below this directory.
OUT_DIR = ROOT.joinpath("outputs", "notebook4")
OUT_DIR.mkdir(parents=True, exist_ok=True)

print("ROOT:", ROOT)
for _label, _path in (
    ("DATA_DIR exists:", DATA_DIR),
    ("VIDEOS_DIR exists:", VIDEOS_DIR),
    ("YOLO_WEIGHTS exists:", YOLO_WEIGHTS),
):
    print(_label, _path.exists(), _path)

# Prefer the GPU when torch can see one.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("DEVICE:", DEVICE)
print("OUT_DIR:", OUT_DIR)

ROOT: /data/wesleyferreiramaia/workzone
DATA_DIR exists: True /data/wesleyferreiramaia/workzone/data
VIDEOS_DIR exists: True /data/wesleyferreiramaia/workzone/data/videos_compressed
YOLO_WEIGHTS exists: True /data/wesleyferreiramaia/workzone/best.pt
DEVICE: cuda
OUT_DIR: /data/wesleyferreiramaia/workzone/outputs/notebook4


In [15]:
# Load the detector, then try to move it to the selected device. A failed move
# is reported but does not stop the notebook.
model = YOLO(str(YOLO_WEIGHTS))
try:
    model.to(DEVICE)
except Exception as move_err:
    print("Warning: model.to failed, continuing:", move_err)

names = model.names  # dict: id -> name
num_classes = len(names)

print("Loaded weights:", YOLO_WEIGHTS)
print("Num classes:", num_classes)
print("First 15 class names:")
for class_id, class_name in list(names.items())[:15]:
    print(class_id, ":", class_name)

# The grouping rules further down were written for the 48-class checkpoint.
assert num_classes == 48, f"Expected 48 classes, got {num_classes}. Check you loaded the right best.pt."

Loaded weights: /data/wesleyferreiramaia/workzone/best.pt
Num classes: 48
First 15 class names:
0 : Police Officer
1 : Police Vehicle
2 : Cone
3 : Fence
4 : Drum
5 : Barricade
6 : Barrier
7 : Work Vehicle
8 : Vertical Panel
9 : Arrow Board
10 : Tubular Marker
11 : Bike Lane
12 : Work Equipment
13 : Worker
14 : Temporary Traffic Control Message Board


In [16]:
# --- Run mode ---
RUN_ALL_VIDEOS = True          # False => keep only the first N_SAMPLE videos (quick test)
N_SAMPLE = 3                   # number of videos kept when RUN_ALL_VIDEOS is False

# --- Processing ---
STRIDE = 3                     # the detector runs on every STRIDE-th frame
MAX_VIDEOS_TO_ANNOTATE = 10    # how many of the worst-ranked videos get rendered later

# Discover the input clips in a stable (sorted) order and preview the first few.
video_paths = sorted(VIDEOS_DIR.glob("*.mp4"))
print("Found videos:", len(video_paths))
for preview_path in video_paths[:10]:
    print("-", preview_path.name)

video_paths = video_paths if RUN_ALL_VIDEOS else video_paths[:N_SAMPLE]

print("\nWill process videos:", len(video_paths))


Found videos: 406
- boston_042e1caf93114d3286c11ba14ddaa759_000001_02790_snippet.mp4
- boston_042e1caf93114d3286c11ba14ddaa759_000001_13410_snippet.mp4
- boston_10cb4a5e9c0740c8a6ff092bae0d4873_000000_08460_snippet.mp4
- boston_10cb4a5e9c0740c8a6ff092bae0d4873_000000_11610_snippet.mp4
- boston_10cb4a5e9c0740c8a6ff092bae0d4873_000001_22890_snippet.mp4
- boston_10cb4a5e9c0740c8a6ff092bae0d4873_000003_00150_snippet.mp4
- boston_10cb4a5e9c0740c8a6ff092bae0d4873_000004_00810_snippet.mp4
- boston_2bdb5a72602342a5991b402beb8b7ab4_000001_23370_snippet.mp4
- boston_2d8e13b1a8304d8395dcf6479ca61814_000004_04710_snippet.mp4
- boston_2d8e13b1a8304d8395dcf6479ca61814_000006_01530_snippet.mp4

Will process videos: 406


In [17]:
def norm(s: str) -> str:
    """Return ``str(s)`` with surrounding whitespace removed and letters lower-cased."""
    text = str(s)
    return text.strip().lower()

# Lookup tables between class ids and class names (name keys are normalised).
id_to_name = {int(cid): str(label) for cid, label in names.items()}
name_to_id = {norm(label): int(cid) for cid, label in id_to_name.items()}

# Keyword lists per semantic group. A class belongs to the first group (in the
# order written here) that has a keyword occurring as a substring of its
# normalised name, so the order of the groups matters.
GROUP_RULES = dict(
    channelization=[
        "cone",
        "drum",
        "barricade",
        "barrier",
        "vertical panel",
        "tubular marker",
        "fence",
    ],
    workers=["worker", "police officer", "flagger"],
    vehicles=["work vehicle", "police vehicle"],
    message_board=["message board", "arrow board"],
    ttc_signs=["temporary traffic control sign"],
    other_roadwork=["work equipment", "other roadwork"],
)

def class_to_group(class_name: str) -> str:
    """
    Map a detector class name to its semantic group.

    The name is normalised with ``norm`` and compared against ``GROUP_RULES``;
    the first group (in dict order) owning a keyword that is a substring of the
    name is returned, or ``"other"`` when no keyword matches.
    """
    label = norm(class_name)
    matching_groups = (
        group
        for group, keywords in GROUP_RULES.items()
        if any(keyword in label for keyword in keywords)
    )
    return next(matching_groups, "other")

# Resolve every class id to its group once, so per-frame code is a plain lookup.
clsid_to_group = dict(zip(id_to_name, map(class_to_group, id_to_name.values())))

# Sanity check: how many classes landed in each group?
group_counts = {}
for g in clsid_to_group.values():
    group_counts[g] = group_counts.get(g, 0) + 1

print("Group coverage (#classes per group):", group_counts)
print("\nExamples:")
for cid in list(id_to_name)[:20]:
    print(f"{cid:2d}  {id_to_name[cid]:55s} -> {clsid_to_group[cid]}")


Group coverage (#classes per group): {'workers': 4, 'vehicles': 3, 'channelization': 7, 'message_board': 2, 'other': 1, 'other_roadwork': 1, 'ttc_signs': 30}

Examples:
 0  Police Officer                                          -> workers
 1  Police Vehicle                                          -> vehicles
 2  Cone                                                    -> channelization
 3  Fence                                                   -> channelization
 4  Drum                                                    -> channelization
 5  Barricade                                               -> channelization
 6  Barrier                                                 -> channelization
 7  Work Vehicle                                            -> vehicles
 8  Vertical Panel                                          -> channelization
 9  Arrow Board                                             -> message_board
10  Tubular Marker                                          -> channeli

In [18]:
def logistic(x: float) -> float:
    """
    Logistic sigmoid ``1 / (1 + e**-x)``, mapping any real ``x`` into [0, 1].

    ``math.exp`` raises ``OverflowError`` once ``-x`` exceeds roughly 709; the
    true value is then indistinguishable from 0, so 0.0 is returned instead of
    propagating the error. Every input that previously worked yields exactly
    the same result as before.
    """
    try:
        return 1.0 / (1.0 + math.exp(-x))
    except OverflowError:
        # exp(-x) is too large for a float, i.e. x is hugely negative.
        return 0.0

@dataclass
class WorkzoneConfig:
    """
    Tunable parameters for the per-frame work-zone score and the IN/OUT state machine.

    The ``w_*`` weights and ``bias`` are combined linearly in
    ``compute_frame_score``; the remaining fields drive ``run_state_machine``.
    """

    # Linear weights applied to the per-frame detection count of each group
    # (channelization, workers, vehicles, TTC signs, message boards).
    w_channel: float = 1.0
    w_workers: float = 1.1
    w_vehicles: float = 0.6
    w_ttc: float = 0.9
    w_msg: float = 0.9
    # Weight of the near proxy (fraction of boxes that are large in the frame).
    w_near: float = 1.2

    # Constant added to the raw score before the logistic squashing;
    # more negative => harder to trigger.
    bias: float = -4.0

    # EMA smoothing factor applied once per processed sample
    # (higher alpha => faster response, more flicker).
    ema_alpha: float = 0.20

    # Hysteresis thresholds on the EMA score: the enter timer advances while
    # EMA >= enter_th, the exit timer advances while EMA <= exit_th.
    enter_th: float = 0.62
    exit_th: float = 0.45

    # Persistence requirements (seconds): how much time must accumulate on the
    # enter / exit timer before the state actually switches.
    min_enter_sec: float = 1.0
    min_exit_sec: float = 1.0

    # After entering, exit decisions are ignored for this long (seconds).
    cooldown_after_enter_sec: float = 1.0

# Shared default configuration used by the processing cells below.
CFG = WorkzoneConfig()

def compute_near_proxy(boxes_xyxy: np.ndarray, img_w: int, img_h: int, area_thresh: float = 0.02) -> float:
    """
    Near proxy: fraction of detections whose box covers a relatively large
    share of the image. There is no depth signal, so proximity is
    approximated by (bbox area / image area).

    Args:
        boxes_xyxy: (N, 4) array-like of ``x1, y1, x2, y2`` boxes; ``None`` or
            an empty collection yields 0.0.
        img_w, img_h: image size in the same units as the box coordinates.
        area_thresh: a box counts as "near" when its normalised area is
            strictly greater than this value (default 0.02, the previous
            hard-coded threshold).

    Returns:
        Share of boxes considered near, as a float in [0, 1].
    """
    if boxes_xyxy is None or len(boxes_xyxy) == 0:
        return 0.0

    # Accept lists/tuples as well as arrays.
    boxes = np.asarray(boxes_xyxy, dtype=float)
    img_area = float(img_w * img_h)

    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    # The epsilon keeps the division defined when the image area is zero.
    area_norm = (widths * heights) / (img_area + 1e-9)

    return float(np.mean(area_norm > area_thresh))

def compute_frame_score(groups_count: dict, near_proxy: float, cfg: WorkzoneConfig) -> tuple:
    """
    Combine per-group detection counts into a single frame score.

    Args:
        groups_count: mapping group name -> number of detections in the frame.
            Only "channelization", "workers", "vehicles", "ttc_signs" and
            "message_board" contribute; missing keys count as 0 and any other
            key is ignored.
        near_proxy: value returned by ``compute_near_proxy`` for the frame.
        cfg: weights and bias of the linear combination.

    Returns:
        ``(score, raw)`` where ``raw`` is the weighted sum including the bias
        and ``score`` is ``logistic(raw)``, i.e. ``raw`` squashed into 0..1.
    """
    raw = (
        cfg.bias
        + cfg.w_channel * groups_count.get("channelization", 0)
        + cfg.w_workers * groups_count.get("workers", 0)
        + cfg.w_vehicles * groups_count.get("vehicles", 0)
        + cfg.w_ttc * groups_count.get("ttc_signs", 0)
        + cfg.w_msg * groups_count.get("message_board", 0)
        + cfg.w_near * near_proxy
    )
    # map to 0..1
    score = logistic(raw)
    return score, raw

def run_state_machine(timeline_df: pd.DataFrame, cfg: WorkzoneConfig) -> pd.DataFrame:
    """
    Turn per-sample scores into a stable IN/OUT work-zone decision.

    Reads the ``time_sec`` and ``score_raw`` columns of ``timeline_df``. The
    score is smoothed with an EMA (starting from 0.0) and fed through a
    hysteresis state machine with dwell-time requirements and a post-entry
    cooldown.

    Returns a copy of ``timeline_df`` with four extra columns:
    ``score_ema``, ``is_workzone`` (0/1), ``state`` ("OUT"/"IN") and
    ``toggle_count_so_far`` (cumulative number of state changes).
    """
    if len(timeline_df):
        samples = zip(timeline_df["time_sec"], timeline_df["score_raw"])
        last_t = float(timeline_df["time_sec"].iloc[0])
    else:
        samples = ()
        last_t = 0.0

    ema = 0.0
    inside = False          # False <=> "OUT", True <=> "IN"
    enter_dwell = 0.0       # seconds accumulated towards entering
    exit_dwell = 0.0        # seconds accumulated towards leaving
    cooldown_left = 0.0     # seconds during which leaving is still blocked
    n_toggles = 0
    history = []            # one (ema, is_workzone, state, toggles) tuple per sample

    for t_val, s_val in samples:
        now = float(t_val)
        step = max(0.0, now - last_t)
        last_t = now

        # Exponential moving average of the raw score.
        ema = (1 - cfg.ema_alpha) * ema + cfg.ema_alpha * float(s_val)

        cooldown_left = max(0.0, cooldown_left - step)

        if not inside:
            # Evidence above enter_th builds up the timer; anything else drains
            # it at twice the rate.
            if ema >= cfg.enter_th:
                enter_dwell += step
            else:
                enter_dwell = max(0.0, enter_dwell - 2 * step)

            if enter_dwell >= cfg.min_enter_sec:
                inside = True
                n_toggles += 1
                cooldown_left = cfg.cooldown_after_enter_sec
                enter_dwell = 0.0
                exit_dwell = 0.0
        elif not cooldown_left > 0:
            # Exit logic only runs once the post-entry cooldown has elapsed.
            if ema <= cfg.exit_th:
                exit_dwell += step
            else:
                exit_dwell = max(0.0, exit_dwell - 2 * step)

            if exit_dwell >= cfg.min_exit_sec:
                inside = False
                n_toggles += 1
                exit_dwell = 0.0
                enter_dwell = 0.0

        history.append((ema, int(inside), "IN" if inside else "OUT", n_toggles))

    out = timeline_df.copy()
    new_columns = ("score_ema", "is_workzone", "state", "toggle_count_so_far")
    for position, column in enumerate(new_columns):
        out[column] = [record[position] for record in history]
    return out


In [19]:
def process_one_video(video_path: Path, cfg: WorkzoneConfig, stride: int = 3, imgsz: int = 960, conf: float = 0.25, iou: float = 0.7):
    """
    Run the detector over one video and build its work-zone timeline.

    Every ``stride``-th frame is passed to the global ``model``; detections
    are counted per semantic group, scored with ``compute_frame_score`` and
    the resulting rows are post-processed by ``run_state_machine``.

    Args:
        video_path: video file to read.
        cfg: scoring / state-machine configuration.
        stride: process every ``stride``-th frame (must be >= 1).
        imgsz, conf, iou: forwarded unchanged to ``model.predict``.

    Returns:
        ``(df, meta)``. ``df`` has one row per processed frame with the columns
        frame, time_sec, score_raw (logistic score), raw (pre-logistic sum),
        channel, workers, vehicles, ttc, msg, near_proxy plus the columns
        added by ``run_state_machine``; it is an empty DataFrame when no frame
        could be read. ``meta`` holds fps, w, h and total_frames as reported
        by the container.

    Raises:
        ValueError: if ``stride`` is smaller than 1.
        RuntimeError: if the video cannot be opened.
    """
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")

    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        cap.release()
        raise RuntimeError(f"Could not open video: {video_path}")

    rows = []
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not math.isfinite(fps) or fps <= 0:
            # Some containers report 0/NaN; fall back to a nominal frame rate.
            fps = 30.0
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % stride != 0:
                frame_idx += 1
                continue

            t_sec = frame_idx / fps

            results = model.predict(
                frame,
                imgsz=imgsz,
                conf=conf,
                iou=iou,
                verbose=False,
                device=0 if DEVICE.startswith("cuda") else "cpu",
            )
            r = results[0]

            groups_count = {
                "channelization": 0,
                "workers": 0,
                "vehicles": 0,
                "ttc_signs": 0,
                "message_board": 0,
                "other_roadwork": 0,
            }
            near_proxy = 0.0

            if r.boxes is not None and len(r.boxes) > 0:
                cls_ids = r.boxes.cls.detach().cpu().numpy().astype(int)
                xyxy = r.boxes.xyxy.detach().cpu().numpy().astype(float)

                for cid in cls_ids:
                    g = clsid_to_group.get(int(cid), "other")
                    groups_count[g] = groups_count.get(g, 0) + 1

                # If the container did not report a usable size, use the size
                # of the decoded frame so the area ratio stays meaningful.
                frame_h, frame_w = frame.shape[:2]
                near_proxy = compute_near_proxy(
                    xyxy,
                    w if w > 0 else frame_w,
                    h if h > 0 else frame_h,
                )

            score, raw = compute_frame_score(groups_count, near_proxy, cfg)

            rows.append({
                "frame": frame_idx,
                "time_sec": float(t_sec),
                "score_raw": float(score),
                "raw": float(raw),
                "channel": int(groups_count.get("channelization", 0)),
                "workers": int(groups_count.get("workers", 0)),
                "vehicles": int(groups_count.get("vehicles", 0)),
                "ttc": int(groups_count.get("ttc_signs", 0)),
                "msg": int(groups_count.get("message_board", 0)),
                "near_proxy": float(near_proxy),
            })

            frame_idx += 1
    finally:
        # Release the capture even when decoding or inference raises.
        cap.release()

    meta = {"fps": fps, "w": w, "h": h, "total_frames": total_frames}

    df = pd.DataFrame(rows)
    if len(df) == 0:
        # return empty but consistent
        return df, meta

    return run_state_machine(df, cfg), meta


In [20]:
# Process every selected video, persist its timeline as CSV and collect one
# summary row per video. Every row carries the same keys (including csv_path),
# so later cells can rely on a uniform schema even for videos that yielded no
# frames.
summary_rows = []

all_csv_dir = OUT_DIR / "timelines_csv"
all_csv_dir.mkdir(parents=True, exist_ok=True)

for vp in tqdm(video_paths, desc="Processing videos"):
    df_tl, meta = process_one_video(vp, CFG, stride=STRIDE, imgsz=960, conf=0.25, iou=0.7)

    csv_path = all_csv_dir / f"{vp.stem}_timeline.csv"
    df_tl.to_csv(csv_path, index=False)

    fps_eff = meta["fps"] or 30.0
    duration_sec = float(meta["total_frames"] / fps_eff)

    if len(df_tl) == 0:
        summary_rows.append({
            "video": vp.name,
            "frames_processed": 0,
            "duration_sec": duration_sec,
            "toggles": 0,
            "first_enter_time": np.nan,
            "time_in_workzone": 0.0,
            "mean_score_ema": np.nan,
            "csv_path": str(csv_path),
        })
        continue

    # metrics
    toggles = int(df_tl["toggle_count_so_far"].iloc[-1])
    mean_score_ema = float(df_tl["score_ema"].mean())

    # A 0 -> 1 step of is_workzone marks an entry into the work zone.
    enters = df_tl.index[(df_tl["is_workzone"].diff().fillna(0) == 1)].tolist()
    first_enter_time = float(df_tl.loc[enters[0], "time_sec"]) if len(enters) else np.nan

    # time in workzone approx = number of IN samples * (stride / fps)
    dt_eff = STRIDE / fps_eff
    time_in_workzone = float(df_tl["is_workzone"].sum() * dt_eff)

    summary_rows.append({
        "video": vp.name,
        "frames_processed": int(len(df_tl)),
        "duration_sec": duration_sec,
        "toggles": toggles,
        "first_enter_time": first_enter_time,
        "time_in_workzone": time_in_workzone,
        "mean_score_ema": mean_score_ema,
        "csv_path": str(csv_path),
    })

df_summary = pd.DataFrame(summary_rows)

summary_path = OUT_DIR / "summary_all_videos.csv"
df_summary.to_csv(summary_path, index=False)

print("Saved summary:", summary_path)
df_summary.head()


Processing videos: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 406/406 [39:02<00:00,  5.77s/it]

Saved summary: /data/wesleyferreiramaia/workzone/outputs/notebook4/summary_all_videos.csv





Unnamed: 0,video,frames_processed,duration_sec,toggles,first_enter_time,time_in_workzone,mean_score_ema,csv_path
0,boston_042e1caf93114d3286c11ba14ddaa759_000001...,300,30.0,2,15.4,9.4,0.334517,/data/wesleyferreiramaia/workzone/outputs/note...
1,boston_042e1caf93114d3286c11ba14ddaa759_000001...,300,30.0,2,14.2,1.9,0.110878,/data/wesleyferreiramaia/workzone/outputs/note...
2,boston_10cb4a5e9c0740c8a6ff092bae0d4873_000000...,300,30.0,2,15.0,9.4,0.324342,/data/wesleyferreiramaia/workzone/outputs/note...
3,boston_10cb4a5e9c0740c8a6ff092bae0d4873_000000...,300,30.0,2,9.8,11.2,0.407039,/data/wesleyferreiramaia/workzone/outputs/note...
4,boston_10cb4a5e9c0740c8a6ff092bae0d4873_000001...,301,30.033333,1,8.6,21.5,0.724988,/data/wesleyferreiramaia/workzone/outputs/note...


In [21]:
# Derive the ranking features on a fresh frame; df_summary itself stays untouched.
df_summary2 = df_summary.assign(
    # Toggles per minute of video (denominator floored to avoid divide by zero).
    flicker_per_min=lambda d: d["toggles"] / np.maximum(d["duration_sec"] / 60.0, 1e-6),
    # 1 when the state machine never switched to IN.
    never_enters=lambda d: d["first_enter_time"].isna().astype(int),
    # 1 when the first entry happened within the first 2 seconds.
    early_enter=lambda d: d["first_enter_time"].lt(2.0).fillna(False).astype(int),
)

# Worst first: most flicker, then early entries, then videos that never enter.
df_ranked = df_summary2.sort_values(
    by=["flicker_per_min", "early_enter", "never_enters"],
    ascending=False,
).reset_index(drop=True)

print("Top 20 worst videos by flicker:")
df_ranked.loc[:, ["video", "duration_sec", "toggles", "flicker_per_min", "first_enter_time", "time_in_workzone", "never_enters", "early_enter"]].head(20)


Top 20 worst videos by flicker:


Unnamed: 0,video,duration_sec,toggles,flicker_per_min,first_enter_time,time_in_workzone,never_enters,early_enter
0,san_antonio_43beaf3d39a249d5a2e65f0b2c016b7c_0...,30.033333,7,13.984462,4.7,18.2,0,0
1,san_antonio_4eb110af489b46bc90ca20dd4c83c2b8_0...,30.0,6,12.0,1.5,16.7,0,1
2,los_angeles_71233e9e3baa4cfc9de7ee7df9c54888_0...,30.033333,6,11.986681,5.7,14.7,0,0
3,boston_e16d504a5070463f8e0fe68767ce98cd_000001...,30.0,5,10.0,2.2,14.7,0,0
4,denver_c9c32a73117b4b6b80dbc612b4cbd540_000000...,30.0,5,10.0,5.5,17.5,0,0
5,denver_5ed81176d43f41e2acb357de33c5936e_000001...,30.033333,5,9.988901,12.7,7.6,0,0
6,detroit_1298b4f588c34df698657f67df72f419_00000...,30.033333,5,9.988901,2.2,14.6,0,0
7,los_angeles_cca14054afbb44ddadfd2b1a157f2d70_0...,30.033333,5,9.988901,12.2,9.4,0,0
8,san_antonio_43beaf3d39a249d5a2e65f0b2c016b7c_0...,30.033333,5,9.988901,11.6,14.4,0,0
9,san_antonio_4eb110af489b46bc90ca20dd4c83c2b8_0...,30.033333,5,9.988901,4.7,10.7,0,0


In [22]:
def annotate_video(video_path: Path, timeline_csv: Path, out_mp4: Path, cfg: WorkzoneConfig):
    """
    Render a copy of ``video_path`` with a WORKZONE / NO WORKZONE banner.

    The banner content comes from the ``frame``, ``score_ema`` and
    ``is_workzone`` columns of ``timeline_csv``. The timeline usually holds
    only every N-th frame, so the most recent known state is carried forward
    onto the frames in between; otherwise the banner would blink on and off.
    Frames before the first timeline entry are written without a banner.

    Args:
        video_path: source video.
        timeline_csv: timeline CSV for that video.
        out_mp4: destination file (written with the ``mp4v`` codec).
        cfg: currently unused; kept so existing callers keep working.

    Returns:
        None. Nothing is written when the timeline is empty.

    Raises:
        RuntimeError: if the source video or the output writer cannot be opened.
    """
    try:
        df_tl = pd.read_csv(timeline_csv)
    except pd.errors.EmptyDataError:
        # A timeline saved from an empty DataFrame has no header to parse.
        return
    if len(df_tl) == 0:
        return

    # Build map: processed frame idx -> (score_ema, is_workzone)
    tl_map = {
        int(frame_no): (float(ema), int(flag))
        for frame_no, ema, flag in zip(df_tl["frame"], df_tl["score_ema"], df_tl["is_workzone"])
    }

    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        cap.release()
        raise RuntimeError(f"Could not open video: {video_path}")

    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(str(out_mp4), fourcc, fps, (w, h))
        if not out.isOpened():
            raise RuntimeError(f"Could not open video writer: {out_mp4}")

        current = None  # last known (score_ema, is_workzone)
        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Hold the previous state on frames the timeline skipped.
            current = tl_map.get(frame_idx, current)
            if current is not None:
                score_ema, is_wz = current
                label = "WORKZONE" if is_wz == 1 else "NO WORKZONE"
                color = (0, 0, 255) if is_wz == 1 else (0, 200, 0)

                cv2.rectangle(frame, (0, 0), (w, 60), color, -1)
                cv2.putText(frame, f"{label}  ema={score_ema:.2f}", (20, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2, cv2.LINE_AA)

            out.write(frame)
            frame_idx += 1
    finally:
        # Always free the reader/writer, even if drawing or writing fails.
        cap.release()
        if out is not None:
            out.release()

# Render annotated copies of the worst-ranked videos.
annot_dir = OUT_DIR / "annotated_worst"
annot_dir.mkdir(parents=True, exist_ok=True)

worst_videos = df_ranked.head(MAX_VIDEOS_TO_ANNOTATE)

for _, row in worst_videos.iterrows():
    csv_value = row.get("csv_path")
    if pd.isna(csv_value):
        # No timeline path recorded for this video; Path(nan) would raise.
        continue

    vp = VIDEOS_DIR / row["video"]
    csvp = Path(csv_value)
    outp = annot_dir / f"{vp.stem}_annotated_stable.mp4"
    annotate_video(vp, csvp, outp, CFG)

print("Saved annotated videos to:", annot_dir)


Saved annotated videos to: /data/wesleyferreiramaia/workzone/outputs/notebook4/annotated_worst


In [None]:
# Tuning knobs if the IN/OUT state flickers too much:
# - lower ema_alpha (more smoothing)
# - increase min_enter_sec / min_exit_sec (longer persistence before switching)
# - make bias more negative (lowers every frame score, so entering is harder)
# - raise enter_th and/or lower exit_th (widens the hysteresis band)
#
# NOTE: this mutates the shared CFG object in place, so the new values only
# affect cells that are (re-)run after this one.

CFG.ema_alpha = 0.15
CFG.min_enter_sec = 1.5
CFG.min_exit_sec = 1.5
CFG.enter_th = 0.65
CFG.exit_th = 0.40

print("Updated CFG:", CFG)
# NOTE(review): "Cell 7+8" presumably means the batch-processing and ranking
# cells above — confirm against the notebook's cell order.
print("Re-run Cell 7+8 to see if flicker improves.")
