# Object Tracking & Intent Analysis (v7 - Interaction Distance, Full Annotations)

In [1]:
# Install Ultralytics (YOLOv8) and BoxMOT
!pip install ultralytics --quiet
!pip install boxmot --quiet

# Import necessary libraries
import os
import cv2
import time
import yaml
import torch
import json
import numpy as np
from pathlib import Path
from collections import defaultdict, deque
from ultralytics import YOLO
import datetime
# from google.colab.patches import cv2_imshow # Usually not needed if running locally or if cv2.imshow works

# Define DummyTracker globally for fallback

class DummyTracker:
    def __init__(self, *args, **kwargs):
        print("Initialized DummyTracker for BoxMOT fallback.")
        self.frame_id = 0
    def update(self, dets, img): # img argument is often expected by trackers
        self.frame_id += 1
        if dets is None or len(dets) == 0:
            return np.empty((0, 7)) # BoxMOT ByteTrack returns 7 columns: x1,y1,x2,y2,id,cls,conf
        
        if hasattr(dets, 'cpu') and not isinstance(dets, np.ndarray):
            dets_np = dets.cpu().numpy()
        else:
            dets_np = dets
            
        fake_tracks = []
        for i, det_row in enumerate(dets_np):
            x1, y1, x2, y2 = det_row[:4]
            conf = det_row[4] if len(det_row) > 4 else 0.5 
            cls = det_row[5] if len(det_row) > 5 else (i % 5) # Placeholder class
            fake_tracks.append([x1, y1, x2, y2, self.frame_id * 1000 + i, cls, conf])
        return np.array(fake_tracks)


# Attempt to import BoxMOT and its utilities
try:
    from boxmot import create_tracker
    from boxmot.utils import TRACKER_CONFIGS
    print("BoxMOT and create_tracker imported successfully.")
    if TRACKER_CONFIGS is None:
        print("WARNING: boxmot.utils.TRACKER_CONFIGS is None. main() will attempt to find config path manually.")
except ImportError as e:
    print(f"ERROR: Failed to import BoxMOT: {e}")
    print("Please ensure BoxMOT is installed: pip install boxmot")
    print("WARNING: BoxMOT not available. Real tracking will not work. Falling back to DummyTracker.")
    create_tracker = lambda *args, **kwargs: DummyTracker(*args, **kwargs)
    TRACKER_CONFIGS = Path("dummy_boxmot_configs") # Relative path for dummy config
    if not TRACKER_CONFIGS.exists():
        TRACKER_CONFIGS.mkdir(parents=True, exist_ok=True)


BoxMOT and create_tracker imported successfully.


## Configuration Parameters & Global Variables

In [2]:
# --- Configuration & Global Variables ---
# ROI_MODE: 0 = Manual ROI, 1 = Dynamic ROI from frame margin
ROI_MODE = 1 
ROI_MARGIN_PIXELS = 10 # Margin in pixels for dynamic ROI (if ROI_MODE = 1)
MANUAL_ROI = (100, 100, 500, 400) # Manual ROI: (x1, y1, x2, y2) or None (if ROI_MODE = 0)

# Behavior analysis parameters
LOITERING_THRESHOLD_SEC = 5
INTERACTION_PROXIMITY_THRESHOLD = 70
TRACK_HISTORY_LENGTH = 60 # Frames of history for behavior analysis
EVENT_LOG_FILE = "event_log_boxmot_v7.json" # Relative path

# Event Media Saving Parameters
ENABLE_EVENT_CLIPS = True
ENABLE_EVENT_SNAPSHOTS = True
# EVENT_CLIP_OUTPUT_DIR = "event_clips" # Relative path, will be created if not exists
# EVENT_SNAPSHOT_OUTPUT_DIR = "event_snapshots" # Relative path, will be created if not exists
LOITERING_EVENT_CLIP_PRE_BUFFER_SEC = 2
LOITERING_EVENT_CLIP_POST_BUFFER_SEC = 1 # Per user request, loitering clip ends at event time
INSTANT_EVENT_CLIP_TOTAL_DURATION_SEC = 10 # Centered around event time (5s before, 5s after)
FRAME_BUFFER_DURATION_SEC = 15 # Max duration of ANNOTATED frames to keep in memory

# Global data structures
ROI = None
track_history = defaultdict(lambda: deque(maxlen=TRACK_HISTORY_LENGTH))
object_loitering_start_time = defaultdict(lambda: None)
event_log = [] # Will be populated by log_event, and saved at the end

# Frame buffer for video clip saving (stores (ANNOTATED_frame_copy, timestamp))
frame_buffer = deque()
active_clip_capture_tasks = []

# Statistics counters
total_frames_read_count = 0
total_frames_processed_count = 0
cumulative_detected_class_counts = defaultdict(int)


In [3]:
# ===== Path Helper (放在 Notebook 最前面即可) =====================
# from pathlib import Path
from datetime import datetime

# ▶ 修改這兩行就能切換資料來源與輸出根目錄
INPUT_SOURCE = "dali_cam2_0519am_otherman.mp4"      # 或 RTSP/HTTP URL
OUTPUT_ROOT  = Path("output")               # 建議集中管理

def init_paths(input_path: str | Path, add_timestamp: bool = True):
    """依輸入檔名自動建立版本化輸出目錄與全域變數。"""
    global RUN_NAME, RUN_DIR, EVENT_LOG_FILE
    global EVENT_CLIP_OUTPUT_DIR, EVENT_SNAPSHOT_OUTPUT_DIR

    input_path = Path(str(input_path))
    RUN_NAME   = input_path.stem
    ts_layer   = datetime.now().strftime("%Y%m%d_%H%M%S") if add_timestamp else ""
    RUN_DIR    = OUTPUT_ROOT / RUN_NAME / ts_layer

    EVENT_CLIP_OUTPUT_DIR     = RUN_DIR / "clips"
    EVENT_SNAPSHOT_OUTPUT_DIR = RUN_DIR / "snapshots"
    EVENT_LOG_FILE            = RUN_DIR / f"{RUN_NAME}_events.json"

    # 建立必要目錄
    EVENT_CLIP_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    EVENT_SNAPSHOT_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    print("▍Path initialised")
    print(" RUN_DIR                  :", RUN_DIR)
    print(" EVENT_CLIP_OUTPUT_DIR    :", EVENT_CLIP_OUTPUT_DIR)
    print(" EVENT_SNAPSHOT_OUTPUT_DIR:", EVENT_SNAPSHOT_OUTPUT_DIR)
    print(" EVENT_LOG_FILE           :", EVENT_LOG_FILE)

# ★ 呼叫一次，之後整支 Notebook 都能用全域變數
init_paths(INPUT_SOURCE)


▍Path initialised
 RUN_DIR                  : output\dali_cam2_0519am_otherman\20250522_091346
 EVENT_CLIP_OUTPUT_DIR    : output\dali_cam2_0519am_otherman\20250522_091346\clips
 EVENT_SNAPSHOT_OUTPUT_DIR: output\dali_cam2_0519am_otherman\20250522_091346\snapshots
 EVENT_LOG_FILE           : output\dali_cam2_0519am_otherman\20250522_091346\dali_cam2_0519am_otherman_events.json


## Helper Functions (Analysis, Drawing, Logging, Media Saving)

In [4]:
def get_centroid(bbox):
    x1, y1, x2, y2 = bbox[:4]
    return int((x1 + x2) / 2), int((y1 + y2) / 2)

def is_within_roi(centroid, current_roi):
    if current_roi is None: return False
    cx, cy = centroid; rx1, ry1, rx2, ry2 = current_roi
    return rx1 <= cx <= rx2 and ry1 <= cy <= ry2

def calculate_distance(p1, p2): return np.sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)

def save_event_snapshot(annotated_frame, event_type, track_id, event_timestamp):
    if annotated_frame is None:
        print("No frame provided for snapshot.")
        return
    try:
        snap_dir = Path(EVENT_SNAPSHOT_OUTPUT_DIR)
        snap_dir.mkdir(parents=True, exist_ok=True)
        ts_str = event_timestamp.strftime("%Y%m%d_%H%M%S_%f")[:-3]
        filename_parts = [event_type]
        if track_id is not None: filename_parts.append(f"id{track_id}")
        filename_parts.append(ts_str)
        filename = "_".join(map(str, filename_parts)) + ".jpg"
        filepath = snap_dir / filename
        cv2.imwrite(str(filepath), annotated_frame)
        print(f"Event snapshot saved: {filepath}")
    except Exception as e:
        print(f"ERROR saving event snapshot: {e}")

def save_video_clip(frames_to_save, output_path_str, fps, frame_width, frame_height):
    if not frames_to_save:
        print(f"No frames to save for {output_path_str}.")
        return
    output_path = Path(output_path_str)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(str(output_path), fourcc, fps, (frame_width, frame_height))
    for frame in frames_to_save:
        writer.write(frame)
    writer.release()
    print(f"Event clip saved: {output_path}")

def log_event(event_data, annotated_frame_for_media):
    global event_log, active_clip_capture_tasks, frame_buffer
    
    # Basic log entry structure from event_data
    log_entry = {
        "timestamp": event_data['event_timestamp'].strftime("%Y-%m-%d %H:%M:%S.%f")[:-3],
        "event_type": event_data['event_type'],
        "track_id": int(event_data['track_id']) if event_data.get('track_id') is not None else None,
        "class_name": event_data.get('class_name'),
        "details": event_data.get('details') or {}
    }
    event_log.append(log_entry)

    # Save snapshot if enabled, using the provided fully annotated frame
    if ENABLE_EVENT_SNAPSHOTS and annotated_frame_for_media is not None:
        save_event_snapshot(annotated_frame_for_media, event_data['event_type'], event_data.get('track_id'), event_data['event_timestamp'])

    # Create video clip task if enabled
    if ENABLE_EVENT_CLIPS:
        desired_clip_start_ts, desired_clip_end_ts = None, None
        event_timestamp = event_data['event_timestamp']
        event_type = event_data['event_type']
        track_id = event_data.get('track_id')
        details = event_data.get('details', {})
        current_fps = event_data['current_fps']
        frame_w = event_data['frame_w']
        frame_h = event_data['frame_h']

        ts_str = event_timestamp.strftime("%Y%m%d_%H%M%S_%f")[:-3]
        clip_name_parts = [event_type]

        if event_type == "loitering":
            loiter_start_time = object_loitering_start_time.get(track_id) # Assumes object_loitering_start_time is globally updated
            if loiter_start_time:
                desired_clip_start_ts = loiter_start_time - datetime.timedelta(seconds=LOITERING_EVENT_CLIP_PRE_BUFFER_SEC)
                desired_clip_end_ts = event_timestamp + datetime.timedelta(seconds=LOITERING_EVENT_CLIP_POST_BUFFER_SEC) # POST_BUFFER_SEC is 0
                if track_id is not None: clip_name_parts.append(f"id{track_id}")
        elif event_type in ["roi_enter", "roi_exit", "interaction"]:
            half_duration = datetime.timedelta(seconds=INSTANT_EVENT_CLIP_TOTAL_DURATION_SEC / 2)
            desired_clip_start_ts = event_timestamp - half_duration
            desired_clip_end_ts = event_timestamp + half_duration
            if track_id is not None: clip_name_parts.append(f"id{track_id}")
            if event_type == "interaction":
                p_id = details.get("person_id"); pkg_id = details.get("package_id")
                if p_id is not None: clip_name_parts.append(f"p{p_id}")
                if pkg_id is not None: clip_name_parts.append(f"pkg{pkg_id}")
        
        if desired_clip_start_ts and desired_clip_end_ts:
            clip_name_parts.append(ts_str)
            filename = "_".join(map(str, clip_name_parts)) + ".mp4"
            output_filepath = Path(EVENT_CLIP_OUTPUT_DIR) / filename

            task = {
                'log_entry_ts': log_entry['timestamp'], # Use the string timestamp from log_entry
                'desired_clip_start_ts': desired_clip_start_ts,
                'desired_clip_end_ts': desired_clip_end_ts,
                'collected_frames': [],
                'output_filename': str(output_filepath),
                'fps': current_fps, 'width': frame_w, 'height': frame_h,
                'header_printed': False
            }
            # Pre-fill with ANNOTATED frames already in buffer
            for f_in_buf, ts_in_buf in list(frame_buffer):
                if ts_in_buf >= desired_clip_start_ts and ts_in_buf <= event_timestamp: # Collect up to current event time
                    task['collected_frames'].append((f_in_buf, ts_in_buf))
            active_clip_capture_tasks.append(task)

def analyze_behavior(track_id, history, current_bbox, class_id, class_name, current_ts, fps_val, current_roi, f_w, f_h):
    global object_loitering_start_time
    events_to_log = []
    if not history or current_roi is None: return events_to_log
    centroid = get_centroid(current_bbox)
    event_data_template = {'track_id': track_id, 'class_name': class_name, 'current_fps': fps_val, 'frame_w': f_w, 'frame_h': f_h}

    if is_within_roi(centroid, current_roi):
        if len(history) > 1:
            _, prev_cx, prev_cy, _, _ = history[-2] # Timestamp of prev point is history[-2][0]
            if not is_within_roi((prev_cx, prev_cy), current_roi):
                events_to_log.append({**event_data_template, 'event_timestamp': current_ts, 'event_type': "roi_enter", 'details': {"roi": current_roi}})
                object_loitering_start_time[track_id] = current_ts
        elif object_loitering_start_time.get(track_id) is None: # First time in ROI for this track
             events_to_log.append({**event_data_template, 'event_timestamp': current_ts, 'event_type': "roi_enter", 'details': {"roi": current_roi}})
             object_loitering_start_time[track_id] = current_ts
        
        start_time = object_loitering_start_time.get(track_id)
        if start_time:
            duration = (current_ts - start_time).total_seconds()
            if duration > LOITERING_THRESHOLD_SEC:
                # Check if a loitering event for this track_id was logged recently to avoid spamming
                if not any(e['event_type'] == "loitering" and e['track_id'] == track_id and 
                           abs((current_ts - datetime.datetime.strptime(e["timestamp"], "%Y-%m-%d %H:%M:%S.%f")).total_seconds()) < LOITERING_THRESHOLD_SEC * 0.8 
                           for e in reversed(event_log[-20:])):
                    events_to_log.append({**event_data_template, 'event_timestamp': current_ts, 'event_type': "loitering", 
                                          'details': {"duration_sec": round(duration,1), "roi": current_roi}})
    else: # Object is outside ROI
        if object_loitering_start_time.get(track_id) is not None: # Was previously in ROI
            events_to_log.append({**event_data_template, 'event_timestamp': current_ts, 'event_type': "roi_exit", 'details': {"roi": current_roi}})
            object_loitering_start_time[track_id] = None # Reset loitering timer
    return events_to_log

def analyze_interactions_for_frame(trk_objs, current_ts, annotated_frame, cls_names, fps_val, f_w, f_h):
    events_to_log = []
    persons = [o for o in trk_objs if cls_names.get(int(o[5]),".").lower() in ["person","other_person","delivery_worker","food_delivery"]]
    packages = [o for o in trk_objs if cls_names.get(int(o[5]),".").lower() in ["package","bag"]]
    pairs = set() # To avoid duplicate interaction events for the same pair in the same frame analysis
    event_data_template = {'current_fps': fps_val, 'frame_w': f_w, 'frame_h': f_h} # No track_id/class_name here, specific to interaction

    for p_idx, p_data in enumerate(persons):
        p_id, p_cent, p_cls_id = int(p_data[4]), get_centroid(p_data[:4]), int(p_data[5])
        p_cls_name = cls_names.get(p_cls_id, "Person")
        for pkg_idx, pkg_data in enumerate(packages):
            pkg_id, pkg_cent, pkg_cls_id = int(pkg_data[4]), get_centroid(pkg_data[:4]), int(pkg_data[5])
            pkg_cls_name = cls_names.get(pkg_cls_id, "Package")
            
            dist = calculate_distance(p_cent, pkg_cent)
            if dist < INTERACTION_PROXIMITY_THRESHOLD:
                # Ensure unique pair (person_id, package_id) to avoid duplicate logging if order changes
                current_pair = tuple(sorted((p_id, pkg_id)))
                if current_pair not in pairs:
                    # Check if this specific interaction was logged very recently
                    if not any(e['event_type'] == "interaction" and 
                               e['details'].get("person_id") == p_id and 
                               e['details'].get("package_id") == pkg_id and 
                               abs((current_ts - datetime.datetime.strptime(e["timestamp"], "%Y-%m-%d %H:%M:%S.%f")).total_seconds()) < 5 # Cooldown for same pair
                               for e in reversed(event_log[-30:])):
                        
                        interaction_details = {
                            "person_id": p_id, "person_class": p_cls_name,
                            "package_id": pkg_id, "package_class": pkg_cls_name,
                            "distance": round(dist,1)
                        }
                        events_to_log.append({**event_data_template, 'event_timestamp': current_ts, 'event_type': "interaction", 
                                              'track_id': None, 'class_name': None, # Interaction is between two objects
                                              'details': interaction_details})
                        pairs.add(current_pair)
                        
                        # Draw interaction annotation (distance and line) onto the frame
                        mid_x = int((p_cent[0] + pkg_cent[0]) / 2)
                        mid_y = int((p_cent[1] + pkg_cent[1]) / 2)
                        interaction_text = f"Interaction Dist: {dist:.1f}px"
                        cv2.line(annotated_frame, p_cent, pkg_cent, (0, 255, 255), 2) # Yellow line
                        cv2.putText(annotated_frame, interaction_text, (mid_x - 50, mid_y - 10), 
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2) # Yellow text
    return events_to_log

def draw_tracked_objects_and_stats(frame_to_draw_on, trk_objs, cls_names, current_roi):
    # This function now MODIFIES frame_to_draw_on IN PLACE
    global total_frames_read_count, total_frames_processed_count, cumulative_detected_class_counts
    if current_roi: cv2.rectangle(frame_to_draw_on, (current_roi[0], current_roi[1]), (current_roi[2], current_roi[3]), (255,255,0),2); cv2.putText(frame_to_draw_on,"ROI",(current_roi[0],current_roi[1]-10),cv2.FONT_HERSHEY_SIMPLEX,0.7,(255,255,0),2)
    for o in trk_objs:
        if len(o)==7:
            x1, y1, x2, y2, tid, cid, scr = map(float, o)
            x1, y1, x2, y2, tid, cid = int(x1), int(y1), int(x2), int(y2), int(tid), int(cid)
            cname = cls_names.get(cid, "Unk")
            clr = get_color_by_id(tid)
            cv2.rectangle(frame_to_draw_on, (x1, y1), (x2, y2), clr, 2)
            lbl = f"ID:{tid} {cname} {scr:.2f}"
            (lw, lh), bl = cv2.getTextSize(lbl, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
            ly = max(lh + 5, y1 - 5)
            lx = x1
            cv2.rectangle(frame_to_draw_on, (lx, ly - lh - bl), (lx + lw, ly + bl), clr, cv2.FILLED)
            cv2.putText(frame_to_draw_on, lbl, (lx, ly), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
            if object_loitering_start_time.get(tid) and isinstance(object_loitering_start_time[tid], datetime.datetime):
                dur = (datetime.datetime.now() - object_loitering_start_time[tid]).total_seconds()
                cv2.putText(frame_to_draw_on, f"Loiter:{dur:.1f}s", (x1, y2 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
    '''
    y_off=20; cv2.putText(frame_to_draw_on,f"Read:{total_frames_read_count}",(10,y_off),cv2.FONT_HERSHEY_SIMPLEX,0.6,(0,255,0),2); y_off+=25; cv2.putText(frame_to_draw_on,f"Proc:{total_frames_processed_count}",(10,y_off),cv2.FONT_HERSHEY_SIMPLEX,0.6,(0,255,0),2); y_off+=25; cv2.putText(frame_to_draw_on,"Detects:",(10,y_off),cv2.FONT_HERSHEY_SIMPLEX,0.6,(0,255,0),2);y_off+=20
    for cid, cnt in sorted(cumulative_detected_class_counts.items()):
        cn = cls_names.get(cid, f"Cls{cid}")
        cv2.putText(frame_to_draw_on, f"- {cn}:{cnt}", (15, y_off), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        y_off += 20
        if y_off > frame_to_draw_on.shape[0] - 20:
            break
    '''
    return frame_to_draw_on # Return the modified frame

def get_color_by_id(track_id): np.random.seed(track_id); return tuple(np.random.randint(0,255,size=3).tolist())

def save_event_log_final(log_data, filepath):
    try:
        existing_log = []
        if os.path.exists(filepath):
            with open(filepath, "r", encoding="utf-8") as f_in:
                try: existing_log = json.load(f_in)
                except json.JSONDecodeError: existing_log = []
                if not isinstance(existing_log, list): existing_log = []
        with open(filepath, "w", encoding="utf-8") as f_out:
            json.dump(existing_log + log_data, f_out, indent=4, ensure_ascii=False)
        print(f"Event log ({len(log_data)} new entries) appended to: {filepath}")
    except Exception as e: print(f"ERROR saving event log: {e}")


## Main Processing Function

In [5]:
import datetime
def main():
    global event_log, track_history, object_loitering_start_time, ROI, frame_buffer, active_clip_capture_tasks
    global total_frames_read_count, total_frames_processed_count, cumulative_detected_class_counts
    
    event_log.clear(); track_history.clear(); object_loitering_start_time.clear(); ROI = None
    frame_buffer.clear(); active_clip_capture_tasks.clear()
    total_frames_read_count = 0; total_frames_processed_count = 0; cumulative_detected_class_counts.clear()

    model_path = 'best.pt'
    local_video_path = INPUT_SOURCE # Make sure this video exists or provide a new one
    output_video_path = RUN_DIR / 'output_tracked_intent_boxmot_v7.mp4'
    conf_threshold = 0.3
    max_duration_sec = None # Set to None or a large number for full video processing

    # Create output directories if they don't exist
    if ENABLE_EVENT_CLIPS: Path(EVENT_CLIP_OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
    if ENABLE_EVENT_SNAPSHOTS: Path(EVENT_SNAPSHOT_OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

    print(f"Loading model: {model_path}")
    if not Path(model_path).exists():
        print(f"Model {model_path} not found. Downloading yolov8n.pt.")
        try: torch.hub.download_url_to_file('https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt', model_path); print("yolov8n.pt downloaded.")
        except Exception as e: print(f"Error downloading default model: {e}. Upload manually."); return
            
    try: model = YOLO(model_path); class_names_dict = model.names; print(f"Model loaded. Classes: {class_names_dict}")
    except Exception as e: print(f"ERROR loading YOLO model: {e}"); return

    if not Path(local_video_path).exists():
        print(f"ERROR: Video {local_video_path} not found. Please upload a video named 'sample_video.mp4' or change the path."); return

    cap = cv2.VideoCapture(local_video_path)
    if not cap.isOpened(): print(f"ERROR: Cannot open video: {local_video_path}"); return

    fps = cap.get(cv2.CAP_PROP_FPS) or 30
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)); frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print(f"Video: {frame_width}x{frame_height} @ {fps:.2f} FPS")
    
    frame_buffer = deque(maxlen=int(fps * FRAME_BUFFER_DURATION_SEC))
    print(f"Annotated frame buffer size: {frame_buffer.maxlen} frames ({FRAME_BUFFER_DURATION_SEC}s at {fps:.2f} FPS)")

    if ROI_MODE == 1:
        if frame_width > 2*ROI_MARGIN_PIXELS and frame_height > 2*ROI_MARGIN_PIXELS:
            ROI = (ROI_MARGIN_PIXELS, ROI_MARGIN_PIXELS, frame_width-ROI_MARGIN_PIXELS, frame_height-ROI_MARGIN_PIXELS)
        else: ROI = (0,0,frame_width,frame_height); print("WARN: Frame too small for margin, using full frame ROI.")
    elif ROI_MODE == 0: ROI = MANUAL_ROI
    else: ROI = (0,0,frame_width,frame_height); print("WARN: Invalid ROI_MODE, using full frame ROI.")
    if ROI: print(f"Using ROI: {ROI}")
    else: print("No ROI defined (MANUAL_ROI is None and ROI_MODE is not 1 or frame too small). Processing full frame for ROI checks.")

    tracker = None; current_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    try:
        tracker_type = 'bytetrack'
        tracker_config_path_cand = TRACKER_CONFIGS / (tracker_type + '.yaml') if isinstance(TRACKER_CONFIGS, Path) and (TRACKER_CONFIGS / (tracker_type + '.yaml')).exists() else None
        if not tracker_config_path_cand:
            import boxmot; pkg_cfg_path = Path(boxmot.__file__).parent/'configs'/(tracker_type+'.yaml')
            if pkg_cfg_path.exists(): tracker_config_path_cand = pkg_cfg_path
            else: raise FileNotFoundError(f"BoxMOT config for {tracker_type} not found.")
        print(f"Using BoxMOT tracker config: {tracker_config_path_cand}")
        tracker = create_tracker(tracker_type, tracker_config_path_cand, None, current_device, False, False)
        print(f"BoxMOT {tracker_type} tracker initialized on {current_device}.")
    except Exception as e: print(f"ERROR initializing BoxMOT: {e}. Using DummyTracker."); tracker = DummyTracker()
    if tracker is None: print("CRITICAL: Tracker is None. Aborting."); return

    out_writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width,frame_height))
    processing_start_time = time.time()
    print("Starting video processing...")

    while cap.isOpened():
        ret, original_frame = cap.read()
        if not ret: print("End of video or read error."); break
        total_frames_read_count += 1
        current_frame_timestamp = datetime.datetime.now()
        
        annotated_frame = original_frame.copy()

        yolo_results = model.predict(original_frame, conf=conf_threshold, verbose=False)
        detections_tensor = yolo_results[0].boxes.data
        if detections_tensor.numel() > 0:
            for cls_id in detections_tensor[:, 5].int().tolist(): cumulative_detected_class_counts[cls_id] += 1

        detections_np = np.empty((0,6))
        if isinstance(detections_tensor, torch.Tensor) and detections_tensor.numel() > 0:
            detections_np = detections_tensor.detach().cpu().numpy().astype("float32")

        tracked_dets_np = np.empty((0,7))
        if detections_np.shape[0] > 0 or not isinstance(tracker, DummyTracker):
            tracked_dets_np = tracker.update(detections_np, original_frame) 
            total_frames_processed_count +=1
        elif isinstance(tracker, DummyTracker):
            tracked_dets_np = tracker.update(None, original_frame); total_frames_processed_count +=1
        else: total_frames_processed_count +=1

        current_tracked_objects_list = []
        active_ids_this_frame = set()
        all_events_for_this_frame = []

        if tracked_dets_np.shape[0] > 0:
            for trk_data in tracked_dets_np:
                x1,y1,x2,y2,trk_id,conf,cls_id = trk_data[:7] # [x1, y1, x2, y2, track_id, score, cls_id]   # ← score 在第 6 欄
                current_tracked_objects_list.append([x1,y1,x2,y2,trk_id,cls_id,conf])
                active_ids_this_frame.add(int(trk_id))
                trk_centroid = get_centroid(trk_data); trk_class_name = class_names_dict.get(int(cls_id), "Unknown")
                track_history[int(trk_id)].append((current_frame_timestamp, trk_centroid[0], trk_centroid[1], int(cls_id), conf))
                
                # Analyze behavior for this object (e.g., loitering, ROI entry/exit)
                # Pass annotated_frame here in case analyze_behavior needs to draw (though it currently doesn't)
                behavior_events = analyze_behavior(int(trk_id), track_history[int(trk_id)], [x1,y1,x2,y2], int(cls_id), trk_class_name, current_frame_timestamp, fps, ROI, frame_width, frame_height)
                all_events_for_this_frame.extend(behavior_events)
        
        # Analyze interactions between all currently tracked objects for this frame
        # This function WILL draw on annotated_frame if interactions occur
        interaction_events = analyze_interactions_for_frame(current_tracked_objects_list, current_frame_timestamp, annotated_frame, class_names_dict, fps, frame_width, frame_height)
        all_events_for_this_frame.extend(interaction_events)
        
        # Clean up history for tracks that are no longer active
        for inactive_id in list(track_history.keys() - active_ids_this_frame):
            if inactive_id in track_history: del track_history[inactive_id]
            if inactive_id in object_loitering_start_time: del object_loitering_start_time[inactive_id]

        # Draw all general annotations (object boxes, stats, ROI) onto the annotated_frame
        # This happens AFTER interaction-specific annotations might have been drawn by analyze_interactions_for_frame
        draw_tracked_objects_and_stats(annotated_frame, current_tracked_objects_list, class_names_dict, ROI)

        # Now, log all collected events for this frame, using the fully annotated_frame for media
        for event_data_item in all_events_for_this_frame:
            log_event(event_data_item, annotated_frame_for_media=annotated_frame)

        # Add the fully ANNOTATED frame to the buffer for clip saving
        frame_buffer.append((annotated_frame.copy(), current_frame_timestamp))

        # Write the ANNOTATED frame to the output video
        out_writer.write(annotated_frame)

        # --- Handle active clip capture tasks (uses ANNOTATED frames from buffer) ---
        if ENABLE_EVENT_CLIPS:
            remaining_tasks = []
            for task in active_clip_capture_tasks:
                is_complete = False
                # Check if enough frames collected or if it's the end of the video
                if task['collected_frames']:
                    # Ensure frames are sorted by timestamp before checking end condition
                    task['collected_frames'].sort(key=lambda x: x[1])
                    last_collected_ts = task['collected_frames'][-1][1]
                    if last_collected_ts >= task['desired_clip_end_ts']:
                        is_complete = True
                
                # If processing has ended (not ret) and task has frames, consider it complete for saving
                if (not ret and task['collected_frames']) or is_complete:
                    frames_data_to_save = [f_data for f_data, ts_data in task['collected_frames'] 
                                           if ts_data >= task['desired_clip_start_ts'] and ts_data <= task['desired_clip_end_ts']]
                    if frames_data_to_save:
                         save_video_clip(frames_data_to_save, task['output_filename'], task['fps'], task['width'], task['height'])
                    # Mark as processed by not adding to remaining_tasks
                else:
                    # If not complete, keep collecting frames if current frame is within desired range
                    if current_frame_timestamp <= task['desired_clip_end_ts']:
                         # Only add if current frame is relevant to this task's time window
                         if current_frame_timestamp >= task['desired_clip_start_ts']:
                            # Check if frame already added (e.g. from pre-fill)
                            if not any(f_ts == current_frame_timestamp for _, f_ts in task['collected_frames']):
                                task['collected_frames'].append((annotated_frame.copy(), current_frame_timestamp))
                    remaining_tasks.append(task)
            active_clip_capture_tasks = remaining_tasks

        if max_duration_sec and (time.time() - processing_start_time > max_duration_sec): print(f"Max duration {max_duration_sec}s reached."); break
        if total_frames_read_count % 100 == 0: print(f"Processed {total_frames_read_count} frames... {len(active_clip_capture_tasks)} active clip tasks.")

    # After loop, process any remaining clip tasks
    if ENABLE_EVENT_CLIPS:
        for task in active_clip_capture_tasks:
            if task['collected_frames']:
                task['collected_frames'].sort(key=lambda x: x[1]) # Sort before final save
                frames_data_to_save = [f_data for f_data, ts_data in task['collected_frames'] 
                                       if ts_data >= task['desired_clip_start_ts'] and ts_data <= task['desired_clip_end_ts']]
                if frames_data_to_save:
                    save_video_clip(frames_data_to_save, task['output_filename'], task['fps'], task['width'], task['height'])
        active_clip_capture_tasks.clear()

    cap.release(); out_writer.release()
    print(f"Processing finished. Output video: {output_video_path}")
    save_event_log_final(event_log, EVENT_LOG_FILE)


## Run Main Processing

In [6]:
# Run the main processing function
# Make sure you have uploaded 'sample_video.mp4' and optionally 'yolov8n.pt' (if not auto-downloaded).
# Event clips will be saved in 'event_clips/', snapshots in 'event_snapshots/' (relative to notebook execution).
# You might need to create 'sample_video.mp4' or change 'local_video_path' in Cell 4.
main()


Loading model: best.pt
Model loaded. Classes: {0: 'package', 1: 'bag', 2: 'other_person', 3: 'delivery_worker', 4: 'food_delivery'}
Video: 720x480 @ 23.98 FPS
Annotated frame buffer size: 359 frames (15s at 23.98 FPS)
Using ROI: (10, 10, 710, 470)
Using BoxMOT tracker config: C:\Users\user\venv_tracker\Lib\site-packages\boxmot\configs\bytetrack.yaml
BoxMOT bytetrack tracker initialized on cpu.
Starting video processing...
Event snapshot saved: output\dali_cam2_0519am_otherman\20250522_091346\snapshots\roi_enter_id1_20250522_091441_278.jpg
Event snapshot saved: output\dali_cam2_0519am_otherman\20250522_091346\snapshots\roi_enter_id2_20250522_091442_878.jpg
Processed 100 frames... 2 active clip tasks.
Processed 200 frames... 2 active clip tasks.
Processed 300 frames... 2 active clip tasks.
Processed 400 frames... 2 active clip tasks.
Processed 500 frames... 2 active clip tasks.
Processed 600 frames... 2 active clip tasks.
Processed 700 frames... 2 active clip tasks.
Processed 800 frames.

## Review Event Logs, Clips, and Snapshots (Illustrative)

In [7]:
from pathlib import Path
import json
from collections import Counter, defaultdict
import pandas as pd      # ✅ 用來排版統計表

# --- Review Event Logs, Clips, and Snapshots (Enhanced) --------------------
print(f"\n--- Event Log ({EVENT_LOG_FILE}) Status ---")
object_counter   = Counter()                 # 物件類別 → 出現次數
event_counter    = Counter()                 # 事件類別 → 出現次數
obj_event_matrix = defaultdict(Counter)      # 物件類別 → (事件類別 → 次數)

if Path(EVENT_LOG_FILE).exists():
    try:
        with open(EVENT_LOG_FILE, "r", encoding="utf-8") as f:
            logged_events_content = json.load(f)

        total_events = len(logged_events_content)
        print(f"Found {total_events} events in {EVENT_LOG_FILE}.")

        # ▍統計迴圈
        for evt in logged_events_content:
            # 1. 事件類別 (可依實際欄位名稱增減備援鍵)
            evt_type = evt.get("event_type") or evt.get("event") or evt.get("type") or "<unknown>"
            event_counter[evt_type] += 1

            # 2. 物件類別 (單一字串或 list 皆可)
            raw_obj = evt.get("class_name") or evt.get("class") or evt.get("object_classes")
            obj_classes = raw_obj if isinstance(raw_obj, list) else [raw_obj or "<unknown>"]

            for cls in obj_classes:
                object_counter[cls] += 1
                obj_event_matrix[cls][evt_type] += 1

        # ▍輸出統計表 -------------------------------------------------------
        obj_df  = (pd.DataFrame(object_counter.items(), columns=["Object Class", "Count"])
                     .sort_values("Count", ascending=False))
        evt_df  = (pd.DataFrame(event_counter.items(),  columns=["Event Type",  "Count"])
                     .sort_values("Count", ascending=False))
        cross_df = (pd.DataFrame(obj_event_matrix).fillna(0).astype(int).T
                      .loc[obj_df["Object Class"]])   # 依物件出現頻次排序

        print("\n=== Object Class Distribution ===")
        print(obj_df.to_string(index=False))

        print("\n=== Event Type Distribution ===")
        print(evt_df.to_string(index=False))

        print("\n=== Object × Event Crosstab ===")
        print(cross_df.to_string())
    except Exception as e:
        print(f"Error reading event log: {e}")
else:
    print(f"Event log file {EVENT_LOG_FILE} not found.")

# --- Event Clips Status (原樣保留) -----------------------------------------
print(f"\n--- Event Clips ({EVENT_CLIP_OUTPUT_DIR}/) Status ---")
if Path(EVENT_CLIP_OUTPUT_DIR).is_dir():
    clips = list(Path(EVENT_CLIP_OUTPUT_DIR).glob("*.mp4"))
    print(f"Found {len(clips)} video clips in {EVENT_CLIP_OUTPUT_DIR}.")
else:
    print(f"Event clips directory {EVENT_CLIP_OUTPUT_DIR} not found.")

# --- Event Snapshots Status (原樣保留) --------------------------------------
print(f"\n--- Event Snapshots ({EVENT_SNAPSHOT_OUTPUT_DIR}/) Status ---")
if Path(EVENT_SNAPSHOT_OUTPUT_DIR).is_dir():
    snaps = list(Path(EVENT_SNAPSHOT_OUTPUT_DIR).glob("*.jpg"))
    print(f"Found {len(snaps)} snapshots in {EVENT_SNAPSHOT_OUTPUT_DIR}.")
else:
    print(f"Event snapshots directory {EVENT_SNAPSHOT_OUTPUT_DIR} not found.")




--- Event Log (output\dali_cam2_0519am_otherman\20250522_091346\dali_cam2_0519am_otherman_events.json) Status ---
Found 3 events in output\dali_cam2_0519am_otherman\20250522_091346\dali_cam2_0519am_otherman_events.json.

=== Object Class Distribution ===
 Object Class  Count
 other_person      2
food_delivery      1

=== Event Type Distribution ===
Event Type  Count
 roi_enter      3

=== Object × Event Crosstab ===
               roi_enter
other_person           2
food_delivery          1

--- Event Clips (output\dali_cam2_0519am_otherman\20250522_091346\clips/) Status ---
Found 3 video clips in output\dali_cam2_0519am_otherman\20250522_091346\clips.

--- Event Snapshots (output\dali_cam2_0519am_otherman\20250522_091346\snapshots/) Status ---
Found 3 snapshots in output\dali_cam2_0519am_otherman\20250522_091346\snapshots.
