<div align="center">
  <a href="http://www.sharif.edu/">
    <img src="https://cdn.freebiesupply.com/logos/large/2x/sharif-logo-png-transparent.png" alt="SUT Logo" width="140">
  </a>
  
  # Sharif University of Technology
  ### Electrical Engineering Department

  ## Signals and Systems
  #### *Final Project - Spring 2025*
</div>

---

<div align="center">
  <h1>
    <b>Object Tracker</b>
  </h1>
  <p>
    An object tracking system that uses YOLO for detection and a selectable OpenCV tracking algorithm (KCF, CSRT, MOSSE, or MedianFlow) for frame-to-frame tracking.
  </p>
</div>

<br>

| Professor                  |
| :-------------------------: |
| Dr. Mohammad Mehdi Mojahedian |

<br>

| Contributors              |
| :-----------------------: |
| **Amirreza Mousavi** |
| **Mahdi Falahi** |
| **Zahra Miladipour** |

---

# 0: Imports

In [28]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
from ultralytics import YOLO
import time
import torch
from scipy.optimize import linear_sum_assignment
import os

# 1: Object Detection

## Preparing Models

In [21]:
# Pick the inference device: CUDA when torch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"ObjectTracker using device: {device}")

# Load the YOLO weights from disk, then move the network onto the chosen device.
model = YOLO('./yolo11n.pt')
model = model.to(device)

ObjectTracker using device: cuda


## ObjectTracker Class

In [9]:
import cv2
import numpy as np
import time
from scipy.optimize import linear_sum_assignment
from collections import deque

class ObjectTracker:
    """Follow manually selected objects through a video, one frame at a time.

    Every track is driven by an OpenCV legacy tracker (CSRT, KCF, MOSSE or
    MedianFlow), optionally combined with a constant-velocity Kalman filter,
    and is re-anchored every ``detect_interval`` frames to a detection
    returned by ``model``. Tracks are only created through
    :meth:`add_manual_track`; detections that match no existing track are
    ignored.

    A track is a plain dict with the keys ``id``, ``class_name``, ``conf``,
    ``color``, ``bbox`` (``(x1, y1, x2, y2)``), ``lost_frames``, ``state``
    (``'CONFIRMED'`` or ``'OCCLUDED'``), ``histogram_gallery`` (a bounded
    deque of flattened HSV histograms), ``tracker`` and, when ``use_kalman``
    is set, ``kf``.
    """

    # Cost assigned to track/detection pairs whose class names differ.
    _INCOMPATIBLE_COST = 1e6
    # Assignments whose blended cost reaches this value are rejected.
    _MAX_MATCH_COST = 0.85
    # Appearance cost used when no histogram is available for the comparison.
    _NEUTRAL_APPEARANCE_COST = 0.5
    # A track whose box is less than this fraction inside the frame is dropped.
    _MIN_VISIBLE_FRACTION = 0.5
    # Number of appearance histograms remembered per track.
    _GALLERY_SIZE = 10

    def __init__(self, model, tracker_type='KCF', detect_interval=24, conf_threshold=0.5,
                 max_lost_frames=30, max_objects=10,
                 use_kalman=True, track_classes=None,
                 appearance_weight=0.6, occlusion_iou_threshold=0.2):
        """Configure the tracker.

        Args:
            model: Detector invoked as ``model(frame, verbose=False)``; the first
                result must expose ``boxes`` and the model must expose ``names``.
            tracker_type: One of ``'CSRT'``, ``'KCF'``, ``'MOSSE'``, ``'MEDIAN_FLOW'``.
            detect_interval: Run the detector every this many processed frames.
            conf_threshold: Detections must have a confidence above this value.
            max_lost_frames: Non-occluded tracks are pruned once their
                ``lost_frames`` counter reaches this value.
            max_objects: Stored on the instance; not enforced by this class.
            use_kalman: Attach a Kalman filter to each track and use it to
                predict the box before the visual tracker runs.
            track_classes: Class names to keep from the detector; empty/None
                keeps every class.
            appearance_weight: Weight of the histogram cost in the matching
                cost; the IoU cost gets ``1 - appearance_weight``.
            occlusion_iou_threshold: An unmatched track overlapping a matched
                track by more than this IoU is flagged ``'OCCLUDED'``.

        Raises:
            ValueError: If ``tracker_type`` is not a supported name.
        """
        self.model = model
        self.detect_interval = detect_interval
        self.conf_threshold = conf_threshold
        self.max_lost_frames = max_lost_frames
        self.max_objects = max_objects
        self.use_kalman = use_kalman
        self.track_classes = track_classes if track_classes is not None else []

        self.appearance_weight = appearance_weight
        self.occlusion_iou_threshold = occlusion_iou_threshold

        self.frame_idx = 0
        self.tracked_objects = []
        self.next_track_id = 0

        self.tracker_constructors = {
            'CSRT': cv2.legacy.TrackerCSRT_create,
            'KCF': cv2.legacy.TrackerKCF_create,
            'MOSSE': cv2.legacy.TrackerMOSSE_create,
            'MEDIAN_FLOW': cv2.legacy.TrackerMedianFlow_create
        }
        if tracker_type not in self.tracker_constructors:
            raise ValueError(f"Invalid tracker type: {tracker_type}. Choose from {list(self.tracker_constructors.keys())}")
        self.tracker_type = tracker_type
        print("--- Object Tracker Initialized (Manual Mode) ---")

    def _extract_histogram(self, frame, bbox):
        """Return a flattened, normalised 8x4x4 HSV histogram of ``bbox`` in ``frame``.

        The box is clipped to the frame first. Returns ``None`` when the
        clipped region is empty.
        """
        h, w = frame.shape[:2]
        x1, y1, x2, y2 = [int(c) for c in bbox]

        clamped_x1, clamped_y1 = max(0, x1), max(0, y1)
        clamped_x2, clamped_y2 = min(w, x2), min(h, y2)
        if clamped_x1 >= clamped_x2 or clamped_y1 >= clamped_y2:
            return None

        roi = frame[clamped_y1:clamped_y2, clamped_x1:clamped_x2]
        if roi.size == 0:
            return None

        hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        hist = cv2.calcHist([hsv_roi], [0, 1, 2], None, [8, 4, 4], [0, 180, 0, 256, 0, 256])
        cv2.normalize(hist, hist)
        return hist.flatten()

    def process_frame(self, frame):
        """Advance every track by one frame and return an annotated copy of ``frame``.

        Order of operations: prune expired tracks, Kalman prediction (if
        enabled), frame-boundary check, visual tracker update, periodic
        detector matching, drawing.
        """
        # Occluded tracks are kept regardless of how long they have been lost.
        self.tracked_objects = [
            t for t in self.tracked_objects
            if t['state'] == 'OCCLUDED' or t['lost_frames'] < self.max_lost_frames
        ]
        height, width = frame.shape[:2]

        if self.use_kalman:
            self._predict_phase()

        self._apply_boundary_conditions(height, width)
        self._update_phase(frame)
        self._match_and_update_phase(frame)
        annotated_frame = self._drawing_phase(frame)

        self.frame_idx += 1
        return annotated_frame

    def _apply_boundary_conditions(self, frame_h, frame_w):
        """Mark tracks for removal when their box is oversized or mostly off-frame.

        A track is marked by setting ``lost_frames`` to ``max_lost_frames``;
        the actual pruning happens at the start of the next ``process_frame``.
        """
        for track in self.tracked_objects:
            x1, y1, x2, y2 = track['bbox']
            bbox_w, bbox_h = x2 - x1, y2 - y1

            if bbox_w > frame_w or bbox_h > frame_h:
                track['lost_frames'] = self.max_lost_frames
                continue

            # Degenerate boxes carry no area to reason about; leave them alone.
            if bbox_w <= 0 or bbox_h <= 0:
                continue

            # Clamp each visible extent at zero. Without the clamp a box lying
            # completely outside the frame yields two negative extents whose
            # product is positive, making it look fully visible.
            visible_w = max(0, min(x2, frame_w) - max(x1, 0))
            visible_h = max(0, min(y2, frame_h) - max(y1, 0))
            visible_fraction = (visible_w * visible_h) / (bbox_w * bbox_h)

            if visible_fraction < self._MIN_VISIBLE_FRACTION:
                track['lost_frames'] = self.max_lost_frames

    def _predict_phase(self):
        """Run the Kalman prediction for every track and store the predicted box."""
        for obj in self.tracked_objects:
            obj['kf'].predict()
            # statePost is read back as a column vector; flatten it and convert
            # to Python floats so int() never receives a 1-element array.
            state = np.asarray(obj['kf'].statePost, dtype=np.float64).ravel()
            cx, cy, w, h = (float(v) for v in state[:4])
            obj['bbox'] = (int(cx - w/2), int(cy - h/2), int(cx + w/2), int(cy + h/2))

    def _update_phase(self, frame):
        """Update each non-occluded track with its visual tracker.

        On success the box is replaced (and fed to the Kalman filter when
        enabled); on failure ``lost_frames`` is incremented.
        """
        for obj in self.tracked_objects:
            if obj['state'] == 'OCCLUDED':
                continue
            success, bbox = obj['tracker'].update(frame)
            if not success:
                obj['lost_frames'] += 1
                continue

            x1, y1, w, h = [int(v) for v in bbox]
            obj['bbox'] = (x1, y1, x1 + w, y1 + h)
            if self.use_kalman:
                cx, cy = x1 + w/2, y1 + h/2
                measurement = np.array([cx, cy, w, h], dtype=np.float32)
                obj['kf'].correct(measurement)

    def _match_and_update_phase(self, frame):
        """Every ``detect_interval`` frames, match detections to tracks and re-anchor them.

        The cost of a same-class pair blends appearance (histogram correlation)
        and geometry (IoU); pairs of different classes get a prohibitive cost.
        Assignments are solved with the Hungarian algorithm and accepted only
        below ``_MAX_MATCH_COST``. Tracks left without a match are passed to
        ``_handle_unmatched_track``.
        """
        if self.frame_idx % self.detect_interval != 0 or not self.tracked_objects:
            return

        detections = self.detect(frame)
        num_tracks = len(self.tracked_objects)

        if not detections:
            # Nothing detected: every track is unmatched. Handling them here
            # keeps their ageing consistent with the "detections but no match"
            # case instead of silently skipping the bookkeeping.
            for t_idx in range(num_tracks):
                self._handle_unmatched_track(t_idx, set())
            return

        det_boxes = [(d['x1'], d['y1'], d['x2'], d['y2']) for d in detections]
        # Histograms depend only on the detection, so compute each one once.
        det_hists = [self._extract_histogram(frame, box) for box in det_boxes]
        iou_matrix = self._calculate_iou([t['bbox'] for t in self.tracked_objects], det_boxes)

        cost_matrix = np.full((num_tracks, len(detections)), self._INCOMPATIBLE_COST)
        for t_idx, track in enumerate(self.tracked_objects):
            gallery = track['histogram_gallery']
            for d_idx, det in enumerate(detections):
                if track['class_name'] != det['class_name']:
                    continue

                iou_cost = 1 - iou_matrix[t_idx, d_idx]

                det_hist = det_hists[d_idx]
                if det_hist is None or not gallery:
                    appearance_cost = self._NEUTRAL_APPEARANCE_COST
                else:
                    # Compare against every remembered appearance, keep the best.
                    best_correlation = max(
                        cv2.compareHist(det_hist, track_hist, cv2.HISTCMP_CORREL)
                        for track_hist in gallery
                    )
                    appearance_cost = 1 - best_correlation

                cost_matrix[t_idx, d_idx] = (
                    self.appearance_weight * appearance_cost
                    + (1 - self.appearance_weight) * iou_cost
                )

        track_indices, det_indices = linear_sum_assignment(cost_matrix)

        matched_track_indices = set()
        for t_idx, d_idx in zip(track_indices, det_indices):
            if cost_matrix[t_idx, d_idx] < self._MAX_MATCH_COST:
                self._update_matched_track(frame, self.tracked_objects[t_idx], detections[d_idx])
                matched_track_indices.add(t_idx)

        for t_idx in range(num_tracks):
            if t_idx not in matched_track_indices:
                self._handle_unmatched_track(t_idx, matched_track_indices)

    def _update_matched_track(self, frame, track, det):
        """Snap ``track`` onto detection ``det`` and restart its visual tracker there."""
        x1, y1, x2, y2 = det['x1'], det['y1'], det['x2'], det['y2']
        w, h = x2 - x1, y2 - y1
        if w <= 0 or h <= 0:
            # A box that collapsed during integer truncation cannot seed a tracker.
            return

        track['bbox'] = (x1, y1, x2, y2)
        track['lost_frames'] = 0
        track['state'] = 'CONFIRMED'

        # Build a new tracker instead of calling init() on the existing one:
        # re-initialising an already initialised legacy tracker does not
        # reliably move it to the new box.
        fresh_tracker = self.tracker_constructors[self.tracker_type]()
        fresh_tracker.init(frame, (x1, y1, w, h))
        track['tracker'] = fresh_tracker

        new_hist = self._extract_histogram(frame, track['bbox'])
        if new_hist is not None:
            track['histogram_gallery'].append(new_hist)

        if self.use_kalman:
            cx, cy = x1 + w/2, y1 + h/2
            measurement = np.array([cx, cy, w, h], dtype=np.float32)
            track['kf'].correct(measurement)

    def _handle_unmatched_track(self, t_idx, matched_track_indices):
        """Classify an unmatched track as occluded or lost.

        The track becomes ``'OCCLUDED'`` when it overlaps any matched track by
        more than ``occlusion_iou_threshold``; otherwise its ``lost_frames``
        counter is incremented and an occluded track returns to ``'CONFIRMED'``.
        """
        track = self.tracked_objects[t_idx]
        is_occluded = any(
            self._calculate_iou([track['bbox']], [self.tracked_objects[m_idx]['bbox']])[0, 0]
            > self.occlusion_iou_threshold
            for m_idx in matched_track_indices
        )

        if is_occluded:
            track['state'] = 'OCCLUDED'
            return

        track['lost_frames'] += 1
        if track['state'] == 'OCCLUDED':
            track['state'] = 'CONFIRMED'

    def add_manual_track(self, frame, bbox, class_name):
        """Start tracking ``bbox`` (``x1, y1, x2, y2``) in ``frame`` under ``class_name``.

        Boxes with non-positive width or height are ignored.
        """
        x1, y1, x2, y2 = [int(c) for c in bbox]
        w, h = x2 - x1, y2 - y1
        if w <= 0 or h <= 0:
            return

        new_track = {
            'id': self.next_track_id, 'class_name': class_name, 'conf': 1.0,
            'color': np.random.uniform(0, 255, 3).tolist(), 'bbox': (x1, y1, x2, y2),
            'lost_frames': 0, 'state': 'CONFIRMED',
            'histogram_gallery': deque(maxlen=self._GALLERY_SIZE)
        }

        tracker = self.tracker_constructors[self.tracker_type]()
        tracker.init(frame, (x1, y1, w, h))
        new_track['tracker'] = tracker

        hist = self._extract_histogram(frame, new_track['bbox'])
        if hist is not None:
            new_track['histogram_gallery'].append(hist)

        if self.use_kalman:
            new_kf = self._create_kalman_filter()
            # State layout: centre x/y, width, height, followed by their velocities.
            new_kf.statePost = np.array([x1 + w/2, y1 + h/2, w, h, 0, 0, 0, 0], dtype=np.float32)
            new_track['kf'] = new_kf

        self.tracked_objects.append(new_track)
        self.next_track_id += 1

    def _drawing_phase(self, frame):
        """Return a copy of ``frame`` with every track's box and ``ID-n`` label drawn."""
        frame_copy = frame.copy()
        for obj in self.tracked_objects:
            # Occluded tracks use one fixed colour instead of their own.
            color = (255, 165, 0) if obj['state'] == 'OCCLUDED' else obj['color']
            x1, y1, x2, y2 = [int(c) for c in obj['bbox']]
            label = f"ID-{obj['id']}"
            cv2.rectangle(frame_copy, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame_copy, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
        return frame_copy

    def detect(self, frame):
        """Run the detector on ``frame``.

        Returns:
            A list of dicts with keys ``class_name``, ``x1``, ``y1``, ``x2``,
            ``y2`` (ints) and ``conf``, limited to detections above
            ``conf_threshold`` and, when ``track_classes`` is non-empty, to
            those classes.
        """
        results = self.model(frame, verbose=False)[0]
        detections = []
        for box in results.boxes:
            conf = box.conf[0].item()
            if conf <= self.conf_threshold:
                continue
            class_name = self.model.names[int(box.cls[0].item())]
            if self.track_classes and class_name not in self.track_classes:
                continue
            coords = box.xyxy[0].tolist()
            detections.append({
                'class_name': class_name, 'x1': int(coords[0]), 'y1': int(coords[1]),
                'x2': int(coords[2]), 'y2': int(coords[3]), 'conf': conf
            })
        return detections

    def _calculate_iou(self, bboxes1, bboxes2):
        """Pairwise IoU between two lists of ``(x1, y1, x2, y2)`` boxes.

        Returns:
            Array of shape ``(len(bboxes1), len(bboxes2))``.
        """
        bboxes1 = np.array(bboxes1)
        bboxes2 = np.array(bboxes2)
        if bboxes1.size == 0 or bboxes2.size == 0:
            return np.empty((len(bboxes1), len(bboxes2)))
        xA = np.maximum(bboxes1[:, 0][:, np.newaxis], bboxes2[:, 0])
        yA = np.maximum(bboxes1[:, 1][:, np.newaxis], bboxes2[:, 1])
        xB = np.minimum(bboxes1[:, 2][:, np.newaxis], bboxes2[:, 2])
        yB = np.minimum(bboxes1[:, 3][:, np.newaxis], bboxes2[:, 3])
        interArea = np.maximum(0, xB - xA) * np.maximum(0, yB - yA)
        boxAArea = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1])
        boxBArea = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1])
        # The small epsilon avoids a division by zero for two zero-area boxes.
        iou = interArea / (boxAArea[:, np.newaxis] + boxBArea - interArea + 1e-6)
        return iou

    def _create_kalman_filter(self):
        """Build an 8-state / 4-measurement constant-velocity Kalman filter.

        State: ``[cx, cy, w, h, vcx, vcy, vw, vh]``; measurement: ``[cx, cy, w, h]``.
        """
        kf = cv2.KalmanFilter(8, 4)

        # Identity plus a unit coupling from each velocity to its position term.
        transition = np.eye(8, dtype=np.float32)
        transition[:4, 4:] = np.eye(4, dtype=np.float32)
        kf.transitionMatrix = transition

        # Only the first four state components are observed.
        kf.measurementMatrix = np.eye(4, 8, dtype=np.float32)

        # Assemble the whole matrix before assigning it: velocity terms get ten
        # times the process noise of the position/size terms.
        process_noise = np.eye(8, dtype=np.float32) * 0.03
        process_noise[4:, 4:] *= 10
        kf.processNoiseCov = process_noise

        kf.measurementNoiseCov = np.eye(4, dtype=np.float32) * 0.1
        return kf

## Sample Detection Test

In [23]:
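# NOTE: this sample is disabled. It calls `plot_image` and `draw_boxes`, which the
# ObjectTracker class in this notebook does not define; only `detect` is usable as written.
# test_img = cv2.imread('./assets/images/0001.jpg')

# test_tracker = ObjectTracker(model)
# test_img_detections = test_tracker.detect(test_img)
# test_tracker.plot_image(test_tracker.draw_boxes(test_img, test_img_detections))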

# 2: Tracking

## VideoPlayer class

In [10]:
class VideoPlayer:
    """Interactive OpenCV window that plays a source and lets the user pick objects to track.

    The source is either a video file or a directory of images played in
    sorted filename order. Playback moves through three states:
    ``'INITIALIZING'`` (first frames shown untouched), ``'PAUSED_FOR_SELECTION'``
    (the user selects detections or draws boxes) and ``'PLAYING'`` (frames are
    passed through the tracker).
    """

    def __init__(self, source, target_fps=30, size_multiplier=1.0, window_title="Video Playback"):
        """Open ``source`` and create the display window.

        Args:
            source: Path to a video file, or to a directory of image files.
            target_fps: Playback rate cap. ``0`` means "use the source's own
                FPS"; ``-1`` disables pacing.
            size_multiplier: Window size as a multiple of the frame size.
            window_title: Name of the OpenCV window.

        Raises:
            ValueError: If a source directory holds no supported image files.
            IOError: If the first image or the video file cannot be read.
        """
        self.window_title = window_title
        self.source = source
        self.target_fps = target_fps

        if os.path.isdir(self.source):
            self.source_type = 'images'
            image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')
            self.image_files = sorted([os.path.join(self.source, f) for f in os.listdir(self.source) if f.lower().endswith(image_extensions)])
            if not self.image_files: raise ValueError("Source directory contains no supported image files.")
            first_frame = cv2.imread(self.image_files[0])
            if first_frame is None: raise IOError(f"Could not read the first image: {self.image_files[0]}")
            self.frame_height, self.frame_width = first_frame.shape[:2]
            self.cap = None
            self.original_fps = 30
        else:
            self.source_type = 'video'
            self.cap = cv2.VideoCapture(self.source)
            if not self.cap.isOpened(): raise IOError(f"Could not open video file: {self.source}")
            self.frame_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            self.frame_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            reported_fps = self.cap.get(cv2.CAP_PROP_FPS)
            # A source may report 0 (or NaN) when its rate is unknown; fall back
            # to the same default used for image sequences so pacing never
            # divides by zero.
            self.original_fps = reported_fps if reported_fps > 0 else 30

        if self.target_fps == 0:
            self.target_fps = self.original_fps
            print(f"Target FPS set to 0. Using original video FPS: {self.target_fps:.2f}")

        # UI text/layout scale relative to a 1080-pixel-high frame, floored at 0.5.
        self.ui_scale_factor = max(0.5, min(self.frame_height, 2200.0) / 1080.0)

        self.total_processing_time = 0.0
        self.processed_frame_count = 0
        self.state = 'INITIALIZING'
        self.selectable_detections, self.user_selections = [], []
        self.is_drawing_roi, self.show_help = False, True
        self.roi_start_point, self.roi_end_point, self.new_manual_box = None, None, None

        # Class choices offered when the user draws a box by hand.
        self.YOLO_CLASSES = {
            0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane',
            5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light',
            10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench',
            14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow',
            20: 'other'
        }

        cv2.namedWindow(self.window_title, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(self.window_title, int(self.frame_width * size_multiplier), int(self.frame_height * size_multiplier))

        cv2.setMouseCallback(self.window_title, self._mouse_callback)
        print("--- Video Player Initialized for Interactive Tracking ---")

    @staticmethod
    def _selection_bbox(sel):
        """Return ``(x1, y1, x2, y2)`` for a selection.

        Selections are either track/manual entries carrying a ``'bbox'`` key or
        detector entries carrying ``'x1'``..``'y2'`` keys.
        """
        return sel.get('bbox') or (sel['x1'], sel['y1'], sel['x2'], sel['y2'])

    def _compute_wait_ms(self, processing_time):
        """Return the ``cv2.waitKey`` delay (ms) for the frame just processed.

        The result is never 0, because ``waitKey(0)`` would block until a key
        is pressed.
        """
        if self.state == 'PLAYING' and self.target_fps != -1:
            if self.target_fps > 0:
                delay_needed = 1.0 / self.target_fps - processing_time
                if delay_needed > 0:
                    # Sub-millisecond remainders truncate to 0; keep at least 1 ms.
                    return max(1, int(delay_needed * 1000))
            return 1
        if self.state == 'PAUSED_FOR_SELECTION':
            return 20
        return 1

    def _read_frame(self, frame_idx):
        """Read the next frame from the source; returns ``(ok, frame)``."""
        if self.source_type == 'video':
            return self.cap.read()
        if self.source_type == 'images' and frame_idx < len(self.image_files):
            frame = cv2.imread(self.image_files[frame_idx])
            return frame is not None, frame
        return False, None

    def _mouse_callback(self, event, x, y, flags, param):
        """Handle mouse input while paused.

        Left-drag draws a new manual box. Right-click deselects the selection
        under the cursor, or, if there is none, selects the detection under it.
        """
        if self.state != 'PAUSED_FOR_SELECTION':
            return

        if event == cv2.EVENT_LBUTTONDOWN:
            self.is_drawing_roi = True
            self.roi_start_point, self.roi_end_point = (x, y), (x, y)
        elif event == cv2.EVENT_MOUSEMOVE:
            if self.is_drawing_roi:
                self.roi_end_point = (x, y)
        elif event == cv2.EVENT_LBUTTONUP:
            if not self.is_drawing_roi:
                return
            self.is_drawing_roi = False
            if self.roi_end_point and self.roi_start_point:
                x1, y1 = self.roi_start_point
                x2, y2 = self.roi_end_point
                # Require a minimum extent in both directions: a flat drag would
                # otherwise be offered for classification and then silently
                # rejected by the tracker for having no height.
                if abs(x1 - x2) > 5 and abs(y1 - y2) > 5:
                    self.new_manual_box = (min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))
            self.roi_start_point, self.roi_end_point = None, None
        elif event == cv2.EVENT_RBUTTONDOWN:
            # Walk from the most recently added entry backwards.
            for i in range(len(self.user_selections) - 1, -1, -1):
                bbox = self._selection_bbox(self.user_selections[i])
                if bbox[0] < x < bbox[2] and bbox[1] < y < bbox[3]:
                    removed_item = self.user_selections.pop(i)
                    # Only detector entries go back to the selectable pool.
                    if 'x1' in removed_item:
                        self.selectable_detections.append(removed_item)
                    return
            for i in range(len(self.selectable_detections) - 1, -1, -1):
                det = self.selectable_detections[i]
                if det['x1'] < x < det['x2'] and det['y1'] < y < det['y2']:
                    self.user_selections.append(self.selectable_detections.pop(i))
                    return

    def _draw_pause_menu(self, frame):
        """Return ``frame`` with the translucent help banner for selection mode drawn on top."""
        s = self.ui_scale_factor
        bg_height = int(240 * s)
        title_scale, head_scale, text_scale = 1.8 * s, 1.0 * s, 0.9 * s
        thick_main, thick_sub = max(1, int(3 * s)), max(1, int(2 * s))

        overlay = frame.copy()
        cv2.rectangle(overlay, (0, 0), (frame.shape[1], bg_height), (0, 0, 0), -1)
        frame = cv2.addWeighted(overlay, 0.7, frame, 0.3, 0)

        cv2.putText(frame, "PAUSED - SELECTION MODE", (int(25*s), int(60*s)), cv2.FONT_HERSHEY_TRIPLEX, title_scale, (0, 255, 255), thick_main)
        cv2.putText(frame, "Mouse Controls:", (int(25*s), int(115*s)), cv2.FONT_HERSHEY_SIMPLEX, head_scale, (255, 255, 255), thick_main)
        cv2.putText(frame, "- Left-Click & Drag: Draw a new box to track", (int(35*s), int(145*s)), cv2.FONT_HERSHEY_SIMPLEX, text_scale, (255, 255, 255), thick_sub)
        cv2.putText(frame, "- Right-Click: Select (Red) / Deselect (Green)", (int(35*s), int(170*s)), cv2.FONT_HERSHEY_SIMPLEX, text_scale, (255, 255, 255), thick_sub)
        cv2.putText(frame, "Keyboard: C: Confirm | H: Toggle Help | Space: Pause | Q: Quit", (int(25*s), int(210*s)), cv2.FONT_HERSHEY_SIMPLEX, text_scale, (255, 255, 255), thick_sub)
        return frame

    def _get_numeric_input(self, frame):
        """Modal prompt asking the user to type a class ID from ``YOLO_CLASSES``.

        Returns:
            The chosen integer ID, or ``None`` if the user pressed Escape.
        """
        s = self.ui_scale_factor
        title_scale, text_scale = 1.8 * s, 1.2 * s
        thick_main, thick_sub = max(1, int(4 * s)), max(1, int(3 * s))
        y_offset_start, y_offset_inc = int(120*s), int(45*s)

        num_input = ""
        while True:
            frame_copy, overlay = frame.copy(), frame.copy()
            cv2.rectangle(overlay, (0, 0), (frame_copy.shape[1], frame_copy.shape[0]), (0, 0, 0), -1)
            frame_copy = cv2.addWeighted(overlay, 0.85, frame_copy, 0.15, 0)

            # num_input only ever holds digit characters, so int() cannot fail.
            current_selection_id = int(num_input) if num_input else -1

            cv2.putText(frame_copy, "Enter Class ID & Press Enter:", (int(50*s), int(65*s)), cv2.FONT_HERSHEY_TRIPLEX, title_scale, (0, 255, 255), thick_main)
            y_offset = y_offset_start
            for i, name in self.YOLO_CLASSES.items():
                # Entries that would fall below the frame are not drawn.
                if y_offset < frame.shape[0] - 30:
                    is_current = i == current_selection_id
                    color = (0, 255, 0) if is_current else (255, 255, 255)
                    thickness = thick_main if is_current else thick_sub
                    cv2.putText(frame_copy, f"{i}: {name}", (int(50*s), y_offset), cv2.FONT_HERSHEY_SIMPLEX, text_scale, color, thickness)
                    y_offset += y_offset_inc

            cv2.imshow(self.window_title, frame_copy)
            # Mask to the low byte, as play() does, so platform-specific high
            # bits in the key code do not break the comparisons below.
            key = cv2.waitKey(0) & 0xFF
            if key in (13, 10):  # Enter (carriage return or line feed)
                if num_input and int(num_input) in self.YOLO_CLASSES:
                    return int(num_input)
                print(f"Error: Invalid ID. Please try again.")
                num_input = ""
            elif key == 8:  # Backspace
                num_input = num_input[:-1]
            elif ord('0') <= key <= ord('9'):
                num_input += chr(key)
            elif key == 27:  # Escape
                return None

    def play(self, tracker):
        """Run the interactive playback loop until the source ends or ``q`` is pressed.

        Args:
            tracker: Object providing ``detect(frame)``, ``process_frame(frame)``,
                ``add_manual_track(frame, bbox, class_name)`` and the attributes
                ``tracked_objects`` / ``next_track_id``.

        The capture and the window are released on exit, including when an
        exception interrupts the loop.
        """
        frame_idx = -1  # incremented before each read, so the first frame is index 0
        temp_frame = None  # last frame read from the source; re-shown while paused

        try:
            while True:
                loop_start_time = time.perf_counter()

                # --- Frame loading ---
                if self.state in ('INITIALIZING', 'PLAYING'):
                    frame_idx += 1
                    ret, frame = self._read_frame(frame_idx)
                    if not ret:
                        break
                    temp_frame = frame.copy()
                else:
                    frame = temp_frame.copy()

                # --- State machine ---
                display_frame = frame.copy()
                if self.state == 'INITIALIZING' and frame_idx >= 1:
                    self.state = 'PAUSED_FOR_SELECTION'
                    self.selectable_detections = tracker.detect(display_frame)
                elif self.state == 'PLAYING':
                    display_frame = tracker.process_frame(display_frame)
                elif self.state == 'PAUSED_FOR_SELECTION':
                    if self.show_help:
                        display_frame = self._draw_pause_menu(display_frame)
                    for det in self.selectable_detections:
                        cv2.rectangle(display_frame, (det['x1'], det['y1']), (det['x2'], det['y2']), (0, 0, 255), 2)
                    for sel in self.user_selections:
                        bbox = self._selection_bbox(sel)
                        cv2.rectangle(display_frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 3)
                    if self.is_drawing_roi and self.roi_start_point and self.roi_end_point:
                        cv2.rectangle(display_frame, self.roi_start_point, self.roi_end_point, (255, 255, 0), 2)
                    if self.new_manual_box:
                        class_id = self._get_numeric_input(display_frame)
                        if class_id is not None:
                            self.user_selections.append({'bbox': self.new_manual_box, 'class_name': self.YOLO_CLASSES[class_id]})
                        self.new_manual_box = None

                # --- Live FPS and display ---
                processing_time = time.perf_counter() - loop_start_time
                live_fps = 1.0 / processing_time if processing_time > 0 else float('inf')
                if self.state != 'PAUSED_FOR_SELECTION':
                    self.total_processing_time += processing_time
                    self.processed_frame_count += 1

                s = self.ui_scale_factor
                cv2.putText(display_frame, f"FPS: {live_fps:.1f}", (int(20*s), int(40*s)), cv2.FONT_HERSHEY_SIMPLEX, 1.2*s, (0, 255, 0), max(1, int(2*s)))
                cv2.imshow(self.window_title, display_frame)

                # --- Keyboard handling ---
                key = cv2.waitKey(self._compute_wait_ms(processing_time)) & 0xFF
                if key == ord('q'):
                    break
                elif key == ord('h'):
                    self.show_help = not self.show_help
                elif key == 32 and self.state == 'PLAYING':
                    # Space: pause and offer current tracks as pre-selected entries.
                    self.state = 'PAUSED_FOR_SELECTION'
                    self.selectable_detections = tracker.detect(frame)
                    self.user_selections = list(tracker.tracked_objects)
                elif key == ord('c') and self.state == 'PAUSED_FOR_SELECTION':
                    # Confirm: rebuild the tracker's tracks from the selections.
                    tracker.tracked_objects, tracker.next_track_id = [], 0
                    for sel in self.user_selections:
                        tracker.add_manual_track(temp_frame, self._selection_bbox(sel), sel['class_name'])
                    self.selectable_detections, self.user_selections, self.state = [], [], 'PLAYING'

            if self.processed_frame_count > 0 and self.total_processing_time > 0:
                avg_fps = self.processed_frame_count / self.total_processing_time
                print(f"\n--- Playback Finished ---\nAverage Processing FPS: {avg_fps:.2f}\n-------------------------")
        finally:
            self.release()

    def release(self):
        """Release the video capture (if any) and close all OpenCV windows."""
        print("Releasing resources...")
        if self.cap and self.cap.isOpened(): self.cap.release()
        cv2.destroyAllWindows()
        # Pump the GUI event loop a few times after destroying the windows.
        for _ in range(5): cv2.waitKey(1)

## Test Playback

In [11]:
# --- Playback configuration ---
# Alternative source: a directory of images played as a sequence.
# VIDEO_PATH = './assets/OTB100/Bird1/img/'
VIDEO_PATH = './assets/footage/person4.mp4'
MODEL_PATH = './yolo11n.pt'  # not referenced by the code in this cell
TARGET_FPS = 0               # 0 -> play at the source's own frame rate
WINDOW_SIZE = 1              # window size as a multiple of the frame size

try:
    tracker = ObjectTracker(model, tracker_type='CSRT', track_classes=['person'])
    player = VideoPlayer(VIDEO_PATH, target_fps=TARGET_FPS, size_multiplier=WINDOW_SIZE,
                         window_title="High-Performance Player")
    player.play(tracker)
except Exception as e:
    # I/O failures are reported as-is; anything else gets an explanatory prefix.
    if isinstance(e, IOError):
        print(e)
    else:
        print(f"An unexpected error occurred: {e}")

--- Object Tracker Initialized (Manual Mode) ---
Target FPS set to 0. Using original video FPS: 30.00
--- Video Player Initialized for Interactive Tracking ---

--- Playback Finished ---
Average Processing FPS: 12.37
-------------------------
Releasing resources...
