<div align="center">
  <a href="http://www.sharif.edu/">
    <img src="https://cdn.freebiesupply.com/logos/large/2x/sharif-logo-png-transparent.png" alt="SUT Logo" width="140">
  </a>
  
  # Sharif University of Technology
  ### Electrical Engineering Department

  ## Signals and Systems
  #### *Final Project - Spring 2025*
</div>

---

<div align="center">
  <h1>
    <b>Object Tracker</b>
  </h1>
  <p>
    An object tracking system that uses YOLO for detection and lightweight OpenCV trackers (KCF, CSRT, MOSSE, MedianFlow), smoothed by a Kalman filter, for tracking between detections.
  </p>
</div>

<br>

| Professor                  |
| :-------------------------: |
| Dr. Mohammad Mehdi Mojahedian |

<br>

| Contributors              |
| :-----------------------: |
| **Amirreza Mousavi** |
| **Mahdi Falahi** |
| **Zahra Miladipour** |

---

# 0: Imports

In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
from ultralytics import YOLO
import time
import torch
from scipy.optimize import linear_sum_assignment

# 1: Object Detection

## Preparing Models

In [None]:
# Run on the GPU when PyTorch reports that CUDA is usable, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"ObjectTracker using device: {device}")

# Load the YOLO weights from the working directory and move them to `device`.
model = YOLO('./yolo11n.pt').to(device)

## ObjectTracker Class

In [None]:
class ObjectTracker:
    """Track objects across video frames with a detector plus per-object trackers.

    A YOLO-style ``model`` produces detections on the first frame, every
    ``detect_interval`` frames, and whenever a track is lost.  Between
    detections each object is followed by an OpenCV legacy tracker, and a
    constant-velocity Kalman filter per object smooths the reported box.

    Each entry of ``tracked_objects`` is a dict with the keys ``id``, ``kf``,
    ``tracker``, ``class_name``, ``conf``, ``color``, ``bbox`` (x1, y1, x2, y2)
    and ``lost_frames``.
    """

    def __init__(self, model, tracker_type='KCF', detect_interval=48, conf_threshold=0.5):
        """Store the configuration and validate the requested tracker type.

        Args:
            model: Callable detector; ``detect`` calls ``model(frame, verbose=False)``
                and reads ``model.names``.
            tracker_type: One of ``'CSRT'``, ``'KCF'``, ``'MOSSE'``, ``'MEDIAN_FLOW'``.
            detect_interval: Run the detector every this many frames.
            conf_threshold: Detections at or below this confidence are dropped.

        Raises:
            ValueError: If ``tracker_type`` is not a supported key.
        """
        self.model = model
        self.detect_interval = detect_interval
        self.conf_threshold = conf_threshold
        self.frame_idx = 0
        self.tracked_objects = []
        # Monotonic counter used to label newly created tracks.
        self.next_track_id = 0

        self.tracker_constructors = {
            'CSRT': cv2.legacy.TrackerCSRT_create,
            'KCF': cv2.legacy.TrackerKCF_create,
            'MOSSE': cv2.legacy.TrackerMOSSE_create,
            'MEDIAN_FLOW': cv2.legacy.TrackerMedianFlow_create
        }
        if tracker_type not in self.tracker_constructors:
            raise ValueError(f"Invalid tracker type: {tracker_type}. Choose from {list(self.tracker_constructors.keys())}")
        self.tracker_type = tracker_type
        print(f"Using tracker: {self.tracker_type}")

    @staticmethod
    def _state_to_bbox(state):
        """Convert a Kalman state [cx, cy, w, h, ...] into integer (x1, y1, x2, y2).

        The state is flattened first so both an (8, 1) column and a flat (8,)
        vector are accepted, and so ``int()`` is applied to scalars rather than
        to 1-element arrays (deprecated in recent NumPy releases).
        """
        cx, cy, w, h = np.asarray(state).reshape(-1)[:4]
        return (int(cx - w / 2), int(cy - h / 2), int(cx + w / 2), int(cy + h / 2))

    def process_frame(self, frame, iou_threshold=0.7, max_lost_frames=5, max_objects=1):
        """Advance every track by one frame and return an annotated copy of ``frame``.

        Args:
            frame: Image array; only ``frame.shape[:2]`` (height, width) is read here,
                the rest is handed to the trackers, detector and drawing code.
            iou_threshold: Minimum IoU for a detection to be matched to a track.
            max_lost_frames: A track whose ``lost_frames`` reaches this is removed.
            max_objects: Maximum number of tracks kept after this frame.

        Returns:
            A copy of ``frame`` with the tracked boxes and labels drawn on it.
        """
        height, width = frame.shape[:2]

        # --- PHASE 1: PREDICT with Kalman Filter ---
        # Advance each motion model and cache the resulting box as the guess
        # for this frame before any measurement arrives.
        for obj in self.tracked_objects:
            obj['kf'].predict()
            obj['bbox'] = self._state_to_bbox(obj['kf'].statePost)

        for obj in self.tracked_objects:
            x1, y1, x2, y2 = obj['bbox']
            if x2 < 0 or y2 < 0 or x1 > width or y1 > height:
                # Fully outside the frame: mark for removal in cleanup.
                obj['lost_frames'] = max_lost_frames
            else:
                visible_w = max(0, min(x2, width) - max(x1, 0))
                visible_h = max(0, min(y2, height) - max(y1, 0))
                if visible_w * visible_h < 0.1 * (x2 - x1) * (y2 - y1):
                    # Less than 10% of the box is visible: count as a partial loss.
                    obj['lost_frames'] += 1
            # Clip the cached box to the frame; it is used for IoU matching below.
            obj['bbox'] = (max(0, x1), max(0, y1), min(width, x2), min(height, y2))

        # --- PHASE 2: UPDATE with Lightweight Tracker ---
        lost_track_detected = False
        for obj in self.tracked_objects:
            if obj['lost_frames'] >= max_lost_frames:
                # Already condemned in phase 1.  Skip the tracker so that a
                # (possibly spurious) success cannot reset the counter and
                # cancel the removal; ask the detector to look again instead.
                lost_track_detected = True
                continue

            success, bbox = obj['tracker'].update(frame)
            if success:
                # Tracker output is (x, y, w, h); feed its centre/size to the KF.
                x1, y1, w, h = [int(v) for v in bbox]
                cx, cy = x1 + w/2, y1 + h/2
                measurement = np.array([cx, cy, w, h], dtype=np.float32)
                obj['kf'].correct(measurement)
                obj['lost_frames'] = 0
            else:
                obj['lost_frames'] += 1
                lost_track_detected = True

        # --- PHASE 3: CLEANUP & DETECT ---
        survived_tracks = [t for t in self.tracked_objects if t['lost_frames'] < max_lost_frames]

        # Run the detector on the first frame, after any loss, and periodically.
        if self.frame_idx == 0 or lost_track_detected or (self.frame_idx % self.detect_interval == 0):
            detections = self.detect(frame)

            if detections:
                tracked_bboxes = [t['bbox'] for t in survived_tracks]
                detected_bboxes = [[d['x1'], d['y1'], d['x2'], d['y2']] for d in detections]

                iou_matrix = self._calculate_iou(tracked_bboxes, detected_bboxes)
                matched_pairs, _, unmatched_detections = self._apply_matching(iou_matrix, iou_threshold)

                # Refresh matched tracks with the detector's box.
                for t_idx, d_idx in matched_pairs:
                    track = survived_tracks[t_idx]
                    det = detections[d_idx]
                    x1, y1, x2, y2 = det['x1'], det['y1'], det['x2'], det['y2']
                    w, h = x2 - x1, y2 - y1
                    cx, cy = x1 + w/2, y1 + h/2

                    measurement = np.array([cx, cy, w, h], dtype=np.float32)
                    track['kf'].correct(measurement)

                    # Build a fresh tracker on the detector's box to prevent drift.
                    # NOTE(review): OpenCV's legacy Tracker.init() appears to
                    # return early on an already-initialised tracker, so calling
                    # init() again on the old instance would not reset it.
                    track['tracker'] = self.tracker_constructors[self.tracker_type]()
                    track['tracker'].init(frame, (x1, y1, w, h))
                    track['lost_frames'] = 0

                # Start new tracks for unmatched detections, but only as many
                # as will survive the max_objects cap below; this avoids
                # building trackers that are immediately thrown away.
                free_slots = max(0, max_objects - len(survived_tracks))
                for d_idx in unmatched_detections[:free_slots]:
                    det = detections[d_idx]
                    x1, y1, x2, y2 = det['x1'], det['y1'], det['x2'], det['y2']
                    w, h = x2-x1, y2-y1

                    new_kf = self._create_kalman_filter()
                    # Seed the state at the detection with zero velocity.
                    new_kf.statePost = np.array([x1+w/2, y1+h/2, w, h, 0, 0, 0, 0], dtype=np.float32)

                    tracker = self.tracker_constructors[self.tracker_type]()
                    tracker.init(frame, (x1, y1, w, h))

                    survived_tracks.append({
                        'id': self.next_track_id, 'kf': new_kf, 'tracker': tracker,
                        'class_name': det['class_name'], 'conf': det['conf'],
                        'color': np.random.uniform(0, 255, 3).tolist(), 'bbox': (x1, y1, x2, y2),
                        'lost_frames': 0
                    })
                    self.next_track_id += 1

        self.tracked_objects = survived_tracks[:max_objects]

        # --- PHASE 4: DRAWING ---
        # Boxes are taken from the corrected KF state for smoothness.
        objects_to_draw = []
        for obj in self.tracked_objects:
            x1, y1, x2, y2 = self._state_to_bbox(obj['kf'].statePost)
            objects_to_draw.append({
                'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2,
                'class_name': f"ID-{obj['id']} {obj['class_name']}",
                'conf': obj.get('conf', 1.0), 'color': obj['color']
            })

        annotated_frame = self.draw_boxes(frame, objects_to_draw)

        self.frame_idx += 1
        return annotated_frame

    def _calculate_iou(self, bboxes1, bboxes2):
        """Return the pairwise Intersection-over-Union matrix of two box sets.

        Args:
            bboxes1: Sequence or array of N boxes as (x1, y1, x2, y2).
            bboxes2: Sequence or array of M boxes as (x1, y1, x2, y2).

        Returns:
            Array of shape (N, M).  If either set is empty the array has that
            shape but its contents are uninitialised (there is nothing to read
            when N or M is zero).
        """
        bboxes1 = np.array(bboxes1)
        bboxes2 = np.array(bboxes2)

        if bboxes1.size == 0 or bboxes2.size == 0:
            return np.empty((len(bboxes1), len(bboxes2)))

        # Broadcast (N, 1) against (M,) to get every pair's intersection corners.
        xA = np.maximum(bboxes1[:, 0][:, np.newaxis], bboxes2[:, 0])
        yA = np.maximum(bboxes1[:, 1][:, np.newaxis], bboxes2[:, 1])
        xB = np.minimum(bboxes1[:, 2][:, np.newaxis], bboxes2[:, 2])
        yB = np.minimum(bboxes1[:, 3][:, np.newaxis], bboxes2[:, 3])

        # Non-overlapping pairs produce a negative extent, clamped to zero.
        interArea = np.maximum(0, xB - xA) * np.maximum(0, yB - yA)

        boxAArea = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1])
        boxBArea = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1])

        # The small epsilon avoids division by zero for degenerate boxes.
        iou = interArea / (boxAArea[:, np.newaxis] + boxBArea - interArea + 1e-6)

        return iou

    def _create_kalman_filter(self):
        """Create a Kalman filter for bounding-box tracking.

        The state is [cx, cy, w, h, vx, vy, vw, vh] (8 variables) and the
        measurement is [cx, cy, w, h] (4 variables).
        """
        kf = cv2.KalmanFilter(8, 4)
        # State transition matrix (F): each of cx, cy, w, h advances by its velocity.
        kf.transitionMatrix = np.array([
            [1, 0, 0, 0, 1, 0, 0, 0],
            [0, 1, 0, 0, 0, 1, 0, 0],
            [0, 0, 1, 0, 0, 0, 1, 0],
            [0, 0, 0, 1, 0, 0, 0, 1],
            [0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0, 0, 1]], np.float32)
        # Measurement matrix (H): only position and size are observed.
        kf.measurementMatrix = np.array([
            [1, 0, 0, 0, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 1, 0, 0, 0, 0]], np.float32)

        # Process noise covariance (Q): 0.03 on the diagonal, with the
        # velocity entries scaled up tenfold (more uncertainty in velocity).
        kf.processNoiseCov = np.eye(8, dtype=np.float32) * 0.03
        kf.processNoiseCov[4:, 4:] *= 10

        # Measurement noise covariance (R).
        kf.measurementNoiseCov = np.eye(4, dtype=np.float32) * 0.1

        return kf

    def _apply_matching(self, iou_matrix, iou_threshold):
        """Match tracks to detections with the Hungarian algorithm.

        Args:
            iou_matrix: Array of shape (num_tracks, num_detections).
            iou_threshold: Assigned pairs with IoU below this are rejected.

        Returns:
            ``(matched_pairs, unmatched_tracks, unmatched_detections)`` where
            ``matched_pairs`` is a list of ``(track_idx, detection_idx)`` tuples
            and the other two are lists of indices.
        """
        # linear_sum_assignment minimises cost, so use (1 - IoU) to maximise IoU.
        cost_matrix = 1 - iou_matrix
        track_indices, detection_indices = linear_sum_assignment(cost_matrix)

        matched_pairs = []
        unmatched_track_indices = set(range(iou_matrix.shape[0]))
        unmatched_detection_indices = set(range(iou_matrix.shape[1]))

        for t_idx, d_idx in zip(track_indices, detection_indices):
            if iou_matrix[t_idx, d_idx] >= iou_threshold:
                # Plain ints keep the result independent of NumPy scalar types.
                t_idx, d_idx = int(t_idx), int(d_idx)
                matched_pairs.append((t_idx, d_idx))
                unmatched_track_indices.discard(t_idx)
                unmatched_detection_indices.discard(d_idx)

        return matched_pairs, list(unmatched_track_indices), list(unmatched_detection_indices)

    def detect(self, frame):
        """Run the detector on ``frame`` and return the confident detections.

        Returns:
            A list of dicts with keys ``class_name``, ``x1``, ``y1``, ``x2``,
            ``y2`` (integer corners) and ``conf``; only boxes whose confidence
            is strictly above ``conf_threshold`` are included.
        """
        results = self.model(frame, verbose=False)[0]
        detections = []
        for box in results.boxes:
            conf = box.conf[0].item()
            if conf > self.conf_threshold:
                class_name = self.model.names[int(box.cls[0].item())]
                coords = box.xyxy[0].tolist()
                detections.append({
                    'class_name': class_name,
                    'x1': int(coords[0]), 'y1': int(coords[1]),
                    'x2': int(coords[2]), 'y2': int(coords[3]),
                    'conf': conf
                })
        return detections

    def draw_boxes(self, frame, objects, default_color='random'):
        """Return a copy of ``frame`` with a box and label drawn for each object.

        Args:
            frame: Image to annotate; it is copied, not modified.
            objects: Iterable of dicts with ``x1``, ``y1``, ``x2``, ``y2``,
                ``class_name``, ``conf`` and optionally ``color``.
            default_color: Colour for objects without ``color``; the string
                ``'random'`` picks a new random colour per object.
        """
        frame_copy = frame.copy()

        for obj in objects:
            x1, y1, x2, y2 = obj['x1'], obj['y1'], obj['x2'], obj['y2']
            # Double-quoted outer string: reusing the same quote inside an
            # f-string is a syntax error before Python 3.12.
            label = f"{obj['class_name']} {obj['conf']:.2f}"
            if 'color' in obj:
                box_color = obj['color']
            else:
                box_color = np.random.uniform(0, 255, 3).tolist() if default_color == 'random' else default_color

            # Scale the label with the box width, with a readable minimum.
            box_w = x2 - x1
            font_scale = max(0.5, box_w / 150)
            font_thickness = max(1, int(box_w / 100))

            font = cv2.FONT_HERSHEY_SIMPLEX
            (tw, th), bl = cv2.getTextSize(label, font, font_scale, font_thickness)

            # Adaptive text colour: black on bright boxes, white on dark ones.
            # NOTE(review): weights look like luma coefficients in B, G, R order.
            brightness_vect = np.array([0.114, 0.587, 0.299])
            brightness = np.dot(box_color, brightness_vect)
            text_color = (0,0,0) if brightness > 128 else (255,255,255)

            # Filled label background above the box, then the outline, then the text.
            cv2.rectangle(frame_copy, (x1, y1 - th - 8), (x1 + tw, y1), box_color, -1)
            cv2.rectangle(frame_copy, (x1, y1), (x2, y2), box_color, 3)
            cv2.putText(frame_copy, label, (x1, y1 - 4), font, font_scale, text_color, font_thickness, cv2.LINE_AA)

        return frame_copy

    def plot_image(self, frame, size_mult=1.0, frame_title=False, axis=False):
        """Show a BGR ``frame`` with matplotlib.

        Args:
            frame: Image converted from BGR to RGB before display.
            size_mult: Scale factor applied to the figure size (pixels / 100).
            frame_title: Title text; ``False`` means no title.
            axis: Passed straight to ``plt.axis``.
        """
        h, w = frame.shape[:2]
        base_figsize = (w / 100, h / 100)
        fig_w, fig_h = base_figsize[0]*size_mult, base_figsize[1]*size_mult
        plt.figure(figsize=(fig_w, fig_h))
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        plt.imshow(frame_rgb)
        plt.axis(axis)
        if frame_title != False:
            plt.title(frame_title)
        plt.show()

## Sample Detection Test

In [None]:
# test_img = cv2.imread('./assets/images/0001.jpg')

# test_tracker = ObjectTracker(model)
# test_img_detections = test_tracker.detect(test_img)
# test_tracker.plot_image(test_tracker.draw_boxes(test_img, test_img_detections))

# 2: Tracking

## VideoPlayer class

In [None]:
class VideoPlayer:
    """Play a video source in an OpenCV window, passing each frame through a tracker."""

    def __init__(self, source, size_multiplier=1.0, playback_speed=1.0, window_title="Video Playback"):
        """Open ``source``, read its properties and create the display window.

        Args:
            source: Anything accepted by ``cv2.VideoCapture`` (e.g. a file path).
            size_multiplier: Window size as a fraction of the frame size.
            playback_speed: Multiplier applied to the source frame rate.
            window_title: Title (and identifier) of the OpenCV window.

        Raises:
            IOError: If the source cannot be opened.
        """
        self.cap = cv2.VideoCapture(source)
        if not self.cap.isOpened():
            # Fail early and loudly instead of opening an empty window and
            # immediately reporting that the stream ended.
            self.cap.release()
            raise IOError(f"Could not open video source: {source}")
        self.window_title = window_title

        # Video properties as reported by the capture backend.
        self.fps = self.cap.get(cv2.CAP_PROP_FPS)
        self.frame_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.frame_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.total_frames = self.cap.get(cv2.CAP_PROP_FRAME_COUNT)

        # Per-frame time budget in milliseconds; falls back to 33 ms when the
        # backend does not report a positive frame rate.
        self.target_delay_ms = (1000 / self.fps) / playback_speed if self.fps > 0 else 33

        # Create and resize the display window.
        cv2.namedWindow(self.window_title, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(
            self.window_title,
            int(self.frame_width * size_multiplier),
            int(self.frame_height * size_multiplier)
        )
        print("--- Video Player Initialized ---")
        print(f"  Resolution: {self.frame_width}x{self.frame_height}")
        print(f"  Original FPS: {self.fps:.2f}")
        print(f"  Total Frames: {self.total_frames}")
        print(f"  Playback Speed: {playback_speed}x")
        print(f"  Target Delay: {self.target_delay_ms:.2f} ms")
        print("--------------------------------")

    def play(self, tracker):
        """Read frames until the stream ends or 'q' is pressed, then release.

        Args:
            tracker: Object exposing ``process_frame(frame)`` that returns the
                frame to display.

        Resources are released even if reading or tracking raises.
        """
        print("Starting playback... Press 'q' to quit.")
        last_time = time.perf_counter()

        try:
            while True:
                start_time = time.perf_counter()

                ret, frame = self.cap.read()

                if not ret:
                    print("Video stream ended.")
                    break

                # Process the object tracking
                processed_frame = tracker.process_frame(frame)

                # FPS annotation from the time between consecutive frames.
                # perf_counter is monotonic; the guard avoids a
                # ZeroDivisionError if two readings ever coincide.
                now = time.perf_counter()
                elapsed = now - last_time
                last_time = now
                fps = 1 / elapsed if elapsed > 0 else 0.0
                cv2.putText(processed_frame, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

                cv2.imshow(self.window_title, processed_frame)

                # Wait only for what is left of the frame's time budget.
                processing_time_ms = (time.perf_counter() - start_time) * 1000
                real_delay_ms = int(self.target_delay_ms - processing_time_ms)

                # waitKey needs a delay of at least 1 ms here; exit on 'q'.
                wait_key = cv2.waitKey(max(1, real_delay_ms))
                if wait_key & 0xFF == ord('q'):
                    print("Playback stopped by user.")
                    break
        finally:
            self.release()

    def release(self):
        """Release the video capture and close all OpenCV windows."""
        print("Releasing resources...")
        self.cap.release()
        cv2.destroyAllWindows()
        # Add a small delay to ensure windows close properly on all systems
        for _ in range(5):
            cv2.waitKey(1)

## Test Playback

In [None]:
# Playback settings: edit these before running the cell.
VIDEO_PATH = './assets/footage/person4.mp4'  # input clip; the file must exist
MODEL_PATH = './yolo11n.pt'                  # weights path (not referenced below in this cell)
PLAYBACK_SPEED = 1.5                         # play at 1.5x speed
WINDOW_SIZE = 0.5                            # display window at 50% of the frame size

try:
    tracker = ObjectTracker(model, 'CSRT')
    player_options = {
        'source': VIDEO_PATH,
        'playback_speed': PLAYBACK_SPEED,
        'size_multiplier': WINDOW_SIZE,
        'window_title': "High-Performance Player",
    }
    player = VideoPlayer(**player_options)
    player.play(tracker)
except OSError as err:
    # I/O problems are reported with their own message only.
    print(err)
except Exception as err:
    # Anything else is reported as unexpected rather than crashing the cell.
    print(f"An unexpected error occurred: {err}")