<div align="center">
  <a href="http://www.sharif.edu/">
    <img src="https://cdn.freebiesupply.com/logos/large/2x/sharif-logo-png-transparent.png" alt="SUT Logo" width="140">
  </a>
  
  # Sharif University of Technology
  ### Electrical Engineering Department

  ## Signals and Systems
  #### *Final Project - Spring 2025*
</div>

---

<div align="center">
  <h1>
    <b>Object Tracker</b>
  </h1>
  <p>
    An object tracking system using YOLO for detection and various algorithms (KCF, CSRT, MOSSE) for tracking.
  </p>
</div>

<br>

| Professor                  |
| :-------------------------: |
| Dr. Mohammad Mehdi Mojahedian |

<br>

| Contributors              |
| :-----------------------: |
| **Amirreza Mousavi** |
| **Mahdi Falahi** |
| **Zahra Miladipour** |


## 1: Preparing The Materials


In [35]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
from ultralytics import YOLO
import time

1.1 : Calculating HOG (returns the HOG descriptor of the image)
 

In [49]:
def hog_scaling(image):
    """Return the HOG descriptor of a BGR image.

    The image is converted to grayscale and handed to a default-constructed
    ``cv2.HOGDescriptor``; the value returned by ``compute`` is passed back
    unchanged.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.HOGDescriptor().compute(grayscale)

In [50]:
def hog_channel(image):
    """Return a spatial map of HOG features for a BGR image.

    The whole image is treated as a single HOG window (16x16 blocks, 8x8
    block stride, 8x8 cells, 9 orientation bins). The flat descriptor is
    rearranged into a ``(rows, cols, 36)`` block grid and resized to one
    feature vector per 8x8 cell.

    Parameters:
        image: BGR image as accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.

    Returns:
        Array of shape ``(image_height // 8, image_width // 8, 36)``.
    """
    # 8 is the cell size and the block stride (x and y); changing it trades
    # speed against accuracy.
    cell = 8
    block = 2 * cell
    nbins = 9

    # cv2.HOGDescriptor requires (window - block) to be a multiple of the
    # block stride, so snap the window down to a multiple of the cell size.
    # The dropped border is at most 7 pixels on the right/bottom edge.
    win_h = (image.shape[0] // cell) * cell
    win_w = (image.shape[1] // cell) * cell

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = np.ascontiguousarray(gray[:win_h, :win_w])

    descriptor = cv2.HOGDescriptor(
        (win_w, win_h), (block, block), (cell, cell), (cell, cell), nbins
    )
    flat = descriptor.compute(gray)

    blocks_y = (win_h - block) // cell + 1
    blocks_x = (win_w - block) // cell + 1
    features_per_block = 2 * 2 * nbins

    # OpenCV lays the blocks out column by column (x is the outer index),
    # so reshape as (cols, rows, features) and swap to (rows, cols, features).
    block_grid = flat.reshape((blocks_x, blocks_y, features_per_block))
    block_grid = np.ascontiguousarray(block_grid.transpose(1, 0, 2))

    # cv2.resize takes the target size as (width, height).
    return cv2.resize(block_grid, (win_w // cell, win_h // cell))


1.2 : Checking The Scale (returns the estimated scale of the object in the current frame)

In [37]:
def scaled_check(image, h, source_hog, max_error=500):
    """Pick the scale factor whose filter response best matches the reference.

    The reference response is ``fft2(h) * fft2(source_hog)``. For each
    candidate scale in ``np.arange(0.9, 1.2, 0.05)`` the image is resized,
    transformed with ``fft2`` (cropped or zero-padded to the filter's shape),
    multiplied by ``fft2(h)`` and compared to the reference using the sum of
    squared magnitudes of the difference.

    Parameters:
        image: 2-D array to rescale (assumed single-channel; the original
            code unpacked exactly two dimensions from its shape).
        h: Correlation filter in the spatial domain; must have the same
            shape as ``source_hog`` so the two spectra can be multiplied.
        source_hog: Reference feature map.
        max_error: Upper bound on the accepted error. A candidate scale is
            chosen only if its error is below this value (default 500, the
            constant previously hard-coded in the function).

    Returns:
        The best scale as a float, or ``1.0`` when no candidate's error is
        below ``max_error``.
    """
    # NumPy shapes are (rows, cols) == (height, width); keep these separate
    # from the filter parameter ``h``.
    img_h, img_w = image.shape[:2]

    filter_fft = np.fft.fft2(h)
    reference = filter_fft * np.fft.fft2(source_hog)
    target_shape = filter_fft.shape[-2:]

    best_scale = 1.0
    best_error = max_error
    for coefficient in np.arange(0.9, 1.2, 0.05):
        # cv2.resize expects an integer (width, height) tuple.
        new_size = (
            max(1, int(round(coefficient * img_w))),
            max(1, int(round(coefficient * img_h))),
        )
        resized = cv2.resize(image, new_size)

        # Crop / zero-pad to the filter's shape so the spectra line up.
        resized_fft = np.fft.fft2(resized, s=target_shape, axes=(-2, -1))
        diff = resized_fft * filter_fft - reference

        # The spectra are complex: compare by squared magnitude, which is real.
        error = np.sum(np.abs(diff) ** 2)
        if error < best_error:
            best_error = error
            best_scale = float(coefficient)
    return best_scale

1.3 : Predicting The Next Frame's Center (Kalman Filter)

In [38]:
def kalman_prediction(F, X_k_1, P_k_1, Q_k):
    """Run the Kalman filter prediction step.

    Parameters:
        F: State-transition matrix.
        X_k_1: Previous state estimate.
        P_k_1: Previous estimate covariance.
        Q_k: Process-noise covariance added to the propagated covariance.

    Returns:
        Tuple ``(x_k, p_k)``: the predicted state ``F . X_k_1`` and the
        predicted covariance ``F . P_k_1 . F^T + Q_k``.
    """
    predicted_state = np.dot(F, X_k_1)
    propagated_cov = np.dot(P_k_1, F.T)
    predicted_cov = np.dot(F, propagated_cov) + Q_k
    return predicted_state, predicted_cov

In [39]:
def kalman_updating(x_k, p_k, H_k, z_k, R_k):
    """Run the Kalman filter measurement-update step.

    Parameters:
        x_k: Predicted state.
        p_k: Predicted covariance.
        H_k: Measurement matrix.
        z_k: Measurement vector.
        R_k: Measurement-noise covariance.

    Returns:
        Tuple ``(x_k_new, P_k_new)`` with the corrected state and covariance.
    """
    H_t = H_k.T

    # Gain K = P H^T (H P H^T + R)^-1
    innovation_cov = np.dot(np.dot(H_k, p_k), H_t) + R_k
    cross_cov = np.dot(p_k, H_t)
    gain = np.dot(cross_cov, np.linalg.inv(innovation_cov))

    # Correct the state with the measurement residual, then shrink the covariance.
    residual = z_k - np.dot(H_k, x_k)
    x_k_new = x_k + np.dot(gain, residual)
    P_k_new = p_k - np.dot(np.dot(gain, H_k), p_k)
    return x_k_new, P_k_new

1.4 : Updating Method

In [40]:
def filter_updating(H_new, H_old, alpha):
    """Blend a new filter with the previous one.

    Returns ``alpha * H_new + (1 - alpha) * H_old``, so ``alpha`` is the
    weight given to the new filter and ``1 - alpha`` the weight kept from
    the old one.
    """
    keep = 1 - alpha
    return alpha * H_new + keep * H_old

1.5 : Finding The Channels

In [41]:
def extract_channels(image):
    """Return one-hot colour-name channels for a BGR image.

    Every pixel is converted to CIE Lab and assigned to the nearest of 11
    reference colours (squared Euclidean distance in Lab). The result has
    one channel per reference colour, set to 1 where that colour is the
    nearest and 0 elsewhere.

    Parameters:
        image: BGR image. ``uint8`` input is scaled to ``[0, 1]`` first;
            any other dtype is cast to float32 and is assumed to already be
            in ``[0, 1]`` - TODO confirm against callers.

    Returns:
        float32 array of shape ``(height, width, 11)``.
    """
    # Reference colours in true Lab units (L in 0..100, signed a/b).
    colors = np.array([
        [0.00, 0.00, 0.00], [45.37, -4.33, -33.43], [43.08, 17.51, 37.53],
        [53.59, 0.00, 0.00], [47.31, -45.33, 41.35], [65.75, 71.45, 63.32],
        [76.08, 22.25, -21.46], [32.30, 79.19, -107.86], [52.23, 75.43, 37.36],
        [100.00, 0.00, 0.00], [92.13, -16.53, 93.35],
    ], dtype=np.float32)

    # For 8-bit input OpenCV rescales Lab to 0..255 (L * 255 / 100, a + 128,
    # b + 128), which does not match the table above. Converting a float32
    # image in [0, 1] yields unscaled Lab values instead.
    if image.dtype == np.uint8:
        image_float = image.astype(np.float32) / 255.0
    else:
        image_float = image.astype(np.float32)
    image_lab = cv2.cvtColor(image_float, cv2.COLOR_BGR2Lab)

    h, w, _ = image_lab.shape
    num_colors = colors.shape[0]

    pixels = image_lab.reshape(-1, 3).astype(np.float32)
    # Squared distance from every pixel to every reference colour: (N, 11).
    distances = np.sum(
        (pixels[:, np.newaxis, :] - colors[np.newaxis, :, :]) ** 2, axis=2
    )
    closest_color_indices = np.argmin(distances, axis=1)

    cn_features_flat = np.zeros((pixels.shape[0], num_colors), dtype=np.float32)
    cn_features_flat[np.arange(pixels.shape[0]), closest_color_indices] = 1

    return cn_features_flat.reshape(h, w, num_colors)

1.6 : Training (Teaching) The Filters

In [48]:
def teaching(f, g, lambda_trust):
    """Compute a multi-channel correlation filter in the frequency domain.

    For every frequency bin and channel ``c`` the filter is

        H[..., c] = X[..., c] * G / (sum_k |X[..., k]|^2 + lambda_trust)

    where ``X`` is the 2-D FFT of ``f`` over its first two axes and ``G`` is
    the 2-D FFT of ``g``. All products are element-wise per frequency bin;
    the previous ``np.dot`` calls contracted across spatial axes instead.

    Parameters:
        f: Feature map of shape ``(height, width, channels)``.
        g: Desired response of shape ``(height, width)``.
        lambda_trust: Regularisation term added to the denominator.

    Returns:
        Complex array of shape ``(height, width, channels)``.
    """
    X = np.fft.fft2(f, axes=(0, 1))
    G = np.fft.fft2(g)

    # Broadcast the single response spectrum across all feature channels.
    numerator = X * G[:, :, np.newaxis]

    # conj(X) * X is the real spectral energy |X|^2; sum it over channels.
    energy = np.sum((np.conj(X) * X).real, axis=2, keepdims=True)
    return numerator / (energy + lambda_trust)

## 2 : Main Detector

2.1 : Loading The Video And The Detection Model

In [52]:
# Open the video source 'person1.mp4' (path is relative to the working directory).
# NOTE(review): nothing here checks that the file was opened; consider testing
# cap.isOpened() before reading frames - TODO confirm the main loop handles this.
cap = cv2.VideoCapture('person1.mp4')
# Create the YOLO detector from the 'yolo11n.pt' weights file.
model = YOLO('yolo11n.pt')

2.2 : The Main Part