In [1]:
import cv2
import numpy as np

from IPython.display import clear_output, Image, display
from PIL import Image as PILImage

from src.utils import display_jupyter, display_cv2, detect_initial_point
from src.kalman_filter import KalmanFilter

## Initialization

This section defines the video sources and configures the parameters used in the tracking setup.

In [2]:
# Video sources
# `video_path` is the recorded clip; `video_source` is what is actually opened.
# The notebook captures from the default camera (device index 0); assign
# `video_path` to `video_source` to replay the recorded clip instead.
video_path = "hand_tracking.mp4"
video_source = 0
cap = cv2.VideoCapture(video_source)
if not cap.isOpened():
    raise IOError(f"Cannot open video source: {video_source!r}")

# Capture geometry and frame rate, as reported by the backend
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
if fps <= 0:
    # Some capture backends (cameras in particular) report 0 FPS. The tracking
    # cells compute their display delay as 1000/fps, so fall back to a sane
    # default instead of letting that division fail.
    fps = 30

# Black BGR image with the same size as the video frames
canvas = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)

# State variables shared with the tracking cells
initial_point = None       # fingertip position that seeds the tracker, once detected
tracking_started = False   # True while a tracker is following the fingertip

## Tracking

In this section, we track the fingertip with two alternative approaches: OpenCV's CSRT tracker, and a Kalman filter whose measurements come from Lucas-Kanade optical flow.

### Tracking with CSRT Tracker

In [3]:
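# # Tracking using the CSRT tracker from OpenCV
# # NOTE: this entire cell is commented out (disabled). Its `finally` clause
# # releases `cap`, as does the Kalman filter cell, so re-run the
# # initialization cell before switching from one tracking cell to the other.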

# try:
    
#     # Initialize tracker as none
#     tracker = None
    
#     while True:
#         ret, frame = cap.read()
#         if not ret:
#             cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
#             continue
        
#         output_frame = frame.copy()
        
#         # If tracking is not started, look for initial fingertip to start tracking.
#         if not tracking_started:
            
#             # Detect the initial point using the MediaPipe library
#             initial_point = detect_initial_point(frame)
            
#             if initial_point is not None:
#                 tracking_started = True
#                 print("Initial point detected! Starting tracking...")
                
#                 # Initialize tracker
#                 tracker = cv2.TrackerCSRT_create()  # or cv2.TrackerKCF_create()
                
#                 # Create bounding box around initial point of 50x50 pixels
#                 # The goal of the bounding box is to create an area of interest for the tracker
#                 box_size = 50
#                 bbox = (
#                     initial_point[0] - box_size//2,
#                     initial_point[1] - box_size//2,
#                     box_size,
#                     box_size
#                 )
#                 tracker.init(frame, bbox)
#                 current_point = initial_point
        
#         if tracking_started:
#             # Update tracker
#             success, bbox = tracker.update(frame)
            
#             if success:
#                 # Get center point of bounding box for plotting
#                 current_point = (
#                     int(bbox[0] + bbox[2]//2),
#                     int(bbox[1] + bbox[3]//2)
#                 )
                
#                 cv2.circle(output_frame, current_point, 5, (0, 255, 0), -1)
#                 cv2.rectangle(output_frame, 
#                             (int(bbox[0]), int(bbox[1])), 
#                             (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])),
#                             (255, 0, 0), 2)
#                 cv2.putText(output_frame, "Tracking Active", (10, 30),
#                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
#             else:
#                 cv2.putText(output_frame, "Tracking Lost", (10, 30),
#                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
#                 tracking_started = False
#         else:
#             cv2.putText(output_frame, "Waiting for pointing gesture...", (10, 30),
#                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        
#         # Display the frame in jupyter notebook or OpenCV window
#         display_cv2(frame, output_frame)
        
#         # Add a small delay to control display speed
#         cv2.waitKey(int(1000/fps))
        
# except KeyboardInterrupt:
#     print("Interrupted by user")
# finally:
#     cap.release()

### Tracking with Kalman Filters

Tutorial Guide: https://machinelearningspace.com/2d-object-tracking-using-kalman-filter/

In [None]:
# Tracking with a Kalman filter (src.kalman_filter.KalmanFilter) whose
# measurements come from OpenCV's pyramidal Lucas-Kanade optical flow.
#
# Per frame:
#   1. Until a fingertip is found, detect_initial_point(frame) is polled and the
#      frame is stored as the optical-flow reference.
#   2. While tracking, the filter predicts, Lucas-Kanade measures the new
#      position of the point, and the filter is corrected with that measurement.
#   3. If Lucas-Kanade loses the point, only the prediction is drawn and the
#      loop goes back to step 1.
# Stop with "q" (OpenCV window focused) or by interrupting the kernel.

BOX_HALF_SIZE = 15        # half side (px) of the square drawn around the tracked point
FALLBACK_FPS = 30         # used when `fps` is not a positive number
MAX_READ_FAILURES = 30    # consecutive failed reads tolerated before giving up


def _as_scalar(value):
    """Return a size-1 value (plain number, 1x1 array or matrix) as a float.

    Calling int() directly on a 1x1 array/matrix is deprecated in recent NumPy
    releases, so filter outputs are normalised here before being drawn.
    """
    return float(np.asarray(value).reshape(-1)[0])


# waitKey(0) would block until a key press and fps == 0 would divide by zero,
# so the per-frame delay is clamped to at least 1 ms.
if fps > 0:
    frame_delay_ms = max(1, int(1000 / fps))
else:
    frame_delay_ms = int(1000 / FALLBACK_FPS)

# Start from a clean state so the cell does not depend on what an earlier
# run (of this or another tracking cell) left behind.
initial_point = None
tracking_started = False

try:

    # Not used by this cell; the assignment is kept from the original cell
    tracker = None
    old_frame = None
    old_gray = None
    p0 = None

    # Lucas-Kanade parameters
    lk_params = dict(winSize=(15, 15),
                     maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS |
                               cv2.TERM_CRITERIA_COUNT, 10, 0.03))

    # Define the Kalman Filter
    # NOTE(review): the filter is never seeded with the detected fingertip and is
    # not reset when tracking restarts. If KalmanFilter starts its state at the
    # origin, the first estimates will be drawn away from the finger - confirm
    # against src/kalman_filter.py.
    kf = KalmanFilter(0.1, 1, 1, 1, 0.1, 0.1)

    failed_reads = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            # A single failure is the normal end of a video file: rewind and
            # retry. Repeated failures mean the source is gone (released or
            # disconnected capture), so stop instead of spinning forever.
            failed_reads += 1
            if failed_reads >= MAX_READ_FAILURES:
                print("No frames could be read from the video source; stopping.")
                break
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            continue
        failed_reads = 0

        output_frame = frame.copy()

        # If tracking is not started, look for initial fingertip to start tracking.
        if not tracking_started:

            # Detect the initial point using the MediaPipe library
            initial_point = detect_initial_point(frame)

            if initial_point is not None:
                tracking_started = True
                print("Initial point detected! Starting tracking...")

                # Set current point for Kalman Filter
                current_point = initial_point

                # Set current point for Lucas-Kanade tracker, shaped (1, 1, 2)
                p0 = np.array([[current_point[0], current_point[1]]], dtype=np.float32).reshape(-1, 1, 2)

            # Reference frame for the next optical-flow computation
            old_frame = frame
            old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)

        if tracking_started:
            # Get the current frame
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Get the Kalman Filter prediction
            kalman_pred_x, kalman_pred_y = (_as_scalar(v) for v in kf.predict())

            # Get the measurement from Lucas-Kanade tracker for Kalman update step
            p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)

            # The flow result can be missing altogether, not only flagged as lost
            point_found = p1 is not None and st is not None and st[0][0] == 1

            # If Lucas-Kanade tracker is successful, update the Kalman Filter
            if point_found:
                # Good point found - use it as measurement
                measurement = p1.reshape(-1, 2)
                x_meas, y_meas = measurement[0]

                # Update the Kalman Filter
                kalman_updated_x, kalman_updated_y = (
                    _as_scalar(v) for v in kf.update([[x_meas], [y_meas]])
                )

                # Plot the Kalman predicted and the updated points
                cv2.circle(output_frame, (int(kalman_pred_x), int(kalman_pred_y)), 5, (0, 255, 255), -1)  # Predicted point in yellow

                cv2.circle(output_frame, (int(kalman_updated_x), int(kalman_updated_y)), 5, (0, 255, 0), -1)  # Updated point in green

                cv2.rectangle(output_frame,
                              (int(kalman_updated_x - BOX_HALF_SIZE), int(kalman_updated_y - BOX_HALF_SIZE)),
                              (int(kalman_updated_x + BOX_HALF_SIZE), int(kalman_updated_y + BOX_HALF_SIZE)),
                              (255, 0, 0), 2)
                cv2.putText(output_frame, "Tracking Active", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                # Update the p0 for Lucas-Kanade tracker
                p0 = p1.reshape(-1, 1, 2)

                # Update the old frame and old gray frame
                old_frame = frame
                old_gray = frame_gray
            else:

                # If Lucas-Kanade tracker is not successful, use Kalman Filter prediction as previous point
                p0 = np.array([[kalman_pred_x, kalman_pred_y]], dtype=np.float32).reshape(-1, 1, 2)

                # Display the Kalman Filter prediction
                cv2.circle(output_frame, (int(kalman_pred_x), int(kalman_pred_y)), 5, (0, 255, 255), -1)
                cv2.rectangle(output_frame,
                              (int(kalman_pred_x - BOX_HALF_SIZE), int(kalman_pred_y - BOX_HALF_SIZE)),
                              (int(kalman_pred_x + BOX_HALF_SIZE), int(kalman_pred_y + BOX_HALF_SIZE)),
                              (255, 0, 0), 2)
                cv2.putText(output_frame, "Tracking Lost - Kalman Guesses only", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

                # Fall back to fingertip detection on the next frame
                tracking_started = False
        else:
            cv2.putText(output_frame, "Waiting for pointing gesture...", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Display the frame in jupyter notebook or OpenCV window
        display_cv2(frame, output_frame)

        # Add a small delay to control display speed; "q" ends the loop
        key = cv2.waitKey(frame_delay_ms) & 0xFF
        if key == ord("q"):
            print("Stopped by user")
            break

except KeyboardInterrupt:
    print("Interrupted by user")
finally:
    cap.release()
    try:
        # Close any OpenCV windows left open by the display helper
        cv2.destroyAllWindows()
    except cv2.error:
        # Best effort: OpenCV builds without GUI support cannot close windows
        pass