In [1]:
import cv2
import numpy as np

from IPython.display import clear_output, Image, display
from PIL import Image as PILImage

from src.utils import display_jupyter, display_cv2, detect_initial_point
from src.kalman_filter import KalmanFilter

## Initialization

This section defines the video sources and configures the parameters used in the tracking setup.

In [None]:
# Video path
video_path = "hand_video.mp4"
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise IOError("Cannot open video file")

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

canvas = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)

# State variables
initial_point = None
tracking_started = False
points = []

## Tracking

In this section, we perform the tracking task based on predefined algorithms.

### Tracking with CSRT Tracker

In [None]:
# Initialize points list and drawing canvas before the try block

try:
    tracker = None
    
    while True:
        ret, frame = cap.read()
        if not ret:
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            continue
        
        frame = cv2.flip(frame, 1)
        output_frame = frame.copy()
        
        if not tracking_started:
            
            # Detect the initial point using the MediaPipe library
            initial_point = detect_initial_point(frame)
            
            if initial_point is not None:
                tracking_started = True
                print("Initial point detected! Starting tracking...")
                
                # Initialize tracker
                tracker = cv2.TrackerCSRT_create()
                
                # Create bounding box around initial point of 50x50 pixels
                box_size = 50
                bbox = (
                    initial_point[0] - box_size//2,
                    initial_point[1] - box_size//2,
                    box_size,
                    box_size
                )
                tracker.init(frame, bbox)
                current_point = initial_point
                points = [initial_point]
        
        if tracking_started:
            
            # Update tracker to get new bounding box
            success, bbox = tracker.update(frame)
            
            # If tracking is successful, get the center point of the bounding box
            if success:
                current_point = (
                    int(bbox[0] + bbox[2]//2),
                    int(bbox[1] + bbox[3]//2)
                )
                
                points.append(current_point)
                
                # Draw line on canvas
                if len(points) > 1:
                    cv2.line(canvas, points[-2], points[-1], (0, 0, 255), 3)
                
                # Combine canvas with output frame
                output_frame = cv2.addWeighted(output_frame, 1.0, canvas, 0.5, 0)
                
                cv2.circle(output_frame, current_point, 5, (0, 255, 0), -1)
                cv2.rectangle(output_frame, 
                            (int(bbox[0]), int(bbox[1])), 
                            (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])),
                            (255, 0, 0), 2)
                cv2.putText(output_frame, "Tracking Active", (10, 30),
                           cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            else:
                cv2.putText(output_frame, "Tracking Lost", (10, 30),
                           cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                
                # Reset tracking variables
                tracking_started = False
                points = []
                canvas = np.zeros_like(frame)  # Clear canvas when tracking is lost
        else:
            # Combine existing canvas with output frame
            output_frame = cv2.addWeighted(output_frame, 1.0, canvas, 0.5, 0)
            cv2.putText(output_frame, "Waiting for pointing gesture...", (10, 30),
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        
        display_cv2(frame, output_frame)
        cv2.waitKey(int(1000/fps))
        
except KeyboardInterrupt:
    print("Interrupted by user")
finally:
    cap.release()

### Tracking with Kalman Filters

Tutorial Guide: https://machinelearningspace.com/2d-object-tracking-using-kalman-filter/

In [3]:
# Tracking using the Kalman Filter tracker from OpenCV

try:
    
    # Initialize tracker as none
    tracker = None
    old_frame = None
    old_gray = None
    
    # Luka-Kanade parameters
    lk_params = dict(winSize=(15, 15),
                 maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS |
                           cv2.TERM_CRITERIA_COUNT, 10, 0.03))

    
    # Define the Kalman Filter
    kf = KalmanFilter(0.1, 1, 1, 1, 0.1,0.1)
    
    while True:
        ret, frame = cap.read()
        if not ret:
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            continue
        
        frame = cv2.flip(frame, 1)
        output_frame = frame.copy()
        
        # If tracking is not started, look for initial fingertip to start tracking.
        if not tracking_started:
            
            # Detect the initial point using the MediaPipe library
            initial_point = detect_initial_point(frame)
            
            if initial_point is not None:
                tracking_started = True
                print("Initial point detected! Starting tracking...")
                
                # Set current point for Kalman Filter
                current_point = initial_point
                points = [initial_point]
                
                # Set current point for Lukas-Kanade tracker
                p0 = np.array([[current_point[0], current_point[1]]], dtype=np.float32).reshape(-1, 1, 2)
            
            old_frame = frame
            #old_frame = cv2.flip(old_frame, 1)
            old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
        
        if tracking_started:
            # Get the current frame
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            
            # Get the Kalman Filter prediction
            (kalman_pred_x, kalman_pred_y) = kf.predict()
            
            # Get the measurement from Lukas-Kanade tracker for Kalman update step
            p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
            
            # If Lukas-Kanade tracker is successful, update the Kalman Filter
            if st[0][0] == 1:
                # Good point found - use it as measurement
                measurement = p1.reshape(-1, 2)
                x_meas, y_meas = measurement[0]
                
                # Update the Kalman Filter 
                (kalman_updated_x, kalman_updated_y) = kf.update([[x_meas], [y_meas]])
                
                points.append((int(kalman_updated_x), int(kalman_updated_y)))
                
                # Draw line on canvas
                if len(points) > 1:
                    cv2.line(canvas, points[-2], points[-1], (0, 0, 255), 3)
                
                # Combine canvas with output frame
                output_frame = cv2.addWeighted(output_frame, 1.0, canvas, 0.5, 0)
                
                # Plot the Kalman predicted and the updated points
                cv2.circle(output_frame, (int(kalman_pred_x), int(kalman_pred_y)), 5, (0, 255, 255), -1)  # Predicted point in yellow
                
                cv2.circle(output_frame, (int(kalman_updated_x), int(kalman_updated_y)), 5, (0, 255, 0), -1)  # Updated point in green
                
                cv2.rectangle(output_frame, 
                              (int(kalman_updated_x - 15), int(kalman_updated_y - 15)), 
                              (int(kalman_updated_x + 15), int(kalman_updated_y + 15)),
                              (255, 0, 0), 2)
                cv2.putText(output_frame, "Tracking Active", (10, 30),
                           cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                
                # Update the p0 for Lukas-Kanade tracker
                p0 = p1.reshape(-1, 1, 2)
                
                # Update the old frame and old gray frame
                old_frame = frame
                old_gray = frame_gray
            else:
                
                # If Lukas-Kanade tracker is not successful, use Kalman Filter prediction as previous point
                p0 = np.array([[kalman_pred_x, kalman_pred_y]], dtype=np.float32).reshape(-1, 1, 2)
                
                output_frame = cv2.addWeighted(output_frame, 1.0, canvas, 0.5, 0)
                
                # Display the Kalman Filter prediction
                cv2.circle(output_frame, (int(kalman_pred_x), int(kalman_pred_y)), 5, (0, 255, 255), -1)
                cv2.rectangle(output_frame, 
                              (int(kalman_pred_x - 15), int(kalman_pred_y - 15)), 
                              (int(kalman_pred_x + 15), int(kalman_pred_y + 15)),
                              (255, 0, 0), 2)
                cv2.putText(output_frame, "Tracking Lost - Kalman Guesses only", (10, 30),
                           cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                
                # Reset tracking variables
                tracking_started = False
                points = []
                canvas = np.zeros_like(frame)  # Clear canvas when tracking is lost
        else:
            cv2.putText(output_frame, "Waiting for pointing gesture...", (10, 30),
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        
        # Display the frame in jupyter notebook or OpenCV window
        display_cv2(frame, output_frame)
        
        # Add a small delay to control display speed
        cv2.waitKey(int(1000/fps))
        
except KeyboardInterrupt:
    print("Interrupted by user")
finally:
    cap.release()

I0000 00:00:1739064915.022104 90299693 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M1
W0000 00:00:1739064915.030429 90299693 gesture_recognizer_graph.cc:129] Hand Gesture Recognizer contains CPU only ops. Sets HandGestureRecognizerGraph acceleration to Xnnpack.
I0000 00:00:1739064915.034991 90299693 hand_gesture_recognizer_graph.cc:250] Custom gesture classifier is not defined.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1739064915.055277 90299899 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1739064915.072097 90299904 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1739064915.074211 90299897 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedb

Initial point detected! Starting tracking...


2025-02-09 02:35:15.503 Python[40707:90299693] +[IMKClient subclass]: chose IMKClient_Modern
2025-02-09 02:35:15.503 Python[40707:90299693] +[IMKInputSession subclass]: chose IMKInputSession_Modern
  cv2.circle(output_frame, (int(kalman_pred_x), int(kalman_pred_y)), 5, (0, 255, 255), -1)
  (int(kalman_pred_x - 15), int(kalman_pred_y - 15)),
  (int(kalman_pred_x + 15), int(kalman_pred_y + 15)),
I0000 00:00:1739064919.100204 90299693 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M1
W0000 00:00:1739064919.100468 90299693 gesture_recognizer_graph.cc:129] Hand Gesture Recognizer contains CPU only ops. Sets HandGestureRecognizerGraph acceleration to Xnnpack.
I0000 00:00:1739064919.101372 90299693 hand_gesture_recognizer_graph.cc:250] Custom gesture classifier is not defined.
W0000 00:00:1739064919.107469 90300026 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:

Initial point detected! Starting tracking...


I0000 00:00:1739064921.830522 90299693 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M1
W0000 00:00:1739064921.830754 90299693 gesture_recognizer_graph.cc:129] Hand Gesture Recognizer contains CPU only ops. Sets HandGestureRecognizerGraph acceleration to Xnnpack.
I0000 00:00:1739064921.831592 90299693 hand_gesture_recognizer_graph.cc:250] Custom gesture classifier is not defined.
W0000 00:00:1739064921.837528 90300285 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1739064921.841974 90300285 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1739064921.842349 90300287 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1739064921.842384 90300287 infe

Initial point detected! Starting tracking...


I0000 00:00:1739064923.422101 90299693 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M1
W0000 00:00:1739064923.422341 90299693 gesture_recognizer_graph.cc:129] Hand Gesture Recognizer contains CPU only ops. Sets HandGestureRecognizerGraph acceleration to Xnnpack.
I0000 00:00:1739064923.423235 90299693 hand_gesture_recognizer_graph.cc:250] Custom gesture classifier is not defined.
W0000 00:00:1739064923.428741 90300323 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1739064923.433140 90300322 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1739064923.433475 90300327 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1739064923.433508 90300327 infe

Initial point detected! Starting tracking...


I0000 00:00:1739064926.734503 90299693 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M1
W0000 00:00:1739064926.734739 90299693 gesture_recognizer_graph.cc:129] Hand Gesture Recognizer contains CPU only ops. Sets HandGestureRecognizerGraph acceleration to Xnnpack.
I0000 00:00:1739064926.735647 90299693 hand_gesture_recognizer_graph.cc:250] Custom gesture classifier is not defined.
W0000 00:00:1739064926.741402 90300508 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1739064926.746225 90300509 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1739064926.746596 90300510 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1739064926.746630 90300510 infe

Initial point detected! Starting tracking...


I0000 00:00:1739064927.094866 90299693 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M1
W0000 00:00:1739064927.095101 90299693 gesture_recognizer_graph.cc:129] Hand Gesture Recognizer contains CPU only ops. Sets HandGestureRecognizerGraph acceleration to Xnnpack.
I0000 00:00:1739064927.096007 90299693 hand_gesture_recognizer_graph.cc:250] Custom gesture classifier is not defined.
W0000 00:00:1739064927.101780 90300515 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1739064927.106145 90300513 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1739064927.106544 90300517 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1739064927.106581 90300517 infe

Interrupted by user
