# In [2]:
import cv2

def preprocess_frame(frame):
    """Return a denoised, binarised version of a BGR frame.

    The frame is converted to grayscale, smoothed with a 5x5 Gaussian
    kernel and then binary-thresholded at 50 with a maximum value of 255.

    Args:
        frame: Image in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2GRAY``).

    Returns:
        The thresholded single-channel image produced by ``cv2.threshold``.
    """
    kernel_size = (5, 5)
    cutoff, max_value = 50, 255

    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # A sigma of 0 asks OpenCV to derive the deviation from the kernel size.
    smoothed = cv2.GaussianBlur(grayscale, kernel_size, 0)

    # cv2.threshold returns (threshold_used, image); only the image is needed.
    return cv2.threshold(smoothed, cutoff, max_value, cv2.THRESH_BINARY)[1]

def _count_good_matches(matcher, reference_des, frame_des, ratio=0.7):
    """Count reference descriptors whose best frame match passes the ratio test."""
    # knnMatch with k=2 needs at least two candidate descriptors in the frame;
    # with none (detectAndCompute returned None) or only one there is no
    # second-best neighbour to compare against, so nothing can pass.
    if frame_des is None or len(frame_des) < 2:
        return 0

    candidate_pairs = matcher.knnMatch(reference_des, frame_des, k=2)
    return sum(
        1
        for pair in candidate_pairs
        if len(pair) == 2 and pair[0].distance < ratio * pair[1].distance
    )


def detect_gesture(video_path, reference_path, threshold=0.1345):
    """
    Detects a gesture in a video based on a reference gesture video using
    SIFT feature extraction and brute-force descriptor matching.

    Only the first frame of the reference video is used. Every frame of the
    test video is preprocessed with ``preprocess_frame``, matched against the
    reference descriptors and shown in a window titled "Gesture Detection";
    frames that match get a "DETECTED" label near the top-right corner.
    Press 'q' in the window to stop early.

    Args:
        video_path: Path to the test video.
        reference_path: Path to the reference gesture video.
        threshold: Minimum ratio of good matches to reference descriptors
            for a frame to count as a detection (default 0.1345).

    Returns:
        None. Problems opening or reading the videos, or a reference frame
        without features, are reported with ``print`` and end the call early.
    """
    # Feature extractor (SIFT)
    sift = cv2.SIFT_create()

    # Load the reference gesture frame; release the capture on every path.
    cap_ref = cv2.VideoCapture(reference_path)
    try:
        if not cap_ref.isOpened():
            print(f"Error: Could not open reference video at {reference_path}.")
            return
        ret, reference_frame = cap_ref.read()
    finally:
        cap_ref.release()

    if not ret:
        print(f"Error: Could not read reference frame from {reference_path}.")
        return

    # NOTE(review): the reference frame is only converted to grayscale, while
    # test frames additionally go through blur + threshold in
    # preprocess_frame. Left as-is because the default threshold was
    # presumably tuned against this behaviour -- confirm before aligning them.
    reference_gray = cv2.cvtColor(reference_frame, cv2.COLOR_BGR2GRAY)
    _, reference_des = sift.detectAndCompute(reference_gray, None)

    if reference_des is None:
        print("Error: No features detected in the reference frame.")
        return

    # Loop-invariant: the matcher and the number of good matches required.
    matcher = cv2.BFMatcher()
    min_good_matches = threshold * len(reference_des)

    # Open test video
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error: Could not open test video.")
        cap.release()
        return

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Extract features from the preprocessed frame. The descriptors
            # are None when no keypoints are found; the helper handles that.
            _, frame_des = sift.detectAndCompute(preprocess_frame(frame), None)

            good_count = _count_good_matches(matcher, reference_des, frame_des)
            gesture_detected = good_count > min_good_matches

            # Overlay "DETECTED" text if gesture detected
            if gesture_detected:
                text_origin = (frame.shape[1] - 200, 50)  # near the top-right corner
                font = cv2.FONT_HERSHEY_SIMPLEX
                font_scale = 1
                font_thickness = 3
                font_color = (0, 255, 0)  # Green in BGR
                line_type = cv2.LINE_AA
                cv2.putText(frame, "DETECTED", text_origin, font, font_scale,
                            font_color, font_thickness, line_type)
                # Second pass in black with thickness 0, drawn on top of the
                # green text. NOTE(review): this is an inner stroke rather
                # than a surrounding outline; rendering kept as originally
                # written.
                cv2.putText(frame, "DETECTED", text_origin, font, font_scale,
                            (0, 0, 0), 0, line_type)

            cv2.imshow("Gesture Detection", frame)
            # A single waitKey both paces playback (~100 ms per frame) and
            # polls the keyboard; two back-to-back calls would discard any
            # key pressed during the first, longer wait.
            if cv2.waitKey(100) & 0xFF == ord('q'):
                break
    finally:
        # Release resources even if matching or display raises.
        cap.release()
        cv2.destroyAllWindows()

# Example usage: run detection on the sample clip against the reference gesture.
reference_path = "train_gesture.mp4"
video_path = "skip_test2.mp4"
detect_gesture(video_path=video_path, reference_path=reference_path)
