<a href="https://colab.research.google.com/github/Sohyla03Said/feature-extraction-and-matching/blob/main/video-object-detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install opencv-python-headless

import cv2
import numpy as np

# --- Input / output locations ------------------------------------------------
query_path = '/content/Screenshot 2025-04-19 202346.png'  # reference image of the object to find
video_path = '/content/Vegetables Vocabulary Chant for Children _ Fun Kids English.mp4'  # video to search
output_path = '/content/output_video.mp4'  # where the annotated video is written

# --- Query image -------------------------------------------------------------
# Read as single-channel grayscale; cv2.imread signals failure by returning
# None rather than raising, so that case is turned into an explicit error.
query_img_gray = cv2.imread(query_path, cv2.IMREAD_GRAYSCALE)
if query_img_gray is None:
    raise FileNotFoundError(f"Query image not found at {query_path}")
h_query, w_query = query_img_gray.shape

# --- Feature detector --------------------------------------------------------
# SIFT keypoints/descriptors of the query are computed once and reused for
# every frame of the video.
sift = cv2.SIFT_create()
kp_query, des_query = sift.detectAndCompute(query_img_gray, None)

# --- Descriptor matcher ------------------------------------------------------
# FLANN matcher configured with algorithm id 1 (named FLANN_INDEX_KDTREE here),
# 5 trees, and 50 checks per search.
FLANN_INDEX_KDTREE = 1
index_params = {"algorithm": FLANN_INDEX_KDTREE, "trees": 5}
search_params = {"checks": 50}
flann = cv2.FlannBasedMatcher(index_params, search_params)

# Open the input video; fail early if the file is missing or unreadable.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise FileNotFoundError(f"Video not found or cannot be opened: {video_path}")

# Video properties
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    # The capture backend reports 0 when it cannot determine the frame rate.
    # `not fps > 0` also catches NaN and negative values. A writer created
    # with such a rate produces an unusable file, so fall back to a common
    # default instead.
    fps = 30.0
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Set up the video writer used to save the annotated output.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
if not out.isOpened():
    # Without this check every later out.write() would silently do nothing
    # and the script would still report success.
    cap.release()
    out.release()
    raise OSError(
        f"Cannot open video writer for {output_path} "
        f"(fps={fps}, size={width}x{height})"
    )

# Process the video frame by frame: locate the query object in each frame via
# SIFT + FLANN matching, outline it with a green polygon when a homography is
# found, and write every frame (annotated or not) to the output video.

# Loop-invariant values, computed once instead of on every frame.
query_corners = np.float32(
    [[0, 0], [0, h_query], [w_query, h_query], [w_query, 0]]
).reshape(-1, 1, 2)
# knnMatch with k=2 needs at least two descriptors on each side.
query_has_features = des_query is not None and len(des_query) >= 2

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # With no usable query descriptors nothing can ever match, so the
        # per-frame feature extraction is skipped and frames pass through.
        if query_has_features:
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            kp_frame, des_frame = sift.detectAndCompute(frame_gray, None)

            if des_frame is not None and len(des_frame) >= 2:
                matches = flann.knnMatch(des_query, des_frame, k=2)

                # Lowe's ratio test. Entries that do not contain two
                # neighbours are skipped so tuple unpacking cannot raise.
                good_matches = [
                    pair[0]
                    for pair in matches
                    if len(pair) == 2 and pair[0].distance < 0.7 * pair[1].distance
                ]

                if len(good_matches) > 10:
                    src_pts = np.float32(
                        [kp_query[m.queryIdx].pt for m in good_matches]
                    ).reshape(-1, 1, 2)
                    dst_pts = np.float32(
                        [kp_frame[m.trainIdx].pt for m in good_matches]
                    ).reshape(-1, 1, 2)

                    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)

                    if M is not None:
                        # Project the query image outline into the frame.
                        transformed_corners = cv2.perspectiveTransform(query_corners, M)
                        cv2.polylines(
                            frame,
                            [np.int32(transformed_corners)],
                            isClosed=True,
                            color=(0, 255, 0),
                            thickness=3,
                        )

        # Save the frame
        out.write(frame)
finally:
    # Release resources even if processing fails part-way, so the capture
    # handle is freed and the output file is finalized.
    cap.release()
    out.release()

print("✅ Done! Annotated video saved at:", output_path)


✅ Done! Annotated video saved at: /content/output_video.mp4
