# In [4]:
import cv2 as cv
import numpy as np

# Detect a query image inside every frame of a video using SIFT features,
# FLANN matching and a RANSAC homography, outline the detected object, and
# write the annotated frames to a new video file.

# Load the query image in grayscale (the object to detect)
query_img = cv.imread('/content/r.jpg', cv.IMREAD_GRAYSCALE)
if query_img is None:
    raise ValueError("Could not load query image")

# Initialize SIFT detector with more features
sift = cv.SIFT_create(nfeatures=1000)
kp1, des1 = sift.detectAndCompute(query_img, None)

# Verify we got enough features in query image
if des1 is None or len(des1) < 10:
    raise ValueError("Query image doesn't have enough features")

# FLANN matcher setup
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)  # Lower this for faster but less accurate matching
flann = cv.FlannBasedMatcher(index_params, search_params)

MIN_MATCH_COUNT = 10
MIN_FEATURES = 5  # Minimum features needed in frame to attempt matching
LOWE_RATIO = 0.7  # Best match must be this much closer than the second best
DEFAULT_FPS = 30.0  # Used only when the container does not report a usable FPS

# Corners of the query image; they do not change between frames, so build
# them once instead of on every successful detection.
h, w = query_img.shape
pts = np.float32([[0, 0], [0, h-1], [w-1, h-1], [w-1, 0]]).reshape(-1, 1, 2)

# Load the video
cap = cv.VideoCapture('/content/rrr.mp4')
if not cap.isOpened():
    raise ValueError("Could not open video file")

out = None
try:
    # Get video info
    frame_width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv.CAP_PROP_FPS)
    if not fps > 0:
        # Catches 0, negative and NaN; a writer cannot use such a frame rate.
        fps = DEFAULT_FPS

    # Set up video writer
    fourcc = cv.VideoWriter_fourcc(*'mp4v')
    out = cv.VideoWriter('/content/output.mp4', fourcc, fps, (frame_width, frame_height))
    if not out.isOpened():
        # Without this check every out.write() below would silently do nothing.
        raise ValueError("Could not open output video for writing")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
        kp2, des2 = sift.detectAndCompute(frame_gray, None)

        # Only proceed if we have enough features in the frame
        if des2 is not None and len(des2) >= MIN_FEATURES:
            try:
                matches = flann.knnMatch(des1, des2, k=2)

                # Apply Lowe's ratio test. knnMatch may return fewer than two
                # neighbours for a descriptor, so skip incomplete pairs rather
                # than failing on tuple unpacking.
                good = [
                    pair[0]
                    for pair in matches
                    if len(pair) == 2
                    and pair[0].distance < LOWE_RATIO * pair[1].distance
                ]

                if len(good) > MIN_MATCH_COUNT:
                    src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
                    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)

                    M, mask = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 5.0)
                    if M is not None:
                        # Project the query image outline into the frame.
                        dst = cv.perspectiveTransform(pts, M)
                        frame = cv.polylines(frame, [np.int32(dst)], True, (0, 255, 0), 3, cv.LINE_AA)

                        # Add confidence text
                        cv.putText(frame, f"Matches: {len(good)}", (10, 30),
                                   cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            except cv.error as e:
                # Matching failed for this frame: report it, then fall through
                # so the frame is still written and the output keeps the same
                # number of frames as the input.
                print(f"Matching error: {e}")

        # Write the processed frame
        out.write(frame)
finally:
    # Release the capture and writer even if processing raised, so the
    # output file is finalized and the handles are not leaked.
    cap.release()
    if out is not None:
        out.release()

cv.destroyAllWindows()