In [16]:
import cv2
import numpy as np

def rotate_image(image, angle):
    """Rotate the given image by the given angle without clipping its content.

    Args:
        image: Image array whose first two axes are (rows, columns); any
            further axes (e.g. colour channels) are carried along unchanged.
        angle: Rotation in degrees. Positive values rotate counter-clockwise,
            following the convention of ``cv2.getRotationMatrix2D``.

    Returns:
        A new array containing the rotated image. The output canvas is sized
        to the rotated bounding box, so a 90/270 degree rotation of a
        non-square image swaps height and width instead of cropping the
        content and padding it with black.
    """
    # Multiples of 90 degrees are exact index permutations: do them losslessly
    # with NumPy instead of interpolating through an affine warp.
    if angle % 90 == 0:
        quarter_turns = int(angle // 90) % 4
        # .copy() yields a fresh C-contiguous array, as OpenCV expects.
        return np.rot90(image, quarter_turns).copy()

    (h, w) = image.shape[:2]
    center = (w / 2, h / 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)

    # |cos| and |sin| of the rotation give the size of the rotated bounding box.
    cos_a = abs(M[0, 0])
    sin_a = abs(M[0, 1])
    new_w = int(np.ceil(h * sin_a + w * cos_a))
    new_h = int(np.ceil(h * cos_a + w * sin_a))

    # Shift the transform so the original centre lands on the new canvas centre.
    M[0, 2] += new_w / 2 - center[0]
    M[1, 2] += new_h / 2 - center[1]

    return cv2.warpAffine(image, M, (new_w, new_h))

def _build_orientation_templates(input_image, frame_shape):
    """Return (template, mirrored_template) pairs for 0/90/180/270 degrees, each resized to frame_shape."""
    frame_h, frame_w = frame_shape[:2]
    templates = []
    for angle in [0, 90, 180, 270]:
        rotated = rotate_image(input_image, angle)
        rotated = cv2.resize(rotated, (frame_w, frame_h))
        # np.flip returns a negatively-strided view; hand OpenCV a contiguous copy.
        mirrored = np.ascontiguousarray(np.flip(rotated, 1))
        templates.append((rotated, mirrored))
    return templates


def find_image_in_video(video_path, input_image_path, threshold=0.5):
    """Find the input image in the given video, allowing rotation and mirroring.

    Every frame is converted to grayscale and compared against the input image
    rotated by 0, 90, 180 and 270 degrees, plus the horizontal mirror of each
    rotation. Each template is resized to the full frame size before matching,
    so the comparison yields one normalised correlation score per orientation
    for the whole frame. A line is printed for every orientation whose score
    exceeds ``threshold``.

    Args:
        video_path: Path of the video file to scan.
        input_image_path: Path of the image to look for.
        threshold: Minimum TM_CCOEFF_NORMED score that counts as a match.

    Raises:
        OSError: If the image cannot be read or the video cannot be opened.
        ValueError: If the video does not report a positive frame rate, which
            is required to convert frame indices to timestamps.
    """
    # Read input image; cv2.imread signals failure by returning None.
    input_image = cv2.imread(input_image_path, cv2.IMREAD_GRAYSCALE)
    if input_image is None:
        raise OSError(f"Could not read input image: {input_image_path}")

    # Read video
    video_capture = cv2.VideoCapture(video_path)
    try:
        if not video_capture.isOpened():
            raise OSError(f"Could not open video: {video_path}")

        fps = video_capture.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            raise ValueError(f"Video reports an invalid frame rate: {fps}")

        # Define the method for template matching
        method = cv2.TM_CCOEFF_NORMED

        # The templates depend only on the frame size, so build them once and
        # rebuild only if the frame shape changes.
        templates = []
        templates_shape = None

        # Read until the stream ends rather than trusting the reported frame
        # count, which containers may report inaccurately.
        frame_index = 0
        while True:
            ret, frame = video_capture.read()
            if not ret:
                break

            # Convert frame to grayscale
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            if gray_frame.shape != templates_shape:
                templates = _build_orientation_templates(input_image, gray_frame.shape)
                templates_shape = gray_frame.shape

            for template, template_flipped in templates:
                # Match template
                res = cv2.matchTemplate(gray_frame, template, method)
                max_val = cv2.minMaxLoc(res)[1]

                res_flipped = cv2.matchTemplate(gray_frame, template_flipped, method)
                max_val_flipped = cv2.minMaxLoc(res_flipped)[1]

                # Check if match found
                if max_val > threshold:
                    match_time = frame_index / fps
                    print(f"Match found at time: {match_time:.2f} seconds")

                if max_val_flipped > threshold:
                    match_time_flipped = frame_index / fps
                    print(f"Match (flipped) found at time: {match_time_flipped:.2f} seconds")

            frame_index += 1
    finally:
        # Release the capture even if matching raised.
        video_capture.release()

# Example usage: scan the sample video for the sample image with the
# rotation/mirror matcher defined in this cell. Both paths are relative, so
# they resolve against the notebook's current working directory.
video_path = 'input_video/test_video.mp4'
input_image_path = 'input_images/test2.jpg'
find_image_in_video(video_path, input_image_path)


In [2]:
import cv2
import numpy as np

def find_image_in_video(video_path, input_image_path, threshold=0.51):
    """Find the input image in the given video using multi-scale template matching.

    The input image is resized to 20 scales from 100% down to 20% of its
    original size. Every grayscale frame is matched against each scaled
    template that fits inside the frame, and a line is printed for every scale
    whose best normalised correlation score exceeds ``threshold``.

    Args:
        video_path: Path of the video file to scan.
        input_image_path: Path of the image to look for.
        threshold: Minimum TM_CCOEFF_NORMED score that counts as a match.

    Raises:
        OSError: If the image cannot be read or the video cannot be opened.
        ValueError: If the video does not report a positive frame rate, which
            is required to convert frame indices to timestamps.
    """
    # Read input image; cv2.imread signals failure by returning None.
    input_image = cv2.imread(input_image_path, cv2.IMREAD_GRAYSCALE)
    if input_image is None:
        raise OSError(f"Could not read input image: {input_image_path}")

    # The scaled templates do not depend on the frame, so build them once
    # (largest first) instead of resizing 20 times per frame.
    scaled_templates = []
    for scale in np.linspace(0.2, 1.0, 20)[::-1]:
        scaled_w = int(input_image.shape[1] * scale)
        scaled_h = int(input_image.shape[0] * scale)
        # A very small source image can truncate to 0 pixels, which cv2.resize rejects.
        if scaled_w < 1 or scaled_h < 1:
            continue
        scaled_templates.append(cv2.resize(input_image, (scaled_w, scaled_h)))

    # Read video
    video_capture = cv2.VideoCapture(video_path)
    try:
        if not video_capture.isOpened():
            raise OSError(f"Could not open video: {video_path}")

        fps = video_capture.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            raise ValueError(f"Video reports an invalid frame rate: {fps}")

        # Read until the stream ends rather than trusting the reported frame
        # count, which containers may report inaccurately.
        frame_index = 0
        while True:
            ret, frame = video_capture.read()
            if not ret:
                break

            # Convert frame to grayscale
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            for template in scaled_templates:
                # matchTemplate requires the template to fit inside the frame.
                if gray_frame.shape[0] < template.shape[0] or gray_frame.shape[1] < template.shape[1]:
                    continue

                # Match template
                result = cv2.matchTemplate(gray_frame, template, cv2.TM_CCOEFF_NORMED)
                max_val = cv2.minMaxLoc(result)[1]

                # Check if match found
                if max_val > threshold:
                    match_time = frame_index / fps
                    print(f"Match found at time: {match_time:.2f} seconds")

            frame_index += 1
    finally:
        # Release the capture even if matching raised.
        video_capture.release()

# Example usage: scan the sample video for the sample image with the
# multi-scale matcher defined in this cell. Both paths are relative, so they
# resolve against the notebook's current working directory.
video_path = 'input_video/test_video.mp4'
input_image_path = 'input_images/test1.jpg'
find_image_in_video(video_path, input_image_path)


Match found at time: 5.68 seconds
Match found at time: 5.72 seconds
Match found at time: 9.57 seconds
Match found at time: 9.61 seconds
Match found at time: 15.73 seconds
Match found at time: 24.67 seconds
Match found at time: 38.45 seconds
Match found at time: 38.49 seconds
Match found at time: 38.53 seconds
Match found at time: 38.57 seconds
Match found at time: 38.61 seconds
Match found at time: 39.72 seconds
Match found at time: 39.76 seconds
Match found at time: 39.80 seconds
Match found at time: 44.05 seconds
Match found at time: 51.09 seconds
Match found at time: 51.13 seconds
Match found at time: 51.36 seconds
Match found at time: 51.48 seconds
Match found at time: 51.52 seconds
Match found at time: 51.56 seconds
Match found at time: 51.60 seconds
Match found at time: 51.64 seconds
Match found at time: 51.68 seconds
Match found at time: 51.72 seconds
Match found at time: 51.76 seconds
Match found at time: 51.80 seconds
Match found at time: 51.84 seconds
Match found at time: 51.

In [None]:
import cv2
import streamlit as st

def main():
    """Streamlit app that reports when an uploaded image appears in an uploaded video.

    SIFT descriptors of the uploaded image are matched against every video
    frame with a brute-force matcher and a 0.75 ratio test. A run of
    consecutive frames with at least 6 good matches counts as one occurrence;
    its start time and duration are written to the page when the run ends,
    including a run that is still open when the video ends.

    The uploads are in-memory file objects rather than paths, so the image is
    decoded from its bytes and the video is spooled to a temporary file that
    is deleted once processing finishes.
    """
    # Local imports: this cell's import block only provides cv2 and streamlit.
    import os
    import tempfile

    import numpy as np

    st.title("SIFT Matching")

    st.subheader("Upload Input Image:")
    image_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    st.subheader("Upload Video:")
    video_file = st.file_uploader("Choose a video...", type=["mp4"])

    # file_uploader returns None until the user has chosen a file.
    if image_file is None or video_file is None:
        return

    # cv2.imread needs a filesystem path; decode the uploaded bytes instead.
    image_bytes = np.frombuffer(image_file.getvalue(), dtype=np.uint8)
    input_image = cv2.imdecode(image_bytes, cv2.IMREAD_GRAYSCALE)
    if input_image is None:
        st.error("Could not decode the uploaded image.")
        return

    sift = cv2.SIFT_create()

    keypoints_input, descriptors_input = sift.detectAndCompute(input_image, None)
    if descriptors_input is None:
        st.error("No SIFT features were found in the uploaded image.")
        return

    bf = cv2.BFMatcher()

    # cv2.VideoCapture also needs a path, so write the upload to a temp file.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_video:
        tmp_video.write(video_file.getvalue())
        video_path = tmp_video.name

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            st.error("Could not open the uploaded video.")
            return

        occurrences = 0
        occurrence_start = 0
        occurrence_duration = 0
        in_occurrence = False

        while True:
            ret, frame = cap.read()
            if not ret:
                # Flush an occurrence that was still running at the last frame.
                if in_occurrence:
                    st.write(f"Occurrence {occurrences}: Start time: {occurrence_start:.2f}s, Duration: {occurrence_duration:.2f}s")
                st.write("End of video reached.")
                break

            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            keypoints_frame, descriptors_frame = sift.detectAndCompute(frame_gray, None)

            good_matches = []
            # A frame without features yields None, and k=2 matching needs at
            # least two frame descriptors; treat such frames as "no match".
            if descriptors_frame is not None and len(descriptors_frame) >= 2:
                matches = bf.knnMatch(descriptors_input, descriptors_frame, k=2)
                for pair in matches:
                    if len(pair) < 2:
                        continue
                    m, n = pair
                    # Ratio test: keep matches clearly better than the runner-up.
                    if m.distance < 0.75 * n.distance:
                        good_matches.append(m)

            if len(good_matches) >= 6:
                if not in_occurrence:
                    occurrence_start = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000
                    occurrences += 1
                    in_occurrence = True

                occurrence_duration = (cap.get(cv2.CAP_PROP_POS_MSEC) / 1000) - occurrence_start
            else:
                if in_occurrence:
                    st.write(f"Occurrence {occurrences}: Start time: {occurrence_start:.2f}s, Duration: {occurrence_duration:.2f}s")
                    in_occurrence = False
    finally:
        # Release the capture first so the temp file can be removed on every platform.
        cap.release()
        os.remove(video_path)

# Start the app only when this code runs as the main module, not when it is
# imported from another module.
if __name__ == "__main__":
    main()


In [4]:
import cv2

# Detect the time ranges in which the input image is visible in the video.
# SIFT descriptors of the image are matched against every frame; a run of
# consecutive frames with at least 6 ratio-test matches is one occurrence.

print("Loading input image...")
input_image = cv2.imread('input_images/test2.jpg', cv2.IMREAD_GRAYSCALE)
# cv2.imread signals failure by returning None instead of raising.
if input_image is None:
    raise OSError("Could not read input image: input_images/test2.jpg")

print("Loading video...")
cap = cv2.VideoCapture('input_video/test_video.mp4')
if not cap.isOpened():
    raise OSError("Could not open video: input_video/test_video.mp4")

sift = cv2.SIFT_create()

keypoints_input, descriptors_input = sift.detectAndCompute(input_image, None)
if descriptors_input is None:
    cap.release()
    raise ValueError("No SIFT features were found in the input image.")

bf = cv2.BFMatcher()

occurrences = 0
occurrence_start = 0
occurrence_duration = 0
# Non-empty while an occurrence is in progress; emptied when it is reported.
prev_matches = []

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # Flush an occurrence that was still running at the last frame.
            if prev_matches:
                print(f"Occurrence {occurrences}: Start time: {occurrence_start:.2f}s, Duration: {occurrence_duration:.2f}s")
                prev_matches = []
            print("End of video reached.")
            break

        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        keypoints_frame, descriptors_frame = sift.detectAndCompute(frame_gray, None)

        good_matches = []
        # A frame without features yields None, and k=2 matching needs at
        # least two frame descriptors; treat such frames as "no match".
        if descriptors_frame is not None and len(descriptors_frame) >= 2:
            matches = bf.knnMatch(descriptors_input, descriptors_frame, k=2)
            for pair in matches:
                if len(pair) < 2:
                    continue
                m, n = pair
                # Ratio test: keep matches clearly better than the runner-up.
                if m.distance < 0.75 * n.distance:
                    good_matches.append(m)

        if len(good_matches) >= 6:
            if not prev_matches:
                occurrence_start = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000
                occurrences += 1

            prev_matches = good_matches
            occurrence_duration = (cap.get(cv2.CAP_PROP_POS_MSEC) / 1000) - occurrence_start
        else:
            if prev_matches:
                print(f"Occurrence {occurrences}: Start time: {occurrence_start:.2f}s, Duration: {occurrence_duration:.2f}s")
                prev_matches = []
finally:
    # Release the capture even if matching raised. No GUI windows are created
    # here, so there is nothing for cv2.destroyAllWindows() to close.
    cap.release()

Loading input image...
Loading video...
Occurrence 1: Start time: 0.00s, Duration: 3.64s
Occurrence 2: Start time: 29.76s, Duration: 5.08s
End of video reached.
