# 2. Frame Extraction

In [18]:
import cv2
import os

def extract_frames(video_path, output_dir, frame_rate=1):
    """Save frames of a video as sequentially numbered JPEG files.

    Args:
        video_path: Path of the video file to read.
        output_dir: Folder that receives ``frame_0000.jpg``, ``frame_0001.jpg``, ...
            It is created when missing.
        frame_rate: Sampling interval in frames: every ``frame_rate``-th decoded
            frame is saved. The default of 1 keeps every frame. Values below 1
            are treated as 1.

    Returns:
        None. Video properties, progress and a final summary are printed.
    """
    os.makedirs(output_dir, exist_ok=True)

    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print(f"Error: Could not open video file {video_path}")
        return

    # Clamp so that a zero or negative interval cannot trigger a modulo-by-zero.
    step = max(1, int(frame_rate))
    saved = 0  # number of frames written; also used to number the output files

    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        # A reported FPS of 0 (or NaN) would make the division fail or be
        # meaningless, so report a duration of 0 in that case.
        duration = total_frames / fps if fps > 0 else 0.0

        print(f"Video FPS: {fps}")
        print(f"Total frames: {total_frames}")
        print(f"Duration: {duration:.2f} seconds")

        index = 0  # position of the current frame in the decoded stream
        while True:
            ret, frame = video.read()
            if not ret:
                # read() reports failure once no more frames can be decoded.
                break

            if index % step == 0:
                frame_filename = os.path.join(output_dir, f"frame_{saved:04d}.jpg")
                # Only count the frame when the write was reported as successful.
                if cv2.imwrite(frame_filename, frame):
                    print(f"Extracted: {frame_filename}")
                    saved += 1
                else:
                    print(f"Warning: Could not write {frame_filename}")

            index += 1
    finally:
        # Release the capture even when reading or writing raises.
        video.release()

    print(f"Extraction complete. {saved} frames extracted to {output_dir}")

def get_file_paths():
    """Return the path of every file below the ``videos`` folder, searched recursively."""
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk("videos")
        for name in names
    ]

# An existing 'frames' folder is taken as proof that extraction already ran,
# so the step is skipped in that case.
if os.path.isdir('frames'):
    print("Frames had already been extracted, skipping...")
else:
    file_paths = get_file_paths()

    # Each video gets its own sub-folder of 'frames', named after the video file.
    for file_path in file_paths:
        frames_folder = os.path.join("frames", os.path.basename(file_path))
        extract_frames(file_path, frames_folder)

Frames had already been extracted, skipping...


# 3. Feature Detection and Description

In [19]:
import time

#load the first frames of every video folder (input for ORB, SIFT, and BRISK detection)
def load_images_from_all_videos(base_folder, max_images_per_video=10):
    """Load the first images of every sub-folder of ``base_folder``.

    Sub-folders and the files inside them are visited in sorted order. Only
    files with a .jpg, .jpeg, .png or .bmp extension (any letter case) are
    considered, and at most ``max_images_per_video`` of them per sub-folder.

    Returns:
        A list of ``(name, img_color, img_gray)`` tuples where ``name`` is
        ``"<sub-folder>/<file name>"``. Files that cannot be read are reported
        with a warning and left out.
    """
    image_extensions = ['.jpg', '.jpeg', '.png', '.bmp']
    all_images = []

    for video_folder in sorted(os.listdir(base_folder)):
        full_path = os.path.join(base_folder, video_folder)
        if os.path.isdir(full_path):  # plain files next to the video folders are ignored
            candidates = [
                name
                for name in sorted(os.listdir(full_path))
                if os.path.splitext(name)[1].lower() in image_extensions
            ]

            for filename in candidates[:max_images_per_video]:
                img_path = os.path.join(full_path, filename)
                img_color = cv2.imread(img_path)
                if img_color is not None:
                    img_gray = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)
                    all_images.append((video_folder + "/" + filename, img_color, img_gray))
                else:
                    print(f"Warning: Couldn't read {img_path}")

    return all_images

#detect features using ORB, SIFT, and BRISK
def detect_features(detector, images, use_gray=True):
    """Run ``detector`` on every image and time each call.

    Args:
        detector: Object exposing ``detectAndCompute(image, mask)`` that returns
            ``(keypoints, descriptors)``.
        images: Iterable of ``(filename, img_color, img_gray)`` tuples.
        use_gray: When true the grayscale image is passed to the detector,
            otherwise the colour image.

    Returns:
        A list with one dict per image holding the keys ``'filename'``,
        ``'image'`` (always the colour image), ``'keypoints'``,
        ``'descriptors'`` and ``'time'`` (seconds spent in the detector call).
    """
    results = []
    for filename, img_color, img_gray in images:
        img = img_gray if use_gray else img_color
        # perf_counter() is monotonic and high resolution; the wall clock
        # returned by time.time() can be adjusted while the measurement runs.
        start = time.perf_counter()
        keypoints, descriptors = detector.detectAndCompute(img, None)
        elapsed = time.perf_counter() - start
        results.append({
            'filename': filename,
            'image': img_color,
            'keypoints': keypoints,
            'descriptors': descriptors,
            'time': elapsed
        })
    return results

#draw keypoints on images
def draw_keypoints(results, alg_name, output_dir="output_keypoints"):
    """Write one keypoint visualisation per result into ``output_dir``.

    Args:
        results: Iterable of dicts with the keys ``'image'``, ``'keypoints'``
            and ``'filename'``.
        alg_name: Label placed in front of every output file name.
        output_dir: Destination folder, created when it does not exist.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for entry in results:
        annotated = cv2.drawKeypoints(entry['image'], entry['keypoints'], None, color=(0, 255, 0))
        # Flatten "<video>/<frame>" into a single file name.
        flat_name = "_".join(entry['filename'].split('/'))
        cv2.imwrite(os.path.join(output_dir, f"{alg_name}_{flat_name}"), annotated)

#analyze results
def analysis(results, alg_name):
    """Print per-image and average keypoint counts and timings for one algorithm.

    Args:
        results: List of dicts with the keys ``'filename'``, ``'keypoints'``
            and ``'time'`` (seconds).
        alg_name: Label used in the section header.
    """
    print(f"\n - - {alg_name} - -")

    #print summary of results
    for res in results:
        print(f"{res['filename']}: {len(res['keypoints'])} keypoints in {res['time']:.4f}s")

    if not results:
        print("No images were processed.")
        return

    avg_kps = sum(len(res['keypoints']) for res in results) / len(results)
    avg_time = sum(res['time'] for res in results) / len(results)
    # Same precision as the per-image lines: with two decimals, detection
    # times of a few milliseconds would all be reported as 0.00s.
    print(f"Average keypoints: {avg_kps:.2f} | Average Time: {avg_time:.4f}s")

frame_dir = "frames"  # root folder with one sub-folder of frames per video
images = load_images_from_all_videos(frame_dir, max_images_per_video=10)

if len(images) == 0:
    print("No images found in any of the video subfolders.")
    exit()

# One detector per algorithm under comparison.
orb, sift, brisk = cv2.ORB_create(nfeatures=1000), cv2.SIFT_create(), cv2.BRISK_create()

# Run every detector over the same set of frames.
orb_results = detect_features(orb, images)
sift_results = detect_features(sift, images)
brisk_results = detect_features(brisk, images)

labelled_results = (
    ("ORB", orb_results),
    ("SIFT", sift_results),
    ("BRISK", brisk_results),
)

# Visualisations are only written while the output folder does not exist yet.
if not os.path.exists("output_keypoints"):
    for alg_name, alg_results in labelled_results:
        draw_keypoints(alg_results, alg_name)

# Print the comparison summary for each algorithm.
for alg_name, alg_results in labelled_results:
    analysis(alg_results, alg_name)


 - - ORB - -
Vid1.mp4/frame_0000.jpg: 976 keypoints in 0.0047s
Vid1.mp4/frame_0001.jpg: 977 keypoints in 0.0046s
Vid1.mp4/frame_0002.jpg: 976 keypoints in 0.0043s
Vid1.mp4/frame_0003.jpg: 976 keypoints in 0.0044s
Vid1.mp4/frame_0004.jpg: 977 keypoints in 0.0044s
Vid1.mp4/frame_0005.jpg: 983 keypoints in 0.0049s
Vid1.mp4/frame_0006.jpg: 985 keypoints in 0.0044s
Vid1.mp4/frame_0007.jpg: 985 keypoints in 0.0046s
Vid1.mp4/frame_0008.jpg: 980 keypoints in 0.0049s
Vid1.mp4/frame_0009.jpg: 980 keypoints in 0.0050s
Vid2.mp4/frame_0000.jpg: 947 keypoints in 0.0043s
Vid2.mp4/frame_0001.jpg: 954 keypoints in 0.0045s
Vid2.mp4/frame_0002.jpg: 948 keypoints in 0.0049s
Vid2.mp4/frame_0003.jpg: 957 keypoints in 0.0044s
Vid2.mp4/frame_0004.jpg: 952 keypoints in 0.0044s
Vid2.mp4/frame_0005.jpg: 960 keypoints in 0.0043s
Vid2.mp4/frame_0006.jpg: 951 keypoints in 0.0046s
Vid2.mp4/frame_0007.jpg: 946 keypoints in 0.0043s
Vid2.mp4/frame_0008.jpg: 949 keypoints in 0.0042s
Vid2.mp4/frame_0009.jpg: 953 keypoin

# 4/5. Feature Matching and Outlier Rejection + Fundamental Matrix Computation

In [20]:
import matplotlib.pyplot as plt
import numpy as np

#load pairs of frames (step frames apart) from every video folder
def load_image_pairs(base_folder, step=1, max_pairs_per_video=3):
    """Load grayscale frame pairs from every sub-folder of ``base_folder``.

    Args:
        base_folder: Folder containing one sub-folder of frames per video.
        step: Offset, in sorted file order, between the two frames of a pair.
            Expected to be at least 1.
        max_pairs_per_video: Upper bound on the pairs taken from one sub-folder.

    Returns:
        A list of ``(name1, name2, img1, img2)`` tuples where the names are
        ``"<sub-folder>/<file name>"``. Pairs with an unreadable image are
        left out.
    """
    image_suffixes = (".jpg", ".jpeg", ".png")
    pairs = []

    for video_folder in sorted(os.listdir(base_folder)):
        folder_path = os.path.join(base_folder, video_folder)
        if not os.path.isdir(folder_path):
            continue  # skip plain files stored next to the video folders

        # Compare extensions case-insensitively, as load_images_from_all_videos
        # does, so that frames named e.g. 'FRAME.JPG' are not silently dropped.
        filenames = sorted(
            f for f in os.listdir(folder_path) if f.lower().endswith(image_suffixes)
        )
        # Negative when there are fewer than step + 1 frames; range() then yields nothing.
        num_pairs = min(len(filenames) - step, max_pairs_per_video)

        for i in range(num_pairs):
            first, second = filenames[i], filenames[i + step]

            img1 = cv2.imread(os.path.join(folder_path, first), cv2.IMREAD_GRAYSCALE)
            img2 = cv2.imread(os.path.join(folder_path, second), cv2.IMREAD_GRAYSCALE)

            if img1 is not None and img2 is not None:
                pairs.append((f"{video_folder}/{first}", f"{video_folder}/{second}", img1, img2))
    return pairs

#get feature detector
def get_detector(name="SIFT"):
    """Create the feature detector selected by ``name``.

    ``"ORB"`` gives an ORB detector limited to 1000 features and ``"BRISK"`` a
    BRISK detector. Any other value, including the default, gives SIFT.
    """
    if name == "BRISK":
        return cv2.BRISK_create()
    if name == "ORB":
        return cv2.ORB_create(nfeatures=1000)
    # Fallback for "SIFT" and for every unrecognised name.
    return cv2.SIFT_create()

#get matcher based on descriptor type
def get_matcher(desc_type):
    """Create a descriptor matcher for the given descriptor type.

    ``"float"`` gives a FLANN-based matcher. Every other value gives a
    brute-force Hamming matcher with cross-checking enabled.
    """
    if desc_type != "float":
        return cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

    # algorithm=1 is the value OpenCV's tutorials use for FLANN_INDEX_KDTREE.
    index_params = {"algorithm": 1, "trees": 5}
    search_params = {"checks": 50}
    return cv2.FlannBasedMatcher(index_params, search_params)

#match features between two images
def match_features(detector, img1, img2):
    """Detect features in two images and match their descriptors.

    Float32 descriptors are matched with ``knnMatch(k=2)`` and filtered with a
    0.75 ratio test. Any other descriptor type is matched with ``match`` and
    the matches are returned sorted by ascending distance.

    Args:
        detector: Object exposing ``detectAndCompute(image, mask)``.
        img1: First image, passed to the detector unchanged.
        img2: Second image, passed to the detector unchanged.

    Returns:
        ``(kp1, kp2, good_matches)``. ``good_matches`` is an empty list when
        either image yields no descriptors.
    """
    kp1, des1 = detector.detectAndCompute(img1, None)
    kp2, des2 = detector.detectAndCompute(img2, None)

    if des1 is None or des2 is None:
        return kp1, kp2, []

    #check descriptor type
    desc_type = "float" if des1.dtype == np.float32 else "binary"
    matcher = get_matcher(desc_type)

    if desc_type == "float":
        good_matches = []
        for neighbours in matcher.knnMatch(des1, des2, k=2):
            # Fewer than two neighbours can come back for a query (e.g. when the
            # second image has a single descriptor); the ratio test cannot be
            # applied then, so the candidate is skipped instead of crashing.
            if len(neighbours) < 2:
                continue
            best, second = neighbours[0], neighbours[1]
            if best.distance < 0.75 * second.distance:
                good_matches.append(best)
    else:
        good_matches = sorted(matcher.match(des1, des2), key=lambda m: m.distance)

    return kp1, kp2, good_matches

#ransac filter to remove outliers
def ransac_filter(kp1, kp2, matches):
    """Keep the matches consistent with a RANSAC-estimated fundamental matrix.

    Args:
        kp1: Keypoints of the first image, indexed by ``match.queryIdx``.
        kp2: Keypoints of the second image, indexed by ``match.trainIdx``.
        matches: Matches between the two keypoint sets.

    Returns:
        ``(inliers, F)``: the matches flagged as inliers and the estimated
        fundamental matrix. ``([], None)`` is returned when fewer than 8
        matches are given or when no matrix could be estimated.
    """
    if len(matches) < 8:
        return [], None

    #extract location of good matches
    pts1 = np.float32([kp1[m.queryIdx].pt for m in matches])
    pts2 = np.float32([kp2[m.trainIdx].pt for m in matches])

    F, mask = cv2.findFundamentalMat(pts1, pts2, cv2.RANSAC)
    # The estimation can fail (e.g. degenerate point configurations), in which
    # case no matrix/mask is returned; indexing the mask would then raise.
    if F is None or mask is None:
        return [], None

    # Flatten the mask so that each entry is a plain per-match flag.
    inliers = [m for m, keep in zip(matches, mask.ravel()) if keep]

    return inliers, F

#draw matches between two images
def draw_matches(img1, kp1, img2, kp2, matches, title, out_folder="output_matches", filename_prefix="match"):
    """Render the matches between two images and save the figure as a PNG.

    The file is written to ``out_folder`` (created when missing) under the
    name ``<filename_prefix>_<title>.png``, with the title lower-cased and its
    spaces replaced by underscores. Nothing is displayed; the figure is closed
    after saving.
    """
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)

    # Unmatched keypoints are left out of the drawing.
    side_by_side = cv2.drawMatches(
        img1, kp1, img2, kp2, matches, None,
        flags=cv2.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS,
    )

    plt.figure(figsize=(12, 6))
    plt.imshow(side_by_side, cmap='gray')
    plt.title(title)
    plt.axis('off')

    # The title becomes part of the file name.
    slug = title.replace(" ", "_").lower()
    plt.savefig(os.path.join(out_folder, f"{filename_prefix}_{slug}.png"), bbox_inches='tight')
    plt.close()


#parameters of the matching experiment
frame_dir = "frames"
detector_name = "SIFT"
detector = get_detector(detector_name)

pairs = load_image_pairs(frame_dir, step=1, max_pairs_per_video=3)

#match every loaded frame pair and compare the match counts before and after RANSAC
for fname1, fname2, img1, img2 in pairs:
    print(f"\nProcessing pair: {fname1} & {fname2}")
    kp1, kp2, matches = match_features(detector, img1, img2)

    #both figures of a pair share this prefix; the title tells them apart
    pair_prefix = f"{fname1.replace('/', '_')}_vs_{fname2.replace('/', '_')}"

    print(f"Total matches before RANSAC: {len(matches)}")
    draw_matches(img1, kp1, img2, kp2, matches, title="Before RANSAC", filename_prefix=pair_prefix)

    inliers, F = ransac_filter(kp1, kp2, matches)
    print(f"Matches after RANSAC: {len(inliers)}")
    draw_matches(img1, kp1, img2, kp2, inliers, title="After RANSAC", filename_prefix=pair_prefix)


Processing pair: Vid1.mp4/frame_0000.jpg & Vid1.mp4/frame_0001.jpg
Total matches before RANSAC: 240
Matches after RANSAC: 231

Processing pair: Vid1.mp4/frame_0001.jpg & Vid1.mp4/frame_0002.jpg
Total matches before RANSAC: 232
Matches after RANSAC: 229

Processing pair: Vid1.mp4/frame_0002.jpg & Vid1.mp4/frame_0003.jpg
Total matches before RANSAC: 246
Matches after RANSAC: 236

Processing pair: Vid2.mp4/frame_0000.jpg & Vid2.mp4/frame_0001.jpg
Total matches before RANSAC: 154
Matches after RANSAC: 150

Processing pair: Vid2.mp4/frame_0001.jpg & Vid2.mp4/frame_0002.jpg
Total matches before RANSAC: 155
Matches after RANSAC: 151

Processing pair: Vid2.mp4/frame_0002.jpg & Vid2.mp4/frame_0003.jpg
Total matches before RANSAC: 158
Matches after RANSAC: 152

Processing pair: Vid3.mp4/frame_0000.jpg & Vid3.mp4/frame_0001.jpg
Total matches before RANSAC: 245
Matches after RANSAC: 242

Processing pair: Vid3.mp4/frame_0001.jpg & Vid3.mp4/frame_0002.jpg
Total matches before RANSAC: 262
Matches af