Extracting frames from the videos

In [None]:
import cv2
import os

def extract_frames(video_path, skip_frames):
    """Save every ``skip_frames``-th frame of a video as a PNG image.

    Frames are written into the directory that contains ``video_path`` and are
    named ``output_frame_0000.png``, ``output_frame_0001.png``, ... in the
    order in which they are saved.

    Args:
        video_path: Path of the video file to read.
        skip_frames: Sampling step. A frame is saved when its index is a
            multiple of this value (1 saves every frame). Must be positive.

    Raises:
        ValueError: If ``skip_frames`` is not greater than zero.
    """
    # Validate before touching the video: a step of 0 would otherwise fail
    # with ZeroDivisionError inside the loop, after the capture was opened.
    if skip_frames <= 0:
        raise ValueError(f"skip_frames must be a positive number, got {skip_frames!r}")

    # Open the video located at the given path.
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print("Error opening video file")
            return

        video_dir = os.path.dirname(video_path)

        frame_idx = 0        # index of the frame currently being read
        saved_frame_idx = 0  # running number used in the output file names
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No more frames (or a read error): stop.
                break

            # frame_idx is a multiple of skip_frames -> this frame is kept.
            if frame_idx % skip_frames == 0:
                frame_path = os.path.join(video_dir, f"output_frame_{saved_frame_idx:04d}.png")
                # imwrite reports failure through its return value, so check it
                # instead of silently skipping frames.
                if cv2.imwrite(frame_path, frame):
                    saved_frame_idx += 1
                else:
                    print(f"Failed to write frame to {frame_path}")

            frame_idx += 1
    finally:
        # Always release the capture object, even if reading or writing raised.
        cap.release()

# Run the frame extraction for one recorded video.
video_path = 'videos/physics/VID_20240407_163112.mp4'  # Make sure to adjust this path
skip_frames = 10  # Sampling step passed to extract_frames; adjust it based on your video and how much overlap you want
extract_frames(video_path, skip_frames)


Creating panoramic dataset

In [None]:
import cv2
import os

def create_panorama(image_folder, save_folder):
    """Stitch the images found in ``image_folder`` into a single panorama.

    Every regular file in ``image_folder`` that OpenCV can decode is loaded in
    sorted file-name order. On success the panorama is written to
    ``<save_folder>/panorama.jpg`` (the folder is created if needed) and then
    shown in an OpenCV window until a key is pressed.

    Args:
        image_folder: Directory containing the input images.
        save_folder: Directory in which ``panorama.jpg`` is stored.

    Returns:
        None. Problems (too few images, stitching failure, write failure,
        no display available) are reported with ``print``.
    """
    images = []
    # sorted() keeps the images in file-name order, i.e. the order in which
    # the frames were numbered.
    for filename in sorted(os.listdir(image_folder)):
        file_path = os.path.join(image_folder, filename)
        if not os.path.isfile(file_path):
            # Sub-directories cannot be images; do not hand them to imread.
            continue
        img = cv2.imread(file_path)
        if img is not None:
            images.append(img)

    if len(images) < 2:
        print("Need at least two images to create a panorama.")
        return

    stitcher = cv2.Stitcher_create()
    # stitch() runs the whole pipeline internally: feature detection and
    # matching, camera/homography estimation, warping and blending.
    # NOTE(review): earlier notes named SIFT + FLANN as detector/matcher; the
    # defaults actually used depend on the OpenCV version - confirm.
    status, panorama = stitcher.stitch(images)

    if status != cv2.Stitcher_OK:
        print("Error during stitching: ", status)
        return

    # Save BEFORE displaying: imshow/waitKey block and can fail without a GUI,
    # which must not cost us the stitched result.
    os.makedirs(save_folder, exist_ok=True)
    save_path = os.path.join(save_folder, 'panorama.jpg')
    if cv2.imwrite(save_path, panorama):
        print(f"Panorama saved to {save_path}")
    else:
        print(f"Failed to write panorama to {save_path}")

    try:
        cv2.imshow("Panorama", panorama)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    except cv2.error as exc:
        # Raised e.g. by OpenCV builds without GUI support.
        print(f"Could not display panorama window: {exc}")

# Stitch the images stored in image_folder; the result goes into save_folder.
image_folder = "videos/physics"
save_folder = "data/Place4"
create_panorama(image_folder, save_folder)


Cropping to remove stitching errors

In [None]:
import cv2
import os

def crop_and_save_image(image_path, margin=130):
    """Trim a fixed border from an image and save the result beside it.

    The cropped image is written to the same directory as ``image_path`` with
    ``cropped_`` prepended to the original file name.

    Args:
        image_path: Path of the image to crop.
        margin: Number of pixels removed from each of the four sides.
            Defaults to 130, the border previously hard-coded here.

    Raises:
        ValueError: If ``margin`` is negative.
    """
    if margin < 0:
        raise ValueError(f"margin must be non-negative, got {margin!r}")

    image = cv2.imread(image_path)
    if image is None:
        print(f"Error loading image {image_path}")
        return

    h, w = image.shape[:2]

    # The image must be strictly larger than two margins in both directions,
    # otherwise the slice below would be empty and could not be written.
    if h > 2 * margin and w > 2 * margin:
        cropped_image = image[margin:h - margin, margin:w - margin]
        save_path = os.path.join(os.path.dirname(image_path), 'cropped_' + os.path.basename(image_path))
        if cv2.imwrite(save_path, cropped_image):
            print(f"Cropped image saved to {save_path}")
        else:
            print(f"Failed to write cropped image to {save_path}")
    else:
        print(f"Image is too small to crop {margin} pixels from each side.")

# Crop the panorama stored at image_path.
image_path = 'data/Place4/panorama.jpg'  
crop_and_save_image(image_path)


SIFT file for all 4 places

In [None]:
import os
import cv2
import numpy as np

# Build one stacked SIFT descriptor array per place folder found under
# dataset_path and store all of them in a single .npz archive, keyed by the
# folder (place) name.
sift = cv2.SIFT_create()

dataset_path = 'data'
# Compared against the lower-cased file name so that e.g. "IMG_01.JPG" is
# picked up as well.
image_extensions = ('.jpg', '.png', '.jpeg')

places_features = {}
# sorted() makes the processing order (and therefore the row order of the
# stacked descriptors) independent of the file system's listing order.
for place in sorted(os.listdir(dataset_path)):
    place_path = os.path.join(dataset_path, place)
    if not os.path.isdir(place_path):
        continue

    images = [img for img in sorted(os.listdir(place_path)) if img.lower().endswith(image_extensions)]
    descriptors_list = []

    for image_name in images:
        image_path = os.path.join(place_path, image_name)
        image = cv2.imread(image_path)
        if image is None:
            # imread returns None for unreadable/corrupt files; cvtColor would
            # raise on it, so skip the file instead of aborting the whole run.
            print(f"Skipping unreadable image {image_path}")
            continue
        # SIFT is computed on the grayscale version of the image.
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        _, descriptors = sift.detectAndCompute(gray_image, None)
        if descriptors is not None:
            descriptors_list.append(descriptors)

    if descriptors_list:
        # vstack keeps all descriptors of one place together as a single
        # array, which is what gets stored under the place's name.
        combined_descriptors = np.vstack(descriptors_list)
        places_features[place] = combined_descriptors

np.savez('saved_sift_features.npz', **places_features)


In this part we could add Detectron2 to classify images according to whether they will be used for SIFT matching or for map reconstruction.

Find most similar place according to SIFT

In [None]:
import cv2
import numpy as np
import json
import folium
import os
from collections import Counter
from IPython.display import display

def load_place_metadata(filename):
    """Read the UTF-8 encoded JSON file ``filename`` and return its decoded content."""
    with open(filename, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def get_best_match_coordinates(best_match_place, places_metadata):
    """Return the ``'coordinates'`` of the first entry named ``best_match_place``.

    ``places_metadata`` is iterated in order; each entry is looked up by its
    ``'name'`` key. ``None`` is returned when no entry has a matching name.
    """
    candidates = (
        entry['coordinates']
        for entry in places_metadata
        if entry['name'] == best_match_place
    )
    return next(candidates, None)

def find_best_match(query_image_path, data):
    """Return the name of the place whose descriptors best match a query image.

    SIFT descriptors of the query image are matched against the stored
    descriptors of every place with a FLANN matcher; the place with the
    highest number of good matches wins.

    Args:
        query_image_path: Path of the image to identify.
        data: Object exposing ``files`` (iterable of place names) and item
            access by place name returning that place's descriptor array,
            e.g. the archive returned by ``np.load`` for an ``.npz`` file.

    Returns:
        The best matching place name, or ``None`` when no place produced a
        single good match.

    Raises:
        ValueError: If the image cannot be read or yields no SIFT descriptors.
    """
    query_image = cv2.imread(query_image_path)
    if query_image is None:
        raise ValueError(f"Cannot read image at {query_image_path}")
    query_gray = cv2.cvtColor(query_image, cv2.COLOR_BGR2GRAY)
    sift = cv2.SIFT_create()
    _, query_descriptors = sift.detectAndCompute(query_gray, None)
    # detectAndCompute yields None when no keypoint was found (e.g. a blank
    # image); knnMatch would fail on that, so report it as a ValueError.
    if query_descriptors is None or len(query_descriptors) == 0:
        raise ValueError(f"No SIFT descriptors found in {query_image_path}")

    # 1- algorithm 1 selects FLANN's KD-tree index for nearest-neighbour search
    # 2- the 2 nearest neighbours (k=2) are searched for each query descriptor
    # 3- a match is "good" when the nearest neighbour is closer than 75% of
    #    the distance to the second nearest neighbour (ratio test)
    # 4- the place with the highest number of good matches is the most similar
    flann = cv2.FlannBasedMatcher({'algorithm': 1, 'trees': 5}, {'checks': 50})
    max_matches = 0
    best_match_place = None
    for place_name in data.files:
        place_descriptors = data[place_name]
        if len(place_descriptors) < 2:
            # The ratio test needs two neighbours per query descriptor.
            continue
        matches = flann.knnMatch(query_descriptors, place_descriptors, k=2)
        # knnMatch may return fewer than k neighbours for a descriptor;
        # such incomplete pairs cannot be ratio-tested and are ignored.
        good_matches = [
            pair[0]
            for pair in matches
            if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
        ]
        if len(good_matches) > max_matches:
            max_matches = len(good_matches)
            best_match_place = place_name
    return best_match_place

# Identify the place shown by a folder of query images: every image votes for
# its best matching place, and the winning place is shown on a folium map.
if __name__ == "__main__":
    query_folder = 'Query/math/sift'
    places_metadata = load_place_metadata('places_metadata.json')
    results = Counter()

    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects
    # from the archive, so only load files you created yourself. Kept as in
    # the original; confirm whether the archive really needs it.
    # The context manager closes the archive once all queries are processed.
    with np.load('saved_sift_features.npz', allow_pickle=True) as data:
        # sorted() gives a reproducible processing order, which also makes
        # tie-breaking in the vote below reproducible.
        for query_image_name in sorted(os.listdir(query_folder)):
            query_image_path = os.path.join(query_folder, query_image_name)
            try:
                best_match_place = find_best_match(query_image_path, data)
            except (ValueError, cv2.error) as e:
                # One unreadable or unmatchable image must not stop the run.
                print(e)
                continue

            if best_match_place is None:
                # No place produced a good match: do not let None collect
                # votes and possibly win the election.
                print(f"No similar place found for image {query_image_name}")
                continue

            # Count how many times each place was identified as a match
            # across all query images.
            results[best_match_place] += 1
            print(f"The most similar place for image {query_image_name} is {best_match_place}")

    if results:
        # Most frequently occurring place in the results.
        most_common_place, _ = results.most_common(1)[0]
        print(f"According to the voting, the most similar place is {most_common_place}")
        best_match_coordinates = get_best_match_coordinates(most_common_place, places_metadata)
        if best_match_coordinates:
            m = folium.Map(location=[best_match_coordinates['latitude'], best_match_coordinates['longitude']], zoom_start=15)
            folium.Marker(
                location=[best_match_coordinates['latitude'], best_match_coordinates['longitude']],
                popup=f"{most_common_place}",
                icon=folium.Icon(color='green')
            ).add_to(m)
            display(m)
        else:
            print("Failed to find coordinates for the most common match.")
    else:
        print("No similar places were found for any images.")
