Question 3 a and b

In [None]:
'''
Consider an image pair from your footage where the images are separated by at least 2 seconds. Also ensure there is at least some overlap of scenes in the two images. 
Pick a pixel (super-pixel patch as discussed in class) on image 1 and a corresponding pixel (super-pixel patch as discussed in class) on image 2 (the pixel on image 2 that corresponds to the same object area on image 1). Compute the SIFT feature for each of these 2 patches. Compute the sum of squared difference (SSD) value between the SIFT vector for these two pixels. Use MATLAB or Python or C++ implementation -- The MATLAB code for SIFT feature extraction and matching can be downloaded from here: https://www.cs.ubc.ca/~lowe/keypoints/ (Please first read the ReadMe document in the folder to find instructions to execute the code).
Compute the Homography matrix between these two images using MATLAB or Python or C++ implementation. Compute its inverse.


Both a and b sections are implemented in this code.

'''

In [4]:
import cv2
import depthai as dai
import numpy as np

import time
from pathlib import Path
from matplotlib import pyplot as plt

In [3]:
# Build the DepthAI pipeline: one mono (grayscale) camera streamed to the host.
pipeline = dai.Pipeline()

# Mono camera node on board socket CAM_B, configured for 480p.
cam_left = pipeline.create(dai.node.MonoCamera)
cam_left.setResolution(dai.MonoCameraProperties.SensorResolution.THE_480_P)
cam_left.setBoardSocket(dai.CameraBoardSocket.CAM_B)

# XLink output node: the camera output is linked to it and exposed to the
# host under the stream name 'left'.
xout_left = pipeline.create(dai.node.XLinkOut)
cam_left.out.link(xout_left.input)
xout_left.setStreamName('left')

In [None]:
## RECORDING FOR 10 SECONDS
## THE FRAMES ARE BEING STORED IN DIRECTORY "video_frames"

RECORD_SECONDS = 10
FRAME_DIR = Path("video_frames")

# Make sure the destination directory exists before any frame is written.
FRAME_DIR.mkdir(parents=True, exist_ok=True)

# Connect and start the pipeline
with dai.Device(pipeline) as device:

    # Output queue used to fetch the grayscale frames from the 'left' stream
    q = device.getOutputQueue(name="left", maxSize=4, blocking=False)

    # Measure the recording duration with the monotonic clock so that a
    # wall-clock adjustment cannot shorten or extend the capture.
    deadline = time.monotonic() + RECORD_SECONDS

    try:
        while time.monotonic() < deadline:
            # q.get() waits until a new packet has arrived
            inSrc = q.get()
            # getCvFrame() returns the frame as an OpenCV-compatible image
            frame = inSrc.getCvFrame()
            cv2.imshow("left", frame)
            cv2.waitKey(1)

            # File name is the wall-clock time in units of 1/10000 s, so the
            # capture time of a frame can be recovered from its name.
            frame_path = f"video_frames/{int(time.time() * 10000)}.png"
            # cv2.imwrite reports failure through its return value; surface
            # it instead of silently recording nothing.
            if not cv2.imwrite(frame_path, frame):
                raise OSError(f"Failed to write frame to {frame_path}")
    finally:
        # Close the preview window even if the loop is interrupted or fails.
        cv2.destroyAllWindows()

In [2]:
def _read_gray(path):
    """Load the image at ``path`` and return it as a single-channel grayscale array.

    Raises:
        OSError: if OpenCV cannot read the file. ``cv2.imread`` signals this
            by returning ``None`` rather than raising, which would otherwise
            show up later as an obscure ``cvtColor`` assertion error.
    """
    image = cv2.imread(path)
    if image is None:
        raise OSError(f"Could not read image (missing or unreadable): {path}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


# Two recorded frames. The file names are capture timestamps in units of
# 1/10000 s (see the recording cell), so these frames are ~2.4 s apart.
img1 = _read_gray('video_frames/17109832415968.png')
img2 = _read_gray('video_frames/17109832440328.png')

In [11]:
# Patch selection: a square window centred on one hand-picked pixel per image.
x1, y1 = 100, 100
x2, y2 = 150, 150
patch_size = 5

# Half-width of the window; each slice spans centre - half .. centre + half
# inclusive (rows are indexed by y, columns by x).
half = patch_size // 2
patch_img1 = img1[y1 - half:y1 + half + 1, x1 - half:x1 + half + 1]
patch_img2 = img2[y2 - half:y2 + half + 1, x2 - half:x2 + half + 1]

# Show both patches for one second, then close the windows.
for window_title, patch in (('Patch in Image 1', patch_img1),
                            ('Patch in Image 2', patch_img2)):
    cv2.imshow(window_title, patch)
cv2.waitKey(1000)
cv2.destroyAllWindows()


In [12]:
# Detect SIFT keypoints and compute their descriptors on both full images,
# then match descriptors from image 1 against image 2.

# Ratio-test threshold: a match is kept only when its best neighbour is
# clearly closer than the second-best one.
RATIO_THRESHOLD = 0.6

sift = cv2.SIFT_create()

kp_img1, desc_img1 = sift.detectAndCompute(img1, None)
kp_img2, desc_img2 = sift.detectAndCompute(img2, None)

# detectAndCompute yields descriptors=None when no keypoints are found;
# fail with a clear message instead of an opaque matcher error.
if desc_img1 is None or desc_img2 is None:
    raise ValueError("SIFT found no keypoints in at least one of the images")

bf = cv2.BFMatcher()
# For every descriptor of image 1, fetch its two nearest neighbours in image 2.
matches = bf.knnMatch(desc_img1, desc_img2, k=2)

# knnMatch can return fewer than k neighbours for a query (e.g. when image 2
# has a single descriptor), so only apply the ratio test to complete pairs.
good_points = [
    pair[0]
    for pair in matches
    if len(pair) == 2 and pair[0].distance < RATIO_THRESHOLD * pair[1].distance
]

In [14]:
# Gather the descriptors of every ratio-test match, row i of each array
# belonging to the same matched pair. Integer index arrays keep the result
# two-dimensional even when there are no matches.
query_idx = np.array([m.queryIdx for m in good_points], dtype=np.intp)
train_idx = np.array([m.trainIdx for m in good_points], dtype=np.intp)
matched_desc_img1 = desc_img1[query_idx]
matched_desc_img2 = desc_img2[train_idx]

squared_diff = (matched_desc_img1 - matched_desc_img2) ** 2

# SSD between the two SIFT vectors of each matched pair (one value per pair).
# This is the quantity the task asks for: the SSD of two corresponding points.
ssd_per_match = np.sum(squared_diff, axis=1)

# Aggregate SSD over all matched pairs (kept for backward compatibility).
ssd_value = np.sum(squared_diff)
print("SSD value:", ssd_value)

if ssd_per_match.size:
    best = int(np.argmin(ssd_per_match))
    print("Matched pairs:", ssd_per_match.size)
    print("Lowest per-pair SSD:", ssd_per_match[best])


SSD value: 7142664.0


In [15]:
# Estimate the homography that maps image-1 coordinates onto image 2 from the
# ratio-test matches, then outline where image 1's frame lands in image 2.

# A homography has 8 degrees of freedom, so at least 4 correspondences are needed.
MIN_MATCHES = 4
if len(good_points) < MIN_MATCHES:
    raise ValueError(
        f"Need at least {MIN_MATCHES} matches to estimate a homography, "
        f"got {len(good_points)}"
    )

# Matched keypoint locations, shaped (N, 1, 2) as expected by OpenCV.
query_pts = np.float32([kp_img1[m.queryIdx]
                .pt for m in good_points]).reshape(-1, 1, 2)

train_pts = np.float32([kp_img2[m.trainIdx]
                .pt for m in good_points]).reshape(-1, 1, 2)

# RANSAC with a 5.0 px reprojection threshold; mask marks the inlier matches.
matrix, mask = cv2.findHomography(query_pts, train_pts, cv2.RANSAC, 5.0)
if matrix is None:
    raise ValueError("cv2.findHomography could not estimate a homography")

matches_mask = mask.ravel().tolist()

h,w = img1.shape

# Corners of image 1, projected into image 2 through the homography.
pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)

dst = cv2.perspectiveTransform(pts, matrix)

# cv2.polylines draws in place: work on a copy so img2 stays pristine for the
# cells that reuse it (match drawing, re-running SIFT).
homography = cv2.polylines(img2.copy(), [np.int32(dst)], True, (255, 0, 0), 3)

cv2.imshow("Homography", homography)
cv2.imshow("Img", img1)
cv2.waitKey(1000)
cv2.destroyAllWindows()

In [16]:
# Homography matrix: display the 3x3 matrix returned by cv2.findHomography,
# estimated from image-1 keypoint locations (source) to image-2 keypoint
# locations (destination).
matrix

array([[ 1.02542872e+00, -1.93005712e-02,  5.57186410e+01],
       [-1.24831344e-02,  1.15331166e+00, -7.46176095e+01],
       [-2.90292532e-04,  1.04463575e-04,  1.00000000e+00]])

In [17]:
# Draw the ratio-test matches side by side (unmatched keypoints are omitted)
# and save the visualisation to disk.
img3 = cv2.drawMatches(img1, kp_img1, img2, kp_img2, good_points, None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)

# cv2.imwrite reports failure through its return value rather than raising.
if not cv2.imwrite("homography.png", img3):
    raise OSError("Failed to write homography.png")

# Show the result for one second, like the other preview cells. The previous
# trailing cv2.waitKey(0) waited for a key press with no timeout, which
# stalls a Restart & Run All of the notebook.
try:
    cv2.imshow("lines", img3)
    cv2.waitKey(1000)
finally:
    cv2.destroyAllWindows()

In [18]:
# Invert the homography with NumPy and print the result under a heading line.
inverse_matrix = np.linalg.inv(matrix)
print("Inverse Homography Matrix:", inverse_matrix, sep="\n")

Inverse Homography Matrix:
[[ 9.60739661e-01  2.07860997e-02 -5.19800992e+01]
 [ 2.82519837e-02  8.61858698e-01  6.27356736e+01]
 [ 2.75944245e-04 -8.39987912e-05  9.78356973e-01]]
