In [None]:
!pip install -q gdown

In [None]:
# Download the two input images with gdown and store them in the current
# working directory as walk_img1.png and walk_img2.png (-O sets the output name).
# NOTE(review): recent gdown releases reportedly deprecate the --id flag - confirm
# against the installed version before relying on it.
!gdown --id 1_C2D8FZH1S_G6e7onRCykqn7vzGAQ1HS -O walk_img1.png
!gdown --id 1tOGlbqLMQ8LnbzMkRhyhCF8cYS5GxJaj -O walk_img2.png

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import imageio.v2 as imageio
import cv2

# Load the two images from the working directory, i.e. exactly where the
# download cell wrote them. A hard-coded "/content/" prefix only resolves on
# Colab; the relative path works there and everywhere else.
walk_img_1 = imageio.imread("walk_img1.png")
walk_img_2 = imageio.imread("walk_img2.png")
print(walk_img_1.shape)

# Show both images side by side.
fig, axes = plt.subplots(1, 2, figsize=(10, 10))
for ax, img in zip(axes, (walk_img_1, walk_img_2)):
    # cmap is only honoured for single-channel data; RGB(A) arrays are drawn as-is.
    ax.imshow(img, cmap='gray')
    ax.axis('off')
plt.show()

In [None]:
def _to_gray(img):
    """Return a single-channel version of an image loaded with imageio.

    imageio delivers colour channels in RGB(A) order, so the RGB->gray
    conversion code is required; the BGR code would apply the red and blue
    luminance weights to the wrong channels. Arrays that are already 2-D are
    returned unchanged.
    """
    if img.ndim == 2:
        return img
    return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)


def _detect_corners(gray, max_corners=150, quality_level=0.01, min_distance=10):
    """Detect corners with cv2.goodFeaturesToTrack.

    Returns an (N, 2) integer array of (x, y) pixel coordinates. When the
    detector yields nothing (it may return None), an empty (0, 2) array is
    returned instead so callers can still iterate and take len().
    """
    found = cv2.goodFeaturesToTrack(
        gray,
        maxCorners=max_corners,
        qualityLevel=quality_level,
        minDistance=min_distance,
    )
    if found is None:
        return np.empty((0, 2), dtype=int)
    # The detector returns float coordinates shaped (N, 1, 2); flatten and
    # round to integer pixel positions.
    return np.round(found.reshape(-1, 2)).astype(int)


gray_walk_img_1 = _to_gray(walk_img_1)
gray_walk_img_2 = _to_gray(walk_img_2)

# Corner detector
corners1 = _detect_corners(gray_walk_img_1)
corners2 = _detect_corners(gray_walk_img_2)

print(f"Number of detected corners on the first image = {len(corners1)}")
print(f"Number of detected corners on the second image = {len(corners2)}")

# Overlay the detected corners on each image (one plot call per image).
fig, axes = plt.subplots(1, 2, figsize=(10, 10))
for ax, img, corners in zip(axes, (walk_img_1, walk_img_2), (corners1, corners2)):
    ax.imshow(img, cmap='gray')
    ax.plot(corners[:, 0], corners[:, 1], 'bo', markersize=3)
    ax.axis('off')
plt.show()

In [None]:
# Extract patches around each corner
def extract_patches(img, corners, patch_size=11):
    """Cut a square intensity patch around every corner that lies far enough inside the image.

    Parameters
    ----------
    img : 2-D array
        Single-channel image indexed as img[row, col].
    corners : iterable of (x, y)
        Integer pixel coordinates (x = column, y = row).
    patch_size : int
        Nominal patch width. The extracted window spans ``patch_size // 2``
        pixels on each side of the corner, so its side is
        ``2 * (patch_size // 2) + 1``.

    Returns
    -------
    patches : float32 array, shape (K, side * side)
        One flattened patch per retained corner.
    valid_pts : int array, shape (K, 2)
        The (x, y) coordinates of the retained corners, in input order.

    Corners whose window would cross the image border are skipped. When no
    corner is retained, correctly shaped empty arrays are returned so callers
    can still index them as 2-D arrays.
    """
    half = patch_size // 2
    side = 2 * half + 1
    h, w = img.shape

    patches = []
    valid_pts = []
    for (x, y) in corners:
        # Keep only corners whose full window fits inside the image.
        if half <= x < w - half and half <= y < h - half:
            window = img[y - half:y + half + 1, x - half:x + half + 1]
            patches.append(window.astype(np.float32).flatten())
            valid_pts.append((x, y))

    if not patches:
        # np.array([]) would have shape (0,), which breaks 2-D indexing downstream.
        return np.empty((0, side * side), dtype=np.float32), np.empty((0, 2), dtype=int)
    return np.array(patches), np.array(valid_pts)

# Flattened intensity patches around the corners of each image
patches1, pts1 = extract_patches(gray_walk_img_1, corners1)
patches2, pts2 = extract_patches(gray_walk_img_2, corners2)

# Pairwise sum of squared differences: entry (i, j) compares patch i of the
# first image with patch j of the second image
ssd_matrix = np.sum(
    (patches1[:, np.newaxis, :] - patches2[np.newaxis, :, :]) ** 2,
    axis=2,
)

# For every corner of the first image, pick the second-image corner with the
# smallest SSD and remember that SSD value
matches = [
    (i, j, ssd_matrix[i, j])
    for i, j in enumerate(np.argmin(ssd_matrix, axis=1))
]

# Rank by SSD (ascending) and keep only the top_n best pairs
top_n = 50
matches.sort(key=lambda m: m[2])
matches = matches[:top_n]

# Visualize correspondences on the two images placed side by side;
# second-image x coordinates are shifted by the width of the first image
offset = gray_walk_img_1.shape[1]
concat = np.hstack((gray_walk_img_1, gray_walk_img_2))

plt.figure(figsize=(12, 12))
plt.imshow(concat, cmap='gray')
for idx1, idx2, _ssd in matches:
    (x1, y1), (x2, y2) = pts1[idx1], pts2[idx2]
    x2_shifted = x2 + offset
    plt.plot([x1, x2_shifted], [y1, y2], 'r-', linewidth=0.8)
    plt.plot(x1, y1, 'bo', markersize=3)
    plt.plot(x2_shifted, y2, 'bo', markersize=3)
plt.title(f"Top {top_n} correspondences of corner features using SSD")
plt.axis('off')
plt.show()


In [None]:
# Detect SIFT keypoints and compute their descriptors on both grayscale images
# (no mask is passed, so the whole image is searched).
sift = cv2.SIFT_create()
kp1, desc1 = sift.detectAndCompute(gray_walk_img_1, None)
kp2, desc2 = sift.detectAndCompute(gray_walk_img_2, None)

print(f"Detected {len(kp1)} SIFT keypoints on the first image")
print(f"Detected {len(kp2)} SIFT keypoints on the second image")

# draw keypoints on the two images
img1_kp = cv2.drawKeypoints(gray_walk_img_1, kp1, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
img2_kp = cv2.drawKeypoints(gray_walk_img_2, kp2, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

# OpenCV drawing functions produce BGR images; convert to RGB for matplotlib.
fig, axes = plt.subplots(1, 2, figsize=(12, 12))
for ax, img_kp in zip(axes, (img1_kp, img2_kp)):
    ax.imshow(cv2.cvtColor(img_kp, cv2.COLOR_BGR2RGB))
    ax.axis("off")
plt.show()

In [None]:
# Compute Euclidean (L2) distances between every descriptor pair
# Each row i corresponds to desc1[i]; each column j to desc2[j]
#
# The expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b only needs an (N1, N2)
# matrix, instead of the (N1, N2, D) temporary created by broadcasting the
# difference, which becomes very large with thousands of keypoints.
# float64 keeps the cancellation error of the expansion negligible.
d1 = desc1.astype(np.float64)
d2 = desc2.astype(np.float64)
sq_dists = (
    np.sum(d1 ** 2, axis=1)[:, np.newaxis]
    + np.sum(d2 ** 2, axis=1)[np.newaxis, :]
    - 2.0 * (d1 @ d2.T)
)
# Rounding can push near-zero entries slightly below zero; clip before the
# square root so the result is a true (non-NaN) Euclidean distance.
similarity_matrix = np.sqrt(np.maximum(sq_dists, 0.0))

# Find best match (lowest Euclidean distance) in img2 for each desc in img1
best_train_idx = np.argmin(similarity_matrix, axis=1)
sift_matches = [
    # Plain int/float values are passed so the binding never has to convert
    # numpy scalar types.
    cv2.DMatch(_queryIdx=i, _trainIdx=int(j), _distance=float(similarity_matrix[i, j]))
    for i, j in enumerate(best_train_idx)
]

# Sort sift_matches by its descriptor similarities (smallest distance first)
sift_matches.sort(key=lambda m: m.distance)

# Visualize top_n matches
top_n = 50
img_matches = cv2.drawMatches(
    gray_walk_img_1, kp1, gray_walk_img_2, kp2, sift_matches[:top_n], None,
    flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
)

plt.figure(figsize=(12, 12))
# drawMatches returns a BGR image; convert to RGB for matplotlib.
plt.imshow(cv2.cvtColor(img_matches, cv2.COLOR_BGR2RGB))
plt.title(f"Top {top_n} correspondences of SIFT features")
plt.axis("off")
plt.show()

In [None]:
# Download the input video with gdown and store it in the current working
# directory as car.avi (-O sets the output name).
# NOTE(review): recent gdown releases reportedly deprecate the --id flag - confirm
# against the installed version before relying on it.
!gdown --id 1Cfr634zg1J7g7E3UfepRN517lT_dKaZY -O car.avi

In [None]:
from IPython.display import HTML
from base64 import b64encode

# Path of the video
video_path = "/content/car.avi"

# Encode video to base64 for HTML display
mp4 = video_path.replace(".avi", ".mp4")
!ffmpeg -i "{video_path}" -vcodec h264 -acodec aac -y "{mp4}"  # convert to mp4

# Display
mp4_bytes = open(mp4, "rb").read()
mp4_b64 = b64encode(mp4_bytes).decode()
HTML(f'''
<video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{mp4_b64}" type="video/mp4">
</video>
''')