## **Part 1: Homography-based planar object detection**

In [None]:
import os
import cv2
import copy
import itertools
import numpy as np
import time
import matplotlib.pyplot as plt

# Default size (width, height) applied to every matplotlib figure created afterwards.
plt.rcParams['figure.figsize'] = [40, 10] # figure size in inches

def draw_images(imgs):
    """Display one or more BGR images side by side in a single figure row.

    Args:
        imgs: Sequence of images in OpenCV BGR channel order. Each image is
            converted to RGB before being passed to matplotlib.

    Returns:
        None. An empty sequence is a no-op (no figure is created).
    """
    if len(imgs) == 0:
        # Nothing to draw: avoid opening an empty figure and indexing imgs[0].
        return

    # squeeze=False makes subplots() always return a 2-D array of axes, so the
    # single-image and multi-image cases share one code path.
    _, axes = plt.subplots(1, len(imgs), squeeze=False)
    for ax, img in zip(axes[0], imgs):
        ax.axis("off")
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.show()
    
# Input image paths. Later cells report index 0 as the "Modelo" (model) image
# and index 1 as the "Escena" (scene) image.
filenames = ["images/pkmn.png", "images/bus.png"]

In [None]:
# Load and draw the input images.
# cv2.imread returns None instead of raising when a file is missing or cannot
# be decoded, so fail early and name the offending paths.
imgs = [cv2.imread(f) for f in filenames]
unreadable = [f for f, img in zip(filenames, imgs) if img is None]
if unreadable:
    raise FileNotFoundError(f"No se pudieron leer las imágenes: {unreadable}")
draw_images(imgs)

#### Detect keypoints and extract local invariant descriptors from the two input images.

Puntos detectados con MSER y descritos con SIFT:

In [None]:
import time
start_time = time.time()

# MSER is used only as the keypoint detector; SIFT only computes descriptors.
mser = cv2.MSER_create(delta=5, min_area=60, max_area=14400)
extractor = cv2.SIFT_create()

keypoints_mser = []
descriptors_mser = []

for image in imgs:
    # Detection runs on the histogram-equalized grayscale version of the image.
    gray_eq = cv2.equalizeHist(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    detected = mser.detect(gray_eq, None)

    # Descriptors are computed on the original image. The keypoint list that
    # compute() returns is the one stored alongside the descriptors.
    kept, desc = extractor.compute(image, detected)
    keypoints_mser.append(kept)
    descriptors_mser.append(desc)

detection_time = time.time() - start_time
print(f"Detección (MSER) y Descripción (SIFT) completada en: {detection_time:.4f} seg")

# Report the counts only when both images were processed and the first one
# produced descriptors.
have_pair = len(keypoints_mser) >= 2
if have_pair and descriptors_mser[0] is not None:
    print(f"Keypoints Modelo: {len(keypoints_mser[0])}, Keypoints Escena: {len(keypoints_mser[1])}")

# Draw key points on deep copies so the images in `imgs` stay untouched.
_imgs = copy.deepcopy(imgs)
for canvas, kps in zip(_imgs, keypoints_mser):
    cv2.drawKeypoints(canvas, kps, canvas, (0, 255, 0), flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
draw_images(_imgs)

Detección y descripción con ORB:

In [None]:
import time
start_time = time.time()

detector = cv2.ORB_create(nfeatures=30000)

# Detect and describe every image in one pass, then split the resulting
# (keypoints, descriptors) pairs into two parallel lists.
orb_results = [detector.detectAndCompute(image, None) for image in imgs]
keypoints_orb = [kps for kps, _ in orb_results]
descriptors_orb = [desc for _, desc in orb_results]

detection_time = time.time() - start_time
print(f"Detección y Descripción (ORB) completada en: {detection_time:.4f} seg")
print(f"Keypoints Modelo: {len(keypoints_orb[0])}, Keypoints Escena: {len(keypoints_orb[1])}")

# Draw key points on deep copies so the images in `imgs` stay untouched.
_imgs = copy.deepcopy(imgs)
for canvas, kps in zip(_imgs, keypoints_orb):
    cv2.drawKeypoints(canvas, kps, canvas, (0, 255, 0), flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
draw_images(_imgs)

Detección y descripción con SIFT:

In [None]:
import time
start_time = time.time()

# SIFT is used here for both keypoint detection and description.
detector = cv2.SIFT_create()
keypoints_sift = []
descriptors_sift = []

for image in imgs:
    kp, des = detector.detectAndCompute(image, None)
    keypoints_sift.append(kp)
    descriptors_sift.append(des)

detection_time = time.time() - start_time
print(f"Detección y Descripción (SIFT) completada en: {detection_time:.4f} seg")
print(f"Keypoints Modelo: {len(keypoints_sift[0])}, Keypoints Escena: {len(keypoints_sift[1])}")

# Draw key points on deep copies so the images in `imgs` stay untouched.
# Iterate over every loaded image rather than a hard-coded count of 2, in line
# with the MSER and ORB cells.
_imgs = copy.deepcopy(imgs)
for canvas, kps in zip(_imgs, keypoints_sift):
    cv2.drawKeypoints(canvas, kps, canvas, (0, 255, 0), flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
draw_images(_imgs)

#### Match the descriptors between the two images.

Matching con FLANN (KD-Tree) de los puntos encontrados con MSER:

In [None]:
start_time = time.time()

# FLANN KD-tree index parameters for the float SIFT descriptors.
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)

matcher = cv2.FlannBasedMatcher(index_params, search_params)
# k=2: request the two nearest scene descriptors for each model descriptor.
knn_matches_mser = matcher.knnMatch(descriptors_mser[0], descriptors_mser[1], k=2)

match_time = time.time() - start_time
# This count is the raw kNN result (one entry per query descriptor); nothing
# has been filtered yet, so it must not be reported as "good" matches.
print(f"Matching completado en: {match_time:.4f} seg. Matches kNN encontrados (sin filtrar): {len(knn_matches_mser)}")

# Draw all matches
img = cv2.drawMatchesKnn(imgs[0], keypoints_mser[0], imgs[1], keypoints_mser[1], knn_matches_mser, None, flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
draw_images([img])

Matching con FLANN (LSH) de los puntos descritos con ORB:

In [None]:
start_time = time.time()

# FLANN LSH index parameters for the ORB descriptors.
FLANN_INDEX_LSH = 6
index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
search_params = dict(checks=50)

matcher = cv2.FlannBasedMatcher(index_params, search_params)
# k=2: request the two nearest scene descriptors for each model descriptor.
knn_matches_orb = matcher.knnMatch(descriptors_orb[0], descriptors_orb[1], k=2)

match_time = time.time() - start_time
# This count is the raw kNN result (one entry per query descriptor); nothing
# has been filtered yet, so it must not be reported as "good" matches.
print(f"Matching ORB completado en: {match_time:.4f} seg. Matches kNN encontrados (sin filtrar): {len(knn_matches_orb)}")

# Draw all matches
img_matches = cv2.drawMatchesKnn(imgs[0], keypoints_orb[0], imgs[1], keypoints_orb[1], knn_matches_orb, None, flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
draw_images([img_matches])


Matching con FLANN (KD-Tree) de los puntos detectados y descritos con SIFT:

In [None]:
start_time = time.time()

# FLANN KD-tree index parameters for the float SIFT descriptors.
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)

matcher = cv2.FlannBasedMatcher(index_params, search_params)
# k=2: request the two nearest scene descriptors for each model descriptor.
knn_matches_sift = matcher.knnMatch(descriptors_sift[0], descriptors_sift[1], k=2)

match_time = time.time() - start_time
# This count is the raw kNN result (one entry per query descriptor); nothing
# has been filtered yet, so it must not be reported as "good" matches.
print(f"Matching completado en: {match_time:.4f} seg. Matches kNN encontrados (sin filtrar): {len(knn_matches_sift)}")

# Draw all matches
img = cv2.drawMatchesKnn(imgs[0], keypoints_sift[0], imgs[1], keypoints_sift[1], knn_matches_sift, None, flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
draw_images([img])

Volvemos a hacer matching pero usando fuerza bruta:

In [None]:
# MSER: exhaustive (brute-force) matching of the SIFT descriptors with L2 distance.
start_time = time.time()

# crossCheck is off and the two nearest neighbours per query are requested (k=2).
matcher = cv2.BFMatcher(cv2.NORM_L2, crossCheck=False)
model_des, scene_des = descriptors_mser[0], descriptors_mser[1]
knn_matches_mser_bf = matcher.knnMatch(model_des, scene_des, k=2)

match_time = time.time() - start_time
print(f"Matching MSER (Brute Force) completado en: {match_time:.4f} seg. Matches encontrados: {len(knn_matches_mser_bf)}")

# Draw every kNN match between the model (index 0) and the scene (index 1).
model_kps, scene_kps = keypoints_mser[0], keypoints_mser[1]
img = cv2.drawMatchesKnn(imgs[0], model_kps, imgs[1], scene_kps, knn_matches_mser_bf, None, flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
draw_images([img])

In [None]:
# ORB: exhaustive (brute-force) matching of the ORB descriptors with Hamming distance.
start_time = time.time()

# crossCheck is off and the two nearest neighbours per query are requested (k=2).
matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)
model_des, scene_des = descriptors_orb[0], descriptors_orb[1]
knn_matches_orb_bf = matcher.knnMatch(model_des, scene_des, k=2)

match_time = time.time() - start_time

print(f"Matching ORB (Brute Force) completado en: {match_time:.4f} seg. Matches encontrados: {len(knn_matches_orb_bf)}")

# Draw every kNN match between the model (index 0) and the scene (index 1).
model_kps, scene_kps = keypoints_orb[0], keypoints_orb[1]
img_matches = cv2.drawMatchesKnn(imgs[0], model_kps, imgs[1], scene_kps, knn_matches_orb_bf, None, flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
draw_images([img_matches])

In [None]:
# SIFT: exhaustive (brute-force) matching of the SIFT descriptors with L2 distance.
start_time = time.time()

# crossCheck is off and the two nearest neighbours per query are requested (k=2).
matcher = cv2.BFMatcher(cv2.NORM_L2, crossCheck=False)
model_des, scene_des = descriptors_sift[0], descriptors_sift[1]
knn_matches_sift_bf = matcher.knnMatch(model_des, scene_des, k=2)

match_time = time.time() - start_time
print(f"Matching (Brute Force) completado en: {match_time:.4f} seg. Matches encontrados: {len(knn_matches_sift_bf)}")

# Draw every kNN match between the model (index 0) and the scene (index 1).
model_kps, scene_kps = keypoints_sift[0], keypoints_sift[1]
img = cv2.drawMatchesKnn(imgs[0], model_kps, imgs[1], scene_kps, knn_matches_sift_bf, None, flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
draw_images([img])

Ahora aplicamos el Ratio Test (sobre los matches kNN obtenidos con FLANN) para eliminar matches incorrectos:

In [None]:
# MSER
# Ratio test: keep the best match only when it is clearly closer (< 0.75x)
# than the second-best candidate for the same query descriptor.
# knnMatch may return fewer than 2 neighbours for a query; such entries cannot
# be ratio-tested, so they are skipped instead of failing on tuple unpacking.
good_matches_mser = [
    pair[0]
    for pair in knn_matches_mser
    if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
]

img = cv2.drawMatches(imgs[0], keypoints_mser[0], imgs[1], keypoints_mser[1], good_matches_mser, None, flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
draw_images([img])

In [None]:
# ORB
# Ratio test: keep the best match only when it is clearly closer (< 0.75x)
# than the second-best candidate for the same query descriptor.
# knnMatch may return fewer than 2 neighbours for a query (commonly seen with
# the FLANN LSH index); such entries cannot be ratio-tested, so they are
# skipped instead of failing on tuple unpacking.
good_matches_orb = [
    pair[0]
    for pair in knn_matches_orb
    if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
]

img = cv2.drawMatches(imgs[0], keypoints_orb[0], imgs[1], keypoints_orb[1], good_matches_orb, None, flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
draw_images([img])

In [None]:
# SIFT
# Ratio test: keep the best match only when it is clearly closer (< 0.75x)
# than the second-best candidate for the same query descriptor.
# knnMatch may return fewer than 2 neighbours for a query; such entries cannot
# be ratio-tested, so they are skipped instead of failing on tuple unpacking.
good_matches_sift = [
    pair[0]
    for pair in knn_matches_sift
    if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
]

img = cv2.drawMatches(imgs[0], keypoints_sift[0], imgs[1], keypoints_sift[1], good_matches_sift, None, flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
draw_images([img])

#### Use the RANSAC algorithm to estimate a homography matrix H using the matched feature vectors and draw correct matches.

In [None]:
# MSER
# Estimate the model -> scene homography from the ratio-test survivors.
if len(good_matches_mser) > 4:
    # queryIdx indexes the model keypoints, trainIdx the scene keypoints.
    src_pts = np.float32([keypoints_mser[0][m.queryIdx].pt for m in good_matches_mser]).reshape(-1, 1, 2)
    dst_pts = np.float32([keypoints_mser[1][m.trainIdx].pt for m in good_matches_mser]).reshape(-1, 1, 2)

    # RANSAC with a 5.0 px reprojection threshold; `mask` flags the inliers.
    H_mser, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)

    if H_mser is None or mask is None:
        # findHomography yields no matrix when estimation fails; without this
        # check mask.ravel() below would raise on None.
        print("No se pudo estimar la homografía con RANSAC.")
        H_mser = None
    else:
        # Draw correct matches (only those RANSAC marked as inliers)
        draw_params = dict(matchColor=(0,255,0), singlePointColor=None, matchesMask=mask.ravel().tolist(), flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
        img_inliers = cv2.drawMatches(imgs[0], keypoints_mser[0], imgs[1], keypoints_mser[1], good_matches_mser, None, matchesThickness=3, **draw_params)
        draw_images([img_inliers])

else:
    print("No hay suficientes matches para calcular la homografía.")
    H_mser = None

In [None]:
# ORB
# Estimate the model -> scene homography from the ratio-test survivors.
if len(good_matches_orb) > 4:
    # queryIdx indexes the model keypoints, trainIdx the scene keypoints.
    src_pts = np.float32([keypoints_orb[0][m.queryIdx].pt for m in good_matches_orb]).reshape(-1, 1, 2)
    dst_pts = np.float32([keypoints_orb[1][m.trainIdx].pt for m in good_matches_orb]).reshape(-1, 1, 2)

    # RANSAC with a 5.0 px reprojection threshold; `mask` flags the inliers.
    H_orb, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)

    if H_orb is None or mask is None:
        # findHomography yields no matrix when estimation fails; without this
        # check mask.ravel() below would raise on None.
        print("No se pudo estimar la homografía con RANSAC.")
        H_orb = None
    else:
        # Draw correct matches (only those RANSAC marked as inliers)
        draw_params = dict(matchColor=(0,255,0), singlePointColor=None, matchesMask=mask.ravel().tolist(), flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
        img_inliers = cv2.drawMatches(imgs[0], keypoints_orb[0], imgs[1], keypoints_orb[1], good_matches_orb, None, matchesThickness=3, **draw_params)
        draw_images([img_inliers])

else:
    print("No hay suficientes matches para calcular la homografía.")
    H_orb = None

In [None]:
# SIFT
# Estimate the model -> scene homography from the ratio-test survivors.
if len(good_matches_sift) > 4:
    # queryIdx indexes the model keypoints, trainIdx the scene keypoints.
    src_pts = np.float32([keypoints_sift[0][m.queryIdx].pt for m in good_matches_sift]).reshape(-1, 1, 2)
    dst_pts = np.float32([keypoints_sift[1][m.trainIdx].pt for m in good_matches_sift]).reshape(-1, 1, 2)

    # RANSAC with a 5.0 px reprojection threshold; `mask` flags the inliers.
    H_sift, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)

    if H_sift is None or mask is None:
        # findHomography yields no matrix when estimation fails; without this
        # check mask.ravel() below would raise on None.
        print("No se pudo estimar la homografía con RANSAC.")
        H_sift = None
    else:
        # Draw correct matches (only those RANSAC marked as inliers)
        draw_params = dict(matchColor=(0,255,0), singlePointColor=None, matchesMask=mask.ravel().tolist(), flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
        img_inliers = cv2.drawMatches(imgs[0], keypoints_sift[0], imgs[1], keypoints_sift[1], good_matches_sift, None, matchesThickness=3, **draw_params)
        draw_images([img_inliers])

else:
    print("No hay suficientes matches para calcular la homografía.")
    H_sift = None

#### Once camera resectioning is done from an estimated homography, this information is used to insert another image, with the correct perspective, appearing to be part of the original scene.

In [None]:
# SIFT
# Paste a replacement image over the detected model region of the scene,
# using the model -> scene homography H_sift.
if H_sift is not None:
    img_replace = cv2.imread("images/ayto.png")
    if img_replace is None:
        # cv2.imread returns None instead of raising on an unreadable file.
        raise FileNotFoundError("No se pudo leer la imagen: images/ayto.png")

    # Stretch the replacement to the model's size so the homography estimated
    # for the model image applies to it unchanged.
    h_model, w_model = imgs[0].shape[:2]
    img_replace_resized = cv2.resize(img_replace, (w_model, h_model))

    h_scene, w_scene = imgs[1].shape[:2]
    proj_img = cv2.warpPerspective(img_replace_resized, H_sift, (w_scene, h_scene))

    # Warp an all-255 mask with the same homography to find the pixels the
    # replacement covers. Nearest-neighbour interpolation keeps the mask
    # strictly 0/255: with the default linear interpolation the border pixels
    # get intermediate values, which are non-zero in both the mask and its
    # inverse, so scene and overlay would be summed (and saturated) there.
    mask_model = np.ones((h_model, w_model), dtype=np.uint8) * 255
    mask_warped = cv2.warpPerspective(mask_model, H_sift, (w_scene, h_scene), flags=cv2.INTER_NEAREST)

    scene_img = imgs[1].copy()

    # Scene with the covered region blacked out ...
    mask_warped_inv = cv2.bitwise_not(mask_warped)
    scene_bg = cv2.bitwise_and(scene_img, scene_img, mask=mask_warped_inv)

    # ... plus the warped replacement restricted to that region.
    overlay_fg = cv2.bitwise_and(proj_img, proj_img, mask=mask_warped)

    overlay = cv2.add(scene_bg, overlay_fg)

    draw_images([imgs[1], proj_img, overlay])
else:
    print("No se pudo realizar el overlay por falta de homografía.")
