In [1]:
import numpy as np
from PIL import Image
import cv2
from sklearn.neighbors import NearestNeighbors

In [2]:
def set_corr_manual():
    """
    Return the hand-picked point correspondences between the two images

    Both arrays list four corners in the same order: left top, right top,
    left bottom, right bottom.

    Returns
    -------
    p1 : ndarray of shape (n, 2)
        Matched keypoint locations in image 1
    p2 : ndarray of shape (n, 2)
        Matched keypoint locations in image 2
    """
    # Corners of the target quadrilateral as clicked in image 1.
    quad_corners = [
        (138, 74),    # left top
        (456, 80),    # right top
        (138, 194),   # left bottom
        (456, 198),   # right bottom
    ]

    # Corners of image 2, expressed through its last valid pixel index
    # (presumably image 2 is 1200 x 600 pixels -- verify against the file).
    last_col = 1200 - 1
    last_row = 600 - 1
    full_frame = [
        (0, 0),                # left top
        (last_col, 0),         # right top
        (0, last_row),         # left bottom
        (last_col, last_row),  # right bottom
    ]

    p1 = np.asarray(quad_corners, dtype=np.float64)
    p2 = np.asarray(full_frame, dtype=np.float64)
    return p1, p2

In [3]:
def compute_H(p1, p2):
    """
    Estimate the homography between images with the normalized DLT algorithm

    The returned H maps homogeneous points of image 1 onto image 2, i.e.
    [u, v, 1]^T ~ H @ [x, y, 1]^T for every matched pair (x, y) <-> (u, v).

    Parameters
    ----------
    p1 : ndarray of shape (n, 2)
        Matched keypoint locations in image 1
    p2 : ndarray of shape (n, 2)
        Matched keypoint locations in image 2

    Returns
    -------
    H : ndarray of shape (3, 3)
        The estimated homography, scaled so that H[2, 2] == 1

    Raises
    ------
    ValueError
        If p1 and p2 are not both of shape (n, 2) with n >= 4, or if all
        points of one image coincide (the normalization is undefined then)
    """
    # Work in float64 regardless of the caller's dtype (e.g. float32 matches).
    p1 = np.asarray(p1, dtype=np.float64)
    p2 = np.asarray(p2, dtype=np.float64)
    if p1.ndim != 2 or p1.shape[1] != 2 or p1.shape != p2.shape:
        raise ValueError("p1 and p2 must both have shape (n, 2)")
    n = p1.shape[0]
    if n < 4:
        # Fewer than 4 pairs leave the 8 degrees of freedom under-determined.
        raise ValueError("at least 4 correspondences are required")

    def normalize_points(points):
        """Shift to zero centroid and scale to mean distance sqrt(2)."""
        centroid = points.mean(axis=0)
        shifted = points - centroid
        mean_dist = np.mean(np.linalg.norm(shifted, axis=1))
        if mean_dist == 0:
            raise ValueError("all points coincide; cannot normalize")
        scale = np.sqrt(2) / mean_dist
        # Similarity transform that performs the same shift + scale on
        # homogeneous coordinates; needed later to undo the normalization.
        T = np.array([
            [scale, 0, -scale * centroid[0]],
            [0, scale, -scale * centroid[1]],
            [0, 0, 1]
        ])
        return shifted * scale, T

    p1_norm, T1 = normalize_points(p1)
    p2_norm, T2 = normalize_points(p2)

    # Build the 2n x 9 DLT system A h = 0 (two rows per correspondence).
    x, y = p1_norm[:, 0], p1_norm[:, 1]
    u, v = p2_norm[:, 0], p2_norm[:, 1]
    zeros = np.zeros(n)
    ones = np.ones(n)
    A = np.empty((2 * n, 9), dtype=np.float64)
    A[0::2] = np.column_stack([x, y, ones, zeros, zeros, zeros, -u * x, -u * y, -u])
    A[1::2] = np.column_stack([zeros, zeros, zeros, x, y, ones, -v * x, -v * y, -v])

    # h is the right singular vector of the smallest singular value, i.e. the
    # last row of Vt.  The full decomposition is only needed when A has fewer
    # rows than columns (n == 4); otherwise the reduced one already yields all
    # 9 right singular vectors without building a 2n x 2n U.
    _, _, Vt = np.linalg.svd(A, full_matrices=(A.shape[0] < A.shape[1]))
    H_norm = Vt[-1].reshape(3, 3)

    # Denormalize: H = T2^-1 * H_norm * T1 maps image 1 points to image 2
    H = np.linalg.inv(T2) @ H_norm @ T1
    # make the last element of H equal to 1
    H /= H[2, 2]
    return H

In [4]:
def insert_image(base_img, input_img, H):
    """
    Insert projected input_img in base_img with estimated homography

    Uses inverse warping: every pixel (x, y) of the base image is mapped
    through H, and if it lands inside input_img the nearest (floor) input
    pixel is copied there.  base_img itself is not modified.

    Parameters
    ----------
    base_img : ndarray of shape (h, w, 3), base image
    input_img : ndarray of image to be projected and inserted
    H : ndarray of shape (3, 3)
        The estimated homography, mapping base image coordinates (x, y, 1)
        to input image coordinates

    Returns
    -------
    merged_img : ndarray of shape (h, w, 3)

    Raises
    ------
    ValueError
        If H is not a 3x3 matrix
    """
    H = np.asarray(H, dtype=np.float64)
    if H.shape != (3, 3):
        raise ValueError("H must be a 3x3 matrix")

    h_base, w_base = base_img.shape[:2]
    h_in, w_in = input_img.shape[:2]
    merged_img = base_img.copy()

    # Homogeneous coordinates of one image row: x varies, y is filled in per
    # row, the last component stays 1.  Working row by row keeps the memory
    # footprint small while avoiding a Python-level loop over every pixel.
    row_pts = np.ones((3, w_base), dtype=np.float64)
    row_pts[0] = np.arange(w_base)

    # A zero homogeneous coordinate produces inf/nan; those pixels fail the
    # bounds test below, so the warnings carry no information.
    with np.errstate(divide='ignore', invalid='ignore'):
        for y in range(h_base):
            row_pts[1] = y
            proj = H @ row_pts
            u = proj[0] / proj[2]
            v = proj[1] / proj[2]

            # Keep only pixels that land inside the input image
            inside = (u >= 0) & (u < w_in) & (v >= 0) & (v < h_in)
            if not inside.any():
                continue

            u0 = np.floor(u[inside]).astype(np.intp)
            v0 = np.floor(v[inside]).astype(np.intp)

            # merged_img[y] is a view, so this writes into merged_img;
            # only the first three (R, G, B) channels are copied.
            row_out = merged_img[y]
            row_out[inside, :3] = input_img[v0, u0, :3]

    return merged_img

In [5]:
# Q 2-1

# Load both images and turn them into RGB arrays
base_img, input_img = (
    np.array(Image.open(path).convert('RGB'))
    for path in ('00002.png', '00003.png')
)

# Hand-picked correspondences: p1 in the base image, p2 in the input image
p1, p2 = set_corr_manual()

# Homography taking p1 onto p2
H = compute_H(p1, p2)

# Paste the warped input_img into base_img and write the result to disk
merged_img = insert_image(base_img, input_img, H)
output_2_1 = Image.fromarray(np.uint8(merged_img))
output_2_1.save('output_2-1.png')




In [6]:
def match_sift(loc1, des1, loc2, des2, distance_ratio):
    """
    Find the matches of SIFT features between two images

    Each descriptor of image 1 is matched to its nearest descriptor of
    image 2 and kept only if it passes the ratio test
    (nearest distance < distance_ratio * second-nearest distance).

    Parameters
    ----------
    loc1 : sequence of length n1
        Keypoints of image 1: either objects exposing a `.pt` (x, y)
        attribute (e.g. cv2.KeyPoint) or rows of an ndarray of shape (n1, 2)
    des1 : ndarray of shape (n1, 128) or None
        SIFT descriptors of the keypoints image 1
    loc2 : sequence of length n2
        Keypoints of image 2, same conventions as loc1
    des2 : ndarray of shape (n2, 128) or None
        SIFT descriptors of the keypoints image 2
    distance_ratio : threshold for the ratio test

    Returns
    -------
    x1 : ndarray of shape (n, 2), dtype float32
        Matched keypoint locations in image 1
    x2 : ndarray of shape (n, 2), dtype float32
        Matched keypoint locations in image 2
    """
    # The feature extractor may hand back None / empty descriptors when an
    # image has no keypoints, and the ratio test needs two neighbours in
    # image 2.  In those cases there is nothing to match.
    if des1 is None or des2 is None or len(des1) == 0 or len(des2) < 2:
        return (np.zeros((0, 2), dtype=np.float32),
                np.zeros((0, 2), dtype=np.float32))

    # Two nearest neighbours in image 2 for every descriptor of image 1
    nbrs = NearestNeighbors(n_neighbors=2, algorithm='auto').fit(des2)
    distances, indices = nbrs.kneighbors(des1)

    # Ratio test, applied to all candidates at once
    keep = distances[:, 0] < distance_ratio * distances[:, 1]
    idx1 = np.flatnonzero(keep)
    idx2 = indices[keep, 0]

    def _xy(kp):
        # cv2.KeyPoint stores its coordinates in `.pt`; plain (x, y) rows
        # are used as they are.
        return getattr(kp, 'pt', kp)

    # reshape keeps the documented (n, 2) shape even when nothing matched
    x1 = np.array([_xy(loc1[i]) for i in idx1], dtype=np.float32).reshape(-1, 2)
    x2 = np.array([_xy(loc2[j]) for j in idx2], dtype=np.float32).reshape(-1, 2)

    return x1, x2

In [None]:
def compute_H_ransac(x1, x2, ransac_n_iter, ransac_thr, rng=None):
    """
    Estimate the homography between images using RANSAC

    Each iteration fits a homography to 4 randomly drawn correspondences
    with the (un-normalized) DLT and counts how many points of image 1 are
    projected within ransac_thr of their match in image 2.  The candidate
    with the most inliers wins; it is NOT re-fitted to its inlier set.

    Parameters
    ----------
    x1 : ndarray of shape (n, 2)
        Matched keypoint locations in image 1
    x2 : ndarray of shape (n, 2)
        Matched keypoint locations in image 2
    ransac_n_iter : int
        Number of RANSAC iterations
    ransac_thr : float
        Error threshold for RANSAC
    rng : optional
        Object providing `choice(n, size, replace=False)`, e.g. a
        numpy.random.Generator or RandomState.  Defaults to the global
        numpy.random state, so np.random.seed() controls the sampling.

    Returns
    -------
    H : ndarray of shape (3, 3) or None
        The estimated homography mapping image 1 to image 2, scaled so that
        H[2, 2] == 1 whenever H[2, 2] is non-zero.  None if no candidate
        produced a single inlier (or ransac_n_iter is 0).
    inlier : ndarray of shape (k,)
        Indices (into x1 / x2) of the inliers of the returned homography

    Raises
    ------
    ValueError
        If x1 and x2 are not both of shape (n, 2) with n >= 4
    """
    # float64 throughout: float32 matches would otherwise have their DLT
    # products computed in single precision.
    x1 = np.asarray(x1, dtype=np.float64)
    x2 = np.asarray(x2, dtype=np.float64)
    if x1.ndim != 2 or x1.shape[1] != 2 or x1.shape != x2.shape:
        raise ValueError("x1 and x2 must both have shape (n, 2)")
    n_pts = x1.shape[0]
    if n_pts < 4:
        raise ValueError("at least 4 matches are required to fit a homography")
    if rng is None:
        rng = np.random

    def estimate_H(p1, p2):
        """DLT on the given correspondences; returns a unit-norm 3x3 H."""
        m = p1.shape[0]
        x, y = p1[:, 0], p1[:, 1]
        u, v = p2[:, 0], p2[:, 1]
        zeros = np.zeros(m)
        ones = np.ones(m)
        A = np.empty((2 * m, 9), dtype=np.float64)
        A[0::2] = np.column_stack([x, y, ones, zeros, zeros, zeros, -u * x, -u * y, -u])
        A[1::2] = np.column_stack([zeros, zeros, zeros, x, y, ones, -v * x, -v * y, -v])
        # Solve A h = 0: h is the last row of Vt.  The full SVD is required
        # because with 4 points A is 8 x 9.
        _, _, Vt = np.linalg.svd(A)
        return Vt[-1].reshape(3, 3)

    # Homogeneous image-1 points do not change between iterations (3 x N)
    pts1_h = np.vstack([x1.T, np.ones((1, n_pts))])

    best_H = None
    best_inliers = np.array([], dtype=np.intp)

    # Degenerate samples can project points to infinity; the resulting
    # inf/nan errors simply never count as inliers.
    with np.errstate(divide='ignore', invalid='ignore'):
        for _ in range(ransac_n_iter):
            # Part 1. Fit a candidate to 4 distinct random correspondences
            idx = rng.choice(n_pts, 4, replace=False)
            H_candidate = estimate_H(x1[idx], x2[idx])

            # Part 2. Reprojection error of every match under the candidate
            # (the projection is independent of the scale of H_candidate)
            proj2 = H_candidate @ pts1_h          # 3 x N
            proj2 = proj2[:2] / proj2[2]          # 2 x N
            errors = np.linalg.norm(proj2.T - x2, axis=1)

            # Part 3. Keep the candidate with the largest consensus set
            inliers = np.flatnonzero(errors < ransac_thr)
            if len(inliers) > len(best_inliers):
                best_inliers = inliers
                best_H = H_candidate

    # Fix the scale so the last element is 1 (skipped if it is exactly 0,
    # where the division would turn the matrix into inf/nan).
    if best_H is not None and best_H[2, 2] != 0:
        best_H = best_H / best_H[2, 2]

    H = best_H
    inlier = best_inliers

    return H, inlier

In [None]:
def merge_image(img_1, img_2, H):
    """
    Merge projected img_2 with img_1 using estimated homography.

    The output canvas is twice the height and width of img_1 with img_1 in
    its top-left corner.  Every canvas pixel (x, y) is mapped through H; if
    it lands inside img_2 the nearest (floor) img_2 pixel is written there,
    overwriting img_1 where the two overlap.

    Parameters
    ----------
    img_1 : ndarray of shape (h, w, 3), base image
    img_2 : ndarray of shape (h2, w2, 3), image to be projected and merged
        with base image (its size may differ from img_1)
    H : ndarray of shape (3, 3)
        The estimated homography, mapping img_1 / canvas coordinates
        (x, y, 1) to img_2 coordinates

    Returns
    -------
    merged_img : ndarray of shape (2h, 2w, 3), base image placed at the top-left corner

    Raises
    ------
    ValueError
        If H is not a 3x3 matrix
    """
    H = np.asarray(H, dtype=np.float64)
    if H.shape != (3, 3):
        raise ValueError("H must be a 3x3 matrix")

    h1, w1 = img_1.shape[:2]
    h2, w2 = img_2.shape[:2]
    canvas_h, canvas_w = 2 * h1, 2 * w1

    # Black canvas with the base image in the top-left corner
    merged_img = np.zeros((canvas_h, canvas_w, 3), dtype=img_1.dtype)
    merged_img[:h1, :w1] = img_1

    # Homogeneous coordinates of one canvas row: x varies, y is filled in
    # per row, the last component stays 1.  Row-wise processing keeps memory
    # bounded on the 4x larger canvas while avoiding a per-pixel Python loop.
    row_pts = np.ones((3, canvas_w), dtype=np.float64)
    row_pts[0] = np.arange(canvas_w)

    # A zero homogeneous coordinate produces inf/nan; those pixels fail the
    # bounds test below, so the warnings carry no information.
    with np.errstate(divide='ignore', invalid='ignore'):
        for y in range(canvas_h):
            row_pts[1] = y
            proj = H @ row_pts
            u = proj[0] / proj[2]
            v = proj[1] / proj[2]

            # Keep only canvas pixels that land inside img_2
            inside = (u >= 0) & (u < w2) & (v >= 0) & (v < h2)
            if not inside.any():
                continue

            u0 = np.floor(u[inside]).astype(np.intp)
            v0 = np.floor(v[inside]).astype(np.intp)

            # merged_img[y] is a view, so this writes into merged_img
            row_out = merged_img[y]
            row_out[inside] = img_2[v0, u0, :3]

    return merged_img

In [9]:
# Q 2-2

# Hyperparameters, feel free to modify
ransac_n_iter = 500
ransac_thr = 3
distance_ratio = 0.75
# RANSAC draws its samples from the global NumPy random state; fixing the
# seed makes the saved panorama reproducible across "Restart & Run All".
ransac_seed = 0

# Read images
img_1 = Image.open('00004.jpg').convert('RGB')
img_2 = Image.open('00005.jpg').convert('RGB')
img_1 = np.array(img_1)
img_2 = np.array(img_2)

# Extract SIFT features
sift = cv2.SIFT_create()
loc1, des1 = sift.detectAndCompute(img_1, None)
loc2, des2 = sift.detectAndCompute(img_2, None)

# Find the matches between two images (x1 <--> x2)
x1, x2 = match_sift(loc1, des1, loc2, des2, distance_ratio)

# Estimate the homography between images using RANSAC
np.random.seed(ransac_seed)
H, inliner = compute_H_ransac(x1, x2, ransac_n_iter, ransac_thr)
if H is None:
    # Fail here with a clear message instead of deep inside merge_image
    raise RuntimeError('RANSAC did not find a homography; check the SIFT matches and thresholds')

# Warp img_2 and merge with img_1
merged_img = merge_image(img_1, img_2, H)
Image.fromarray(np.uint8(merged_img)).save('output_2-2.png')
