In [58]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

import cv2
import os

## Load Dataset and Setup Pipeline

In [59]:
def load_images_from_folder(folder):
    """Load every readable image in ``folder`` in sorted filename order.

    Parameters
    ----------
    folder : str
        Directory whose entries are passed one by one to ``cv2.imread``.

    Returns
    -------
    list
        The images that ``cv2.imread`` could decode, ordered by filename.
        Entries for which ``cv2.imread`` returns ``None`` are skipped.
    """
    images = []
    # os.listdir returns entries in arbitrary order. The pipeline below pairs
    # image i-1 with image i, so the order must be deterministic across runs.
    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename))
        if img is not None:
            images.append(img)
    return images

In [60]:
def feature_extraction_set(images):
    """Run SIFT on every image and collect the results per image.

    Parameters
    ----------
    images : iterable
        Images accepted by ``cv2.SIFT.detectAndCompute``.

    Returns
    -------
    tuple of (list, list)
        ``(keypoints, descriptors)``; element ``k`` of each list belongs to
        the ``k``-th input image. They stay plain lists because the number
        of features differs from image to image.
    """
    detector = cv2.SIFT_create()

    # The extractor is assumed to follow the cv2 detectAndCompute interface,
    # returning a (keypoints, descriptors) pair per image.
    extracted = [detector.detectAndCompute(image, None) for image in images]

    keypoints = [pair[0] for pair in extracted]
    descriptors = [pair[1] for pair in extracted]
    return keypoints, descriptors

In [61]:
def feature_matching_set(kp, des, ratio=0.7, ransac_thresh=5.0, min_matches=4):
    """Match every image pair (i, j) with i < j and keep RANSAC inliers.

    Parameters
    ----------
    kp : list
        Per-image keypoint lists (objects exposing ``.pt``).
    des : list
        Per-image descriptor arrays, aligned with ``kp``. An entry may be
        ``None`` or hold fewer than two descriptors; such pairs get no matches.
    ratio : float, optional
        Lowe ratio-test threshold (default 0.7).
    ransac_thresh : float, optional
        Reprojection threshold passed to ``cv2.findHomography`` (default 5.0).
    min_matches : int, optional
        Minimum number of ratio-test survivors needed before a homography is
        estimated (default 4, which is also the enforced lower bound).

    Returns
    -------
    dict
        Maps ``(i, j)`` to the list of inlier matches; the list is empty when
        the pair could not be matched or verified.
    """
    # Initialize FLANN matching. The KD-tree index is algorithm id 1; id 0 is
    # the linear (brute-force) index, for which `trees` has no effect.
    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)
    flann = cv2.FlannBasedMatcher(index_params, search_params)

    # A homography needs at least four correspondences.
    required = max(int(min_matches), 4)

    def _usable(descriptors):
        # k=2 matching needs at least two descriptors to produce a pair.
        return descriptors is not None and len(descriptors) >= 2

    matches = {}  # Dict for easier access to each match
    for i in range(len(kp)):
        for j in range(i + 1, len(kp)):  # Upper triangle only
            matches[(i, j)] = []
            if not (_usable(des[i]) and _usable(des[j])):
                continue

            knn = flann.knnMatch(des[i], des[j], k=2)

            # Lowe's ratio test; skip entries without two neighbours.
            good_matches = [
                pair[0] for pair in knn
                if len(pair) == 2 and pair[0].distance < ratio * pair[1].distance
            ]
            if len(good_matches) < required:
                continue

            # RANSAC to find homography and get the inlier mask
            pts1 = np.float32([kp[i][m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
            pts2 = np.float32([kp[j][m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
            _, mask = cv2.findHomography(pts1, pts2, cv2.RANSAC, ransac_thresh)
            if mask is None:
                # Estimation failed: no verified matches for this pair.
                continue

            inlier_flags = np.asarray(mask).ravel().astype(bool)
            matches[(i, j)] = [m for m, keep in zip(good_matches, inlier_flags) if keep]

    return matches

In [62]:
def normalize(pts):
    """Build the 3x3 conditioning transform for a set of 2D points.

    The transform moves the centroid of the first two columns of ``pts`` to
    the origin and scales the points so that their mean distance from the
    centroid becomes sqrt(2).

    Parameters
    ----------
    pts : ndarray, shape (N, >=2)
        One point per row; only columns 0 (x) and 1 (y) are read.

    Returns
    -------
    ndarray, shape (3, 3)
        Transform to apply to homogeneous column vectors.
    """
    centroid_x = np.mean(pts[:, 0])
    centroid_y = np.mean(pts[:, 1])

    squared_offsets = np.square(pts[:, 0] - centroid_x) + np.square(pts[:, 1] - centroid_y)
    mean_distance = np.mean(np.sqrt(squared_offsets))
    scale = np.sqrt(2) / mean_distance

    # Scale on the diagonal, scaled negative centroid in the last column.
    T = np.eye(3)
    T[0, 0] = T[1, 1] = scale
    T[0, 2] = -scale * centroid_x
    T[1, 2] = -scale * centroid_y
    return T

def eight_point_algorithm(pts1, pts2):
    """Estimate the fundamental matrix F satisfying ``x2.T @ F @ x1 = 0``.

    Parameters
    ----------
    pts1, pts2 : ndarray, shape (2, N)
        Corresponding pixel coordinates in the first and second image, one
        column per correspondence.

    Returns
    -------
    ndarray, shape (3, 3)
        Rank-2 fundamental matrix expressed in the original pixel coordinates.
    """
    n_points = pts1.shape[1]

    # Homogeneous coordinates, one point per row.
    x1_h = np.vstack((pts1, np.ones(n_points))).T
    x2_h = np.vstack((pts2, np.ones(pts2.shape[1]))).T

    # Condition both point sets before building the linear system.
    T = normalize(x1_h)
    T_prime = normalize(x2_h)
    u1, v1, w1 = (T @ x1_h.T)
    u2, v2, w2 = (T_prime @ x2_h.T)

    # Each correspondence gives one row of A in A @ f = 0, where f is F
    # flattened row by row.
    A = np.column_stack((
        u1 * u2, v1 * u2, w1 * u2,
        u1 * v2, v1 * v2, w1 * v2,
        u1 * w2, v1 * w2, w1 * w2,
    ))

    # The right singular vector of the smallest singular value solves A f = 0.
    _, _, Vt = np.linalg.svd(A)
    F_normalized = Vt[-1, :].reshape((3, 3))

    # Enforce rank 2 by zeroing the smallest singular value.
    U, S, Vt = np.linalg.svd(F_normalized)
    S[-1] = 0
    F_normalized = U @ np.diag(S) @ Vt

    # Undo the conditioning transforms.
    return T_prime.T @ F_normalized @ T

In [63]:
def essential_from_fundamental(K1, F, K2):
    """Convert a fundamental matrix into the essential matrix.

    ``F`` follows the convention ``x2.T @ F @ x1 = 0``, with ``x1`` a pixel
    in the first image and ``x2`` a pixel in the second. Substituting
    ``x1 = K1 @ x1_norm`` and ``x2 = K2 @ x2_norm`` gives
    ``E = K2.T @ F @ K1``.

    Parameters
    ----------
    K1 : ndarray, shape (3, 3)
        Intrinsics of the first camera.
    F : ndarray, shape (3, 3)
        Fundamental matrix.
    K2 : ndarray, shape (3, 3)
        Intrinsics of the second camera.

    Returns
    -------
    ndarray, shape (3, 3)
        Essential matrix.
    """
    # The second camera's intrinsics go on the left (x2 side), the first
    # camera's on the right (x1 side). The order only matters when K1 != K2.
    return K2.T @ F @ K1

In [64]:
def pose_from_essential(R1, T1, E):
    """Enumerate candidate extrinsics for the second camera from ``E``.

    Parameters
    ----------
    R1 : ndarray, shape (3, 3)
        Rotation of the first camera.
    T1 : ndarray, shape (3,)
        Translation of the first camera.
    E : ndarray, shape (3, 3)
        Essential matrix relating the two cameras.

    Returns
    -------
    R : ndarray, shape (2, 3, 3)
        The two rotation candidates, chained onto ``R1`` and sign-flipped
        where needed so their determinant is not negative.
    T : ndarray, shape (2, 3)
        The two translation candidates (``+`` and ``-`` the last left singular
        vector of ``E``), chained onto the first camera.
    """
    left, _, right_t = np.linalg.svd(E)
    W = np.array([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
    baseline = left[:, 2]

    # All possible relative rotations and translation directions.
    R = np.array([left @ W @ right_t, left @ W.T @ right_t])
    T = np.array([baseline, -baseline])

    # NOTE(review): candidates are chained as R1 @ R and R1 @ t + T1. For
    # world-to-camera [R|T] extrinsics the usual chaining would be R @ R1 and
    # R @ T1 + t; confirm the convention intended here.
    for idx in range(R.shape[0]):
        R[idx] = R1 @ R[idx]
        if np.linalg.det(R[idx]) < 0:
            # Flip the sign of a candidate whose determinant is negative.
            R[idx] = -R[idx]
        T[idx] = R1 @ T[idx] + T1

    return R, T

In [65]:
def _triangulate_dlt(P1, P2, pts1, pts2):
    """Triangulate each correspondence from two 3x4 projection matrices."""
    pts1 = np.asarray(pts1, dtype=float)
    pts2 = np.asarray(pts2, dtype=float)

    n_points = pts1.shape[1]
    pts3d = np.zeros((3, n_points))
    for i in range(n_points):
        x1, y1 = pts1[0, i], pts1[1, i]
        x2, y2 = pts2[0, i], pts2[1, i]

        # Two linear constraints per view on the homogeneous 3D point.
        A = np.array([
            y1 * P1[2, :] - P1[1, :],
            P1[0, :] - x1 * P1[2, :],
            y2 * P2[2, :] - P2[1, :],
            P2[0, :] - x2 * P2[2, :],
        ])

        # Take the SVD of A itself. Its last right singular vector is the
        # same null direction as that of A.T @ A, but forming A.T @ A
        # squares the condition number.
        _, _, Vt = np.linalg.svd(A)
        pts3d[:, i] = Vt[-1, :3] / Vt[-1, -1]

    return pts3d


def linear_triangulation(K1, RT1, K2, RT2, pts1, pts2):
    """Triangulate points given intrinsics and 3x4 ``[R|T]`` extrinsics.

    Parameters
    ----------
    K1, K2 : ndarray, shape (3, 3)
        Camera intrinsics.
    RT1, RT2 : ndarray, shape (3, 4)
        Camera extrinsics; the projection matrix is ``K @ RT``.
    pts1, pts2 : ndarray, shape (2, N)
        Corresponding pixel coordinates, one column per point.

    Returns
    -------
    ndarray, shape (3, N)
        Triangulated points, one column per correspondence.
    """
    return _triangulate_dlt(K1 @ RT1, K2 @ RT2, pts1, pts2)


def linear_triangulation2(P1, P2, pts1, pts2):
    """Triangulate points given the two 3x4 projection matrices directly.

    Parameters
    ----------
    P1, P2 : ndarray, shape (3, 4)
        Projection matrices of the two views.
    pts1, pts2 : ndarray, shape (2, N)
        Corresponding pixel coordinates, one column per point.

    Returns
    -------
    ndarray, shape (3, N)
        Triangulated points, one column per correspondence.
    """
    return _triangulate_dlt(P1, P2, pts1, pts2)


def linear_triangulation3(K1, R1, T1, K2, R2, T2, pts1, pts2):
    """Triangulate points given intrinsics and separate rotation/translation.

    Parameters
    ----------
    K1, K2 : ndarray, shape (3, 3)
        Camera intrinsics.
    R1, R2 : ndarray, shape (3, 3)
        Camera rotations.
    T1, T2 : ndarray, shape (3,)
        Camera translations; the projection matrix is ``K @ [R | T]``.
    pts1, pts2 : ndarray, shape (2, N)
        Corresponding pixel coordinates, one column per point.

    Returns
    -------
    ndarray, shape (3, N)
        Triangulated points, one column per correspondence.
    """
    P1 = K1 @ np.hstack((R1, T1[:, np.newaxis]))
    P2 = K2 @ np.hstack((R2, T2[:, np.newaxis]))
    return _triangulate_dlt(P1, P2, pts1, pts2)

In [66]:
def reprojection(P1, P2, pts3d):
    """Project 3D points into both views.

    Parameters
    ----------
    P1, P2 : ndarray, shape (3, 4)
        Projection matrices of the two views.
    pts3d : ndarray, shape (3, N)
        3D points, one column per point.

    Returns
    -------
    tuple of ndarray, each shape (3, N)
        Homogeneous image points in view 1 and view 2, divided by their last
        row so that the last row equals 1.
    """
    pts3d_homo = np.vstack((pts3d, np.ones(pts3d.shape[1])))
    pts2d1_homo = np.dot(P1, pts3d_homo)
    pts2d2_homo = np.dot(P2, pts3d_homo)
    return pts2d1_homo / pts2d1_homo[-1], pts2d2_homo / pts2d2_homo[-1]


def _select_candidate(K1, RT1, K2, RT2s, pts1, pts2, pts3d):
    """Return the index of the best second-camera hypothesis, or None if there are none."""
    P1 = K1 @ RT1
    pts1_homo = np.vstack((pts1, np.ones(pts1.shape[1])))
    pts2_homo = np.vstack((pts2, np.ones(pts2.shape[1])))

    best_idx, best_in_front, best_err = None, -1, np.inf
    for i in range(len(RT2s)):
        cand = pts3d[i]
        cand_homo = np.vstack((cand, np.ones(cand.shape[1])))

        # Cheirality: a point counts only if its depth is positive in the
        # coordinate frame of both cameras.
        depth1 = (RT1 @ cand_homo)[2]
        depth2 = (RT2s[i] @ cand_homo)[2]
        in_front = int(np.sum((depth1 > 0) & (depth2 > 0)))

        re1, re2 = reprojection(P1, K2 @ RT2s[i], cand)
        err = np.sum(np.square(re1 - pts1_homo)) + np.sum(np.square(re2 - pts2_homo))
        if not np.isfinite(err):
            err = np.inf

        # Rank by number of points in front first; the reprojection error
        # only breaks ties. Requiring both conditions at once could discard
        # the hypothesis with the most points in front.
        better = (
            best_idx is None
            or in_front > best_in_front
            or (in_front == best_in_front and err < best_err)
        )
        if better:
            best_idx, best_in_front, best_err = i, in_front, err

    return best_idx


def double_disambiguation(K1, RT1, K2, RT2s, pts1, pts2, pts3d):
    """Pick the second-camera ``[R|T]`` hypothesis that best explains the data.

    Parameters
    ----------
    K1, K2 : ndarray, shape (3, 3)
        Camera intrinsics.
    RT1 : ndarray, shape (3, 4)
        Extrinsics of the first camera.
    RT2s : ndarray, shape (M, 3, 4)
        Candidate extrinsics of the second camera.
    pts1, pts2 : ndarray, shape (2, N)
        Corresponding pixel coordinates.
    pts3d : sequence of ndarray, each shape (3, N)
        ``pts3d[i]`` is the triangulation obtained with ``RT2s[i]``.

    Returns
    -------
    tuple
        ``(best_RT, best_pts3d)``: the hypothesis with the most points in
        front of both cameras, ties broken by the lower summed squared
        reprojection error. ``(None, None)`` if there are no candidates.
    """
    idx = _select_candidate(K1, RT1, K2, RT2s, pts1, pts2, pts3d)
    if idx is None:
        return None, None
    return RT2s[idx], pts3d[idx]


def double_disambiguation2(K1, R1, T1, K2, R2s, T2s, pts1, pts2, pts3d):
    """Pick the second-camera ``(R, T)`` hypothesis that best explains the data.

    Parameters
    ----------
    K1, K2 : ndarray, shape (3, 3)
        Camera intrinsics.
    R1 : ndarray, shape (3, 3)
        Rotation of the first camera.
    T1 : ndarray, shape (3,)
        Translation of the first camera.
    R2s : ndarray, shape (M, 3, 3)
        Candidate rotations of the second camera.
    T2s : ndarray, shape (M, 3)
        Candidate translations; ``T2s[i]`` is paired with ``R2s[i]``.
    pts1, pts2 : ndarray, shape (2, N)
        Corresponding pixel coordinates.
    pts3d : sequence of ndarray, each shape (3, N)
        ``pts3d[i]`` is the triangulation obtained with ``(R2s[i], T2s[i])``.

    Returns
    -------
    tuple
        ``(best_R, best_T, best_pts3d)``, selected as in
        ``double_disambiguation``. ``(None, None, None)`` if there are no
        candidates.
    """
    RT1 = np.hstack((R1, T1[:, np.newaxis]))
    RT2s = [
        np.hstack((R2s[i], np.asarray(T2s[i])[:, np.newaxis]))
        for i in range(len(R2s))
    ]
    idx = _select_candidate(K1, RT1, K2, RT2s, pts1, pts2, pts3d)
    if idx is None:
        return None, None, None
    return R2s[idx], T2s[idx], pts3d[idx]

## Part 1: Two Images SfM

We follow the incremental SfM pipeline outlined in [rohana96's SfM README on GitHub](https://github.com/rohana96/SfM), but we implemented each of the steps ourselves.

### Preliminary feature extraction and matching

In [67]:
# Load all images from the 'dinos' folder and stack them into one array.
# NOTE(review): np.array() only yields a regular (N, H, W, C) array when every
# image has the same shape — confirm this holds for the dataset.
images = np.array(load_images_from_folder('dinos'))

In [68]:
# Intrinsic values of camera (assume all images were taken with the same camera)
# K = np.loadtxt('intrinsic_matrix.txt', dtype=float)

# Axes 1 and 2 of `images` are read as (height, width).
# NOTE(review): assumes `images` is a regular (N, H, W, C) array — confirm.
height, width = images.shape[1:3]
# Pinhole intrinsics: focal length 2360 on both axes, principal point at the image centre.
K = np.array([  # for dino
    [2360, 0, width / 2],
    [0, 2360, height / 2],
    [0, 0, 1]])

In [69]:
# Initialize Projection matrix list
# The first camera is the reference: identity rotation, zero translation,
# so its projection matrix is K @ [I | 0]. Later cameras are appended to these lists.
P_list = [K @ np.hstack((np.eye(3), np.zeros((3, 1))))]
R_list = [np.eye(3)]
T_list = [np.zeros(3)]

In [70]:
# 3x4 extrinsics [I | 0] of the reference camera and its 4x4 homogeneous form.
# NOTE(review): neither name is referenced elsewhere in the visible part of this notebook.
RT = np.hstack((np.eye(3), np.zeros((3, 1))))
RT_homo = np.vstack((RT, np.array([0, 0, 0, 1])))

In [71]:
# Initialize 3D point cloud
# None marks "no points yet"; the main loop replaces it with an array on the
# first iteration and stacks further points column-wise afterwards.
pts3d_cloud = None

In [72]:
# Feature extraction and matching
# Only the first five images are processed.
kp, des = feature_extraction_set(images[:5])

# matches[(i, j)] holds the verified matches between image i and image j (i < j).
matches = feature_matching_set(kp, des)

In [73]:
# Main for loop: chain the pose of each image onto the previous one and
# accumulate the triangulated points.

# Reset the accumulators to the reference camera so that re-running this cell
# does not append duplicate poses or points.
del P_list[1:], R_list[1:], T_list[1:]
pts3d_cloud = None

for i in range(1, len(kp)):

    # Define points for this iteration (one column per correspondence)
    pair_matches = matches[(i-1, i)]
    pts1 = np.transpose([kp[i-1][m.queryIdx].pt for m in pair_matches])
    pts2 = np.transpose([kp[i][m.trainIdx].pt for m in pair_matches])

    # Get Fundamental matrix from i - 1
    F = eight_point_algorithm(pts1, pts2)

    # Get Essential matrix from F
    E = essential_from_fundamental(K, F, K)

    # Get camera extrinsics from Essential
    R_set, T_set = pose_from_essential(R_list[i-1], T_list[i-1], E)

    # Enumerate the four (R, T) hypotheses explicitly so that candidate k of
    # the rotations, translations and triangulations all refer to the same
    # pose. Passing only R_set/T_set to the disambiguation would pair
    # triangulation k with the wrong (R, T).
    R_cands = np.array([R for R in R_set for _ in T_set])
    T_cands = np.array([T for _ in R_set for T in T_set])

    # Possible triangulations, one per hypothesis
    pts3d_cands = np.array([
        linear_triangulation3(K, R_list[i-1], T_list[i-1], K, R, T, pts1, pts2)
        for R, T in zip(R_cands, T_cands)
    ])

    # Disambiguation
    R, T, pts3d = double_disambiguation2(
        K, R_list[i-1], T_list[i-1], K, R_cands, T_cands, pts1, pts2, pts3d_cands
    )

    # Add R, T and pts3d to their respective lists
    P_list.append(K @ np.hstack((R, T[:, np.newaxis])))
    R_list.append(R)
    T_list.append(T)
    if isinstance(pts3d_cloud, np.ndarray):
        pts3d_cloud = np.hstack((pts3d_cloud, pts3d))
    else:
        pts3d_cloud = np.array(pts3d)

In [74]:
# for i in range(1, 5):
#     pts1 = np.transpose([kp[i-1][m.queryIdx].pt for m in matches[(i-1,i)]])
#     pts2 = np.transpose([kp[i][m.trainIdx].pt for m in matches[(i-1,i)]])
#     pts3d = linear_triangulation2(P_list[i-1], P_list[i], pts1, pts2)

#     if isinstance(pts3d_cloud, np.ndarray):
#         pts3d_cloud = np.hstack((pts3d_cloud, pts3d))
#     else:
#         pts3d_cloud = np.array(pts3d)

In [75]:
# Visualize Cameras
import numpy as np
import plotly.graph_objects as go

def plot_transforms(transforms):
    """Show each transform as three cones in a 3D plotly figure.

    Parameters
    ----------
    transforms : iterable of (R, T)
        ``R`` is a 3x3 matrix applied to the unit axes; ``T`` is indexed at
        positions 0, 1 and 2 to place the cones.

    For every transform, three cones named 'i', 'j' and 'k' are added at
    ``T``, pointing along ``R @ axis`` for the x, y and z unit vectors. The
    figure is displayed with ``fig.show()``; nothing is returned.
    """
    fig = go.Figure()

    # Unit axes, keyed by the trace name used in the legend.
    unit_axes = {
        'i': np.array([1, 0, 0]),
        'j': np.array([0, 1, 0]),
        'k': np.array([0, 0, 1]),
    }

    for R, T in transforms:
        for label, axis in unit_axes.items():
            # Rotate the unit axis and draw it as a cone anchored at T.
            direction = R @ axis
            fig.add_trace(go.Cone(
                x=[T[0]], y=[T[1]], z=[T[2]],
                u=[direction[0]], v=[direction[1]], w=[direction[2]],
                sizemode="scaled", sizeref=0.2, name=label,
            ))

    fig.update_layout(scene=dict(aspectmode='data'))

    fig.show()

# Draw one set of axis cones per recovered camera, pairing each rotation with its translation.
plot_transforms(zip(R_list, T_list))

In [76]:
# Visualize 3D points
import plotly.graph_objects as go

# pts3d_cloud holds the accumulated 3D points: rows 0, 1 and 2 are read as the X, Y and Z coordinates.
x = pts3d_cloud[0]
y = pts3d_cloud[1]
z = pts3d_cloud[2]

# Scatter plot of the cloud; marker colour encodes the Z coordinate.
fig = go.Figure(data=[go.Scatter3d(x=x, y=y, z=z,
                                   mode='markers',
                                   marker=dict(size=2, color=z, colorscale='Viridis'))])

fig.update_layout(scene=dict(xaxis_title='X',
                             yaxis_title='Y',
                             zaxis_title='Z'))

fig.show()