# Image Transformation Hints
Image processing:
```
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # grayscale image
frame = cv2.GaussianBlur(frame, (3, 3), 0)  # Gaussian blur (third argument is sigmaX; 0 derives it from the kernel size)
frame = cv2.medianBlur(frame, 5)  # median blur
frame = cv2.blur(frame, (5, 5))  # box (mean) blur
frame = cv2.filter2D(src=frame, ddepth=-1, kernel=kernel)  # filter with a custom kernel
```

In [40]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import normalize
import matplotlib.pyplot as plt
import math
import mpl_toolkits.mplot3d.axes3d as p3
import plotly.graph_objects as go

%matplotlib inline

In [48]:
# Input clip and frame-pairing parameters.
videoFile = "videos/mouse_short.mp4"
FRAMES_STEP = 30  # offset (in frames) between the two images of a pair
NEXT_FRAMES_COUNT = 0  # additional consecutive frames paired with each base frame

# 3x3 matrix diag(1006.34, 1006.34, 1) with every off-diagonal entry zero;
# it is applied as K^T F K when the essential matrix is formed.
# NOTE(review): presumably the camera intrinsics with focal length in pixels
# and the principal point at the origin - confirm against the calibration.
K = np.diag([1006.34, 1006.34, 1.])

# Starting reference point: a 3x1 column vector of ones.
CENTER = np.ones((3, 1))

In [28]:
#Display video
# Plays the clip in a window named "frame", starting over from the first
# frame whenever the stream runs out, until 'q' or Esc is pressed.

cam = cv2.VideoCapture(videoFile)

try:
    while cam.isOpened():
        ret, frame = cam.read()
        if ret:
            cv2.imshow("frame", frame)
        else:
            # End of stream: release the exhausted capture before reopening,
            # otherwise every replay leaves an open capture handle behind.
            cam.release()
            cam = cv2.VideoCapture(videoFile)
        # Poll the keyboard exactly once per iteration. Calling waitKey twice
        # lets the first call swallow the key press, so the second test
        # (Esc) would usually never see it.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:
            break
finally:
    # Runs on normal exit and on interruption, so the window and the capture
    # never outlive the cell.
    cam.release()
    cv2.destroyAllWindows()

In [13]:
#Detect matches with ORB
# Reads the whole clip into memory, then for every FRAMES_STEP-th frame i
# matches ORB features against frames i + FRAMES_STEP ..
# i + FRAMES_STEP + NEXT_FRAMES_COUNT. For each pair the pixel coordinates of
# the closest matches are stored in LS (frame i) and RS (frame j), one list
# per pair, and the matches are shown in a window. 'q' or Esc stops early.

MAX_MATCHES = 20  # keep at most this many of the closest matches per pair

cam = cv2.VideoCapture(videoFile)
frames = []

LS = []
RS = []

while cam.isOpened():
    ret, frame = cam.read()
    if not ret:
        break
    frames.append(frame)

cam.release()

orb = cv2.ORB_create()
# The matcher holds no per-pair state, so build it once instead of per pair.
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
done = False

for i in range(0, len(frames) - FRAMES_STEP, FRAMES_STEP):
    if done:
        break
    for j in range(i + FRAMES_STEP,
                   min(i + FRAMES_STEP + NEXT_FRAMES_COUNT + 1, len(frames))):
        # NOTE: despite its name, grayPrev is the *later* frame (j > i).
        gray = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
        grayPrev = cv2.cvtColor(frames[j], cv2.COLOR_BGR2GRAY)

        kp1, des1 = orb.detectAndCompute(gray, None)
        kp2, des2 = orb.detectAndCompute(grayPrev, None)

        # detectAndCompute returns None descriptors when a frame has no
        # keypoints; treat that as "no matches" instead of crashing.
        if des1 is None or des2 is None:
            matches = []
        else:
            matches = sorted(bf.match(des1, des2), key=lambda x: x.distance)

        # Slicing tolerates pairs with fewer than MAX_MATCHES matches, where
        # indexing matches[k] for k in range(20) raised IndexError.
        good = matches[:MAX_MATCHES]
        L = [kp1[match.queryIdx].pt for match in good]
        R = [kp2[match.trainIdx].pt for match in good]

        LS.append(L)
        RS.append(R)

        res = cv2.drawMatches(frames[i], kp1, frames[j], kp2, good, None, flags=0)

        cv2.putText(res, "Frame #" + str(i+1) + " -> Frame #" + str(j+1) , (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, 100)

        cv2.imshow('frame', res)

        # Read the key once; a second waitKey call would poll a new event
        # and miss the key consumed by the first.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:
            done = True
            break

# Keep the last visualisation on screen for three seconds before closing.
cv2.waitKey(3000)
cv2.destroyAllWindows()

In [14]:
#Detect matches with SIFT
# Reads the whole clip into memory, then for every FRAMES_STEP-th frame i
# matches SIFT features against frames i + FRAMES_STEP ..
# i + FRAMES_STEP + NEXT_FRAMES_COUNT with a brute-force 2-nearest-neighbour
# search. Matches that pass the ratio test are stored as pixel coordinates in
# LS (frame i) and RS (frame j), one list per pair, and drawn in a window.
# 'q' or Esc stops early.

RATIO = 0.75  # best match must be this much closer than the second best

cam = cv2.VideoCapture(videoFile)
frames = []

LS = []
RS = []

while cam.isOpened():
    ret, frame = cam.read()
    if not ret:
        break
    frames.append(frame)

cam.release()

sift = cv2.SIFT_create()
bf = cv2.BFMatcher()
done = False

for i in range(0, len(frames) - FRAMES_STEP, FRAMES_STEP):
    if done:
        break
    for j in range(i + FRAMES_STEP,
                   min(i + FRAMES_STEP + NEXT_FRAMES_COUNT + 1, len(frames))):
        # NOTE: despite its name, grayPrev is the *later* frame (j > i).
        gray = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
        grayPrev = cv2.cvtColor(frames[j], cv2.COLOR_BGR2GRAY)

        kp1, des1 = sift.detectAndCompute(gray, None)
        kp2, des2 = sift.detectAndCompute(grayPrev, None)

        # detectAndCompute returns None descriptors when a frame has no
        # keypoints; treat that as "no matches" instead of crashing.
        if des1 is None or des2 is None:
            matches = []
        else:
            matches = bf.knnMatch(des1, des2, k=2)

        L = []
        R = []
        good = []
        for pair in matches:
            # knnMatch yields fewer than two neighbours when the second image
            # has fewer than two descriptors; the ratio test needs both.
            if len(pair) < 2:
                continue
            m, n = pair
            if m.distance < RATIO*n.distance:
                L.append(kp1[m.queryIdx].pt)
                R.append(kp2[m.trainIdx].pt)
                good.append([m])

        LS.append(L)
        RS.append(R)

        res = cv2.drawMatchesKnn(frames[i], kp1, frames[j], kp2,
                                 good, None, flags=0)
        cv2.putText(res, "Frame #" + str(i+1) + " -> Frame #" + str(j+1) , (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, 100)

        cv2.imshow('frame', res)

        # Read the key once; a second waitKey call would poll a new event
        # and miss the key consumed by the first.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:
            done = True
            break

# Keep the last visualisation on screen for three seconds before closing.
cv2.waitKey(3000)
cv2.destroyAllWindows()

In [89]:
#Detect matches with SIFT on FLANN
# Reads the whole clip into memory, then for every FRAMES_STEP-th frame i
# matches SIFT features against frames i + FRAMES_STEP ..
# i + FRAMES_STEP + NEXT_FRAMES_COUNT with a FLANN 2-nearest-neighbour search.
# Matches that pass the ratio test are stored as pixel coordinates in
# LS (frame i) and RS (frame j), one list per pair, and drawn in a window.
# 'q' or Esc stops early.

RATIO = 0.5  # best match must be this much closer than the second best

cam = cv2.VideoCapture(videoFile)
frames = []

LS = []
RS = []

while cam.isOpened():
    ret, frame = cam.read()
    if not ret:
        break
    frames.append(frame)

cam.release()

sift = cv2.SIFT_create()
done = False

# FLANN algorithm ids: 0 is the linear (brute-force) index, 1 is the
# randomized KD-tree. The `trees` parameter only has meaning for the KD-tree,
# so the previous algorithm=0 silently fell back to a linear scan.
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)
flann = cv2.FlannBasedMatcher(index_params, search_params)

for i in range(0, len(frames) - FRAMES_STEP, FRAMES_STEP):
    if done:
        break
    for j in range(i + FRAMES_STEP,
                   min(i + FRAMES_STEP + NEXT_FRAMES_COUNT + 1, len(frames))):
        # NOTE: despite its name, grayPrev is the *later* frame (j > i).
        gray = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
        grayPrev = cv2.cvtColor(frames[j], cv2.COLOR_BGR2GRAY)

        kp1, des1 = sift.detectAndCompute(gray, None)
        kp2, des2 = sift.detectAndCompute(grayPrev, None)

        # Skip the search when either frame has no descriptors (None) or the
        # second frame cannot supply the two neighbours requested by k=2.
        if des1 is None or des2 is None or len(des2) < 2:
            matches = []
        else:
            matches = flann.knnMatch(des1, des2, k=2)

        L = []
        R = []
        good = []
        for pair in matches:
            # An approximate search may return fewer than two neighbours for
            # a query; the ratio test needs both.
            if len(pair) < 2:
                continue
            m, n = pair
            if m.distance < RATIO*n.distance:
                L.append(kp1[m.queryIdx].pt)
                R.append(kp2[m.trainIdx].pt)
                good.append([m])

        LS.append(L)
        RS.append(R)

        res = cv2.drawMatchesKnn(frames[i], kp1, frames[j], kp2,
                                 good, None, flags=0)
        cv2.putText(res, "Frame #" + str(i+1) + " -> Frame #" + str(j+1) , (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, 100)

        cv2.imshow('frame', res)

        # Read the key once; a second waitKey call would poll a new event
        # and miss the key consumed by the first.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:
            done = True
            break

# Keep the last visualisation on screen for three seconds before closing.
cv2.waitKey(3000)
cv2.destroyAllWindows()

Let $A$ be the $n \times 9$ matrix ($n \ge 9$) whose rows are built from the coordinates of $n$ keypoints matched between the two images

Let $f$ be the vector containing the $9$ entries of the "Fundamental Matrix", which relates any point in the first image to its corresponding point in the second

Let's minimize the value of $||A f||^2 = (A f)^T (A f) = f^T A^T A f$ with the constraint $f^T f = 1$

Define the loss function $L(f, \lambda) = f^T A^T A f - \lambda(f^T f - 1)$

${dL(f, \lambda) \over df} = 2 A^T A f - 2 \lambda f = 0 \Rightarrow A^T A f = \lambda f$

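At such a point $||A f||^2 = f^T A^T A f = \lambda f^T f = \lambda$, so we must find the minimum eigenvalue $\lambda$ of $A^T A$; the solution $f$ is the corresponding unit eigenvector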

In [90]:
#Computing 3d coordinates
# For every stored frame pair with at least 9 matches:
#   1. estimate the fundamental matrix as the eigenvector of A^T A with the
#      smallest eigenvalue,
#   2. turn it into an essential matrix with K and factor that into a
#      rotation R and a translation direction t,
#   3. triangulate each matched keypoint by linear least squares and append
#      the resulting 3x1 point, offset by LEFT_CENTER, to X,
#   4. advance LEFT_CENTER by t.
# NOTE(review): an essential matrix admits four (R, t) factorizations; only
# one is built here and no points-in-front-of-both-cameras check is done.

# Copy: the in-place `+=` at the end of the loop must not also move CENTER.
LEFT_CENTER = CENTER.copy()

X = []
ONLY_KEY_POINTS = True

# Constant factors of the SVD-based decomposition E = [t]x R, where
# [t]x = U Z U^T and R = U W V^T (note Z W = diag(1, 1, 0)).
W = np.array([[0., -1., 0.], [1., 0., 0.], [0., 0., 1.]])
Z = np.array([[0., 1., 0.], [-1., 0., 0.], [0., 0., 0.]])

for i in range(0, len(LS)):
    count_points = len(LS[i])
    if count_points >= 9:
        # Float storage: an integer array (np.full(..., 0)) would silently
        # truncate every sub-pixel coordinate product written into it.
        A = np.zeros((count_points, 9))
        for j in range(0, count_points):
            uL, vL = LS[i][j][0], LS[i][j][1]
            uR, vR = RS[i][j][0], RS[i][j][1]
            A[j] = [uL*uR, uR*vL, uR, uL*vR, vL*vR, vR, uL, vL, 1]
        print(A.shape)

        # A^T A is symmetric, so eigh applies and guarantees real output.
        # Eigenvectors are the COLUMNS of the returned matrix.
        eigenvalues, eigenvectors = np.linalg.eigh(np.dot(A.T, A))
        index = np.argmin(np.abs(eigenvalues))
        f = eigenvectors[:, index]

        F = np.reshape(f, (3, 3)).T #Fundamental Matrix
        E = np.dot(np.dot(K.T, F), K) #Essential Matrix

        U, D, V_T = np.linalg.svd(E)

        # Skew-symmetric matrix of the translation direction.
        T = np.dot(U, np.dot(Z, U.T))
        # Proper rotation: U W V^T is orthogonal, and because E is only
        # defined up to sign a reflection (det = -1) is fixed by negating.
        # The previous U Z D V^T was singular and therefore not a rotation.
        R = np.dot(U, np.dot(W, V_T))
        if np.linalg.det(R) < 0:
            R = -R

        # Read (t1, t2, t3) off the skew matrix; the trailing 1 makes it the
        # last column of the 4x4 homogeneous transform RT.
        t = np.array([[T[2, 1], T[0, 2], T[1, 0], 1]]).T
        RT = np.concatenate((np.concatenate((R, np.array([[0, 0, 0]]))), t), axis=1)
        M = np.concatenate((K, np.array([[0, 0, 0]]).T), axis=1)  # K [I | 0]
        P = np.dot(M, RT)                                         # K [R | t]

        if ONLY_KEY_POINTS:
            for j in range(0, count_points):
                uL, vL = LS[i][j][0], LS[i][j][1]
                uR, vR = RS[i][j][0], RS[i][j][1]

                # From u = (p1 . Xh) / (p3 . Xh) with Xh = [x; 1]:
                #   (u*p3[:3] - p1[:3]) . x = p1[3] - u*p3[3]
                # Right image pixels go with M, left image pixels with P.
                A = np.array([
                    uR*M[2, :3] - M[0, :3],
                    vR*M[2, :3] - M[1, :3],
                    uL*P[2, :3] - P[0, :3],
                    vL*P[2, :3] - P[1, :3],
                ])
                # The right-hand side depends on the pixel coordinates too,
                # so it has to be rebuilt for every point.
                b = np.array([[M[0, 3] - uR*M[2, 3]],
                              [M[1, 3] - vR*M[2, 3]],
                              [P[0, 3] - uL*P[2, 3]],
                              [P[1, 3] - vL*P[2, 3]]])

                #Position of key point at right camera frame
                # lstsq solves the same least-squares problem as the normal
                # equations without forming an explicit inverse.
                xR = np.linalg.lstsq(A, b, rcond=None)[0]

                #Position of key point at left camera frame
                xL = np.dot(RT, np.concatenate((xR, np.array([[1]]))))[0:-1, :]

                x = LEFT_CENTER + xL
                X.append(x)
        else:
            # NOTE(review): unfinished branch - the sampled pixel coordinates
            # are never used and nothing is appended to X.
            for j in range(0, 100):
                uL = np.random.randint(0, K[1, 2]*2 + 1)
                vL = np.random.randint(0, K[0, 2]*2 + 1)

        LEFT_CENTER += t[0: -1, :]

(89, 9)


In [91]:
#Visualize 3d dense cloud
# Draws every point collected in X as a blue marker in an interactive 3-D
# scatter plot whose three axes are all clamped to [-LIM, LIM].

LIM = 1e4

# Each entry of X is indexed as a 2-D column: rows 0, 1, 2 hold x, y, z.
xS = [point[0, 0] for point in X]
yS = [point[1, 0] for point in X]
zS = [point[2, 0] for point in X]

cloud = go.Scatter3d(x=xS, y=yS, z=zS, marker={"color": "blue"}, mode="markers")
fig = go.Figure(data=[cloud])

# Same fixed range on all three axes, with autoscaling switched off.
scene = {
    axis + "axis": dict(range=[-LIM, LIM], autorange=False)
    for axis in ("x", "y", "z")
}
scene["aspectratio"] = dict(x=1, y=1, z=1)

fig.update_layout(scene=scene, showlegend=False, width=800, height=800)

fig.show()