# Homework \#10

Implementation of visual odometry (VO)

## Google Colab


In [None]:
# Google Colab setup (disabled by default).
# Uncomment this whole cell when running on Google Colab: it mounts Google
# Drive, changes into the notebook folder, and performs the imports with the
# Colab-specific cv2_imshow patch.
# from google.colab import drive

# drive.mount('/content/drive')
# %cd '/content/drive/MyDrive/Colab Notebooks'

# import numpy as np
# import cv2
# import matplotlib.pyplot as plt
# import math
# import scipy.signal
# import scipy.linalg
# import importlib
# import mae6292.tools as mae6292

# from google.colab.patches import cv2_imshow
# from google.colab import files as FILE
# import os

## Jupyter Notebook

In [1]:
import importlib
import math
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import scipy.linalg
import scipy.signal
import scipy.spatial.distance

import mae6292.tools as mae6292
from mae6292.imshow import cv2_imshow

# VO - KITTI

The following code performs VO on the first 10 frames of the KITTI dataset.

## Bootstrapping

Bootstrapping to initialize VO has been implemented by
```
keypoints0, p_W0, R1, T1 = mae6292.VO_bootstrap(img0, img1, K, param_bootstrap, display = False)
```

In [2]:
#open a new window for plot
%matplotlib tk 

# load K and first two images
K = np.loadtxt('KITTI/K.txt')
img0 = cv2.imread('KITTI/000000.png', cv2.IMREAD_GRAYSCALE)
img1 = cv2.imread('KITTI/000001.png', cv2.IMREAD_GRAYSCALE)

# PARAMETERS
param_bootstrap = {
    # keypoints
    'W_harris_patch' : 4, # size of harris patch
    'kappa_harris' : 0.08, # kappa for harris score
    'N_keypoint' : 2000, # number of keypoints to be detected
    'W_nms' : 8, # patch size for non-maximum supression
    # KLT
    'W_KLT' : 4, # patch size for KLT
    'tol_KLT_bidir' : 1, # tolerence of bidirectional error
    # find essential matrix
    'tol_E' : 1, # tolerence for epipolar line distance
    'tol_E_RANSAC_prob' : 0.99, # eseential matrix RANSAC probability
    # triangulation
    'tol_TRI_mu' : 1e-3, # tolerence for the singular value ratio
    'tol_TRI_rep' : 1 # tolerence for the reprojection error
}

keypoints0, p_W0, R1, T1 = mae6292.VO_bootstrap(img0, img1, K, param_bootstrap, display = True)   

print('N_keypoints0=',len(keypoints0))


N_keypoints0= 851


## Localization and Mapping


For the first 10 frames, the sequential process for localization and mapping has been implemented by
```
    R, T, S, C, fig = mae6292.VO_localization_mapping(i_frame, img, img_rgb, img_pre, S_pre, C_pre, K, param, display_process)
```
The figure for each frame is saved in the folder `KITTI_output`.

In [3]:
#open a new window for plot
%matplotlib tk 
importlib.reload(mae6292)

param = {
    # keypoints
    'W_harris_patch' : 4, # size of harris patch
    'kappa_harris' : 0.08, # kappa for harris score
    'N_keypoint' : 2000, # number of keypoints to be detected
    'W_nms' : 8, # patch size for non-maximum supression
    # KLT
    'W_KLT' : 4, # patch size for KLT
    'tol_KLT_bidir' : 1, # tolerence of bidirectional error
    # triangulation
    'tol_TRI_mu' : 1e-3, # tolerence for the singular value ratio
    'tol_TRI_rep' : 0.5, # tolerence for the reprojection error
    # mapping
    'tol_keypoints_new' : 18 # new keypoints should be district from the tracked keypoints by this distance
} # up to 1000 

# iniitlize iteration using img0
img_pre = img0
S_pre = mae6292.state(keypoints0, p_W0, [np.zeros((3,1))])
C_pre = mae6292.candidate([],[],[],[])

# number of frames to process
N_frames = 10
display_process = True

# variables to save the vehicle location and the keypoints in the W-frame 
T_W = np.zeros((3,N_frames+1))
p_W = p_W0

for i_frame in range(1,N_frames+1):
    
    print('i_frame=',i_frame)

    # VO localization and mapping
    img = cv2.imread("KITTI/{:06d}.png".format(i_frame),cv2.IMREAD_GRAYSCALE)
    img_rgb = cv2.cvtColor(cv2.imread("KITTI/{:06d}.png".format(i_frame)), cv2.COLOR_BGR2RGB)
    R, T, S, C, fig = mae6292.VO_localization_mapping(i_frame, img, img_rgb, img_pre, S_pre, C_pre, K, param, display_process)
    img_pre, S_pre, C_pre = img, S, C

    # save figure 
    if display_process:
        fig.savefig("KITTI_output/{:06d}.png".format(i_frame))
        # !!!NOTE!!!:
        # when N_frames is large, uncomment the following to avoid generating many figures in your screeen
        # plt.close(fig)

    # save the vehicle location and the distinct keypoints 
    T_W[:,i_frame] = (-R.T@T).flatten()
    p_W_dist = scipy.spatial.distance.cdist( S.p_W.T, p_W.T , 'euclidean')
    index_distinct = np.where( np.min(p_W_dist, axis=1) > 3 )[0]
    p_W = np.append(p_W, S.p_W[:,index_distinct], axis=1)

    # print pose
    print('R=',R)
    print('T_W=',(-R.T@T).flatten())
    print(' ')




i_frame= 1
R= [[ 0.99999267  0.00327993  0.00197406]
 [-0.00328373  0.99999276  0.00192175]
 [-0.00196774 -0.00192822  0.9999962 ]]
T_W= [-0.01956634 -0.00425165  1.00988552]
 
i_frame= 2
S_pre= 851 , KLT_matched= 671 , PnP_inliers= 669 , S_new =  918
C_pre= 636 , KLT_matched= 441 , TRI_inliers= 249 , C_new =  370
R= [[ 0.99998542  0.00211629  0.00496859]
 [-0.00213457  0.99999096  0.00367715]
 [-0.00496077 -0.00368771  0.9999809 ]]
T_W= [-5.26776646e-02  2.92192222e-04  2.06377269e+00]
 
i_frame= 3
S_pre= 918 , KLT_matched= 809 , PnP_inliers= 804 , S_new =  847
C_pre= 370 , KLT_matched= 186 , TRI_inliers= 43 , C_new =  389
R= [[ 0.99996375  0.00218644  0.00822943]
 [-0.00222981  0.99998366  0.00526434]
 [-0.00821778 -0.0052825   0.99995228]]
T_W= [-0.08446169  0.0032795   3.14759821]
 
i_frame= 4
S_pre= 847 , KLT_matched= 741 , PnP_inliers= 700 , S_new =  726
C_pre= 389 , KLT_matched= 176 , TRI_inliers= 26 , C_new =  422
R= [[ 9.99929522e-01  6.65741991e-04  1.18536400e-02]
 [-7.39979

## Generate Trajectory Map

The following code visualizes the vehicle trajectory and the keypoints in the W-frame.

(You may need to adjust the axis limits.)

In [4]:
# 3D plot of the saved vehicle locations (blue line) and keypoints (red dots)
fig = plt.figure()
ax = fig.add_subplot(projection = '3d')

# vehicle trajectory: one row of T_W per coordinate
traj_x, traj_y, traj_z = T_W[0], T_W[1], T_W[2]
ax.plot(traj_x, traj_y, traj_z, 'b')

# keypoints in the W-frame
pts_x, pts_y, pts_z = p_W[0], p_W[1], p_W[2]
ax.scatter(pts_x, pts_y, pts_z, s=1, c='r', marker='o')

# fixed x and z limits; the y limits are left to matplotlib
ax.set(xlim=(-10, 20), zlim=(-2, 20))

label_style = {'fontsize': 6}
ax.set_xlabel('x', **label_style)
ax.set_ylabel('y', **label_style)
ax.set_zlabel('z', **label_style)

# viewing direction of the 3D axes
ax.view_init(elev=0., azim=-90)

## Generate Video

The output images can be converted into a video as follows.

In [9]:
# Collect the figures written by the localization/mapping loop.
# That loop saves frames 1..N_frames inclusive, so the range must end at
# N_frames+1; otherwise the last frame is left out of the video.
img_array = []

for i_frame in range(1,N_frames+1):
    filename = "KITTI_output/{:06d}.png".format(i_frame)
    frame = cv2.imread(filename)
    # cv2.imread returns None (it does not raise) when the file cannot be read
    if frame is None:
        raise FileNotFoundError('Could not read ' + filename)
    img_array.append(frame)

# cv2.VideoWriter takes the frame size as (width, height)
height, width = img_array[0].shape[:2]
size = (width,height)

fps = 3
codec = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('KITTI.mp4',codec, fps, size)

# a writer that failed to open would otherwise ignore every write silently
if not out.isOpened():
    raise RuntimeError('Could not open KITTI.mp4 for writing')

for frame in img_array:
    out.write(frame)

out.release()

## Problems 1 and 2

Follow the directions in `HW10_prob1.ipynb` and `HW10_prob2.ipynb`.