# Visual odometry pipeline

### Import requirements

In [1]:
import cv2
import glob
import matplotlib.pyplot as plt
import numpy as np

from ImageProcessor import ImageProcessor
from StereoMatcher import StereoMatcher
from VoxelGrid import VoxelGrid
from helperScripts.TimeKeeper import TimeKeeper

%matplotlib ipympl

### Load images and depth map

In [2]:
folderChoice = 2
path = "".join(["testImages/visualOdometryTestImages/", str(folderChoice)])

# Load set of top images
imageGlobT = sorted(glob.glob("".join([path, "/top_*", ".png"])))

# Load set of bottom images
imageGlobB = sorted(glob.glob("".join([path, "/bottom_*", ".png"])))

# Load depth map
imageGlobD = sorted(glob.glob("".join([path, "/topDepth_*", ".png"])))

if not (len(imageGlobT)==len(imageGlobB) and \
                len(imageGlobB)==len(imageGlobD)):
    print("Images could not be matched")

print ("Selections: 0-{}".format(len(imageGlobT)-2))

Selections: 0-13


In [3]:
# Top images
imagesT = []
for imageFile in imageGlobT:
    imagesT.append(cv2.imread(imageFile))

# Bottom images
imagesB = []
for imageFile in imageGlobB:
    imagesB.append(cv2.imread(imageFile))

# Depth maps; -1 flag to load them as is
depthMaps = []
for imageFile in imageGlobD:
    depthMaps.append(cv2.imread(imageFile, -1))

print(len(imagesT), len(imagesB), len(depthMaps))

15 15 15


### View top camera images

In [4]:
selection = 0

plt.figure(figsize=(7,7))
plt.suptitle("Top images")
plt.imshow(cv2.cvtColor(cv2.rotate(np.hstack([imagesT[selection], imagesT[selection+1]]), cv2.ROTATE_90_CLOCKWISE), cv2.COLOR_BGR2RGB))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.image.AxesImage at 0x1c489914940>

### View bottom camera images

In [5]:
plt.figure(figsize=(7,7))
plt.suptitle("Bottom images")
plt.imshow(cv2.cvtColor(cv2.rotate(np.hstack([imagesB[selection], imagesB[selection+1]]), cv2.ROTATE_90_CLOCKWISE), cv2.COLOR_BGR2RGB))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.image.AxesImage at 0x1c489c08b00>

### View depth images
Depth images correspond to top camera

In [6]:
plt.figure(figsize=(7,7))
plt.suptitle("Depth maps")
plt.imshow(cv2.rotate(np.hstack([depthMaps[selection], depthMaps[selection+1]]), cv2.ROTATE_90_CLOCKWISE))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.image.AxesImage at 0x1c48b4f40f0>

### Depth map units and dimensions

In [7]:
print(depthMaps[0].shape)
print(depthMaps[0][int(depthMaps[0].shape[0]/2), \
                    int(depthMaps[0].shape[1]/2)], "mm")

(640, 360)
541 mm


### Instantiating TimeKeeper

In [8]:
timeKeeper = TimeKeeper()

### Loading camera calibration matrix

In [9]:
imageProcessor = ImageProcessor()
imageProcessor.loadMonoCalibration()

stereoMatcher = StereoMatcher(imageProcessor=imageProcessor)

voxelGrid = VoxelGrid(stereoMatcher=stereoMatcher, imageProcessor=imageProcessor)

baseRotation = voxelGrid.redefineRotationMatrix
k=imageProcessor.cameraMatrixL

print(baseRotation)
print(k)

Reading from data/monoCalibration.json
Loaded mono calibration
Reading from data/parametersSGBM.json
[[ 0  0 -1]
 [ 0  1  0]
 [ 1  0  0]]
[[592.63974229   0.         188.47568825]
 [  0.         592.98181459 312.47351841]
 [  0.           0.           1.        ]]


### Feature Extraction


In [10]:
def extractFeatures(orb, image):
    """Find keypoints and descriptors for the image"""
    keypoints = orb.detect(image, None)
    keypoints, descriptors = orb.compute(image, keypoints)
    
    return keypoints, descriptors

In [11]:
timeKeeper.startPerfCounter()

orb = cv2.ORB_create()

keypoints, descriptors = extractFeatures(orb, imagesT[selection])

print("Number of features detected in frame {}: {}"\
                                .format(selection, len(keypoints)))
print("Coordinates of first keypoint in frame {}: {}"\
                                .format(selection, str(keypoints[0].pt)))

timeKeeper.printPerfCounter()

Number of features detected in frame 0: 500
Coordinates of first keypoint in frame 0: (288.0, 130.0)
Completed in 0.108893 seconds


### Visualize features

In [12]:
def visualizeFeatures(image, keypoints, flag):
    """Visualize extracted features in image"""
    display = cv2.drawKeypoints(image, keypoints, None, flags=flag)
    plt.figure(figsize=(7, 7))
    plt.imshow(cv2.cvtColor(cv2.rotate(display, cv2.ROTATE_90_CLOCKWISE), cv2.COLOR_BGR2RGB))

In [13]:
visualizeFeatures(imagesT[selection], keypoints, 4)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [14]:
visualizeFeatures(imagesT[selection], keypoints, 2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [15]:
def extractAllFeatures(images, extractFeatures, orb):
    """Find keypoints and descriptors for each image in folder"""
    allKeypoints = []
    allDescriptors = []
    
    for image in images:
        keypoints, descriptors = extractFeatures(orb, image)
        allKeypoints.append(keypoints)
        allDescriptors.append(descriptors)
    
    return allKeypoints, allDescriptors

In [16]:
timeKeeper.startPerfCounter()

allKeypoints, allDescriptors = extractAllFeatures(imagesT, extractFeatures, orb)

print(len(allKeypoints), len(allDescriptors))

print("Number of features detected in frame {}: {}"\
                            .format(selection, len(allKeypoints[selection])))
print("Coordinates of the first keypoint in frame {}: {}\n"\
                            .format(selection, str(allKeypoints[selection][0].pt)))

timeKeeper.printPerfCounter()
print("Average time for each extraction: {:.5f}".format(timeKeeper.getElapsedTime()/len(imagesT)))

15 15
Number of features detected in frame 0: 500
Coordinates of the first keypoint in frame 0: (288.0, 130.0)

Completed in 0.116350 seconds
Average time for each extraction: 0.00776


### Feature matching

In [17]:
def matchFeatures(bfMatcher, descriptors0, descriptors1, bestNMatches):
    """Match features from two images"""
    match = bfMatcher.match(descriptors0, descriptors1)
    match = sorted(match, key = lambda x:x.distance)

    return match[:bestNMatches]

In [18]:
timeKeeper.startPerfCounter()

bfMatcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

bestNMatches = 50

match = matchFeatures(bfMatcher, allDescriptors[selection], \
                    allDescriptors[selection+1], bestNMatches)

print("Number of features matched in frames {} and {}: {}"\
                        .format(selection, selection+1, len(match)))

timeKeeper.printPerfCounter()

Number of features matched in frames 0 and 1: 50
Completed in 0.002412 seconds


In [19]:
def visualizeMatches(image0, keypoints0, image1, keypoints1, match):
    imageMatches = cv2.drawMatches(image0, keypoints0, \
                            image1, keypoints1, match, None, flags=2)

    plt.figure(figsize=(7, 7))
    plt.imshow(cv2.cvtColor(imageMatches, cv2.COLOR_BGR2RGB))

In [20]:
visualizeMatches(imagesT[selection], allKeypoints[selection], \
                imagesT[selection+1], allKeypoints[selection+1], match)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [21]:
def matchAllFeatures(allDescriptors, matchFeatures, bfMatcher, bestNMatches):
    """Match features for each subsequent image pair in the dataset"""
    matches = []

    for i in range(len(allDescriptors)-1):
        descriptor1 = allDescriptors[i]
        descriptor2 = allDescriptors[i+1]

        match = matchFeatures(bfMatcher, descriptor1, descriptor2, bestNMatches)

        matches.append(match)
    
    return matches

In [22]:
timeKeeper.startPerfCounter()

matches = matchAllFeatures(allDescriptors, matchFeatures, bfMatcher, bestNMatches)

print("Number of features matched in frames {} and {}: {}"\
            .format(selection, selection+1, len(matches[selection])))

timeKeeper.printPerfCounter()
print("Average time for each frame pair match: {:.5f}".format(timeKeeper.getElapsedTime()/len(imagesT)))

Number of features matched in frames 0 and 1: 50
Completed in 0.016041 seconds
Average time for each frame pair match: 0.00107


### Trajectory Estimation

In [23]:
def estimateMotion(match, keypoints0, keypoints1, k, depthMap=None, usePnP=False):
    """Estimate camera motion from a pair of subsequent image frames"""
    rmat=np.eye(3)
    tvec=np.zeros((3,1))
    imagePoints0 = []
    imagePoints1 = []

    for m in match:
        train_idx = m.trainIdx
        query_idx = m.queryIdx

        p1x, p1y = keypoints0[query_idx].pt 
        imagePoints0.append([p1x,p1y])

        p2x,p2y = keypoints1[train_idx].pt 
        imagePoints1.append([p2x,p2y])
    
    E, mask = cv2.findEssentialMat(\
                    np.array(imagePoints0), np.array(imagePoints1), k, \
                    cv2.RANSAC, 0.999, 1.0) 

    retval, rmat, tvec, mask = cv2.recoverPose(E, np.array(imagePoints0), \
                    np.array(imagePoints1), k)

    return rmat, tvec, imagePoints0, imagePoints1

In [24]:
timeKeeper.startPerfCounter()

match = matches[selection]
rmat, tvec, imagePoints0, imagePoints1 = estimateMotion(\
                        match, allKeypoints[selection], \
                        allKeypoints[selection+1], k, \
                        depthMap=depthMaps[selection])

print("Estimated rotation:\n {0}".format(rmat))
print("Estimated translation:\n {0}".format(tvec))

timeKeeper.printPerfCounter()

Estimated rotation:
 [[ 0.99922616  0.03659853 -0.01440941]
 [-0.03542763  0.99659124  0.0745036 ]
 [ 0.01708701 -0.07393546  0.99711663]]
Estimated translation:
 [[ 0.27732344]
 [ 0.15106652]
 [-0.94882592]]
Completed in 0.002784 seconds


### Camera movement visualization

In [25]:
def visualizeCameraMovement(image0, imagePoints0, \
                image1, imagePoints1, showImageAfterMove=False):
    """Visualize camera movement across frames"""
    image0 = image0.copy()
    image1 = image1.copy()

    for i in range(0, len(imagePoints0)):
        # Coordinates of a point on t frame
        p1 = (int(imagePoints0[i][0]), int(imagePoints0[i][1]))
        # Coordinates of the same point on t+1 frame
        p2 = (int(imagePoints1[i][0]), int(imagePoints1[i][1]))

        cv2.circle(image0, p1, 5, (0, 255, 0), 1)
        cv2.arrowedLine(image0, p1, p2, (0, 255, 0), 1)
        cv2.circle(image0, p2, 5, (255, 0, 0), 1)

        if showImageAfterMove:
            cv2.circle(image1, p2, 5, (255, 0, 0), 1)
    
    if showImageAfterMove: 
        return image1
    else:
        return image0

In [26]:
imageMovementBefore = visualizeCameraMovement(imagesT[selection], \
                    imagePoints0, imagesT[selection+1], imagePoints1)

imageMovementAfter = visualizeCameraMovement(imagesT[selection], \
                    imagePoints0, imagesT[selection+1], imagePoints1, \
                    showImageAfterMove=True)

plt.figure(figsize=(7,7))
plt.imshow(cv2.cvtColor(np.hstack([imageMovementBefore, imageMovementAfter]), \
                                                        cv2.COLOR_BGR2RGB))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.image.AxesImage at 0x1c48bb99748>

### Camera Trajectory Estimation

In [27]:
def estimateTrajectory(estimateMotion, matches, allKeypoints, k, \
                                                baseRotation, depthMaps):
    """Estimate complete camera trajectory from subsequent image pairs"""
    trajectory = [np.array([0, 0, 0])]
    rotation = [baseRotation]
    #rotation = [np.eye(3)]

    #R = np.diag([1,1,1])
    R = baseRotation.copy().T
    T = np.zeros([3, 1])
    RT = np.hstack([R, T])
    RT = np.vstack([RT, np.zeros([1, 4])])
    RT[-1, -1] = 1

    for i in range(len(matches)):     
        match = matches[i]
        keypoints0 = allKeypoints[i]
        keypoints1 = allKeypoints[i+1]
        depth = depthMaps[i]

        rmat, tvec, imagePoints0, imagePoints1 = estimateMotion(\
                                match, keypoints0, keypoints1, k, depthMap=depth)

        rt_mtx = np.hstack([rmat, tvec])
        rt_mtx = np.vstack([rt_mtx, np.zeros([1, 4])])
        rt_mtx[-1, -1] = 1

        rt_mtx_inv = np.linalg.inv(rt_mtx)
        
        RT = np.dot(RT, rt_mtx_inv)

        newTrajectory = RT[:3, 3]
        newRotation = RT[:3, :3]

        trajectory.append(newTrajectory*100)
        rotation.append(newRotation)

    trajectory = np.array(trajectory)
    rotation = np.array(rotation)

    return trajectory, rotation

In [28]:
timeKeeper.startPerfCounter()

trajectory, rotation = estimateTrajectory(\
                        estimateMotion, matches, allKeypoints, k, \
                        baseRotation, depthMaps=depthMaps)

print("Camera location in point {} is: \n {}\n"\
                        .format(selection, trajectory[selection,:]))
print("Camera location in point {} is: \n {}\n"\
                        .format(selection+1, trajectory[selection+1,:]))
print("Length of trajectory: {}".format(trajectory.shape[0]))

timeKeeper.printPerfCounter()
print("Average time for trajectory segment: {:.5f}"\
                        .format(timeKeeper.getElapsedTime()/len(imagesT)))

Camera location in point 0 is: 
 [0. 0. 0.]

Camera location in point 1 is: 
 [ 93.88311705 -23.08530782  25.55443007]

Length of trajectory: 15
Completed in 0.040066 seconds
Average time for trajectory segment: 0.00267


In [29]:
print("Trajectory at each step:\n", trajectory)
print("\nRotation at each step:\n", rotation)

Trajectory at each step:
 [[   0.            0.            0.        ]
 [  93.88311705  -23.08530782   25.55443007]
 [ 148.75360795    6.04569682  -52.80763854]
 [ 137.39707564  -80.31847456 -101.9223927 ]
 [ 152.88985462   15.62461675  -78.36614025]
 [ 175.37053792  -67.91578141 -128.52210454]
 [ 102.66991199 -133.0658341  -106.84055427]
 [ 170.43084578  -73.7897505   -63.31123353]
 [ 141.01551565 -141.74902843 -130.5144693 ]
 [ 109.64400133 -225.4749662  -175.30057688]
 [  89.16537986 -301.5769965  -236.85629999]
 [  38.10191529 -373.87856344 -283.38592501]
 [ -50.78841314 -416.7396086  -267.21680879]
 [-122.99909403 -477.17270543 -300.88368232]
 [-144.0208982  -566.44703651 -340.7356278 ]]

Rotation at each step:
 [[[ 0.          0.         -1.        ]
  [ 0.          1.          0.        ]
  [ 1.          0.          0.        ]]

 [[-0.01440941  0.0745036   0.99711663]
  [ 0.03659853  0.99659124 -0.07393546]
  [-0.99922616  0.03542763 -0.01708701]]

 [[-0.0089741   0.15467274  0

In [30]:
def visualizeTrajectory(trajectory):
    """Show a 3D plot of the trajectory"""
    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")

    ax.plot(trajectory[:,0], \
            trajectory[:,1], \
            trajectory[:,2])

    ax.set_xlabel("$x$")
    ax.set_ylabel("$y$")
    ax.set_zlabel("$z$")

    ax.set_xlim(-1000,1000)
    ax.set_ylim(-1000,1000)
    ax.set_zlim(-1000,1000)

    plt.show()

In [31]:
visualizeTrajectory(trajectory)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …