In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import cv2
import os
import time
import numpy as np
import math

In [2]:
dataset_path = "/home/avishkar/Desktop/research/KITTI_VO_GRAY/dataset"
scene="00"
img_path = os.path.join(dataset_path, "sequences", scene, "image_0")
pose_path = os.path.join(dataset_path, "poses", scene+".txt")

In [3]:
def get_image(img_path ,img_size=(1241,376), channels=1):

	# Here we manage channels etc
	img = cv2.imread(img_path)
	img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	img = cv2.resize(img, img_size, cv2.INTER_LINEAR)
	# img = np.reshape(img, (size[0], size[1channels))

	# print(img.shape)
	# cv2.imshow("img",img)
	# cv2.waitKey(0)

	return img


def load_images_temp(img_dir):
	img_files = os.listdir(img_dir)
	img_files.sort(key=lambda x: int(x.split('.')[0]))
	# print("Imgs count: ", len(img_files))
	for i in range(len(img_files) - 1):

		img_1 = get_image(os.path.join(img_dir, img_files[i]))
		img_2 = get_image(os.path.join(img_dir, img_files[i+1]))
  
		temp_set = np.concatenate([img_1, img_2], axis=-1)
		# print("Temp set ",temp_set.shape)

		yield temp_set


#Helper functions for pose preprocessing 
def isRotationMatrix(R):
    """ Checks if a matrix is a valid rotation matrix
        referred from https://www.learnopencv.com/rotation-matrix-to-euler-angles/
    """
    Rt = np.transpose(R)
    shouldBeIdentity = np.dot(Rt, R)
    I = np.identity(3, dtype = R.dtype)
    n = np.linalg.norm(I - shouldBeIdentity)
    return n < 1e-6

def rotationMatrixToEulerAngles(R):
    """ calculates rotation matrix to euler angles
        referred from https://www.learnopencv.com/rotation-matrix-to-euler-angles
    """
    assert(isRotationMatrix(R))
    sy = math.sqrt(R[0,0] * R[0,0] +  R[1,0] * R[1,0])
    singular = sy < 1e-6

    if  not singular :
        x = math.atan2(R[2,1] , R[2,2])
        y = math.atan2(-R[2,0], sy)
        z = math.atan2(R[1,0], R[0,0])
    else :
        x = math.atan2(-R[1,2], R[1,1])
        y = math.atan2(-R[2,0], sy)
        z = 0

    return np.array([x, y, z])

def getMatrices(all_poses):
    all_matrices = []
    for i in range(len(all_poses)):
        #print("I: ",i)
        j = all_poses[i]
        #print("J:   ",j)
        p = np.array([j[3], j[7], j[11]])
        #print("P:   ", p)
        R = np.array([[j[0],j[1],j[2]],
                [j[4],j[5],j[6]],
                [j[8],j[9],j[10]]])
        #print("R:   ", R)
        angles = rotationMatrixToEulerAngles(R)
        #print("Angles: ",angles)
        matrix = np.concatenate((p,angles))
        # print("MATRIX: ", matrix)
        all_matrices.append(matrix)
    return all_matrices

def load_poses(pose_file):
	print ("pose_path",pose_file)
	poses = []
	poses_set = []
	with open(pose_file, 'r') as f:
		lines = f.readlines()
		for line in lines:
			# print(line)
			pose = np.fromstring(line, dtype=float, sep=' ')
			poses.append(pose)
	# print(poses[0])
	poses = getMatrices(poses)
	# print(poses[0])

	for i in range(len(poses)-1):
		pose1 = poses[i]
		pose2 = poses[i+1]
		finalpose = pose2-pose1
		poses_set.append(finalpose)
	print("poses count: ",len(poses_set))

	return poses_set          


def load_data_batch(img_path):

	all_poses = load_poses(pose_path)

	img_batch=[]
	pose_batch=[]

	img_generator = load_images_temp(img_path)

	for i, img in enumerate(img_generator):
		img_batch.append(img)
		pose_batch.append(all_poses[i])

		if (i+1)%10==0:
			img_batch = np.reshape(img_batch, (-1, 2, 376,1241 ))
			# print("\nimg batch shape ", img_batch.shape)
			img_batch = torch.FloatTensor(np.array(img_batch))
			pose_batch = torch.FloatTensor(np.array(pose_batch))
			# print("Img shape:", img.shape)
			# print(f"Img batch len: {len(img_batch)}")
			# print("Pose Shape:", all_poses[i].shape)
			# print("Pose batch len: " ,len(pose_batch))

			yield img_batch , pose_batch
			img_batch=[]
			pose_batch=[]


In [4]:
class MyNet(nn.Module):
    def __init__(self, in_channels):
        super(MyNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=(3,3), stride = (2,2), padding=(1,1) )
        self.conv2 = nn.Conv2d(64, 128, kernel_size=(3,3), stride = (2,2), padding=(1,1) )
        self.conv3 = nn.Conv2d(128, 256, kernel_size=(3,3), stride = (2,2), padding=(1,1) )
        self.conv4 = nn.Conv2d(256, 32, kernel_size=(3,3), stride = (2,2), padding=(1,1))
        self.fc1 = nn.Linear(32*24*78, 6)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        # print(x.shape)
        x = F.relu(self.conv2(x))
        # print(x.shape)
        x = F.relu(self.conv3(x))
        # print(x.shape)
        x = F.relu(self.conv4(x))
        # print(x.shape)

        x = x.view(x.size(0), -1) # Flatten the layer
        x = self.fc1(x)
        # print(x.shape)

        return x

In [5]:
model = MyNet(2)
print(model)

test_input = torch.randn(1, 376, 1241)
print(test_input.shape)
print()

MyNet(
  (conv1): Conv2d(2, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv3): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv4): Conv2d(256, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (fc1): Linear(in_features=59904, out_features=6, bias=True)
)
torch.Size([1, 376, 1241])



In [7]:
num_epochs = 1
learning_rate=0.0001
device = "cuda" if torch.cuda.is_available() else "cpu"
criterion = torch.nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.5, weight_decay=0.5)

# Training Loop

for epoch in range(num_epochs):
    start = time.time()
    print("Epoch: ", epoch+1)
    batch_loader = load_data_batch(img_path)
    for i, (img_batch, pose_batch) in enumerate(batch_loader):
    
        # print("Img batch shape ",img_batch.shape)
        # print("pose batch shape ", pose_batch.shape)
        
        # print("pose batch shape ", pose_batch.shape)

        img_batch = img_batch.to(device)
        pose_batch = pose_batch.to(device)
        

        output = model(img_batch).to(device)
        # print("Output shape ", output.shape)
        loss = criterion(output, pose_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        print("Loss: ",loss)

        if i==100:
            break
        # break

Epoch:  1
pose_path /home/avishkar/Desktop/research/KITTI_VO_GRAY/dataset/poses/00.txt
poses count:  4540
Loss:  tensor(0.1595, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.1843, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.1578, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.1682, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.1716, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.1664, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.1396, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.0944, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.0623, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.0416, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.0395, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.0396, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.0448, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.0670, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.1136, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.1304, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.1366, grad_fn=<MseLossBackward0>)
Loss:  tensor(0.1116, grad_fn=<MseLossBackwa