In [126]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import torch.utils.data
import torch.optim as optim

from tensorboardX import SummaryWriter

import os
from utils import tools


# import FlowNetC
from networks import FlowNetC

from PIL import Image
import numpy as np
import pandas as pd

# import flowlib

from PIL import Image

import tqdm
import cv2
import csv
import time

In [75]:
cap = cv2.VideoCapture('data/caolin_foot_trial1-4.mp4')
name = 'caolin'

framerate = cap.get(5)
total_image = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

print(f'The oringinal framerate is {cap.get(5)} with frame resolution of: {cap.get(3), cap.get(4)}')
print(f'The total number of frame in this video is {total_image}')

The oringinal framerate is 25.0 with frame resolution of: (768.0, 576.0)
The total number of frame in this video is 17155


In [76]:
count = 1
while(cap.isOpened()):
    frameID = cap.get(1) # get the current frame number
    ret, frame = cap.read()
    
    if(ret != True):
        print(f'We\'ve gotten {int(frameID/5)+1} frames from this video.')
        break
    if (frameID % int(framerate/5) == 0):
        frame = frame[:, 170:600 , :]
        filename = 'Train_1/'+ name + "_frame%04d.jpg" % count;count+=1
        cv2.imwrite(filename, frame)
cap.release()

We've gotten 3432 frames from this video.


## Building Dataset

In [87]:
image_files = os.listdir('Train_1/')
image_files.sort()
image_files = image_files[1:3401]

In [88]:
len(image_files)

3400

In [2]:
class MyDataset:
    
    def __init__(self, base_dir, sequence):  # base_dir(image) and sequence(lstm) are directories
        self.base_dir = base_dir
        self.sequence = sequence
        self.base_path_img = self.base_dir + self.sequence + '/cam0/data/'
        
        image_files = os.listdir(self.base_dir)
        image_files.sort()
        image_files = image_files[1:3401]
        
#         self.data_files = os.listdir(self.base_dir + self.sequence + '/cam0/data/')
#         self.data_files.sort()
        
        ## relative camera pose
        self.trajectory_relative = self.read_R6TrajFile('/vicon0/sampled_relative_R6.csv')
        
        ## abosolute camera pose (global)
        self.trajectory_abs = self.readTrajectoryFile('/vicon0/sampled.csv')
        
        ## imu
        self.imu = self.readIMU_File('/imu0/data.csv')
        
        self.imu_seq_len = 5
    
    def readTrajectoryFile(self, path):
        traj = []
        with open(self.base_dir + self.sequence + path) as csvfile:
            spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
            for row in spamreader:
                parsed = [float(row[1]), float(row[2]), float(row[3]), 
                          float(row[4]), float(row[5]), float(row[6]), float(row[7])]
                traj.append(parsed)
                
        return np.array(traj)
    
    def read_R6TrajFile(self, path):
        traj = []
        with open(self.base_dir + self.sequence + path) as csvfile:
            spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
            for row in spamreader:
                parsed = [float(row[1]), float(row[2]), float(row[3]), 
                          float(row[4]), float(row[5]), float(row[6])]
                traj.append(parsed)
                
        return np.array(traj)
    
    def readIMU_File(self, path):
        imu = []
        count = 0
        with open(self.base_dir + self.sequence + path) as csvfile:
            spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
            for row in spamreader:
                if count == 0:
                    count += 1
                    continue
                parsed = [float(row[1]), float(row[2]), float(row[3]), 
                          float(row[4]), float(row[5]), float(row[6])]
                imu.append(parsed)
                
        return np.array(imu)
    
    def getTrajectoryAbs(self, idx):
        return self.trajectory_abs[idx]
    
    def getTrajectoryAbsAll(self):
        return self.trajectory_abs
    
    def getIMU(self):
        return self.imu
    
    def __len__(self):
        return len(self.trajectory_relative)
    
    def load_img_bat(self, idx, batch):
        batch_x = []
        batch_imu = []
        for i in range(batch):
            x_data_np_1 = np.array(Image.open(self.base_path_img + self.data_files[idx + i]))
            x_data_np_2 = np.array(Image.open(self.base_path_img + self.data_files[idx+1 + i]))

            ## 3 channels
            x_data_np_1 = np.array([x_data_np_1, x_data_np_1, x_data_np_1])
            x_data_np_2 = np.array([x_data_np_2, x_data_np_2, x_data_np_2])

            X = np.array([x_data_np_1, x_data_np_2])
            batch_x.append(X)

            tmp = np.array(self.imu[idx-self.imu_seq_len+1 + i:idx+1 + i])
            batch_imu.append(tmp)
            
        
        batch_x = np.array(batch_x)
        batch_imu = np.array(batch_imu)
        
        X = Variable(torch.from_numpy(batch_x).type(torch.FloatTensor).cuda())    
        X2 = Variable(torch.from_numpy(batch_imu).type(torch.FloatTensor).cuda())    
        
        ## F2F gt
        Y = Variable(torch.from_numpy(self.trajectory_relative[idx+1:idx+1+batch]).type(torch.FloatTensor).cuda())
        
        ## global pose gt
        Y2 = Variable(torch.from_numpy(self.trajectory_abs[idx+1:idx+1+batch]).type(torch.FloatTensor).cuda())
        
        return X, X2, Y, Y2   

In [102]:
image_1 = np.array(Image.open('Train_1/'+image_files[0]))
image_2 = np.array(Image.open('Train_1/'+image_files[1]))
image_1 = image_1.reshape(3,576,430)
image_2 = image_2.reshape(3,576,430);

In [103]:
image_1.shape, image_2.shape

((3, 576, 430), (3, 576, 430))

In [104]:
X = np.array([image_1, image_2])
X.shape

(2, 3, 576, 430)

In [105]:
X = X.reshape(1, 2, 3, 576, 430)
X.shape

(1, 2, 3, 576, 430)

In [107]:
X.shape

(1, 2, 3, 576, 430)

In [114]:
batch_size, timesteps, C, H, W = X.shape
        
## Input1: Feed image pairs to FlownetC
c_in = Variable(torch.from_numpy(X.reshape(batch_size, timesteps * C, H, W)).type(torch.FloatTensor))
c_in.shape

torch.Size([1, 6, 576, 430])

In [115]:
flownet_c = FlowNetC.FlowNetC(batchNorm=False)

In [116]:
output = flownet_c(c_in)



In [120]:
output.view(1,1,-1).shape

torch.Size([1, 1, 64512])

In [137]:
sequence = pd.read_csv('data/result_1.csv',header = None)
sequence.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,-84.0,-86.0,-77.0,-75.0,-330.0,-310.0,-0.050544,0.000217,0.0004402,-0.050176,9.3e-05,0.000675
1,-83.99,-85.994,-76.995,-74.995,-329.96,-309.97,-0.050538,0.000217,0.00044025,-0.050176,9.3e-05,0.000675
2,-83.957,-85.978,-76.98,-74.978,-329.85,-309.89,-0.050517,0.000217,0.00043361,-0.050176,9.3e-05,0.000675
3,-83.906,-85.946,-76.952,-74.949,-329.67,-309.76,-0.05049,0.000217,0.00042702,-0.050176,9.3e-05,0.000675
4,-83.835,-85.899,-76.912,-74.913,-329.43,-309.58,-0.050465,0.000218,0.00042383,-0.050176,9.3e-05,0.000675


In [174]:
sequence = sequence[:874300]
# sequence.astype('float64')
sequence.shape

(874300, 12)

In [175]:
sequence.index

RangeIndex(start=0, stop=874300, step=1)

In [176]:
new_df = sequence.groupby(sequence.index//100).mean()
new_df.shape

(8743, 11)

In [182]:
array_input = np.array(new_df)
input_lstm  = Variable(torch.from_numpy(array_input).type(torch.FloatTensor))
input_lstm = input_lstm[:8000,:]
input_lstm.shape

torch.Size([8000, 11])

In [183]:
rnnIMU = nn.LSTM(
            input_size=11, 
            hidden_size=6,
            num_layers=2,
            batch_first=True)

In [186]:
input_lstm = input_lstm.view(-1,10,11)
input_lstm.shape

torch.Size([800, 10, 11])

In [191]:
test_input = input_lstm[1]


test_input = test_input.view(10,1,11)
test_input.shape
imu_out, (imu_n, imu_c)  = rnnIMU(test_input)

In [193]:
test_input.shape

torch.Size([10, 1, 11])

In [192]:
imu_out.shape

torch.Size([10, 1, 6])

In [3]:
class Vinet(nn.Module):
    def __init__(self):
        super(Vinet, self).__init__()
        self.rnn = nn.LSTM(
            input_size=49165,#49152,#24576, 
            hidden_size=1024,#64, 
            num_layers=2,
            batch_first=True)
        self.rnn.cuda()
        
        self.rnnIMU = nn.LSTM(
            input_size=6, 
            hidden_size=6,
            num_layers=2,
            batch_first=True)
        self.rnnIMU.cuda()
        
        self.linear1 = nn.Linear(1024, 128)
        self.linear2 = nn.Linear(128, 6)
        #self.linear3 = nn.Linear(128, 6)
        self.linear1.cuda()
        self.linear2.cuda()
        #self.linear3.cuda()
        
        
        
        checkpoint = None
        checkpoint_pytorch = 'FlowNet2-C_checkpoint.pth.tar'
        #checkpoint_pytorch = 'FlowNet2-SD_checkpoint.pth.tar'
        if os.path.isfile(checkpoint_pytorch):
            print(f'found the checkpoint file')
            checkpoint = torch.load(checkpoint_pytorch,\
                                map_location=lambda storage, loc: storage.cuda(0))
            best_err = checkpoint['best_EPE']
        else:
            print('No checkpoint')

        
        self.flownet_c = FlowNetC.FlowNetC(batchNorm=False)
        self.flownet_c.load_state_dict(checkpoint['state_dict'])
        self.flownet_c.cuda()

    def forward(self, image, imu):
        batch_size, timesteps, C, H, W = image.size()
        
        ## Input1: Feed image pairs to FlownetC
        c_in = image.view(batch_size, timesteps * C, H, W)
        c_out = self.flownet_c(c_in)
        #print('c_out', c_out.shape)
        
        ## Input2: Feed IMU records to LSTM
        imu_out, (imu_n, imu_c) = self.rnnIMU(imu)
        imu_out = imu_out[:, -1, :]
        #print('imu_out', imu_out.shape)
        imu_out = imu_out.unsqueeze(1)
        #print('imu_out', imu_out.shape)
        
        
        ## Combine the output of input1 and 2 and feed it to LSTM
        #r_in = c_out.view(batch_size, timesteps, -1)
        r_in = c_out.view(batch_size, 1, -1)
        #print('r_in', r_in.shape)
        

        cat_out = torch.cat((r_in, imu_out), 2)#1 1 49158
        print(cat_out.shape)
#         cat_out = torch.cat((cat_out, xyzQ), 2)#1 1 49165
        
        r_out, (h_n, h_c) = self.rnn(cat_out)
        l_out1 = self.linear1(r_out[:,-1,:])
        l_out2 = self.linear2(l_out1)
        #l_out3 = self.linear3(l_out2)

        return l_out2

In [4]:
model = Vinet()

found the checkpoint file


In [5]:
from torchviz import make_dot, make_dot_from_trace

In [6]:
# img = np.array(Image.open('image.png'))
batch_x = []
for i in range(2):
    img1 = np.array(torch.randn((384, 1024, 3)))
    img2 = np.array(torch.randn((384, 1024, 3)))
    
    img1 = np.array([img1, img1, img1])
    img2 = np.array([img2, img2, img2])
    
    X = np.array([img1,img2])
    batch_x.append(X)
    
batch_x = np.array(batch_x)
X = Variable(torch.from_numpy(batch_x).type(torch.FloatTensor).cuda()) 


X.shape

torch.Size([2, 2, 3, 384, 1024, 3])

In [124]:
flownet_c = FlowNetC.FlowNetC(batchNorm=False)

In [57]:
test_LSTM = nn.LSTM(input_size=6, 
                    hidden_size=6,
                    num_layers=2,
                    batch_first=True)
x = torch.randn(10,2,6).requires_grad_(True)
y = test_LSTM(x)
make_dot(y, params=dict(list(test_LSTM.named_parameters()) + [('x',x)]))

AttributeError: 'tuple' object has no attribute 'grad_fn'

In [45]:
image = torch.randn(1, 2, 3, 384, 1024).requires_grad_(True).cuda()
imu   = torch.randn(10,2,6).requires_grad_(True).cuda()
y = model(image, imu)
make_dot(y, params = dict(list(model.named_parameters()) + [('x',x)]))

torch.Size([1, 1, 98310])




RuntimeError: input.size(-1) must be equal to input_size. Expected 49165, got 98310

In [34]:
98310/2

49155.0