In [1]:
import torch
from torch.utils.data import DataLoader
import numpy as np
import os
import time
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from tqdm import tnrange
# from params import par
from model import DeepVO
from data_helper import get_data_info, SortedRandomBatchSampler, ImageSequenceDataset, get_partition_data_info

In [2]:
DATA_PATH = '/home/davidgogh96/DeepVO/'


class Parameters():
    """Central store for every setting the notebook reads through ``par``.

    All values are plain instance attributes so that ``vars(par)`` can be
    dumped verbatim into the record file. Attribute creation order is kept
    stable for the same reason: helper values used only to build path strings
    are locals, never attributes.

    Constructing an instance also creates (if missing) the directories that
    will hold the record file, the model/optimizer checkpoints and the cached
    data-info pickles.
    """

    def __init__(self):
        self.n_processors = 8
        # Path
        self.data_dir = DATA_PATH + 'KITTI'
        self.image_dir = self.data_dir + '/images/'
        self.pose_dir = self.data_dir + '/pose_GT/'

        self.train_video = ['00', '01', '02', '05', '08', '10']
        self.valid_video = ['04', '06', '07', '09']
        self.partition = None  # partition videos in 'train_video' to train / valid dataset  #0.8

        # Data Preprocessing
        self.resize_mode = 'rescale'  # choice: 'crop' 'rescale' None
        # Alternative resolution used previously: img_w = 608, img_h = 184
        self.img_w = 384   # original size is about 1226
        self.img_h = 128   # original size is about 370
        self.img_means = (-0.14968217427134656, -0.12941663107068363, -0.1320610301921484)
        self.img_stds = (1, 1, 1)  #(0.309122, 0.315710, 0.3226514)
        self.minus_point_5 = True

        self.seq_len = (5, 7)
        self.sample_times = 3

        # Data info path
        train_tag = ''.join(self.train_video)
        valid_tag = ''.join(self.valid_video)
        info_suffix = 't{}_v{}_p{}_seq{}x{}_sample{}.pickle'.format(
            train_tag, valid_tag, self.partition,
            self.seq_len[0], self.seq_len[1], self.sample_times)
        self.train_data_info_path = 'ndatainfo/train_df_' + info_suffix
        self.valid_data_info_path = 'ndatainfo/valid_df_' + info_suffix

        # Model
        self.rnn_hidden_size = 1000
        self.conv_dropout = (0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.5)
        self.rnn_dropout_out = 0.5
        self.rnn_dropout_between = 0   # 0: no dropout
        self.clip = None
        self.batch_norm = True
        # Training
        self.epochs = 50
        self.batch_size = 8
        self.pin_mem = False
        self.optim = {'opt': 'Adam'}
                    # Choice:
                    # {'opt': 'Adagrad', 'lr': 0.001}
                    # {'opt': 'Adam'}
                    # {'opt': 'Cosine', 'T': 100 , 'lr': 0.001}

        # Pretrain, Resume training
        # os.path.join avoids the doubled '/' that plain concatenation produced,
        # because DATA_PATH already ends with a separator.
        self.pretrained_flownet = os.path.join(DATA_PATH, 'weights/pretrained/flownets_bn_EPE2.459.pth.tar')
                                # Choice:
                                # None
                                # './pretrained/flownets_bn_EPE2.459.pth.tar'
                                # './pretrained/flownets_EPE1.951.pth.tar'
        self.resume = False  # resume training
        self.resume_t_or_v = '.train'

        # One tag identifies the run in every record / checkpoint file name.
        optim_tag = '_'.join([k + str(v) for k, v in self.optim.items()])
        run_tag = 't{}_v{}_im{}x{}_s{}x{}_b{}_rnn{}_{}'.format(
            train_tag, valid_tag, self.img_h, self.img_w,
            self.seq_len[0], self.seq_len[1],
            self.batch_size, self.rnn_hidden_size, optim_tag)

        self.load_model_path = 'nmodels/' + run_tag + '.model' + self.resume_t_or_v
        self.load_optimizer_path = 'nmodels/' + run_tag + '.optimizer' + self.resume_t_or_v

        self.record_path = 'nrecords/' + run_tag + '.txt'
        self.save_model_path = 'nmodels/' + run_tag + '.model'
        # NOTE: the attribute name keeps its historical spelling ("optimzer")
        # because other cells read it under that name.
        self.save_optimzer_path = 'nmodels/' + run_tag + '.optimizer'

        # exist_ok=True makes this idempotent and free of the
        # check-then-create race of an isdir()/makedirs() pair.
        for out_path in (self.record_path,
                         self.save_model_path,
                         self.save_optimzer_path,
                         self.train_data_info_path,
                         self.valid_data_info_path):
            os.makedirs(os.path.dirname(out_path), exist_ok=True)


par = Parameters()



In [5]:
import torch
import torch.nn as nn
# from params import par
from torch.autograd import Variable
from torch.nn.init import kaiming_normal_, orthogonal_
import numpy as np
def conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, dropout=0):
    """Assemble one convolutional stage as an ``nn.Sequential``.

    The stage is Conv2d -> (BatchNorm2d) -> LeakyReLU(0.1) -> Dropout.
    Padding is ``(kernel_size - 1) // 2``. The convolution carries a bias
    only when batch normalisation is disabled.

    Args:
        batchNorm: truthy to insert a BatchNorm2d layer after the convolution.
        in_planes: number of input channels.
        out_planes: number of output channels.
        kernel_size: square kernel size of the convolution.
        stride: stride of the convolution.
        dropout: probability passed to the trailing ``nn.Dropout``.

    Returns:
        nn.Sequential with four layers (batch norm on) or three (batch norm off).
    """
    convolution = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=(kernel_size - 1) // 2,
        bias=not batchNorm,
    )
    stage = [convolution]
    if batchNorm:
        stage.append(nn.BatchNorm2d(out_planes))
    stage.append(nn.LeakyReLU(0.1, inplace=True))
    stage.append(nn.Dropout(dropout))
    return nn.Sequential(*stage)
class DeepVO(nn.Module):
    """Convolutional encoder followed by a two-layer LSTM and a linear head.

    Consecutive frames of a sequence are stacked channel-wise, encoded by the
    convolutional stack, fed through the LSTM and projected to six values per
    frame pair. Dropout rates, the LSTM hidden size and the optional gradient
    clip value are read from the module-level ``par`` object.
    """

    def __init__(self, imsize1, imsize2, batchNorm=True):
        """Build the layers and initialise their weights.

        Args:
            imsize1: size of the third axis of the (1, 6, imsize1, imsize2)
                probe tensor used to derive the LSTM input width.
            imsize2: size of the fourth axis of that probe tensor.
            batchNorm: whether each conv stage includes BatchNorm2d.
        """
        super(DeepVO, self).__init__()
        # CNN
        self.batchNorm = batchNorm
        self.clip = par.clip
        self.conv1   = conv(self.batchNorm,   6,   64, kernel_size=7, stride=2, dropout=par.conv_dropout[0])
        self.conv2   = conv(self.batchNorm,  64,  128, kernel_size=5, stride=2, dropout=par.conv_dropout[1])
        self.conv3   = conv(self.batchNorm, 128,  256, kernel_size=5, stride=2, dropout=par.conv_dropout[2])
        self.conv3_1 = conv(self.batchNorm, 256,  256, kernel_size=3, stride=1, dropout=par.conv_dropout[3])
        self.conv4   = conv(self.batchNorm, 256,  512, kernel_size=3, stride=2, dropout=par.conv_dropout[4])
        self.conv4_1 = conv(self.batchNorm, 512,  512, kernel_size=3, stride=1, dropout=par.conv_dropout[5])
        self.conv5   = conv(self.batchNorm, 512,  512, kernel_size=3, stride=2, dropout=par.conv_dropout[6])
        self.conv5_1 = conv(self.batchNorm, 512,  512, kernel_size=3, stride=1, dropout=par.conv_dropout[7])
        self.conv6   = conv(self.batchNorm, 512, 1024, kernel_size=3, stride=2, dropout=par.conv_dropout[8])

        # Compute the encoder output size for this image size with a dummy pass.
        # The pass runs in eval mode and without autograd so that it neither
        # alters BatchNorm running statistics nor draws dropout masks.
        was_training = self.training
        self.eval()
        with torch.no_grad():
            probe = self.encode_image(torch.zeros(1, 6, imsize1, imsize2))
        self.train(was_training)

        # RNN
        self.rnn = nn.LSTM(
                    input_size=int(np.prod(probe.size())),
                    hidden_size=par.rnn_hidden_size,
                    num_layers=2,
                    dropout=par.rnn_dropout_between,
                    batch_first=True)
        self.rnn_drop_out = nn.Dropout(par.rnn_dropout_out)
        self.linear = nn.Linear(in_features=par.rnn_hidden_size, out_features=6)

        # Initialization
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
                kaiming_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.LSTM):
                for layer in range(m.num_layers):
                    weight_ih = getattr(m, 'weight_ih_l{}'.format(layer))
                    weight_hh = getattr(m, 'weight_hh_l{}'.format(layer))
                    bias_ih = getattr(m, 'bias_ih_l{}'.format(layer))
                    bias_hh = getattr(m, 'bias_hh_l{}'.format(layer))
                    kaiming_normal_(weight_ih)
                    kaiming_normal_(weight_hh)
                    bias_ih.data.zero_()
                    bias_hh.data.zero_()
                    # Set forget gate bias to 1 (remember): it is the second
                    # quarter of the concatenated gate bias vector.
                    n = bias_hh.size(0)
                    bias_hh.data[n // 4:n // 2].fill_(1.)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def forward(self, x):
        """Predict six values for every consecutive frame pair.

        Args:
            x: 5-D tensor whose first two axes are (batch, seq_len) and whose
               third axis is the per-frame channel axis.
               NOTE(review): the original comment lists the trailing axes as
               (width, height) while the constructor probes with
               (imsize1, imsize2) = (img_h, img_w) -- confirm against the dataset.

        Returns:
            Tensor of shape (batch, seq_len - 1, 6).
        """
        # Stack each frame with its successor along the channel axis.
        x = torch.cat((x[:, :-1], x[:, 1:]), dim=2)
        batch_size = x.size(0)
        seq_len = x.size(1)
        # CNN: fold the sequence axis into the batch axis for the encoder.
        x = x.view(batch_size * seq_len, x.size(2), x.size(3), x.size(4))
        x = self.encode_image(x)
        x = x.view(batch_size, seq_len, -1)

        # RNN
        out, hc = self.rnn(x)
        out = self.rnn_drop_out(out)
        out = self.linear(out)
        return out

    def encode_image(self, x):
        """Run ``x`` through conv1 ... conv6 and return the conv6 output."""
        out_conv2 = self.conv2(self.conv1(x))
        out_conv3 = self.conv3_1(self.conv3(out_conv2))
        out_conv4 = self.conv4_1(self.conv4(out_conv3))
        out_conv5 = self.conv5_1(self.conv5(out_conv4))
        out_conv6 = self.conv6(out_conv5)
        return out_conv6

    def weight_parameters(self):
        """Return every parameter whose name contains 'weight'."""
        return [param for name, param in self.named_parameters() if 'weight' in name]

    def bias_parameters(self):
        """Return every parameter whose name contains 'bias'."""
        return [param for name, param in self.named_parameters() if 'bias' in name]

    @staticmethod
    def get_velocity(y):
        """Return the L2 norm of the difference between successive steps.

        Args:
            y: tensor indexed as (batch, step, component).

        Returns:
            Tensor of shape (batch, step - 1).
        """
        return (y[:, :-1, :] - y[:, 1:, :]).norm(2, 2)

    def get_loss(self, x, y):
        """Compute the training loss of the model on one batch.

        The first time step of ``y`` is dropped so that it lines up with the
        per-pair predictions. The loss is
        ``100 * MSE(first three components) + MSE(last three components)``
        plus the MSE between predicted and target ``get_velocity`` values of
        each of those two component groups.

        Returns:
            Scalar tensor.
        """
        predicted = self.forward(x)
        y = y[:, 1:, :]  # (batch, seq, dim_pose)
        # Weighted MSE Loss
        angle_loss = torch.nn.functional.mse_loss(predicted[:, :, :3], y[:, :, :3])
        translation_loss = torch.nn.functional.mse_loss(predicted[:, :, 3:], y[:, :, 3:])
        loss = (100 * angle_loss + translation_loss)
        trans_v_loss = torch.nn.functional.mse_loss(self.get_velocity(predicted[:, :, 3:]), self.get_velocity(y[:, :, 3:]))
        angle_v_loss = torch.nn.functional.mse_loss(self.get_velocity(predicted[:, :, :3]), self.get_velocity(y[:, :, :3]))
        loss += trans_v_loss + angle_v_loss
        return loss

    def step(self, x, y, optimizer):
        """Run one optimisation step on a batch and return its loss tensor.

        When ``self.clip`` is set, the gradient norm of the LSTM parameters is
        clipped to that value before the optimizer update.
        """
        optimizer.zero_grad()
        loss = self.get_loss(x, y)
        loss.backward()
        if self.clip is not None:
            # In-place variant; the un-suffixed clip_grad_norm is deprecated.
            torch.nn.utils.clip_grad_norm_(self.rnn.parameters(), self.clip)
        optimizer.step()
        return loss

In [12]:
# Dump every hyperparameter to the record file (append when resuming, else overwrite).
mode = 'a' if par.resume else 'w'
separator = '\n' + '=' * 50 + '\n'
with open(par.record_path, mode) as f:
    f.write(separator)
    f.write('\n'.join("%s: %s" % item for item in vars(par).items()))
    f.write(separator)

# Prepare Data: reuse the cached data-info frames when both pickles exist,
# otherwise build them and cache them for the next run.
have_cached_info = os.path.isfile(par.train_data_info_path) and os.path.isfile(par.valid_data_info_path)
if not have_cached_info:
    print('Create new data info')
    if par.partition is None:
        # Separate video lists for training and validation.
        train_df = get_data_info(folder_list=par.train_video, seq_len_range=par.seq_len, overlap=1, sample_times=par.sample_times)
        valid_df = get_data_info(folder_list=par.valid_video, seq_len_range=par.seq_len, overlap=1, sample_times=par.sample_times)
    else:
        # Split the training videos themselves into train / valid parts.
        partition = par.partition
        train_df, valid_df = get_partition_data_info(partition, par.train_video, par.seq_len, overlap=1, sample_times=par.sample_times, shuffle=True, sort=True)
    # save the data info
    train_df.to_pickle(par.train_data_info_path)
    valid_df.to_pickle(par.valid_data_info_path)
else:
    print('Load data info from {}'.format(par.train_data_info_path))
    train_df = pd.read_pickle(par.train_data_info_path)
    valid_df = pd.read_pickle(par.valid_data_info_path)

# Arguments shared by the training and validation pipelines.
dataset_args = (par.resize_mode, (par.img_w, par.img_h), par.img_means, par.img_stds, par.minus_point_5)
loader_kwargs = dict(num_workers=par.n_processors, pin_memory=par.pin_mem)

train_sampler = SortedRandomBatchSampler(train_df, par.batch_size, drop_last=True)
train_dataset = ImageSequenceDataset(train_df, *dataset_args)
train_dl = DataLoader(train_dataset, batch_sampler=train_sampler, **loader_kwargs)

valid_sampler = SortedRandomBatchSampler(valid_df, par.batch_size, drop_last=True)
valid_dataset = ImageSequenceDataset(valid_df, *dataset_args)
valid_dl = DataLoader(valid_dataset, batch_sampler=valid_sampler, **loader_kwargs)

print('Number of samples in training dataset: ', len(train_df.index))
print('Number of samples in validation dataset: ', len(valid_df.index))
print('='*50)


# Model: build it and move it to the GPU when one is available.
M_deepvo = DeepVO(par.img_h, par.img_w, par.batch_norm)
use_cuda = torch.cuda.is_available()
if use_cuda:
    print('CUDA used.')
    M_deepvo = M_deepvo.cuda()


Load data info from datainfo/train_df_t000102050810_v040607_pNone_seq5x7_sample3.pickle
Number of samples in training dataset:  11031
Number of samples in validation dataset:  1485
CUDA used.


In [53]:
# Pull a single batch from the training loader for interactive inspection.
_, t_x, t_y = next(iter(train_dl))
# Only move to the GPU when one is available, like the training loop does;
# an unconditional .cuda() raises on CPU-only machines.
if use_cuda:
    t_x = t_x.cuda(non_blocking=par.pin_mem)
    t_y = t_y.cuda(non_blocking=par.pin_mem)

In [54]:
# Call the module itself rather than .forward() so registered hooks also run.
p = M_deepvo(t_x)

In [55]:
# Sanity check: evaluate the loss on the sampled batch and display its shape
# (a scalar loss shows up as torch.Size([])).
M_deepvo.get_loss(t_x, t_y).shape

torch.Size([8, 6, 6]) torch.Size([8, 6, 6])


torch.Size([])

In [58]:
# Drop the first time step of the targets, as DeepVO.get_loss does before
# comparing them with the predictions.
y = t_y[:, 1:, :]

In [59]:
def get_velocity(y):
    """Return the L2 norm of the change between successive steps of ``y``.

    ``y`` is indexed as (batch, step, component); the result has shape
    (batch, step - 1).
    """
    earlier = y[:, :-1, :]
    later = y[:, 1:, :]
    step_delta = earlier - later
    return step_delta.norm(p=2, dim=2)

In [63]:
# MSE over pose components 3 onward -- the slice DeepVO.get_loss uses for its
# translation term -- between the predictions `p` and the shifted targets `y`.
torch.nn.functional.mse_loss(p[:, :, 3:], y[:, :, 3:])

tensor(0.4295, device='cuda:0', grad_fn=<MseLossBackward>)

In [None]:
# Write all hyperparameters to record_path (append when resuming, else overwrite)
mode = 'a' if par.resume else 'w'
with open(par.record_path, mode) as f:
    f.write('\n'+'='*50 + '\n')
    f.write('\n'.join("%s: %s" % item for item in vars(par).items()))
    f.write('\n'+'='*50 + '\n')

# Prepare Data: reuse the cached data-info frames when both pickles exist.
if os.path.isfile(par.train_data_info_path) and os.path.isfile(par.valid_data_info_path):
    print('Load data info from {}'.format(par.train_data_info_path))
    train_df = pd.read_pickle(par.train_data_info_path)
    valid_df = pd.read_pickle(par.valid_data_info_path)
else:
    print('Create new data info')
    if par.partition is not None:
        partition = par.partition
        train_df, valid_df = get_partition_data_info(partition, par.train_video, par.seq_len, overlap=1, sample_times=par.sample_times, shuffle=True, sort=True)
    else:
        train_df = get_data_info(folder_list=par.train_video, seq_len_range=par.seq_len, overlap=1, sample_times=par.sample_times)
        valid_df = get_data_info(folder_list=par.valid_video, seq_len_range=par.seq_len, overlap=1, sample_times=par.sample_times)
    # save the data info
    train_df.to_pickle(par.train_data_info_path)
    valid_df.to_pickle(par.valid_data_info_path)

train_sampler = SortedRandomBatchSampler(train_df, par.batch_size, drop_last=True)
train_dataset = ImageSequenceDataset(train_df, par.resize_mode, (par.img_w, par.img_h), par.img_means, par.img_stds, par.minus_point_5)
train_dl = DataLoader(train_dataset, batch_sampler=train_sampler, num_workers=par.n_processors, pin_memory=par.pin_mem)

valid_sampler = SortedRandomBatchSampler(valid_df, par.batch_size, drop_last=True)
valid_dataset = ImageSequenceDataset(valid_df, par.resize_mode, (par.img_w, par.img_h), par.img_means, par.img_stds, par.minus_point_5)
valid_dl = DataLoader(valid_dataset, batch_sampler=valid_sampler, num_workers=par.n_processors, pin_memory=par.pin_mem)

print('Number of samples in training dataset: ', len(train_df.index))
print('Number of samples in validation dataset: ', len(valid_df.index))
print('='*50)


# Model
M_deepvo = DeepVO(par.img_h, par.img_w, par.batch_norm)
use_cuda = torch.cuda.is_available()
if use_cuda:
    print('CUDA used.')
    M_deepvo = M_deepvo.cuda()


# Load FlowNet weights pretrained with FlyingChairs
# NOTE: the pretrained model assumes image rgb values in range [-0.5, 0.5]
if par.pretrained_flownet and not par.resume:
    if use_cuda:
        pretrained_w = torch.load(par.pretrained_flownet)
    else:
        # Fixed: this branch used to read the non-existent attribute
        # `par.pretrained_flownet_flownet` and crashed on CPU-only hosts.
        pretrained_w = torch.load(par.pretrained_flownet, map_location='cpu')
    print('Load FlowNet pretrained model')
    # Use only conv-layer-part of FlowNet as CNN for DeepVO:
    # copy just the entries whose keys also exist in the DeepVO state dict.
    model_dict = M_deepvo.state_dict()
    update_dict = {k: v for k, v in pretrained_w['state_dict'].items() if k in model_dict}
    model_dict.update(update_dict)
    M_deepvo.load_state_dict(model_dict)


# Create optimizer
if par.optim['opt'] == 'Adam':
    optimizer = torch.optim.Adam(M_deepvo.parameters(), lr=0.001, betas=(0.9, 0.999))
elif par.optim['opt'] == 'Adagrad':
    optimizer = torch.optim.Adagrad(M_deepvo.parameters(), lr=par.optim['lr'])
elif par.optim['opt'] == 'Cosine':
    optimizer = torch.optim.SGD(M_deepvo.parameters(), lr=par.optim['lr'])
    T_iter = par.optim['T']*len(train_dl)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_iter, eta_min=0, last_epoch=-1)

# Load trained DeepVO model and optimizer
if par.resume:
    M_deepvo.load_state_dict(torch.load(par.load_model_path))
    optimizer.load_state_dict(torch.load(par.load_optimizer_path))
    print('Load model from: ', par.load_model_path)
    print('Load optimizer from: ', par.load_optimizer_path)


# Train
print('Record loss in: ', par.record_path)
min_loss_t = 1e10
min_loss_v = 1e10
M_deepvo.train()
# Per-batch losses accumulated over ALL epochs; the reported std is taken over these.
t_loss_list = []
v_loss_list = []
check_interval = 1  # only consider saving a checkpoint every `check_interval` epochs
for ep in tnrange(par.epochs):
    st_t = time.time()
    # Train
    M_deepvo.train()
    loss_mean = 0

    progress_bar = tqdm(train_dl, desc='Training')
    for batch_idx, (_, t_x, t_y) in enumerate(progress_bar):
        if use_cuda:
            t_x = t_x.cuda(non_blocking=par.pin_mem)
            t_y = t_y.cuda(non_blocking=par.pin_mem)
        ls = float(M_deepvo.step(t_x, t_y, optimizer).item())
        t_loss_list.append(ls)
        loss_mean += ls
        # Fixed: `par.optim` is a dict, so comparing it with the string 'Cosine'
        # was always False and the scheduler never advanced.
        if par.optim['opt'] == 'Cosine':
            lr_scheduler.step()
        progress_bar.set_description(
        'Epoch: {} loss: {:.4f}'.format(
            ep, loss_mean / (batch_idx + 1)))
    print('Train take {:.1f} sec'.format(time.time()-st_t))
    loss_mean /= len(train_dl)

    # Validation
    st_t = time.time()
    M_deepvo.eval()
    loss_mean_valid = 0

    progress_bar = tqdm(valid_dl, desc='Validating')
    # No gradients are needed for validation; skipping autograd saves memory and time.
    with torch.no_grad():
        for batch_idx, (_, v_x, v_y) in enumerate(progress_bar):
            if use_cuda:
                v_x = v_x.cuda(non_blocking=par.pin_mem)
                v_y = v_y.cuda(non_blocking=par.pin_mem)
            v_ls = float(M_deepvo.get_loss(v_x, v_y).item())
            v_loss_list.append(v_ls)
            loss_mean_valid += v_ls
            progress_bar.set_description(
            'Epoch: {} loss: {:.4f}'.format(
                ep, loss_mean_valid / (batch_idx + 1)))
    print('Valid take {:.1f} sec'.format(time.time()-st_t))
    loss_mean_valid /= len(valid_dl)

    epoch_summary = 'Epoch {}\ntrain loss mean: {}, std: {:.2f}\nvalid loss mean: {}, std: {:.2f}\n'.format(ep+1, loss_mean, np.std(t_loss_list), loss_mean_valid, np.std(v_loss_list))
    # The context manager closes the record file even if something below fails.
    with open(par.record_path, 'a') as f:
        f.write(epoch_summary)
    print(epoch_summary)

    # Save model
    # save if the valid loss decrease
    if loss_mean_valid < min_loss_v and ep % check_interval == 0:
        min_loss_v = loss_mean_valid
        print('Save model at ep {}, mean of valid loss: {}'.format(ep+1, loss_mean_valid))  # use 4.6 sec 
        torch.save(M_deepvo.state_dict(), par.save_model_path+'.valid')
        torch.save(optimizer.state_dict(), par.save_optimzer_path+'.valid')
    # save if the training loss decrease
    if loss_mean < min_loss_t and ep % check_interval == 0:
        min_loss_t = loss_mean
        print('Save model at ep {}, mean of train loss: {}'.format(ep+1, loss_mean))
        torch.save(M_deepvo.state_dict(), par.save_model_path+'.train')
        torch.save(optimizer.state_dict(), par.save_optimzer_path+'.train')



Load data info from ndatainfo/train_df_t000102050810_v04060709_pNone_seq5x7_sample3.pickle
Number of samples in training dataset:  10996
Number of samples in validation dataset:  2435
CUDA used.
Load FlowNet pretrained model
Record loss in:  nrecords/t000102050810_v04060709_im128x384_s5x7_b8_rnn1000_optAdam.txt


HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

HBox(children=(IntProgress(value=0, description='Training', max=1373, style=ProgressStyle(description_width='i…

Train take 1126.5 sec


HBox(children=(IntProgress(value=0, description='Validating', max=304, style=ProgressStyle(description_width='…

Valid take 198.8 sec
Epoch 1
train loss mean: 18.688979657294972, std: 33.87
valid loss mean: 20.559200522832963, std: 30.89

Save model at ep 1, mean of valid loss: 20.559200522832963
Save model at ep 1, mean of train loss: 18.688979657294972


HBox(children=(IntProgress(value=0, description='Training', max=1373, style=ProgressStyle(description_width='i…

In [None]:
import glob
# videos_to_test = ['04', '05', '07', '10', '09']
videos_to_test = ['00', '02', '08', '09']
videos_to_test += ['01', '04', '05', '06', '07', '10']

# Path
# load_model_path = DATA_PATH + 'weights/models/t000102050809_v04060710_im184x608_s5x7_b8_rnn1000_optAdagrad_lr0.0005.model.train'
load_model_path = par.load_model_path   #choose the model you want to load
save_dir = 'result/'  # directory to save prediction answer
if not os.path.exists(save_dir):
    os.makedirs(save_dir)


# Load model
M_deepvo = DeepVO(par.img_h, par.img_w, par.batch_norm)
use_cuda = torch.cuda.is_available()
if use_cuda:
    M_deepvo = M_deepvo.cuda()
    M_deepvo.load_state_dict(torch.load(load_model_path))
else:
    M_deepvo.load_state_dict(torch.load(load_model_path, map_location={'cuda:0': 'cpu'}))
print('Load model from: ', load_model_path)



# Data
n_workers = 1
# Fixed test sequence length: midpoint of the training range.
seq_len = int((par.seq_len[0]+par.seq_len[1])/2)
# Consecutive windows are shifted by one frame.
overlap = seq_len - 1
print('seq_len = {},  overlap = {}'.format(seq_len, overlap))
batch_size = par.batch_size

for test_video in videos_to_test:
    df = get_data_info(folder_list=[test_video], seq_len_range=[seq_len, seq_len], overlap=overlap, sample_times=1, shuffle=False, sort=False)
    df = df.loc[df.seq_len == seq_len]  # drop last
    dataset = ImageSequenceDataset(df, par.resize_mode, (par.img_w, par.img_h), par.img_means, par.img_stds, par.minus_point_5)
    df.to_csv('test_df.csv')
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=n_workers)
    gt_pose = np.load('{}{}.npy'.format(par.pose_dir, test_video))  # (n_images, 6)

    # Predict
    M_deepvo.eval()
    # `answer` holds one accumulated pose per frame, starting from all zeros.
    answer = [[0.0]*6, ]
    st_t = time.time()
    n_batch = len(dataloader)
    # Inference only: disable autograd to save memory and time.
    with torch.no_grad():
        for batch_idx, batch in enumerate(dataloader):
            print('{} / {}'.format(batch_idx, n_batch), end='\r', flush=True)
            _, x, y = batch
            if use_cuda:
                x = x.cuda()
            batch_predict_pose = M_deepvo(x).cpu().numpy()

            # Record answer
            if batch_idx == 0:
                # use all predicted pose in the first prediction
                for pose in batch_predict_pose[0]:
                    # Convert predicted relative pose to absolute pose by adding last pose
                    abs_pose = pose + np.asarray(answer[-1], dtype=pose.dtype)
                    answer.append(abs_pose.tolist())
                batch_predict_pose = batch_predict_pose[1:]

            for predict_pose_seq in batch_predict_pose:
                # use only last predicted pose in the following prediction
                last_pose = predict_pose_seq[-1]
                abs_pose = last_pose + np.asarray(answer[-1], dtype=last_pose.dtype)
                answer.append(abs_pose.tolist())
    print('len(answer): ', len(answer))
    print('expect len: ', len(glob.glob('{}{}/*.png'.format(par.image_dir, test_video))))
    print('Predict use {} sec'.format(time.time() - st_t))


    # Save answer: one comma-separated pose per line
    with open(os.path.join(save_dir, 'out_{}.txt'.format(test_video)), 'w') as f:
        for pose in answer:
            if isinstance(pose, list):
                f.write(', '.join([str(p) for p in pose]))
            else:
                f.write(str(pose))
            f.write('\n')


    # Calculate loss: mean over frames of 100 * squared angle error + squared translation error
    loss = 0
    for t in range(len(gt_pose)):
        predicted_pose = np.asarray(answer[t])
        angle_loss = np.sum((predicted_pose[:3] - gt_pose[t, :3]) ** 2)
        translation_loss = np.sum((predicted_pose[3:] - gt_pose[t, 3:]) ** 2)
        # Fixed: accumulate over frames. The previous plain assignment kept
        # only the last frame's error before dividing by the frame count.
        loss += 100 * angle_loss + translation_loss
    loss /= len(gt_pose)
    print('Loss = ', loss)
    print('='*50)

In [None]:
import matplotlib.pyplot as plt
# Switch matplotlib to interactive mode.
plt.ion()

# Directory holding the ground-truth pose arrays (<video>.npy).
pose_GT_dir = par.pose_dir  #'KITTI/pose_GT/'
# Directory the predictions (out_<video>.txt) are read from and the figures are written to.
predicted_result_dir = './result/'
# True: draw the route in colour-graded chunks; False: draw it in a single colour.
gradient_color = False

def plot_route(gt, out, c_gt='g', c_out='r'):
    """Draw the ground-truth and predicted routes on the current axes.

    Column 3 of each array is used as the horizontal coordinate and column 5
    as the vertical one; the axes are then set to an equal aspect ratio.

    Args:
        gt: 2-D array of ground-truth poses.
        out: 2-D array of predicted poses.
        c_gt: colour of the ground-truth curve.
        c_out: colour of the predicted curve.
    """
    x_col, y_col = 3, 5
    curves = (
        (gt, c_gt, 'Ground Truth'),
        (out, c_out, 'DeepVO'),
    )
    for track, colour, legend_name in curves:
        plt.plot(list(track[:, x_col]), list(track[:, y_col]), color=colour, label=legend_name)
    plt.gca().set_aspect('equal', adjustable='datalim')


# Compare ground truth and predictions for every video: print the errors,
# then save and show an error plot and a route plot.
video_list = ['00', '01', '02', '05', '08', '10']
video_list += ['04', '06', '07', '09']


for video in video_list:
    print('='*50)
    print('Video {}'.format(video))

    GT_pose_path = '{}{}.npy'.format(pose_GT_dir, video)
    gt = np.load(GT_pose_path)
    pose_result_path = '{}out_{}.txt'.format(predicted_result_dir, video)
    # Each line of the result file is one comma-separated pose.
    with open(pose_result_path) as f_out:
        rows = [raw.split('\n')[0] for raw in f_out.readlines()]
    out = np.array([[float(v) for v in row.split(',')] for row in rows])

    rotation_diff = out[:, :3] - gt[:, :3]
    translation_diff = out[:, 3:] - gt[:, 3:]
    mse_rotate = 100 * np.mean(rotation_diff**2)
    mse_translate = np.mean(translation_diff**2)
    print('mse_rotate: ', mse_rotate)
    print('mse_translate: ', mse_translate)

    # Per-frame error magnitudes
    plt.clf()
    plt.title('Video {} errors'.format(video))
    plt.plot(np.linalg.norm(rotation_diff, axis=1), label='rotation err')
    plt.plot(np.linalg.norm(translation_diff, axis=1), label='translation err')
    plt.legend()
    save_name = '{}route_{}_err.png'.format(predicted_result_dir, video)
    plt.savefig(save_name)
    plt.show()

    # Route plot
    plt.clf()
    plt.scatter([gt[0][3]], [gt[0][5]], label='sequence start', marker='s', color='k')
    if not gradient_color:
        # plot one color
        plot_route(gt, out, 'r', 'b')
        plt.legend()
        plt.title('Video {}'.format(video))
        save_name = '{}route_{}.png'.format(predicted_result_dir, video)
        plt.savefig(save_name)
        plt.show()
    else:
        # plot gradient color: the green channel grows with position in the sequence
        step = 200
        for st in range(0, len(out), step):
            end = st + step
            g = max(0.2, st/len(out))
            c_gt = (0, g, 0)
            c_out = (1, g, 0)
            plot_route(gt[st:end], out[st:end], c_gt, c_out)
            if st == 0:
                plt.legend()
            plt.title('Video {}'.format(video))
            save_name = '{}route_{}_gradient.png'.format(predicted_result_dir, video)
        plt.savefig(save_name)

In [None]:
# Display any figure that is still open.
plt.show()