In [1]:
# predicted as a batch
from params import par
from model import DeepVO_CfC
import numpy as np
from PIL import Image
import glob
import os
import time
import torch
from data_helper import get_data_info, ImageSequenceDataset
from torch.utils.data import DataLoader
from helper import eulerAnglesToRotationMatrix


# videos_to_test = ['00', '01', '02', '05', '08', '09']
videos_to_test = ['00', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10']

# Path
load_model_path = par.load_model_path   #choose the model you want to load
save_dir = 'result/'  # directory to save prediction answer
if not os.path.exists(save_dir):
    os.makedirs(save_dir)


# Load model
M_deepvo_cfc = DeepVO_CfC(par.img_h, par.img_w, par.batch_norm)
use_cuda = torch.cuda.is_available()
if use_cuda:
    M_deepvo_cfc = M_deepvo_cfc.cuda()
    M_deepvo_cfc.load_state_dict(torch.load(load_model_path))
else:
    M_deepvo_cfc.load_state_dict(torch.load(load_model_path, map_location={'cuda:0': 'cpu'}))
print('Load model from: ', load_model_path)



# Data
n_workers = 4
seq_len = int((par.seq_len[0]+par.seq_len[1])/2)
overlap = seq_len - 1
# overlap = 1
print('seq_len = {},  overlap = {}'.format(seq_len, overlap))
batch_size = par.batch_size


fd=open('test_dump.txt', 'w')
fd.write('\n'+'='*50 + '\n')


for test_video in videos_to_test:
    df = get_data_info(folder_list=[test_video], seq_len_range=[seq_len, seq_len], overlap=overlap, sample_times=1, shuffle=False, sort=False)
    df = df.loc[df.seq_len == seq_len]  # drop last
    dataset = ImageSequenceDataset(df, par.resize_mode, (par.img_w, par.img_h), par.img_means, par.img_stds, par.minus_point_5)
    df.to_csv('test_df.csv')
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=n_workers)

    gt_pose = np.load('{}{}.npy'.format(par.pose_dir, test_video))  # (n_images, 6)

    # Predict
    M_deepvo_cfc.eval()
    has_predict = False
    answer = [[0.0]*6, ]
    st_t = time.time()
    n_batch = len(dataloader)
    gt = []

    for i, batch in enumerate(dataloader):
        print('{} / {}'.format(i, n_batch), end='\r', flush=True)
        _, x, times, y = batch
        gt.append(y)
        if use_cuda:
            x = x.cuda()
            times = times.cuda()
            y = y.cuda()
        batch_predict_pose = M_deepvo_cfc.forward(x, times)

        # Record answer
        fd.write('Batch: {}\n'.format(i))
        for seq, predict_pose_seq in enumerate(batch_predict_pose):
            for pose_idx, pose in enumerate(predict_pose_seq):
                fd.write(' {} {} {}\n'.format(seq, pose_idx, pose))


        batch_predict_pose = batch_predict_pose.data.cpu().numpy()
        if i == 0:
            for pose in batch_predict_pose[0]:
                # use all predicted pose in the first prediction
                for i in range(len(pose)):
                    # Convert predicted relative pose to absolute pose by adding last pose
                    pose[i] += answer[-1][i]
                answer.append(pose.tolist())
            batch_predict_pose = batch_predict_pose[1:]

        # transform from relative to absolute 

        for predict_pose_seq in batch_predict_pose:
            # predict_pose_seq[1:] = predict_pose_seq[1:] + predict_pose_seq[0:-1]
            ang = eulerAnglesToRotationMatrix([0, answer[-1][0], 0]) #eulerAnglesToRotationMatrix([answer[-1][1], answer[-1][0], answer[-1][2]])
            location = ang.dot(predict_pose_seq[-1][3:])
            predict_pose_seq[-1][3:] = location[:]

        # use only last predicted pose in the following prediction
            last_pose = predict_pose_seq[-1]
            for i in range(len(last_pose)):
                last_pose[i] += answer[-1][i]
            # normalize angle to -Pi...Pi over y axis
            last_pose[0] = (last_pose[0] + np.pi) % (2 * np.pi) - np.pi
            answer.append(last_pose.tolist())

    print('len(answer): ', len(answer))
    fpaths = [f'{par.image_dir}{test_video}\\image_0\\{image_file}' for image_file in os.listdir('{}{}\\image_0\\'.format(par.image_dir, test_video)) if image_file.endswith(".png")]
    print('expect len: ', len(fpaths))
    print('Predict use {} sec'.format(time.time() - st_t))


    # Save answer
    with open('{}/out_{}.txt'.format(save_dir, test_video), 'w') as f:
        for pose in answer:
            if type(pose) == list:
                f.write(', '.join([str(p) for p in pose]))
            else:
                f.write(str(pose))
            f.write('\n')


#     print("Answer: ", answer)
#     print("GT_pose: ", gt_pose) 
    # Calculate loss
    gt_pose = np.load('{}{}.npy'.format(par.pose_dir, test_video))  # (n_images, 6)
    loss = 0
    for t in range(len(gt_pose)):
        angle_loss = np.sum((answer[t][:3] - gt_pose[t,:3]) ** 2)
        translation_loss = np.sum((answer[t][3:] - gt_pose[t,3:6]) ** 2)
        loss = (100 * angle_loss + translation_loss)
    loss /= len(gt_pose)
    print('Loss = ', loss)
    print('='*50)


  from .autonotebook import tqdm as notebook_tqdm


Load model from:  models/t000102050809_v04060710_im92x304_s10x14_b8_lstm448_optAdamW_base_lr0.002_decay_lr0.97_weight_decay0.0001.model.valid
seq_len = 12,  overlap = 11
Folder 00 finish in 0.022014617919921875 sec
len(answer):  4541
expect len:  4541
Predict use 130.24810194969177 sec
Loss =  26.911962139484615
Folder 01 finish in 0.012978792190551758 sec
len(answer):  1101
expect len:  1101
Predict use 35.53959774971008 sec
Loss =  72.35859975015417
Folder 02 finish in 0.020000219345092773 sec
len(answer):  4661
expect len:  4661
Predict use 132.05956840515137 sec
Loss =  98.56760426524575
Folder 03 finish in 0.012000083923339844 sec
len(answer):  801
expect len:  801
Predict use 27.69102454185486 sec
Loss =  17.550239561286162
Folder 04 finish in 0.013000249862670898 sec
len(answer):  271
expect len:  271
Predict use 13.281208038330078 sec
Loss =  27.421763279888225
Folder 05 finish in 0.01699995994567871 sec
len(answer):  2761
expect len:  2761
Predict use 80.05812191963196 sec
Los