In [1]:
import os
import torch
import numpy as np
from glob import glob

In [2]:
# Root directory of the dataset split that is evaluated below.
DATA_DIR = '/home/qiyan/Documents/TransPose/datasets/EPFL/test_drone_sim'

# Sorted listings of the four per-frame inputs. They are paired by position further
# down, so element k of every list is expected to belong to the same frame.
depth_ls = sorted(glob(os.path.join(DATA_DIR, 'depth/*')))
coord_ls = sorted(glob(os.path.join(DATA_DIR, 'init/*')))
poses_ls = sorted(glob(os.path.join(DATA_DIR, 'poses/*')))
calibration_ls = sorted(glob(os.path.join(DATA_DIR, 'calibration/*')))

# zip() silently stops at the shortest list, so a missing or extra file in one of the
# sub-directories would go unnoticed and could pair files from different frames.
# Fail loudly instead.
_listing_sizes = {
    'depth': len(depth_ls),
    'init': len(coord_ls),
    'poses': len(poses_ls),
    'calibration': len(calibration_ls),
}
if len(set(_listing_sizes.values())) != 1:
    raise ValueError(
        "Mismatched number of files under {}: {}".format(DATA_DIR, _listing_sizes))

In [3]:
# Sanity check: camera-frame coordinates back-projected from z-buffer depth should match
# the ground-truth camera-frame coordinates obtained by transforming the coordinate maps
# with the inverse of the cam-to-world pose. Prints per-frame mean / median error.
SUBSAMPLE = 8                  # pixel stride between neighbouring cells of the coord/depth maps
PRINCIPAL_POINT = (360, 240)   # (u0, v0): pixel that the grid is expressed relative to
MAX_FRAMES = 100               # only the first MAX_FRAMES frames are evaluated

frame_paths = zip(depth_ls[:MAX_FRAMES], coord_ls[:MAX_FRAMES],
                  poses_ls[:MAX_FRAMES], calibration_ls[:MAX_FRAMES])
for i, (depth_path, coord_path, pose_path, calibration_path) in enumerate(frame_paths):
    depth = torch.load(depth_path).numpy()  # [H, W] z-buffer depth
    coord = torch.load(coord_path).numpy()  # [3, H, W]
    mask_nodata = coord.reshape(3, -1)[0] == -1  # [H*W], cells whose first channel is -1 are excluded
    pose = np.loadtxt(pose_path)  # [4, 4] cam-to-world matrix
    focal_length = float(np.loadtxt(calibration_path))
    height, width = coord.shape[1:]

    # get gt camera coordinates: homogeneous coords mapped through inverse(pose)
    ones = np.ones([1, height, width])
    coord_homogeneous = np.concatenate([coord, ones], axis=0).reshape(4, -1)  # [4, H*W]
    cam_coord = np.matmul(np.linalg.inv(pose), coord_homogeneous).reshape(4, height, width)[:3]  # [3, H, W]

    # reconstruct camera coordinates from z-buffer depth
    # u / x_c = f / z_c  -> x_c = z_c * u / f, y_c = z_c * v / f
    # Pixel offsets relative to the principal point, built without a Python loop.
    # NOTE(review): cell (row, col) is mapped to pixel (col * 8, row * 8), i.e. the corner
    # of each 8x8 cell. If the maps were sampled at cell centres a +4 offset would be
    # needed -- confirm against how the dataset was generated.
    u_offsets = np.arange(width, dtype=np.float64) * SUBSAMPLE - PRINCIPAL_POINT[0]   # [W]
    v_offsets = np.arange(height, dtype=np.float64) * SUBSAMPLE - PRINCIPAL_POINT[1]  # [H]
    pixel_grid = np.stack(np.meshgrid(u_offsets, v_offsets))  # [2, H, W]: u offsets, v offsets

    z_c = depth  # [H, W]
    x_c = z_c * pixel_grid[0] / focal_length  # element-wise
    y_c = z_c * pixel_grid[1] / focal_length  # element-wise

    cam_coord_recovery = np.stack([x_c, y_c, z_c])  # [3, H, W]

    # compare the recovery accuracy on cells that carry data
    valid = np.logical_not(mask_nodata)  # [H*W]
    cam_coord_select = cam_coord.reshape(3, -1)[:, valid]  # [3, X]
    cam_coord_recovery_select = cam_coord_recovery.reshape(3, -1)[:, valid]  # [3, X]
    recovery_error = np.linalg.norm(cam_coord_select - cam_coord_recovery_select, 2, axis=0)  # [X]

    # Errors are multiplied by 100 for display in cm.
    print("Iter: {:d}, recovering camera coords from z-buffer depth for {:d} pixels, mean error: {:.1f} cm, median error: {:.1f} cm".format(
        i, len(recovery_error), np.mean(recovery_error) * 100, np.median(recovery_error) * 100))

Iter: 0, recovering camera coords from z-buffer depth for 2181 pixels, mean error: 73.5 cm, median error: 59.8 cm
Iter: 1, recovering camera coords from z-buffer depth for 2959 pixels, mean error: 55.4 cm, median error: 49.0 cm
Iter: 2, recovering camera coords from z-buffer depth for 2394 pixels, mean error: 68.6 cm, median error: 59.8 cm
Iter: 3, recovering camera coords from z-buffer depth for 3234 pixels, mean error: 59.5 cm, median error: 51.3 cm
Iter: 4, recovering camera coords from z-buffer depth for 2598 pixels, mean error: 47.7 cm, median error: 40.1 cm
Iter: 5, recovering camera coords from z-buffer depth for 4168 pixels, mean error: 34.9 cm, median error: 29.7 cm
Iter: 6, recovering camera coords from z-buffer depth for 4235 pixels, mean error: 41.1 cm, median error: 31.1 cm
Iter: 7, recovering camera coords from z-buffer depth for 3391 pixels, mean error: 18.5 cm, median error: 16.6 cm
Iter: 8, recovering camera coords from z-buffer depth for 3774 pixels, mean error: 18.2 

In [4]:
# Ground-truth camera-frame coordinates ([3, X]) of the valid pixels, as left over
# from the last frame processed by the comparison loop above.
cam_coord_select

array([[-56.88334899, -55.79551096, -54.36474465, ...,  48.48226222,
         52.07186711,  53.49755777],
       [-37.81294548, -37.82866306, -37.81678929, ...,  35.40962197,
         35.36246447,  35.40507501],
       [ 76.12008246,  75.78212843,  75.80699518, ...,  72.94179477,
         72.99781181,  73.0251385 ]])

In [5]:
# Camera-frame coordinates ([3, X]) recovered from z-buffer depth for the same valid
# pixels of the last processed frame; compare against cam_coord_select.
cam_coord_recovery_select

array([[-57.090065  , -55.57357686, -54.3283549 , ...,  48.62786865,
         52.3151029 ,  53.55177409],
       [-38.06004333, -37.89107513, -37.90350342, ...,  35.25520477,
         35.2822787 ,  35.29548747],
       [ 76.12008667,  75.78215027,  75.80700684, ...,  72.94180298,
         72.99781799,  73.02514648]])