In [36]:
import torch
import os
import pickle as pkl
import trimesh
import numpy as np


In [37]:
result_folder = "/home/wayve/saurabh/pixtrack/ycb_35_round_31/"
mesh_path = "/mnt/remote/data/prajwal/YCB_Video_Dataset/models/003_cracker_box/textured.obj"
poses_file = os.path.join(result_folder, "poses.pkl")
with open(poses_file, "rb") as f:
    poses_file = pkl.load(f)
trackers_file = os.path.join(result_folder, "trackers.pkl")
with open(trackers_file, "rb") as f:
    trackers_file = pkl.load(f)
object_mesh = trimesh.load(mesh_path)
vertices = np.array(object_mesh.vertices)
vertices = np.hstack((vertices, np.ones((vertices.shape[0], 1))))

In [38]:
def get_pose_mat_from_tensor(pose_tensor):
    translation = pose_tensor.t.cpu().numpy()
    rotation = pose_tensor.R.cpu().numpy()
    mesh_pose_in_cam = np.eye(4)
    mesh_pose_in_cam[:3, :3] = rotation
    mesh_pose_in_cam[:3, -1] = translation
    return mesh_pose_in_cam

In [69]:
def similarity_transform(from_points, to_points):
    
    assert len(from_points.shape) == 2, \
        "from_points must be a m x n array"
    assert from_points.shape == to_points.shape, \
        "from_points and to_points must have the same shape"
    
    N, m = from_points.shape
    
    mean_from = from_points.mean(axis = 0)
    mean_to = to_points.mean(axis = 0)
    
    delta_from = from_points - mean_from # N x m
    delta_to = to_points - mean_to       # N x m
    
    sigma_from = (delta_from * delta_from).sum(axis = 1).mean()
    sigma_to = (delta_to * delta_to).sum(axis = 1).mean()
    
    cov_matrix = delta_to.T.dot(delta_from) / N
    
    U, d, V_t = np.linalg.svd(cov_matrix, full_matrices = True)
    cov_rank = np.linalg.matrix_rank(cov_matrix)
    S = np.eye(m)
    
    if cov_rank >= m - 1 and np.linalg.det(cov_matrix) < 0:
        S[m-1, m-1] = -1
    elif cov_rank < m-1:
        raise ValueError("colinearility detected in covariance matrix:\n{}".format(cov_matrix))
    
    R = U.dot(S).dot(V_t)
    c = (d * S.diagonal()).sum() / sigma_from
    t = mean_to - c*R.dot(mean_from)
    
    return R, c, t

In [70]:
from_trs = []
to_trs = []
for image_key in poses_file:
    to_trs.append(poses_file[image_key]["T_refined"].t.cpu().numpy())
    from_trs.append(poses_file[image_key]["gt_pose"].t.cpu().numpy())
R, c, t = similarity_transform(np.array(from_trs), np.array(to_trs))
pose_from_res_to_gt = np.eye(4)
pose_from_res_to_gt[:3, :3] = R
pose_from_res_to_gt[:3, -1] = t

In [71]:
from scipy.spatial.transform import Rotation as R
from pytorch3d.loss import chamfer_distance

In [72]:
distances = []
add_ss = []
pose_dists = []
for image_key in poses_file:
    res_pose_mat = get_pose_mat_from_tensor(poses_file[image_key]["T_refined"])
    gt_pose_mat = get_pose_mat_from_tensor(poses_file[image_key]["gt_pose"])
    
    res_vertices = np.dot(pose_from_res_to_gt, np.dot(res_pose_mat, vertices.T)).T[:, :3] * 100
    gt_vertices = np.dot(gt_pose_mat, vertices.T).T[:, :3] * 100
    
    if (not poses_file[image_key]["success"]):
        quat1 = R.from_matrix(gt_pose_mat[:3, :3]).as_quat()
        quat2 = R.from_matrix(res_pose_mat[:3, :3]).as_quat()
        print(f"skipped {image_key}")
        continue
    #     add_s = chamfer_distance(
    #         torch.tensor(res_vertices.astype(np.float32)).unsqueeze(0).cuda(), 
    #         torch.tensor(gt_vertices.astype(np.float32)).unsqueeze(0).cuda(), point_reduction="mean"
    #     )[0].cpu().item() 
    #     add_ss.append(add_s)
    l2_distances = np.linalg.norm(gt_vertices - res_vertices, axis=1)
    #print(l2_distances)
    pose_dist = np.linalg.norm(gt_pose_mat[:3, -1] - res_pose_mat[:3, -1])
    pose_dists.append(pose_dist)


    l2_dist = np.mean(l2_distances)
    distances.append(l2_dist)
    
print("Average error distance for all frames:", np.mean(distances)) 
print("Max error distance for all frames:", np.max(distances)) 

print("ADDS for all frames:", np.mean(adds))
print(np.max(pose_dists) * 100)

    

skipped 001104-color.png
skipped 001187-color.png
Average error distance for all frames: 2.6973091030167073
Max error distance for all frames: 6.8041589749815135


NameError: name 'adds' is not defined

In [None]:
Average error distance for all frames: 2.9226985174149362
Max error distance for all frames: 8.428148720680085

In [None]:
poses_file[image_key]["gt_pose"].__dict__

In [60]:
pose_from_res_to_gt

array([[ 1.04549179,  0.00412873, -0.0186813 ,  0.01048095],
       [-0.00326594,  1.04455582,  0.04807898, -0.05124176],
       [ 0.01885129, -0.04801259,  1.04439386, -0.02700372],
       [ 0.        ,  0.        ,  0.        ,  1.        ]])

In [None]:
trackers_file