## 0. Install and Import Modules

Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, uncomment and run the following cell to install it:

In [58]:
# import os
# import sys
# import torch
# need_pytorch3d=False
# try:
#     import pytorch3d
# except ModuleNotFoundError:
#     need_pytorch3d=True
# if need_pytorch3d:
#     if torch.__version__.startswith("1.13.") and sys.platform.startswith("linux"):
#         # We try to install PyTorch3D via a released wheel.
#         pyt_version_str=torch.__version__.split("+")[0].replace(".", "")
#         version_str="".join([
#             f"py3{sys.version_info.minor}_cu",
#             torch.version.cuda.replace(".",""),
#             f"_pyt{pyt_version_str}"
#         ])
#         !pip install fvcore iopath
#         !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html
#     else:
#         # We try to install PyTorch3D from source.
#         !curl -LO https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz
#         !tar xzf 1.10.0.tar.gz
#         os.environ["CUB_HOME"] = os.getcwd() + "/cub-1.10.0"
#         !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'

In [59]:
# imports
import numpy as np
import torch
from pytorch3d.transforms.so3 import (
    so3_exp_map,
    so3_relative_angle,
)
from pytorch3d.transforms.se3 import (
    se3_exp_map,
    se3_log_map,
)

import pytorch3d.transforms as transforms
    
# add path for demo utils
import sys
import os
sys.path.append(os.path.abspath(''))

# Seed torch's RNG so that torch-side random draws repeat between runs.
torch.manual_seed(42)

# Pick the compute device: first CUDA GPU when one is visible, CPU otherwise.
if not torch.cuda.is_available():
    device = torch.device("cpu")
    print("WARNING: CPU only, this will be slow!")
else:
    device = torch.device("cuda:0")

In [60]:
# !pip install git+https://github.com/princeton-vl/lietorch.git

## **Data preparation**

In [61]:
# # Load the matrices 
# T_o_cb_batch = np.load('')
# batch_size = T_o_cb_batch.shape[0]
# T_o_p_batch = np.load('')
# T_c_p_batch = np.load('')
# X_matrix = np.load('')
# X_batch = np.tile(X_matrix, [batch_size,1,1])

# # Load into torch tensor
# X_batch = torch.tensor(X_batch)
# T_o_p_batch = torch.tensor(T_o_p_batch)
# T_o_cb_batch = torch.tensor(T_o_cb_batch)
# T_c_p_batch = torch.tensor(T_c_p_batch)

In [62]:
## test
from scipy.spatial.transform import Rotation as R

# Synthetic sanity-check problem: every measured transform is the identity and
# the initial hand-eye guess X is a random rotation with a fixed translation.
batch_size = 800

# Batches of identity transforms, shape (batch_size, 4, 4), float64.
T_o_cb_batch = np.tile(np.eye(4), [batch_size, 1, 1])
T_o_p_batch = np.tile(np.eye(4), [batch_size, 1, 1])
T_c_p_batch = np.tile(np.eye(4), [batch_size, 1, 1])

# Homogeneous 4x4 template: rotation in the upper-left 3x3 block, translation
# in the last column, bottom row [0, 0, 0, 1].
# The rotation is drawn with a fixed seed; an unseeded R.random() would give a
# different starting guess on every run, making the printed results
# irreproducible.
X_matrix = np.eye(4)
X_matrix[:3, :3] = R.random(random_state=42).as_matrix()
X_matrix[:3, 3] = 1.0

# Replicate the template for every sample; the batch copy uses a translation
# of 2 along each axis (X_matrix itself keeps 1).
X_batch = np.tile(X_matrix, [batch_size, 1, 1])
X_batch[:, :3, 3] = 2
print(X_batch[1, :, :])

# Convert everything to torch tensors (dtype follows the float64 numpy arrays).
X_batch = torch.tensor(X_batch)
T_o_p_batch = torch.tensor(T_o_p_batch)
T_o_cb_batch = torch.tensor(T_o_cb_batch)
T_c_p_batch = torch.tensor(T_c_p_batch)

[[-0.48132905  0.86368179 -0.14958645  2.        ]
 [ 0.81857838  0.38188053 -0.42906491  2.        ]
 [-0.31345139 -0.32896964 -0.89080144  2.        ]
 [ 0.          0.          0.          1.        ]]


In [63]:
# # Create Transform3d objects from the batch of transformation matrices
# T_o_cb = transforms.Transform3d(matrix=T_o_cb_batch, device=device)
# X = transforms.Transform3d(matrix=X_batch, device=device)
# T_o_p = transforms.Transform3d(matrix=T_o_p_batch, device=device)
# T_c_p = transforms.Transform3d(matrix=T_c_p_batch, device=device)

# # cam_gt: the ground truth camera pose w.r.t the optical tracker cam_gt = T_o_p * (T_c_p)^-1
# cam_gt = T_o_p.compose(T_c_p.inverse())

# # cam_real: the camera pose from initial hand-eye calibration cam_real = T_o_cb * X
# cam_real = T_o_cb.compose(X)

## **Define optimization loss function**



In [64]:
def camera_distance(hand_eye_real, hand_eye_gt):
  '''
  Sum of per-sample L2 distances on se(3) between two batches of transforms.

  distance = sum_i || log(hand_eye_real_i * (hand_eye_gt_i)^-1) ||_2

  Args:
    hand_eye_real: Transform3d holding the batch of current estimates.
    hand_eye_gt: Transform3d holding the batch of reference transforms.

  Returns:
    A scalar tensor with the summed distance.
  '''

  # Relative transform real * gt^-1 for every sample, as a batch of 4x4 matrices.
  relative = hand_eye_real.compose(hand_eye_gt.inverse()).get_matrix()

  # Transpose each 4x4 before taking the logarithm; this notebook stores the
  # translation in the last column, and the same transpose is applied around
  # every se3_log_map / se3_exp_map call here.
  log_vecs = se3_log_map(relative.permute(0, 2, 1))

  # Take the norm of each 6D log vector *before* summing over the batch.
  # Summing the signed vectors first would let opposite per-sample errors
  # cancel and report a small distance for a poor fit.
  return log_vecs.norm(dim=-1).sum()
  

# camera_distance(cam_real, cam_gt)

## **Optimization**

In [72]:
# Wrap each batch of 4x4 matrices in a Transform3d. The tensors are moved to
# `device` explicitly rather than relying on the `device=` argument to
# relocate a matrix that already lives elsewhere.
T_o_cb = transforms.Transform3d(matrix=T_o_cb_batch.to(device), device=device)
X = transforms.Transform3d(matrix=X_batch.to(device), device=device)
T_o_p = transforms.Transform3d(matrix=T_o_p_batch.to(device), device=device)
T_c_p = transforms.Transform3d(matrix=T_c_p_batch.to(device), device=device)

# hand_eye_real_6D: the initial calibration X expressed as se(3) log vectors,
# one 6D row per batch entry. This is the tensor being optimized.
# NOTE(review): every batch entry gets its own 6D vector; if a single shared
# hand-eye transform is intended, confirm this parameterization.
hand_eye_real_6D = (
    se3_log_map(X.get_matrix().permute(0, 2, 1)).detach().requires_grad_(True)
)

# hand_eye_gt: the hand-eye transformation implied by the measurement chain
# hand_eye_gt = (T_o_cb)^-1 * T_o_p * (T_c_p)^-1
hand_eye_gt = T_o_cb.inverse().compose(T_o_p).compose(T_c_p.inverse())

# init the optimizer
optimizer = torch.optim.SGD([hand_eye_real_6D], lr=.0003, momentum=0.9)

# run the optimization
n_iter = 4000  # fix the number of iterations
log_every = 200  # print a status line every `log_every` iterations
for it in range(n_iter):
    # re-init the optimizer gradients
    optimizer.zero_grad()

    # map the 6D vectors back to 4x4 matrices and wrap them as a Transform3d
    hand_eye_real = transforms.Transform3d(
        matrix=se3_exp_map(hand_eye_real_6D).permute(0, 2, 1), device=device
    )

    # the loss is the se(3) distance between the current estimate and the
    # transform implied by the measurement chain
    loss = camera_distance(hand_eye_real, hand_eye_gt)

    # back-propagate and apply the gradients
    loss.backward()
    optimizer.step()

    # print a status message periodically and on the last iteration
    if it % log_every == 0 or it == n_iter - 1:
        status = 'iteration=%3d; camera_distance=%1.3e' % (it, loss.item())
        print(status)
print('Optimization finished.')

iteration=  0; camera_distance=4.425e+03
iteration=200; camera_distance=3.967e+03
iteration=400; camera_distance=3.487e+03
iteration=600; camera_distance=3.007e+03
iteration=800; camera_distance=2.527e+03
iteration=1000; camera_distance=2.047e+03
iteration=1200; camera_distance=1.567e+03
iteration=1400; camera_distance=1.087e+03
iteration=1600; camera_distance=6.070e+02
iteration=1800; camera_distance=1.270e+02
iteration=2000; camera_distance=2.958e-01
iteration=2200; camera_distance=3.692e-01
iteration=2400; camera_distance=5.379e-01
iteration=2600; camera_distance=1.320e-01
iteration=2800; camera_distance=1.322e-01
iteration=3000; camera_distance=1.322e-01
iteration=3200; camera_distance=1.322e-01
iteration=3400; camera_distance=1.322e-01
iteration=3600; camera_distance=1.322e-01
iteration=3800; camera_distance=1.322e-01
iteration=3999; camera_distance=1.455e-01
Optimization finished.
