From f7e1a33642a6750324e6e74bf874a98f44c4a2b3 Mon Sep 17 00:00:00 2001 From: duinodu <472365351duino@gmail.com> Date: Thu, 6 Nov 2025 13:17:13 +0800 Subject: [PATCH 1/2] update rerun sample --- bindings/spatialmp4.cpp | 19 +++++++------------ examples/python/visualize_rerun.py | 13 +++++++++++++ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/bindings/spatialmp4.cpp b/bindings/spatialmp4.cpp index 14de04f..8bacebb 100644 --- a/bindings/spatialmp4.cpp +++ b/bindings/spatialmp4.cpp @@ -85,6 +85,7 @@ PYBIND11_MODULE(spatialmp4, m) { m.def("get_major_version", &SpatialML::GetMajorVersion, "Get the major version number"); m.def("get_minor_version", &SpatialML::GetMinorVersion, "Get the minor version number"); m.def("get_patch_version", &SpatialML::GetPatchVersion, "Get the patch version number"); + m.attr("HEAD_MODEL_OFFSET") = Eigen::Vector3d(-0.05057, -0.01874, 0.04309); // Bind StreamType enum py::enum_(m, "StreamType") @@ -108,9 +109,7 @@ PYBIND11_MODULE(spatialmp4, m) { .def_readwrite("qx", &SpatialML::pose_frame::qx) .def_readwrite("qy", &SpatialML::pose_frame::qy) .def_readwrite("qz", &SpatialML::pose_frame::qz) - .def("as_se3", [](const SpatialML::pose_frame &pose) { - return pose.as_se3().matrix(); - }) + .def("as_se3", [](const SpatialML::pose_frame &pose) { return pose.as_se3().matrix(); }) .def("__repr__", [](const SpatialML::pose_frame &p) { std::ostringstream ss; ss << p; @@ -161,9 +160,8 @@ PYBIND11_MODULE(spatialmp4, m) { .def_readwrite("fy", &SpatialML::camera_intrinsics::fy) .def_readwrite("cx", &SpatialML::camera_intrinsics::cx) .def_readwrite("cy", &SpatialML::camera_intrinsics::cy) - .def("as_cvmat", [](const SpatialML::camera_intrinsics &intrinsics) { - return MatxToEigen(intrinsics.as_cvmat()); - }) + .def("as_cvmat", + [](const SpatialML::camera_intrinsics &intrinsics) { return MatxToEigen(intrinsics.as_cvmat()); }) .def("__repr__", [](const SpatialML::camera_intrinsics &i) { std::ostringstream ss; ss << i; @@ -179,12 +177,9 @@ PYBIND11_MODULE(spatialmp4, m) { [](SpatialML::camera_extrinsics &extrinsics, const Eigen::Matrix4d &matrix) { extrinsics.extrinsics = EigenToMatx(matrix); }) - .def("as_cvmat", [](const SpatialML::camera_extrinsics &extrinsics) { - return MatxToEigen(extrinsics.as_cvmat()); - }) - .def("as_se3", [](const SpatialML::camera_extrinsics &extrinsics) { - return extrinsics.as_se3().matrix(); - }) + .def("as_cvmat", + [](const SpatialML::camera_extrinsics &extrinsics) { return MatxToEigen(extrinsics.as_cvmat()); }) + .def("as_se3", [](const SpatialML::camera_extrinsics &extrinsics) { return extrinsics.as_se3().matrix(); }) .def("__repr__", [](const SpatialML::camera_extrinsics &e) { std::ostringstream ss; ss << e; diff --git a/examples/python/visualize_rerun.py b/examples/python/visualize_rerun.py index fbae9a0..6a1fdfe 100644 --- a/examples/python/visualize_rerun.py +++ b/examples/python/visualize_rerun.py @@ -22,12 +22,15 @@ def pico_pose_to_open3d(extrinsic): def main( video_file: str, depth_only: bool = False, + rgb_only: bool = False, ): """Visualize spatialmp4 using rerun.""" reader = sm.Reader(video_file) if depth_only: reader.set_read_mode(sm.ReadMode.DEPTH_ONLY) + elif rgb_only: + reader.set_read_mode(sm.ReadMode.RGB_ONLY) else: reader.set_read_mode(sm.ReadMode.DEPTH_FIRST) @@ -91,6 +94,16 @@ def main( extrinsic = np.eye(4) extrinsic[:3, 3] = [TWH.x, TWH.y, TWH.z] extrinsic[:3, :3] = Rotation.from_quat((TWH.qx, TWH.qy, TWH.qz, TWH.qw)).as_matrix() + elif rgb_only: + frame_rgb = reader.load_rgb() + timestamp = frame_rgb.timestamp + + __import__('ipdb').set_trace() + T_W_Hrgb = frame_rgb.pose.as_se3() + T_W_Irgb = sm.head_to_imu(T_W_Hrgb, sm.HEAD_MODEL_OFFSET) + T_W_Srgb = T_W_Irgb * T_I_Srgb + pass + else: rgbd = reader.load_rgbd(True) timestamp = rgbd.timestamp From 9abf1a5a293ae4528538e4d3badacc100ab4f657 Mon Sep 17 00:00:00 2001 From: "bingwen.ai" Date: Thu, 6 Nov 2025 21:06:49 +0800 Subject: [PATCH 2/2] Fix sm.head_to_imu binding bug. --- bindings/spatialmp4.cpp | 46 ++++++++++++--- examples/python/visualize_rerun.py | 90 ++++++++++++++++++++++++------ python/tests/test_utilities.py | 17 ++++++ 3 files changed, 128 insertions(+), 25 deletions(-) diff --git a/bindings/spatialmp4.cpp b/bindings/spatialmp4.cpp index 8bacebb..b846223 100644 --- a/bindings/spatialmp4.cpp +++ b/bindings/spatialmp4.cpp @@ -307,7 +307,8 @@ PYBIND11_MODULE(spatialmp4, m) { m.def( "head_to_imu", - [](py::array_t head_pose_array, py::array_t head_model_offset_array) { + [](py::array_t head_pose_array, + py::array_t head_model_offset_array) { py::buffer_info pose_info = head_pose_array.request(); if (pose_info.ndim != 2 || pose_info.shape[0] != 4 || pose_info.shape[1] != 4) { throw std::invalid_argument("head_pose must be a 4x4 matrix"); @@ -317,12 +318,36 @@ PYBIND11_MODULE(spatialmp4, m) { throw std::invalid_argument("head_model_offset must be a 3-element vector"); } - Eigen::Map> head_pose_map( - static_cast(pose_info.ptr)); - Eigen::Map head_model_offset_map(static_cast(offset_info.ptr)); + Eigen::Matrix4d head_pose_matrix; + const auto row_stride = pose_info.strides[0] / static_cast(sizeof(double)); + const auto col_stride = pose_info.strides[1] / static_cast(sizeof(double)); + const double *pose_ptr = static_cast(pose_info.ptr); + if (pose_info.strides[1] == static_cast(sizeof(double))) { + Eigen::Map> head_pose_map(pose_ptr); + head_pose_matrix = head_pose_map; + } else if (pose_info.strides[0] == static_cast(sizeof(double))) { + Eigen::Map> head_pose_map(pose_ptr); + head_pose_matrix = head_pose_map; + } else { + head_pose_matrix.setZero(); + for (py::ssize_t i = 0; i < 4; ++i) { + for (py::ssize_t j = 0; j < 4; ++j) { + head_pose_matrix(i, j) = pose_ptr[i * row_stride + j * col_stride]; + } + } + } - Eigen::Matrix4d head_pose_matrix = head_pose_map; - Eigen::Vector3d head_model_offset = head_model_offset_map; + Eigen::Vector3d head_model_offset; + const double *offset_ptr = static_cast(offset_info.ptr); + if (offset_info.strides[0] == static_cast(sizeof(double))) { + Eigen::Map head_model_offset_map(offset_ptr); + head_model_offset = head_model_offset_map; + } else { + const auto offset_stride = offset_info.strides[0] / static_cast(sizeof(double)); + for (py::ssize_t i = 0; i < 3; ++i) { + head_model_offset[i] = offset_ptr[i * offset_stride]; + } + } Eigen::Matrix3d R = head_pose_matrix.block<3, 3>(0, 0); Eigen::Vector3d t = head_pose_matrix.block<3, 1>(0, 3); @@ -333,9 +358,14 @@ PYBIND11_MODULE(spatialmp4, m) { Sophus::SE3d imu_pose; Utilities::HeadToImu(head_pose, head_model_offset, imu_pose); - Eigen::Matrix result = imu_pose.matrix(); + const Eigen::Matrix4d result = imu_pose.matrix(); py::array_t output({4, 4}); - std::memcpy(output.mutable_data(), result.data(), sizeof(double) * 16); + auto mutable_result = output.mutable_unchecked<2>(); + for (py::ssize_t i = 0; i < 4; ++i) { + for (py::ssize_t j = 0; j < 4; ++j) { + mutable_result(i, j) = result(i, j); + } + } return output; }, py::arg("head_pose"), py::arg("head_model_offset")); diff --git a/examples/python/visualize_rerun.py b/examples/python/visualize_rerun.py index 6a1fdfe..323d0e8 100644 --- a/examples/python/visualize_rerun.py +++ b/examples/python/visualize_rerun.py @@ -2,6 +2,7 @@ import os import cv2 import numpy as np +from typing import Optional from scipy.spatial.transform import Rotation import spatialmp4 as sm import rerun as rr @@ -9,6 +10,7 @@ def pico_pose_to_open3d(extrinsic): + extrinsic = np.array(extrinsic, dtype=np.float64, copy=True) convert = np.array([ [0, 0, 1, 0], [1, 0, 0, 0], @@ -19,12 +21,51 @@ def pico_pose_to_open3d(extrinsic): return output +def ensure_right_handed_rotation(rotation_matrix: np.ndarray) -> np.ndarray: + r = np.array(rotation_matrix, dtype=np.float64, copy=True) + u, _, vh = np.linalg.svd(r) + corrected = u @ vh + if np.linalg.det(corrected) < 0: + u[:, -1] *= -1 + corrected = u @ vh + return corrected + + +def head_to_imu(head_pose: np.ndarray, head_model_offset: np.ndarray) -> np.ndarray: + """Convert head pose (T_W_H) into IMU pose (T_W_I) matching Utilities::HeadToImu.""" + head_pose = np.asarray(head_pose, dtype=np.float64) + if head_pose.shape != (4, 4): + raise ValueError("head_pose must be a 4x4 matrix") + + head_model_offset = np.asarray(head_model_offset, dtype=np.float64) + if head_model_offset.shape != (3,): + raise ValueError("head_model_offset must be a 3-element vector") + + rotation_matrix = head_pose[:3, :3] + translation = head_pose[:3, 3].copy() + + rotation = Rotation.from_matrix(rotation_matrix) + translation -= rotation.apply(head_model_offset) + + quat_x, quat_y, quat_z, quat_w = rotation.as_quat() # scipy returns xyzw + imu_quat = np.array([quat_y, -quat_x, quat_z, quat_w], dtype=np.float64) # xyzw + imu_rotation = Rotation.from_quat(imu_quat).as_matrix() + imu_translation = np.array([translation[1], -translation[0], translation[2]], dtype=np.float64) + + imu_pose = np.eye(4, dtype=np.float64) + imu_pose[:3, :3] = imu_rotation + imu_pose[:3, 3] = imu_translation + return imu_pose + + def main( video_file: str, depth_only: bool = False, rgb_only: bool = False, + topk: Optional[int] = typer.Option(None, help="Limit visualization to the first K frames."), ): """Visualize spatialmp4 using rerun.""" + reader = sm.Reader(video_file) if depth_only: @@ -34,14 +75,14 @@ def main( else: reader.set_read_mode(sm.ReadMode.DEPTH_FIRST) - if not reader.has_depth(): + if not rgb_only and not reader.has_depth(): typer.echo(typer.style(f"No depth found in input file", fg=typer.colors.RED)) return if not reader.has_pose(): - typer.echo(typer.style(f"No depth found in input file", fg=typer.colors.RED)) + typer.echo(typer.style(f"No pose found in input file", fg=typer.colors.RED)) return - blueprint=rrb.Horizontal( + blueprint = rrb.Horizontal( rrb.Vertical( rrb.Spatial3DView(name="3D", origin="world"), rrb.TextDocumentView(name="Description", origin="/description"), @@ -83,6 +124,8 @@ def main( cx = float(reader.get_rgb_intrinsics_left().cx) cy = float(reader.get_rgb_intrinsics_left().cy) + processed = 0 + while reader.has_next(): if depth_only: depth_frame = reader.load_depth() @@ -98,28 +141,31 @@ def main( frame_rgb = reader.load_rgb() timestamp = frame_rgb.timestamp - __import__('ipdb').set_trace() + T_I_Srgb = reader.get_rgb_extrinsics_left().as_se3() T_W_Hrgb = frame_rgb.pose.as_se3() T_W_Irgb = sm.head_to_imu(T_W_Hrgb, sm.HEAD_MODEL_OFFSET) - T_W_Srgb = T_W_Irgb * T_I_Srgb - pass + T_W_Srgb = T_W_Irgb @ T_I_Srgb + extrinsic = T_W_Srgb else: rgbd = reader.load_rgbd(True) timestamp = rgbd.timestamp depth_np = rgbd.depth extrinsic = rgbd.T_W_S - print(f"Loading frame {reader.get_index() + 1} / {reader.get_frame_count()}, timestamp: {timestamp}") - # preprocess on depthmap - depth_uint16 = (depth_np * 1000).astype(np.uint16) - sobelx = cv2.Sobel(depth_uint16, cv2.CV_32F, 1, 0, ksize=3) - sobely = cv2.Sobel(depth_uint16, cv2.CV_32F, 0, 1, ksize=3) - grad_mag = np.sqrt(sobelx**2 + sobely**2) - depth_np[grad_mag > 500] = 0 - depth_np[(depth_np < 0.2) | (depth_np > 5)] = 0 + print(f"Loading frame {reader.get_index() + 1} / {reader.get_frame_count()}, timestamp: {timestamp}") + + if not rgb_only: + # preprocess on depthmap + depth_uint16 = (depth_np * 1000).astype(np.uint16) + sobelx = cv2.Sobel(depth_uint16, cv2.CV_32F, 1, 0, ksize=3) + sobely = cv2.Sobel(depth_uint16, cv2.CV_32F, 0, 1, ksize=3) + grad_mag = np.sqrt(sobelx**2 + sobely**2) + depth_np[grad_mag > 500] = 0 + depth_np[(depth_np < 0.2) | (depth_np > 5)] = 0 extrinsic = pico_pose_to_open3d(extrinsic) + extrinsic[:3, :3] = ensure_right_handed_rotation(extrinsic[:3, :3]) rr.set_time_seconds("time", timestamp) rr.log("world/xyz", rr.Arrows3D(vectors=[[1, 0, 0], [0, 1, 0], [0, 0, 1]], colors=[[255, 0, 0], [0, 255, 0], [0, 0, 255]])) @@ -132,9 +178,19 @@ def main( position = extrinsic[:3, 3] rotation = Rotation.from_matrix(extrinsic[:3, :3]).as_quat() rr.log("world/camera", rr.Transform3D(translation=position, rotation=rr.Quaternion(xyzw=rotation))) - rr.log("world/camera/image/depth", rr.DepthImage(depth_np, meter=1.0)) - if not depth_only: - rr.log("world/camera/image/rgb", rr.Image(rgbd.rgb, color_model="BGR").compress(jpeg_quality=95)) + + if rgb_only: + rr.log("world/camera/image/rgb", rr.Image(frame_rgb.left_rgb, color_model="BGR").compress(jpeg_quality=95)) + else: + rr.log("world/camera/image/depth", rr.DepthImage(depth_np, meter=1.0)) + if not depth_only: + rr.log("world/camera/image/rgb", rr.Image(rgbd.rgb, color_model="BGR").compress(jpeg_quality=95)) + + processed += 1 + if topk is not None and processed >= topk: + break + if topk and reader.get_index() > topk: + break if __name__ == "__main__":# diff --git a/python/tests/test_utilities.py b/python/tests/test_utilities.py index 9706e50..a965fa4 100644 --- a/python/tests/test_utilities.py +++ b/python/tests/test_utilities.py @@ -85,3 +85,20 @@ def test_head_to_imu_general_pose(): imu_pose = spatialmp4.head_to_imu(head_pose, offset) expected = compute_expected_head_to_imu(head_pose, offset) assert np.allclose(imu_pose, expected, atol=1e-10) + + +def test_head_to_imu_fortran_order_input(): + angle = np.deg2rad(30.0) + Rx = np.array([ + [1.0, 0.0, 0.0], + [0.0, np.cos(angle), -np.sin(angle)], + [0.0, np.sin(angle), np.cos(angle)], + ]) + head_pose = np.eye(4, order="F") + head_pose[:3, :3] = Rx + head_pose[:3, 3] = np.array([0.3, -0.4, 0.5]) + offset = np.array([0.12, -0.08, 0.04]) + + expected = compute_expected_head_to_imu(np.array(head_pose), offset) + imu_pose = spatialmp4.head_to_imu(head_pose, offset) + assert np.allclose(imu_pose, expected, atol=1e-12)