diff --git a/docs/source/_static/atomic_actions/press.gif b/docs/source/_static/atomic_actions/press.gif
new file mode 100644
index 000000000..1eafdc319
Binary files /dev/null and b/docs/source/_static/atomic_actions/press.gif differ
diff --git a/docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst b/docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst
index c2982d04e..90077698f 100644
--- a/docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst
+++ b/docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst
@@ -33,6 +33,8 @@ embodichain.lab.sim.atomic_actions
MoveHeldObject
PlaceCfg
Place
+ PressCfg
+ Press
AtomicActionEngine
.. currentmodule:: embodichain.lab.sim.atomic_actions
@@ -153,6 +155,15 @@ Actions
:members:
:show-inheritance:
+.. autoclass:: PressCfg
+ :members:
+ :exclude-members: __init__, copy, replace, to_dict
+ :show-inheritance:
+
+.. autoclass:: Press
+ :members:
+ :show-inheritance:
+
Engine & Registry
-----------------
diff --git a/docs/source/overview/sim/atomic_actions/builtin_actions.md b/docs/source/overview/sim/atomic_actions/builtin_actions.md
index 4507d28b1..818cd172b 100644
--- a/docs/source/overview/sim/atomic_actions/builtin_actions.md
+++ b/docs/source/overview/sim/atomic_actions/builtin_actions.md
@@ -14,6 +14,7 @@ The following actions are available out of the box:
| `PickUp` | Single | `GraspTarget` — object semantics | Approach → close gripper → lift |
|
| `MoveHeldObject` | Single | `HeldObjectPoseTarget` — held-object pose | Move held object while keeping gripper closed |
|
| `Place` | Single | `EndEffectorPoseTarget` — EEF release pose | Lower → open gripper → retract |
|
+| `Press` | Single | `EndEffectorPoseTarget` — EEF press pose | Close gripper → press down → return |
|
---
@@ -123,3 +124,26 @@ down to the target pose. On success, the returned `WorldState` clears `held_obje
`(4, 4)` or `(n_envs, 4, 4)`.

+
+---
+
+## `Press`
+
+Three-phase contact motion: *close gripper → press down → return*. This is useful
+for button-like or contact-based interactions where the end-effector should reach a
+target pose and then return to the pre-press arm pose.
+
+`Press` does not create or clear `WorldState.held_object`; it preserves the state
+threaded into it.
+
+| Config field | Default | Description |
+|---|---|---|
+| `hand_close_qpos` | `None` | **Required.** Gripper closed joint positions |
+| `hand_control_part` | `"hand"` | Robot control part for the gripper |
+| `hand_interp_steps` | `5` | Waypoints for the gripper close phase |
+| `sample_interval` | `80` | Total waypoints across all three phases |
+
+**Target:** `EndEffectorPoseTarget(xpos=...)` — the EEF pose to press, a `torch.Tensor`
+of shape `(4, 4)` or `(n_envs, 4, 4)`.
+
+
diff --git a/docs/source/overview/sim/atomic_actions/index.md b/docs/source/overview/sim/atomic_actions/index.md
index cee581798..c5c2e0ba0 100644
--- a/docs/source/overview/sim/atomic_actions/index.md
+++ b/docs/source/overview/sim/atomic_actions/index.md
@@ -57,7 +57,7 @@ and every action declares the target type, or tuple of target types, it accepts
| Target | Constructor | Used by |
|---|---|---|
-| `EndEffectorPoseTarget` | `EndEffectorPoseTarget(xpos)` | `MoveEndEffector`, `Place` |
+| `EndEffectorPoseTarget` | `EndEffectorPoseTarget(xpos)` | `MoveEndEffector`, `Place`, `Press` |
| `JointPositionTarget` | `JointPositionTarget(qpos)` | `MoveJoints` |
| `NamedJointPositionTarget` | `NamedJointPositionTarget(name)` | `MoveJoints` |
| `GraspTarget` | `GraspTarget(semantics)` | `PickUp` |
@@ -94,7 +94,7 @@ action's `TargetType` before calling `execute`:
| Target | Holds | Accepted by |
|---|---|---|
-| `EndEffectorPoseTarget(xpos)` | EEF pose tensor `(4,4)` or `(n_envs,4,4)` | `MoveEndEffector`, `Place` |
+| `EndEffectorPoseTarget(xpos)` | EEF pose tensor `(4,4)` or `(n_envs,4,4)` | `MoveEndEffector`, `Place`, `Press` |
| `JointPositionTarget(qpos)` | Control-part qpos tensor `(control_dof,)` or `(n_envs, control_dof)` | `MoveJoints` |
| `NamedJointPositionTarget(name)` | Name resolved from `MoveJointsCfg.named_joint_positions` | `MoveJoints` |
| `GraspTarget(semantics)` | `ObjectSemantics` (affordance + entity) | `PickUp` |
@@ -110,6 +110,7 @@ action's `TargetType` before calling `execute`:
| `Place` | Clears it to `None` |
| `MoveEndEffector` | Leaves it unchanged |
| `MoveJoints` | Leaves it unchanged |
+| `Press` | Leaves it unchanged |
If a step fails, `run()` returns `success=False` with the partial trajectory concatenated up
to (but not including) the failed step, and the `WorldState` going into that step.
@@ -136,6 +137,8 @@ from embodichain.lab.sim.atomic_actions import (
MoveHeldObjectCfg,
Place,
PlaceCfg,
+ Press,
+ PressCfg,
MoveEndEffector,
MoveEndEffectorCfg,
)
@@ -159,6 +162,11 @@ move_held_object_cfg = MoveHeldObjectCfg(
hand_close_qpos=torch.tensor([0.025, 0.025]),
)
move_cfg = MoveEndEffectorCfg(control_part="arm")
+press_cfg = PressCfg(
+ control_part="arm",
+ hand_control_part="hand",
+ hand_close_qpos=torch.tensor([0.025, 0.025]),
+)
move_joints_cfg = MoveJointsCfg(
control_part="arm",
named_joint_positions={"home": torch.zeros(6)},
@@ -170,6 +178,7 @@ engine.register(PickUp(motion_gen, cfg=pickup_cfg))
engine.register(MoveHeldObject(motion_gen, cfg=move_held_object_cfg))
engine.register(Place(motion_gen, cfg=place_cfg))
engine.register(MoveEndEffector(motion_gen, cfg=move_cfg))
+engine.register(Press(motion_gen, cfg=press_cfg))
engine.register(MoveJoints(motion_gen, cfg=move_joints_cfg))
# 3. Describe the object to pick
@@ -297,6 +306,7 @@ builtin_actions
- `scripts/tutorials/atomic_action/pickup.py`
- `scripts/tutorials/atomic_action/move_held_object.py`
- `scripts/tutorials/atomic_action/place.py`
+ - `scripts/tutorials/atomic_action/press.py`
Run a demo in headless CPU mode with `--auto_play --headless --device cpu` to record
an MP4 under `outputs/videos`. For example:
diff --git a/docs/source/tutorial/atomic_actions.rst b/docs/source/tutorial/atomic_actions.rst
index b65f56792..95df93bb5 100644
--- a/docs/source/tutorial/atomic_actions.rst
+++ b/docs/source/tutorial/atomic_actions.rst
@@ -5,7 +5,7 @@ Atomic Actions
EmbodiChain's **atomic action** layer provides a high-level, composable interface for common
manipulation primitives such as *move end-effector*, *move joints*, *pick up*,
-*move held object*, and *place*. Each action
+*move held object*, *place*, and *press*. Each action
encapsulates the full planning pipeline — grasp-pose estimation, IK, trajectory generation, and
gripper interpolation — behind a single ``execute(target, state)`` call, making it straightforward
to chain multiple actions together into complex robot behaviours.
@@ -18,7 +18,7 @@ Key Features
``GraspTarget`` (wrapping an ``ObjectSemantics``), or ``HeldObjectPoseTarget``. The
engine checks each step's target against the action's declared ``TargetType`` before running.
- **Built-in primitives** — ``MoveEndEffector``, ``MoveJoints``, ``PickUp``, ``MoveHeldObject``,
- and ``Place``
+ ``Place``, and ``Press``
cover the most common tabletop manipulation workflows out of the box.
See :doc:`/overview/sim/atomic_actions/index` for configs and target types.
- **Extensible registry** — custom action *classes* can be registered globally with
@@ -34,7 +34,7 @@ For the full design overview, architecture diagram, and extension guide see
The Code
--------
-Focused demo scripts are available for the five built-in primitives in the
+Focused demo scripts are available for the built-in primitives in the
``scripts/tutorials/atomic_action`` directory:
- ``move_end_effector.py``
@@ -42,6 +42,7 @@ Focused demo scripts are available for the five built-in primitives in the
- ``pickup.py``
- ``move_held_object.py``
- ``place.py``
+- ``press.py``
Each script supports interactive inspection by default. Add ``--auto_play`` to skip
keyboard prompts, and combine it with ``--headless --device cpu`` to record an MP4 under
diff --git a/embodichain/lab/sim/atomic_actions/__init__.py b/embodichain/lab/sim/atomic_actions/__init__.py
index bd96d7bea..c2e7a2cc4 100644
--- a/embodichain/lab/sim/atomic_actions/__init__.py
+++ b/embodichain/lab/sim/atomic_actions/__init__.py
@@ -18,7 +18,7 @@
This module provides a unified interface for the atomic motion primitives
(``move_end_effector``, ``move_joints``, ``pick_up``, ``move_held_object``,
-``place``), with typed targets, a ``WorldState`` threaded across sequenced
+``place``, ``press``), with typed targets, a ``WorldState`` threaded across sequenced
actions, and extensible custom action registration.
"""
@@ -47,11 +47,13 @@
MoveHeldObject,
PickUp,
Place,
+ Press,
MoveEndEffectorCfg,
MoveJointsCfg,
MoveHeldObjectCfg,
PickUpCfg,
PlaceCfg,
+ PressCfg,
)
from .engine import (
AtomicActionEngine,
@@ -84,11 +86,13 @@
"MoveHeldObject",
"PickUp",
"Place",
+ "Press",
"MoveEndEffectorCfg",
"MoveJointsCfg",
"MoveHeldObjectCfg",
"PickUpCfg",
"PlaceCfg",
+ "PressCfg",
# Engine
"AtomicActionEngine",
"register_action",
diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py
index dd7991d9f..eb2452649 100644
--- a/embodichain/lab/sim/atomic_actions/actions.py
+++ b/embodichain/lab/sim/atomic_actions/actions.py
@@ -16,10 +16,11 @@
"""Concrete atomic actions built on :class:`AtomicAction` and :class:`TrajectoryBuilder`.
-Five sibling actions live here: :class:`MoveEndEffector`, :class:`MoveJoints`,
-:class:`PickUp`, :class:`MoveHeldObject`, and :class:`Place`. Each inherits
-:class:`AtomicAction` directly and composes a :class:`TrajectoryBuilder` for
-shared trajectory math. ``execute`` takes a typed target plus a
+Six sibling actions live here: :class:`MoveEndEffector`, :class:`MoveJoints`,
+:class:`PickUp`, :class:`MoveHeldObject`, :class:`Place`, and :class:`Press`.
+Each inherits :class:`AtomicAction` directly and composes a
+:class:`TrajectoryBuilder` for shared trajectory math. ``execute`` takes a typed
+target plus a
:class:`WorldState` and returns an :class:`ActionResult` whose trajectory is
full-robot DoF shaped ``(n_envs, n_waypoints, robot.dof)``.
"""
@@ -144,6 +145,24 @@ class PlaceCfg(ActionCfg):
"""Height (m) to retract the end-effector after opening the gripper."""
+@configclass
+class PressCfg(ActionCfg):
+ name: str = "press"
+ """Name of the action, used for identification and logging."""
+
+ sample_interval: int = 80
+ """Number of waypoints for the full trajectory (hand close + down + back)."""
+
+ hand_interp_steps: int = 5
+ """Number of waypoints for closing the gripper before pressing."""
+
+ hand_control_part: str = "hand"
+ """Name of the robot part that controls the hand joints."""
+
+ hand_close_qpos: torch.Tensor | None = None
+ """Joint positions for the closed hand state, shape ``[hand_dof,]``."""
+
+
# =============================================================================
# Shared helpers private to this module
# =============================================================================
@@ -747,6 +766,135 @@ def _fail(self, state: WorldState) -> ActionResult:
)
+# =============================================================================
+# Press
+# =============================================================================
+
+
+class Press(AtomicAction):
+ """Close the gripper, press down to a target pose, then return."""
+
+ TargetType: ClassVar[type] = EndEffectorPoseTarget
+
+ def __init__(
+ self,
+ motion_generator,
+ cfg: PressCfg | None = None,
+ ) -> None:
+ super().__init__(motion_generator, cfg or PressCfg())
+ self.builder = TrajectoryBuilder(motion_generator)
+ self.n_envs = self.robot.get_qpos().shape[0]
+ self.arm_joint_ids = self.robot.get_joint_ids(name=self.cfg.control_part)
+ self.hand_joint_ids = self.robot.get_joint_ids(name=self.cfg.hand_control_part)
+ self.arm_dof = len(self.arm_joint_ids)
+ self.hand_dof = len(self.hand_joint_ids)
+ self.robot_dof = self.robot.dof
+
+ if self.cfg.hand_close_qpos is None:
+ logger.log_error(
+ "hand_close_qpos must be specified in PressCfg", ValueError
+ )
+ self.hand_close_qpos = self.builder.expand_hand_qpos(
+ self.cfg.hand_close_qpos,
+ n_envs=self.n_envs,
+ hand_dof=self.hand_dof,
+ )
+
+ def execute(self, target: EndEffectorPoseTarget, state: WorldState) -> ActionResult:
+ press_xpos = self.builder.resolve_pose_target(target.xpos, n_envs=self.n_envs)
+ start_arm_qpos = self.builder.resolve_start_qpos(
+ _arm_qpos_from_state(state, self.arm_joint_ids, self.robot_dof),
+ n_envs=self.n_envs,
+ arm_dof=self.arm_dof,
+ control_part=self.cfg.control_part,
+ )
+ start_hand_qpos = state.last_qpos[:, self.hand_joint_ids]
+
+ n_close, n_down, n_back = self._compute_phase_waypoints()
+
+ # Phase 1: close the hand while holding the current EEF pose.
+ hand_close_path = self.builder.interpolate_hand_qpos(
+ start_hand_qpos,
+ self.hand_close_qpos,
+ n_waypoints=n_close,
+ )
+
+ # Phase 2: press down to the target pose.
+ target_states_list = [
+ [PlanState(xpos=press_xpos[i], move_type=MoveType.EEF_MOVE)]
+ for i in range(self.n_envs)
+ ]
+ ok, down_arm = self.builder.plan_arm_traj(
+ target_states_list,
+ start_arm_qpos,
+ n_down,
+ control_part=self.cfg.control_part,
+ arm_dof=self.arm_dof,
+ )
+ if not ok:
+ logger.log_warning("Press failed to plan the down trajectory.")
+ return self._fail(state)
+
+ # Phase 3: return to the arm pose from before pressing.
+ press_arm_qpos = down_arm[:, -1, :]
+ back_arm = self.builder.plan_joint_traj(press_arm_qpos, start_arm_qpos, n_back)
+
+ full = torch.empty(
+ (self.n_envs, n_close + n_down + n_back, self.robot_dof),
+ dtype=torch.float32,
+ device=self.device,
+ )
+ full[:, :, :] = state.last_qpos.unsqueeze(1)
+ full[:, :n_close, self.arm_joint_ids] = start_arm_qpos.unsqueeze(1)
+ full[:, :n_close, self.hand_joint_ids] = hand_close_path
+ full[:, n_close : n_close + n_down, self.arm_joint_ids] = down_arm
+ full[:, n_close : n_close + n_down, self.hand_joint_ids] = (
+ self.hand_close_qpos.unsqueeze(1)
+ )
+ full[:, n_close + n_down :, self.arm_joint_ids] = back_arm
+ full[:, n_close + n_down :, self.hand_joint_ids] = (
+ self.hand_close_qpos.unsqueeze(1)
+ )
+
+ return ActionResult(
+ success=True,
+ trajectory=full,
+ next_state=WorldState(
+ last_qpos=full[:, -1, :].clone(),
+ held_object=state.held_object,
+ ),
+ )
+
+ def _compute_phase_waypoints(self) -> tuple[int, int, int]:
+ n_close = self.cfg.hand_interp_steps
+ if n_close < 1:
+ logger.log_error(
+ "hand_interp_steps must be at least 1 for PressCfg.", ValueError
+ )
+
+ motion_waypoints = self.cfg.sample_interval - n_close
+ n_down = motion_waypoints // 2
+ n_back = motion_waypoints - n_down
+ if n_down < 2 or n_back < 2:
+ logger.log_error(
+ "Not enough waypoints for press trajectory. Increase "
+ "sample_interval or decrease hand_interp_steps.",
+ ValueError,
+ )
+ return n_close, n_down, n_back
+
+ def _fail(self, state: WorldState) -> ActionResult:
+ return ActionResult(
+ success=False,
+ trajectory=torch.empty(
+ (self.n_envs, 0, self.robot_dof),
+ dtype=torch.float32,
+ device=self.device,
+ ),
+ next_state=state,
+ )
+
+
__all__ = [
"MoveEndEffector",
"MoveEndEffectorCfg",
@@ -758,4 +906,6 @@ def _fail(self, state: WorldState) -> ActionResult:
"PickUpCfg",
"Place",
"PlaceCfg",
+ "Press",
+ "PressCfg",
]
diff --git a/scripts/benchmark/__main__.py b/scripts/benchmark/__main__.py
index ee9eac0ae..c337eb4ab 100644
--- a/scripts/benchmark/__main__.py
+++ b/scripts/benchmark/__main__.py
@@ -21,6 +21,7 @@
python -m scripts.benchmark rl --tasks push_cube --algorithms ppo --suite default
python -m scripts.benchmark rl --rebuild-report-only
python -m scripts.benchmark robotics-kinematic-solver -s pytorch
+ python -m scripts.benchmark atomic-action --smoke
"""
from __future__ import annotations
@@ -45,6 +46,13 @@ def _run_rl_cli(_: argparse.Namespace) -> None:
rl_main()
+def _run_atomic_action_cli(_: argparse.Namespace) -> None:
+ """Run atomic action benchmark CLI entrypoint."""
+ from scripts.benchmark.atomic_action.run_benchmark import main as atomic_main
+
+ atomic_main()
+
+
def main() -> None:
"""Dispatch to the appropriate benchmark sub-command CLI."""
parser = argparse.ArgumentParser(
@@ -75,6 +83,16 @@ def main() -> None:
)
robotics_ks_parser.set_defaults(func=_run_robotics_kinematic_solver_cli)
+ # -- atomic-action -------------------------------------------------------
+ atomic_action_parser = subparsers.add_parser(
+ "atomic-action",
+ help="Benchmark atomic actions over object presets and positions.",
+ )
+ from scripts.benchmark.atomic_action.run_benchmark import add_benchmark_args
+
+ add_benchmark_args(atomic_action_parser)
+ atomic_action_parser.set_defaults(func=_run_atomic_action_cli)
+
# -- Parse ---------------------------------------------------------------
# If no sub-command is given, print help and exit.
if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"):
@@ -101,3 +119,6 @@ def main() -> None:
if __name__ == "__main__":
main()
+
+
+__all__ = ["main"]
diff --git a/scripts/benchmark/atomic_action/__init__.py b/scripts/benchmark/atomic_action/__init__.py
new file mode 100644
index 000000000..4709da3d9
--- /dev/null
+++ b/scripts/benchmark/atomic_action/__init__.py
@@ -0,0 +1,21 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Benchmarks for atomic actions."""
+
+from __future__ import annotations
+
+__all__ = []
diff --git a/scripts/benchmark/atomic_action/common.py b/scripts/benchmark/atomic_action/common.py
new file mode 100644
index 000000000..7220ab625
--- /dev/null
+++ b/scripts/benchmark/atomic_action/common.py
@@ -0,0 +1,1194 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Shared helpers for atomic-action benchmark scripts."""
+
+from __future__ import annotations
+
+import argparse
+import math
+import os
+import re
+import resource
+import sys
+import time
+from collections.abc import Sequence
+from dataclasses import dataclass
+from datetime import datetime
+from pathlib import Path
+from typing import Callable
+
+try:
+ import psutil
+except ModuleNotFoundError:
+ psutil = None
+
+CPU_MEMORY_BACKEND = "psutil" if psutil is not None else "resource"
+DEFAULT_VIDEO_DIR = Path("outputs/benchmark_videos")
+DEFAULT_VIDEO_FPS = 20
+DEFAULT_VIDEO_MAX_MEMORY_MB = 2048
+DEFAULT_VIDEO_WIDTH = 640
+DEFAULT_VIDEO_HEIGHT = 480
+DEFAULT_VIDEO_HOLD_STEPS = 120
+DEFAULT_VIDEO_CASE_LIMIT = 0
+PHYSICAL_PICK_MIN_LIFT_M = 0.04
+PHYSICAL_PLACE_XY_TOLERANCE_M = 0.10
+PHYSICAL_MOVE_HELD_OBJECT_XYZ_TOLERANCE_M = 0.12
+PHYSICAL_VALIDATION_HOLD_STEPS = 80
+SIDE_GRASP_MAX_OPEN_AXIS_ABS_Z = 0.35
+SIDE_GRASP_OPEN_AXIS_Z_COST_WEIGHT = 0.5
+BENCHMARK_PROFILES = ("smoke", "coverage", "full")
+DEFAULT_BENCHMARK_PROFILE = "coverage"
+DEFAULT_VIDEO_LOOK_AT = (
+ (-1.25, -1.15, 0.95),
+ (-0.25, -0.02, 0.25),
+ (0.0, 0.0, 1.0),
+)
+
+
+@dataclass(frozen=True)
+class PositionCase:
+ """Initial object position case with a quadrant label."""
+
+ name: str
+ quadrant: str
+ xy: tuple[float, float]
+
+
+@dataclass(frozen=True)
+class MeshObjectPreset:
+ """Real mesh object preset used by object-conditioned benchmarks."""
+
+ object_type: str
+ material_name: str
+ label: str
+ init_rot: tuple[float, float, float]
+ body_scale: tuple[float, float, float]
+ mass: float
+ initial_z: float
+ mesh_path: str = ""
+ shape_type: str = "mesh"
+ cube_size: tuple[float, float, float] | None = None
+ use_usd_properties: bool = False
+ dynamic_friction: float = 0.97
+ static_friction: float = 0.99
+ restitution: float = 0.0
+ contact_offset: float = 0.002
+ rest_offset: float = 0.0
+ linear_damping: float = 0.7
+ angular_damping: float = 0.7
+ max_depenetration_velocity: float = 10.0
+ min_position_iters: int = 4
+ min_velocity_iters: int = 1
+ max_linear_velocity: float = 100.0
+ max_angular_velocity: float = 100.0
+ max_convex_hull_num: int = 16
+ enable_ccd: bool = False
+
+
+POSITION_CASES: dict[str, PositionCase] = {
+ "q1_near": PositionCase(name="q1_near", quadrant="q1", xy=(0.02, 0.18)),
+ "q1_far": PositionCase(name="q1_far", quadrant="q1", xy=(0.12, 0.36)),
+ "q2_near": PositionCase(name="q2_near", quadrant="q2", xy=(-0.42, 0.18)),
+ "q2_far": PositionCase(name="q2_far", quadrant="q2", xy=(-0.62, 0.36)),
+ "q3_near": PositionCase(name="q3_near", quadrant="q3", xy=(-0.42, -0.18)),
+ "q3_far": PositionCase(name="q3_far", quadrant="q3", xy=(-0.62, -0.36)),
+ "q4_near": PositionCase(name="q4_near", quadrant="q4", xy=(0.02, -0.18)),
+ "q4_far": PositionCase(name="q4_far", quadrant="q4", xy=(0.12, -0.36)),
+}
+FULL_POSITION_CASE_NAMES = tuple(POSITION_CASES.keys())
+COVERAGE_POSITION_CASE_NAMES = FULL_POSITION_CASE_NAMES
+SMOKE_POSITION_CASE_NAMES = ("q3_near",)
+
+MESH_OBJECT_PRESETS: dict[str, MeshObjectPreset] = {
+ "sugar_box": MeshObjectPreset(
+ object_type="sugar_box",
+ material_name="cardboard",
+ label="sugar_box",
+ mesh_path="SugarBox/sugar_box_usd/sugar_box.usda",
+ init_rot=(0.0, 0.0, 0.0),
+ body_scale=(0.8, 0.8, 0.8),
+ mass=0.05,
+ initial_z=0.05,
+ use_usd_properties=False,
+ ),
+ "coffee_cup": MeshObjectPreset(
+ object_type="coffee_cup",
+ material_name="ceramic",
+ label="coffee_cup",
+ mesh_path="CoffeeCup/cup.ply",
+ init_rot=(0.0, 0.0, -90.0),
+ body_scale=(4.0, 4.0, 4.0),
+ mass=0.01,
+ initial_z=0.01,
+ use_usd_properties=False,
+ ),
+ "cube": MeshObjectPreset(
+ object_type="cube",
+ material_name="plastic",
+ label="cube",
+ shape_type="cube",
+ cube_size=(0.05, 0.05, 0.05),
+ init_rot=(0.0, 0.0, 0.0),
+ body_scale=(1.0, 1.0, 1.0),
+ mass=0.05,
+ initial_z=0.05,
+ use_usd_properties=False,
+ dynamic_friction=0.5,
+ static_friction=0.5,
+ contact_offset=0.003,
+ rest_offset=0.001,
+ max_depenetration_velocity=10.0,
+ min_position_iters=32,
+ min_velocity_iters=8,
+ max_convex_hull_num=1,
+ ),
+ "paper_cup": MeshObjectPreset(
+ object_type="paper_cup",
+ material_name="paper",
+ label="paper_cup",
+ mesh_path="PaperCup/paper_cup.ply",
+ init_rot=(0.0, 0.0, 0.0),
+ body_scale=(0.75, 0.75, 1.0),
+ mass=0.01,
+ initial_z=0.05,
+ use_usd_properties=False,
+ dynamic_friction=1.0,
+ static_friction=1.0,
+ contact_offset=0.003,
+ rest_offset=0.001,
+ linear_damping=2.0,
+ angular_damping=2.0,
+ max_depenetration_velocity=2.0,
+ min_position_iters=32,
+ min_velocity_iters=8,
+ max_linear_velocity=5.0,
+ max_angular_velocity=10.0,
+ max_convex_hull_num=8,
+ ),
+ "scanned_bottle": MeshObjectPreset(
+ object_type="scanned_bottle",
+ material_name="plastic",
+ label="scanned_bottle",
+ mesh_path="ScannedBottle/yibao_processed.ply",
+ init_rot=(0.0, 0.0, 0.0),
+ body_scale=(1.0, 1.0, 1.0),
+ mass=0.05,
+ initial_z=0.05,
+ use_usd_properties=False,
+ ),
+}
+COVERAGE_MESH_OBJECT_TYPES = ("sugar_box", "cube", "paper_cup")
+FULL_MESH_OBJECT_TYPES = COVERAGE_MESH_OBJECT_TYPES
+SMOKE_MESH_OBJECT_TYPES = ("sugar_box",)
+PICKUP_APPROACH_CASES = ("top", "side")
+SMOKE_PICKUP_APPROACH_CASES = ("top",)
+
+
+def ensure_repo_root() -> None:
+ """Add the repository root to sys.path for module execution."""
+ repo_root = Path(__file__).resolve().parents[3]
+ if str(repo_root) not in sys.path:
+ sys.path.insert(0, str(repo_root))
+
+
+def ensure_torch():
+ """Import torch or raise a clear benchmark runtime error."""
+ try:
+ import torch
+ except ModuleNotFoundError as exc:
+ raise RuntimeError(
+ "Atomic action benchmark requires the EmbodiChain simulation runtime "
+ f"and PyTorch. Missing module: {exc.name}."
+ ) from exc
+ return torch
+
+
+def add_common_benchmark_args(parser: argparse.ArgumentParser) -> None:
+ """Add common atomic-action benchmark CLI arguments."""
+ add_profile_benchmark_args(parser)
+ parser.add_argument(
+ "--repeat",
+ type=int,
+ default=1,
+ help="Number of repeats for every benchmark case.",
+ )
+ parser.add_argument(
+ "--smoke",
+ action="store_true",
+ help="Alias for --profile smoke.",
+ )
+ parser.add_argument(
+ "--device",
+ type=str,
+ default="cpu",
+ help="Simulation device, e.g. 'cpu' or 'cuda'.",
+ )
+ parser.add_argument(
+ "--renderer",
+ type=str,
+ choices=("auto", "hybrid", "fast-rt", "rt"),
+ default="auto",
+ help="Renderer backend used by SimulationManager.",
+ )
+ add_video_benchmark_args(parser)
+
+
+def add_video_benchmark_args(parser: argparse.ArgumentParser) -> None:
+ """Add optional benchmark video recording CLI arguments."""
+ parser.add_argument(
+ "--record_video",
+ action="store_true",
+ help="Record trajectory replay videos for selected successful cases.",
+ )
+ parser.add_argument(
+ "--record_failed_video",
+ action="store_true",
+ help=(
+ "With --record_video, also record failed cases when a partial "
+ "trajectory or static debug scene is available."
+ ),
+ )
+ parser.add_argument(
+ "--video_case_limit",
+ type=int,
+ default=DEFAULT_VIDEO_CASE_LIMIT,
+ help="Maximum cases to record. Use 0 to record all selected cases.",
+ )
+ parser.add_argument(
+ "--video_dir",
+ type=Path,
+ default=DEFAULT_VIDEO_DIR,
+ help="Directory for benchmark replay videos.",
+ )
+ parser.add_argument(
+ "--video_fps",
+ type=int,
+ default=DEFAULT_VIDEO_FPS,
+ help="Recorded video frames per second.",
+ )
+ parser.add_argument(
+ "--video_max_memory",
+ type=int,
+ default=DEFAULT_VIDEO_MAX_MEMORY_MB,
+ help="Maximum recorder frame-buffer memory in MB.",
+ )
+ parser.add_argument(
+ "--video_width",
+ type=int,
+ default=DEFAULT_VIDEO_WIDTH,
+ help="Recorded video width in pixels.",
+ )
+ parser.add_argument(
+ "--video_height",
+ type=int,
+ default=DEFAULT_VIDEO_HEIGHT,
+ help="Recorded video height in pixels.",
+ )
+ parser.add_argument(
+ "--video_hold_steps",
+ type=int,
+ default=DEFAULT_VIDEO_HOLD_STEPS,
+ help="Extra simulation steps to hold the final replay pose.",
+ )
+
+
+def add_grasp_benchmark_args(parser: argparse.ArgumentParser) -> None:
+ """Add common grasp-affordance setup arguments."""
+ parser.add_argument(
+ "--n_sample",
+ type=int,
+ default=10000,
+ help="Number of samples for antipodal grasp generation.",
+ )
+ parser.add_argument(
+ "--force_reannotate",
+ action="store_true",
+ help="Force grasp region re-annotation instead of using cached data.",
+ )
+
+
+def add_profile_benchmark_args(parser: argparse.ArgumentParser) -> None:
+ """Add unified benchmark profile CLI arguments."""
+ parser.add_argument(
+ "--profile",
+ choices=BENCHMARK_PROFILES,
+ default=DEFAULT_BENCHMARK_PROFILE,
+ help=(
+ "Benchmark profile: smoke is one fast case, coverage is the default "
+ "core object/position matrix, and full sweeps all configured cases."
+ ),
+ )
+
+
+def add_object_position_benchmark_args(parser: argparse.ArgumentParser) -> None:
+ """Add object and initial-position selection arguments."""
+ parser.add_argument(
+ "--object_types",
+ nargs="+",
+ choices=(*MESH_OBJECT_PRESETS.keys(), "all"),
+ default=None,
+ help=(
+ "Real mesh object presets to benchmark. Defaults are selected by "
+ "--profile; use 'all' to include every default full preset."
+ ),
+ )
+ parser.add_argument(
+ "--position_cases",
+ nargs="+",
+ choices=(*POSITION_CASES.keys(), "all"),
+ default=None,
+ help=(
+ "Initial object position cases to benchmark. Defaults are selected by "
+ "--profile; use 'all' for all near/far cases."
+ ),
+ )
+
+
+def add_pickup_approach_benchmark_args(parser: argparse.ArgumentParser) -> None:
+ """Add common PickUp approach selection arguments."""
+ parser.add_argument(
+ "--approach_cases",
+ nargs="+",
+ choices=(*PICKUP_APPROACH_CASES, "all"),
+ default=None,
+ help=(
+ "PickUp approach cases to benchmark. Defaults are selected by "
+ "--profile; side uses the current object's initial XY direction."
+ ),
+ )
+
+
+def resolve_profile(args: argparse.Namespace) -> str:
+ """Resolve the effective benchmark profile from CLI arguments."""
+ profile = getattr(args, "profile", DEFAULT_BENCHMARK_PROFILE)
+ if getattr(args, "smoke", False):
+ profile = "smoke"
+ if profile not in BENCHMARK_PROFILES:
+ raise ValueError(
+ f"Unsupported benchmark profile {profile!r}. "
+ f"Expected one of {BENCHMARK_PROFILES}."
+ )
+ return profile
+
+
+def default_position_case_names_for_profile(profile: str) -> tuple[str, ...]:
+ """Return default position case names for a profile."""
+ if profile == "smoke":
+ return SMOKE_POSITION_CASE_NAMES
+ if profile == "coverage":
+ return COVERAGE_POSITION_CASE_NAMES
+ if profile == "full":
+ return FULL_POSITION_CASE_NAMES
+ raise ValueError(f"Unsupported benchmark profile: {profile}")
+
+
+def select_position_cases(
+ case_names: Sequence[str] | None,
+ profile: str,
+) -> list[PositionCase]:
+ """Resolve requested position cases, falling back to profile defaults."""
+ names = (
+ default_position_case_names_for_profile(profile)
+ if not case_names
+ else tuple(case_names)
+ )
+ if "all" in names:
+ names = FULL_POSITION_CASE_NAMES
+ return [POSITION_CASES[name] for name in names]
+
+
+def default_mesh_object_types_for_profile(profile: str) -> tuple[str, ...]:
+ """Return default real mesh object preset names for a profile."""
+ if profile == "smoke":
+ return SMOKE_MESH_OBJECT_TYPES
+ if profile == "coverage":
+ return COVERAGE_MESH_OBJECT_TYPES
+ if profile == "full":
+ return FULL_MESH_OBJECT_TYPES
+ raise ValueError(f"Unsupported benchmark profile: {profile}")
+
+
+def select_mesh_object_presets(
+ object_types: Sequence[str] | None,
+ profile: str,
+) -> list[MeshObjectPreset]:
+ """Resolve requested mesh object presets, falling back to profile defaults."""
+ names = (
+ default_mesh_object_types_for_profile(profile)
+ if not object_types
+ else tuple(object_types)
+ )
+ if "all" in names:
+ names = FULL_MESH_OBJECT_TYPES
+ return [MESH_OBJECT_PRESETS[name] for name in names]
+
+
+def default_pickup_approach_cases_for_profile(profile: str) -> tuple[str, ...]:
+ """Return default PickUp approach case names for a profile."""
+ if profile == "smoke":
+ return SMOKE_PICKUP_APPROACH_CASES
+ if profile in ("coverage", "full"):
+ return PICKUP_APPROACH_CASES
+ raise ValueError(f"Unsupported benchmark profile: {profile}")
+
+
+def select_pickup_approaches(
+ approach_cases: Sequence[str] | None,
+ profile: str,
+) -> list[str]:
+ """Resolve PickUp approach cases, falling back to profile defaults."""
+ names = (
+ default_pickup_approach_cases_for_profile(profile)
+ if not approach_cases
+ else tuple(approach_cases)
+ )
+ if "all" in names:
+ names = PICKUP_APPROACH_CASES
+ return list(names)
+
+
+def pickup_approach_direction_tuple(
+ approach: str,
+ position_case: PositionCase,
+) -> tuple[float, float, float]:
+ """Resolve a PickUp approach name into a normalized world-frame tuple."""
+ if approach == "top":
+ direction = (0.0, 0.0, -1.0)
+ elif approach == "side":
+ direction = (-position_case.xy[0], -position_case.xy[1], 0.0)
+ else:
+ raise ValueError(f"Unsupported PickUp approach case: {approach}")
+
+ norm = math.sqrt(sum(value * value for value in direction))
+ if norm < 1e-6:
+ raise ValueError(f"PickUp approach direction is zero for {position_case.name}.")
+ return tuple(value / norm for value in direction)
+
+
+def resolve_pickup_approach_direction(
+ approach: str,
+ position_case: PositionCase,
+ device,
+):
+ """Resolve a PickUp approach name into a normalized world-frame vector."""
+ torch = ensure_torch()
+ return torch.tensor(
+ pickup_approach_direction_tuple(approach, position_case),
+ dtype=torch.float32,
+ device=device,
+ )
+
+
+def format_vector3(vector) -> str:
+ """Format a 3D vector-like value for benchmark reports."""
+ if hasattr(vector, "detach"):
+ vector = vector.detach().to("cpu").tolist()
+ return f"({float(vector[0]):.3f},{float(vector[1]):.3f},{float(vector[2]):.3f})"
+
+
+def _is_horizontal_approach_direction(approach_direction) -> bool:
+ """Return true when the requested approach is a side/horizontal approach."""
+ if not hasattr(approach_direction, "detach"):
+ return abs(float(approach_direction[2])) < 1e-4
+ direction = approach_direction.detach()
+ if direction.ndim > 1:
+ direction = direction.reshape(-1, direction.shape[-1])[0]
+ return abs(float(direction[2].to("cpu"))) < 1e-4
+
+
+def create_benchmark_object(
+ sim,
+ preset: MeshObjectPreset,
+ position_case: PositionCase,
+ uid_suffix: str,
+):
+ """Create one benchmark object at a selected initial position."""
+ from embodichain.data import get_data_path
+ from embodichain.lab.sim.cfg import RigidBodyAttributesCfg, RigidObjectCfg
+ from embodichain.lab.sim.shapes import CubeCfg, MeshCfg
+
+ if preset.shape_type == "mesh":
+ shape = MeshCfg(fpath=get_data_path(preset.mesh_path))
+ elif preset.shape_type == "cube":
+ if preset.cube_size is None:
+ raise ValueError(f"Cube preset {preset.object_type!r} misses cube_size.")
+ shape = CubeCfg(size=list(preset.cube_size))
+ else:
+ raise ValueError(
+ f"Unsupported benchmark object shape_type {preset.shape_type!r}."
+ )
+
+ cfg = RigidObjectCfg(
+ uid=f"benchmark_{preset.label}_{position_case.name}_{uid_suffix}",
+ shape=shape,
+ attrs=RigidBodyAttributesCfg(
+ mass=preset.mass,
+ dynamic_friction=preset.dynamic_friction,
+ static_friction=preset.static_friction,
+ restitution=preset.restitution,
+ contact_offset=preset.contact_offset,
+ rest_offset=preset.rest_offset,
+ linear_damping=preset.linear_damping,
+ angular_damping=preset.angular_damping,
+ max_depenetration_velocity=preset.max_depenetration_velocity,
+ min_position_iters=preset.min_position_iters,
+ min_velocity_iters=preset.min_velocity_iters,
+ max_linear_velocity=preset.max_linear_velocity,
+ max_angular_velocity=preset.max_angular_velocity,
+ enable_ccd=preset.enable_ccd,
+ ),
+ max_convex_hull_num=preset.max_convex_hull_num,
+ init_pos=[position_case.xy[0], position_case.xy[1], preset.initial_z],
+ init_rot=preset.init_rot,
+ body_scale=preset.body_scale,
+ use_usd_properties=preset.use_usd_properties,
+ )
+ obj = sim.add_rigid_object(cfg=cfg)
+ sim.update(step=10)
+ return obj
+
+
+create_mesh_benchmark_object = create_benchmark_object
+
+
+def _make_benchmark_antipodal_affordance_class():
+ """Create an AntipodalAffordance subclass after project imports are available."""
+ from embodichain.lab.sim.atomic_actions import AntipodalAffordance
+
+ class BenchmarkAntipodalAffordance(AntipodalAffordance):
+ """Benchmark affordance that biases side grasps to horizontal closing."""
+
+ def get_valid_grasp_poses(
+ self,
+ obj_poses,
+ approach_direction,
+ ):
+ results = super().get_valid_grasp_poses(
+ obj_poses=obj_poses,
+ approach_direction=approach_direction,
+ )
+ if not _is_horizontal_approach_direction(approach_direction):
+ return results
+
+ adjusted_results = []
+ for grasp_poses, costs in results:
+ if grasp_poses.ndim < 3 or grasp_poses.shape[0] == 0:
+ adjusted_results.append((grasp_poses, costs))
+ continue
+
+ opening_axis_abs_z = grasp_poses[:, :3, 0].abs()[:, 2]
+ keep = opening_axis_abs_z <= SIDE_GRASP_MAX_OPEN_AXIS_ABS_Z
+ if bool(keep.any()):
+ adjusted_results.append((grasp_poses[keep], costs[keep]))
+ continue
+
+ adjusted_costs = costs + (
+ opening_axis_abs_z * SIDE_GRASP_OPEN_AXIS_Z_COST_WEIGHT
+ )
+ adjusted_results.append((grasp_poses, adjusted_costs))
+ return adjusted_results
+
+ return BenchmarkAntipodalAffordance
+
+
+def create_antipodal_object_semantics(
+ obj,
+ preset: MeshObjectPreset,
+ args: argparse.Namespace,
+ build_gripper_collision_cfg: Callable[[], object],
+ build_grasp_generator_cfg: Callable[[argparse.Namespace], object],
+):
+ """Create object semantics with an antipodal grasp affordance."""
+ from embodichain.lab.sim.atomic_actions import ObjectSemantics
+
+ mesh_vertices = obj.get_vertices(env_ids=[0], scale=True)[0]
+ mesh_triangles = obj.get_triangles(env_ids=[0])[0]
+ affordance_cls = _make_benchmark_antipodal_affordance_class()
+ return ObjectSemantics(
+ label=preset.label,
+ geometry={
+ "mesh_vertices": mesh_vertices,
+ "mesh_triangles": mesh_triangles,
+ },
+ affordance=affordance_cls(
+ mesh_vertices=mesh_vertices,
+ mesh_triangles=mesh_triangles,
+ gripper_collision_cfg=build_gripper_collision_cfg(),
+ generator_cfg=build_grasp_generator_cfg(args),
+ force_reannotate=args.force_reannotate,
+ ),
+ entity=obj,
+ )
+
+
+def describe_object_preset(preset: MeshObjectPreset) -> str:
+ """Describe an object preset for benchmark report notes."""
+ if preset.shape_type == "cube":
+ return (
+ f"{preset.object_type}/{preset.material_name}/"
+ f"CubeCfg(size={preset.cube_size})"
+ )
+ return f"{preset.object_type}/{preset.material_name}/{preset.mesh_path}"
+
+
+def sync_cuda() -> None:
+ """Synchronize CUDA stream when available."""
+ torch = ensure_torch()
+ if torch.cuda.is_available():
+ torch.cuda.synchronize()
+
+
+def reset_peak_gpu_memory() -> None:
+ """Reset PyTorch peak GPU memory stats when CUDA is available."""
+ torch = ensure_torch()
+ if torch.cuda.is_available():
+ torch.cuda.reset_peak_memory_stats()
+
+
+def peak_gpu_memory_mb() -> float:
+ """Return peak GPU memory allocated by PyTorch in MB."""
+ torch = ensure_torch()
+ if not torch.cuda.is_available():
+ return 0.0
+ return torch.cuda.max_memory_allocated() / 1024**2
+
+
+def memory_snapshot() -> dict[str, float]:
+ """Return current process memory usage snapshot in MB."""
+ torch = ensure_torch()
+ if psutil is not None:
+ process = psutil.Process(os.getpid())
+ cpu_mb = process.memory_info().rss / 1024**2
+ else:
+ cpu_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0
+ gpu_mb = (
+ torch.cuda.memory_allocated() / 1024**2 if torch.cuda.is_available() else 0.0
+ )
+ return {"cpu_mb": cpu_mb, "gpu_mb": gpu_mb}
+
+
+def timed_call(
+ callable_fn: Callable[[], object],
+) -> tuple[float, dict[str, float], float, object]:
+ """Time a callable and return elapsed seconds, memory deltas, peak GPU, result."""
+ reset_peak_gpu_memory()
+ before = memory_snapshot()
+ sync_cuda()
+
+ start = time.perf_counter()
+ result = callable_fn()
+ sync_cuda()
+ elapsed = time.perf_counter() - start
+
+ after = memory_snapshot()
+ deltas = {
+ "cpu_mb": after["cpu_mb"] - before["cpu_mb"],
+ "gpu_mb": after["gpu_mb"] - before["gpu_mb"],
+ }
+ return elapsed, deltas, peak_gpu_memory_mb(), result
+
+
+def reset_robot(robot, initial_qpos) -> None:
+ """Reset current and target robot qpos to the benchmark initial posture."""
+ for target in (False, True):
+ robot.set_qpos(initial_qpos, target=target)
+ robot.clear_dynamics()
+
+
+def reset_rigid_object(obj, initial_pose) -> None:
+ """Reset a rigid object pose and clear residual dynamics."""
+ obj.set_local_pose(initial_pose)
+ obj.clear_dynamics()
+
+
+def reset_rigid_object_xy(
+ obj,
+ base_pose,
+ xy: tuple[float, float],
+ sim=None,
+ settle_steps: int = 0,
+):
+ """Reset a rigid object to a new XY position while preserving base orientation."""
+ pose = base_pose.clone()
+ pose[:, 0, 3] = xy[0]
+ pose[:, 1, 3] = xy[1]
+ reset_rigid_object(obj, pose)
+ if sim is not None and settle_steps > 0:
+ sim.update(step=settle_steps)
+ return pose
+
+
+def park_rigid_object(
+ obj,
+ base_pose,
+ index: int = 0,
+ sim=None,
+) -> None:
+ """Move an inactive benchmark object outside the robot workspace."""
+ pose = base_pose.clone()
+ pose[:, 0, 3] = 8.0 + float(index)
+ pose[:, 1, 3] = 8.0
+ pose[:, 2, 3] = 1.0
+ reset_rigid_object(obj, pose)
+ if sim is not None:
+ sim.update(step=1)
+
+
+def object_position_tuple(obj) -> tuple[float, float, float]:
+ """Return the current object-frame origin position as a CPU tuple."""
+ pose = obj.get_local_pose(to_matrix=True)
+ xyz = pose[0, :3, 3]
+ if hasattr(xyz, "detach"):
+ xyz = xyz.detach().to("cpu").tolist()
+ return (float(xyz[0]), float(xyz[1]), float(xyz[2]))
+
+
+def xy_distance_m(
+ position: Sequence[float],
+ target: Sequence[float],
+) -> float:
+ """Return XY Euclidean distance in meters."""
+ return math.sqrt(
+ (float(position[0]) - float(target[0])) ** 2
+ + (float(position[1]) - float(target[1])) ** 2
+ )
+
+
+def xyz_distance_m(
+ position: Sequence[float],
+ target: Sequence[float],
+) -> float:
+ """Return XYZ Euclidean distance in meters."""
+ return math.sqrt(
+ (float(position[0]) - float(target[0])) ** 2
+ + (float(position[1]) - float(target[1])) ** 2
+ + (float(position[2]) - float(target[2])) ** 2
+ )
+
+
+def replay_trajectory_for_physical_validation(
+ sim,
+ robot,
+ obj,
+ traj,
+ on_step: Callable[[int], None] | None = None,
+ hold_steps: int = PHYSICAL_VALIDATION_HOLD_STEPS,
+) -> tuple[float, float, float] | None:
+ """Replay a trajectory in physics and return the final object position."""
+ if traj is None or getattr(traj, "ndim", 0) < 3 or traj.shape[1] == 0:
+ return None
+
+ for waypoint_index in range(traj.shape[1]):
+ robot.set_qpos(traj[:, waypoint_index, :])
+ sim.update(step=4)
+ if on_step is not None:
+ on_step(waypoint_index)
+
+ final_qpos = traj[:, -1, :]
+ for _ in range(hold_steps):
+ robot.set_qpos(final_qpos)
+ sim.update(step=2)
+ return object_position_tuple(obj)
+
+
+def should_record_case(
+ args: argparse.Namespace,
+ recorded_count: int,
+ success: bool,
+) -> bool:
+ """Return whether a benchmark case should emit a replay video."""
+ if not getattr(args, "record_video", False):
+ return False
+ if not success and not getattr(args, "record_failed_video", False):
+ return False
+
+ case_limit = getattr(args, "video_case_limit", DEFAULT_VIDEO_CASE_LIMIT)
+ if case_limit < 0:
+ raise ValueError("--video_case_limit must be non-negative.")
+ return case_limit == 0 or recorded_count < case_limit
+
+
+def build_video_output_path(
+ args: argparse.Namespace,
+ benchmark_name: str,
+ case_id: str,
+) -> Path:
+ """Build a deterministic, timestamped output path for one replay video."""
+ output_dir = Path(getattr(args, "video_dir", DEFAULT_VIDEO_DIR))
+ output_dir.mkdir(parents=True, exist_ok=True)
+ safe_case_id = re.sub(r"[^A-Za-z0-9_.-]+", "_", case_id).strip("_")
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+ return output_dir / f"{benchmark_name}_{safe_case_id}_{timestamp}.mp4"
+
+
+def summarize_video_recording(
+ args: argparse.Namespace,
+ results: Sequence[dict[str, object]],
+ video_paths: Sequence[str],
+) -> list[str]:
+ """Build report notes that make video coverage explicit."""
+ evaluated_count = len(results)
+ success_count = sum(1 for result in results if bool(result.get("success")))
+ failure_count = evaluated_count - success_count
+ notes = [
+ (
+ "Case/video summary: "
+ f"evaluated={evaluated_count}, success={success_count}, "
+ f"failure={failure_count}, videos={len(video_paths)}"
+ )
+ ]
+ if getattr(args, "record_video", False):
+ if getattr(args, "record_failed_video", False):
+ notes.append(
+ "Video policy: records report-success replays and failed-case "
+ "debug videos when trajectory/static scene capture is available."
+ )
+ else:
+ notes.append(
+ "Video policy: records report-success replays only; failed "
+ "cases are reported in the tables but do not emit videos."
+ )
+ else:
+ notes.append("Video policy: disabled.")
+ notes.append(
+ "Replay videos: " + (", ".join(video_paths) if video_paths else "disabled")
+ )
+ return notes
+
+
+def _replay_trajectory_with_recording(
+ sim,
+ robot,
+ traj,
+ args: argparse.Namespace,
+ video_path: Path,
+ on_step: Callable[[int], None] | None = None,
+ look_at: tuple[
+ Sequence[float],
+ Sequence[float],
+ Sequence[float],
+ ] = DEFAULT_VIDEO_LOOK_AT,
+) -> Path | None:
+ """Replay a planned trajectory and record it with the simulation recorder."""
+ if traj is None or getattr(traj, "ndim", 0) < 3 or traj.shape[1] == 0:
+ return None
+
+ video_fps = getattr(args, "video_fps", DEFAULT_VIDEO_FPS)
+ video_max_memory = getattr(args, "video_max_memory", DEFAULT_VIDEO_MAX_MEMORY_MB)
+ video_width = getattr(args, "video_width", DEFAULT_VIDEO_WIDTH)
+ video_height = getattr(args, "video_height", DEFAULT_VIDEO_HEIGHT)
+ video_hold_steps = getattr(args, "video_hold_steps", DEFAULT_VIDEO_HOLD_STEPS)
+
+ original_width = sim.sim_config.width
+ original_height = sim.sim_config.height
+ recording_started = False
+ try:
+ sim.sim_config.width = video_width
+ sim.sim_config.height = video_height
+ recording_started = sim.start_window_record(
+ save_path=str(video_path),
+ fps=video_fps,
+ max_memory=video_max_memory,
+ look_at=look_at,
+ use_sim_time=True,
+ )
+ finally:
+ sim.sim_config.width = original_width
+ sim.sim_config.height = original_height
+
+ if not recording_started:
+ return None
+
+ stop_success = False
+ try:
+ for waypoint_index in range(traj.shape[1]):
+ robot.set_qpos(traj[:, waypoint_index, :])
+ sim.update(step=4)
+ if on_step is not None:
+ on_step(waypoint_index)
+
+ final_qpos = traj[:, -1, :]
+ for _ in range(video_hold_steps):
+ robot.set_qpos(final_qpos)
+ sim.update(step=2)
+ finally:
+ if sim.is_window_recording():
+ stop_success = sim.stop_window_record()
+ sim.wait_window_record_saves()
+
+ return video_path if stop_success else None
+
+
+def _record_static_scene_video(
+ sim,
+ args: argparse.Namespace,
+ video_path: Path,
+ look_at: tuple[
+ Sequence[float],
+ Sequence[float],
+ Sequence[float],
+ ] = DEFAULT_VIDEO_LOOK_AT,
+) -> Path | None:
+ """Record the current scene without replaying a planned trajectory."""
+ video_fps = getattr(args, "video_fps", DEFAULT_VIDEO_FPS)
+ video_max_memory = getattr(args, "video_max_memory", DEFAULT_VIDEO_MAX_MEMORY_MB)
+ video_width = getattr(args, "video_width", DEFAULT_VIDEO_WIDTH)
+ video_height = getattr(args, "video_height", DEFAULT_VIDEO_HEIGHT)
+ video_hold_steps = getattr(args, "video_hold_steps", DEFAULT_VIDEO_HOLD_STEPS)
+
+ original_width = sim.sim_config.width
+ original_height = sim.sim_config.height
+ recording_started = False
+ try:
+ sim.sim_config.width = video_width
+ sim.sim_config.height = video_height
+ recording_started = sim.start_window_record(
+ save_path=str(video_path),
+ fps=video_fps,
+ max_memory=video_max_memory,
+ look_at=look_at,
+ use_sim_time=True,
+ )
+ finally:
+ sim.sim_config.width = original_width
+ sim.sim_config.height = original_height
+
+ if not recording_started:
+ return None
+
+ stop_success = False
+ try:
+ for _ in range(video_hold_steps):
+ sim.update(step=2)
+ finally:
+ if sim.is_window_recording():
+ stop_success = sim.stop_window_record()
+ sim.wait_window_record_saves()
+
+ return video_path if stop_success else None
+
+
+def replay_trajectory_with_recording(
+ sim,
+ robot,
+ traj,
+ args: argparse.Namespace,
+ video_path: Path,
+ on_step: Callable[[int], None] | None = None,
+ look_at: tuple[
+ Sequence[float],
+ Sequence[float],
+ Sequence[float],
+ ] = DEFAULT_VIDEO_LOOK_AT,
+) -> Path | None:
+ """Best-effort replay recording that never changes benchmark success."""
+ try:
+ return _replay_trajectory_with_recording(
+ sim=sim,
+ robot=robot,
+ traj=traj,
+ args=args,
+ video_path=video_path,
+ on_step=on_step,
+ look_at=look_at,
+ )
+ except Exception as exc:
+ try:
+ if sim.is_window_recording():
+ sim.stop_window_record()
+ sim.wait_window_record_saves()
+ except Exception:
+ pass
+ print(
+ "Warning: failed to record benchmark replay video "
+ f"{video_path}: {type(exc).__name__}: {exc}"
+ )
+ return None
+
+
+def record_static_scene_video(
+ sim,
+ args: argparse.Namespace,
+ video_path: Path,
+ look_at: tuple[
+ Sequence[float],
+ Sequence[float],
+ Sequence[float],
+ ] = DEFAULT_VIDEO_LOOK_AT,
+) -> Path | None:
+ """Best-effort static scene recording that never changes benchmark success."""
+ try:
+ return _record_static_scene_video(
+ sim=sim,
+ args=args,
+ video_path=video_path,
+ look_at=look_at,
+ )
+ except Exception as exc:
+ try:
+ if sim.is_window_recording():
+ sim.stop_window_record()
+ sim.wait_window_record_saves()
+ except Exception:
+ pass
+ print(
+ "Warning: failed to record benchmark static debug video "
+ f"{video_path}: {type(exc).__name__}: {exc}"
+ )
+ return None
+
+
+def format_float(value: float | None, precision: int = 6) -> str:
+ """Format finite floats for tables and use N/A for missing values."""
+ if value is None or not math.isfinite(value):
+ return "N/A"
+ return f"{value:.{precision}f}"
+
+
+def format_markdown_table(rows: list[dict[str, object]]) -> list[str]:
+ """Format rows into a markdown table."""
+ if not rows:
+ return ["No data."]
+
+ headers = list(rows[0].keys())
+ lines = [
+ "| " + " | ".join(headers) + " |",
+ "| " + " | ".join(["---"] * len(headers)) + " |",
+ ]
+ for row in rows:
+ lines.append("| " + " | ".join(str(row[h]) for h in headers) + " |")
+ return lines
+
+
+def write_markdown_report(
+ benchmark_name: str,
+ perf_rows: list[dict[str, object]],
+ metric_rows: list[dict[str, object]],
+ leaderboard_rows: list[dict[str, object]],
+ notes: list[str] | None = None,
+) -> Path:
+ """Write benchmark results into one markdown report file."""
+ output_dir = Path("outputs/benchmarks")
+ output_dir.mkdir(parents=True, exist_ok=True)
+
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+ report_path = output_dir / f"{benchmark_name}_{timestamp}.md"
+
+ lines: list[str] = [
+ f"# {benchmark_name} Benchmark Report",
+ "",
+ f"Generated at: {datetime.now().isoformat(timespec='seconds')}",
+ "",
+ "## Time & Memory",
+ "",
+ ]
+ lines.extend(format_markdown_table(perf_rows))
+ lines.extend(["", "## Success & Other Metrics", ""])
+ lines.extend(format_markdown_table(metric_rows))
+ lines.extend(["", "## Leaderboard", ""])
+ lines.extend(format_markdown_table(leaderboard_rows))
+
+ if notes:
+ lines.extend(["", "## Notes", ""])
+ lines.extend([f"- {note}" for note in notes])
+
+ report_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
+ return report_path
+
+
+def build_single_action_leaderboard(
+ action_name: str,
+ metric_rows: list[dict[str, object]],
+) -> list[dict[str, object]]:
+ """Aggregate rows for one action by benchmark case."""
+ if not metric_rows:
+ return []
+
+ success_sum = sum(float(row["success_rate"]) for row in metric_rows)
+ count = len(metric_rows)
+ return [
+ {
+ "rank": 1,
+ "algorithm": action_name,
+ "overall_success_rate": f"{success_sum / max(count, 1):.2%}",
+ "evaluated_cases": count,
+ }
+ ]
+
+
+__all__ = [
+ "BENCHMARK_PROFILES",
+ "CPU_MEMORY_BACKEND",
+ "COVERAGE_MESH_OBJECT_TYPES",
+ "COVERAGE_POSITION_CASE_NAMES",
+ "DEFAULT_BENCHMARK_PROFILE",
+ "add_common_benchmark_args",
+ "add_grasp_benchmark_args",
+ "add_object_position_benchmark_args",
+ "add_profile_benchmark_args",
+ "add_video_benchmark_args",
+ "build_video_output_path",
+ "build_single_action_leaderboard",
+ "create_antipodal_object_semantics",
+ "create_benchmark_object",
+ "create_mesh_benchmark_object",
+ "DEFAULT_VIDEO_DIR",
+ "default_pickup_approach_cases_for_profile",
+ "default_mesh_object_types_for_profile",
+ "default_position_case_names_for_profile",
+ "describe_object_preset",
+ "ensure_repo_root",
+ "ensure_torch",
+ "format_float",
+ "format_vector3",
+ "FULL_MESH_OBJECT_TYPES",
+ "FULL_POSITION_CASE_NAMES",
+ "MESH_OBJECT_PRESETS",
+ "MeshObjectPreset",
+ "PICKUP_APPROACH_CASES",
+ "PHYSICAL_MOVE_HELD_OBJECT_XYZ_TOLERANCE_M",
+ "PHYSICAL_PICK_MIN_LIFT_M",
+ "PHYSICAL_PLACE_XY_TOLERANCE_M",
+ "PHYSICAL_VALIDATION_HOLD_STEPS",
+ "POSITION_CASES",
+ "PositionCase",
+ "object_position_tuple",
+ "park_rigid_object",
+ "pickup_approach_direction_tuple",
+ "record_static_scene_video",
+ "replay_trajectory_for_physical_validation",
+ "replay_trajectory_with_recording",
+ "reset_rigid_object",
+ "reset_rigid_object_xy",
+ "reset_robot",
+ "resolve_pickup_approach_direction",
+ "resolve_profile",
+ "select_mesh_object_presets",
+ "select_pickup_approaches",
+ "select_position_cases",
+ "should_record_case",
+ "SIDE_GRASP_MAX_OPEN_AXIS_ABS_Z",
+ "SIDE_GRASP_OPEN_AXIS_Z_COST_WEIGHT",
+ "SMOKE_PICKUP_APPROACH_CASES",
+ "SMOKE_MESH_OBJECT_TYPES",
+ "SMOKE_POSITION_CASE_NAMES",
+ "timed_call",
+ "summarize_video_recording",
+ "write_markdown_report",
+ "xy_distance_m",
+ "xyz_distance_m",
+]
diff --git a/scripts/benchmark/atomic_action/move_end_effector_benchmark.py b/scripts/benchmark/atomic_action/move_end_effector_benchmark.py
new file mode 100644
index 000000000..6267e5418
--- /dev/null
+++ b/scripts/benchmark/atomic_action/move_end_effector_benchmark.py
@@ -0,0 +1,318 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Benchmark MoveEndEffector atomic action across target poses.
+
+Measures planning latency, memory usage, trajectory success, and final TCP
+translation error for several reachable pose targets.
+Run: python -m scripts.benchmark.atomic_action.move_end_effector_benchmark
+"""
+
+from __future__ import annotations
+
+import argparse
+from dataclasses import dataclass
+from pathlib import Path
+
+from scripts.benchmark.atomic_action.common import (
+ CPU_MEMORY_BACKEND,
+ add_common_benchmark_args,
+ build_single_action_leaderboard,
+ build_video_output_path,
+ ensure_repo_root,
+ ensure_torch,
+ format_float,
+ replay_trajectory_with_recording,
+ reset_robot,
+ resolve_profile,
+ should_record_case,
+ timed_call,
+ write_markdown_report,
+)
+
+
+@dataclass(frozen=True)
+class PoseCase:
+ """End-effector target pose benchmark case."""
+
+ name: str
+ xyz: tuple[float, float, float]
+
+
+POSE_CASES = {
+ "front_left": PoseCase("front_left", (0.30, -0.20, 0.36)),
+ "front_right": PoseCase("front_right", (0.30, 0.20, 0.36)),
+ "near_center": PoseCase("near_center", (0.18, 0.00, 0.42)),
+ "far_center": PoseCase("far_center", (0.42, 0.00, 0.34)),
+}
+DEFAULT_POSE_CASES = tuple(POSE_CASES.keys())
+MOVE_SAMPLE_INTERVAL = 80
+SUCCESS_TOLERANCE_M = 0.01
+
+
+def add_benchmark_args(parser: argparse.ArgumentParser) -> None:
+ """Add MoveEndEffector benchmark CLI arguments."""
+ parser.add_argument(
+ "--pose_cases",
+ nargs="+",
+ choices=(*POSE_CASES.keys(), "all"),
+ default=list(DEFAULT_POSE_CASES),
+ help="Target pose cases to benchmark. Use 'all' for every case.",
+ )
+ add_common_benchmark_args(parser)
+
+
+def _parse_args() -> argparse.Namespace:
+ """Parse command line arguments."""
+ parser = argparse.ArgumentParser(
+ description="Benchmark MoveEndEffector over reachable target poses."
+ )
+ add_benchmark_args(parser)
+ return parser.parse_args()
+
+
+def _select_pose_cases(case_names: list[str]) -> list[PoseCase]:
+ """Resolve selected pose case names."""
+ if "all" in case_names:
+ return list(POSE_CASES.values())
+ return [POSE_CASES[name] for name in case_names]
+
+
+def _make_pose(device, xyz: tuple[float, float, float]):
+ """Create a top-down target pose."""
+ torch = ensure_torch()
+ pose = torch.eye(4, dtype=torch.float32, device=device)
+ pose[:3, :3] = torch.tensor(
+ [
+ [-0.0539, -0.9985, -0.0022],
+ [-0.9977, 0.0540, -0.0401],
+ [0.0401, 0.0000, -0.9992],
+ ],
+ dtype=torch.float32,
+ device=device,
+ )
+ pose[:3, 3] = torch.tensor(xyz, dtype=torch.float32, device=device)
+ return pose
+
+
+def _run_case(
+ sim,
+ robot,
+ atomic_engine,
+ initial_qpos,
+ pose_case: PoseCase,
+ repeat: int,
+ args: argparse.Namespace,
+ recorded_count: int,
+):
+ """Run one MoveEndEffector case."""
+ torch = ensure_torch()
+ from embodichain.lab.sim.atomic_actions import EndEffectorPoseTarget
+
+ reset_robot(robot, initial_qpos)
+ target_pose = _make_pose(sim.device, pose_case.xyz)
+
+ elapsed, mem_delta, peak_gpu, result = timed_call(
+ lambda: atomic_engine.run(
+ steps=[("move_end_effector", EndEffectorPoseTarget(xpos=target_pose))]
+ )
+ )
+ is_success, traj, _ = result
+ video_path = None
+ if should_record_case(args, recorded_count, bool(is_success)):
+ reset_robot(robot, initial_qpos)
+ video_path = replay_trajectory_with_recording(
+ sim=sim,
+ robot=robot,
+ traj=traj,
+ args=args,
+ video_path=build_video_output_path(
+ args, "atomic_action_move_end_effector", f"{pose_case.name}_r{repeat}"
+ ),
+ )
+ reset_robot(robot, initial_qpos)
+
+ final_error_m = None
+ if is_success and traj.shape[1] > 0:
+ arm_joint_ids = robot.get_joint_ids(name="arm")
+ final_arm_qpos = traj[:, -1, arm_joint_ids]
+ final_pose = robot.compute_fk(qpos=final_arm_qpos, name="arm", to_matrix=True)[
+ 0
+ ]
+ final_error_m = float(torch.linalg.norm(final_pose[:3, 3] - target_pose[:3, 3]))
+ target_reached = bool(
+ is_success
+ and final_error_m is not None
+ and final_error_m <= SUCCESS_TOLERANCE_M
+ )
+ return {
+ "case_id": f"{pose_case.name}:r{repeat}",
+ "target_case": pose_case.name,
+ "repeat": repeat,
+ "planning_success": bool(is_success),
+ "target_reached": target_reached,
+ "cost_time_ms": elapsed * 1000.0,
+ "cpu_delta_mb": mem_delta["cpu_mb"],
+ "gpu_delta_mb": mem_delta["gpu_mb"],
+ "peak_gpu_mb": peak_gpu,
+ "final_error_m": final_error_m,
+ "trajectory_waypoints": int(traj.shape[1]) if traj.ndim >= 2 else 0,
+ "failure_reason": "" if target_reached else "target_not_reached",
+ "video_path": str(video_path) if video_path is not None else "",
+ }
+
+
+def _build_rows(results: list[dict[str, object]]):
+ """Build report rows for MoveEndEffector."""
+ perf_rows = []
+ metric_rows = []
+ for result in results:
+ perf_rows.append(
+ {
+ "sample_size": 1,
+ "impl": "move_end_effector",
+ "case_id": result["case_id"],
+ "target_case": result["target_case"],
+ "repeat": result["repeat"],
+ "cost_time_ms": format_float(result["cost_time_ms"]),
+ "cpu_delta_mb": format_float(result["cpu_delta_mb"]),
+ "gpu_delta_mb": format_float(result["gpu_delta_mb"]),
+ "peak_gpu_mb": format_float(result["peak_gpu_mb"]),
+ }
+ )
+ metric_rows.append(
+ {
+ "sample_size": 1,
+ "impl": "move_end_effector",
+ "case_id": result["case_id"],
+ "target_case": result["target_case"],
+ "success_rate": f"{float(result['target_reached']):.6f}",
+ "planning_success_rate": f"{float(result['planning_success']):.6f}",
+ "translation_err_m": format_float(result["final_error_m"]),
+ "trajectory_waypoints": result["trajectory_waypoints"],
+ "failure_reason": result["failure_reason"] or "N/A",
+ }
+ )
+ return perf_rows, metric_rows
+
+
+def run_all_benchmarks(args: argparse.Namespace | None = None) -> Path:
+ """Run MoveEndEffector benchmark and write a markdown report."""
+ args = _parse_args() if args is None else args
+ if args.repeat < 1:
+ raise ValueError("--repeat must be at least 1.")
+ profile = resolve_profile(args)
+
+ ensure_repo_root()
+ ensure_torch()
+ from embodichain.lab.sim.atomic_actions import (
+ AtomicActionEngine,
+ MoveEndEffector,
+ MoveEndEffectorCfg,
+ )
+ from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg
+ from embodichain.lab.sim.planners import ToppraPlannerCfg
+ from scripts.tutorials.atomic_action.move_end_effector import (
+ create_robot,
+ initialize_simulation,
+ )
+
+ pose_cases = _select_pose_cases(args.pose_cases)
+ repeat = 1 if profile == "smoke" else args.repeat
+ if profile == "smoke":
+ pose_cases = [POSE_CASES["front_left"]]
+
+ print("=" * 60)
+ print("MoveEndEffector Atomic Action Benchmark")
+ print("=" * 60)
+ print(
+ "Coverage: "
+ f"profile={profile}, {len(pose_cases)} pose case(s) x "
+ f"{repeat} repeat(s)"
+ )
+
+ sim = initialize_simulation(args)
+ robot = create_robot(sim)
+ initial_qpos = robot.get_qpos().clone()
+ motion_gen = MotionGenerator(
+ cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
+ )
+ atomic_engine = AtomicActionEngine(motion_generator=motion_gen)
+ atomic_engine.register(
+ MoveEndEffector(
+ motion_gen,
+ cfg=MoveEndEffectorCfg(
+ control_part="arm", sample_interval=MOVE_SAMPLE_INTERVAL
+ ),
+ )
+ )
+
+ results: list[dict[str, object]] = []
+ video_paths: list[str] = []
+ print("\n=== MoveEndEffector Target Sweep ===")
+ for pose_case in pose_cases:
+ for repeat_index in range(repeat):
+ result = _run_case(
+ sim,
+ robot,
+ atomic_engine,
+ initial_qpos,
+ pose_case,
+ repeat_index,
+ args,
+ len(video_paths),
+ )
+ results.append(result)
+ if result["video_path"]:
+ video_paths.append(str(result["video_path"]))
+ print(
+ f" {result['case_id']:<24} "
+ f"time={result['cost_time_ms']:>10.2f} ms | "
+ f"success={result['target_reached']} "
+ f"err={format_float(result['final_error_m'], precision=4)}"
+ )
+
+ perf_rows, metric_rows = _build_rows(results)
+ leaderboard_rows = build_single_action_leaderboard("move_end_effector", metric_rows)
+ report_path = write_markdown_report(
+ benchmark_name="atomic_action_move_end_effector",
+ perf_rows=perf_rows,
+ metric_rows=metric_rows,
+ leaderboard_rows=leaderboard_rows,
+ notes=[
+ f"Profile: {profile}",
+ f"CPU memory backend: {CPU_MEMORY_BACKEND}",
+ f"Success tolerance: {SUCCESS_TOLERANCE_M} m translation error.",
+ "Replay videos: " + (", ".join(video_paths) if video_paths else "disabled"),
+ ],
+ )
+ print(f"Markdown report saved: {report_path}")
+ return report_path
+
+
+def main() -> None:
+ """Run the CLI entry point."""
+ try:
+ run_all_benchmarks()
+ except RuntimeError as exc:
+ raise SystemExit(str(exc)) from exc
+
+
+if __name__ == "__main__":
+ main()
+
+
+__all__ = ["add_benchmark_args", "run_all_benchmarks"]
diff --git a/scripts/benchmark/atomic_action/move_held_object_benchmark.py b/scripts/benchmark/atomic_action/move_held_object_benchmark.py
new file mode 100644
index 000000000..cabfb1582
--- /dev/null
+++ b/scripts/benchmark/atomic_action/move_held_object_benchmark.py
@@ -0,0 +1,766 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Benchmark MoveHeldObject atomic action after a PickUp precondition.
+
+Measures MoveHeldObject-only planning latency and memory usage once a held
+object state has been produced by PickUp.
+Run: python -m scripts.benchmark.atomic_action.move_held_object_benchmark
+"""
+
+from __future__ import annotations
+
+import argparse
+from dataclasses import dataclass
+from pathlib import Path
+
+from scripts.benchmark.atomic_action.common import (
+ CPU_MEMORY_BACKEND,
+ add_common_benchmark_args,
+ add_grasp_benchmark_args,
+ add_object_position_benchmark_args,
+ add_pickup_approach_benchmark_args,
+ build_single_action_leaderboard,
+ build_video_output_path,
+ create_antipodal_object_semantics,
+ create_benchmark_object,
+ describe_object_preset,
+ ensure_repo_root,
+ ensure_torch,
+ format_float,
+ format_vector3,
+ MeshObjectPreset,
+ object_position_tuple,
+ park_rigid_object,
+ PHYSICAL_MOVE_HELD_OBJECT_XYZ_TOLERANCE_M,
+ PHYSICAL_PICK_MIN_LIFT_M,
+ pickup_approach_direction_tuple,
+ PositionCase,
+ record_static_scene_video,
+ replay_trajectory_for_physical_validation,
+ replay_trajectory_with_recording,
+ reset_rigid_object,
+ reset_rigid_object_xy,
+ reset_robot,
+ resolve_pickup_approach_direction,
+ resolve_profile,
+ select_mesh_object_presets,
+ select_pickup_approaches,
+ select_position_cases,
+ should_record_case,
+ SIDE_GRASP_MAX_OPEN_AXIS_ABS_Z,
+ SIDE_GRASP_OPEN_AXIS_Z_COST_WEIGHT,
+ summarize_video_recording,
+ timed_call,
+ write_markdown_report,
+ xy_distance_m,
+ xyz_distance_m,
+)
+
+
+@dataclass(frozen=True)
+class HeldObjectCase:
+ """Held-object target pose benchmark case."""
+
+ name: str
+ xyz: tuple[float, float, float]
+
+
+HELD_OBJECT_CASES = {
+ "front_left": HeldObjectCase("front_left", (-0.30, -0.30, 0.50)),
+ "front_right": HeldObjectCase("front_right", (-0.30, 0.30, 0.50)),
+ "center_high": HeldObjectCase("center_high", (-0.42, 0.00, 0.58)),
+}
+PICK_SAMPLE_INTERVAL = 120
+MOVE_SAMPLE_INTERVAL = 60
+MOVE_HELD_OBJECT_SAMPLE_INTERVAL = 120
+HAND_INTERP_STEPS = 12
+
+
+def add_benchmark_args(parser: argparse.ArgumentParser) -> None:
+ """Add MoveHeldObject benchmark CLI arguments."""
+ add_pickup_approach_benchmark_args(parser)
+ parser.add_argument(
+ "--held_object_cases",
+ nargs="+",
+ choices=(*HELD_OBJECT_CASES.keys(), "all"),
+ default=None,
+ help=(
+ "Held-object target cases to benchmark. Defaults are selected by "
+ "--profile; use 'all' for every case."
+ ),
+ )
+ add_object_position_benchmark_args(parser)
+ add_grasp_benchmark_args(parser)
+ add_common_benchmark_args(parser)
+
+
+def _parse_args() -> argparse.Namespace:
+ """Parse command line arguments."""
+ parser = argparse.ArgumentParser(
+ description="Benchmark MoveHeldObject after a PickUp precondition."
+ )
+ add_benchmark_args(parser)
+ return parser.parse_args()
+
+
+def _selected_cases(
+ case_names: list[str] | None,
+ profile: str,
+) -> list[HeldObjectCase]:
+ """Resolve selected held-object case names."""
+ if not case_names:
+ return [HELD_OBJECT_CASES["front_left"]]
+ if "all" in case_names:
+ return list(HELD_OBJECT_CASES.values())
+ return [HELD_OBJECT_CASES[name] for name in case_names]
+
+
+def _make_object_target_pose(device, xyz: tuple[float, float, float]):
+ """Create a held-object target pose."""
+ torch = ensure_torch()
+ pose = torch.eye(4, dtype=torch.float32, device=device)
+ pose[:3, :3] = torch.tensor(
+ [
+ [0.0, 0.0, -1.0],
+ [0.0, 1.0, 0.0],
+ [1.0, 0.0, 0.0],
+ ],
+ dtype=torch.float32,
+ device=device,
+ )
+ pose[:3, 3] = torch.tensor(xyz, dtype=torch.float32, device=device)
+ return pose
+
+
+def _make_pickup_args(
+ args: argparse.Namespace,
+ object_preset: MeshObjectPreset,
+ profile: str,
+) -> argparse.Namespace:
+ """Build a tutorial-compatible argparse namespace."""
+ return argparse.Namespace(
+ object=object_preset.label,
+ n_sample=1000 if profile == "smoke" else args.n_sample,
+ force_reannotate=args.force_reannotate,
+ device=args.device,
+ renderer=args.renderer,
+ headless=True,
+ )
+
+
+def _prepare_held_state(
+ sim,
+ robot,
+ obj,
+ motion_gen,
+ args,
+ object_preset: MeshObjectPreset,
+ position_case: PositionCase,
+ pickup_approach: str,
+ profile: str,
+):
+ """Run PickUp precondition outside the timed MoveHeldObject block."""
+ from embodichain.lab.sim.atomic_actions import (
+ AtomicActionEngine,
+ EndEffectorPoseTarget,
+ GraspTarget,
+ MoveEndEffector,
+ MoveEndEffectorCfg,
+ PickUp,
+ PickUpCfg,
+ )
+ from scripts.tutorials.atomic_action.move_held_object import (
+ build_grasp_generator_cfg,
+ build_gripper_collision_cfg,
+ get_hand_open_close_qpos,
+ make_pre_pick_eef_pose,
+ )
+
+ hand_open, hand_close = get_hand_open_close_qpos(robot, sim.device)
+ atomic_engine = AtomicActionEngine(motion_generator=motion_gen)
+ atomic_engine.register(
+ MoveEndEffector(
+ motion_gen,
+ cfg=MoveEndEffectorCfg(
+ control_part="arm",
+ sample_interval=MOVE_SAMPLE_INTERVAL,
+ ),
+ )
+ )
+ atomic_engine.register(
+ PickUp(
+ motion_gen,
+ cfg=PickUpCfg(
+ control_part="arm",
+ hand_control_part="hand",
+ hand_open_qpos=hand_open,
+ hand_close_qpos=hand_close,
+ approach_direction=resolve_pickup_approach_direction(
+ pickup_approach, position_case, sim.device
+ ),
+ pre_grasp_distance=0.15,
+ lift_height=0.16,
+ sample_interval=PICK_SAMPLE_INTERVAL,
+ hand_interp_steps=HAND_INTERP_STEPS,
+ ),
+ )
+ )
+ semantics = create_antipodal_object_semantics(
+ obj=obj,
+ preset=object_preset,
+ args=_make_pickup_args(args, object_preset, profile),
+ build_gripper_collision_cfg=build_gripper_collision_cfg,
+ build_grasp_generator_cfg=build_grasp_generator_cfg,
+ )
+ obj_pose = obj.get_local_pose(to_matrix=True)
+ move_position = obj_pose[0, :3, 3].clone()
+ move_position[2] = 0.36
+ move_target = make_pre_pick_eef_pose(robot, move_position)
+ is_success, traj, state = atomic_engine.run(
+ steps=[
+ ("move_end_effector", EndEffectorPoseTarget(xpos=move_target)),
+ ("pick_up", GraspTarget(semantics=semantics)),
+ ]
+ )
+ if not is_success or state.held_object is None:
+ raise RuntimeError(
+ "Failed to prepare held-object state for MoveHeldObject benchmark."
+ )
+ robot.set_qpos(state.last_qpos)
+ return state, hand_close, traj
+
+
+def _run_case(
+ sim,
+ robot,
+ motion_gen,
+ initial_qpos,
+ args,
+ obj,
+ base_obj_pose,
+ object_preset: MeshObjectPreset,
+ position_case: PositionCase,
+ pickup_approach: str,
+ case: HeldObjectCase,
+ repeat: int,
+ recorded_count: int,
+ profile: str,
+):
+ """Run one MoveHeldObject benchmark case."""
+ from embodichain.lab.sim.atomic_actions import (
+ AtomicActionEngine,
+ HeldObjectPoseTarget,
+ MoveHeldObject,
+ MoveHeldObjectCfg,
+ )
+ from scripts.tutorials.atomic_action.move_held_object import (
+ compute_pick_close_end_step,
+ )
+
+ case_id = (
+ f"{object_preset.object_type}:{position_case.name}:"
+ f"{pickup_approach}:{case.name}:r{repeat}"
+ )
+ try:
+ reset_robot(robot, initial_qpos)
+ initial_obj_pose = reset_rigid_object_xy(
+ obj=obj,
+ base_pose=base_obj_pose,
+ xy=position_case.xy,
+ sim=sim,
+ settle_steps=2,
+ )
+ reset_rigid_object(obj, initial_obj_pose)
+ initial_obj_position = object_position_tuple(obj)
+ state, hand_close, precondition_traj = _prepare_held_state(
+ sim,
+ robot,
+ obj,
+ motion_gen,
+ args,
+ object_preset,
+ position_case,
+ pickup_approach,
+ profile,
+ )
+ approach_direction_text = format_vector3(
+ pickup_approach_direction_tuple(pickup_approach, position_case)
+ )
+ precondition_waypoints = int(precondition_traj.shape[1])
+ atomic_engine = AtomicActionEngine(motion_generator=motion_gen)
+ atomic_engine.register(
+ MoveHeldObject(
+ motion_gen,
+ cfg=MoveHeldObjectCfg(
+ control_part="arm",
+ hand_control_part="hand",
+ hand_close_qpos=hand_close,
+ sample_interval=MOVE_HELD_OBJECT_SAMPLE_INTERVAL,
+ ),
+ )
+ )
+ target_pose = _make_object_target_pose(sim.device, case.xyz)
+ elapsed, mem_delta, peak_gpu, result = timed_call(
+ lambda: atomic_engine.run(
+ steps=[
+ (
+ "move_held_object",
+ HeldObjectPoseTarget(object_target_pose=target_pose),
+ )
+ ],
+ state=state,
+ )
+ )
+ is_success, traj, final_state = result
+ torch = ensure_torch()
+ precondition_obj_position = None
+ final_obj_position = None
+ object_lift_delta_m = None
+ held_object_xy_error_m = None
+ held_object_z_error_m = None
+ held_object_xyz_error_m = None
+
+ if (
+ getattr(precondition_traj, "ndim", 0) >= 3
+ and precondition_traj.shape[1] > 0
+ ):
+ if bool(is_success) and getattr(traj, "ndim", 0) >= 3 and traj.shape[1] > 0:
+ validation_traj = torch.cat((precondition_traj, traj), dim=1)
+ else:
+ validation_traj = precondition_traj
+
+ reset_robot(robot, initial_qpos)
+ reset_rigid_object(obj, initial_obj_pose)
+ post_grasp_clear_step = compute_pick_close_end_step()
+ should_clear_object_dynamics = True
+
+ def _on_validation_step(waypoint_index: int) -> None:
+ nonlocal should_clear_object_dynamics, precondition_obj_position
+ if (
+ should_clear_object_dynamics
+ and waypoint_index + 1 >= post_grasp_clear_step
+ ):
+ obj.clear_dynamics()
+ should_clear_object_dynamics = False
+ if (
+ precondition_obj_position is None
+ and waypoint_index + 1 >= precondition_waypoints
+ ):
+ precondition_obj_position = object_position_tuple(obj)
+
+ final_obj_position = replay_trajectory_for_physical_validation(
+ sim=sim,
+ robot=robot,
+ obj=obj,
+ traj=validation_traj,
+ on_step=_on_validation_step,
+ )
+ if precondition_obj_position is not None:
+ object_lift_delta_m = (
+ precondition_obj_position[2] - initial_obj_position[2]
+ )
+ if bool(is_success) and final_obj_position is not None:
+ held_object_xy_error_m = xy_distance_m(final_obj_position, case.xyz)
+ held_object_z_error_m = abs(final_obj_position[2] - case.xyz[2])
+ held_object_xyz_error_m = xyz_distance_m(final_obj_position, case.xyz)
+ reset_robot(robot, initial_qpos)
+ reset_rigid_object(obj, initial_obj_pose)
+
+ still_held = bool(is_success and final_state.held_object is not None)
+ physical_pick_success = bool(
+ object_lift_delta_m is not None
+ and object_lift_delta_m >= PHYSICAL_PICK_MIN_LIFT_M
+ )
+ physical_move_success = bool(
+ still_held
+ and physical_pick_success
+ and held_object_xyz_error_m is not None
+ and held_object_xyz_error_m <= PHYSICAL_MOVE_HELD_OBJECT_XYZ_TOLERANCE_M
+ )
+ success = physical_move_success
+ if success:
+ failure_reason = ""
+ elif not is_success:
+ failure_reason = "planning_failed"
+ elif not still_held:
+ failure_reason = "held_object_lost"
+ elif not physical_pick_success:
+ failure_reason = "physical_pick_failed"
+ elif held_object_xyz_error_m is None:
+ failure_reason = "physical_replay_missing"
+ else:
+ failure_reason = "held_object_target_miss"
+
+ video_path = None
+ if should_record_case(args, recorded_count, success):
+ reset_robot(robot, initial_qpos)
+ reset_rigid_object(obj, initial_obj_pose)
+ video_case_suffix = (
+ f"{object_preset.object_type}_{position_case.name}_"
+ f"{pickup_approach}_{case.name}_r{repeat}"
+ )
+ if getattr(traj, "ndim", 0) >= 3 and traj.shape[1] > 0:
+ replay_traj = torch.cat((precondition_traj, traj), dim=1)
+ else:
+ replay_traj = precondition_traj
+
+ if getattr(replay_traj, "ndim", 0) >= 3 and replay_traj.shape[1] > 0:
+ post_grasp_clear_step = compute_pick_close_end_step()
+ should_clear_object_dynamics = True
+
+ def _on_step(waypoint_index: int) -> None:
+ nonlocal should_clear_object_dynamics
+ if (
+ should_clear_object_dynamics
+ and waypoint_index + 1 >= post_grasp_clear_step
+ ):
+ obj.clear_dynamics()
+ should_clear_object_dynamics = False
+
+ video_path = replay_trajectory_with_recording(
+ sim=sim,
+ robot=robot,
+ traj=replay_traj,
+ args=args,
+ video_path=build_video_output_path(
+ args,
+ "atomic_action_move_held_object",
+ video_case_suffix,
+ ),
+ on_step=_on_step,
+ )
+ else:
+ video_path = record_static_scene_video(
+ sim=sim,
+ args=args,
+ video_path=build_video_output_path(
+ args,
+ "atomic_action_move_held_object",
+ f"{video_case_suffix}_failed_static",
+ ),
+ )
+ reset_robot(robot, initial_qpos)
+ reset_rigid_object(obj, initial_obj_pose)
+
+ return {
+ "case_id": case_id,
+ "object_type": object_preset.object_type,
+ "material": object_preset.material_name,
+ "quadrant": position_case.quadrant,
+ "position_case": position_case.name,
+ "init_xy": position_case.xy,
+ "pickup_approach": pickup_approach,
+ "approach_direction": approach_direction_text,
+ "held_object_case": case.name,
+ "repeat": repeat,
+ "planning_success": bool(is_success),
+ "still_held": still_held,
+ "physical_pick_success": physical_pick_success,
+ "physical_move_success": physical_move_success,
+ "success": success,
+ "cost_time_ms": elapsed * 1000.0,
+ "cpu_delta_mb": mem_delta["cpu_mb"],
+ "gpu_delta_mb": mem_delta["gpu_mb"],
+ "peak_gpu_mb": peak_gpu,
+ "object_lift_delta_m": object_lift_delta_m,
+ "object_final_x_m": (
+ final_obj_position[0] if final_obj_position is not None else None
+ ),
+ "object_final_y_m": (
+ final_obj_position[1] if final_obj_position is not None else None
+ ),
+ "object_final_z_m": (
+ final_obj_position[2] if final_obj_position is not None else None
+ ),
+ "held_object_xy_error_m": held_object_xy_error_m,
+ "held_object_z_error_m": held_object_z_error_m,
+ "held_object_xyz_error_m": held_object_xyz_error_m,
+ "precondition_waypoints": precondition_waypoints,
+ "trajectory_waypoints": int(traj.shape[1]) if traj.ndim >= 2 else 0,
+ "failure_reason": failure_reason,
+ "video_path": str(video_path) if video_path is not None else "",
+ }
+ except Exception as exc:
+ video_path = None
+ if should_record_case(args, recorded_count, False):
+ try:
+ reset_robot(robot, initial_qpos)
+ initial_obj_pose = reset_rigid_object_xy(
+ obj=obj,
+ base_pose=base_obj_pose,
+ xy=position_case.xy,
+ sim=sim,
+ settle_steps=2,
+ )
+ video_path = record_static_scene_video(
+ sim=sim,
+ args=args,
+ video_path=build_video_output_path(
+ args,
+ "atomic_action_move_held_object",
+ (
+ f"{object_preset.object_type}_{position_case.name}_"
+ f"{pickup_approach}_{case.name}_r{repeat}"
+ "_exception_static"
+ ),
+ ),
+ )
+ reset_rigid_object(obj, initial_obj_pose)
+ except Exception:
+ video_path = None
+ return {
+ "case_id": case_id,
+ "object_type": object_preset.object_type,
+ "material": object_preset.material_name,
+ "quadrant": position_case.quadrant,
+ "position_case": position_case.name,
+ "init_xy": position_case.xy,
+ "pickup_approach": pickup_approach,
+ "approach_direction": "N/A",
+ "held_object_case": case.name,
+ "repeat": repeat,
+ "planning_success": False,
+ "still_held": False,
+ "physical_pick_success": False,
+ "physical_move_success": False,
+ "success": False,
+ "cost_time_ms": 0.0,
+ "cpu_delta_mb": 0.0,
+ "gpu_delta_mb": 0.0,
+ "peak_gpu_mb": 0.0,
+ "object_lift_delta_m": None,
+ "object_final_x_m": None,
+ "object_final_y_m": None,
+ "object_final_z_m": None,
+ "held_object_xy_error_m": None,
+ "held_object_z_error_m": None,
+ "held_object_xyz_error_m": None,
+ "precondition_waypoints": 0,
+ "trajectory_waypoints": 0,
+ "failure_reason": f"exception:{type(exc).__name__}:{exc}",
+ "video_path": str(video_path) if video_path is not None else "",
+ }
+
+
+def _build_rows(results: list[dict[str, object]]):
+ """Build report rows for MoveHeldObject."""
+ perf_rows = []
+ metric_rows = []
+ for result in results:
+ perf_rows.append(
+ {
+ "sample_size": 1,
+ "impl": "move_held_object",
+ "case_id": result["case_id"],
+ "object_type": result["object_type"],
+ "material": result["material"],
+ "quadrant": result["quadrant"],
+ "position_case": result["position_case"],
+ "init_xy": f"({result['init_xy'][0]:.3f},{result['init_xy'][1]:.3f})",
+ "pickup_approach": result["pickup_approach"],
+ "approach_direction": result["approach_direction"],
+ "held_object_case": result["held_object_case"],
+ "repeat": result["repeat"],
+ "cost_time_ms": format_float(result["cost_time_ms"]),
+ "cpu_delta_mb": format_float(result["cpu_delta_mb"]),
+ "gpu_delta_mb": format_float(result["gpu_delta_mb"]),
+ "peak_gpu_mb": format_float(result["peak_gpu_mb"]),
+ }
+ )
+ metric_rows.append(
+ {
+ "sample_size": 1,
+ "impl": "move_held_object",
+ "case_id": result["case_id"],
+ "object_type": result["object_type"],
+ "material": result["material"],
+ "quadrant": result["quadrant"],
+ "position_case": result["position_case"],
+ "pickup_approach": result["pickup_approach"],
+ "approach_direction": result["approach_direction"],
+ "held_object_case": result["held_object_case"],
+ "success_rate": f"{float(result['success']):.6f}",
+ "planning_success_rate": f"{float(result['planning_success']):.6f}",
+ "held_object_rate": f"{float(result['still_held']):.6f}",
+ "physical_pick_success_rate": (
+ f"{float(result['physical_pick_success']):.6f}"
+ ),
+ "physical_move_success_rate": (
+ f"{float(result['physical_move_success']):.6f}"
+ ),
+ "object_lift_delta_m": format_float(result["object_lift_delta_m"]),
+ "object_final_x_m": format_float(result["object_final_x_m"]),
+ "object_final_y_m": format_float(result["object_final_y_m"]),
+ "object_final_z_m": format_float(result["object_final_z_m"]),
+ "held_object_xy_error_m": format_float(
+ result["held_object_xy_error_m"]
+ ),
+ "held_object_z_error_m": format_float(result["held_object_z_error_m"]),
+ "held_object_xyz_error_m": format_float(
+ result["held_object_xyz_error_m"]
+ ),
+ "precondition_waypoints": result["precondition_waypoints"],
+ "trajectory_waypoints": result["trajectory_waypoints"],
+ "failure_reason": result["failure_reason"] or "N/A",
+ }
+ )
+ return perf_rows, metric_rows
+
+
+def run_all_benchmarks(args: argparse.Namespace | None = None) -> Path:
+ """Run MoveHeldObject benchmark and write a markdown report."""
+ args = _parse_args() if args is None else args
+ if args.repeat < 1:
+ raise ValueError("--repeat must be at least 1.")
+ profile = resolve_profile(args)
+
+ ensure_repo_root()
+ ensure_torch()
+ from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg
+ from embodichain.lab.sim.planners import ToppraPlannerCfg
+ from scripts.tutorials.atomic_action.move_held_object import (
+ create_robot,
+ initialize_simulation,
+ )
+
+ object_presets = select_mesh_object_presets(args.object_types, profile)
+ position_cases = select_position_cases(args.position_cases, profile)
+ approaches = select_pickup_approaches(args.approach_cases, profile)
+ cases = _selected_cases(args.held_object_cases, profile)
+ repeat = 1 if profile == "smoke" else args.repeat
+
+ print("=" * 60)
+ print("MoveHeldObject Atomic Action Benchmark")
+ print("=" * 60)
+ print(
+ "Coverage: "
+ f"profile={profile}, {len(object_presets)} object(s) x "
+ f"{len(position_cases)} position(s) x {len(approaches)} pickup approach(es) "
+ f"x {len(cases)} held target(s) "
+ f"x {repeat} repeat(s)"
+ )
+
+ sim = initialize_simulation(args)
+ robot = create_robot(sim)
+ initial_qpos = robot.get_qpos().clone()
+ motion_gen = MotionGenerator(
+ cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
+ )
+ object_pool = {}
+ for object_index, object_preset in enumerate(object_presets):
+ obj = create_benchmark_object(
+ sim=sim,
+ preset=object_preset,
+ position_case=position_cases[0],
+ uid_suffix="pool",
+ )
+ base_pose = obj.get_local_pose(to_matrix=True).clone()
+ park_rigid_object(obj, base_pose, index=object_index, sim=sim)
+ object_pool[object_preset.object_type] = (obj, base_pose)
+
+ results: list[dict[str, object]] = []
+ video_paths: list[str] = []
+ print("\n=== MoveHeldObject Object/Position/Target Sweep ===")
+ for object_preset in object_presets:
+ obj, base_pose = object_pool[object_preset.object_type]
+ for parked_index, parked_preset in enumerate(object_presets):
+ if parked_preset.object_type == object_preset.object_type:
+ continue
+ parked_obj, parked_base_pose = object_pool[parked_preset.object_type]
+ park_rigid_object(parked_obj, parked_base_pose, index=parked_index, sim=sim)
+ for position_case in position_cases:
+ for pickup_approach in approaches:
+ for case in cases:
+ for repeat_index in range(repeat):
+ result = _run_case(
+ sim,
+ robot,
+ motion_gen,
+ initial_qpos,
+ args,
+ obj,
+ base_pose,
+ object_preset,
+ position_case,
+ pickup_approach,
+ case,
+ repeat_index,
+ len(video_paths),
+ profile,
+ )
+ results.append(result)
+ if result["video_path"]:
+ video_paths.append(str(result["video_path"]))
+ print(
+ f" {result['case_id']:<54} "
+ f"time={result['cost_time_ms']:>10.2f} ms | "
+ f"success={result['success']}"
+ )
+
+ perf_rows, metric_rows = _build_rows(results)
+ leaderboard_rows = build_single_action_leaderboard("move_held_object", metric_rows)
+ report_path = write_markdown_report(
+ benchmark_name="atomic_action_move_held_object",
+ perf_rows=perf_rows,
+ metric_rows=metric_rows,
+ leaderboard_rows=leaderboard_rows,
+ notes=[
+ "Timed block includes MoveHeldObject only; PickUp precondition is "
+ "prepared outside timing.",
+ f"Profile: {profile}",
+ "Object presets: "
+ + ", ".join(describe_object_preset(preset) for preset in object_presets),
+ "Position cases: "
+ + ", ".join(
+ f"{case.name}/{case.quadrant}/xy={case.xy}" for case in position_cases
+ ),
+ "PickUp approach cases: " + ", ".join(approaches),
+ "Held-object target cases: " + ", ".join(case.name for case in cases),
+ f"CPU memory backend: {CPU_MEMORY_BACKEND}",
+ f"n_sample: {1000 if profile == 'smoke' else args.n_sample}",
+ (
+ "Side grasp opening-axis rule: prefer candidates with "
+ f"abs(opening_axis_z) <= {SIDE_GRASP_MAX_OPEN_AXIS_ABS_Z:.2f}; "
+ "fallback cost += "
+ f"abs(opening_axis_z) * {SIDE_GRASP_OPEN_AXIS_Z_COST_WEIGHT:.2f}."
+ ),
+ (
+ "Physical success rule: PickUp precondition lift delta >= "
+ f"{PHYSICAL_PICK_MIN_LIFT_M:.3f} m, held-object state is kept, "
+ "and final object XYZ error <= "
+ f"{PHYSICAL_MOVE_HELD_OBJECT_XYZ_TOLERANCE_M:.3f} m."
+ ),
+ *summarize_video_recording(args, results, video_paths),
+ ],
+ )
+ print(f"Markdown report saved: {report_path}")
+ return report_path
+
+
+def main() -> None:
+ """Run the CLI entry point."""
+ try:
+ run_all_benchmarks()
+ except RuntimeError as exc:
+ raise SystemExit(str(exc)) from exc
+
+
+if __name__ == "__main__":
+ main()
+
+
+__all__ = ["add_benchmark_args", "run_all_benchmarks"]
diff --git a/scripts/benchmark/atomic_action/move_joints_benchmark.py b/scripts/benchmark/atomic_action/move_joints_benchmark.py
new file mode 100644
index 000000000..25a1f3a05
--- /dev/null
+++ b/scripts/benchmark/atomic_action/move_joints_benchmark.py
@@ -0,0 +1,331 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Benchmark MoveJoints atomic action across joint-space target sequences.
+
+Measures planning latency, memory usage, trajectory success, and final arm joint
+error for named and explicit joint-position targets.
+Run: python -m scripts.benchmark.atomic_action.move_joints_benchmark
+"""
+
+from __future__ import annotations
+
+import argparse
+from dataclasses import dataclass
+from pathlib import Path
+
+from scripts.benchmark.atomic_action.common import (
+ CPU_MEMORY_BACKEND,
+ add_common_benchmark_args,
+ build_single_action_leaderboard,
+ build_video_output_path,
+ ensure_repo_root,
+ ensure_torch,
+ format_float,
+ replay_trajectory_with_recording,
+ reset_robot,
+ resolve_profile,
+ should_record_case,
+ timed_call,
+ write_markdown_report,
+)
+
+
+@dataclass(frozen=True)
+class JointSequenceCase:
+ """MoveJoints target sequence benchmark case."""
+
+ name: str
+ sequence: tuple[str, ...]
+
+
+READY_QPOS = (0.35, -1.20, 1.30, -1.65, -1.57, 0.20)
+HOME_QPOS = (0.0, -1.57, 1.57, -1.57, -1.57, 0.0)
+EXTENDED_QPOS = (0.55, -1.05, 1.10, -1.45, -1.35, 0.35)
+JOINT_TARGETS = {
+ "ready": READY_QPOS,
+ "home": HOME_QPOS,
+ "extended": EXTENDED_QPOS,
+}
+SEQUENCE_CASES = {
+ "ready_home": JointSequenceCase("ready_home", ("ready", "home")),
+ "extended_home": JointSequenceCase("extended_home", ("extended", "home")),
+ "ready_extended_home": JointSequenceCase(
+ "ready_extended_home", ("ready", "extended", "home")
+ ),
+}
+DEFAULT_SEQUENCE_CASES = tuple(SEQUENCE_CASES.keys())
+MOVE_JOINTS_SAMPLE_INTERVAL = 80
+SUCCESS_TOLERANCE_RAD = 1e-4
+
+
+def add_benchmark_args(parser: argparse.ArgumentParser) -> None:
+ """Add MoveJoints benchmark CLI arguments."""
+ parser.add_argument(
+ "--sequence_cases",
+ nargs="+",
+ choices=(*SEQUENCE_CASES.keys(), "all"),
+ default=list(DEFAULT_SEQUENCE_CASES),
+ help="Joint sequence cases to benchmark. Use 'all' for every case.",
+ )
+ add_common_benchmark_args(parser)
+
+
+def _parse_args() -> argparse.Namespace:
+ """Parse command line arguments."""
+ parser = argparse.ArgumentParser(
+ description="Benchmark MoveJoints over named and explicit qpos targets."
+ )
+ add_benchmark_args(parser)
+ return parser.parse_args()
+
+
+def _select_cases(case_names: list[str]) -> list[JointSequenceCase]:
+ """Resolve selected sequence case names."""
+ if "all" in case_names:
+ return list(SEQUENCE_CASES.values())
+ return [SEQUENCE_CASES[name] for name in case_names]
+
+
+def _qpos(values, device):
+ """Create a torch qpos tensor."""
+ torch = ensure_torch()
+ return torch.tensor(values, dtype=torch.float32, device=device)
+
+
+def _targets_for_sequence(sequence_case: JointSequenceCase, device):
+ """Build typed MoveJoints targets for a sequence case."""
+ from embodichain.lab.sim.atomic_actions import (
+ JointPositionTarget,
+ NamedJointPositionTarget,
+ )
+
+ targets = []
+ for index, name in enumerate(sequence_case.sequence):
+ if index == 0 and name == "ready":
+ targets.append(("move_joints", NamedJointPositionTarget(name="ready")))
+ else:
+ targets.append(
+ (
+ "move_joints",
+ JointPositionTarget(qpos=_qpos(JOINT_TARGETS[name], device)),
+ )
+ )
+ return targets
+
+
+def _run_case(
+ sim,
+ robot,
+ atomic_engine,
+ initial_qpos,
+ case: JointSequenceCase,
+ repeat: int,
+ args: argparse.Namespace,
+ recorded_count: int,
+):
+ """Run one MoveJoints case."""
+ torch = ensure_torch()
+ reset_robot(robot, initial_qpos)
+ steps = _targets_for_sequence(case, sim.device)
+ elapsed, mem_delta, peak_gpu, result = timed_call(lambda: atomic_engine.run(steps))
+ is_success, traj, _ = result
+ video_path = None
+ if should_record_case(args, recorded_count, bool(is_success)):
+ reset_robot(robot, initial_qpos)
+ video_path = replay_trajectory_with_recording(
+ sim=sim,
+ robot=robot,
+ traj=traj,
+ args=args,
+ video_path=build_video_output_path(
+ args, "atomic_action_move_joints", f"{case.name}_r{repeat}"
+ ),
+ )
+ reset_robot(robot, initial_qpos)
+
+ final_error_rad = None
+ if is_success and traj.shape[1] > 0:
+ arm_joint_ids = robot.get_joint_ids(name="arm")
+ final_qpos = traj[:, -1, arm_joint_ids][0]
+ expected = _qpos(JOINT_TARGETS[case.sequence[-1]], sim.device)
+ final_error_rad = float(torch.linalg.norm(final_qpos - expected))
+ target_reached = bool(
+ is_success
+ and final_error_rad is not None
+ and final_error_rad <= SUCCESS_TOLERANCE_RAD
+ )
+ return {
+ "case_id": f"{case.name}:r{repeat}",
+ "sequence_case": case.name,
+ "repeat": repeat,
+ "planning_success": bool(is_success),
+ "target_reached": target_reached,
+ "cost_time_ms": elapsed * 1000.0,
+ "cpu_delta_mb": mem_delta["cpu_mb"],
+ "gpu_delta_mb": mem_delta["gpu_mb"],
+ "peak_gpu_mb": peak_gpu,
+ "final_error_rad": final_error_rad,
+ "trajectory_waypoints": int(traj.shape[1]) if traj.ndim >= 2 else 0,
+ "failure_reason": "" if target_reached else "target_not_reached",
+ "video_path": str(video_path) if video_path is not None else "",
+ }
+
+
+def _build_rows(results: list[dict[str, object]]):
+ """Build report rows for MoveJoints."""
+ perf_rows = []
+ metric_rows = []
+ for result in results:
+ perf_rows.append(
+ {
+ "sample_size": 1,
+ "impl": "move_joints",
+ "case_id": result["case_id"],
+ "sequence_case": result["sequence_case"],
+ "repeat": result["repeat"],
+ "cost_time_ms": format_float(result["cost_time_ms"]),
+ "cpu_delta_mb": format_float(result["cpu_delta_mb"]),
+ "gpu_delta_mb": format_float(result["gpu_delta_mb"]),
+ "peak_gpu_mb": format_float(result["peak_gpu_mb"]),
+ }
+ )
+ metric_rows.append(
+ {
+ "sample_size": 1,
+ "impl": "move_joints",
+ "case_id": result["case_id"],
+ "sequence_case": result["sequence_case"],
+ "success_rate": f"{float(result['target_reached']):.6f}",
+ "planning_success_rate": f"{float(result['planning_success']):.6f}",
+ "joint_err_rad": format_float(result["final_error_rad"]),
+ "trajectory_waypoints": result["trajectory_waypoints"],
+ "failure_reason": result["failure_reason"] or "N/A",
+ }
+ )
+ return perf_rows, metric_rows
+
+
+def run_all_benchmarks(args: argparse.Namespace | None = None) -> Path:
+ """Run MoveJoints benchmark and write a markdown report."""
+ args = _parse_args() if args is None else args
+ if args.repeat < 1:
+ raise ValueError("--repeat must be at least 1.")
+ profile = resolve_profile(args)
+
+ ensure_repo_root()
+ ensure_torch()
+ from embodichain.lab.sim.atomic_actions import (
+ AtomicActionEngine,
+ MoveJoints,
+ MoveJointsCfg,
+ )
+ from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg
+ from embodichain.lab.sim.planners import ToppraPlannerCfg
+ from scripts.tutorials.atomic_action.move_joints import (
+ create_robot,
+ initialize_simulation,
+ )
+
+ cases = _select_cases(args.sequence_cases)
+ repeat = 1 if profile == "smoke" else args.repeat
+ if profile == "smoke":
+ cases = [SEQUENCE_CASES["ready_home"]]
+
+ print("=" * 60)
+ print("MoveJoints Atomic Action Benchmark")
+ print("=" * 60)
+ print(
+ "Coverage: "
+ f"profile={profile}, {len(cases)} sequence case(s) x "
+ f"{repeat} repeat(s)"
+ )
+
+ sim = initialize_simulation(args)
+ robot = create_robot(sim)
+ initial_qpos = robot.get_qpos().clone()
+ motion_gen = MotionGenerator(
+ cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
+ )
+ ready_qpos = _qpos(READY_QPOS, sim.device)
+ atomic_engine = AtomicActionEngine(motion_generator=motion_gen)
+ atomic_engine.register(
+ MoveJoints(
+ motion_gen,
+ cfg=MoveJointsCfg(
+ control_part="arm",
+ sample_interval=MOVE_JOINTS_SAMPLE_INTERVAL,
+ named_joint_positions={"ready": ready_qpos},
+ ),
+ )
+ )
+
+ results: list[dict[str, object]] = []
+ video_paths: list[str] = []
+ print("\n=== MoveJoints Sequence Sweep ===")
+ for case in cases:
+ for repeat_index in range(repeat):
+ result = _run_case(
+ sim,
+ robot,
+ atomic_engine,
+ initial_qpos,
+ case,
+ repeat_index,
+ args,
+ len(video_paths),
+ )
+ results.append(result)
+ if result["video_path"]:
+ video_paths.append(str(result["video_path"]))
+ print(
+ f" {result['case_id']:<24} "
+ f"time={result['cost_time_ms']:>10.2f} ms | "
+ f"success={result['target_reached']} "
+ f"err={format_float(result['final_error_rad'], precision=6)}"
+ )
+
+ perf_rows, metric_rows = _build_rows(results)
+ leaderboard_rows = build_single_action_leaderboard("move_joints", metric_rows)
+ report_path = write_markdown_report(
+ benchmark_name="atomic_action_move_joints",
+ perf_rows=perf_rows,
+ metric_rows=metric_rows,
+ leaderboard_rows=leaderboard_rows,
+ notes=[
+ f"Profile: {profile}",
+ f"CPU memory backend: {CPU_MEMORY_BACKEND}",
+ f"Success tolerance: {SUCCESS_TOLERANCE_RAD} rad joint error.",
+ "Replay videos: " + (", ".join(video_paths) if video_paths else "disabled"),
+ ],
+ )
+ print(f"Markdown report saved: {report_path}")
+ return report_path
+
+
+def main() -> None:
+ """Run the CLI entry point."""
+ try:
+ run_all_benchmarks()
+ except RuntimeError as exc:
+ raise SystemExit(str(exc)) from exc
+
+
+if __name__ == "__main__":
+ main()
+
+
+__all__ = ["add_benchmark_args", "run_all_benchmarks"]
diff --git a/scripts/benchmark/atomic_action/pickup_benchmark.py b/scripts/benchmark/atomic_action/pickup_benchmark.py
new file mode 100644
index 000000000..37395ca83
--- /dev/null
+++ b/scripts/benchmark/atomic_action/pickup_benchmark.py
@@ -0,0 +1,572 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Benchmark PickUp atomic action across grasp approach directions.
+
+Measures planning latency, memory usage, grasp planning success, held-object
+state creation, and trajectory length for the PickUp action.
+Run: python -m scripts.benchmark.atomic_action.pickup_benchmark
+"""
+
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+from scripts.benchmark.atomic_action.common import (
+ CPU_MEMORY_BACKEND,
+ add_common_benchmark_args,
+ add_grasp_benchmark_args,
+ add_object_position_benchmark_args,
+ add_pickup_approach_benchmark_args,
+ build_single_action_leaderboard,
+ build_video_output_path,
+ create_antipodal_object_semantics,
+ create_benchmark_object,
+ describe_object_preset,
+ ensure_repo_root,
+ ensure_torch,
+ format_float,
+ format_vector3,
+ MeshObjectPreset,
+ object_position_tuple,
+ park_rigid_object,
+ PHYSICAL_PICK_MIN_LIFT_M,
+ pickup_approach_direction_tuple,
+ PositionCase,
+ record_static_scene_video,
+ replay_trajectory_for_physical_validation,
+ replay_trajectory_with_recording,
+ reset_rigid_object,
+ reset_rigid_object_xy,
+ reset_robot,
+ resolve_pickup_approach_direction,
+ resolve_profile,
+ select_mesh_object_presets,
+ select_pickup_approaches,
+ select_position_cases,
+ should_record_case,
+ SIDE_GRASP_MAX_OPEN_AXIS_ABS_Z,
+ SIDE_GRASP_OPEN_AXIS_Z_COST_WEIGHT,
+ summarize_video_recording,
+ timed_call,
+ write_markdown_report,
+)
+
+PICK_SAMPLE_INTERVAL = 120
+HAND_INTERP_STEPS = 12
+
+
+def add_benchmark_args(parser: argparse.ArgumentParser) -> None:
+ """Add PickUp benchmark CLI arguments."""
+ add_pickup_approach_benchmark_args(parser)
+ add_object_position_benchmark_args(parser)
+ add_grasp_benchmark_args(parser)
+ add_common_benchmark_args(parser)
+
+
+def _parse_args() -> argparse.Namespace:
+ """Parse command line arguments."""
+ parser = argparse.ArgumentParser(
+ description="Benchmark PickUp over grasp approach directions."
+ )
+ add_benchmark_args(parser)
+ return parser.parse_args()
+
+
+def _make_pickup_args(
+ args: argparse.Namespace,
+ approach: str,
+ object_preset: MeshObjectPreset,
+ profile: str,
+) -> argparse.Namespace:
+ """Build a tutorial-compatible argparse namespace."""
+ return argparse.Namespace(
+ object=object_preset.label,
+ n_sample=1000 if profile == "smoke" else args.n_sample,
+ force_reannotate=args.force_reannotate,
+ approach="custom" if approach == "side" else approach,
+ custom_approach_direction=None,
+ device=args.device,
+ renderer=args.renderer,
+ headless=True,
+ )
+
+
+def _run_case(
+ sim,
+ robot,
+ motion_gen,
+ initial_qpos,
+ args,
+ obj,
+ base_obj_pose,
+ object_preset: MeshObjectPreset,
+ position_case: PositionCase,
+ approach: str,
+ repeat: int,
+ recorded_count: int,
+ profile: str,
+):
+ """Run one PickUp benchmark case."""
+ from embodichain.lab.sim.atomic_actions import (
+ AtomicActionEngine,
+ GraspTarget,
+ PickUp,
+ PickUpCfg,
+ )
+ from scripts.tutorials.atomic_action.pickup import (
+ build_grasp_generator_cfg,
+ build_gripper_collision_cfg,
+ get_hand_open_close_qpos,
+ initialize_pre_pick_robot_pose,
+ compute_pick_close_end_step,
+ )
+
+ case_id = f"{object_preset.object_type}:{position_case.name}:{approach}:r{repeat}"
+ try:
+ reset_robot(robot, initial_qpos)
+ initial_obj_pose = reset_rigid_object_xy(
+ obj=obj,
+ base_pose=base_obj_pose,
+ xy=position_case.xy,
+ sim=sim,
+ settle_steps=2,
+ )
+ initial_obj_position = object_position_tuple(obj)
+ hand_open, hand_close = get_hand_open_close_qpos(robot, sim.device)
+ initialize_pre_pick_robot_pose(robot, obj, hand_open)
+ case_args = _make_pickup_args(args, approach, object_preset, profile)
+ approach_direction = resolve_pickup_approach_direction(
+ approach, position_case, sim.device
+ )
+ approach_direction_text = format_vector3(
+ pickup_approach_direction_tuple(approach, position_case)
+ )
+ atomic_engine = AtomicActionEngine(motion_generator=motion_gen)
+ atomic_engine.register(
+ PickUp(
+ motion_gen,
+ cfg=PickUpCfg(
+ control_part="arm",
+ hand_control_part="hand",
+ hand_open_qpos=hand_open,
+ hand_close_qpos=hand_close,
+ approach_direction=approach_direction,
+ pre_grasp_distance=0.15,
+ lift_height=0.16,
+ sample_interval=PICK_SAMPLE_INTERVAL,
+ hand_interp_steps=HAND_INTERP_STEPS,
+ ),
+ )
+ )
+ semantics = create_antipodal_object_semantics(
+ obj=obj,
+ preset=object_preset,
+ args=case_args,
+ build_gripper_collision_cfg=build_gripper_collision_cfg,
+ build_grasp_generator_cfg=build_grasp_generator_cfg,
+ )
+ elapsed, mem_delta, peak_gpu, result = timed_call(
+ lambda: atomic_engine.run(
+ steps=[("pick_up", GraspTarget(semantics=semantics))]
+ )
+ )
+ is_success, traj, final_state = result
+ final_obj_position = None
+ object_lift_delta_m = None
+ object_xy_drift_m = None
+
+ if bool(is_success) and getattr(traj, "ndim", 0) >= 3 and traj.shape[1] > 0:
+ reset_robot(robot, initial_qpos)
+ reset_rigid_object(obj, initial_obj_pose)
+ initialize_pre_pick_robot_pose(robot, obj, hand_open)
+ post_grasp_clear_step = compute_pick_close_end_step()
+ should_clear_object_dynamics = True
+
+ def _on_validation_step(waypoint_index: int) -> None:
+ nonlocal should_clear_object_dynamics
+ if (
+ should_clear_object_dynamics
+ and waypoint_index + 1 >= post_grasp_clear_step
+ ):
+ obj.clear_dynamics()
+ should_clear_object_dynamics = False
+
+ final_obj_position = replay_trajectory_for_physical_validation(
+ sim=sim,
+ robot=robot,
+ obj=obj,
+ traj=traj,
+ on_step=_on_validation_step,
+ )
+ if final_obj_position is not None:
+ object_lift_delta_m = final_obj_position[2] - initial_obj_position[2]
+ object_xy_drift_m = (
+ (final_obj_position[0] - initial_obj_position[0]) ** 2
+ + (final_obj_position[1] - initial_obj_position[1]) ** 2
+ ) ** 0.5
+ reset_robot(robot, initial_qpos)
+ reset_rigid_object(obj, initial_obj_pose)
+
+ held_created = bool(is_success and final_state.held_object is not None)
+ physical_pick_success = bool(
+ held_created
+ and object_lift_delta_m is not None
+ and object_lift_delta_m >= PHYSICAL_PICK_MIN_LIFT_M
+ )
+
+ video_path = None
+ if should_record_case(args, recorded_count, physical_pick_success):
+ reset_robot(robot, initial_qpos)
+ reset_rigid_object(obj, initial_obj_pose)
+ initialize_pre_pick_robot_pose(robot, obj, hand_open)
+ video_case_suffix = (
+ f"{object_preset.object_type}_{position_case.name}_"
+ f"{approach}_r{repeat}"
+ )
+ if getattr(traj, "ndim", 0) >= 3 and traj.shape[1] > 0:
+ post_grasp_clear_step = compute_pick_close_end_step()
+ should_clear_object_dynamics = True
+
+ def _on_step(waypoint_index: int) -> None:
+ nonlocal should_clear_object_dynamics
+ if (
+ should_clear_object_dynamics
+ and waypoint_index + 1 >= post_grasp_clear_step
+ ):
+ obj.clear_dynamics()
+ should_clear_object_dynamics = False
+
+ video_path = replay_trajectory_with_recording(
+ sim=sim,
+ robot=robot,
+ traj=traj,
+ args=args,
+ video_path=build_video_output_path(
+ args,
+ "atomic_action_pick_up",
+ video_case_suffix,
+ ),
+ on_step=_on_step,
+ )
+ else:
+ video_path = record_static_scene_video(
+ sim=sim,
+ args=args,
+ video_path=build_video_output_path(
+ args,
+ "atomic_action_pick_up",
+ f"{video_case_suffix}_failed_static",
+ ),
+ )
+ reset_robot(robot, initial_qpos)
+ reset_rigid_object(obj, initial_obj_pose)
+
+ lift_height_m = None
+ if is_success and traj.shape[1] > 0:
+ arm_joint_ids = robot.get_joint_ids(name="arm")
+ start_pose = robot.compute_fk(
+ qpos=traj[:, 0, arm_joint_ids], name="arm", to_matrix=True
+ )[0]
+ final_pose = robot.compute_fk(
+ qpos=traj[:, -1, arm_joint_ids], name="arm", to_matrix=True
+ )[0]
+ lift_height_m = float(final_pose[2, 3] - start_pose[2, 3])
+ success = physical_pick_success
+ if success:
+ failure_reason = ""
+ elif not is_success:
+ failure_reason = "planning_failed"
+ elif not held_created:
+ failure_reason = "held_object_missing"
+ elif object_lift_delta_m is None:
+ failure_reason = "physical_replay_missing"
+ else:
+ failure_reason = "physical_pick_lift_too_low"
+ return {
+ "case_id": case_id,
+ "object_type": object_preset.object_type,
+ "material": object_preset.material_name,
+ "quadrant": position_case.quadrant,
+ "position_case": position_case.name,
+ "init_xy": position_case.xy,
+ "approach": approach,
+ "approach_direction": approach_direction_text,
+ "repeat": repeat,
+ "planning_success": bool(is_success),
+ "held_created": held_created,
+ "physical_pick_success": physical_pick_success,
+ "success": success,
+ "cost_time_ms": elapsed * 1000.0,
+ "cpu_delta_mb": mem_delta["cpu_mb"],
+ "gpu_delta_mb": mem_delta["gpu_mb"],
+ "peak_gpu_mb": peak_gpu,
+ "lift_height_m": lift_height_m,
+ "object_initial_z_m": initial_obj_position[2],
+ "object_final_z_m": (
+ final_obj_position[2] if final_obj_position is not None else None
+ ),
+ "object_lift_delta_m": object_lift_delta_m,
+ "object_xy_drift_m": object_xy_drift_m,
+ "trajectory_waypoints": int(traj.shape[1]) if traj.ndim >= 2 else 0,
+ "failure_reason": failure_reason,
+ "video_path": str(video_path) if video_path is not None else "",
+ }
+ except Exception as exc:
+ video_path = None
+ if should_record_case(args, recorded_count, False):
+ try:
+ reset_robot(robot, initial_qpos)
+ initial_obj_pose = reset_rigid_object_xy(
+ obj=obj,
+ base_pose=base_obj_pose,
+ xy=position_case.xy,
+ sim=sim,
+ settle_steps=2,
+ )
+ video_path = record_static_scene_video(
+ sim=sim,
+ args=args,
+ video_path=build_video_output_path(
+ args,
+ "atomic_action_pick_up",
+ (
+ f"{object_preset.object_type}_{position_case.name}_"
+ f"{approach}_r{repeat}_exception_static"
+ ),
+ ),
+ )
+ reset_rigid_object(obj, initial_obj_pose)
+ except Exception:
+ video_path = None
+ return {
+ "case_id": case_id,
+ "object_type": object_preset.object_type,
+ "material": object_preset.material_name,
+ "quadrant": position_case.quadrant,
+ "position_case": position_case.name,
+ "init_xy": position_case.xy,
+ "approach": approach,
+ "approach_direction": "N/A",
+ "repeat": repeat,
+ "planning_success": False,
+ "held_created": False,
+ "physical_pick_success": False,
+ "success": False,
+ "cost_time_ms": 0.0,
+ "cpu_delta_mb": 0.0,
+ "gpu_delta_mb": 0.0,
+ "peak_gpu_mb": 0.0,
+ "lift_height_m": None,
+ "object_initial_z_m": None,
+ "object_final_z_m": None,
+ "object_lift_delta_m": None,
+ "object_xy_drift_m": None,
+ "trajectory_waypoints": 0,
+ "failure_reason": f"exception:{type(exc).__name__}:{exc}",
+ "video_path": str(video_path) if video_path is not None else "",
+ }
+
+
+def _build_rows(results: list[dict[str, object]]):
+ """Build report rows for PickUp."""
+ perf_rows = []
+ metric_rows = []
+ for result in results:
+ perf_rows.append(
+ {
+ "sample_size": 1,
+ "impl": "pick_up",
+ "case_id": result["case_id"],
+ "object_type": result["object_type"],
+ "material": result["material"],
+ "quadrant": result["quadrant"],
+ "position_case": result["position_case"],
+ "init_xy": f"({result['init_xy'][0]:.3f},{result['init_xy'][1]:.3f})",
+ "approach": result["approach"],
+ "approach_direction": result["approach_direction"],
+ "repeat": result["repeat"],
+ "cost_time_ms": format_float(result["cost_time_ms"]),
+ "cpu_delta_mb": format_float(result["cpu_delta_mb"]),
+ "gpu_delta_mb": format_float(result["gpu_delta_mb"]),
+ "peak_gpu_mb": format_float(result["peak_gpu_mb"]),
+ }
+ )
+ metric_rows.append(
+ {
+ "sample_size": 1,
+ "impl": "pick_up",
+ "case_id": result["case_id"],
+ "object_type": result["object_type"],
+ "material": result["material"],
+ "quadrant": result["quadrant"],
+ "position_case": result["position_case"],
+ "approach": result["approach"],
+ "approach_direction": result["approach_direction"],
+ "success_rate": f"{float(result['success']):.6f}",
+ "planning_success_rate": f"{float(result['planning_success']):.6f}",
+ "held_object_rate": f"{float(result['held_created']):.6f}",
+ "physical_pick_success_rate": (
+ f"{float(result['physical_pick_success']):.6f}"
+ ),
+ "lift_height_m": format_float(result["lift_height_m"]),
+ "object_initial_z_m": format_float(result["object_initial_z_m"]),
+ "object_final_z_m": format_float(result["object_final_z_m"]),
+ "object_lift_delta_m": format_float(result["object_lift_delta_m"]),
+ "object_xy_drift_m": format_float(result["object_xy_drift_m"]),
+ "trajectory_waypoints": result["trajectory_waypoints"],
+ "failure_reason": result["failure_reason"] or "N/A",
+ }
+ )
+ return perf_rows, metric_rows
+
+
+def run_all_benchmarks(args: argparse.Namespace | None = None) -> Path:
+ """Run PickUp benchmark and write a markdown report."""
+ args = _parse_args() if args is None else args
+ if args.repeat < 1:
+ raise ValueError("--repeat must be at least 1.")
+ profile = resolve_profile(args)
+
+ ensure_repo_root()
+ ensure_torch()
+ from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg
+ from embodichain.lab.sim.planners import ToppraPlannerCfg
+ from scripts.tutorials.atomic_action.pickup import (
+ create_robot,
+ initialize_simulation,
+ )
+
+ object_presets = select_mesh_object_presets(args.object_types, profile)
+ position_cases = select_position_cases(args.position_cases, profile)
+ approaches = select_pickup_approaches(args.approach_cases, profile)
+ repeat = 1 if profile == "smoke" else args.repeat
+
+ print("=" * 60)
+ print("PickUp Atomic Action Benchmark")
+ print("=" * 60)
+ print(
+ "Coverage: "
+ f"profile={profile}, {len(object_presets)} object(s) x "
+ f"{len(position_cases)} position(s) x {len(approaches)} approach(es) "
+ f"x {repeat} repeat(s)"
+ )
+
+ sim = initialize_simulation(args)
+ robot = create_robot(sim)
+ initial_qpos = robot.get_qpos().clone()
+ motion_gen = MotionGenerator(
+ cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
+ )
+ object_pool = {}
+ for object_index, object_preset in enumerate(object_presets):
+ obj = create_benchmark_object(
+ sim=sim,
+ preset=object_preset,
+ position_case=position_cases[0],
+ uid_suffix="pool",
+ )
+ base_pose = obj.get_local_pose(to_matrix=True).clone()
+ park_rigid_object(obj, base_pose, index=object_index, sim=sim)
+ object_pool[object_preset.object_type] = (obj, base_pose)
+
+ results: list[dict[str, object]] = []
+ video_paths: list[str] = []
+ print("\n=== PickUp Object/Position/Approach Sweep ===")
+ for object_preset in object_presets:
+ obj, base_pose = object_pool[object_preset.object_type]
+ for parked_index, parked_preset in enumerate(object_presets):
+ if parked_preset.object_type == object_preset.object_type:
+ continue
+ parked_obj, parked_base_pose = object_pool[parked_preset.object_type]
+ park_rigid_object(parked_obj, parked_base_pose, index=parked_index, sim=sim)
+ for position_case in position_cases:
+ for approach in approaches:
+ for repeat_index in range(repeat):
+ result = _run_case(
+ sim,
+ robot,
+ motion_gen,
+ initial_qpos,
+ args,
+ obj,
+ base_pose,
+ object_preset,
+ position_case,
+ approach,
+ repeat_index,
+ len(video_paths),
+ profile,
+ )
+ results.append(result)
+ if result["video_path"]:
+ video_paths.append(str(result["video_path"]))
+ print(
+ f" {result['case_id']:<42} "
+ f"time={result['cost_time_ms']:>10.2f} ms | "
+ f"success={result['success']} "
+ f"lift={format_float(result['lift_height_m'], precision=4)}"
+ )
+
+ perf_rows, metric_rows = _build_rows(results)
+ leaderboard_rows = build_single_action_leaderboard("pick_up", metric_rows)
+ report_path = write_markdown_report(
+ benchmark_name="atomic_action_pick_up",
+ perf_rows=perf_rows,
+ metric_rows=metric_rows,
+ leaderboard_rows=leaderboard_rows,
+ notes=[
+ f"Profile: {profile}",
+ "Object presets: "
+ + ", ".join(describe_object_preset(preset) for preset in object_presets),
+ "Position cases: "
+ + ", ".join(
+ f"{case.name}/{case.quadrant}/xy={case.xy}" for case in position_cases
+ ),
+ "PickUp approach cases: " + ", ".join(approaches),
+ f"CPU memory backend: {CPU_MEMORY_BACKEND}",
+ f"n_sample: {1000 if profile == 'smoke' else args.n_sample}",
+ (
+ "Side grasp opening-axis rule: prefer candidates with "
+ f"abs(opening_axis_z) <= {SIDE_GRASP_MAX_OPEN_AXIS_ABS_Z:.2f}; "
+ "fallback cost += "
+ f"abs(opening_axis_z) * {SIDE_GRASP_OPEN_AXIS_Z_COST_WEIGHT:.2f}."
+ ),
+ (
+ "Physical success rule: planning success, held-object state "
+ f"created, and object lift delta >= {PHYSICAL_PICK_MIN_LIFT_M:.3f} m."
+ ),
+ *summarize_video_recording(args, results, video_paths),
+ ],
+ )
+ print(f"Markdown report saved: {report_path}")
+ return report_path
+
+
+def main() -> None:
+ """Run the CLI entry point."""
+ try:
+ run_all_benchmarks()
+ except RuntimeError as exc:
+ raise SystemExit(str(exc)) from exc
+
+
+if __name__ == "__main__":
+ main()
+
+
+__all__ = ["add_benchmark_args", "run_all_benchmarks"]
diff --git a/scripts/benchmark/atomic_action/place_benchmark.py b/scripts/benchmark/atomic_action/place_benchmark.py
new file mode 100644
index 000000000..6ea98d68d
--- /dev/null
+++ b/scripts/benchmark/atomic_action/place_benchmark.py
@@ -0,0 +1,736 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Benchmark Place atomic action after a PickUp precondition.
+
+Measures Place-only planning latency and memory usage once a held-object state
+has been produced by the PickUp action.
+Run: python -m scripts.benchmark.atomic_action.place_benchmark
+"""
+
+from __future__ import annotations
+
+import argparse
+from dataclasses import dataclass
+from pathlib import Path
+
+from scripts.benchmark.atomic_action.common import (
+ CPU_MEMORY_BACKEND,
+ add_common_benchmark_args,
+ add_grasp_benchmark_args,
+ add_object_position_benchmark_args,
+ add_pickup_approach_benchmark_args,
+ build_single_action_leaderboard,
+ build_video_output_path,
+ create_antipodal_object_semantics,
+ create_benchmark_object,
+ describe_object_preset,
+ ensure_repo_root,
+ ensure_torch,
+ format_float,
+ format_vector3,
+ MeshObjectPreset,
+ object_position_tuple,
+ park_rigid_object,
+ PHYSICAL_PICK_MIN_LIFT_M,
+ PHYSICAL_PLACE_XY_TOLERANCE_M,
+ pickup_approach_direction_tuple,
+ PositionCase,
+ record_static_scene_video,
+ replay_trajectory_for_physical_validation,
+ replay_trajectory_with_recording,
+ reset_rigid_object,
+ reset_rigid_object_xy,
+ reset_robot,
+ resolve_pickup_approach_direction,
+ resolve_profile,
+ select_mesh_object_presets,
+ select_pickup_approaches,
+ select_position_cases,
+ should_record_case,
+ SIDE_GRASP_MAX_OPEN_AXIS_ABS_Z,
+ SIDE_GRASP_OPEN_AXIS_Z_COST_WEIGHT,
+ summarize_video_recording,
+ timed_call,
+ write_markdown_report,
+ xy_distance_m,
+)
+
+
+@dataclass(frozen=True)
+class PlaceCase:
+ """Place target benchmark case."""
+
+ name: str
+ xyz: tuple[float, float, float]
+
+
+PLACE_CASES = {
+ "left_bin": PlaceCase("left_bin", (-0.20, 0.28, 0.10)),
+ "right_bin": PlaceCase("right_bin", (-0.20, -0.28, 0.10)),
+ "center": PlaceCase("center", (-0.35, 0.00, 0.12)),
+}
+PICK_SAMPLE_INTERVAL = 120
+PLACE_SAMPLE_INTERVAL = 120
+HAND_INTERP_STEPS = 12
+PLACE_LIFT_HEIGHT = 0.14
+
+
+def add_benchmark_args(parser: argparse.ArgumentParser) -> None:
+ """Add Place benchmark CLI arguments."""
+ add_pickup_approach_benchmark_args(parser)
+ parser.add_argument(
+ "--place_cases",
+ nargs="+",
+ choices=(*PLACE_CASES.keys(), "all"),
+ default=None,
+ help=(
+ "Place target cases to benchmark. Defaults are selected by "
+ "--profile; use 'all' for every case."
+ ),
+ )
+ add_object_position_benchmark_args(parser)
+ add_grasp_benchmark_args(parser)
+ add_common_benchmark_args(parser)
+
+
+def _parse_args() -> argparse.Namespace:
+ """Parse command line arguments."""
+ parser = argparse.ArgumentParser(
+ description="Benchmark Place after a PickUp precondition."
+ )
+ add_benchmark_args(parser)
+ return parser.parse_args()
+
+
+def _selected_cases(
+ case_names: list[str] | None,
+ profile: str,
+) -> list[PlaceCase]:
+ """Resolve selected place case names."""
+ if not case_names:
+ return [PLACE_CASES["left_bin"]]
+ if "all" in case_names:
+ return list(PLACE_CASES.values())
+ return [PLACE_CASES[name] for name in case_names]
+
+
+def _make_place_pose(device, xyz: tuple[float, float, float]):
+ """Create a top-down place target pose."""
+ torch = ensure_torch()
+ pose = torch.eye(4, dtype=torch.float32, device=device)
+ pose[:3, :3] = torch.tensor(
+ [
+ [-0.0539, -0.9985, -0.0022],
+ [-0.9977, 0.0540, -0.0401],
+ [0.0401, 0.0000, -0.9992],
+ ],
+ dtype=torch.float32,
+ device=device,
+ )
+ pose[:3, 3] = torch.tensor(xyz, dtype=torch.float32, device=device)
+ return pose
+
+
+def _make_pickup_args(
+ args: argparse.Namespace,
+ object_preset: MeshObjectPreset,
+ profile: str,
+) -> argparse.Namespace:
+ """Build a tutorial-compatible argparse namespace."""
+ return argparse.Namespace(
+ object=object_preset.label,
+ n_sample=1000 if profile == "smoke" else args.n_sample,
+ force_reannotate=args.force_reannotate,
+ device=args.device,
+ renderer=args.renderer,
+ headless=True,
+ )
+
+
+def _prepare_held_state(
+ sim,
+ robot,
+ obj,
+ motion_gen,
+ args,
+ object_preset: MeshObjectPreset,
+ position_case: PositionCase,
+ pickup_approach: str,
+ profile: str,
+):
+ """Run PickUp precondition outside the timed Place block."""
+ from embodichain.lab.sim.atomic_actions import (
+ AtomicActionEngine,
+ GraspTarget,
+ PickUp,
+ PickUpCfg,
+ )
+ from scripts.tutorials.atomic_action.place import (
+ build_grasp_generator_cfg,
+ build_gripper_collision_cfg,
+ get_hand_open_close_qpos,
+ initialize_pre_pick_robot_pose,
+ )
+
+ hand_open, hand_close = get_hand_open_close_qpos(robot, sim.device)
+ initialize_pre_pick_robot_pose(robot, obj, hand_open)
+ atomic_engine = AtomicActionEngine(motion_generator=motion_gen)
+ atomic_engine.register(
+ PickUp(
+ motion_gen,
+ cfg=PickUpCfg(
+ control_part="arm",
+ hand_control_part="hand",
+ hand_open_qpos=hand_open,
+ hand_close_qpos=hand_close,
+ approach_direction=resolve_pickup_approach_direction(
+ pickup_approach, position_case, sim.device
+ ),
+ pre_grasp_distance=0.15,
+ lift_height=0.16,
+ sample_interval=PICK_SAMPLE_INTERVAL,
+ hand_interp_steps=HAND_INTERP_STEPS,
+ ),
+ )
+ )
+ semantics = create_antipodal_object_semantics(
+ obj=obj,
+ preset=object_preset,
+ args=_make_pickup_args(args, object_preset, profile),
+ build_gripper_collision_cfg=build_gripper_collision_cfg,
+ build_grasp_generator_cfg=build_grasp_generator_cfg,
+ )
+ is_success, traj, state = atomic_engine.run(
+ steps=[("pick_up", GraspTarget(semantics=semantics))]
+ )
+ if not is_success or state.held_object is None:
+ raise RuntimeError("Failed to prepare held-object state for Place benchmark.")
+ robot.set_qpos(state.last_qpos)
+ return state, hand_open, hand_close, traj
+
+
+def _run_case(
+ sim,
+ robot,
+ motion_gen,
+ initial_qpos,
+ args,
+ obj,
+ base_obj_pose,
+ object_preset: MeshObjectPreset,
+ position_case: PositionCase,
+ pickup_approach: str,
+ case: PlaceCase,
+ repeat: int,
+ recorded_count: int,
+ profile: str,
+):
+ """Run one Place benchmark case."""
+ from embodichain.lab.sim.atomic_actions import (
+ AtomicActionEngine,
+ EndEffectorPoseTarget,
+ Place,
+ PlaceCfg,
+ )
+ from scripts.tutorials.atomic_action.place import (
+ compute_pick_close_end_step,
+ initialize_pre_pick_robot_pose,
+ )
+
+ case_id = (
+ f"{object_preset.object_type}:{position_case.name}:"
+ f"{pickup_approach}:{case.name}:r{repeat}"
+ )
+ try:
+ reset_robot(robot, initial_qpos)
+ initial_obj_pose = reset_rigid_object_xy(
+ obj=obj,
+ base_pose=base_obj_pose,
+ xy=position_case.xy,
+ sim=sim,
+ settle_steps=2,
+ )
+ reset_rigid_object(obj, initial_obj_pose)
+ initial_obj_position = object_position_tuple(obj)
+ state, hand_open, hand_close, precondition_traj = _prepare_held_state(
+ sim,
+ robot,
+ obj,
+ motion_gen,
+ args,
+ object_preset,
+ position_case,
+ pickup_approach,
+ profile,
+ )
+ approach_direction_text = format_vector3(
+ pickup_approach_direction_tuple(pickup_approach, position_case)
+ )
+ precondition_waypoints = int(precondition_traj.shape[1])
+ atomic_engine = AtomicActionEngine(motion_generator=motion_gen)
+ atomic_engine.register(
+ Place(
+ motion_gen,
+ cfg=PlaceCfg(
+ control_part="arm",
+ hand_control_part="hand",
+ hand_open_qpos=hand_open,
+ hand_close_qpos=hand_close,
+ lift_height=PLACE_LIFT_HEIGHT,
+ sample_interval=PLACE_SAMPLE_INTERVAL,
+ hand_interp_steps=HAND_INTERP_STEPS,
+ ),
+ )
+ )
+ place_pose = _make_place_pose(sim.device, case.xyz)
+ elapsed, mem_delta, peak_gpu, result = timed_call(
+ lambda: atomic_engine.run(
+ steps=[("place", EndEffectorPoseTarget(xpos=place_pose))],
+ state=state,
+ )
+ )
+ is_success, traj, final_state = result
+ torch = ensure_torch()
+ precondition_obj_position = None
+ final_obj_position = None
+ object_lift_delta_m = None
+ place_xy_error_m = None
+ place_z_error_m = None
+
+ if (
+ getattr(precondition_traj, "ndim", 0) >= 3
+ and precondition_traj.shape[1] > 0
+ ):
+ if bool(is_success) and getattr(traj, "ndim", 0) >= 3 and traj.shape[1] > 0:
+ validation_traj = torch.cat((precondition_traj, traj), dim=1)
+ else:
+ validation_traj = precondition_traj
+
+ reset_robot(robot, initial_qpos)
+ reset_rigid_object(obj, initial_obj_pose)
+ initialize_pre_pick_robot_pose(robot, obj, hand_open)
+ post_grasp_clear_step = compute_pick_close_end_step()
+ should_clear_object_dynamics = True
+
+ def _on_validation_step(waypoint_index: int) -> None:
+ nonlocal should_clear_object_dynamics, precondition_obj_position
+ if (
+ should_clear_object_dynamics
+ and waypoint_index + 1 >= post_grasp_clear_step
+ ):
+ obj.clear_dynamics()
+ should_clear_object_dynamics = False
+ if (
+ precondition_obj_position is None
+ and waypoint_index + 1 >= precondition_waypoints
+ ):
+ precondition_obj_position = object_position_tuple(obj)
+
+ final_obj_position = replay_trajectory_for_physical_validation(
+ sim=sim,
+ robot=robot,
+ obj=obj,
+ traj=validation_traj,
+ on_step=_on_validation_step,
+ )
+ if precondition_obj_position is not None:
+ object_lift_delta_m = (
+ precondition_obj_position[2] - initial_obj_position[2]
+ )
+ if bool(is_success) and final_obj_position is not None:
+ place_xy_error_m = xy_distance_m(final_obj_position, case.xyz)
+ place_z_error_m = abs(final_obj_position[2] - case.xyz[2])
+ reset_robot(robot, initial_qpos)
+ reset_rigid_object(obj, initial_obj_pose)
+
+ released = bool(is_success and final_state.held_object is None)
+ physical_pick_success = bool(
+ object_lift_delta_m is not None
+ and object_lift_delta_m >= PHYSICAL_PICK_MIN_LIFT_M
+ )
+ physical_place_success = bool(
+ released
+ and physical_pick_success
+ and place_xy_error_m is not None
+ and place_xy_error_m <= PHYSICAL_PLACE_XY_TOLERANCE_M
+ )
+ success = physical_place_success
+ if success:
+ failure_reason = ""
+ elif not is_success:
+ failure_reason = "planning_failed"
+ elif not released:
+ failure_reason = "held_object_not_released"
+ elif not physical_pick_success:
+ failure_reason = "physical_pick_failed"
+ elif place_xy_error_m is None:
+ failure_reason = "physical_replay_missing"
+ else:
+ failure_reason = "place_target_miss"
+
+ video_path = None
+ if should_record_case(args, recorded_count, success):
+ reset_robot(robot, initial_qpos)
+ reset_rigid_object(obj, initial_obj_pose)
+ initialize_pre_pick_robot_pose(robot, obj, hand_open)
+ video_case_suffix = (
+ f"{object_preset.object_type}_{position_case.name}_"
+ f"{pickup_approach}_{case.name}_r{repeat}"
+ )
+ if getattr(traj, "ndim", 0) >= 3 and traj.shape[1] > 0:
+ replay_traj = torch.cat((precondition_traj, traj), dim=1)
+ else:
+ replay_traj = precondition_traj
+
+ if getattr(replay_traj, "ndim", 0) >= 3 and replay_traj.shape[1] > 0:
+ post_grasp_clear_step = compute_pick_close_end_step()
+ should_clear_object_dynamics = True
+
+ def _on_step(waypoint_index: int) -> None:
+ nonlocal should_clear_object_dynamics
+ if (
+ should_clear_object_dynamics
+ and waypoint_index + 1 >= post_grasp_clear_step
+ ):
+ obj.clear_dynamics()
+ should_clear_object_dynamics = False
+
+ video_path = replay_trajectory_with_recording(
+ sim=sim,
+ robot=robot,
+ traj=replay_traj,
+ args=args,
+ video_path=build_video_output_path(
+ args,
+ "atomic_action_place",
+ video_case_suffix,
+ ),
+ on_step=_on_step,
+ )
+ else:
+ video_path = record_static_scene_video(
+ sim=sim,
+ args=args,
+ video_path=build_video_output_path(
+ args,
+ "atomic_action_place",
+ f"{video_case_suffix}_failed_static",
+ ),
+ )
+ reset_robot(robot, initial_qpos)
+ reset_rigid_object(obj, initial_obj_pose)
+
+ return {
+ "case_id": case_id,
+ "object_type": object_preset.object_type,
+ "material": object_preset.material_name,
+ "quadrant": position_case.quadrant,
+ "position_case": position_case.name,
+ "init_xy": position_case.xy,
+ "pickup_approach": pickup_approach,
+ "approach_direction": approach_direction_text,
+ "place_case": case.name,
+ "repeat": repeat,
+ "planning_success": bool(is_success),
+ "released": released,
+ "physical_pick_success": physical_pick_success,
+ "physical_place_success": physical_place_success,
+ "success": success,
+ "cost_time_ms": elapsed * 1000.0,
+ "cpu_delta_mb": mem_delta["cpu_mb"],
+ "gpu_delta_mb": mem_delta["gpu_mb"],
+ "peak_gpu_mb": peak_gpu,
+ "object_lift_delta_m": object_lift_delta_m,
+ "object_final_x_m": (
+ final_obj_position[0] if final_obj_position is not None else None
+ ),
+ "object_final_y_m": (
+ final_obj_position[1] if final_obj_position is not None else None
+ ),
+ "object_final_z_m": (
+ final_obj_position[2] if final_obj_position is not None else None
+ ),
+ "place_xy_error_m": place_xy_error_m,
+ "place_z_error_m": place_z_error_m,
+ "precondition_waypoints": precondition_waypoints,
+ "trajectory_waypoints": int(traj.shape[1]) if traj.ndim >= 2 else 0,
+ "failure_reason": failure_reason,
+ "video_path": str(video_path) if video_path is not None else "",
+ }
+ except Exception as exc:
+ video_path = None
+ if should_record_case(args, recorded_count, False):
+ try:
+ reset_robot(robot, initial_qpos)
+ initial_obj_pose = reset_rigid_object_xy(
+ obj=obj,
+ base_pose=base_obj_pose,
+ xy=position_case.xy,
+ sim=sim,
+ settle_steps=2,
+ )
+ video_path = record_static_scene_video(
+ sim=sim,
+ args=args,
+ video_path=build_video_output_path(
+ args,
+ "atomic_action_place",
+ (
+ f"{object_preset.object_type}_{position_case.name}_"
+ f"{pickup_approach}_{case.name}_r{repeat}"
+ "_exception_static"
+ ),
+ ),
+ )
+ reset_rigid_object(obj, initial_obj_pose)
+ except Exception:
+ video_path = None
+ return {
+ "case_id": case_id,
+ "object_type": object_preset.object_type,
+ "material": object_preset.material_name,
+ "quadrant": position_case.quadrant,
+ "position_case": position_case.name,
+ "init_xy": position_case.xy,
+ "pickup_approach": pickup_approach,
+ "approach_direction": "N/A",
+ "place_case": case.name,
+ "repeat": repeat,
+ "planning_success": False,
+ "released": False,
+ "physical_pick_success": False,
+ "physical_place_success": False,
+ "success": False,
+ "cost_time_ms": 0.0,
+ "cpu_delta_mb": 0.0,
+ "gpu_delta_mb": 0.0,
+ "peak_gpu_mb": 0.0,
+ "object_lift_delta_m": None,
+ "object_final_x_m": None,
+ "object_final_y_m": None,
+ "object_final_z_m": None,
+ "place_xy_error_m": None,
+ "place_z_error_m": None,
+ "precondition_waypoints": 0,
+ "trajectory_waypoints": 0,
+ "failure_reason": f"exception:{type(exc).__name__}:{exc}",
+ "video_path": str(video_path) if video_path is not None else "",
+ }
+
+
+def _build_rows(results: list[dict[str, object]]):
+ """Build report rows for Place."""
+ perf_rows = []
+ metric_rows = []
+ for result in results:
+ perf_rows.append(
+ {
+ "sample_size": 1,
+ "impl": "place",
+ "case_id": result["case_id"],
+ "object_type": result["object_type"],
+ "material": result["material"],
+ "quadrant": result["quadrant"],
+ "position_case": result["position_case"],
+ "init_xy": f"({result['init_xy'][0]:.3f},{result['init_xy'][1]:.3f})",
+ "pickup_approach": result["pickup_approach"],
+ "approach_direction": result["approach_direction"],
+ "place_case": result["place_case"],
+ "repeat": result["repeat"],
+ "cost_time_ms": format_float(result["cost_time_ms"]),
+ "cpu_delta_mb": format_float(result["cpu_delta_mb"]),
+ "gpu_delta_mb": format_float(result["gpu_delta_mb"]),
+ "peak_gpu_mb": format_float(result["peak_gpu_mb"]),
+ }
+ )
+ metric_rows.append(
+ {
+ "sample_size": 1,
+ "impl": "place",
+ "case_id": result["case_id"],
+ "object_type": result["object_type"],
+ "material": result["material"],
+ "quadrant": result["quadrant"],
+ "position_case": result["position_case"],
+ "pickup_approach": result["pickup_approach"],
+ "approach_direction": result["approach_direction"],
+ "place_case": result["place_case"],
+ "success_rate": f"{float(result['success']):.6f}",
+ "planning_success_rate": f"{float(result['planning_success']):.6f}",
+ "release_success_rate": f"{float(result['released']):.6f}",
+ "physical_pick_success_rate": (
+ f"{float(result['physical_pick_success']):.6f}"
+ ),
+ "physical_place_success_rate": (
+ f"{float(result['physical_place_success']):.6f}"
+ ),
+ "object_lift_delta_m": format_float(result["object_lift_delta_m"]),
+ "object_final_x_m": format_float(result["object_final_x_m"]),
+ "object_final_y_m": format_float(result["object_final_y_m"]),
+ "object_final_z_m": format_float(result["object_final_z_m"]),
+ "place_xy_error_m": format_float(result["place_xy_error_m"]),
+ "place_z_error_m": format_float(result["place_z_error_m"]),
+ "precondition_waypoints": result["precondition_waypoints"],
+ "trajectory_waypoints": result["trajectory_waypoints"],
+ "failure_reason": result["failure_reason"] or "N/A",
+ }
+ )
+ return perf_rows, metric_rows
+
+
+def run_all_benchmarks(args: argparse.Namespace | None = None) -> Path:
+ """Run Place benchmark and write a markdown report."""
+ args = _parse_args() if args is None else args
+ if args.repeat < 1:
+ raise ValueError("--repeat must be at least 1.")
+ profile = resolve_profile(args)
+
+ ensure_repo_root()
+ ensure_torch()
+ from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg
+ from embodichain.lab.sim.planners import ToppraPlannerCfg
+ from scripts.tutorials.atomic_action.place import (
+ create_robot,
+ initialize_simulation,
+ )
+
+ object_presets = select_mesh_object_presets(args.object_types, profile)
+ position_cases = select_position_cases(args.position_cases, profile)
+ approaches = select_pickup_approaches(args.approach_cases, profile)
+ cases = _selected_cases(args.place_cases, profile)
+ repeat = 1 if profile == "smoke" else args.repeat
+
+ print("=" * 60)
+ print("Place Atomic Action Benchmark")
+ print("=" * 60)
+ print(
+ "Coverage: "
+ f"profile={profile}, {len(object_presets)} object(s) x "
+ f"{len(position_cases)} position(s) x {len(approaches)} pickup approach(es) "
+ f"x {len(cases)} place target(s) "
+ f"x {repeat} repeat(s)"
+ )
+
+ sim = initialize_simulation(args)
+ robot = create_robot(sim)
+ initial_qpos = robot.get_qpos().clone()
+ motion_gen = MotionGenerator(
+ cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
+ )
+ object_pool = {}
+ for object_index, object_preset in enumerate(object_presets):
+ obj = create_benchmark_object(
+ sim=sim,
+ preset=object_preset,
+ position_case=position_cases[0],
+ uid_suffix="pool",
+ )
+ base_pose = obj.get_local_pose(to_matrix=True).clone()
+ park_rigid_object(obj, base_pose, index=object_index, sim=sim)
+ object_pool[object_preset.object_type] = (obj, base_pose)
+
+ results: list[dict[str, object]] = []
+ video_paths: list[str] = []
+ print("\n=== Place Object/Position/Target Sweep ===")
+ for object_preset in object_presets:
+ obj, base_pose = object_pool[object_preset.object_type]
+ for parked_index, parked_preset in enumerate(object_presets):
+ if parked_preset.object_type == object_preset.object_type:
+ continue
+ parked_obj, parked_base_pose = object_pool[parked_preset.object_type]
+ park_rigid_object(parked_obj, parked_base_pose, index=parked_index, sim=sim)
+ for position_case in position_cases:
+ for pickup_approach in approaches:
+ for case in cases:
+ for repeat_index in range(repeat):
+ result = _run_case(
+ sim,
+ robot,
+ motion_gen,
+ initial_qpos,
+ args,
+ obj,
+ base_pose,
+ object_preset,
+ position_case,
+ pickup_approach,
+ case,
+ repeat_index,
+ len(video_paths),
+ profile,
+ )
+ results.append(result)
+ if result["video_path"]:
+ video_paths.append(str(result["video_path"]))
+ print(
+ f" {result['case_id']:<48} "
+ f"time={result['cost_time_ms']:>10.2f} ms | "
+ f"success={result['success']}"
+ )
+
+ perf_rows, metric_rows = _build_rows(results)
+ leaderboard_rows = build_single_action_leaderboard("place", metric_rows)
+ report_path = write_markdown_report(
+ benchmark_name="atomic_action_place",
+ perf_rows=perf_rows,
+ metric_rows=metric_rows,
+ leaderboard_rows=leaderboard_rows,
+ notes=[
+ "Timed block includes Place only; PickUp precondition is prepared "
+ "outside timing.",
+ f"Profile: {profile}",
+ "Object presets: "
+ + ", ".join(describe_object_preset(preset) for preset in object_presets),
+ "Position cases: "
+ + ", ".join(
+ f"{case.name}/{case.quadrant}/xy={case.xy}" for case in position_cases
+ ),
+ "PickUp approach cases: " + ", ".join(approaches),
+ "Place target cases: " + ", ".join(case.name for case in cases),
+ f"CPU memory backend: {CPU_MEMORY_BACKEND}",
+ f"n_sample: {1000 if profile == 'smoke' else args.n_sample}",
+ (
+ "Side grasp opening-axis rule: prefer candidates with "
+ f"abs(opening_axis_z) <= {SIDE_GRASP_MAX_OPEN_AXIS_ABS_Z:.2f}; "
+ "fallback cost += "
+ f"abs(opening_axis_z) * {SIDE_GRASP_OPEN_AXIS_Z_COST_WEIGHT:.2f}."
+ ),
+ (
+ "Physical success rule: PickUp precondition lift delta >= "
+ f"{PHYSICAL_PICK_MIN_LIFT_M:.3f} m, Place released the object, "
+ f"and final object XY error <= {PHYSICAL_PLACE_XY_TOLERANCE_M:.3f} m."
+ ),
+ *summarize_video_recording(args, results, video_paths),
+ ],
+ )
+ print(f"Markdown report saved: {report_path}")
+ return report_path
+
+
+def main() -> None:
+ """Run the CLI entry point."""
+ try:
+ run_all_benchmarks()
+ except RuntimeError as exc:
+ raise SystemExit(str(exc)) from exc
+
+
+if __name__ == "__main__":
+ main()
+
+
+__all__ = ["add_benchmark_args", "run_all_benchmarks"]
diff --git a/scripts/benchmark/atomic_action/press_benchmark.py b/scripts/benchmark/atomic_action/press_benchmark.py
new file mode 100644
index 000000000..a2c874a73
--- /dev/null
+++ b/scripts/benchmark/atomic_action/press_benchmark.py
@@ -0,0 +1,985 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Benchmark Press atomic action across object presets and start positions.
+
+The benchmark sweeps object presets such as bottle and mug against multiple
+initial XY positions that cover all four workspace quadrants. It reports
+planning latency, memory usage, planning success, and whether the generated
+trajectory reaches the object's top center.
+Run: python -m scripts.benchmark.atomic_action.press_benchmark
+"""
+
+from __future__ import annotations
+
+import argparse
+import math
+import os
+import resource
+import sys
+import time
+from dataclasses import dataclass
+from datetime import datetime
+from pathlib import Path
+
+from scripts.benchmark.atomic_action.common import (
+ add_profile_benchmark_args,
+ add_video_benchmark_args,
+ build_video_output_path,
+ COVERAGE_POSITION_CASE_NAMES,
+ park_rigid_object,
+ replay_trajectory_with_recording,
+ reset_rigid_object,
+ reset_rigid_object_xy,
+ resolve_profile,
+ should_record_case,
+ SMOKE_POSITION_CASE_NAMES,
+)
+
+try:
+ import psutil
+except ModuleNotFoundError:
+ psutil = None
+
+CPU_MEMORY_BACKEND = "psutil" if psutil is not None else "resource"
+_RUNTIME_IMPORTS_READY = False
+
+# Keep these constants aligned with scripts/tutorials/atomic_action/press.py.
+DEFAULT_PRESS_TOLERANCE = 0.01
+MOVE_SAMPLE_INTERVAL = 60
+PRESS_SAMPLE_INTERVAL = 90
+HAND_INTERP_STEPS = 12
+TABLE_TOP_Z = -0.045
+PRESS_CLEARANCE = 0.13
+PRESS_SURFACE_OFFSET = 0.003
+
+
+def _ensure_runtime_imports() -> None:
+ """Import simulation dependencies only when the benchmark is executed."""
+ global _RUNTIME_IMPORTS_READY
+ if _RUNTIME_IMPORTS_READY:
+ return
+
+ repo_root = Path(__file__).resolve().parents[3]
+ if str(repo_root) not in sys.path:
+ sys.path.insert(0, str(repo_root))
+
+ try:
+ import torch as torch_module
+ from embodichain.lab.sim import SimulationManager as simulation_manager_cls
+ from embodichain.lab.sim.atomic_actions import (
+ AtomicActionEngine as atomic_action_engine_cls,
+ EndEffectorPoseTarget as end_effector_pose_target_cls,
+ MoveEndEffector as move_end_effector_cls,
+ MoveEndEffectorCfg as move_end_effector_cfg_cls,
+ Press as press_cls,
+ PressCfg as press_cfg_cls,
+ )
+ from embodichain.lab.sim.cfg import (
+ RigidBodyAttributesCfg as rigid_body_attributes_cfg_cls,
+ RigidObjectCfg as rigid_object_cfg_cls,
+ )
+ from embodichain.lab.sim.material import (
+ VisualMaterialCfg as visual_material_cfg_cls,
+ )
+ from embodichain.lab.sim.objects import RigidObject as rigid_object_cls
+ from embodichain.lab.sim.objects import Robot as robot_cls
+ from embodichain.lab.sim.planners import (
+ MotionGenerator as motion_generator_cls,
+ MotionGenCfg as motion_gen_cfg_cls,
+ ToppraPlannerCfg as toppra_planner_cfg_cls,
+ )
+ from embodichain.lab.sim.shapes import CubeCfg as cube_cfg_cls
+ from scripts.tutorials.atomic_action.press import (
+ create_robot as create_robot_fn,
+ create_table as create_table_fn,
+ get_hand_close_qpos as get_hand_close_qpos_fn,
+ initialize_simulation as initialize_simulation_fn,
+ make_top_down_eef_pose as make_top_down_eef_pose_fn,
+ settle_object as settle_object_fn,
+ )
+ except ModuleNotFoundError as exc:
+ raise RuntimeError(
+ "Atomic action benchmark requires the EmbodiChain simulation runtime "
+ f"and PyTorch. Missing module: {exc.name}."
+ ) from exc
+
+ globals().update(
+ {
+ "torch": torch_module,
+ "SimulationManager": simulation_manager_cls,
+ "AtomicActionEngine": atomic_action_engine_cls,
+ "EndEffectorPoseTarget": end_effector_pose_target_cls,
+ "MoveEndEffector": move_end_effector_cls,
+ "MoveEndEffectorCfg": move_end_effector_cfg_cls,
+ "Press": press_cls,
+ "PressCfg": press_cfg_cls,
+ "RigidBodyAttributesCfg": rigid_body_attributes_cfg_cls,
+ "RigidObjectCfg": rigid_object_cfg_cls,
+ "VisualMaterialCfg": visual_material_cfg_cls,
+ "RigidObject": rigid_object_cls,
+ "Robot": robot_cls,
+ "MotionGenerator": motion_generator_cls,
+ "MotionGenCfg": motion_gen_cfg_cls,
+ "ToppraPlannerCfg": toppra_planner_cfg_cls,
+ "CubeCfg": cube_cfg_cls,
+ "create_robot": create_robot_fn,
+ "create_table": create_table_fn,
+ "get_hand_close_qpos": get_hand_close_qpos_fn,
+ "initialize_simulation": initialize_simulation_fn,
+ "make_top_down_eef_pose": make_top_down_eef_pose_fn,
+ "settle_object": settle_object_fn,
+ }
+ )
+ _RUNTIME_IMPORTS_READY = True
+
+
+@dataclass(frozen=True)
+class ObjectPreset:
+ """Primitive object preset used by the atomic-action benchmark."""
+
+ object_type: str
+ material_name: str
+ size: tuple[float, float, float]
+ base_color: tuple[float, float, float, float]
+ roughness: float
+ dynamic_friction: float = 0.8
+ static_friction: float = 0.9
+
+
+@dataclass(frozen=True)
+class PositionCase:
+ """Initial object position case with a quadrant label."""
+
+ name: str
+ quadrant: str
+ xy: tuple[float, float]
+
+
+@dataclass(frozen=True)
+class PressCaseResult:
+ """Result for one Press benchmark case."""
+
+ case_id: str
+ object_type: str
+ material_name: str
+ quadrant: str
+ position_case: str
+ init_xy: tuple[float, float]
+ repeat_index: int
+ planning_success: bool
+ center_hit: bool
+ cost_time_ms: float
+ cpu_delta_mb: float
+ gpu_delta_mb: float
+ peak_gpu_mb: float
+ xy_error_m: float | None
+ hit_step: int | None
+ trajectory_waypoints: int
+ failure_reason: str
+ video_path: str = ""
+
+
+OBJECT_PRESETS: dict[str, ObjectPreset] = {
+ "bottle": ObjectPreset(
+ object_type="bottle",
+ material_name="green_plastic",
+ size=(0.06, 0.06, 0.16),
+ base_color=(0.10, 0.45, 0.32, 1.0),
+ roughness=0.55,
+ ),
+ "mug": ObjectPreset(
+ object_type="mug",
+ material_name="ceramic",
+ size=(0.10, 0.08, 0.10),
+ base_color=(0.88, 0.85, 0.78, 1.0),
+ roughness=0.35,
+ ),
+ "wooden_block": ObjectPreset(
+ object_type="wooden_block",
+ material_name="wood",
+ size=(0.12, 0.12, 0.06),
+ base_color=(0.58, 0.32, 0.14, 1.0),
+ roughness=0.85,
+ ),
+}
+
+POSITION_CASES: dict[str, PositionCase] = {
+ "q1_near": PositionCase(name="q1_near", quadrant="q1", xy=(0.02, 0.18)),
+ "q1_far": PositionCase(name="q1_far", quadrant="q1", xy=(0.12, 0.36)),
+ "q2_near": PositionCase(name="q2_near", quadrant="q2", xy=(-0.42, 0.18)),
+ "q2_far": PositionCase(name="q2_far", quadrant="q2", xy=(-0.62, 0.36)),
+ "q3_near": PositionCase(name="q3_near", quadrant="q3", xy=(-0.42, -0.18)),
+ "q3_far": PositionCase(name="q3_far", quadrant="q3", xy=(-0.62, -0.36)),
+ "q4_near": PositionCase(name="q4_near", quadrant="q4", xy=(0.02, -0.18)),
+ "q4_far": PositionCase(name="q4_far", quadrant="q4", xy=(0.12, -0.36)),
+}
+
+DEFAULT_OBJECT_TYPES = ("bottle", "mug")
+FULL_OBJECT_TYPES = tuple(OBJECT_PRESETS.keys())
+SMOKE_OBJECT_TYPES = ("bottle",)
+
+
+def add_benchmark_args(parser: argparse.ArgumentParser) -> None:
+ """Add atomic-action benchmark arguments to an argument parser."""
+ add_profile_benchmark_args(parser)
+ parser.add_argument(
+ "--object_types",
+ nargs="+",
+ choices=(*OBJECT_PRESETS.keys(), "all"),
+ default=None,
+ help=(
+ "Object presets to benchmark. Defaults are selected by --profile; "
+ "use 'all' to include every preset."
+ ),
+ )
+ parser.add_argument(
+ "--position_cases",
+ nargs="+",
+ choices=(*POSITION_CASES.keys(), "all"),
+ default=None,
+ help=(
+ "Initial position cases to benchmark. Defaults are selected by "
+ "--profile; use 'all' for all near/far cases."
+ ),
+ )
+ parser.add_argument(
+ "--repeat",
+ type=int,
+ default=1,
+ help="Number of repeats for every object-position case.",
+ )
+ parser.add_argument(
+ "--smoke",
+ action="store_true",
+ help="Alias for --profile smoke.",
+ )
+ parser.add_argument(
+ "--device",
+ type=str,
+ default="cpu",
+ help="Simulation device, e.g. 'cpu' or 'cuda'.",
+ )
+ parser.add_argument(
+ "--renderer",
+ type=str,
+ choices=("auto", "hybrid", "fast-rt", "rt"),
+ default="auto",
+ help="Renderer backend used by SimulationManager.",
+ )
+ add_video_benchmark_args(parser)
+ parser.add_argument(
+ "--press_tolerance",
+ type=float,
+ default=DEFAULT_PRESS_TOLERANCE,
+ help="XY tolerance in meters for the press-center check.",
+ )
+
+
+def _parse_args() -> argparse.Namespace:
+ """Parse command line arguments for the atomic-action benchmark."""
+ parser = argparse.ArgumentParser(
+ description=(
+ "Benchmark Press atomic action over object presets and initial "
+ "workspace quadrants."
+ )
+ )
+ add_benchmark_args(parser)
+ return parser.parse_args()
+
+
+def _sync_cuda() -> None:
+ """Synchronize CUDA stream when available."""
+ if torch.cuda.is_available():
+ torch.cuda.synchronize()
+
+
+def _reset_peak_gpu_memory() -> None:
+ """Reset PyTorch peak GPU memory stats when CUDA is available."""
+ if torch.cuda.is_available():
+ torch.cuda.reset_peak_memory_stats()
+
+
+def _peak_gpu_memory_mb() -> float:
+ """Return peak GPU memory allocated by PyTorch in MB."""
+ if not torch.cuda.is_available():
+ return 0.0
+ return torch.cuda.max_memory_allocated() / 1024**2
+
+
+def _memory_snapshot() -> dict[str, float]:
+ """Return current process memory usage snapshot in MB."""
+ if psutil is not None:
+ process = psutil.Process(os.getpid())
+ cpu_mb = process.memory_info().rss / 1024**2
+ else:
+ cpu_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0
+ gpu_mb = (
+ torch.cuda.memory_allocated() / 1024**2 if torch.cuda.is_available() else 0.0
+ )
+ return {"cpu_mb": cpu_mb, "gpu_mb": gpu_mb}
+
+
+def _format_float(value: float | None, precision: int = 6) -> str:
+ """Format finite floats for tables and use N/A for missing values."""
+ if value is None or not math.isfinite(value):
+ return "N/A"
+ return f"{value:.{precision}f}"
+
+
+def _format_markdown_table(rows: list[dict[str, object]]) -> list[str]:
+ """Format rows into a markdown table."""
+ if not rows:
+ return ["No data."]
+
+ headers = list(rows[0].keys())
+ lines = [
+ "| " + " | ".join(headers) + " |",
+ "| " + " | ".join(["---"] * len(headers)) + " |",
+ ]
+ for row in rows:
+ lines.append("| " + " | ".join(str(row[h]) for h in headers) + " |")
+ return lines
+
+
+def _write_markdown_report(
+ benchmark_name: str,
+ perf_rows: list[dict[str, object]],
+ metric_rows: list[dict[str, object]],
+ leaderboard_rows: list[dict[str, object]],
+ notes: list[str] | None = None,
+) -> Path:
+ """Write benchmark results into one markdown report file."""
+ output_dir = Path("outputs/benchmarks")
+ output_dir.mkdir(parents=True, exist_ok=True)
+
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+ report_path = output_dir / f"{benchmark_name}_{timestamp}.md"
+
+ lines: list[str] = [
+ f"# {benchmark_name} Benchmark Report",
+ "",
+ f"Generated at: {datetime.now().isoformat(timespec='seconds')}",
+ "",
+ "## Time & Memory",
+ "",
+ ]
+ lines.extend(_format_markdown_table(perf_rows))
+ lines.extend(["", "## Success & Other Metrics", ""])
+ lines.extend(_format_markdown_table(metric_rows))
+ lines.extend(["", "## Leaderboard", ""])
+ lines.extend(_format_markdown_table(leaderboard_rows))
+
+ if notes:
+ lines.extend(["", "## Notes", ""])
+ lines.extend([f"- {note}" for note in notes])
+
+ report_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
+ return report_path
+
+
+def _default_object_types_for_profile(profile: str) -> tuple[str, ...]:
+ """Return default Press primitive object names for a profile."""
+ if profile in ("smoke", "coverage", "full"):
+ return SMOKE_OBJECT_TYPES
+ raise ValueError(f"Unsupported benchmark profile: {profile}")
+
+
+def _select_object_presets(
+ object_types: list[str] | None,
+ profile: str,
+) -> list[ObjectPreset]:
+ """Resolve selected object preset names."""
+ if not object_types:
+ object_types = list(_default_object_types_for_profile(profile))
+ if "all" in object_types:
+ return list(OBJECT_PRESETS.values())
+ return [OBJECT_PRESETS[name] for name in object_types]
+
+
+def _default_position_cases_for_profile(profile: str) -> tuple[str, ...]:
+ """Return default Press position case names for a profile."""
+ if profile == "smoke":
+ return SMOKE_POSITION_CASE_NAMES
+ if profile in ("coverage", "full"):
+ return COVERAGE_POSITION_CASE_NAMES
+ raise ValueError(f"Unsupported benchmark profile: {profile}")
+
+
+def _select_position_cases(
+ position_cases: list[str] | None,
+ profile: str,
+) -> list[PositionCase]:
+ """Resolve selected position case names."""
+ if not position_cases:
+ position_cases = list(_default_position_cases_for_profile(profile))
+ if "all" in position_cases:
+ return list(POSITION_CASES.values())
+ return [POSITION_CASES[name] for name in position_cases]
+
+
+def _create_benchmark_object(
+ sim: SimulationManager,
+ preset: ObjectPreset,
+ position_case: PositionCase,
+ repeat_index: int,
+) -> RigidObject:
+ """Create one static benchmark object at the requested initial position."""
+ init_pos = (
+ position_case.xy[0],
+ position_case.xy[1],
+ TABLE_TOP_Z + 0.5 * preset.size[2],
+ )
+ uid = f"atomic_benchmark_{preset.object_type}_{position_case.name}_{repeat_index}"
+ cfg = RigidObjectCfg(
+ uid=uid,
+ shape=CubeCfg(
+ size=list(preset.size),
+ visual_material=VisualMaterialCfg(
+ uid=f"{preset.object_type}_{preset.material_name}_mat",
+ base_color=list(preset.base_color),
+ roughness=preset.roughness,
+ ),
+ ),
+ body_type="static",
+ attrs=RigidBodyAttributesCfg(
+ dynamic_friction=preset.dynamic_friction,
+ static_friction=preset.static_friction,
+ ),
+ init_pos=init_pos,
+ )
+ return sim.add_rigid_object(cfg=cfg)
+
+
+def _reset_robot(robot: Robot, initial_qpos: torch.Tensor) -> None:
+ """Reset current and target robot qpos to the benchmark initial posture."""
+ for target in (False, True):
+ robot.set_qpos(initial_qpos, target=target)
+ robot.clear_dynamics()
+
+
+def _build_atomic_engine(
+ motion_gen: MotionGenerator,
+ robot: Robot,
+ device: torch.device,
+) -> AtomicActionEngine:
+ """Build a Press benchmark engine with MoveEndEffector pre-positioning."""
+ hand_close = get_hand_close_qpos(robot, device)
+ atomic_engine = AtomicActionEngine(motion_generator=motion_gen)
+ atomic_engine.register(
+ MoveEndEffector(
+ motion_gen,
+ cfg=MoveEndEffectorCfg(
+ control_part="arm",
+ sample_interval=MOVE_SAMPLE_INTERVAL,
+ ),
+ )
+ )
+ atomic_engine.register(
+ Press(
+ motion_gen,
+ cfg=PressCfg(
+ control_part="arm",
+ hand_control_part="hand",
+ hand_close_qpos=hand_close,
+ sample_interval=PRESS_SAMPLE_INTERVAL,
+ hand_interp_steps=HAND_INTERP_STEPS,
+ ),
+ )
+ )
+ return atomic_engine
+
+
+def _make_press_targets(
+ obj: RigidObject,
+ preset: ObjectPreset,
+) -> tuple[torch.Tensor, torch.Tensor]:
+ """Create pre-press and press poses for the object's top center."""
+ obj_pose = obj.get_local_pose(to_matrix=True)
+ object_center = obj_pose[0, :3, 3].clone()
+ object_top_z = object_center[2] + 0.5 * preset.size[2]
+
+ press_position = object_center.clone()
+ press_position[2] = object_top_z + PRESS_SURFACE_OFFSET
+ move_position = press_position.clone()
+ move_position[2] = object_top_z + PRESS_CLEARANCE
+
+ return make_top_down_eef_pose(move_position), make_top_down_eef_pose(press_position)
+
+
+def _compute_press_center_check(
+ robot: Robot,
+ traj: torch.Tensor,
+ obj: RigidObject,
+ object_height: float,
+ tolerance: float,
+) -> tuple[bool, float, int]:
+ """Check whether the planned Press trajectory reaches the object top center."""
+ if traj.numel() == 0:
+ return False, float("inf"), -1
+
+ arm_joint_ids = robot.get_joint_ids(name="arm")
+ n_down = (PRESS_SAMPLE_INTERVAL - HAND_INTERP_STEPS) // 2
+ press_segment_start = MOVE_SAMPLE_INTERVAL + HAND_INTERP_STEPS
+ press_segment_end = min(press_segment_start + n_down, traj.shape[1])
+ arm_traj = traj[:, press_segment_start:press_segment_end, arm_joint_ids]
+ if arm_traj.shape[1] == 0:
+ return False, float("inf"), -1
+
+ fk_pose = torch.stack(
+ [
+ robot.compute_fk(
+ qpos=waypoint.unsqueeze(0),
+ name="arm",
+ to_matrix=True,
+ )[0]
+ for waypoint in arm_traj[0]
+ ],
+ dim=0,
+ )
+
+ obj_pose = obj.get_local_pose(to_matrix=True)
+ object_center = obj_pose[0, :3, 3]
+ object_top_z = object_center[2] + 0.5 * object_height
+ target_xy = object_center[:2]
+ target_z = object_top_z + PRESS_SURFACE_OFFSET
+
+ xy_error = torch.linalg.norm(fk_pose[:, :2, 3] - target_xy, dim=1)
+ z_error = torch.abs(fk_pose[:, 2, 3] - target_z)
+ combined_error = xy_error + z_error
+ best_idx = int(torch.argmin(combined_error).item())
+ best_pos = fk_pose[best_idx, :3, 3]
+ center_error = float(torch.linalg.norm(best_pos[:2] - target_xy).item())
+ return center_error <= tolerance, center_error, press_segment_start + best_idx
+
+
+def _timed_atomic_run(
+ atomic_engine: AtomicActionEngine,
+ move_target: torch.Tensor,
+ press_target: torch.Tensor,
+) -> tuple[float, dict[str, float], float, bool, torch.Tensor]:
+ """Run a timed atomic-action sequence and return timing/memory/results."""
+ _reset_peak_gpu_memory()
+ mem_before = _memory_snapshot()
+ _sync_cuda()
+
+ start = time.perf_counter()
+ is_success, traj, _ = atomic_engine.run(
+ steps=[
+ ("move_end_effector", EndEffectorPoseTarget(xpos=move_target)),
+ ("press", EndEffectorPoseTarget(xpos=press_target)),
+ ]
+ )
+ _sync_cuda()
+ elapsed = time.perf_counter() - start
+
+ mem_after = _memory_snapshot()
+ deltas = {
+ "cpu_mb": mem_after["cpu_mb"] - mem_before["cpu_mb"],
+ "gpu_mb": mem_after["gpu_mb"] - mem_before["gpu_mb"],
+ }
+ return elapsed, deltas, _peak_gpu_memory_mb(), is_success, traj
+
+
+def _run_press_case(
+ sim: SimulationManager,
+ robot: Robot,
+ atomic_engine: AtomicActionEngine,
+ initial_qpos: torch.Tensor,
+ obj: RigidObject,
+ base_obj_pose: torch.Tensor,
+ preset: ObjectPreset,
+ position_case: PositionCase,
+ repeat_index: int,
+ press_tolerance: float,
+ args: argparse.Namespace,
+ recorded_count: int,
+) -> PressCaseResult:
+ """Run one object-position Press benchmark case."""
+ case_id = f"{preset.object_type}:{position_case.name}:r{repeat_index}"
+ try:
+ _reset_robot(robot, initial_qpos)
+ initial_obj_pose = reset_rigid_object_xy(
+ obj=obj,
+ base_pose=base_obj_pose,
+ xy=position_case.xy,
+ sim=sim,
+ settle_steps=2,
+ )
+ move_target, press_target = _make_press_targets(obj, preset)
+
+ elapsed, mem_delta, peak_gpu, planning_success, traj = _timed_atomic_run(
+ atomic_engine=atomic_engine,
+ move_target=move_target,
+ press_target=press_target,
+ )
+ video_path = None
+ if should_record_case(args, recorded_count, bool(planning_success)):
+ _reset_robot(robot, initial_qpos)
+ reset_rigid_object(obj, initial_obj_pose)
+ video_path = replay_trajectory_with_recording(
+ sim=sim,
+ robot=robot,
+ traj=traj,
+ args=args,
+ video_path=build_video_output_path(
+ args,
+ "atomic_action_press",
+ (f"{preset.object_type}_{position_case.name}" f"_r{repeat_index}"),
+ ),
+ )
+ _reset_robot(robot, initial_qpos)
+ reset_rigid_object(obj, initial_obj_pose)
+
+ center_hit = False
+ xy_error_m: float | None = None
+ hit_step: int | None = None
+ failure_reason = ""
+ if planning_success:
+ center_hit, xy_error_m, raw_hit_step = _compute_press_center_check(
+ robot=robot,
+ traj=traj,
+ obj=obj,
+ object_height=preset.size[2],
+ tolerance=press_tolerance,
+ )
+ hit_step = raw_hit_step if raw_hit_step >= 0 else None
+ if not center_hit:
+ failure_reason = "center_miss"
+ else:
+ failure_reason = "planning_failed"
+
+ return PressCaseResult(
+ case_id=case_id,
+ object_type=preset.object_type,
+ material_name=preset.material_name,
+ quadrant=position_case.quadrant,
+ position_case=position_case.name,
+ init_xy=position_case.xy,
+ repeat_index=repeat_index,
+ planning_success=planning_success,
+ center_hit=center_hit,
+ cost_time_ms=elapsed * 1000.0,
+ cpu_delta_mb=mem_delta["cpu_mb"],
+ gpu_delta_mb=mem_delta["gpu_mb"],
+ peak_gpu_mb=peak_gpu,
+ xy_error_m=xy_error_m,
+ hit_step=hit_step,
+ trajectory_waypoints=int(traj.shape[1]) if traj.ndim >= 2 else 0,
+ failure_reason=failure_reason,
+ video_path=str(video_path) if video_path is not None else "",
+ )
+ except Exception as exc:
+ return PressCaseResult(
+ case_id=case_id,
+ object_type=preset.object_type,
+ material_name=preset.material_name,
+ quadrant=position_case.quadrant,
+ position_case=position_case.name,
+ init_xy=position_case.xy,
+ repeat_index=repeat_index,
+ planning_success=False,
+ center_hit=False,
+ cost_time_ms=0.0,
+ cpu_delta_mb=0.0,
+ gpu_delta_mb=0.0,
+ peak_gpu_mb=0.0,
+ xy_error_m=None,
+ hit_step=None,
+ trajectory_waypoints=0,
+ failure_reason=f"exception:{type(exc).__name__}:{exc}",
+ )
+
+
+def _build_perf_rows(results: list[PressCaseResult]) -> list[dict[str, object]]:
+ """Build Time & Memory table rows."""
+ rows: list[dict[str, object]] = []
+ for result in results:
+ rows.append(
+ {
+ "sample_size": 1,
+ "impl": "press",
+ "case_id": result.case_id,
+ "object_type": result.object_type,
+ "material": result.material_name,
+ "quadrant": result.quadrant,
+ "position_case": result.position_case,
+ "init_xy": f"({result.init_xy[0]:.3f},{result.init_xy[1]:.3f})",
+ "repeat": result.repeat_index,
+ "cost_time_ms": _format_float(result.cost_time_ms),
+ "cpu_delta_mb": _format_float(result.cpu_delta_mb),
+ "gpu_delta_mb": _format_float(result.gpu_delta_mb),
+ "peak_gpu_mb": _format_float(result.peak_gpu_mb),
+ }
+ )
+ return rows
+
+
+def _build_metric_rows(results: list[PressCaseResult]) -> list[dict[str, object]]:
+ """Build Success & Other Metrics table rows."""
+ rows: list[dict[str, object]] = []
+ for result in results:
+ overall_success = result.planning_success and result.center_hit
+ rows.append(
+ {
+ "sample_size": 1,
+ "impl": "press",
+ "case_id": result.case_id,
+ "object_type": result.object_type,
+ "material": result.material_name,
+ "quadrant": result.quadrant,
+ "position_case": result.position_case,
+ "success_rate": f"{float(overall_success):.6f}",
+ "planning_success_rate": f"{float(result.planning_success):.6f}",
+ "center_hit_rate": f"{float(result.center_hit):.6f}",
+ "xy_error_m": _format_float(result.xy_error_m),
+ "hit_step": result.hit_step if result.hit_step is not None else "N/A",
+ "trajectory_waypoints": result.trajectory_waypoints,
+ "failure_reason": result.failure_reason or "N/A",
+ }
+ )
+ return rows
+
+
+def _build_leaderboard_rows(results: list[PressCaseResult]) -> list[dict[str, object]]:
+ """Aggregate and rank object-conditioned Press variants by success rate."""
+ aggregate: dict[str, dict[str, float | set[str]]] = {}
+ for result in results:
+ algorithm = f"press:{result.object_type}"
+ if algorithm not in aggregate:
+ aggregate[algorithm] = {
+ "overall_success_sum": 0.0,
+ "planning_success_sum": 0.0,
+ "xy_error_sum": 0.0,
+ "xy_error_count": 0.0,
+ "cost_time_sum": 0.0,
+ "case_count": 0.0,
+ "quadrants": set(),
+ }
+
+ stats = aggregate[algorithm]
+ stats["overall_success_sum"] = float(stats["overall_success_sum"]) + float(
+ result.planning_success and result.center_hit
+ )
+ stats["planning_success_sum"] = float(stats["planning_success_sum"]) + float(
+ result.planning_success
+ )
+ if result.xy_error_m is not None and math.isfinite(result.xy_error_m):
+ stats["xy_error_sum"] = float(stats["xy_error_sum"]) + result.xy_error_m
+ stats["xy_error_count"] = float(stats["xy_error_count"]) + 1.0
+ stats["cost_time_sum"] = float(stats["cost_time_sum"]) + result.cost_time_ms
+ stats["case_count"] = float(stats["case_count"]) + 1.0
+ quadrants = stats["quadrants"]
+ if isinstance(quadrants, set):
+ quadrants.add(result.quadrant)
+
+ ranked = sorted(
+ aggregate.items(),
+ key=lambda item: (
+ float(item[1]["overall_success_sum"])
+ / max(float(item[1]["case_count"]), 1.0),
+ -float(item[1]["cost_time_sum"]) / max(float(item[1]["case_count"]), 1.0),
+ ),
+ reverse=True,
+ )
+
+ rows: list[dict[str, object]] = []
+ for rank, (algorithm, stats) in enumerate(ranked, start=1):
+ case_count = max(float(stats["case_count"]), 1.0)
+ xy_error_count = float(stats["xy_error_count"])
+ avg_xy_error = (
+ float(stats["xy_error_sum"]) / xy_error_count
+ if xy_error_count > 0.0
+ else None
+ )
+ quadrants = stats["quadrants"]
+ quadrant_coverage = (
+ ",".join(sorted(quadrants)) if isinstance(quadrants, set) else ""
+ )
+ rows.append(
+ {
+ "rank": rank,
+ "algorithm": algorithm,
+ "overall_success_rate": (
+ f"{float(stats['overall_success_sum']) / case_count:.2%}"
+ ),
+ "planning_success_rate": (
+ f"{float(stats['planning_success_sum']) / case_count:.2%}"
+ ),
+ "avg_xy_error_m": _format_float(avg_xy_error),
+ "avg_cost_time_ms": _format_float(
+ float(stats["cost_time_sum"]) / case_count
+ ),
+ "evaluated_cases": int(case_count),
+ "quadrant_coverage": quadrant_coverage,
+ }
+ )
+ return rows
+
+
+def _print_case_result(result: PressCaseResult) -> None:
+ """Print one aligned case result line."""
+ overall_success = result.planning_success and result.center_hit
+ print(
+ f" {result.case_id:<28} "
+ f"time={result.cost_time_ms:>10.2f} ms | "
+ f"CPU delta={result.cpu_delta_mb:+.1f} MB "
+ f"GPU delta={result.gpu_delta_mb:+.1f} MB "
+ f"peak GPU={result.peak_gpu_mb:.1f} MB | "
+ f"success={overall_success} "
+ f"xy_error={_format_float(result.xy_error_m, precision=4)}"
+ )
+ if result.failure_reason:
+ print(f" reason={result.failure_reason}")
+
+
+def _build_notes(
+ object_presets: list[ObjectPreset],
+ position_cases: list[PositionCase],
+ repeat: int,
+ video_paths: list[str],
+ profile: str,
+) -> list[str]:
+ """Build report notes with benchmark coverage metadata."""
+ quadrant_counts: dict[str, int] = {}
+ for position_case in position_cases:
+ quadrant_counts[position_case.quadrant] = (
+ quadrant_counts.get(position_case.quadrant, 0) + 1
+ )
+ return [
+ f"Profile: {profile}",
+ "Object presets: "
+ + ", ".join(
+ f"{preset.object_type}/{preset.material_name}/size={preset.size}"
+ for preset in object_presets
+ ),
+ "Position cases per quadrant: "
+ + ", ".join(
+ f"{quadrant}={count}" for quadrant, count in sorted(quadrant_counts.items())
+ ),
+ f"CPU memory backend: {CPU_MEMORY_BACKEND}",
+ f"Repeat per object-position case: {repeat}",
+ "Replay videos: " + (", ".join(video_paths) if video_paths else "disabled"),
+ "success_rate is 1 only when planning succeeds and the Press trajectory "
+ "reaches the object top center.",
+ ]
+
+
+def run_all_benchmarks(args: argparse.Namespace | None = None) -> Path:
+ """Run all atomic-action benchmarks and write the markdown report."""
+ args = _parse_args() if args is None else args
+ if args.repeat < 1:
+ raise ValueError("--repeat must be at least 1.")
+ profile = resolve_profile(args)
+ _ensure_runtime_imports()
+
+ object_presets = _select_object_presets(args.object_types, profile)
+ position_cases = _select_position_cases(args.position_cases, profile)
+ repeat = 1 if profile == "smoke" else args.repeat
+
+ print("=" * 60)
+ print("Atomic Action Press Benchmark")
+ print("=" * 60)
+ print(
+ "Coverage: "
+ f"profile={profile}, {len(object_presets)} object presets x "
+ f"{len(position_cases)} position cases x {repeat} repeat(s)"
+ )
+
+ sim = initialize_simulation(args)
+ robot = create_robot(sim)
+ create_table(sim)
+ initial_qpos = robot.get_qpos().clone()
+ motion_gen = MotionGenerator(
+ cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
+ )
+ atomic_engine = _build_atomic_engine(motion_gen, robot, sim.device)
+ object_pool = {}
+ for object_index, preset in enumerate(object_presets):
+ obj = _create_benchmark_object(sim, preset, position_cases[0], object_index)
+ settle_object(sim, obj, step=2)
+ base_pose = obj.get_local_pose(to_matrix=True).clone()
+ park_rigid_object(obj, base_pose, index=object_index, sim=sim)
+ object_pool[preset.object_type] = (obj, base_pose)
+
+ results: list[PressCaseResult] = []
+ video_paths: list[str] = []
+ print("\n=== Press Object/Position Sweep ===")
+ for preset in object_presets:
+ obj, base_pose = object_pool[preset.object_type]
+ for parked_index, parked_preset in enumerate(object_presets):
+ if parked_preset.object_type == preset.object_type:
+ continue
+ parked_obj, parked_base_pose = object_pool[parked_preset.object_type]
+ park_rigid_object(parked_obj, parked_base_pose, index=parked_index, sim=sim)
+ for position_case in position_cases:
+ for repeat_index in range(repeat):
+ result = _run_press_case(
+ sim=sim,
+ robot=robot,
+ atomic_engine=atomic_engine,
+ initial_qpos=initial_qpos,
+ obj=obj,
+ base_obj_pose=base_pose,
+ preset=preset,
+ position_case=position_case,
+ repeat_index=repeat_index,
+ press_tolerance=args.press_tolerance,
+ args=args,
+ recorded_count=len(video_paths),
+ )
+ results.append(result)
+ if result.video_path:
+ video_paths.append(result.video_path)
+ _print_case_result(result)
+
+ perf_rows = _build_perf_rows(results)
+ metric_rows = _build_metric_rows(results)
+ leaderboard_rows = _build_leaderboard_rows(results)
+ report_path = _write_markdown_report(
+ benchmark_name="atomic_action_press",
+ perf_rows=perf_rows,
+ metric_rows=metric_rows,
+ leaderboard_rows=leaderboard_rows,
+ notes=_build_notes(
+ object_presets,
+ position_cases,
+ repeat,
+ video_paths,
+ profile,
+ ),
+ )
+
+ print("\n" + "=" * 60)
+ print("Benchmarks complete.")
+ print(f"Markdown report saved: {report_path}")
+ print("=" * 60)
+ return report_path
+
+
+def main() -> None:
+ """Run the CLI entry point."""
+ try:
+ run_all_benchmarks()
+ except RuntimeError as exc:
+ raise SystemExit(str(exc)) from exc
+
+
+if __name__ == "__main__":
+ main()
+
+
+__all__ = ["add_benchmark_args", "run_all_benchmarks"]
diff --git a/scripts/benchmark/atomic_action/run_benchmark.py b/scripts/benchmark/atomic_action/run_benchmark.py
new file mode 100644
index 000000000..26c9749b3
--- /dev/null
+++ b/scripts/benchmark/atomic_action/run_benchmark.py
@@ -0,0 +1,340 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Dispatch benchmarks for all atomic actions.
+
+Run a single action benchmark or all action benchmarks in sequence.
+Run: python -m scripts.benchmark.atomic_action.run_benchmark --action press
+"""
+
+from __future__ import annotations
+
+import argparse
+import shlex
+import subprocess
+import sys
+from pathlib import Path
+
+from scripts.benchmark.atomic_action.common import (
+ MESH_OBJECT_PRESETS,
+ PICKUP_APPROACH_CASES,
+ POSITION_CASES,
+ add_profile_benchmark_args,
+ add_video_benchmark_args,
+ resolve_profile,
+)
+
+ACTION_MODULES = {
+ "move_end_effector": "scripts.benchmark.atomic_action.move_end_effector_benchmark",
+ "move_joints": "scripts.benchmark.atomic_action.move_joints_benchmark",
+ "pick_up": "scripts.benchmark.atomic_action.pickup_benchmark",
+ "move_held_object": "scripts.benchmark.atomic_action.move_held_object_benchmark",
+ "place": "scripts.benchmark.atomic_action.place_benchmark",
+ "press": "scripts.benchmark.atomic_action.press_benchmark",
+}
+DEFAULT_ACTIONS = tuple(ACTION_MODULES.keys())
+MESH_OBJECT_ACTIONS = {"pick_up", "move_held_object", "place"}
+PRESS_OBJECT_TYPES = {"bottle", "mug", "wooden_block", "all"}
+MESH_OBJECT_TYPES = {*MESH_OBJECT_PRESETS.keys(), "all"}
+
+
+def add_benchmark_args(parser: argparse.ArgumentParser) -> None:
+ """Add atomic-action aggregate benchmark CLI arguments."""
+ parser.add_argument(
+ "--action",
+ nargs="+",
+ choices=(*ACTION_MODULES.keys(), "all"),
+ default=["press"],
+ help="Atomic action benchmark(s) to run. Use 'all' for every action.",
+ )
+ parser.add_argument(
+ "--smoke",
+ action="store_true",
+ help="Alias for --profile smoke.",
+ )
+ add_profile_benchmark_args(parser)
+ parser.add_argument(
+ "--object_types",
+ nargs="+",
+ default=None,
+ help=(
+ "Optional object presets forwarded to selected object-conditioned "
+ "benchmarks. Values must be valid for each selected action."
+ ),
+ )
+ parser.add_argument(
+ "--position_cases",
+ nargs="+",
+ choices=(*POSITION_CASES.keys(), "all"),
+ default=None,
+ help=(
+ "Optional initial position cases forwarded to selected "
+ "object-conditioned benchmarks."
+ ),
+ )
+ parser.add_argument(
+ "--approach_cases",
+ nargs="+",
+ choices=(*PICKUP_APPROACH_CASES, "all"),
+ default=None,
+ help=(
+ "Optional PickUp approach cases forwarded to pick/place/held-object "
+ "benchmarks."
+ ),
+ )
+ parser.add_argument(
+ "--device",
+ type=str,
+ default="cpu",
+ help="Simulation device forwarded to each selected benchmark.",
+ )
+ parser.add_argument(
+ "--renderer",
+ type=str,
+ choices=("auto", "hybrid", "fast-rt", "rt"),
+ default="auto",
+ help="Renderer backend forwarded to each selected benchmark.",
+ )
+ add_video_benchmark_args(parser)
+ parser.add_argument(
+ "--in_process",
+ action="store_true",
+ help=(
+ "Run selected benchmarks in the current Python process. This is useful "
+ "for debugging but can leave DexSim resources alive across actions."
+ ),
+ )
+
+
+def _parse_args() -> argparse.Namespace:
+ """Parse command line arguments."""
+ parser = argparse.ArgumentParser(
+ description="Run one or more atomic-action benchmarks."
+ )
+ add_benchmark_args(parser)
+ return parser.parse_args()
+
+
+def _selected_actions(actions: list[str]) -> list[str]:
+ """Resolve selected action names."""
+ if "all" in actions:
+ return list(DEFAULT_ACTIONS)
+ return actions
+
+
+def _validate_object_types_for_actions(
+ selected_actions: list[str],
+ object_types: list[str] | None,
+) -> None:
+ """Validate optional object filters against selected action namespaces."""
+ if not object_types:
+ return
+
+ requested = set(object_types)
+ validators: dict[str, set[str]] = {}
+ for action_name in selected_actions:
+ if action_name in MESH_OBJECT_ACTIONS:
+ validators[action_name] = MESH_OBJECT_TYPES
+ elif action_name == "press":
+ validators[action_name] = PRESS_OBJECT_TYPES
+
+ invalid_parts = []
+ for action_name, valid_types in validators.items():
+ invalid = sorted(requested - valid_types)
+ if invalid:
+ valid = ", ".join(sorted(valid_types))
+ invalid_parts.append(
+ f"{action_name}: invalid {invalid}; valid values are {valid}"
+ )
+ if invalid_parts:
+ raise RuntimeError(
+ "--object_types must be valid for every selected object-conditioned "
+ "benchmark. " + " | ".join(invalid_parts)
+ )
+
+
+def _make_child_args(args: argparse.Namespace) -> argparse.Namespace:
+ """Build minimal child benchmark arguments for aggregate dispatch."""
+ profile = resolve_profile(args)
+ return argparse.Namespace(
+ smoke=profile == "smoke",
+ profile=profile,
+ repeat=1,
+ device=args.device,
+ renderer=args.renderer,
+ object_types=args.object_types,
+ position_cases=args.position_cases,
+ press_tolerance=0.01,
+ pose_cases=["all"],
+ sequence_cases=["all"],
+ approach_cases=args.approach_cases,
+ place_cases=None,
+ held_object_cases=None,
+ n_sample=1000 if profile == "smoke" else 10000,
+ force_reannotate=False,
+ record_video=args.record_video,
+ record_failed_video=args.record_failed_video,
+ video_case_limit=args.video_case_limit,
+ video_dir=args.video_dir,
+ video_fps=args.video_fps,
+ video_max_memory=args.video_max_memory,
+ video_width=args.video_width,
+ video_height=args.video_height,
+ video_hold_steps=args.video_hold_steps,
+ )
+
+
+def _make_child_cli_args(args: argparse.Namespace, action_name: str) -> list[str]:
+ """Build CLI arguments forwarded to one isolated benchmark subprocess."""
+ profile = resolve_profile(args)
+ child_args = [
+ "--profile",
+ profile,
+ "--repeat",
+ "1",
+ "--device",
+ args.device,
+ "--renderer",
+ args.renderer,
+ "--video_case_limit",
+ str(args.video_case_limit),
+ "--video_dir",
+ str(args.video_dir),
+ "--video_fps",
+ str(args.video_fps),
+ "--video_max_memory",
+ str(args.video_max_memory),
+ "--video_width",
+ str(args.video_width),
+ "--video_height",
+ str(args.video_height),
+ "--video_hold_steps",
+ str(args.video_hold_steps),
+ ]
+ if action_name in {"pick_up", "move_held_object", "place", "press"}:
+ if args.object_types:
+ child_args.append("--object_types")
+ child_args.extend(args.object_types)
+ if args.position_cases:
+ child_args.append("--position_cases")
+ child_args.extend(args.position_cases)
+ if action_name in {"pick_up", "move_held_object", "place"}:
+ if args.approach_cases:
+ child_args.append("--approach_cases")
+ child_args.extend(args.approach_cases)
+ if args.record_video:
+ child_args.append("--record_video")
+ if args.record_failed_video:
+ child_args.append("--record_failed_video")
+ return child_args
+
+
+def _run_action_subprocess(action_name: str, args: argparse.Namespace) -> Path:
+ """Run one action benchmark in an isolated Python subprocess."""
+ module_name = ACTION_MODULES[action_name]
+ command = [
+ sys.executable,
+ "-m",
+ module_name,
+ *_make_child_cli_args(args, action_name),
+ ]
+ print("\n" + "=" * 60, flush=True)
+ print(
+ f"Running {action_name} benchmark in an isolated subprocess",
+ flush=True,
+ )
+ print(
+ "Command: " + " ".join(shlex.quote(part) for part in command),
+ flush=True,
+ )
+ print("=" * 60, flush=True)
+
+ process = subprocess.Popen(
+ command,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ text=True,
+ encoding="utf-8",
+ errors="replace",
+ bufsize=1,
+ )
+ report_path: Path | None = None
+ if process.stdout is not None:
+ for line in process.stdout:
+ print(line, end="", flush=True)
+ if line.startswith("Markdown report saved:"):
+ report_path = Path(line.split(":", maxsplit=1)[1].strip())
+
+ return_code = process.wait()
+ if return_code != 0:
+ raise RuntimeError(
+ f"{action_name} benchmark subprocess failed with exit code "
+ f"{return_code}."
+ )
+ if report_path is None:
+ raise RuntimeError(
+ f"{action_name} benchmark subprocess finished without reporting "
+ "a markdown report path."
+ )
+ return report_path
+
+
+def _run_in_process_benchmarks(
+ args: argparse.Namespace,
+ selected_actions: list[str],
+) -> list[Path]:
+ """Run selected benchmarks in the current Python process."""
+ reports: list[Path] = []
+ for action_name in selected_actions:
+ module_name = ACTION_MODULES[action_name]
+ module = __import__(module_name, fromlist=["run_all_benchmarks"])
+ child_args = _make_child_args(args)
+ report_path = module.run_all_benchmarks(child_args)
+ reports.append(report_path)
+ return reports
+
+
+def run_all_benchmarks(args: argparse.Namespace | None = None) -> list[Path]:
+ """Run selected atomic-action benchmarks."""
+ args = _parse_args() if args is None else args
+ selected_actions = _selected_actions(args.action)
+ _validate_object_types_for_actions(selected_actions, args.object_types)
+
+ if args.in_process:
+ return _run_in_process_benchmarks(args, selected_actions)
+
+ return [
+ _run_action_subprocess(action_name, args) for action_name in selected_actions
+ ]
+
+
+def main() -> None:
+ """Run the CLI entry point."""
+ try:
+ reports = run_all_benchmarks()
+ except RuntimeError as exc:
+ raise SystemExit(str(exc)) from exc
+ if reports:
+ print("Generated reports:")
+ for report in reports:
+ print(f" {report}")
+
+
+if __name__ == "__main__":
+ main()
+
+
+__all__ = ["add_benchmark_args", "run_all_benchmarks"]
diff --git a/scripts/tutorials/atomic_action/move_end_effector.py b/scripts/tutorials/atomic_action/move_end_effector.py
index 2194be8c6..a15e6484e 100644
--- a/scripts/tutorials/atomic_action/move_end_effector.py
+++ b/scripts/tutorials/atomic_action/move_end_effector.py
@@ -47,11 +47,11 @@
)
from embodichain.lab.sim.objects import Robot
from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg
-from embodichain.lab.sim.solvers import PytorchSolverCfg
from embodichain.utils import logger
from scripts.tutorials.atomic_action.tutorial_utils import (
draw_axis_marker,
get_tutorial_window_size,
+ make_ur5_solver_cfg,
start_auto_play_recording,
stop_auto_play_recording,
)
@@ -126,18 +126,7 @@ def create_robot(sim: SimulationManager, position=(0.0, 0.0, 0.0)) -> Robot:
"arm": ["JOINT[0-9]"],
"hand": [GRIPPER_HAND_JOINT_PATTERN],
},
- solver_cfg={
- "arm": PytorchSolverCfg(
- end_link_name="ee_link",
- root_link_name="base_link",
- tcp=[
- [1.0, 0.0, 0.0, 0.0],
- [0.0, 1.0, 0.0, 0.0],
- [0.0, 0.0, 1.0, GRIPPER_TCP_Z],
- [0.0, 0.0, 0.0, 1.0],
- ],
- )
- },
+ solver_cfg={"arm": make_ur5_solver_cfg(GRIPPER_TCP_Z)},
init_qpos=[0.0, -1.57, 1.57, -1.57, -1.57, 0.0, 0.0, 0.0],
init_pos=position,
)
diff --git a/scripts/tutorials/atomic_action/move_held_object.py b/scripts/tutorials/atomic_action/move_held_object.py
index 6873be12f..a26726a75 100644
--- a/scripts/tutorials/atomic_action/move_held_object.py
+++ b/scripts/tutorials/atomic_action/move_held_object.py
@@ -58,7 +58,6 @@
from embodichain.lab.sim.objects import RigidObject, Robot
from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg
from embodichain.lab.sim.shapes import MeshCfg
-from embodichain.lab.sim.solvers import PytorchSolverCfg
from embodichain.toolkits.graspkit.pg_grasp.antipodal_generator import (
AntipodalSamplerCfg,
GraspGeneratorCfg,
@@ -70,6 +69,7 @@
from scripts.tutorials.atomic_action.tutorial_utils import (
draw_axis_marker,
get_tutorial_window_size,
+ make_ur5_solver_cfg,
start_auto_play_recording,
stop_auto_play_recording,
)
@@ -173,18 +173,7 @@ def create_robot(sim: SimulationManager, position=(0.0, 0.0, 0.0)) -> Robot:
"arm": ["JOINT[0-9]"],
"hand": [GRIPPER_HAND_JOINT_PATTERN],
},
- solver_cfg={
- "arm": PytorchSolverCfg(
- end_link_name="ee_link",
- root_link_name="base_link",
- tcp=[
- [1.0, 0.0, 0.0, 0.0],
- [0.0, 1.0, 0.0, 0.0],
- [0.0, 0.0, 1.0, GRIPPER_TCP_Z],
- [0.0, 0.0, 0.0, 1.0],
- ],
- )
- },
+ solver_cfg={"arm": make_ur5_solver_cfg(GRIPPER_TCP_Z)},
init_qpos=[0.0, -1.57, 1.57, -1.57, -1.57, 0.0, 0.0, 0.0],
init_pos=position,
)
diff --git a/scripts/tutorials/atomic_action/move_joints.py b/scripts/tutorials/atomic_action/move_joints.py
index 32cfc44df..c1146acc1 100644
--- a/scripts/tutorials/atomic_action/move_joints.py
+++ b/scripts/tutorials/atomic_action/move_joints.py
@@ -48,11 +48,11 @@
)
from embodichain.lab.sim.objects import Robot
from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg
-from embodichain.lab.sim.solvers import PytorchSolverCfg
from embodichain.utils import logger
from scripts.tutorials.atomic_action.tutorial_utils import (
draw_axis_marker,
get_tutorial_window_size,
+ make_ur5_solver_cfg,
start_auto_play_recording,
stop_auto_play_recording,
)
@@ -127,18 +127,7 @@ def create_robot(sim: SimulationManager, position=(0.0, 0.0, 0.0)) -> Robot:
"arm": ["JOINT[0-9]"],
"hand": [GRIPPER_HAND_JOINT_PATTERN],
},
- solver_cfg={
- "arm": PytorchSolverCfg(
- end_link_name="ee_link",
- root_link_name="base_link",
- tcp=[
- [1.0, 0.0, 0.0, 0.0],
- [0.0, 1.0, 0.0, 0.0],
- [0.0, 0.0, 1.0, GRIPPER_TCP_Z],
- [0.0, 0.0, 0.0, 1.0],
- ],
- )
- },
+ solver_cfg={"arm": make_ur5_solver_cfg(GRIPPER_TCP_Z)},
init_qpos=[0.0, -1.57, 1.57, -1.57, -1.57, 0.0, 0.0, 0.0],
init_pos=position,
)
diff --git a/scripts/tutorials/atomic_action/pickup.py b/scripts/tutorials/atomic_action/pickup.py
index 4edd089a3..369fee5a8 100644
--- a/scripts/tutorials/atomic_action/pickup.py
+++ b/scripts/tutorials/atomic_action/pickup.py
@@ -52,7 +52,6 @@
from embodichain.lab.sim.objects import RigidObject, Robot
from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg
from embodichain.lab.sim.shapes import MeshCfg
-from embodichain.lab.sim.solvers import PytorchSolverCfg
from embodichain.toolkits.graspkit.pg_grasp.antipodal_generator import (
AntipodalSamplerCfg,
GraspGeneratorCfg,
@@ -64,6 +63,7 @@
from scripts.tutorials.atomic_action.tutorial_utils import (
draw_axis_marker,
get_tutorial_window_size,
+ make_ur5_solver_cfg,
start_auto_play_recording,
stop_auto_play_recording,
)
@@ -195,18 +195,7 @@ def create_robot(sim: SimulationManager, position=(0.0, 0.0, 0.0)) -> Robot:
"arm": ["JOINT[0-9]"],
"hand": [GRIPPER_HAND_JOINT_PATTERN],
},
- solver_cfg={
- "arm": PytorchSolverCfg(
- end_link_name="ee_link",
- root_link_name="base_link",
- tcp=[
- [1.0, 0.0, 0.0, 0.0],
- [0.0, 1.0, 0.0, 0.0],
- [0.0, 0.0, 1.0, GRIPPER_TCP_Z],
- [0.0, 0.0, 0.0, 1.0],
- ],
- )
- },
+ solver_cfg={"arm": make_ur5_solver_cfg(GRIPPER_TCP_Z)},
init_qpos=[0.0, -1.57, 1.57, -1.57, -1.57, 0.0, 0.0, 0.0],
init_pos=position,
)
diff --git a/scripts/tutorials/atomic_action/place.py b/scripts/tutorials/atomic_action/place.py
index f6ec66bcf..946036a5d 100644
--- a/scripts/tutorials/atomic_action/place.py
+++ b/scripts/tutorials/atomic_action/place.py
@@ -55,7 +55,6 @@
from embodichain.lab.sim.objects import RigidObject, Robot
from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg
from embodichain.lab.sim.shapes import MeshCfg
-from embodichain.lab.sim.solvers import PytorchSolverCfg
from embodichain.toolkits.graspkit.pg_grasp.antipodal_generator import (
AntipodalSamplerCfg,
GraspGeneratorCfg,
@@ -67,6 +66,7 @@
from scripts.tutorials.atomic_action.tutorial_utils import (
draw_axis_marker,
get_tutorial_window_size,
+ make_ur5_solver_cfg,
start_auto_play_recording,
stop_auto_play_recording,
)
@@ -169,18 +169,7 @@ def create_robot(sim: SimulationManager, position=(0.0, 0.0, 0.0)) -> Robot:
"arm": ["JOINT[0-9]"],
"hand": [GRIPPER_HAND_JOINT_PATTERN],
},
- solver_cfg={
- "arm": PytorchSolverCfg(
- end_link_name="ee_link",
- root_link_name="base_link",
- tcp=[
- [1.0, 0.0, 0.0, 0.0],
- [0.0, 1.0, 0.0, 0.0],
- [0.0, 0.0, 1.0, GRIPPER_TCP_Z],
- [0.0, 0.0, 0.0, 1.0],
- ],
- )
- },
+ solver_cfg={"arm": make_ur5_solver_cfg(GRIPPER_TCP_Z)},
init_qpos=[0.0, -1.57, 1.57, -1.57, -1.57, 0.0, 0.0, 0.0],
init_pos=position,
)
diff --git a/scripts/tutorials/atomic_action/press.py b/scripts/tutorials/atomic_action/press.py
new file mode 100644
index 000000000..245239feb
--- /dev/null
+++ b/scripts/tutorials/atomic_action/press.py
@@ -0,0 +1,434 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Demonstrate Press on the center of a regular wooden block."""
+
+from __future__ import annotations
+
+import argparse
+import sys
+import time
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]
+if str(_REPO_ROOT) not in sys.path:
+ sys.path.insert(0, str(_REPO_ROOT))
+
+import torch
+
+from embodichain.data import get_data_path
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
+from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+from embodichain.lab.sim.atomic_actions import (
+ AtomicActionEngine,
+ EndEffectorPoseTarget,
+ MoveEndEffector,
+ MoveEndEffectorCfg,
+ Press,
+ PressCfg,
+)
+from embodichain.lab.sim.cfg import (
+ JointDrivePropertiesCfg,
+ LightCfg,
+ RenderCfg,
+ RigidBodyAttributesCfg,
+ RigidObjectCfg,
+ RobotCfg,
+ URDFCfg,
+)
+from embodichain.lab.sim.material import VisualMaterialCfg
+from embodichain.lab.sim.objects import RigidObject, Robot
+from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg
+from embodichain.lab.sim.shapes import CubeCfg
+from embodichain.utils import logger
+from scripts.tutorials.atomic_action.tutorial_utils import (
+ draw_axis_marker,
+ get_tutorial_window_size,
+ make_ur5_solver_cfg,
+ start_auto_play_recording,
+ stop_auto_play_recording,
+)
+
+GRIPPER_URDF_PATH = "DH_PGI_140_80/DH_PGI_140_80.urdf"
+GRIPPER_HAND_JOINT_PATTERN = "GRIPPER_FINGER1_JOINT_1"
+GRIPPER_TCP_Z = 0.15
+
+MOVE_SAMPLE_INTERVAL = 60
+PRESS_SAMPLE_INTERVAL = 90
+HAND_INTERP_STEPS = 12
+POST_TRAJECTORY_STEPS = 180
+TABLE_SIZE = [1.0, 1.4, 0.05]
+TABLE_TOP_Z = -0.045
+BLOCK_SIZE = [0.12, 0.12, 0.06]
+BLOCK_CENTER = [-0.30, -0.12, TABLE_TOP_Z + 0.5 * BLOCK_SIZE[2]]
+PRESS_CLEARANCE = 0.13
+PRESS_SURFACE_OFFSET = 0.003
+DEFAULT_PRESS_TOLERANCE = 0.01
+
+
+def parse_arguments() -> argparse.Namespace:
+ parser = argparse.ArgumentParser(
+ description="Demonstrate Press on the center of a regular wooden block."
+ )
+ add_env_launcher_args_to_parser(parser)
+ parser.add_argument(
+ "--auto_play",
+ action="store_true",
+ help="Run the viewer demo without waiting for keyboard input.",
+ )
+ parser.add_argument(
+ "--debug_state",
+ action="store_true",
+ help="Log the block pose during replay.",
+ )
+ parser.add_argument(
+ "--press_tolerance",
+ type=float,
+ default=DEFAULT_PRESS_TOLERANCE,
+ help="XY tolerance in meters for checking whether press reaches block center.",
+ )
+ parser.add_argument(
+ "--block_pos",
+ type=float,
+ nargs=2,
+ default=BLOCK_CENTER[:2],
+ metavar=("X", "Y"),
+ help="Initial XY position of the wooden block center.",
+ )
+ parser.add_argument(
+ "--no_vis_eef_axis",
+ action="store_true",
+ help="Do not draw the press target coordinate frame before planning.",
+ )
+ return parser.parse_args()
+
+
+def initialize_simulation(args: argparse.Namespace) -> SimulationManager:
+ width, height = get_tutorial_window_size(args)
+ cfg = SimulationManagerCfg(
+ width=width,
+ height=height,
+ headless=True,
+ sim_device=args.device,
+ render_cfg=RenderCfg(renderer=args.renderer),
+ physics_dt=1.0 / 100.0,
+ arena_space=2.5,
+ )
+ sim = SimulationManager(cfg)
+ sim.add_light(
+ cfg=LightCfg(
+ uid="main_light",
+ color=(0.6, 0.6, 0.6),
+ intensity=30.0,
+ init_pos=(1.0, 0.0, 3.0),
+ )
+ )
+ return sim
+
+
+def create_robot(sim: SimulationManager, position=(0.0, 0.0, 0.0)) -> Robot:
+ ur5_urdf_path = get_data_path("UniversalRobots/UR5/UR5.urdf")
+ gripper_urdf_path = get_data_path(GRIPPER_URDF_PATH)
+ cfg = RobotCfg(
+ uid="UR5",
+ urdf_cfg=URDFCfg(
+ components=[
+ {"component_type": "arm", "urdf_path": ur5_urdf_path},
+ {"component_type": "hand", "urdf_path": gripper_urdf_path},
+ ]
+ ),
+ drive_pros=JointDrivePropertiesCfg(
+ stiffness={"JOINT[0-9]": 1e4, GRIPPER_HAND_JOINT_PATTERN: 1e3},
+ damping={"JOINT[0-9]": 1e3, GRIPPER_HAND_JOINT_PATTERN: 1e2},
+ max_effort={"JOINT[0-9]": 1e5, GRIPPER_HAND_JOINT_PATTERN: 1e4},
+ drive_type="force",
+ ),
+ control_parts={
+ "arm": ["JOINT[0-9]"],
+ "hand": [GRIPPER_HAND_JOINT_PATTERN],
+ },
+ solver_cfg={"arm": make_ur5_solver_cfg(GRIPPER_TCP_Z)},
+ init_qpos=[0.0, -1.57, 1.57, -1.57, -1.57, 0.0, 0.0, 0.0],
+ init_pos=position,
+ )
+ return sim.add_robot(cfg=cfg)
+
+
+def create_table(sim: SimulationManager) -> RigidObject:
+ cfg = RigidObjectCfg(
+ uid="table",
+ shape=CubeCfg(size=TABLE_SIZE),
+ body_type="static",
+ attrs=RigidBodyAttributesCfg(
+ dynamic_friction=0.8,
+ static_friction=0.9,
+ ),
+ init_pos=[-0.30, 0.10, TABLE_TOP_Z - 0.5 * TABLE_SIZE[2]],
+ )
+ return sim.add_rigid_object(cfg=cfg)
+
+
+def create_wooden_block(
+ sim: SimulationManager,
+ block_center: list[float],
+) -> RigidObject:
+ cfg = RigidObjectCfg(
+ uid="wooden_block",
+ shape=CubeCfg(
+ size=BLOCK_SIZE,
+ visual_material=VisualMaterialCfg(
+ uid="wooden_block_mat",
+ base_color=[0.58, 0.32, 0.14, 1.0],
+ roughness=0.85,
+ ),
+ ),
+ body_type="static",
+ attrs=RigidBodyAttributesCfg(
+ dynamic_friction=0.8,
+ static_friction=0.9,
+ ),
+ init_pos=block_center,
+ )
+ return sim.add_rigid_object(cfg=cfg)
+
+
+def settle_object(sim: SimulationManager, obj: RigidObject, step: int = 5) -> None:
+ if sim.device.type == "cuda":
+ sim.init_gpu_physics()
+ obj.reset()
+ sim.update(step=step)
+ obj.clear_dynamics()
+
+
+def get_hand_close_qpos(robot: Robot, device: torch.device) -> torch.Tensor:
+ hand_limits = robot.get_qpos_limits(name="hand")[0].to(
+ device=device, dtype=torch.float32
+ )
+ return hand_limits[:, 1]
+
+
+def make_top_down_eef_pose(position: torch.Tensor) -> torch.Tensor:
+ pose = torch.eye(4, dtype=torch.float32, device=position.device)
+ pose[:3, :3] = torch.tensor(
+ [
+ [-0.0539, -0.9985, -0.0022],
+ [-0.9977, 0.0540, -0.0401],
+ [0.0401, 0.0000, -0.9992],
+ ],
+ dtype=torch.float32,
+ device=position.device,
+ )
+ pose[:3, 3] = position
+ return pose
+
+
+def format_tensor(tensor: torch.Tensor) -> str:
+ rounded = (tensor.detach().cpu() * 10000.0).round() / 10000.0
+ return str(rounded.tolist())
+
+
+def log_block_state(block: RigidObject, label: str) -> None:
+ block_pose = block.get_local_pose(to_matrix=True)
+ logger.log_info(
+ f"{label}: pos={format_tensor(block_pose[0, :3, 3])}, "
+ f"z_axis={format_tensor(block_pose[0, :3, 2])}"
+ )
+
+
+def draw_press_target_axis(sim: SimulationManager, press_target: torch.Tensor) -> None:
+ if press_target.dim() == 2:
+ press_target = press_target.unsqueeze(0)
+ draw_axis_marker(sim, "press_target_axis", press_target)
+
+
+def compute_press_center_check(
+ robot: Robot,
+ traj: torch.Tensor,
+ block: RigidObject,
+ tolerance: float,
+) -> tuple[bool, float, int, torch.Tensor, torch.Tensor]:
+ arm_joint_ids = robot.get_joint_ids(name="arm")
+ press_segment_start = MOVE_SAMPLE_INTERVAL + HAND_INTERP_STEPS
+ press_segment_end = MOVE_SAMPLE_INTERVAL + PRESS_SAMPLE_INTERVAL
+ arm_traj = traj[:, press_segment_start:press_segment_end, arm_joint_ids]
+ fk_pose = torch.stack(
+ [
+ robot.compute_fk(
+ qpos=waypoint.unsqueeze(0),
+ name="arm",
+ to_matrix=True,
+ )[0]
+ for waypoint in arm_traj[0]
+ ],
+ dim=0,
+ )
+
+ block_pose = block.get_local_pose(to_matrix=True)
+ block_center = block_pose[0, :3, 3]
+ block_top_z = block_center[2] + 0.5 * BLOCK_SIZE[2]
+ target_xy = block_center[:2]
+ target_z = block_top_z + PRESS_SURFACE_OFFSET
+
+ xy_error = torch.linalg.norm(fk_pose[:, :2, 3] - target_xy, dim=1)
+ z_error = torch.abs(fk_pose[:, 2, 3] - target_z)
+ combined_error = xy_error + z_error
+ best_idx = int(torch.argmin(combined_error).item())
+ best_pos = fk_pose[best_idx, :3, 3]
+ center_error = float(torch.linalg.norm(best_pos[:2] - target_xy).item())
+ return (
+ center_error <= tolerance,
+ center_error,
+ press_segment_start + best_idx,
+ best_pos,
+ torch.tensor(
+ [target_xy[0], target_xy[1], target_z],
+ dtype=torch.float32,
+ device=traj.device,
+ ),
+ )
+
+
+def run_press_demo(args: argparse.Namespace) -> None:
+ sim = initialize_simulation(args)
+ robot = create_robot(sim)
+ create_table(sim)
+ block_center = [
+ args.block_pos[0],
+ args.block_pos[1],
+ TABLE_TOP_Z + 0.5 * BLOCK_SIZE[2],
+ ]
+ block = create_wooden_block(sim, block_center)
+
+ settle_object(sim, block, step=5)
+ motion_gen = MotionGenerator(
+ cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
+ )
+ hand_close = get_hand_close_qpos(robot, sim.device)
+
+ atomic_engine = AtomicActionEngine(motion_generator=motion_gen)
+ atomic_engine.register(
+ MoveEndEffector(
+ motion_gen,
+ cfg=MoveEndEffectorCfg(
+ control_part="arm",
+ sample_interval=MOVE_SAMPLE_INTERVAL,
+ ),
+ )
+ )
+ atomic_engine.register(
+ Press(
+ motion_gen,
+ cfg=PressCfg(
+ control_part="arm",
+ hand_control_part="hand",
+ hand_close_qpos=hand_close,
+ sample_interval=PRESS_SAMPLE_INTERVAL,
+ hand_interp_steps=HAND_INTERP_STEPS,
+ ),
+ )
+ )
+
+ if not args.headless:
+ sim.open_window()
+ if not args.auto_play:
+ input("Inspect the wooden block, then press Enter to plan...")
+
+ block_pose = block.get_local_pose(to_matrix=True)
+ block_top_z = block_pose[0, 2, 3] + 0.5 * BLOCK_SIZE[2]
+ press_position = block_pose[0, :3, 3].clone()
+ press_position[2] = block_top_z + PRESS_SURFACE_OFFSET
+ move_position = press_position.clone()
+ move_position[2] = block_top_z + PRESS_CLEARANCE
+
+ move_target = make_top_down_eef_pose(move_position)
+ press_target = make_top_down_eef_pose(press_position)
+ if not args.no_vis_eef_axis:
+ draw_press_target_axis(sim, press_target)
+
+ logger.log_info("Planning MoveEndEffector -> Press")
+ start_time = time.time()
+ is_success, traj, _ = atomic_engine.run(
+ steps=[
+ ("move_end_effector", EndEffectorPoseTarget(xpos=move_target)),
+ ("press", EndEffectorPoseTarget(xpos=press_target)),
+ ]
+ )
+ cost_time = time.time() - start_time
+ logger.log_info(f"Plan trajectory cost time: {cost_time:.2f} seconds")
+ if not is_success:
+ logger.log_warning("Failed to plan Press demo trajectory.")
+ return
+
+ is_center_hit, center_error, hit_step, hit_pos, expected_pos = (
+ compute_press_center_check(
+ robot=robot,
+ traj=traj,
+ block=block,
+ tolerance=args.press_tolerance,
+ )
+ )
+ logger.log_info(
+ "Press center check: "
+ f"success={is_center_hit}, "
+ f"xy_error={center_error:.4f} m, "
+ f"hit_step={hit_step}, "
+ f"hit_pos={format_tensor(hit_pos)}, "
+ f"expected={format_tensor(expected_pos)}"
+ )
+ if not is_center_hit:
+ logger.log_warning(
+ "Press planned trajectory did not reach the block center within "
+ f"{args.press_tolerance:.4f} m."
+ )
+ return
+
+ if not args.auto_play:
+ input("Press Enter to replay the Press demo...")
+
+ recording_started = start_auto_play_recording(
+ sim, args, video_prefix="press_auto_play"
+ )
+ try:
+ log_stride = max(1, traj.shape[1] // 10)
+ for i in range(traj.shape[1]):
+ robot.set_qpos(traj[:, i, :])
+ sim.update(step=4)
+ if args.debug_state and (i % log_stride == 0 or i == traj.shape[1] - 1):
+ log_block_state(block, f"replay step {i}/{traj.shape[1] - 1}")
+ time.sleep(1e-2)
+
+ logger.log_info("Press returned the end-effector to the starting pose.")
+ final_qpos = traj[:, -1, :]
+ for i in range(POST_TRAJECTORY_STEPS):
+ robot.set_qpos(final_qpos)
+ sim.update(step=2)
+ if args.debug_state and i % max(1, POST_TRAJECTORY_STEPS // 5) == 0:
+ log_block_state(block, f"post step {i}/{POST_TRAJECTORY_STEPS - 1}")
+ time.sleep(1e-2)
+ finally:
+ stop_auto_play_recording(sim, recording_started)
+
+ if not args.auto_play:
+ input("Press Enter to exit the simulation...")
+
+
+def main() -> None:
+ args = parse_arguments()
+ run_press_demo(args)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/scripts/tutorials/atomic_action/tutorial_utils.py b/scripts/tutorials/atomic_action/tutorial_utils.py
index c846455cc..ba3a27205 100644
--- a/scripts/tutorials/atomic_action/tutorial_utils.py
+++ b/scripts/tutorials/atomic_action/tutorial_utils.py
@@ -25,6 +25,7 @@
from embodichain.lab.sim import SimulationManager
from embodichain.lab.sim.cfg import MarkerCfg
+from embodichain.lab.sim.solvers import URSolverCfg
RECORD_WIDTH = 640
RECORD_HEIGHT = 480
@@ -41,6 +42,23 @@
DEFAULT_AXIS_SIZE = 0.003
+def make_ur5_solver_cfg(tcp_z: float) -> URSolverCfg:
+ """Create the UR5 arm solver cfg used by atomic-action tutorials."""
+ cfg = URSolverCfg(
+ ur_type="ur5",
+ end_link_name="ee_link",
+ root_link_name="base_link",
+ tcp=[
+ [1.0, 0.0, 0.0, 0.0],
+ [0.0, 1.0, 0.0, 0.0],
+ [0.0, 0.0, 1.0, tcp_z],
+ [0.0, 0.0, 0.0, 1.0],
+ ],
+ )
+ cfg.urdf_path = None
+ return cfg
+
+
def get_tutorial_window_size(args: argparse.Namespace) -> tuple[int, int]:
"""Return the viewer window size used by atomic-action tutorials."""
return VIEWER_WIDTH, VIEWER_HEIGHT
@@ -115,6 +133,7 @@ def draw_axis_marker(
"DEFAULT_AUTO_PLAY_LOOK_AT",
"DEFAULT_AXIS_LEN",
"DEFAULT_AXIS_SIZE",
+ "make_ur5_solver_cfg",
"get_tutorial_window_size",
"start_auto_play_recording",
"stop_auto_play_recording",
diff --git a/tests/sim/atomic_actions/test_actions.py b/tests/sim/atomic_actions/test_actions.py
index fb5a1bf0f..f0b07653a 100644
--- a/tests/sim/atomic_actions/test_actions.py
+++ b/tests/sim/atomic_actions/test_actions.py
@@ -47,6 +47,8 @@
PickUpCfg,
Place,
PlaceCfg,
+ Press,
+ PressCfg,
)
NUM_ENVS = 2
@@ -380,3 +382,57 @@ def test_execute_clears_held_object(self):
assert result.success is True
assert result.trajectory.shape[2] == TOTAL_DOF
assert result.next_state.held_object is None
+
+
+# ---------------------------------------------------------------------------
+# Press
+# ---------------------------------------------------------------------------
+
+
+class TestPressAction:
+ def setup_method(self):
+ self.mg = _make_mock_motion_generator()
+
+ def test_target_type_is_pose_target(self):
+ assert Press.TargetType is EndEffectorPoseTarget
+
+ def test_default_name_is_explicit(self):
+ assert PressCfg(hand_close_qpos=_hand_close()).name == "press"
+
+ def test_execute_closes_hand_and_preserves_held_object(self):
+ cfg = PressCfg(
+ hand_close_qpos=_hand_close(),
+ sample_interval=12,
+ hand_interp_steps=4,
+ )
+ action = Press(self.mg, cfg)
+ sem = ObjectSemantics(
+ affordance=AntipodalAffordance(), geometry={}, label="mug"
+ )
+ held = HeldObjectState(
+ semantics=sem,
+ object_to_eef=torch.eye(4).unsqueeze(0).repeat(NUM_ENVS, 1, 1),
+ grasp_xpos=torch.eye(4).unsqueeze(0).repeat(NUM_ENVS, 1, 1),
+ )
+ start_hand_qpos = torch.full((NUM_ENVS, HAND_DOF), 0.01)
+ last_qpos = torch.cat([torch.zeros(NUM_ENVS, ARM_DOF), start_hand_qpos], dim=1)
+ state = WorldState(last_qpos=last_qpos, held_object=held)
+
+ def interpolate(trajectory, interp_num, device):
+ return trajectory[:, -1:, :].repeat(1, interp_num, 1)
+
+ with patch(
+ "embodichain.lab.sim.atomic_actions.trajectory.interpolate_with_distance",
+ side_effect=interpolate,
+ ):
+ result = action.execute(EndEffectorPoseTarget(xpos=torch.eye(4)), state)
+
+ assert result.success is True
+ assert result.trajectory.shape == (NUM_ENVS, 12, TOTAL_DOF)
+ expected_hand_qpos = _hand_close().unsqueeze(0).repeat(NUM_ENVS, 1)
+ assert torch.allclose(result.trajectory[:, -1, ARM_DOF:], expected_hand_qpos)
+ assert torch.allclose(
+ result.next_state.last_qpos[:, :ARM_DOF],
+ last_qpos[:, :ARM_DOF],
+ )
+ assert result.next_state.held_object is held