From 90216c4f91eb21810b4391b6926fdf4ba2bca605 Mon Sep 17 00:00:00 2001
From: Utkarsh
Date: Fri, 7 Nov 2025 20:43:38 -0800
Subject: [PATCH 1/3] task verifier updates

---
 .../isaaclab/data_collection/README.md        |   52 +
 .../isaaclab/data_collection/__init__.py      |    0
 .../data_collection/learning/hybrid_ppo.py    |  533 +++++++++
 .../data_collection/learning/simple_ppo.py    |  331 ++++++
 .../isaaclab/data_collection/ply_to_depth.py  |  190 +++
 .../isaaclab/data_collection/sim_base.py      | 1033 +++++++++++++++++
 .../data_collection/sim_env_factory.py        |  573 +++++++++
 .../data_collection/sim_heuristic_manip.py    |  787 +++++++++++++
 .../sim_preprocess/grasp_generation.py        |  252 ++++
 .../sim_preprocess/grasp_utils.py             |  391 +++++++
 .../sim_preprocess/trajectory_smoothing.py    |  190 +++
 .../sim_preprocess/usd_conversion.py          |  584 ++++++++++
 .../data_collection/sim_trajectory_replay.py  |  143 +++
 .../sim_utils/calibration_utils.py            |   88 ++
 .../data_collection/sim_utils/sim_utils.py    |  257 ++++
 .../sim_utils/transform_utils.py              |   50 +
 16 files changed, 5454 insertions(+)
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/README.md
 create mode 100644 openreal2sim/simulation/isaaclab/data_collection/__init__.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/learning/hybrid_ppo.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/learning/simple_ppo.py
 create mode 100644 openreal2sim/simulation/isaaclab/data_collection/ply_to_depth.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_base.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_env_factory.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_generation.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_utils.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/trajectory_smoothing.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/usd_conversion.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_trajectory_replay.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_utils/calibration_utils.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_utils/sim_utils.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_utils/transform_utils.py

diff --git a/openreal2sim/simulation/isaaclab/data_collection/README.md b/openreal2sim/simulation/isaaclab/data_collection/README.md
new file mode 100755
index 0000000..199f68d
--- /dev/null
+++ b/openreal2sim/simulation/isaaclab/data_collection/README.md
@@ -0,0 +1,52 @@
# IsaacLab

## Installation

**Pull the pre-built docker image**

If you want to use a pre-built image, you can pull it directly from the GitHub Container Registry:
 ```bash
 docker pull ghcr.io/pointscoder/isaaclab:dev
 docker tag ghcr.io/pointscoder/isaaclab:dev isaaclab:dev
 ```

**On the host machine and before entering the container**, run
```
xhost +local:
```
This allows the container to access the host's X server for the Isaac Sim GUI.
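(Optional) When you are done with the GUI, you can revoke this access again with the standard `xhost` counterpart:
```
xhost -local:
```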
Then, launch the container:
```
HOST_UID=$(id -u) HOST_GID=$(id -g) docker compose -p "$USER" -f docker/compose.yml up -d isaaclab
```
and enter it:
```
HOST_UID=$(id -u) HOST_GID=$(id -g) docker compose -p "$USER" -f docker/compose.yml exec isaaclab bash
```

## Preprocessing

We need to convert the textured meshes to USD format. This is done by running the following script:
```
python openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/usd_conversion.py
```

Then, we generate grasp proposals for each object in the scene:
```
python openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_generation.py
```

## Running the simulation

We provide heuristic policies using grasping and motion planning:
```
python openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py
```

## Replay Robot Trajectories

We can also replay the recorded robot trajectories in IsaacSim:
```
python openreal2sim/simulation/isaaclab/data_collection/sim_trajectory_replay.py --demo_dir <demo_dir>
```
\ No newline at end of file
diff --git a/openreal2sim/simulation/isaaclab/data_collection/__init__.py b/openreal2sim/simulation/isaaclab/data_collection/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/openreal2sim/simulation/isaaclab/data_collection/learning/hybrid_ppo.py b/openreal2sim/simulation/isaaclab/data_collection/learning/hybrid_ppo.py
new file mode 100755
index 0000000..a5a8ff0
--- /dev/null
+++ b/openreal2sim/simulation/isaaclab/data_collection/learning/hybrid_ppo.py
@@ -0,0 +1,533 @@
# learning/hybrid_ppo.py
# -*- coding: utf-8 -*-
"""
Hybrid PPO: Categorical (proposal index at t=0) + Gaussian (offset at t=0) + Gaussian (residual for t>0).
Action sent to env is always concatenated as:
    [ residual_6 | proposal_onehot_P | offset_6 ]
Env consumes:
    - t == 0: uses (proposal_onehot_P, offset_6); ignores residual_6
    - t > 0 : uses residual_6; ignores the rest
"""
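# Illustrative layout check (assumptions: P = 4 proposals, batch of 2; this
# snippet is not part of the module, it only demonstrates the action packing):
#   residual = torch.zeros(2, 6)
#   onehot   = torch.nn.functional.one_hot(torch.tensor([1, 3]), num_classes=4).float()
#   offset   = torch.zeros(2, 6)
#   env_action = torch.cat([residual, onehot, offset], dim=-1)   # shape (2, 6 + 4 + 6)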
from __future__ import annotations
from pathlib import Path
from typing import Tuple, Iterable, Dict

import torch
import torch.nn as nn
import torch.optim as optim


# ========================= Hybrid Actor-Critic =========================
class HybridActorCritic(nn.Module):
    """
    Policy head structure:
      - proposal_logits: (B, P)    [Categorical at t=0]
      - offset_mu/std:   (B, P, 6) [Gaussian at t=0, choose row by sampled idx]
      - residual_mu/std: (B, 6)    [Gaussian at t>0]
      - value: (B,)
    """
    def __init__(self, obs_dim: int, num_proposals: int, hidden_sizes: Tuple[int, ...] = (256, 256)):
        super().__init__()
        self.P = int(num_proposals)

        # Shared trunk
        layers = []
        last = obs_dim
        for hs in hidden_sizes:
            layers += [nn.Linear(last, hs), nn.ReLU(inplace=True)]
            last = hs
        self.shared = nn.Sequential(*layers)

        # Heads
        self.proposal_head = nn.Linear(last, self.P)                      # logits for Categorical
        self.offset_mu = nn.Linear(last, self.P * 6)                      # per-proposal 6D mean
        self.offset_logstd = nn.Parameter(torch.full((self.P, 6), -0.5))  # learnable, proposal-wise
        self.residual_mu = nn.Linear(last, 6)                             # 6D mean for residual
        self.residual_logstd = nn.Parameter(torch.full((6,), -0.5))       # learnable

        self.v_head = nn.Linear(last, 1)

    def forward(self, obs: torch.Tensor) -> Dict[str, torch.Tensor]:
        """
        Returns a dict of all distribution params and value.
        Shapes:
          - proposal_logits: (B,P)
          - offset_mu:  (B,P,6)
          - offset_std: (P,6)  (broadcast on batch)
          - residual_mu:  (B,6)
          - residual_std: (6,)  (broadcast on batch)
          - value: (B,)
        """
        x = self.shared(obs)
        proposal_logits = self.proposal_head(x)            # (B,P)
        offset_mu = self.offset_mu(x).view(-1, self.P, 6)  # (B,P,6)
        offset_std = torch.exp(self.offset_logstd)         # (P,6)
        residual_mu = self.residual_mu(x)                  # (B,6)
        residual_std = torch.exp(self.residual_logstd)     # (6,)
        value = self.v_head(x).squeeze(-1)
        return {
            "proposal_logits": proposal_logits,
            "offset_mu": offset_mu,
            "offset_std": offset_std,
            "residual_mu": residual_mu,
            "residual_std": residual_std,
            "value": value,
        }

    @staticmethod
    def _one_hot(indices: torch.Tensor, P: int) -> torch.Tensor:
        """indices: (B,), return onehot (B,P) float32."""
        B = indices.shape[0]
        onehot = torch.zeros(B, P, dtype=torch.float32, device=indices.device)
        onehot.scatter_(1, indices.view(-1, 1), 1.0)
        return onehot

    def act_sample(self, obs: torch.Tensor, is_step0: bool) -> Dict[str, torch.Tensor]:
        """
        Sample action and return everything needed for PPO rollout at this step.
        At t=0: sample idx ~ Categorical, offset ~ N(mu[idx], std[idx]), build env_action.
        At t>0: sample residual ~ N(mu, std), build env_action with zeros for the rest.
        Returns dict with keys:
          - env_action: (B, 6 + P + 6)
          - logp: (B,)
          - entropy: (B,)
          - value: (B,)
        """
        outs = self.forward(obs)
        P = outs["proposal_logits"].shape[1]
        B = obs.shape[0]

        if is_step0:
            # Categorical over proposals
            cat = torch.distributions.Categorical(logits=outs["proposal_logits"])
            idx = cat.sample()              # (B,)
            onehot = self._one_hot(idx, P)  # (B,P)

            # Gather the offset distribution of the selected proposal:
            # offset_mu is (B,P,6); offset_std is (P,6) and is indexed by the sampled idx.
            mu_sel = outs["offset_mu"][torch.arange(B, device=obs.device), idx, :]  # (B,6)
            std_sel = outs["offset_std"][idx, :]                                    # (B,6)

            gauss_off = torch.distributions.Normal(mu_sel, std_sel)
            u_off = gauss_off.rsample()     # (B,6)
            u_off_tanh = torch.tanh(u_off)  # keep action in [-1,1] space for env's clamp
            # Tanh correction
            logp_off = gauss_off.log_prob(u_off) - torch.log(1.0 - u_off_tanh.pow(2) + 1e-6)
            logp_off = logp_off.sum(-1)     # (B,)

            logp = cat.log_prob(idx) + logp_off

            # Entropy (encourage exploration at t=0): categorical + offset gaussian entropy
            ent = cat.entropy() + gauss_off.entropy().sum(-1)

            # Residual part zero at t=0 (env ignores it anyway)
            residual = torch.zeros(B, 6, dtype=torch.float32, device=obs.device)

            # Build env action: [residual_6 | onehot_P | offset_6]
            env_action = torch.cat([residual, onehot, u_off_tanh], dim=-1)  # (B, 6+P+6)

            return {
                "env_action": env_action,
                "logp": logp,
                "entropy": ent,
                "value": outs["value"],
            }
        else:
            # Only the residual Gaussian matters
            gauss_res = torch.distributions.Normal(outs["residual_mu"], outs["residual_std"])
            u_res = gauss_res.rsample()        # (B,6)
            a_res = torch.tanh(u_res)          # (B,6)
            logp_res = gauss_res.log_prob(u_res) - torch.log(1.0 - a_res.pow(2) + 1e-6)
            logp_res = logp_res.sum(-1)        # (B,)
            ent = gauss_res.entropy().sum(-1)  # (B,)

            # proposal onehot & offset set to zero placeholders
            onehot = torch.zeros(B, P, dtype=torch.float32, device=obs.device)
            off6 = torch.zeros(B, 6, dtype=torch.float32, device=obs.device)

            env_action = torch.cat([a_res, onehot, off6], dim=-1)  # (B,6+P+6)
            return {
                "env_action": env_action,
                "logp": logp_res,
                "entropy": ent,
                "value": outs["value"],
            }
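    # Note on the tanh squashing used above (and inverted in log_prob_recompute
    # below): for a = tanh(u) with u ~ N(mu, std), the change of variables gives
    #   log p(a) = log p(u) - sum log(1 - tanh(u)^2),
    # summed over the 6 action dims, and u is recovered from a stored action via
    # atanh(a) = 0.5 * log((1+a)/(1-a)); the 1e-6 terms are numerical guards.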
    def log_prob_recompute(self, obs: torch.Tensor, env_action: torch.Tensor, is_step0: bool) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Recompute log_prob and entropy under NEW network params for PPO update.
        env_action: (B, 6+P+6) = [res6 | onehot_P | off6]
        Returns (logp, entropy), both (B,)
        """
        outs = self.forward(obs)
        B = env_action.shape[0]
        P = outs["proposal_logits"].shape[1]

        res6 = env_action[:, :6]
        onehot = env_action[:, 6:6+P]
        off6 = env_action[:, 6+P:]

        if is_step0:
            # Recover idx from onehot
            idx = onehot.argmax(dim=-1)  # (B,)

            # logp for categorical
            cat = torch.distributions.Categorical(logits=outs["proposal_logits"])
            logp_cat = cat.log_prob(idx)  # (B,)

            # logp for selected offset
            mu_sel = outs["offset_mu"][torch.arange(B, device=obs.device), idx, :]  # (B,6)
            std_sel = outs["offset_std"][idx, :]                                    # (B,6)
            gauss_off = torch.distributions.Normal(mu_sel, std_sel)
            # Convert bounded action back to pre-squash space via atanh
            a = off6.clamp(-0.999999, 0.999999)
            atanh_a = 0.5 * (torch.log1p(a + 1e-6) - torch.log1p(-a + 1e-6))
            logp_off = gauss_off.log_prob(atanh_a) - torch.log(1.0 - a.pow(2) + 1e-6)
            logp_off = logp_off.sum(-1)

            logp = logp_cat + logp_off
            ent = cat.entropy() + gauss_off.entropy().sum(-1)
            return logp, ent
        else:
            gauss_res = torch.distributions.Normal(outs["residual_mu"], outs["residual_std"])
            a = res6.clamp(-0.999999, 0.999999)
            atanh_a = 0.5 * (torch.log1p(a + 1e-6) - torch.log1p(-a + 1e-6))
            logp_res = gauss_res.log_prob(atanh_a) - torch.log(1.0 - a.pow(2) + 1e-6)
            logp_res = logp_res.sum(-1)
            ent = gauss_res.entropy().sum(-1)
            return logp_res, ent
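# Minimal usage sketch (assumed sizes; `net` and `obs` are illustrative names,
# not part of this module):
#   net = HybridActorCritic(obs_dim=32, num_proposals=8)
#   obs = torch.randn(4, 32)
#   out0 = net.act_sample(obs, is_step0=True)    # Categorical + offset Gaussian
#   outk = net.act_sample(obs, is_step0=False)   # residual Gaussian only
#   out0["env_action"].shape                     # torch.Size([4, 6 + 8 + 6])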
+ """ + def __init__(self, horizon: int, num_envs: int, obs_dim: int, act_dim: int, device: torch.device): + self.h, self.n, self.d, self.k = horizon, num_envs, obs_dim, act_dim + self.device = device + self.reset() + + def reset(self): + H, N, D, K, dev = self.h, self.n, self.d, self.k, self.device + self.obs = torch.zeros(H, N, D, device=dev, dtype=torch.float32) + self.actions = torch.zeros(H, N, K, device=dev, dtype=torch.float32) # [res6|1hotP|off6] + self.logp = torch.zeros(H, N, device=dev, dtype=torch.float32) + self.rewards = torch.zeros(H, N, device=dev, dtype=torch.float32) + self.dones = torch.zeros(H, N, device=dev, dtype=torch.bool) + self.values = torch.zeros(H, N, device=dev, dtype=torch.float32) + self.timeouts = torch.zeros(H, N, device=dev, dtype=torch.bool) + self.ptr = 0 + + def add(self, *, obs, env_action, logp, rew, done, val, t_out): + i = self.ptr + self.obs[i] = obs.detach() + self.actions[i] = env_action.detach() + self.logp[i] = logp.detach() + self.rewards[i] = rew.detach().squeeze(-1) + self.dones[i] = done.detach().bool() + self.values[i] = val.detach() + self.timeouts[i] = t_out.detach().bool() + self.ptr += 1 + + @torch.no_grad() + def compute_gae(self, last_value: torch.Tensor, gamma=0.99, lam=0.95): + H, N = self.h, self.n + adv = torch.zeros(H, N, device=self.device, dtype=torch.float32) + last_gae = torch.zeros(N, device=self.device, dtype=torch.float32) + for t in reversed(range(H)): + if t == H - 1: + next_nonterminal = (~self.dones[t] | self.timeouts[t]).float() + next_values = last_value + else: + next_nonterminal = (~self.dones[t + 1] | self.timeouts[t + 1]).float() + next_values = self.values[t + 1] + delta = self.rewards[t] + gamma * next_values * next_nonterminal - self.values[t] + last_gae = delta + gamma * lam * next_nonterminal * last_gae + adv[t] = last_gae + ret = adv + self.values + return adv, ret + + def iterate_minibatches(self, batch_size: int, num_epochs: int) -> Iterable: + H, N = self.h, self.n + total = H * N + obs = self.obs.view(total, -1) + acts = self.actions.view(total, -1) + logp = self.logp.view(total) + values= self.values.view(total) + + def idx_batches(): + idx = torch.randperm(total, device=self.device) + for i in range(0, total, batch_size): + j = idx[i: i + batch_size] + yield j + + for _ in range(num_epochs): + for j in idx_batches(): + yield j, obs[j], acts[j], logp[j], values[j] + + +# ========================= Helpers ========================= +def _as_tensor(x, device: torch.device, dtype: torch.dtype | None = None) -> torch.Tensor: + if isinstance(x, torch.Tensor): + if dtype is not None and x.dtype != dtype: + x = x.to(dtype) + return x.to(device) + return torch.as_tensor(x, device=device, dtype=dtype) + +@torch.no_grad() +def _reset_env(env, device: torch.device): + rst = env.reset() + obs = rst["obs"] + return _as_tensor(obs, device=device, dtype=torch.float32) + +def _choose_minibatch_size(total_batch: int, num_envs: int) -> int: + mb = max(num_envs * 2, 32) + while total_batch % mb != 0 and mb > 8: + mb //= 2 + if total_batch % mb != 0: + mb = num_envs + return mb + + +@torch.no_grad() +def _partial_reset_and_merge_obs(env, next_obs: torch.Tensor, done_t: torch.Tensor, device: torch.device): + """ + If any env is done: + 1) Try env.reset_done(done_mask) which returns {"obs": (M,D)} or (N,D) + 2) Else try env.reset(env_ids=idxs) + 3) Else fallback to global reset() + Then merge the reset obs into next_obs only for those done envs and return merged tensor. 
+ """ + if not bool(done_t.any()): + return next_obs # nothing to do + + idxs = torch.nonzero(done_t, as_tuple=False).squeeze(-1) # (M,) + M = int(idxs.numel()) + if M == 0: + return next_obs + + rst = env.reset(env_ids=idxs) + new_obs = _as_tensor(rst["obs"], device=device, dtype=torch.float32) + if new_obs.shape[0] == next_obs.shape[0]: + return new_obs + merged = next_obs.clone() + merged[idxs] = new_obs + return merged + +# ========================= PPO Train (Hybrid) ========================= +def ppo_train_hybrid( + *, + env, + net: HybridActorCritic, + optimizer: optim.Optimizer, + device: torch.device, + total_epochs: int, + horizon: int, + mini_epochs: int, + batch_size_hint: int, + clip_eps: float, + value_coef: float, + entropy_coef: float, + gamma: float, + gae_lambda: float, + run_dir: Path, + save_every: int = 50, +): + """ + Hybrid PPO training loop: + - t=0 uses Categorical(index) + Gaussian(offset) + - t>0 uses Gaussian(residual) + Assumptions about env: + - action_space.shape[0] == (6 + P + 6) + - step() consumes the onehot & offset at t=0, and residual at t>0 + """ + from torch.utils.tensorboard import SummaryWriter + + print("#####################################horizon =", horizon) + + run_dir.mkdir(parents=True, exist_ok=True) + ckpt_best = run_dir / "best.pt" + + num_envs = getattr(env, "num_envs", None) + if num_envs is None: + raise ValueError("env.num_envs is required for batch sizing.") + obs_dim = env.observation_space.shape[0] + act_dim = env.action_space.shape[0] # 6 + P + 6 + + writer = SummaryWriter(log_dir=run_dir) + + print(f"[INFO][Hybrid PPO] epochs={total_epochs}, horizon={horizon}, num_envs={num_envs}, " + f"obs_dim={obs_dim}, act_dim={act_dim}") + buffer = HybridRolloutBuffer(horizon=horizon, num_envs=num_envs, obs_dim=obs_dim, act_dim=act_dim, device=device) + + total_batch = horizon * num_envs if batch_size_hint <= 0 else int(batch_size_hint) + minibatch_size = _choose_minibatch_size(total_batch, num_envs) + + # initial reset + obs = _reset_env(env, device) + best_mean_return = -1e9 + + for epoch in range(1, total_epochs + 1): + buffer.reset() + + ep_return_per_env = torch.zeros(num_envs, device=device, dtype=torch.float32) + finished_episode_returns = [] # Python list[float] + ep_track_return_per_env = torch.zeros(num_envs, device=device, dtype=torch.float32) + finished_episode_track_returns = [] # Python list[float] + + # ===== rollout ===== + for t in range(horizon): + is_step0 = (t == 0) # because your env uses fixed horizon episodes + out = net.act_sample(obs, is_step0=is_step0) + env_action = out["env_action"].detach() + logp = out["logp"].detach() + val = out["value"].detach() + + step_out = env.step(env_action) # action shape (N, 6+P+6) + next_obs_any, reward_any, done_any, infos = step_out[0]["obs"], step_out[1], step_out[2], step_out[3] + + next_obs = _as_tensor(next_obs_any, device=device, dtype=torch.float32) + rew_t = _as_tensor(reward_any, device=device, dtype=torch.float32) + done_t = _as_tensor(done_any, device=device, dtype=torch.bool) + t_out = _as_tensor(infos.get("time_outs", torch.zeros_like(done_t)), device=device, dtype=torch.bool) + + buffer.add(obs=obs, env_action=env_action, logp=logp, rew=rew_t, done=done_t, val=val, t_out=t_out) + + # episode return bookkeeping + r_track_t = _as_tensor(infos["r_track"], device=device, dtype=torch.float32).view_as(rew_t) + ep_return_per_env += rew_t.squeeze(1) # (N,) + ep_track_return_per_env += r_track_t.squeeze(1) # (N,) + + done_or_timeout = (done_t | t_out) # (N,) + if 
bool(done_or_timeout.any()): + finished_episode_returns.extend( + ep_return_per_env[done_or_timeout].detach().cpu().tolist() + ) + finished_episode_track_returns.extend( + ep_track_return_per_env[done_or_timeout].detach().cpu().tolist() + ) + ep_return_per_env[done_or_timeout] = 0.0 + ep_track_return_per_env[done_or_timeout] = 0.0 + + # reset env if done or timeout + next_obs = _partial_reset_and_merge_obs(env, next_obs, done_t, device) + obs = next_obs + + # ===== bootstrap value ===== + with torch.no_grad(): + last_v = net.forward(obs)["value"] # (N,) + + # ===== GAE / returns ===== + adv, ret = buffer.compute_gae(last_value=last_v.detach(), gamma=gamma, lam=gae_lambda) + + # normalize advantage + adv_flat = adv.view(-1) + ret_flat = ret.view(-1) + adv_flat = (adv_flat - torch.mean(adv_flat)) / (torch.std(adv_flat) + 1e-8) + + # ===== PPO updates ===== + policy_loss_epoch = 0.0 + value_loss_epoch = 0.0 + entropy_epoch = 0.0 + num_updates = 0 + + for idx, obs_b, act_b, old_logp_b, old_v_b in buffer.iterate_minibatches(minibatch_size, mini_epochs): + # Figure out which rows are step0 in this flattened view: + # Original layout is (H,N) -> flattened to (H*N,) + # Rows [0*N : 1*N) correspond to t=0 + H, N = buffer.h, buffer.n + is_step0_mask = (idx // N) == 0 # boolean per-sample + + # Recompute logp & entropy under NEW params (mixture by timestep type) + logp_new = torch.empty_like(old_logp_b) + ent_new = torch.empty_like(old_logp_b) + + if bool(is_step0_mask.any()): + mask = is_step0_mask + lp0, en0 = net.log_prob_recompute(obs_b[mask], act_b[mask], is_step0=True) + logp_new[mask] = lp0 + ent_new[mask] = en0 + if bool((~is_step0_mask).any()): + mask = ~is_step0_mask + lp1, en1 = net.log_prob_recompute(obs_b[mask], act_b[mask], is_step0=False) + logp_new[mask] = lp1 + ent_new[mask] = en1 + + ratio = torch.exp(logp_new - old_logp_b) + + adv_b = adv_flat[idx] + ret_b = ret_flat[idx] + + surrogate1 = ratio * adv_b + surrogate2 = torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * adv_b + policy_loss = -torch.min(surrogate1, surrogate2).mean() + + # Clipped value loss (same as your original) + # Re-run value head: + v_b = net.forward(obs_b)["value"] + value_clipped = old_v_b + (v_b - old_v_b).clamp(-clip_eps, clip_eps) + value_losses = (v_b - ret_b).pow(2) + value_losses_clipped = (value_clipped - ret_b).pow(2) + value_loss = 0.5 * torch.max(value_losses, value_losses_clipped).mean() + + loss = policy_loss + value_coef * value_loss - entropy_coef * ent_new.mean() + + optimizer.zero_grad(set_to_none=True) + loss.backward() + nn.utils.clip_grad_norm_(net.parameters(), 0.5) + optimizer.step() + + policy_loss_epoch += float(policy_loss.detach()) + value_loss_epoch += float(value_loss.detach()) + entropy_epoch += float(ent_new.mean().detach()) + num_updates += 1 + + mean_return = float(ret.mean().detach()) + print(f"[E{epoch:04d}] return={mean_return:7.3f} " + f"pi={policy_loss_epoch/max(1,num_updates):7.4f} " + f"v={value_loss_epoch/max(1,num_updates):7.4f} " + f"H={entropy_epoch/max(1,num_updates):7.4f}") + + # Explained variance + ev = float(1.0 - torch.var(ret_flat - buffer.values.view(-1)) / (torch.var(ret_flat) + 1e-12)) + + # TB logs + writer.add_scalar("return_mean", mean_return, epoch) + writer.add_scalar("policy_loss", policy_loss_epoch / max(1, num_updates), epoch) + writer.add_scalar("value_loss", value_loss_epoch / max(1, num_updates), epoch) + writer.add_scalar("entropy", entropy_epoch / max(1, num_updates), epoch) + writer.add_scalar("explained_variance", ev, epoch) + if 
len(finished_episode_returns) > 0:
            mean_ep_return = float(sum(finished_episode_returns) / len(finished_episode_returns))
            writer.add_scalar("episode_return_mean", mean_ep_return, epoch)
        if len(finished_episode_track_returns) > 0:
            mean_ep_track_return = float(sum(finished_episode_track_returns) / len(finished_episode_track_returns))
            writer.add_scalar("episode_tracking_return_mean", mean_ep_track_return, epoch)

        # Save best & periodic
        if mean_return > best_mean_return:
            best_mean_return = mean_return
            torch.save({
                "model": net.state_dict(),
                "optimizer": optimizer.state_dict(),
                "epoch": epoch,
                "best_return": best_mean_return
            }, ckpt_best)

        if (save_every > 0) and (epoch % save_every == 0):
            ckpt_path = run_dir / f"epoch_{epoch:04d}_ret_{mean_return:.2f}.pt"
            torch.save({
                "model": net.state_dict(),
                "optimizer": optimizer.state_dict(),
                "epoch": epoch,
                "return": mean_return
            }, ckpt_path)

    return {"best_mean_return": best_mean_return, "ckpt_best": ckpt_best}

diff --git a/openreal2sim/simulation/isaaclab/data_collection/learning/simple_ppo.py b/openreal2sim/simulation/isaaclab/data_collection/learning/simple_ppo.py
new file mode 100755
index 0000000..0cfbf76
--- /dev/null
+++ b/openreal2sim/simulation/isaaclab/data_collection/learning/simple_ppo.py
@@ -0,0 +1,331 @@
# learning/simple_ppo.py
from __future__ import annotations
from pathlib import Path
from typing import Tuple, Iterable
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

# ========================= Actor-Critic =========================
class ActorCritic(nn.Module):
    """Simple MLP actor-critic. Actor outputs mean in pre-squash space; action = tanh(N(mu, sigma))."""
    def __init__(self, obs_dim: int, act_dim: int, hidden_sizes: Tuple[int, ...] 
= (256, 256)): + super().__init__() + layers = [] + last = obs_dim + for hs in hidden_sizes: + layers += [nn.Linear(last, hs), nn.ReLU(inplace=True)] + last = hs + self.shared = nn.Sequential(*layers) + self.mu_head = nn.Linear(last, act_dim) # pre-squash mean + self.log_std = nn.Parameter(torch.full((act_dim,), -0.5)) # trainable log_std + self.v_head = nn.Linear(last, 1) + + def forward(self, obs: torch.Tensor): + """obs: (B, obs_dim) -> mu: (B, act_dim), std: (act_dim,), v: (B,)""" + x = self.shared(obs) + mu = self.mu_head(x) + v = self.v_head(x).squeeze(-1) + std = torch.exp(self.log_std) + return mu, std, v + + @staticmethod + def sample_tanh_gaussian(mu: torch.Tensor, std: torch.Tensor): + """Return action in [-1,1], log_prob with tanh correction, and pre-squash sample u.""" + dist = torch.distributions.Normal(mu, std) + u = dist.rsample() # reparameterized + a = torch.tanh(u) + # log|det(Jacobian of tanh)| = sum log(1 - tanh(u)^2) + log_prob = dist.log_prob(u) - torch.log(1.0 - a.pow(2) + 1e-6) + return a, log_prob.sum(-1), u + + @staticmethod + def log_prob_tanh_gaussian(mu: torch.Tensor, std: torch.Tensor, a: torch.Tensor): + """Compute log_prob(a) for tanh(N(mu,std)) using atanh(a).""" + a = a.clamp(min=-0.999999, max=0.999999) + atanh_a = 0.5 * (torch.log1p(a + 1e-6) - torch.log1p(-a + 1e-6)) + dist = torch.distributions.Normal(mu, std) + log_prob = dist.log_prob(atanh_a) - torch.log(1.0 - a.pow(2) + 1e-6) + return log_prob.sum(-1) + + +# ========================= Rollout Buffer ========================= +class RolloutBuffer: + """On-policy PPO rollout buffer with fixed horizon (all tensors on a single device).""" + def __init__(self, horizon: int, num_envs: int, obs_dim: int, act_dim: int, device: torch.device): + self.h, self.n, self.d, self.k = horizon, num_envs, obs_dim, act_dim + self.device = device + self.reset() + + def reset(self): + H, N, D, K, dev = self.h, self.n, self.d, self.k, self.device + self.obs = torch.zeros(H, N, D, device=dev, dtype=torch.float32) + self.actions = torch.zeros(H, N, K, device=dev, dtype=torch.float32) + self.logp = torch.zeros(H, N, device=dev, dtype=torch.float32) + self.rewards = torch.zeros(H, N, device=dev, dtype=torch.float32) + self.dones = torch.zeros(H, N, device=dev, dtype=torch.bool) + self.values = torch.zeros(H, N, device=dev, dtype=torch.float32) + self.timeouts = torch.zeros(H, N, device=dev, dtype=torch.bool) + self.ptr = 0 + + def add(self, *, obs, act, logp, rew, done, val, t_out): + """Store DETACHED copies to keep env and rollout outside the grad graph.""" + i = self.ptr + self.obs[i] = obs.detach() + self.actions[i] = act.detach() + self.logp[i] = logp.detach() + self.rewards[i] = rew.detach().squeeze(-1) # allow (B,1) or (B,) + self.dones[i] = done.detach().bool() + self.values[i] = val.detach() + self.timeouts[i] = t_out.detach().bool() + self.ptr += 1 + + @torch.no_grad() # make GAE graph-free + def compute_gae(self, last_value: torch.Tensor, gamma=0.99, lam=0.95): + """last_value: (N,)""" + H, N = self.h, self.n + adv = torch.zeros(H, N, device=self.device, dtype=torch.float32) + last_gae = torch.zeros(N, device=self.device, dtype=torch.float32) + for t in reversed(range(H)): + if t == H - 1: + next_nonterminal = (~self.dones[t] | self.timeouts[t]).float() + next_values = last_value + else: + next_nonterminal = (~self.dones[t + 1] | self.timeouts[t + 1]).float() + next_values = self.values[t + 1] + delta = self.rewards[t] + gamma * next_values * next_nonterminal - self.values[t] + last_gae = delta + gamma * lam * 
next_nonterminal * last_gae + adv[t] = last_gae + ret = adv + self.values + return adv, ret + + def iterate_minibatches(self, batch_size: int, num_epochs: int) -> Iterable: + H, N = self.h, self.n + total = H * N + obs = self.obs.view(total, -1) + acts = self.actions.view(total, -1) + logp = self.logp.view(total) + values = self.values.view(total) + + def idx_batches(): + idx = torch.randperm(total, device=self.device) + for i in range(0, total, batch_size): + j = idx[i : i + batch_size] + yield j + + for _ in range(num_epochs): + for j in idx_batches(): + yield j, obs[j], acts[j], logp[j], values[j] + + +# ========================= Helpers ========================= +def _as_tensor(x, device: torch.device, dtype: torch.dtype | None = None) -> torch.Tensor: + """Convert numpy/torch to torch on device with optional dtype, avoiding copies when possible.""" + if isinstance(x, torch.Tensor): + if dtype is not None and x.dtype != dtype: + x = x.to(dtype) + return x.to(device) + return torch.as_tensor(x, device=device, dtype=dtype) + + +@torch.no_grad() +def _reset_env(env, device: torch.device): + """Support env.reset() that returns torch or numpy.""" + rst = env.reset() + obs = rst["obs"] + return _as_tensor(obs, device=device, dtype=torch.float32) + + +def _choose_minibatch_size(total_batch: int, num_envs: int) -> int: + """Heuristic minibatch sizing that divides total_batch.""" + mb = max(num_envs * 2, 32) + while total_batch % mb != 0 and mb > 8: + mb //= 2 + if total_batch % mb != 0: + mb = num_envs + return mb + + +@torch.no_grad() +def _partial_reset_and_merge_obs(env, next_obs: torch.Tensor, done_t: torch.Tensor, device: torch.device): + """ + If any env is done: + 1) Try env.reset_done(done_mask) which returns {"obs": (M,D)} or (N,D) + 2) Else try env.reset(env_ids=idxs) + 3) Else fallback to global reset() + Then merge the reset obs into next_obs only for those done envs and return merged tensor. + """ + if not bool(done_t.any()): + return next_obs # nothing to do + + idxs = torch.nonzero(done_t, as_tuple=False).squeeze(-1) # (M,) + M = int(idxs.numel()) + if M == 0: + return next_obs + + rst = env.reset(env_ids=idxs) + new_obs = _as_tensor(rst["obs"], device=device, dtype=torch.float32) + if new_obs.shape[0] == next_obs.shape[0]: + return new_obs + merged = next_obs.clone() + merged[idxs] = new_obs + return merged + +# ========================= PPO Train ========================= +def ppo_train( + *, + env, + net: ActorCritic, + optimizer: optim.Optimizer, + device: torch.device, + total_epochs: int, + horizon: int, + mini_epochs: int, + batch_size_hint: int, + clip_eps: float, + value_coef: float, + entropy_coef: float, + gamma: float, + gae_lambda: float, + run_dir: Path, + save_every: int = 50, +): + """ + Generic PPO training loop. 
+ Assumes: + - env.reset() -> {"obs": torch or numpy, shape (N,D)} + - env.step(action) -> + ({"obs": torch/numpy (N,D)}, reward (N,1)/(N,), done (N,), {"time_outs": (N,)}) + - Partial reset: + env.reset_done(done_mask) OR env.reset(env_ids=idxs) OR fallback global reset() + """ + run_dir.mkdir(parents=True, exist_ok=True) + ckpt_best = run_dir / "best.pt" + + num_envs = getattr(env, "num_envs", None) + if num_envs is None: + raise ValueError("env.num_envs is required for batch sizing.") + obs_dim = env.observation_space.shape[0] + act_dim = env.action_space.shape[0] + + writer = SummaryWriter(log_dir=run_dir) + + print(f"[INFO] PPO training: epochs={total_epochs}, horizon={horizon}, num_envs={num_envs}, " + f"obs_dim={obs_dim}, act_dim={act_dim}") + buffer = RolloutBuffer(horizon=horizon, num_envs=num_envs, obs_dim=obs_dim, act_dim=act_dim, device=device) + + total_batch = horizon * num_envs if batch_size_hint <= 0 else int(batch_size_hint) + minibatch_size = _choose_minibatch_size(total_batch, num_envs) + + # initial reset + obs = _reset_env(env, device) + best_mean_return = -1e9 + + for epoch in range(1, total_epochs + 1): + buffer.reset() + + # ===== rollout ===== + for t in range(horizon): + mu, std, v = net(obs) + act, logp, _ = net.sample_tanh_gaussian(mu, std) # act in [-1,1], (N,K) + + # IMPORTANT: do not let env/sim see gradients + act_no_grad = act.detach() + + step_out = env.step(act_no_grad) + next_obs_any, reward_any, done_any, infos = step_out[0]["obs"], step_out[1], step_out[2], step_out[3] + + next_obs = _as_tensor(next_obs_any, device=device, dtype=torch.float32) + rew_t = _as_tensor(reward_any, device=device, dtype=torch.float32) # (N,1) or (N,) + done_t = _as_tensor(done_any, device=device, dtype=torch.bool) # (N,) + t_out = _as_tensor(infos.get("time_outs", torch.zeros_like(done_t)), device=device, dtype=torch.bool) + + buffer.add(obs=obs, act=act_no_grad, logp=logp, rew=rew_t, done=done_t, val=v, t_out=t_out) + + # partial reset (merge only done envs' obs) + next_obs = _partial_reset_and_merge_obs(env, next_obs, done_t, device) + + obs = next_obs + + # ===== bootstrap value ===== + _, _, last_v = net(obs) # (N,) + + # ===== GAE / returns (graph-free) ===== + adv, ret = buffer.compute_gae(last_value=last_v.detach(), gamma=gamma, lam=gae_lambda) + + # normalize advantage + adv_flat = adv.view(-1) + ret_flat = ret.view(-1) + adv_flat = (adv_flat - torch.mean(adv_flat)) / (torch.std(adv_flat) + 1e-8) + + # ===== PPO updates ===== + policy_loss_epoch = 0.0 + value_loss_epoch = 0.0 + entropy_epoch = 0.0 + num_updates = 0 + + for idx, obs_b, act_b, old_logp_b, old_v_b in buffer.iterate_minibatches(minibatch_size, mini_epochs): + adv_b = adv_flat[idx] + ret_b = ret_flat[idx] + + mu_b, std_b, v_b = net(obs_b) + new_logp_b = net.log_prob_tanh_gaussian(mu_b, std_b, act_b) + ratio = torch.exp(new_logp_b - old_logp_b) + + surrogate1 = ratio * adv_b + surrogate2 = torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * adv_b + policy_loss = -torch.min(surrogate1, surrogate2).mean() + + value_clipped = old_v_b + (v_b - old_v_b).clamp(-clip_eps, clip_eps) + value_losses = (v_b - ret_b).pow(2) + value_losses_clipped = (value_clipped - ret_b).pow(2) + value_loss = 0.5 * torch.max(value_losses, value_losses_clipped).mean() + + base_dist = torch.distributions.Normal(mu_b, std_b) + entropy = base_dist.entropy().sum(-1).mean() + + loss = policy_loss + value_coef * value_loss - entropy_coef * entropy + + optimizer.zero_grad(set_to_none=True) + loss.backward() + 
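            # Clip the global gradient norm (max 0.5) before the optimizer step;
            # a standard PPO stabilizer against occasional large policy ratios.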
nn.utils.clip_grad_norm_(net.parameters(), 0.5) + optimizer.step() + + policy_loss_epoch += float(policy_loss.detach()) + value_loss_epoch += float(value_loss.detach()) + entropy_epoch += float(entropy.detach()) + num_updates += 1 + + mean_return = float(ret.mean().detach()) + print(f"[E{epoch:04d}] return={mean_return:7.3f} " + f"pi={policy_loss_epoch/max(1,num_updates):7.4f} " + f"v={value_loss_epoch/max(1,num_updates):7.4f} " + f"H={entropy_epoch/max(1,num_updates):7.4f}") + + # Value head quality: explained variance on flattened batch + ev = float(1.0 - torch.var(ret_flat - buffer.values.view(-1)) / (torch.var(ret_flat) + 1e-12)) + + writer.add_scalar("return_mean", mean_return, epoch) + writer.add_scalar("policy_loss", policy_loss_epoch / max(1,num_updates), epoch) + writer.add_scalar("value_loss", value_loss_epoch / max(1,num_updates), epoch) + writer.add_scalar("entropy", entropy_epoch / max(1,num_updates), epoch) + writer.add_scalar("explained_variance", ev, epoch) + + # save best & periodic + if mean_return > best_mean_return: + best_mean_return = mean_return + torch.save({"model": net.state_dict(), + "optimizer": optimizer.state_dict(), + "epoch": epoch, + "best_return": best_mean_return}, + ckpt_best) + if (save_every > 0) and (epoch % save_every == 0): + ckpt_path = run_dir / f"epoch_{epoch:04d}_ret_{mean_return:.2f}.pt" + torch.save({"model": net.state_dict(), + "optimizer": optimizer.state_dict(), + "epoch": epoch, + "return": mean_return}, + ckpt_path) + + return {"best_mean_return": best_mean_return, "ckpt_best": ckpt_best} diff --git a/openreal2sim/simulation/isaaclab/data_collection/ply_to_depth.py b/openreal2sim/simulation/isaaclab/data_collection/ply_to_depth.py new file mode 100644 index 0000000..97c7579 --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/ply_to_depth.py @@ -0,0 +1,190 @@ +import numpy as np +import cv2 + +def load_ply(ply_path): + """Load PLY file and extract 3D points.""" + with open(ply_path, 'rb') as f: + # Read header + line = f.readline().decode('ascii').strip() + if line != 'ply': + raise ValueError("Not a valid PLY file") + + format_type = None + vertex_count = 0 + properties = [] + in_header = True + + while in_header: + line = f.readline().decode('ascii').strip() + + if line.startswith('format'): + format_type = line.split()[1] + elif line.startswith('element vertex'): + vertex_count = int(line.split()[2]) + elif line.startswith('property'): + prop_type = line.split()[1] + prop_name = line.split()[2] + properties.append((prop_name, prop_type)) + elif line == 'end_header': + in_header = False + + # Find x, y, z indices + prop_names = [p[0] for p in properties] + if 'x' not in prop_names or 'y' not in prop_names or 'z' not in prop_names: + raise ValueError("PLY file must contain x, y, z properties") + + x_idx = prop_names.index('x') + y_idx = prop_names.index('y') + z_idx = prop_names.index('z') + + # Read vertex data + points = [] + + if format_type == 'ascii': + # ASCII format + for _ in range(vertex_count): + line = f.readline().decode('ascii').strip().split() + x = float(line[x_idx]) + y = float(line[y_idx]) + z = float(line[z_idx]) + points.append([x, y, z]) + + elif format_type == 'binary_little_endian': + # Binary format + dtype_map = { + 'float': 'f4', 'double': 'f8', + 'uchar': 'u1', 'uint8': 'u1', + 'char': 'i1', 'int8': 'i1', + 'ushort': 'u2', 'uint16': 'u2', + 'short': 'i2', 'int16': 'i2', + 'uint': 'u4', 'uint32': 'u4', + 'int': 'i4', 'int32': 'i4' + } + + # Create numpy dtype for binary reading + dtype_list = 
[(p[0], dtype_map.get(p[1], 'f4')) for p in properties] + vertex_data = np.frombuffer(f.read(vertex_count * np.dtype(dtype_list).itemsize), + dtype=dtype_list, count=vertex_count) + + points = np.column_stack([vertex_data['x'], vertex_data['y'], vertex_data['z']]) + return points + + else: + raise ValueError(f"Unsupported PLY format: {format_type}") + + return np.array(points) + +def project_points_to_image(points, fx, fy, cx, cy, width, height): + """ + Project 3D points to 2D image plane using camera intrinsics. + + Args: + points: Nx3 array of 3D points in camera coordinates + fx, fy: Focal lengths in pixels + cx, cy: Principal point coordinates + width, height: Image dimensions + + Returns: + depth_image: 2D array containing depth values + """ + # Filter points behind the camera + valid_mask = points[:, 2] > 0 + points = points[valid_mask] + + if len(points) == 0: + print("Warning: No points in front of camera") + return np.zeros((height, width), dtype=np.float32) + + # Project 3D points to 2D using pinhole camera model + # u = fx * (X/Z) + cx + # v = fy * (Y/Z) + cy + u = (fx * points[:, 0] / points[:, 2] + cx).astype(int) + v = (fy * points[:, 1] / points[:, 2] + cy).astype(int) + depth = points[:, 2] + + # Filter points within image bounds + valid_idx = (u >= 0) & (u < width) & (v >= 0) & (v < height) + u = u[valid_idx] + v = v[valid_idx] + depth = depth[valid_idx] + + # Create depth image + depth_image = np.zeros((height, width), dtype=np.float32) + + # Handle multiple points projecting to same pixel (keep closest) + for i in range(len(u)): + if depth_image[v[i], u[i]] == 0 or depth[i] < depth_image[v[i], u[i]]: + depth_image[v[i], u[i]] = depth[i] + + return depth_image + +def save_depth_image(depth_image, output_path, normalize=True): + """Save depth image as TIFF, PNG, or NPY file.""" + if output_path.endswith('.npy'): + # Save raw depth values + np.save(output_path, depth_image) + print(f"Saved raw depth to {output_path}") + elif output_path.endswith(('.tiff', '.tif')): + # Save as 32-bit float TIFF with raw depth values + cv2.imwrite(output_path, depth_image.astype(np.float32)) + print(f"Saved depth as TIFF to {output_path}") + else: + # Normalize and save as grayscale image (PNG/JPG) + if normalize: + depth_vis = depth_image.copy() + mask = depth_vis > 0 + if mask.any(): + depth_vis[mask] = (depth_vis[mask] - depth_vis[mask].min()) / (depth_vis[mask].max() - depth_vis[mask].min()) + depth_vis = (depth_vis * 255).astype(np.uint8) + else: + depth_vis = np.zeros_like(depth_image, dtype=np.uint8) + else: + depth_vis = depth_image.astype(np.uint8) + + cv2.imwrite(output_path, depth_vis) + print(f"Saved depth visualization to {output_path}") + +def main(): + # ===== CONFIGURATION - EDIT THESE VALUES ===== + INPUT_PLY = '/app/outputs/demo_video/reconstruction/background_points.ply' # Path to input PLY file + OUTPUT_IMAGE = '/app/outputs/demo_video/reconstruction/image_background_depth.png' # Output path (.tiff, .tif, .png, .jpg, or .npy) + + # Camera intrinsic parameters + FX = 664.4203491210938 # Focal length x in pixels + FY = 663.5459594726562 # Focal length y in pixels + CX = 295.5 # Principal point x + CY = 166.0 # Principal point y + WIDTH = 584 # Image width + HEIGHT = 328 # Image height + # ============================================ + + # Load PLY file + print(f"Loading PLY file: {INPUT_PLY}") + points = load_ply(INPUT_PLY) + print(f"Loaded {len(points)} points") + + # Project to depth image + print("Projecting points to image plane...") + depth_image = 
project_points_to_image(
        points,
        FX, FY, CX, CY,
        WIDTH, HEIGHT
    )

    # Count valid pixels
    valid_pixels = np.sum(depth_image > 0)
    print(f"Valid depth pixels: {valid_pixels}/{WIDTH * HEIGHT}")

    # Save depth image
    save_depth_image(depth_image, OUTPUT_IMAGE)

    # Print depth statistics
    if valid_pixels > 0:
        print(f"\nDepth statistics:")
        print(f"  Min depth: {depth_image[depth_image > 0].min():.3f}")
        print(f"  Max depth: {depth_image.max():.3f}")
        print(f"  Mean depth: {depth_image[depth_image > 0].mean():.3f}")

if __name__ == '__main__':
    main()

diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_base.py b/openreal2sim/simulation/isaaclab/data_collection/sim_base.py
new file mode 100755
index 0000000..15b64d1
--- /dev/null
+++ b/openreal2sim/simulation/isaaclab/data_collection/sim_base.py
@@ -0,0 +1,1033 @@
from __future__ import annotations

import json
import os
import random
from pathlib import Path
from typing import Any, Dict, List, Optional


import curobo
import imageio

# Isaac Lab
import isaaclab.sim as sim_utils
import numpy as np
import torch
import time
from curobo.types.base import TensorDeviceType
from curobo.types.math import Pose
from curobo.types.robot import JointState, RobotConfig
from curobo.util.logger import setup_curobo_logger
from curobo.util_file import get_robot_configs_path, join_path, load_yaml
from curobo.wrap.reacher.motion_gen import (
    MotionGen,
    MotionGenConfig,
    MotionGenPlanConfig,
)
from isaaclab.controllers import DifferentialIKController, DifferentialIKControllerCfg
from isaaclab.devices import Se3Gamepad, Se3Keyboard, Se3SpaceMouse
from isaaclab.managers import SceneEntityCfg
from isaaclab.markers import VisualizationMarkers, VisualizationMarkersCfg
from isaaclab.sensors.camera import Camera
from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
from isaaclab.utils.math import (
    subtract_frame_transforms,
    transform_points,
    unproject_depth,
)

##-- for sim background to real background video composition--
from PIL import Image
import cv2

os.environ['CUROBO_TORCH_CUDA_GRAPH_RESET'] = '1'


def get_next_demo_id(demo_root: Path) -> int:
    if not demo_root.exists():
        return 0
    demo_ids = []
    for name in os.listdir(demo_root):
        if name.startswith("demo_"):
            try:
                demo_ids.append(int(name.split("_")[1]))
            except Exception:
                pass
    return max(demo_ids) + 1 if demo_ids else 0


class BaseSimulator:
    """
    Base class for robot simulation.
+ + Attributes: + self.sim, self.scene, self.sim_dt + self.robot, self.object_prim, self.background_prim + self.teleop_interface, self.sim_state_machine + self.diff_ik_cfg, self.diff_ik_controller + self.ee_goals, self.current_goal_idx, self.ik_commands + self.robot_entity_cfg, self.robot_gripper_cfg + self.gripper_open_tensor, self.gripper_close_tensor + self.ee_jacobi_idx, self.count, self.demo_id + self.camera, self.save_dict + self.selected_object_id, self.obj_rel_traj, self.debug_level + self._goal_vis, self._traj_vis + """ + + def __init__( + self, + sim: sim_utils.SimulationContext, + scene: Any, # InteractiveScene + *, + args, + sim_cfgs: Dict, + robot_pose: torch.Tensor, + cam_dict: Dict, + out_dir: Path, + img_folder: str, + set_physics_props: bool = True, + enable_motion_planning: bool = True, + debug_level: int = 1, + ) -> None: + # basic simulation setup + self.sim: sim_utils.SimulationContext = sim + self.sim_cfgs = sim_cfgs + self.scene = scene + self.sim_dt = sim.get_physics_dt() + + self.num_envs: int = int(scene.num_envs) + self._all_env_ids = torch.arange( + self.num_envs, device=sim.device, dtype=torch.long + ) + + self.cam_dict = cam_dict + self.out_dir: Path = out_dir + self.img_folder: str = img_folder + + # scene entities + self.robot = scene["robot"] + if robot_pose.ndim == 1: + self.robot_pose = ( + robot_pose.view(1, -1).repeat(self.num_envs, 1).to(self.robot.device) + ) + else: + assert robot_pose.shape[0] == self.num_envs and robot_pose.shape[1] == 7, ( + f"robot_pose must be [B,7], got {robot_pose.shape}" + ) + self.robot_pose = robot_pose.to(self.robot.device).contiguous() + + self.object_prim = scene["object_00"] + self.other_object_prims = [ + scene[key] + for key in scene.keys() + if f"object_" in key and key != "object_00" + ] + self.background_prim = scene["background"] + self.camera: Camera = scene["camera"] + + # physics properties + if set_physics_props: + static_friction = 5.0 + dynamic_friction = 5.0 + restitution = 0.0 + + # object: rigid prim -> has root_physx_view + if ( + hasattr(self.object_prim, "root_physx_view") + and self.object_prim.root_physx_view is not None + ): + obj_view = self.object_prim.root_physx_view + obj_mats = obj_view.get_material_properties() + vals_obj = torch.tensor( + [static_friction, dynamic_friction, restitution], + device=obj_mats.device, + dtype=obj_mats.dtype, + ) + obj_mats[:] = vals_obj + obj_view.set_material_properties( + obj_mats, self._all_env_ids.to(obj_mats.device) + ) + + # background: GroundPlaneCfg -> XFormPrim (no root_physx_view); skip if unavailable + if ( + hasattr(self.background_prim, "root_physx_view") + and self.background_prim.root_physx_view is not None + ): + bg_view = self.background_prim.root_physx_view + bg_mats = bg_view.get_material_properties() + vals_bg = torch.tensor( + [static_friction, dynamic_friction, restitution], + device=bg_mats.device, + dtype=bg_mats.dtype, + ) + bg_mats[:] = vals_bg + bg_view.set_material_properties( + bg_mats, self._all_env_ids.to(bg_mats.device) + ) + + # ik controller + self.diff_ik_cfg = DifferentialIKControllerCfg( + command_type="pose", use_relative_mode=False, ik_method="dls" + ) + self.diff_ik_controller = DifferentialIKController( + self.diff_ik_cfg, num_envs=self.num_envs, device=sim.device + ) + + # robot: joints / gripper / jacobian indices + self.robot_entity_cfg = SceneEntityCfg( + "robot", joint_names=["panda_joint.*"], body_names=["panda_hand"] + ) + self.robot_gripper_cfg = SceneEntityCfg( + "robot", joint_names=["panda_finger_joint.*"], 
body_names=["panda_hand"] + ) + self.robot_entity_cfg.resolve(scene) + self.robot_gripper_cfg.resolve(scene) + self.gripper_open_tensor = 0.04 * torch.ones( + (self.num_envs, len(self.robot_gripper_cfg.joint_ids)), + device=self.robot.device, + ) + self.gripper_close_tensor = torch.zeros( + (self.num_envs, len(self.robot_gripper_cfg.joint_ids)), + device=self.robot.device, + ) + if self.robot.is_fixed_base: + self.ee_jacobi_idx = self.robot_entity_cfg.body_ids[0] - 1 + else: + self.ee_jacobi_idx = self.robot_entity_cfg.body_ids[0] + + # demo count and data saving + self.count = 0 + self.demo_id = 0 + self.save_dict = { + "rgb": [], + "depth": [], + "segmask": [], + "joint_pos": [], + "joint_vel": [], + "actions": [], + "gripper_pos": [], + "gripper_cmd": [], + } + + # visualization + self.selected_object_id = 0 + self.debug_level = debug_level + + self.goal_vis_list = [] + if self.debug_level > 0: + for b in range(self.num_envs): + cfg = VisualizationMarkersCfg( + prim_path=f"/Visuals/ee_goal/env_{b:03d}", + markers={ + "frame": sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/UIElements/frame_prim.usd", + scale=(0.06, 0.06, 0.06), + visual_material=sim_utils.PreviewSurfaceCfg( + diffuse_color=(1.0, 0.0, 0.0) + ), + ), + }, + ) + self.goal_vis_list.append(VisualizationMarkers(cfg)) + + # curobo motion planning + self.enable_motion_planning = enable_motion_planning + if self.enable_motion_planning: + print(f"prepare curobo motion planning: {enable_motion_planning}") + self.prepare_curobo() + print("curobo motion planning ready.") + + # -------- Curobo Motion Planning ---------- + def prepare_curobo(self): + setup_curobo_logger("error") + # tensor_args = TensorDeviceType() + tensor_args = TensorDeviceType(device=self.sim.device, dtype=torch.float32) + curobo_path = curobo.__file__.split("/__init__")[0] + robot_file = f"{curobo_path}/content/configs/robot/franka.yml" + motion_gen_config = MotionGenConfig.load_from_robot_config( + robot_cfg=robot_file, + world_model=None, + tensor_args=tensor_args, + interpolation_dt=self.sim_dt, + use_cuda_graph=True if self.num_envs == 1 else False, + ) + self.motion_gen = MotionGen(motion_gen_config) + if self.num_envs == 1: + self.motion_gen.warmup(enable_graph=True) + _ = RobotConfig.from_dict( + load_yaml(join_path(get_robot_configs_path(), robot_file))["robot_cfg"], + tensor_args, + ) + + # ---------- Helpers ---------- + def _ensure_batch_pose(self, p, q): + """Ensure position [B,3], quaternion [B,4] on device.""" + B = self.scene.num_envs + p = torch.as_tensor(p, dtype=torch.float32, device=self.sim.device) + q = torch.as_tensor(q, dtype=torch.float32, device=self.sim.device) + if p.ndim == 1: + p = p.view(1, -1).repeat(B, 1) + if q.ndim == 1: + q = q.view(1, -1).repeat(B, 1) + return p.contiguous(), q.contiguous() + + def _traj_to_BT7(self, traj): + """Normalize various curobo traj.position shapes to [B, T, 7].""" + B = self.scene.num_envs + pos = traj.position # torch or numpy + pos = torch.as_tensor(pos, device=self.sim.device, dtype=torch.float32) + + if pos.ndim == 3: + # candidate shapes: [B,T,7] or [T,B,7] + if pos.shape[0] == B and pos.shape[-1] == 7: + return pos # [B,T,7] + if pos.shape[1] == B and pos.shape[-1] == 7: + return pos.permute(1, 0, 2).contiguous() # [B,T,7] + elif pos.ndim == 2 and pos.shape[-1] == 7: + # [T,7] → broadcast to all envs + return pos.unsqueeze(0).repeat(B, 1, 1) + # Fallback: flatten and infer + flat = pos.reshape(-1, 7) # [B*T,7] + T = flat.shape[0] // B + return flat.view(B, T, 7).contiguous() + + # 
---------- Planning / Execution (Single) ---------- + def reinitialize_motion_gen(self): + """ + Reinitialize the motion generation object. + Call this after a crash to restore a clean state. + """ + print("[INFO] Reinitializing motion planner...") + try: + # Clear CUDA cache + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + # Recreate the motion planner + from curobo.types.base import TensorDeviceType + from curobo.wrap.reacher.motion_gen import MotionGen, MotionGenConfig + from curobo.util_file import get_robot_configs_path, join_path, load_yaml + from curobo.types.robot import RobotConfig + import curobo + + tensor_args = TensorDeviceType(device=self.sim.device, dtype=torch.float32) + curobo_path = curobo.__file__.split("/__init__")[0] + robot_file = f"{curobo_path}/content/configs/robot/franka.yml" + + motion_gen_config = MotionGenConfig.load_from_robot_config( + robot_cfg=robot_file, + world_model=None, + tensor_args=tensor_args, + interpolation_dt=self.sim_dt, + use_cuda_graph=True if self.num_envs == 1 else False, + ) + + self.motion_gen = MotionGen(motion_gen_config) + + if self.num_envs == 1: + self.motion_gen.warmup(enable_graph=True) + + print("[INFO] Motion planner reinitialized successfully") + return True + + except Exception as e: + print(f"[ERROR] Failed to reinitialize motion planner: {e}") + return False + + + def motion_planning_batch( + self, position, quaternion, max_attempts=1, allow_graph=False, max_retries=1 + ): + """ + Multi-environment planning with automatic recovery from crashes. + Returns None on complete failure to signal restart needed. + """ + B = self.scene.num_envs + joint_pos = self.robot.data.joint_pos[ + :, self.robot_entity_cfg.joint_ids + ].contiguous() + + pos_b, quat_b = self._ensure_batch_pose(position, quaternion) + + for retry in range(max_retries): + try: + # Attempt planning + start_state = JointState.from_position(joint_pos) + goal_pose = Pose(position=pos_b, quaternion=quat_b) + plan_cfg = MotionGenPlanConfig( + max_attempts=max_attempts, enable_graph=allow_graph + ) + + result = self.motion_gen.plan_batch(start_state, goal_pose, plan_cfg) + + # Process results + paths = result.get_paths() + T_max = 1 + + for i, p in enumerate(paths): + if not result.success[i]: + print(f"[WARN] Motion planning failed for env {i}.") + else: + T_max = max(T_max, int(p.position.shape[-2])) + + dof = joint_pos.shape[-1] + BT7 = torch.zeros( + (B, T_max, dof), device=self.sim.device, dtype=torch.float32 + ) + + for i, p in enumerate(paths): + if result.success[i] == False: + BT7[i, :, :] = ( + joint_pos[i : i + 1, :].unsqueeze(1).repeat(1, T_max, 1) + ) + else: + Ti = p.position.shape[-2] + BT7[i, :Ti, :] = p.position.to(self.sim.device).to(torch.float32) + if Ti < T_max: + BT7[i, Ti:, :] = BT7[i, Ti - 1 : Ti, :] + + success = result.success if result.success is not None else torch.zeros( + B, dtype=torch.bool, device=self.sim.device + ) + + # Success! 
Return the trajectory + return BT7, success + + except AttributeError as e: + print(f"[ERROR] Motion planner crash on attempt {retry+1}/{max_retries}: {e}") + + if retry < max_retries - 1: + if self.reinitialize_motion_gen(): + print(f"[INFO] Retrying motion planning (attempt {retry+2}/{max_retries})...") + continue + else: + print("[ERROR] Failed to recover motion planner") + break + else: + print("[ERROR] Max retries reached, motion planning failed critically") + + except Exception as e: + print(f"[ERROR] Unexpected error in motion planning: {type(e).__name__}: {e}") + + if retry < max_retries - 1 and ("cuda graph" in str(e).lower() or "NoneType" in str(e)): + if self.reinitialize_motion_gen(): + print(f"[INFO] Retrying after CUDA error (attempt {retry+2}/{max_retries})...") + continue + break + + # If we get here, all retries failed - return None to signal complete failure + print("[ERROR] Motion planning failed critically - returning None to trigger restart") + return None, None + + + def motion_planning_single( + self, position, quaternion, max_attempts=1, use_graph=True, max_retries=1 + ): + """ + Single environment planning with automatic recovery from crashes. + Returns None on complete failure to signal restart needed. + """ + joint_pos0 = self.robot.data.joint_pos[:, self.robot_entity_cfg.joint_ids][ + 0:1 + ].contiguous() + + pos_b, quat_b = self._ensure_batch_pose(position, quaternion) + pos_b = pos_b[0:1] + quat_b = quat_b[0:1] + + for retry in range(max_retries): + try: + start_state = JointState.from_position(joint_pos0) + goal_pose = Pose(position=pos_b, quaternion=quat_b) + plan_cfg = MotionGenPlanConfig( + max_attempts=max_attempts, enable_graph=use_graph + ) + + result = self.motion_gen.plan_single(start_state, goal_pose, plan_cfg) + traj = result.get_interpolated_plan() + + if result.success[0] == True: + BT7 = ( + traj.position.to(self.sim.device).to(torch.float32).unsqueeze(0) + ) + else: + print(f"[WARN] Motion planning failed.") + BT7 = joint_pos0.unsqueeze(1) + + return BT7, result.success + + except AttributeError as e: + print(f"[ERROR] Motion planner crash on attempt {retry+1}/{max_retries}: {e}") + + if retry < max_retries - 1: + if self.reinitialize_motion_gen(): + print(f"[INFO] Retrying motion planning (attempt {retry+2}/{max_retries})...") + continue + else: + break + else: + print("[ERROR] Max retries reached") + + except Exception as e: + print(f"[ERROR] Unexpected error: {type(e).__name__}: {e}") + + if retry < max_retries - 1 and ("cuda graph" in str(e).lower() or "NoneType" in str(e)): + if self.reinitialize_motion_gen(): + continue + break + + # Complete failure - return None to signal restart + print("[ERROR] Motion planning failed critically - returning None to trigger restart") + return None, None + + def motion_planning(self, position, quaternion, max_attempts=1): + if self.scene.num_envs == 1: + return self.motion_planning_single( + position, quaternion, max_attempts=max_attempts, use_graph=True + ) + else: + return self.motion_planning_batch( + position, quaternion, max_attempts=max_attempts, allow_graph=False + ) + + def move_to_motion_planning( + self, + position: torch.Tensor, + quaternion: torch.Tensor, + gripper_open: bool = True, + record: bool = True, + ) -> torch.Tensor: + """ + Cartesian space control: Move the end effector to the desired position and orientation using motion planning. + Works with batched envs. If inputs are 1D, they will be broadcast to all envs. 
+        """
+        traj, success = self.motion_planning(position, quaternion)
+        if traj is None:
+            # Planner failed past recovery; propagate the (None, None) sentinel
+            # so callers can trigger a restart instead of crashing on traj.shape.
+            return None, None
+        BT7 = traj
+        T = BT7.shape[1]
+        last = None
+        for i in range(T):
+            joint_pos_des = BT7[:, i, :]  # [B,7]
+            self.apply_actions(joint_pos_des, gripper_open=gripper_open)
+            obs = self.get_observation(gripper_open=gripper_open)
+            if record:
+                self.record_data(obs)
+            last = joint_pos_des
+        return last, success
+
+    def compose_real_video(self, env_id: int = 0):
+        """
+        Composite the simulated video onto the real background using mask-based rendering.
+
+        Args:
+            env_id: Environment ID to process
+        """
+
+        def pad_to_even(frame):
+            """Pad frame to even dimensions for video encoding."""
+            H, W = frame.shape[:2]
+            pad_h = H % 2
+            pad_w = W % 2
+            if pad_h or pad_w:
+                pad = ((0, pad_h), (0, pad_w)) if frame.ndim == 2 else ((0, pad_h), (0, pad_w), (0, 0))
+                frame = np.pad(frame, pad, mode='edge')
+            return frame
+
+        # Construct paths
+        base_path = self.out_dir / self.img_folder
+        demo_path = self._demo_dir() / f"env_{env_id:03d}"
+
+        SIM_VIDEO_PATH = demo_path / "sim_video.mp4"
+        MASK_VIDEO_PATH = demo_path / "mask_video.mp4"
+        REAL_BACKGROUND_PATH = base_path / "reconstruction" / "background.jpg"
+        OUTPUT_PATH = demo_path / "real_video.mp4"
+
+        # Check that required files exist
+        if not SIM_VIDEO_PATH.exists():
+            print(f"[WARN] Simulated video not found: {SIM_VIDEO_PATH}")
+            return False
+        if not MASK_VIDEO_PATH.exists():
+            print(f"[WARN] Mask video not found: {MASK_VIDEO_PATH}")
+            return False
+        if not REAL_BACKGROUND_PATH.exists():
+            print(f"[WARN] Real background not found: {REAL_BACKGROUND_PATH}")
+            return False
+
+        # Load real background image
+        print(f"[INFO] Loading real background: {REAL_BACKGROUND_PATH}")
+        real_img = np.array(Image.open(REAL_BACKGROUND_PATH))
+
+        H, W, _ = real_img.shape
+        print(f"[INFO] Background size: {W}x{H}")
+
+        # Open videos
+        print(f"[INFO] Loading simulated video: {SIM_VIDEO_PATH}")
+        rgb_reader = imageio.get_reader(SIM_VIDEO_PATH)
+        print(f"[INFO] Loading mask video: {MASK_VIDEO_PATH}")
+        mask_reader = imageio.get_reader(MASK_VIDEO_PATH)
+
+        # Get metadata from the original video
+        N = rgb_reader.count_frames()
+        fps = rgb_reader.get_meta_data()['fps']
+        print(f"[INFO] Processing {N} frames at {fps} FPS...")
+
+        composed_images = []
+
+        for i in range(N):
+            # Read frames
+            sim_rgb = rgb_reader.get_data(i)
+            sim_mask = mask_reader.get_data(i)
+
+            # Convert mask to binary (grayscale > 127 = foreground)
+            if sim_mask.ndim == 3:
+                sim_mask = cv2.cvtColor(sim_mask, cv2.COLOR_RGB2GRAY)
+            sim_mask = sim_mask > 127
+
+            # Resize sim frames to match the real background if needed
+            if sim_rgb.shape[:2] != (H, W):
+                sim_rgb = cv2.resize(sim_rgb, (W, H))
+                sim_mask = cv2.resize(sim_mask.astype(np.uint8), (W, H)) > 0
+
+            # Compose: real background + simulated foreground (where mask is True)
+            composed = real_img.copy()
+            composed[sim_mask] = sim_rgb[sim_mask]
+            # Pad only after compositing; padding first would desynchronize the
+            # mask shape from the image and break the boolean indexing above.
+            composed = pad_to_even(composed)
+
+            composed_images.append(composed)
+
+            if (i + 1) % 10 == 0:
+                print(f"[INFO] Processed {i + 1}/{N} frames")
+
+        # Save composed video
+        print(f"[INFO] Saving composed video to: {OUTPUT_PATH}")
+        writer = imageio.get_writer(OUTPUT_PATH, fps=fps, macro_block_size=None)
+        for frame in composed_images:
+            writer.append_data(frame)
+        writer.close()
+
+        print(f"[INFO] Done! Saved {len(composed_images)} frames to {OUTPUT_PATH} at {fps} FPS")
+        return True
+
+    # ---------- Visualization ----------
+    def show_goal(self, pos, quat):
+        """
+        Show a pose with visual marker(s).
+ - if [B,3]/[B,4], update all envs; + - if [3]/[4] or [1,3]/[1,4], default to update env 0; + - optional env_ids specify a subset of envs to update; when a single pose is input, it will be broadcast to these envs. + """ + if self.debug_level == 0: + print("debug_level=0, skip visualization.") + return + + if not isinstance(pos, torch.Tensor): + pos_t = torch.tensor(pos, dtype=torch.float32, device=self.sim.device) + quat_t = torch.tensor(quat, dtype=torch.float32, device=self.sim.device) + else: + pos_t = pos.to(self.sim.device) + quat_t = quat.to(self.sim.device) + + if pos_t.ndim == 1: + pos_t = pos_t.view(1, -1) + if quat_t.ndim == 1: + quat_t = quat_t.view(1, -1) + + B = self.num_envs + + if pos_t.shape[0] == B: + for b in range(B): + self.goal_vis_list[b].visualize(pos_t[b : b + 1], quat_t[b : b + 1]) + else: + self.goal_vis_list[0].visualize(pos_t, quat_t) + + def set_robot_pose(self, robot_pose: torch.Tensor): + if robot_pose.ndim == 1: + self.robot_pose = ( + robot_pose.view(1, -1).repeat(self.num_envs, 1).to(self.robot.device) + ) + else: + assert robot_pose.shape[0] == self.num_envs and robot_pose.shape[1] == 7, ( + f"robot_pose must be [B,7], got {robot_pose.shape}" + ) + self.robot_pose = robot_pose.to(self.robot.device).contiguous() + + + # ---------- Environment Step ---------- + def step(self): + self.scene.write_data_to_sim() + self.sim.step() + self.camera.update(dt=self.sim_dt) + self.count += 1 + self.scene.update(self.sim_dt) + + # ---------- Apply actions to robot joints ---------- + def apply_actions(self, joint_pos_des, gripper_open: bool = True): + # joint_pos_des: [B, n_joints] + self.robot.set_joint_position_target( + joint_pos_des, joint_ids=self.robot_entity_cfg.joint_ids + ) + if gripper_open: + self.robot.set_joint_position_target( + self.gripper_open_tensor, joint_ids=self.robot_gripper_cfg.joint_ids + ) + else: + self.robot.set_joint_position_target( + self.gripper_close_tensor, joint_ids=self.robot_gripper_cfg.joint_ids + ) + self.step() + + # ---------- EE control ---------- + def move_to( + self, + position: torch.Tensor, + quaternion: torch.Tensor, + gripper_open: bool = True, + record: bool = True, + ) -> torch.Tensor: + if self.enable_motion_planning: + return self.move_to_motion_planning( + position, quaternion, gripper_open=gripper_open, record=record + ) + else: + return self.move_to_ik( + position, quaternion, gripper_open=gripper_open, record=record + ) + + def move_to_ik( + self, + position: torch.Tensor, + quaternion: torch.Tensor, + steps: int = 50, + gripper_open: bool = True, + record: bool = True, + ) -> torch.Tensor: + """ + Cartesian space control: Move the end effector to the desired position and orientation using inverse kinematics. + Works with batched envs. If inputs are 1D, they will be broadcast to all envs. + + Early-stop when both position and orientation errors are within tolerances. + 'steps' now acts as a max-iteration cap; the loop breaks earlier on convergence. 
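+
+        Convergence test per env (matching the tolerances set below):
+            pos_err = ||p_des - p_cur||              <= 3e-3 m
+            ori_err = 2 * arccos(|<q_des, q_cur>|)   <= ~3 deg
+        The absolute value of the quaternion dot product handles the q / -q
+        double cover of rotations.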
+ """ + # Ensure [B,3]/[B,4] tensors on device + position, quaternion = self._ensure_batch_pose(position, quaternion) + + # IK command (world frame goals) + ee_goals = torch.cat([position, quaternion], dim=1).to(self.sim.device).float() + self.diff_ik_controller.reset() + self.diff_ik_controller.set_command(ee_goals) + + # Tolerances (you can tune if needed) + pos_tol = 3e-3 # meters + ori_tol = 3.0 * np.pi / 180.0 # radians (~3 degrees) + + # Interpret 'steps' as max iterations; early-stop on convergence + max_steps = int(steps) if steps is not None and steps > 0 else 10_000 + + joint_pos_des = None + for _ in range(max_steps): + # Current EE pose (world) and Jacobian + jacobian = self.robot.root_physx_view.get_jacobians()[ + :, self.ee_jacobi_idx, :, self.robot_entity_cfg.joint_ids + ] + ee_pose_w = self.robot.data.body_state_w[ + :, self.robot_entity_cfg.body_ids[0], 0:7 + ] + root_pose_w = self.robot.data.root_state_w[:, 0:7] + joint_pos = self.robot.data.joint_pos[:, self.robot_entity_cfg.joint_ids] + + # Current EE pose expressed in robot base + ee_pos_b, ee_quat_b = subtract_frame_transforms( + root_pose_w[:, 0:3], + root_pose_w[:, 3:7], + ee_pose_w[:, 0:3], + ee_pose_w[:, 3:7], + ) + + # Compute next joint command + joint_pos_des = self.diff_ik_controller.compute( + ee_pos_b, ee_quat_b, jacobian, joint_pos + ) + + # Apply + self.apply_actions(joint_pos_des, gripper_open=gripper_open) + + # Optional recording + if record: + obs = self.get_observation(gripper_open=gripper_open) + self.record_data(obs) + + # --- Early-stop check --- + # Desired EE pose in base frame (convert world goal -> base) + des_pos_b, des_quat_b = subtract_frame_transforms( + root_pose_w[:, 0:3], root_pose_w[:, 3:7], position, quaternion + ) + # Position error [B] + pos_err = torch.norm(des_pos_b - ee_pos_b, dim=1) + # Orientation error [B]: angle between quaternions + # Note: q and -q are equivalent -> take |dot| + dot = torch.sum(des_quat_b * ee_quat_b, dim=1).abs().clamp(-1.0, 1.0) + ori_err = 2.0 * torch.acos(dot) + + done = (pos_err <= pos_tol) & (ori_err <= ori_tol) + if bool(torch.all(done)): + break + + return joint_pos_des + + # ---------- Robot Waiting ---------- + def wait(self, gripper_open, steps: int, record: bool = True): + joint_pos_des = self.robot.data.joint_pos[ + :, self.robot_entity_cfg.joint_ids + ].clone() + for _ in range(steps): + self.apply_actions(joint_pos_des, gripper_open=gripper_open) + obs = self.get_observation(gripper_open=gripper_open) + if record: + self.record_data(obs) + return joint_pos_des + + # ---------- Reset Envs ---------- + def reset(self, env_ids=None): + """ + Reset all envs or only those in env_ids. 
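+
+        Example (illustrative): ``self.reset()`` resets every env, while
+        ``self.reset(env_ids=[0, 2])`` resets only envs 0 and 2.
+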
+ Assumptions: + - self.robot_pose.shape == (B, 7) # base pose per env (wxyz in [:,3:]) + - self.robot.data.default_joint_pos == (B, 7) + - self.robot.data.default_joint_vel == (B, 7) + """ + device = self.object_prim.device + if env_ids is None: + env_ids_t = self._all_env_ids.to(device) # (B,) + else: + env_ids_t = torch.as_tensor(env_ids, device=device, dtype=torch.long).view( + -1 + ) # (M,) + M = int(env_ids_t.shape[0]) + + # --- object pose/vel: set object at env origins with identity quat --- + env_origins = self.scene.env_origins.to(device)[env_ids_t] # (M,3) + object_pose = torch.zeros((M, 7), device=device, dtype=torch.float32) + object_pose[:, :3] = env_origins + object_pose[:, 3] = 1.0 # wxyz = [1,0,0,0] + self.object_prim.write_root_pose_to_sim(object_pose, env_ids=env_ids_t) + self.object_prim.write_root_velocity_to_sim( + torch.zeros((M, 6), device=device, dtype=torch.float32), env_ids=env_ids_t + ) + self.object_prim.write_data_to_sim() + for prim in self.other_object_prims: + prim.write_root_pose_to_sim(object_pose, env_ids=env_ids_t) + prim.write_root_velocity_to_sim( + torch.zeros((M, 6), device=device, dtype=torch.float32), + env_ids=env_ids_t, + ) + prim.write_data_to_sim() + + # --- robot base pose/vel --- + # robot_pose is (B,7) in *local* base frame; add env origin offset per env + rp_local = self.robot_pose.to(self.robot.device)[env_ids_t] # (M,7) + env_origins_robot = env_origins.to(self.robot.device) # (M,3) + robot_pose_world = rp_local.clone() + robot_pose_world[:, :3] = env_origins_robot + robot_pose_world[:, :3] + self.robot.write_root_pose_to_sim(robot_pose_world, env_ids=env_ids_t) + self.robot.write_root_velocity_to_sim( + torch.zeros((M, 6), device=self.robot.device, dtype=torch.float32), + env_ids=env_ids_t, + ) + + # --- joints (B,7) -> select ids (M,7) --- + joint_pos = self.robot.data.default_joint_pos.to(self.robot.device)[ + env_ids_t + ] # (M,7) + joint_vel = self.robot.data.default_joint_vel.to(self.robot.device)[ + env_ids_t + ] # (M,7) + self.robot.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids_t) + self.robot.write_data_to_sim() + + # housekeeping + self.clear_data() + + # ---------- Get Observations ---------- + def get_observation(self, gripper_open) -> Dict[str, torch.Tensor]: + # camera outputs (already batched) + rgb = self.camera.data.output["rgb"] # [B,H,W,3] + depth = self.camera.data.output["distance_to_image_plane"] # [B,H,W] + ins_all = self.camera.data.output["instance_id_segmentation_fast"] # [B,H,W] + + B, H, W, _ = ins_all.shape + fg_mask_list = [] + obj_mask_list = [] + for b in range(B): + ins_id_seg = ins_all[b] + id_mapping = self.camera.data.info[b]["instance_id_segmentation_fast"][ + "idToLabels" + ] + fg_mask_b = torch.zeros_like( + ins_id_seg, dtype=torch.bool, device=ins_id_seg.device + ) + obj_mask_b = torch.zeros_like( + ins_id_seg, dtype=torch.bool, device=ins_id_seg.device + ) + for key, value in id_mapping.items(): + if "object" in value: + fg_mask_b |= ins_id_seg == key + obj_mask_b |= ins_id_seg == key + if "Robot" in value: + fg_mask_b |= ins_id_seg == key + fg_mask_list.append(fg_mask_b) + obj_mask_list.append(obj_mask_b) + fg_mask = torch.stack(fg_mask_list, dim=0) # [B,H,W] + obj_mask = torch.stack(obj_mask_list, dim=0) # [B,H,W] + + ee_pose_w = self.robot.data.body_state_w[ + :, self.robot_entity_cfg.body_ids[0], 0:7 + ] + joint_pos = self.robot.data.joint_pos[:, self.robot_entity_cfg.joint_ids] + joint_vel = self.robot.data.joint_vel[:, self.robot_entity_cfg.joint_ids] + gripper_pos = 
self.robot.data.joint_pos[:, self.robot_gripper_cfg.joint_ids] + gripper_cmd = ( + self.gripper_open_tensor if gripper_open else self.gripper_close_tensor + ) + + cam_pos_w = self.camera.data.pos_w + cam_quat_w = self.camera.data.quat_w_ros + ee_pos_cam, ee_quat_cam = subtract_frame_transforms( + cam_pos_w, cam_quat_w, ee_pose_w[:, 0:3], ee_pose_w[:, 3:7] + ) + ee_pose_cam = torch.cat([ee_pos_cam, ee_quat_cam], dim=1) + + points_3d_cam = unproject_depth( + self.camera.data.output["distance_to_image_plane"], + self.camera.data.intrinsic_matrices, + ) + points_3d_world = transform_points( + points_3d_cam, self.camera.data.pos_w, self.camera.data.quat_w_ros + ) + + object_center = self.object_prim.data.root_com_pos_w[:, :3] + + return { + "rgb": rgb, + "depth": depth, + "fg_mask": fg_mask, + "joint_pos": joint_pos, + "gripper_pos": gripper_pos, + "gripper_cmd": gripper_cmd, + "joint_vel": joint_vel, + "ee_pose_cam": ee_pose_cam, + "ee_pose_w": ee_pose_w, + "object_mask": obj_mask, + "points_cam": points_3d_cam, + "points_world": points_3d_world, + "object_center": object_center, + } + + # ---------- Task Completion Verifier ---------- + def is_success(self) -> bool: + raise NotImplementedError( + "BaseSimulator.is_success() should be implemented in subclass." + ) + + # ---------- Data Recording & Saving & Clearing ---------- + def record_data(self, obs: Dict[str, torch.Tensor]): + self.save_dict["rgb"].append(obs["rgb"].cpu().numpy()) # [B,H,W,3] + self.save_dict["depth"].append(obs["depth"].cpu().numpy()) # [B,H,W] + self.save_dict["segmask"].append(obs["fg_mask"].cpu().numpy()) # [B,H,W] + self.save_dict["joint_pos"].append(obs["joint_pos"].cpu().numpy()) # [B,nJ] + self.save_dict["gripper_pos"].append(obs["gripper_pos"].cpu().numpy()) # [B,3] + self.save_dict["gripper_cmd"].append(obs["gripper_cmd"].cpu().numpy()) # [B,1] + self.save_dict["joint_vel"].append(obs["joint_vel"].cpu().numpy()) + + def clear_data(self): + for key in self.save_dict.keys(): + self.save_dict[key] = [] + + def _demo_dir(self) -> Path: + return self.out_dir / self.img_folder / "demos" / f"demo_{self.demo_id}" + + def _env_dir(self, base: Path, b: int) -> Path: + d = base / f"env_{b:03d}" + d.mkdir(parents=True, exist_ok=True) + return d + + def save_data(self, ignore_keys: List[str] = []): + save_root = self._demo_dir() + save_root.mkdir(parents=True, exist_ok=True) + + stacked = {k: np.array(v) for k, v in self.save_dict.items()} + + for b in range(self.num_envs): + env_dir = self._env_dir(save_root, b) + for key, arr in stacked.items(): + if key in ignore_keys: # skip the keys for storage + continue + if key == "rgb": + video_path = env_dir / "sim_video.mp4" + writer = imageio.get_writer( + video_path, fps=50, macro_block_size=None + ) + for t in range(arr.shape[0]): + writer.append_data(arr[t, b]) + writer.close() + elif key == "segmask": + video_path = env_dir / "mask_video.mp4" + writer = imageio.get_writer( + video_path, fps=50, macro_block_size=None + ) + for t in range(arr.shape[0]): + writer.append_data((arr[t, b].astype(np.uint8) * 255)) + writer.close() + elif key == "depth": + depth_seq = arr[:, b] + flat = depth_seq[depth_seq > 0] + max_depth = np.percentile(flat, 99) if flat.size > 0 else 1.0 + depth_norm = np.clip(depth_seq / max_depth * 255.0, 0, 255).astype( + np.uint8 + ) + video_path = env_dir / "depth_video.mp4" + writer = imageio.get_writer( + video_path, fps=50, macro_block_size=None + ) + for t in range(depth_norm.shape[0]): + writer.append_data(depth_norm[t]) + writer.close() + 
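+                    # Keep the raw metric depth next to the preview video: the mp4
+                    # above is percentile-normalized to 8-bit and only for viewing.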
np.save(env_dir / f"{key}.npy", depth_seq) + else: + np.save(env_dir / f"{key}.npy", arr[:, b]) + json.dump(self.sim_cfgs, open(env_dir / "config.json", "w"), indent=2) + + print("[INFO]: Demonstration is saved at: ", save_root) + + # Compose real videos for all environments + print("\n[INFO] Composing real videos with background...") + for b in range(self.num_envs): + print(f"[INFO] Processing environment {b}/{self.num_envs}...") + success = self.compose_real_video(env_id=b) + if success: + print(f"[INFO] Real video composed successfully for env {b}") + else: + print(f"[WARN] Failed to compose real video for env {b}") + + demo_root = self.out_dir / "all_demos" + demo_root.mkdir(parents=True, exist_ok=True) + total_demo_id = get_next_demo_id(demo_root) + demo_save_path = demo_root / f"demo_{total_demo_id}" + demo_save_path.mkdir(parents=True, exist_ok=True) + meta_info = { + "path": str(save_root), + "fps": 50, + } + with open(demo_save_path / "meta_info.json", "w") as f: + json.dump(meta_info, f) + os.system(f"cp -r {save_root}/* {demo_save_path}") + print("[INFO]: Demonstration is saved at: ", demo_save_path) + + def delete_data(self): + save_path = self._demo_dir() + failure_root = self.out_dir / self.img_folder / "demos_failures" + failure_root.mkdir(parents=True, exist_ok=True) + fail_demo_id = get_next_demo_id(failure_root) + failure_path = failure_root / f"demo_{fail_demo_id}" + os.system(f"mv {save_path} {failure_path}") + for key in self.save_dict.keys(): + self.save_dict[key] = [] + print("[INFO]: Clear up the folder: ", save_path) diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_env_factory.py b/openreal2sim/simulation/isaaclab/data_collection/sim_env_factory.py new file mode 100755 index 0000000..073668b --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_env_factory.py @@ -0,0 +1,573 @@ +# -*- coding: utf-8 -*- +""" +Isaac Lab-based simulation environment factory. 
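+
+Typical usage (illustrative; mirrors sim_heuristic_manip.py in this patch):
+
+    from sim_utils.sim_utils import load_sim_parameters
+    cfgs = load_sim_parameters(Path.cwd(), "demo_video")
+    env, env_cfg = make_env(cfgs, num_envs=1, device="cuda:0")
+    sim, scene = env.sim, env.scene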
+"""
+
+from __future__ import annotations
+import copy
+import json
+import random
+from pathlib import Path
+from typing import Tuple, Optional, List, Dict
+
+import numpy as np
+import transforms3d
+
+# Isaac Lab core
+from isaaclab.utils import configclass
+from isaaclab.scene import InteractiveSceneCfg
+from isaaclab.assets import AssetBaseCfg, RigidObjectCfg
+from isaaclab.sim.schemas import schemas_cfg
+import isaaclab.sim as sim_utils
+from isaaclab.sensors.camera import CameraCfg
+from isaaclab_assets import FRANKA_PANDA_HIGH_PD_CFG
+
+# Manager-based API (terms/configs)
+from isaaclab.managers import (
+    TerminationTermCfg as DoneTerm,
+    EventTermCfg as EventTerm,
+    ObservationGroupCfg as ObsGroup,
+    ObservationTermCfg as ObsTerm,
+    RewardTermCfg as RewTerm,
+    CurriculumTermCfg as CurrTerm,
+    SceneEntityCfg,
+)
+
+# Task-specific MDP helpers (adjust path if needed)
+import isaaclab_tasks.manager_based.manipulation.lift.mdp as mdp
+
+from dataclasses import dataclass, MISSING
+
+
+@dataclass
+class SceneCtx:
+    cam_dict: Dict
+    obj_paths: List[str]
+    background_path: str
+    robot_pos: List[float]
+    robot_rot: List[float]
+    bg_physics: Optional[Dict] = None
+    obj_physics: Optional[List[Dict]] = None
+    use_ground_plane: bool = False
+    ground_z: Optional[float] = None
+
+
+_SCENE_CTX: Optional[SceneCtx] = None
+
+# ---- default physx presets ----
+DEFAULT_BG_PHYSICS = {
+    "mass_props": {"mass": 100.0},
+    "rigid_props": {"disable_gravity": True, "kinematic_enabled": True},
+    "collision_props": {
+        "collision_enabled": True,
+        "contact_offset": 0.0015,
+        "rest_offset": 0.0003,
+        "torsional_patch_radius": 0.02,
+        "min_torsional_patch_radius": 0.005,
+    },
+}
+DEFAULT_OBJ_PHYSICS = {
+    "mass_props": {"mass": 0.5},
+    "rigid_props": {"disable_gravity": False, "kinematic_enabled": False},
+    "collision_props": {
+        "collision_enabled": True,
+        "contact_offset": 0.0015,
+        "rest_offset": 0.0003,
+        "torsional_patch_radius": 0.02,
+        "min_torsional_patch_radius": 0.005,
+    },
+}
+
+
+def _deep_update(dst: dict, src: dict | None) -> dict:
+    """Recursive dict update without touching the original."""
+    out = copy.deepcopy(dst)
+    if not src:
+        return out
+    for k, v in src.items():
+        if isinstance(v, dict) and isinstance(out.get(k), dict):
+            out[k] = _deep_update(out[k], v)
+        else:
+            out[k] = v
+    return out
+
+
+# --------------------------------------------------------------------------------------
+# Camera / Robot builders (use _SCENE_CTX)
+# --------------------------------------------------------------------------------------
+def create_camera():
+    """Return a CameraCfg using the global SceneCtx."""
+    assert _SCENE_CTX is not None, (
+        "init_scene_configs/init_scene_from_scene_dict must be called first."
+ ) + C = _SCENE_CTX.cam_dict + width = int(C["width"]) + height = int(C["height"]) + fx, fy, cx, cy = C["fx"], C["fy"], C["cx"], C["cy"] + cam_orientation = tuple(C["cam_orientation"]) + cam_pos = tuple(C["scene_info"]["move_to"]) + return CameraCfg( + prim_path="{ENV_REGEX_NS}/Camera", + offset=CameraCfg.OffsetCfg(pos=cam_pos, rot=cam_orientation, convention="ros"), + data_types=[ + "rgb", + "distance_to_image_plane", + "distance_to_camera", + "instance_id_segmentation_fast", + ], + colorize_instance_id_segmentation=False, + spawn=sim_utils.PinholeCameraCfg.from_intrinsic_matrix( + intrinsic_matrix=[fx, 0, cx, 0, fy, cy, 0, 0, 1], + width=width, + height=height, + ), + width=width, + height=height, + ) + + +def create_robot(): + """Return a configured Franka Panda config using the global SceneCtx.""" + assert _SCENE_CTX is not None, ( + "init_scene_configs/init_scene_from_scene_dict must be called first." + ) + robot = FRANKA_PANDA_HIGH_PD_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + robot.init_state.pos = tuple(_SCENE_CTX.robot_pos) + robot.init_state.rot = tuple(_SCENE_CTX.robot_rot) + return robot + + +# -------------------------------------------------------------------------------------- +# Dynamic InteractiveSceneCfg builder +# -------------------------------------------------------------------------------------- +def build_tabletop_scene_cfg(): + """ + Auto-generate a multi-object InteractiveSceneCfg subclass: + - background, camera, robot + - object_00, object_01, ... based on _SCENE_CTX.obj_paths + """ + assert _SCENE_CTX is not None, ( + "init_scene_configs/init_scene_from_scene_dict must be called first." + ) + C = _SCENE_CTX + + base_attrs = {} + + # Light + base_attrs["light"] = AssetBaseCfg( + prim_path="/World/lightDome", + spawn=sim_utils.DomeLightCfg(intensity=4000.0, color=(1.0, 1.0, 1.0)), + ) + + _bg = _deep_update(DEFAULT_BG_PHYSICS, C.bg_physics) + _objs = [ + _deep_update(DEFAULT_OBJ_PHYSICS, obj_physics) for obj_physics in C.obj_physics + ] + + bg_mass_cfg = schemas_cfg.MassPropertiesCfg(**_bg["mass_props"]) + bg_rigid_cfg = schemas_cfg.RigidBodyPropertiesCfg(**_bg["rigid_props"]) + bg_colli_cfg = schemas_cfg.CollisionPropertiesCfg(**_bg["collision_props"]) + + # add another ground plane (mainly for better visualization) + base_attrs["backgroundn"] = AssetBaseCfg( + prim_path="/World/defaultGroundPlane", + spawn=sim_utils.GroundPlaneCfg(), + ) + z = float(C.ground_z if C.ground_z is not None else 0.0) + base_attrs["backgroundn"].init_state.pos = (0.0, 0.0, z - 0.2) + + # ---------- Background ---------- + if C.use_ground_plane: + # Simple horizontal ground; only z is customized. + # Note: GroundPlaneCfg doesn't take mass/rigid/collision configs (it's a helper), + # so we only set pose here. 
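+        # NOTE: this prim path matches the "/World/defaultGroundPlane" used by the
+        # visualization plane ("backgroundn") above; if Isaac Lab rejects the
+        # duplicate prim, give one of the two planes a distinct prim_path.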
+ base_attrs["background"] = AssetBaseCfg( + prim_path="/World/defaultGroundPlane", + spawn=sim_utils.GroundPlaneCfg(), + ) + z = float(C.ground_z if C.ground_z is not None else 0.0) + base_attrs["background"].init_state.pos = (0.0, 0.0, z) + # no usd_path assignment in __post_init__ when using ground + else: + # original USD background + base_attrs["background"] = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Background", + spawn=sim_utils.UsdFileCfg( + usd_path="", + mass_props=bg_mass_cfg, + rigid_props=bg_rigid_cfg, + collision_props=bg_colli_cfg, + ), + ) + + # Placeholder entries to be replaced in __post_init__ + base_attrs["camera"] = CameraCfg(prim_path="{ENV_REGEX_NS}/Camera") + base_attrs["robot"] = FRANKA_PANDA_HIGH_PD_CFG.replace( + prim_path="{ENV_REGEX_NS}/Robot" + ) + + # Instantiate objects + for i, usd_path in enumerate(C.obj_paths): + obj_mass_cfg = schemas_cfg.MassPropertiesCfg(**_objs[i]["mass_props"]) + obj_rigid_cfg = schemas_cfg.RigidBodyPropertiesCfg(**_objs[i]["rigid_props"]) + obj_colli_cfg = schemas_cfg.CollisionPropertiesCfg( + **_objs[i]["collision_props"] + ) + + obj_template = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Object", + spawn=sim_utils.UsdFileCfg( + usd_path="", + mass_props=obj_mass_cfg, + rigid_props=obj_rigid_cfg, + collision_props=obj_colli_cfg, + ), + ) + + name = f"object_{i:02d}" + cfg_i = copy.deepcopy(obj_template) + cfg_i.prim_path = f"{{ENV_REGEX_NS}}/{name}" + cfg_i.spawn.usd_path = usd_path + base_attrs[name] = cfg_i + + # Inject background path + replace camera/robot on finalize + def __post_init__(self): + if not C.use_ground_plane: + self.background.spawn.usd_path = C.background_path + self.camera = create_camera() + self.robot = create_robot() + + attrs = dict(base_attrs) + attrs["__doc__"] = "Auto-generated multi-object TableTop scene cfg." + attrs["__post_init__"] = __post_init__ + + DynamicSceneCfg = configclass( + type("TableTopSceneCfgAuto", (InteractiveSceneCfg,), attrs) + ) + return DynamicSceneCfg + + +# -------------------------------------------------------------------------------------- +# Build cam_dict & init directly from a raw scene dict +# -------------------------------------------------------------------------------------- +def init_scene_from_scene_dict( + scene: dict, + cfgs: dict, + *, + use_ground_plane: bool = False, +): + """ + Initialize SceneCtx directly from a raw scene dict. + If robot pose not provided, sample one with robot_placement_candidates_v2(). + """ + cam_dict = cfgs["cam_cfg"] + obj_paths = [o["usd"] for o in scene["objects"].values()] + background_path = scene["background"]["usd"] + + # overwrite physics + # Priority: args > scene > default + obj_physics = cfgs["physics_cfg"]["obj_physics"] + bg_physics = cfgs["physics_cfg"]["bg_physics"] + if obj_physics is None: + obj_physics = [o.get("physics", None) for o in scene["objects"].values()] + elif isinstance(obj_physics, dict): + obj_physics = [obj_physics for _ in scene["objects"].values()] + elif isinstance(obj_physics, list): + assert len(obj_physics) == len(scene["objects"]), ( + "obj_physics must be a list of the same length as scene['objects'] if provided." 
+ ) + pass + else: + raise TypeError("obj_physics must be None, a dict, or a list of dicts.") + bg_physics = ( + scene["background"].get("physics", None) if bg_physics is None else bg_physics + ) + + robot_pos = cfgs["robot_cfg"]["robot_pose"][:3] + robot_rot = cfgs["robot_cfg"]["robot_pose"][3:] + + ground_z = None + if use_ground_plane: + try: + ground_z = float(scene["plane"]["simulation"]["point"][2]) + except Exception as e: + raise ValueError( + f"use_ground_plane=True but scene['plane']['simulation'] missing/invalid: {e}" + ) + + # write global ctx (keep old fields the same) + global _SCENE_CTX + _SCENE_CTX = SceneCtx( + cam_dict=cam_dict, + obj_paths=obj_paths, + background_path=background_path, + robot_pos=list(robot_pos), + robot_rot=list(robot_rot), + bg_physics=bg_physics, + obj_physics=list(obj_physics), + use_ground_plane=use_ground_plane, + ground_z=ground_z, + ) + + return { + "cam_dict": cam_dict, + "obj_usd_paths": obj_paths, + "background_usd": background_path, + "robot_pos": robot_pos, + "robot_rot": robot_rot, + "use_ground_plane": use_ground_plane, + "ground_z": ground_z, + } + + +# -------------------------------------------------------------------------------------- +# Env factory +# -------------------------------------------------------------------------------------- +def _build_manip_env_cfg(scene_cfg_cls, *, num_envs: int, env_spacing: float = 2.5): + """Return a ManagerBasedRLEnvCfg subclass stitched together from sub-Cfgs.""" + from isaaclab.envs import ManagerBasedRLEnvCfg + from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg + from isaaclab.envs.mdp.actions.actions_cfg import ( + DifferentialInverseKinematicsActionCfg, + ) + + @configclass + class ManipEnvCfg(ManagerBasedRLEnvCfg): + scene = scene_cfg_cls(num_envs=num_envs, env_spacing=env_spacing) + commands = CommandsCfg() + actions = ActionsCfg() + observations = ObservationsCfg() + events = EventCfg() + rewards = RewardsCfg() + terminations = TerminationsCfg() + curriculum = CurriculumCfg() + + def __post_init__(self): + # ---- Sim & PhysX ---- + self.decimation = 2 # 4 + self.episode_length_s = 5.0 + self.sim.dt = 0.01 + self.sim.render_interval = self.decimation + + physx = self.sim.physx + physx.enable_ccd = True + physx.solver_type = 1 # TGS + physx.num_position_iterations = 16 + physx.num_velocity_iterations = 2 + physx.contact_offset = 0.003 + physx.rest_offset = 0.0 + physx.max_depenetration_velocity = 0.5 + physx.enable_stabilization = True + physx.enable_sleeping = True + + # ---- IK arm & binary gripper ---- + self.actions.arm_action = DifferentialInverseKinematicsActionCfg( + asset_name="robot", + joint_names=["panda_joint.*"], + body_name="panda_hand", + controller=DifferentialIKControllerCfg( + command_type="pose", use_relative_mode=True, ik_method="dls" + ), + scale=0.5, + body_offset=DifferentialInverseKinematicsActionCfg.OffsetCfg( + pos=[0.0, 0.0, 0.107] + ), + ) + self.actions.gripper_action = mdp.BinaryJointPositionActionCfg( + asset_name="robot", + joint_names=["panda_finger.*"], + open_command_expr={"panda_finger_.*": 0.04}, + close_command_expr={"panda_finger_.*": 0.0}, + ) + self.commands.object_pose.body_name = "panda_hand" + + return ManipEnvCfg + + +def load_scene_json(key: str) -> dict: + """Return the raw scene dict from outputs//simulation/scene.json.""" + scene_path = Path.cwd() / "outputs" / key / "simulation" / "scene.json" + if not scene_path.exists(): + raise FileNotFoundError(scene_path) + return json.load(open(scene_path)) + + +def 
make_env( + cfgs: dict, + num_envs: int = 1, + device: str = "cuda:0", + bg_simplify: bool = False, +) -> Tuple["ManagerBasedRLEnv", "ManagerBasedRLEnvCfg"]: + """ + Public entry to construct a ManagerBasedRLEnv from outputs//simulation/scene.json. + Returns: (env, env_cfg) + """ + from isaaclab.envs import ManagerBasedRLEnv + + # Load scene json and initialize global SceneCtx + scene = cfgs["scene_cfg"] + init_scene_from_scene_dict( + scene, + cfgs=cfgs, + use_ground_plane=bg_simplify, + ) + + # Build scene & env cfg + SceneCfg = build_tabletop_scene_cfg() + ManipEnvCfg = _build_manip_env_cfg(SceneCfg, num_envs=num_envs, env_spacing=2.5) + env_cfg = ManipEnvCfg() + env_cfg.sim.device = device + env_cfg.scene.num_envs = num_envs # double safety + + env = ManagerBasedRLEnv(cfg=env_cfg) + return env, env_cfg + + +# -------------------------------------------------------------------------------------- +# Observation/Action/Reward/Termination/Curriculum config classes +# -------------------------------------------------------------------------------------- +@configclass +class CommandsCfg: + """Command terms for the MDP.""" + + object_pose = mdp.UniformPoseCommandCfg( + asset_name="robot", + body_name=MISSING, + resampling_time_range=(5.0, 5.0), + debug_vis=False, + ranges=mdp.UniformPoseCommandCfg.Ranges( + pos_x=(0.4, 0.6), + pos_y=(-0.25, 0.25), + pos_z=(0.25, 0.5), + roll=(0.0, 0.0), + pitch=(0.0, 0.0), + yaw=(0.0, 0.0), + ), + ) + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + arm_action: ( + mdp.JointPositionActionCfg | mdp.DifferentialInverseKinematicsActionCfg + ) = MISSING # type: ignore + gripper_action: mdp.BinaryJointPositionActionCfg = MISSING # type: ignore + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel) + joint_pos = ObsTerm(func=mdp.joint_pos) + joint_vel_rel = ObsTerm(func=mdp.joint_vel_rel) + joint_vel = ObsTerm(func=mdp.joint_vel) + target_object_position = ObsTerm( + func=mdp.generated_commands, params={"command_name": "object_pose"} + ) + actions = ObsTerm(func=mdp.last_action) + rgb = ObsTerm( + func=mdp.image, + params={ + "sensor_cfg": SceneEntityCfg(name="camera"), + "data_type": "rgb", + "convert_perspective_to_orthogonal": False, + "normalize": False, + }, + ) + depth = ObsTerm( + func=mdp.image, + params={ + "sensor_cfg": SceneEntityCfg(name="camera"), + "data_type": "distance_to_image_plane", + "convert_perspective_to_orthogonal": False, + "normalize": False, + }, + ) + segmask = ObsTerm( + func=mdp.image, + params={ + "sensor_cfg": SceneEntityCfg(name="camera"), + "data_type": "instance_id_segmentation_fast", + "convert_perspective_to_orthogonal": False, + "normalize": False, + }, + ) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = False + + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset") + + +@configclass +class RewardsCfg: + """Reward terms for the MDP (kept simple).""" + + action_rate = RewTerm(func=mdp.action_rate_l2, weight=-1e-4) + joint_vel = RewTerm( + func=mdp.joint_vel_l2, + weight=-1e-4, + params={"asset_cfg": SceneEntityCfg("robot")}, + ) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + +@configclass +class CurriculumCfg: + 
"""Curriculum terms for the MDP.""" + + action_rate = CurrTerm( + func=mdp.modify_reward_weight, + params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000}, + ) + joint_vel = CurrTerm( + func=mdp.modify_reward_weight, + params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000}, + ) + + +# Public symbols +__all__ = [ + # Scene init + "init_scene_configs", + "init_scene_from_scene_dict", + # Scene builders + "create_camera", + "create_robot", + "build_tabletop_scene_cfg", + # Placement samplers + "robot_placement_candidates", + "robot_placement_candidates_v2", + # Manager config groups + "CommandsCfg", + "ActionsCfg", + "ObservationsCfg", + "EventCfg", + "RewardsCfg", + "TerminationsCfg", + "CurriculumCfg", + # Env factory + "make_env", +] diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py b/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py new file mode 100755 index 0000000..8f29fb3 --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py @@ -0,0 +1,787 @@ +""" +Heuristic manipulation policy in Isaac Lab simulation. +Using grasping and motion planning to perform object manipulation tasks. +""" +from __future__ import annotations + +# ───────────────────────────────────────────────────────────────────────────── AppLauncher ───────────────────────────────────────────────────────────────────────────── +import argparse, os, json, random, transforms3d +from pathlib import Path +import numpy as np +import torch +import yaml +from isaaclab.app import AppLauncher + +# ───────────────────────────────────────────────────────────────────────────── CLI ───────────────────────────────────────────────────────────────────────────── +parser = argparse.ArgumentParser("sim_policy") +parser.add_argument("--key", type=str, default="demo_video", help="scene key (outputs/)") +parser.add_argument("--robot", type=str, default="franka") +parser.add_argument("--num_envs", type=int, default=1) +parser.add_argument("--num_trials", type=int, default=1) +parser.add_argument("--teleop_device", type=str, default="keyboard") +parser.add_argument("--sensitivity", type=float, default=1.0) +AppLauncher.add_app_launcher_args(parser) +args_cli = parser.parse_args() +args_cli.enable_cameras = True +args_cli.headless = False # headless mode for batch execution +app_launcher = AppLauncher(vars(args_cli)) +simulation_app = app_launcher.app + +# ───────────────────────────────────────────────────────────────────────────── Runtime imports ───────────────────────────────────────────────────────────────────────────── +import isaaclab.sim as sim_utils +from isaaclab.utils.math import subtract_frame_transforms + +from graspnetAPI.grasp import GraspGroup + + +# ───────────────────────────────────────────────────────────────────────────── Simulation environments ───────────────────────────────────────────────────────────────────────────── +from sim_base import BaseSimulator, get_next_demo_id +from sim_env_factory import make_env +from sim_preprocess.grasp_utils import get_best_grasp_with_hints +from sim_utils.transform_utils import pose_to_mat, mat_to_pose, grasp_to_world, grasp_approach_axis_batch +from sim_utils.sim_utils import load_sim_parameters + +BASE_DIR = Path.cwd() +img_folder = args_cli.key +out_dir = BASE_DIR / "outputs" + + +# ───────────────────────────────────────────────────────────────────────────── Heuristic Manipulation ───────────────────────────────────────────────────────────────────────────── +class 
HeuristicManipulation(BaseSimulator): + """ + Physical trial-and-error grasping with approach-axis perturbation: + • Multiple grasp proposals executed in parallel; + • Every attempt does reset → pre → grasp → close → lift → check; + • Early stops when any env succeeds; then re-exec for logging. + """ + def __init__(self, sim, scene, sim_cfgs: dict): + robot_pose = torch.tensor( + sim_cfgs["robot_cfg"]["robot_pose"], + dtype=torch.float32, + device=sim.device + ) + super().__init__( + sim=sim, sim_cfgs=sim_cfgs, scene=scene, args=args_cli, + robot_pose=robot_pose, cam_dict=sim_cfgs["cam_cfg"], + out_dir=out_dir, img_folder=img_folder, + enable_motion_planning=True, + set_physics_props=True, debug_level=0, + ) + + self.selected_object_id = sim_cfgs["demo_cfg"]["manip_object_id"] + self.traj_key = sim_cfgs["demo_cfg"]["traj_key"] + self.traj_path = sim_cfgs["demo_cfg"]["traj_path"] + self.goal_offset = [0, 0, sim_cfgs["demo_cfg"]["goal_offset"]] + self.grasp_path = sim_cfgs["demo_cfg"]["grasp_path"] + self.grasp_idx = sim_cfgs["demo_cfg"]["grasp_idx"] + self.grasp_pre = sim_cfgs["demo_cfg"]["grasp_pre"] + self.grasp_delta = sim_cfgs["demo_cfg"]["grasp_delta"] + self.load_obj_goal_traj() + + def load_obj_goal_traj(self): + """ + Load the relative trajectory Δ_w (T,4,4) and precompute the absolute + object goal trajectory for each env using the *actual current* object pose + in the scene as T_obj_init (not env_origin). + T_obj_goal[t] = Δ_w[t] @ T_obj_init + + Sets: + self.obj_rel_traj : np.ndarray or None, shape (T,4,4) + self.obj_goal_traj_w: np.ndarray or None, shape (B,T,4,4) + """ + # —— 1) Load Δ_w —— + rel = np.load(self.traj_path).astype(np.float32) + self.obj_rel_traj = rel # (T,4,4) + + self.reset() + + # —— 2) Read current object initial pose per env as T_obj_init —— + B = self.scene.num_envs + # obj_state = self.object_prim.data.root_com_state_w[:, :7] # [B,7], pos(3)+quat(wxyz)(4) + obj_state = self.object_prim.data.root_state_w[:, :7] # [B,7], pos(3)+quat(wxyz)(4) + self.show_goal(obj_state[:, :3], obj_state[:, 3:7]) + + obj_state_np = obj_state.detach().cpu().numpy().astype(np.float32) + offset_np = np.asarray(self.goal_offset, dtype=np.float32).reshape(3) + obj_state_np[:, :3] += offset_np # raise a bit to avoid collision + + # Note: here the relative traj Δ_w is defined in world frame with origin (0,0,0), + # Hence, we need to normalize it to each env's origin frame. + origins = self.scene.env_origins.detach().cpu().numpy().astype(np.float32) # (B,3) + obj_state_np[:, :3] -= origins # normalize to env origin frame + + # —— 3) Precompute absolute object goals for all envs —— + T = rel.shape[0] + obj_goal = np.zeros((B, T, 4, 4), dtype=np.float32) + for b in range(B): + T_init = pose_to_mat(obj_state_np[b, :3], obj_state_np[b, 3:7]) # (4,4) + for t in range(T): + goal = rel[t] @ T_init + goal[:3, 3] += origins[b] # back to world frame + obj_goal[b, t] = goal + + self.obj_goal_traj_w = obj_goal # [B, T, 4, 4] + + def follow_object_goals(self, start_joint_pos, sample_step=1, visualize=True): + """ + Follow precomputed object absolute trajectory with automatic restart on failure. + If motion planning fails, raises an exception to trigger re-grasp from step 0. 
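+
+        Per env b and step t, the EE goal is derived under a rigid-grasp
+        assumption:
+            T_ee_in_obj = inv(T_obj_w) @ T_ee_w          # fixed at grasp time
+            T_ee_goal   = T_obj_goal[b, t] @ T_ee_in_obj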
+ """ + B = self.scene.num_envs + obj_goal_all = self.obj_goal_traj_w # [B, T, 4, 4] + T = obj_goal_all.shape[1] + + ee_w = self.robot.data.body_state_w[:, self.robot_entity_cfg.body_ids[0], 0:7] # [B,7] + obj_w = self.object_prim.data.root_state_w[:, :7] + + T_ee_in_obj = [] + for b in range(B): + T_ee_w = pose_to_mat(ee_w[b, :3], ee_w[b, 3:7]) + T_obj_w = pose_to_mat(obj_w[b, :3], obj_w[b, 3:7]) + T_ee_in_obj.append((np.linalg.inv(T_obj_w) @ T_ee_w).astype(np.float32)) + + joint_pos = start_joint_pos + root_w = self.robot.data.root_state_w[:, 0:7] + + t_iter = list(range(0, T, sample_step)) + t_iter = t_iter + [T-1] if t_iter[-1] != T-1 else t_iter + + for t in t_iter: + goal_pos_list, goal_quat_list = [], [] + print(f"[INFO] follow object goal step {t}/{T}") + for b in range(B): + T_obj_goal = obj_goal_all[b, t] + T_ee_goal = T_obj_goal @ T_ee_in_obj[b] + pos_b, quat_b = mat_to_pose(T_ee_goal) + goal_pos_list.append(pos_b.astype(np.float32)) + goal_quat_list.append(quat_b.astype(np.float32)) + + goal_pos = torch.as_tensor(np.stack(goal_pos_list), dtype=torch.float32, device=self.sim.device) + goal_quat = torch.as_tensor(np.stack(goal_quat_list), dtype=torch.float32, device=self.sim.device) + + if visualize: + self.show_goal(goal_pos, goal_quat) + ee_pos_b, ee_quat_b = subtract_frame_transforms( + root_w[:, :3], root_w[:, 3:7], goal_pos, goal_quat + ) + + joint_pos, success = self.move_to(ee_pos_b, ee_quat_b, gripper_open=False) + + # Check for critical motion planning failure + if joint_pos is None or success is None: + print(f"[CRITICAL] Motion planning failed at step {t}/{T}") + print("[CRITICAL] Object likely dropped - need to restart from grasp") + raise RuntimeError("MotionPlanningFailure_RestartNeeded") + + if torch.any(success == False): + print(f"[WARN] Some envs failed motion planning at step {t}/{T}, but continuing...") + + self.save_dict["actions"].append(np.concatenate([ee_pos_b.cpu().numpy(), ee_quat_b.cpu().numpy(), np.ones((B, 1))], axis=1)) + + joint_pos = self.wait(gripper_open=True, steps=10) + return joint_pos + + + def viz_object_goals(self, sample_step=1, hold_steps=20): + self.reset() + self.wait(gripper_open=True, steps=10) + B = self.scene.num_envs + env_ids = torch.arange(B, device=self.object_prim.device, dtype=torch.long) + goals = self.obj_goal_traj_w + t_iter = list(range(0, goals.shape[1], sample_step)) + t_iter = t_iter + [goals.shape[1]-1] if t_iter[-1] != goals.shape[1]-1 else t_iter + for t in t_iter: + print(f"[INFO] viz object goal step {t}/{goals.shape[1]}") + pos_list, quat_list = [], [] + for b in range(B): + pos, quat = mat_to_pose(goals[b, t]) + pos_list.append(pos.astype(np.float32)) + quat_list.append(quat.astype(np.float32)) + pose = self.object_prim.data.root_state_w[:, :7] + # pose = self.object_prim.data.root_com_state_w[:, :7] + pose[:, :3] = torch.tensor(np.stack(pos_list), dtype=torch.float32, device=pose.device) + pose[:, 3:7] = torch.tensor(np.stack(quat_list), dtype=torch.float32, device=pose.device) + self.show_goal(pose[:, :3], pose[:, 3:7]) + + for _ in range(hold_steps): + self.object_prim.write_root_pose_to_sim(pose, env_ids=env_ids) + self.object_prim.write_data_to_sim() + self.step() + + # ---------- Helpers ---------- + def _to_base(self, pos_w: np.ndarray | torch.Tensor, quat_w: np.ndarray | torch.Tensor): + """World → robot base frame for all envs.""" + root = self.robot.data.root_state_w[:, 0:7] # [B,7] + p_w, q_w = self._ensure_batch_pose(pos_w, quat_w) + pb, qb = subtract_frame_transforms( + root[:, 0:3], root[:, 3:7], 
p_w, q_w + ) + return pb, qb # [B,3], [B,4] + + # ---------- Batched execution & lift-check ---------- + def execute_and_lift_once_batch(self, info: dict, lift_height=0.06, position_threshold=0.1) -> tuple[np.ndarray, np.ndarray]: + """ + Reset → pre → grasp → close → lift → hold; return (success[B], score[B]). + Now propagates motion planning failures by returning (None, None). + """ + B = self.scene.num_envs + self.reset() + + # open gripper buffer + jp = self.robot.data.joint_pos[:, self.robot_entity_cfg.joint_ids] + self.wait(gripper_open=True, steps=4) + + # pre-grasp + jp, success = self.move_to(info["pre_p_b"], info["pre_q_b"], gripper_open=True) + if jp is None or success is None: + print("[CRITICAL] Motion planning failed during pre-grasp in grasp trial") + return None, None + if torch.any(success==False): + return np.zeros(B, bool), np.zeros(B, np.float32) + jp = self.wait(gripper_open=True, steps=3) + + # grasp + jp, success = self.move_to(info["p_b"], info["q_b"], gripper_open=True) + if jp is None or success is None: + print("[CRITICAL] Motion planning failed during grasp approach in grasp trial") + return None, None + if torch.any(success==False): + return np.zeros(B, bool), np.zeros(B, np.float32) + jp = self.wait(gripper_open=True, steps=2) + + # close + jp = self.wait(gripper_open=False, steps=6) + + # initial heights + obj0 = self.object_prim.data.root_com_pos_w[:, 0:3] + ee_w = self.robot.data.body_state_w[:, self.robot_entity_cfg.body_ids[0], 0:7] + ee_p0 = ee_w[:, :3] + robot_ee_z0 = ee_p0[:, 2].clone() + obj_z0 = obj0[:, 2].clone() + print(f"[INFO] mean object z0={obj_z0.mean().item():.3f} m, mean EE z0={robot_ee_z0.mean().item():.3f} m") # ADD THIS BACK + + # lift: keep orientation, add height + ee_q = ee_w[:, 3:7] + target_p = ee_p0.clone() + target_p[:, 2] += lift_height + + root = self.robot.data.root_state_w[:, 0:7] + p_lift_b, q_lift_b = subtract_frame_transforms( + root[:, 0:3], root[:, 3:7], + target_p, ee_q + ) + jp, success = self.move_to(p_lift_b, q_lift_b, gripper_open=False) + + # Check for motion planning failure + if jp is None or success is None: + print("[CRITICAL] Motion planning failed during lift in grasp trial") + return None, None + + if torch.any(success==False): + return np.zeros(B, bool), np.zeros(B, np.float32) + jp = self.wait(gripper_open=False, steps=8) + + # final heights and success checking + obj1 = self.object_prim.data.root_com_pos_w[:, 0:3] + ee_w1 = self.robot.data.body_state_w[:, self.robot_entity_cfg.body_ids[0], 0:7] + robot_ee_z1 = ee_w1[:, 2] + obj_z1 = obj1[:, 2] + print(f"[INFO] mean object z1={obj_z1.mean().item():.3f} m, mean EE z1={robot_ee_z1.mean().item():.3f} m") # ADD THIS BACK + + # Lift check + ee_diff = robot_ee_z1 - robot_ee_z0 + obj_diff = obj_z1 - obj_z0 + lift_check = (torch.abs(ee_diff - obj_diff) <= 0.01) & \ + (torch.abs(ee_diff) >= 0.5 * lift_height) & \ + (torch.abs(obj_diff) >= 0.5 * lift_height) + + # Goal proximity check + final_goal_matrices = self.obj_goal_traj_w[:, -1, :, :] + goal_positions_np = final_goal_matrices[:, :3, 3] + goal_positions = torch.tensor(goal_positions_np, dtype=torch.float32, device=self.sim.device) + current_obj_pos = self.object_prim.data.root_state_w[:, :3] + distances = torch.norm(current_obj_pos - goal_positions, dim=1) + proximity_check = distances <= position_threshold + + # Combined check + lifted = lift_check & proximity_check + + # Score calculation + score = torch.zeros_like(ee_diff) + if torch.any(lifted): + base_score = 1000.0 + epsilon = 0.001 + score[lifted] = 
base_score / (distances[lifted] + epsilon) + + # ADD THESE DEBUG PRINTS BACK + print(f"[INFO] Lift check passed: {lift_check.sum().item()}/{B}") + print(f"[INFO] Proximity check passed: {proximity_check.sum().item()}/{B}") + print(f"[INFO] Final lifted flag (both checks): {lifted.sum().item()}/{B}") + + if B <= 10: # Detailed output for small batches + for b in range(B): + print(f" Env {b}: lift={lift_check[b].item()}, prox={proximity_check[b].item()}, " + f"dist={distances[b].item():.4f}m, score={score[b].item():.2f}") + else: + print(f"[INFO] Distance to goal - mean: {distances.mean().item():.4f}m, " + f"min: {distances.min().item():.4f}m, max: {distances.max().item():.4f}m") + + return lifted.detach().cpu().numpy().astype(bool), score.detach().cpu().numpy().astype(np.float32) + + def build_grasp_info( + self, + grasp_pos_w_batch: np.ndarray, # (B,3) GraspNet proposal in world frame + grasp_quat_w_batch: np.ndarray, # (B,4) wxyz + pre_dist_batch: np.ndarray, # (B,) + delta_batch: np.ndarray # (B,) + ) -> dict: + """ + return grasp info dict for all envs in batch. + """ + B = self.scene.num_envs + p_w = np.asarray(grasp_pos_w_batch, dtype=np.float32).reshape(B, 3) + q_w = np.asarray(grasp_quat_w_batch, dtype=np.float32).reshape(B, 4) + pre_d = np.asarray(pre_dist_batch, dtype=np.float32).reshape(B) + delt = np.asarray(delta_batch, dtype=np.float32).reshape(B) + + a_batch = grasp_approach_axis_batch(q_w) # (B,3) + + pre_p_w = (p_w - pre_d[:, None] * a_batch).astype(np.float32) + gra_p_w = (p_w + delt[:, None] * a_batch).astype(np.float32) + + origins = self.scene.env_origins.detach().cpu().numpy().astype(np.float32) # (B,3) + pre_p_w = pre_p_w + origins + gra_p_w = gra_p_w + origins + + pre_pb, pre_qb = self._to_base(pre_p_w, q_w) + gra_pb, gra_qb = self._to_base(gra_p_w, q_w) + + return { + "pre_p_w": pre_p_w, "p_w": gra_p_w, "q_w": q_w, + "pre_p_b": pre_pb, "pre_q_b": pre_qb, + "p_b": gra_pb, "q_b": gra_qb, + "pre_dist": pre_d, "delta": delt, + } + + + def grasp_trials(self, gg, std: float = 0.0005, max_retries: int = 3): + """ + Try grasp proposals with automatic recovery from motion planning failures. + Returns dict with success flag and chosen grasp parameters. 
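+        A successful trial returns, for example (illustrative values):
+            {"success": True, "chosen_pose_w": (p_w, q_w),
+             "chosen_pre": 0.12, "chosen_delta": 0.0004}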
+ + Args: + gg: GraspGroup with grasp proposals + std: Standard deviation for random perturbations + max_retries: Number of retry attempts if motion planning fails + """ + B = self.scene.num_envs + idx_all = list(range(len(gg))) + if len(idx_all) == 0: + print("[ERR] empty grasp list.") + return { + "success": False, + "chosen_pose_w": None, + "chosen_pre": None, + "chosen_delta": None, + } + + rng = np.random.default_rng() + pre_dist_const = 0.12 # m + + # Retry loop for grasp selection phase + for retry_attempt in range(max_retries): + try: + print(f"\n{'='*60}") + print(f"GRASP SELECTION ATTEMPT {retry_attempt + 1}/{max_retries}") + print(f"{'='*60}\n") + + success = False + chosen_pose_w = None # (p_w, q_w) + chosen_pre = None + chosen_delta = None + + # Assign different grasp proposals to different envs + for start in range(0, len(idx_all), B): + block = idx_all[start : start + B] + if len(block) < B: + block = block + [block[-1]] * (B - len(block)) + + grasp_pos_w_batch, grasp_quat_w_batch = [], [] + for idx in block: + p_w, q_w = grasp_to_world(gg[int(idx)]) + grasp_pos_w_batch.append(p_w.astype(np.float32)) + grasp_quat_w_batch.append(q_w.astype(np.float32)) + grasp_pos_w_batch = np.stack(grasp_pos_w_batch, axis=0) # (B,3) + grasp_quat_w_batch = np.stack(grasp_quat_w_batch, axis=0) # (B,4) + self.show_goal(grasp_pos_w_batch, grasp_quat_w_batch) + + # Random disturbance along approach axis + pre_dist_batch = np.full((B,), pre_dist_const, dtype=np.float32) + delta_batch = rng.normal(0.0, std, size=(B,)).astype(np.float32) + + info = self.build_grasp_info(grasp_pos_w_batch, grasp_quat_w_batch, + pre_dist_batch, delta_batch) + + ok_batch, score_batch = self.execute_and_lift_once_batch(info) + + # Check if motion planning returned valid results + if ok_batch is None or score_batch is None: + print(f"[CRITICAL] Motion planning failed during grasp trial at block[{start}:{start+B}]") + raise RuntimeError("GraspTrialMotionPlanningFailure") + + ok_cnt = int(ok_batch.sum()) + print(f"[SEARCH] block[{start}:{start+B}] -> success {ok_cnt}/{B}") + + if ok_cnt > 0: + winner = int(np.argmax(score_batch)) + chosen_pose_w = (grasp_pos_w_batch[winner], grasp_quat_w_batch[winner]) + chosen_pre = float(pre_dist_batch[winner]) + chosen_delta = float(delta_batch[winner]) + success = True + print(f"[SUCCESS] Found valid grasp on attempt {retry_attempt + 1}") + return { + "success": success, + "chosen_pose_w": chosen_pose_w, + "chosen_pre": chosen_pre, + "chosen_delta": chosen_delta, + } + + # If we get here, no grasp succeeded in this attempt + if not success: + print(f"[WARN] No proposal succeeded in grasp trial attempt {retry_attempt + 1}/{max_retries}") + if retry_attempt < max_retries - 1: + print("[INFO] Retrying grasp selection...") + continue + else: + print("[ERR] No proposal succeeded to lift after all grasp trial attempts.") + return { + "success": False, + "chosen_pose_w": None, + "chosen_pre": None, + "chosen_delta": None, + } + + except RuntimeError as e: + if "GraspTrialMotionPlanningFailure" in str(e): + print(f"\n[RESTART] Motion planning failed during grasp trial on attempt {retry_attempt + 1}") + print(f"[RESTART] Remaining grasp trial attempts: {max_retries - retry_attempt - 1}\n") + + # Clear corrupted data and continue to next retry + self.clear_data() + + if retry_attempt < max_retries - 1: + continue + else: + print("[ERR] Grasp trial failed after all retry attempts") + return { + "success": False, + "chosen_pose_w": None, + "chosen_pre": None, + "chosen_delta": None, + } + else: + 
# Unknown error, re-raise + raise + + except Exception as e: + print(f"[ERROR] Unexpected error during grasp trial: {type(e).__name__}: {e}") + if retry_attempt < max_retries - 1: + print(f"[ERROR] Attempting grasp trial retry {retry_attempt + 1}/{max_retries}...") + self.clear_data() + continue + else: + return { + "success": False, + "chosen_pose_w": None, + "chosen_pre": None, + "chosen_delta": None, + } + + # Fallback (should not reach here) + return { + "success": False, + "chosen_pose_w": None, + "chosen_pre": None, + "chosen_delta": None, + } + + def is_success(self, position_threshold: float = 0.05) -> bool: + """ + Verify if the manipulation task succeeded by comparing final object position + with the goal position from the trajectory. + + Args: + position_threshold: Distance threshold in meters (default: 0.05m = 5cm) + + Returns: + bool: True if task succeeded, False otherwise + """ + # Get the final goal position from the precomputed trajectory + # self.obj_goal_traj_w has shape [B, T, 4, 4] + B = self.scene.num_envs + final_goal_matrices = self.obj_goal_traj_w[:, -1, :, :] # [B, 4, 4] - last timestep + + # Extract goal positions (translation part of the transform matrix) + goal_positions_np = final_goal_matrices[:, :3, 3] # [B, 3] - xyz positions (numpy) + + # Convert to torch tensor + goal_positions = torch.tensor(goal_positions_np, dtype=torch.float32, device=self.sim.device) + + # Get current object positions + current_obj_pos = self.object_prim.data.root_state_w[:, :3] # [B, 3] + + # Calculate distances + distances = torch.norm(current_obj_pos - goal_positions, dim=1) # [B] + + # Check if all environments succeeded + success_mask = distances <= position_threshold + + # Print results for each environment + print("\n" + "="*50) + print("TASK VERIFICATION RESULTS") + print("="*50) + for b in range(B): + status = "SUCCESS" if success_mask[b] else "FAILURE" + print(f"Env {b}: {status} (distance: {distances[b].item():.4f}m, threshold: {position_threshold}m)") + print(f" Goal position: [{goal_positions[b, 0].item():.3f}, {goal_positions[b, 1].item():.3f}, {goal_positions[b, 2].item():.3f}]") + print(f" Final position: [{current_obj_pos[b, 0].item():.3f}, {current_obj_pos[b, 1].item():.3f}, {current_obj_pos[b, 2].item():.3f}]") + + # Overall result + all_success = torch.all(success_mask).item() + print("="*50) + if all_success: + print("TASK VERIFIER: SUCCESS - All environments completed successfully!") + else: + success_count = torch.sum(success_mask).item() + print(f"TASK VERIFIER: FAILURE - {success_count}/{B} environments succeeded") + print("="*50 + "\n") + + return all_success + + + def inference(self, std: float = 0.0, max_restart_attempts: int = 10, + max_grasp_retries: int = 3, regrasp_after_failures: int = 3) -> bool: + """ + Main function with automatic restart on motion planning failures. + Now has separate retry loops for: + 1. Grasp selection phase (max_grasp_retries) + 2. Trajectory following phase (max_restart_attempts) + 3. 
Re-grasp after N consecutive trajectory failures (regrasp_after_failures) + """ + B = self.scene.num_envs + + # Read grasp proposals (only once) + npy_path = self.grasp_path + if npy_path is None or (not os.path.exists(npy_path)): + print(f"[ERR] grasps npy not found: {npy_path}") + return False + gg = GraspGroup().from_npy(npy_file_path=npy_path) + gg = get_best_grasp_with_hints(gg, point=None, direction=[0, 0, -1]) + + # Track consecutive trajectory failures for re-grasping + consecutive_trajectory_failures = 0 + total_grasp_attempts = 0 + max_total_grasp_attempts = 5 # Prevent infinite re-grasping loop + + # ========== OUTER LOOP: RE-GRASP IF NEEDED ========== + while total_grasp_attempts < max_total_grasp_attempts: + total_grasp_attempts += 1 + + # ========== PHASE 1: GRASP SELECTION (with retries) ========== + if self.grasp_idx >= 0: + # Fixed grasp index mode + if self.grasp_idx >= len(gg): + print(f"[ERR] grasp_idx {self.grasp_idx} out of range [0,{len(gg)})") + return False + print(f"[INFO] using fixed grasp index {self.grasp_idx} for all envs.") + p_w, q_w = grasp_to_world(gg[int(self.grasp_idx)]) + ret = { + "success": True, + "chosen_pose_w": (p_w.astype(np.float32), q_w.astype(np.float32)), + "chosen_pre": self.grasp_pre if self.grasp_pre is not None else 0.12, + "chosen_delta": self.grasp_delta if self.grasp_delta is not None else 0.0, + } + else: + # Automatic grasp selection with retries + if total_grasp_attempts > 1: + print(f"\n{'#'*60}") + print(f"RE-GRASP ATTEMPT {total_grasp_attempts}/{max_total_grasp_attempts}") + print(f"{'#'*60}\n") + ret = self.grasp_trials(gg, std=std, max_retries=max_grasp_retries) + + if not ret["success"]: + print("[ERR] No successful grasp found after all attempts") + return False + + print(f"\n[SUCCESS] Grasp selected: delta={ret['chosen_delta']:.4f}m") + + # Reset trajectory failure counter for new grasp + consecutive_trajectory_failures = 0 + + # ========== PHASE 2: TRAJECTORY FOLLOWING (with restarts) ========== + for restart_attempt in range(max_restart_attempts): + try: + print(f"\n{'='*60}") + print(f"TRAJECTORY EXECUTION ATTEMPT {restart_attempt + 1}/{max_restart_attempts}") + print(f"{'='*60}\n") + + # Prepare grasp info + p_win, q_win = ret["chosen_pose_w"] + p_all = np.repeat(p_win.reshape(1, 3), B, axis=0) + q_all = np.repeat(q_win.reshape(1, 4), B, axis=0) + pre_all = np.full((B,), ret["chosen_pre"], dtype=np.float32) + del_all = np.full((B,), ret["chosen_delta"], dtype=np.float32) + info_all = self.build_grasp_info(p_all, q_all, pre_all, del_all) + + # Reset and execute: open → pre → grasp → close → follow_object_goals + self.reset() + + # Save grasp poses relative to camera + cam_p = self.camera.data.pos_w + cam_q = self.camera.data.quat_w_ros + gp_w = torch.as_tensor(info_all["p_w"], dtype=torch.float32, device=self.sim.device) + gq_w = torch.as_tensor(info_all["q_w"], dtype=torch.float32, device=self.sim.device) + pre_w = torch.as_tensor(info_all["pre_p_w"], dtype=torch.float32, device=self.sim.device) + gp_cam, gq_cam = subtract_frame_transforms(cam_p, cam_q, gp_w, gq_w) + pre_cam, pre_qcm = subtract_frame_transforms(cam_p, cam_q, pre_w, gq_w) + self.save_dict["grasp_pose_cam"] = torch.cat([gp_cam, gq_cam], dim=1).unsqueeze(0).cpu().numpy() + self.save_dict["pregrasp_pose_cam"] = torch.cat([pre_cam, pre_qcm], dim=1).unsqueeze(0).cpu().numpy() + + jp = self.robot.data.joint_pos[:, self.robot_entity_cfg.joint_ids] + self.wait(gripper_open=True, steps=4) + + # Pre-grasp + print("[INFO] Moving to pre-grasp position...") + jp, 
success = self.move_to(info_all["pre_p_b"], info_all["pre_q_b"], gripper_open=True) + if jp is None or torch.any(success == False): + print("[WARN] Pre-grasp motion failed, restarting...") + consecutive_trajectory_failures += 1 + if consecutive_trajectory_failures >= regrasp_after_failures: + break # Exit to re-grasp + continue + self.save_dict["actions"].append(np.concatenate([info_all["pre_p_b"].cpu().numpy(), info_all["pre_q_b"].cpu().numpy(), np.zeros((B, 1))], axis=1)) + jp = self.wait(gripper_open=True, steps=3) + + # Grasp + print("[INFO] Moving to grasp position...") + jp, success = self.move_to(info_all["p_b"], info_all["q_b"], gripper_open=True) + if jp is None or torch.any(success == False): + print("[WARN] Grasp motion failed, restarting...") + consecutive_trajectory_failures += 1 + if consecutive_trajectory_failures >= regrasp_after_failures: + break # Exit to re-grasp + continue + self.save_dict["actions"].append(np.concatenate([info_all["p_b"].cpu().numpy(), info_all["q_b"].cpu().numpy(), np.zeros((B, 1))], axis=1)) + + # Close gripper + print("[INFO] Closing gripper...") + jp = self.wait(gripper_open=False, steps=50) + self.save_dict["actions"].append(np.concatenate([info_all["p_b"].cpu().numpy(), info_all["q_b"].cpu().numpy(), np.ones((B, 1))], axis=1)) + + # Follow object trajectory - THIS CAN RAISE RuntimeError + print("[INFO] Following object trajectory...") + jp = self.follow_object_goals(jp, sample_step=5, visualize=True) + + # If we reach here, trajectory following succeeded! + print("[SUCCESS] Trajectory completed successfully!") + + # Verify task completion + task_success = self.is_success(position_threshold=0.1) + if task_success: + print("[SUCCESS] Task verification passed!") + self.save_data() + return True + else: + print("[WARN] Task verification failed, but trajectory completed. Saving anyway...") + self.save_data() + return True + + except RuntimeError as e: + if "MotionPlanningFailure_RestartNeeded" in str(e): + print(f"\n[RESTART] Motion planning failed mid-trajectory on attempt {restart_attempt + 1}") + print("[RESTART] Object was likely dropped. 
Restarting from grasp phase...") + print(f"[RESTART] Remaining attempts: {max_restart_attempts - restart_attempt - 1}\n") + + # Clear the corrupted trajectory data + self.clear_data() + consecutive_trajectory_failures += 1 + + # Check if we need to re-grasp + if consecutive_trajectory_failures >= regrasp_after_failures: + print(f"\n{'!'*60}") + print(f"CONSECUTIVE TRAJECTORY FAILURES: {consecutive_trajectory_failures}") + print(f"RE-SELECTING GRASP (total attempt {total_grasp_attempts + 1}/{max_total_grasp_attempts})") + print(f"{'!'*60}\n") + break # Exit trajectory loop to re-grasp + + # Continue to next restart attempt + continue + else: + # Unknown error, re-raise + raise + + except Exception as e: + print(f"[ERROR] Unexpected error during execution: {type(e).__name__}: {e}") + print(f"[ERROR] Attempting restart {restart_attempt + 1}/{max_restart_attempts}...") + self.clear_data() + consecutive_trajectory_failures += 1 + + # Check if we need to re-grasp + if consecutive_trajectory_failures >= regrasp_after_failures: + print(f"\n{'!'*60}") + print(f"CONSECUTIVE TRAJECTORY FAILURES: {consecutive_trajectory_failures}") + print(f"RE-SELECTING GRASP (total attempt {total_grasp_attempts + 1}/{max_total_grasp_attempts})") + print(f"{'!'*60}\n") + break # Exit trajectory loop to re-grasp + + continue + + # Check why we exited the trajectory loop + if consecutive_trajectory_failures >= regrasp_after_failures: + # We broke out to re-grasp + if total_grasp_attempts >= max_total_grasp_attempts: + print(f"\n[FAILURE] Exhausted all grasp attempts ({max_total_grasp_attempts})") + return False + # Continue outer loop to select new grasp + continue + else: + # We exhausted all restart attempts without hitting re-grasp threshold + print(f"\n[FAILURE] Failed after {max_restart_attempts} trajectory restart attempts") + return False + + # If we exit the outer loop, we exhausted all grasp attempts + print(f"\n[FAILURE] Failed after {max_total_grasp_attempts} total grasp selection attempts") + return False + +# ───────────────────────────────────────────────────────────────────────────── Entry Point ───────────────────────────────────────────────────────────────────────────── +def main(): + sim_cfgs = load_sim_parameters(BASE_DIR, args_cli.key) + env, _ = make_env( + cfgs=sim_cfgs, num_envs=args_cli.num_envs, + device=args_cli.device, + bg_simplify=False, + ) + sim, scene = env.sim, env.scene + + my_sim = HeuristicManipulation(sim, scene, sim_cfgs=sim_cfgs) + + demo_root = (out_dir / img_folder / "demos").resolve() + + for _ in range(args_cli.num_trials): + + robot_pose = torch.tensor(sim_cfgs["robot_cfg"]["robot_pose"], dtype=torch.float32, device=my_sim.sim.device) # [7], pos(3)+quat(wxyz)(4) + my_sim.set_robot_pose(robot_pose) + my_sim.demo_id = get_next_demo_id(demo_root) + my_sim.reset() + print(f"[INFO] start simulation demo_{my_sim.demo_id}") + # Note: if you set viz_object_goals(), remember to disable gravity and collision for object + # my_sim.viz_object_goals(sample_step=10, hold_steps=40) + my_sim.inference() + + env.close() + simulation_app.close() + +if __name__ == "__main__": + main() + os.system("quit()") + simulation_app.close() \ No newline at end of file diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_generation.py b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_generation.py new file mode 100755 index 0000000..f932334 --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_generation.py @@ -0,0 
+1,252 @@ +# grasp_det_full_all.py (batch keys) +from __future__ import annotations +import argparse, json, os +from pathlib import Path +import numpy as np +import trimesh +import open3d as o3d +import yaml + +from grasp_utils import MyGraspNet + + +def sample_surface(glb_path: str, n_points: int = 5000) -> np.ndarray: + """Load mesh and sample surface points.""" + mesh = trimesh.load_mesh(glb_path) + if isinstance(mesh, trimesh.Scene): + mesh = list(mesh.geometry.values())[0] + pts, _ = trimesh.sample.sample_surface(mesh, n_points) + return np.array(pts, dtype=np.float32) + + +def crop_pointcloud_quadrant( + points, + quadrant=1, + axes="xy", +): + # Map axis letters to indices + axis_index = {"x": 0, "y": 1, "z": 2} + a1, a2 = axes[0], axes[1] + idx1, idx2 = axis_index[a1], axis_index[a2] + + center_full = points.mean(axis=0) + + c1, c2 = center_full[idx1], center_full[idx2] + + ge = np.greater # > + + # Build boolean masks for the four quadrants + # Right/Left split along axis1, Top/Bottom split along axis2 + right = ge(points[:, idx1], c1) # axis1 >= c1 (or > c1) + top = ge(points[:, idx2], c2) # axis2 >= c2 (or > c2) + + # Quadrant mapping per convention documented above + if quadrant == 1: + mask = right & top + elif quadrant == 2: + mask = (~right) & top + elif quadrant == 3: + mask = (~right) & (~top) + elif quadrant == 4: + mask = right & (~top) + else: + raise ValueError("`quadrant` must be an integer in {1,2,3,4}.") + + cropped_points = points[mask] + return cropped_points, mask + + +def read_score_safe(gg, i: int, fallback: float) -> float: + """Try to read candidate score; fall back to a synthetic rank-based value.""" + try: + return float(getattr(gg[i], "score")) + except Exception: + pass + try: + return float(gg.grasp_group_array[i][0]) + except Exception: + return fallback + + +def grasps_to_pointcloud( + gg, pts_per_gripper: int = 400, color=(1.0, 0.0, 0.0) +) -> o3d.geometry.PointCloud: + """Sample each gripper mesh to points and tint with color.""" + geoms = gg.to_open3d_geometry_list() # list of TriangleMesh + out = o3d.geometry.PointCloud() + for g in geoms: + out += g.sample_points_uniformly(pts_per_gripper) + if len(out.points) > 0: + out.colors = o3d.utility.Vector3dVector( + np.tile(np.array(color, dtype=np.float32), (len(out.points), 1)) + ) + return out + + +def save_vis_ply( + points_xyz: np.ndarray, + gg, + save_path: Path, + pts_per_gripper: int = 400, + cloud_color=(0.0, 1.0, 0.0), +): + """Write a PLY: green object cloud + red ALL grasp candidates.""" + cloud = o3d.geometry.PointCloud() + cloud.points = o3d.utility.Vector3dVector(points_xyz.astype(np.float32)) + cloud.colors = o3d.utility.Vector3dVector( + np.tile(np.array(cloud_color, dtype=np.float32), (len(cloud.points), 1)) + ) + grasp_pcd = grasps_to_pointcloud( + gg, pts_per_gripper=pts_per_gripper, color=(1.0, 0.0, 0.0) + ) + merged = cloud + grasp_pcd + save_path.parent.mkdir(parents=True, exist_ok=True) + o3d.io.write_point_cloud(str(save_path), merged) + print(f"[VIS] saved {save_path}") + + +def process_one_object( + gg_net: MyGraspNet, + obj_idx: int, + obj_dict: dict, + proposals_dir: Path, + keep: int | None, + nms: bool, + n_points: int, + overwrite: bool, + vis_pts_per_gripper: int = 400, +) -> str | None: + """ + Build full point cloud for one object, run GraspNet, export all candidates to NPZ, + save a PLY visualization, and return the NPZ path (string). 
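The quadrant crop above splits the cloud at its centroid along two chosen axes (it is only exercised via the commented-out call in `process_one_object` below). A toy check of the masking convention:

```python
import numpy as np

# Four points, one per XY quadrant around the centroid (the origin here).
pts = np.array([[ 1.0,  1.0, 0.0],
                [-1.0,  1.0, 0.0],
                [-1.0, -1.0, 0.0],
                [ 1.0, -1.0, 0.0]], dtype=np.float32)

cropped, mask = crop_pointcloud_quadrant(pts, quadrant=2, axes="xy")
print(mask)     # [False  True False False] -> axis1 < center, axis2 >= center
print(cropped)  # [[-1.  1.  0.]]
```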
+ """ + glb_path = obj_dict["optimized"] + if glb_path is None or (not os.path.exists(glb_path)): + print( + f"[WARN][{obj_dict['oid']}_{obj_dict['name']}] mesh not found -> {glb_path}" + ) + return None + + npy_path = proposals_dir / f"{obj_dict['oid']}_{obj_dict['name']}_grasp.npy" + vis_path = proposals_dir / f"{obj_dict['oid']}_{obj_dict['name']}_grasp_viz.ply" + + # Build cloud & inference + pcs = sample_surface(glb_path, n_points=n_points) + # pcs = crop_pointcloud_quadrant(pcs, quadrant=3, axes="xy")[0] + pcs[..., 2] *= -1.0 # keep identical convention + gg = gg_net.inference(pcs) + if nms: + gg = gg.nms() + gg = gg.sort_by_score() + if keep is not None and len(gg) > keep: + gg = gg[:keep] + if len(gg) == 0: + print(f"[WARN][{obj_dict['oid']}_{obj_dict['name']}] zero proposals") + return None + + # Save visualization + save_vis_ply(pcs, gg, vis_path, pts_per_gripper=vis_pts_per_gripper) + + for g_i in range(len(gg)): + translation = gg[g_i].translation + rotation = gg[g_i].rotation_matrix + translation = np.array([translation[0], translation[1], -translation[2]]) + rotation[2, :] = -rotation[2, :] + rotation[:, 2] = -rotation[:, 2] + gg.grasp_group_array[g_i][13:16] = translation + gg.grasp_group_array[g_i][4:13] = rotation.reshape(-1) + + gg.save_npy(npy_path) + print( + f"[OK][{obj_dict['oid']}_{obj_dict['name']}] saved {len(gg.grasp_group_array)} -> {npy_path.name}" + ) + + return str(npy_path.resolve()) + + +def run_for_key( + key: str, + n_points: int, + keep: int | None, + nms: bool, + overwrite: bool, + vis_pts_per_gripper: int, +): + base_dir = Path.cwd() + out_dir = base_dir / "outputs" + scene_json = out_dir / key / "simulation" / "scene.json" + if not scene_json.exists(): + raise FileNotFoundError(scene_json) + + # Load scene.json once + scene_dict = json.load(open(scene_json)) + objects = scene_dict.get("objects", {}) + if not isinstance(objects, dict) or len(objects) == 0: + print(f"[WARN][{key}] scene_dict['objects'] is empty.") + return + + # Prepare save dir + key_dir = out_dir / key + proposals_dir = key_dir / "grasps" + proposals_dir.mkdir(parents=True, exist_ok=True) + + # Single GraspNet instance reused for all objects + net = MyGraspNet() + + # Loop all objects + for i, obj in objects.items(): + npy_path = process_one_object( + gg_net=net, + obj_idx=i, + obj_dict=obj, + proposals_dir=proposals_dir, + keep=keep, + nms=nms, + n_points=n_points, + overwrite=overwrite, + vis_pts_per_gripper=vis_pts_per_gripper, + ) + if npy_path is not None: + scene_dict["objects"][i]["grasps"] = npy_path + with open(scene_json, "w") as f: + json.dump(scene_dict, f, indent=2) + print( + f"[OK][{key}] scene.json updated with 'grasps' for {obj['oid']}_{obj['name']}." 
+            )


+def main():
+    parser = argparse.ArgumentParser(
+        "Export grasp proposals for ALL objects (batch over keys) and write paths into scene.json"
+    )
+    parser.add_argument("--n_points", type=int, default=100000)
+    parser.add_argument("--keep", type=int, default=None)  # set None to keep all
+    # NOTE: argparse's type=bool treats any non-empty string as True ("--nms False"
+    # would still enable NMS), so parse the flag value explicitly.
+    parser.add_argument("--nms", type=lambda s: str(s).lower() in ("1", "true", "yes"), default=True)
+    parser.add_argument("--overwrite", action="store_true")
+    parser.add_argument(
+        "--vis_pts_per_gripper",
+        type=int,
+        default=400,
+        help="points sampled per gripper mesh for PLY",
+    )
+    args = parser.parse_args()
+
+    # load keys from YAML
+    cfg_path = Path.cwd() / "config" / "config.yaml"
+    cfg = yaml.safe_load(cfg_path.open("r"))
+    keys = cfg["keys"]
+
+    for key in keys:
+        print(f"\n========== [GraspDet] Processing key: {key} ==========")
+        run_for_key(
+            key=key,
+            n_points=args.n_points,
+            keep=args.keep,
+            nms=args.nms,
+            overwrite=args.overwrite,
+            vis_pts_per_gripper=args.vis_pts_per_gripper,
+        )


+if __name__ == "__main__":
+    main()
diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_utils.py b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_utils.py
new file mode 100755
index 0000000..2ceea0d
--- /dev/null
+++ b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_utils.py
@@ -0,0 +1,391 @@
+import os
+import sys
+import numpy as np
+import time
+import torch
+from typing import List
+import open3d as o3d
+from graspnetAPI import GraspGroup
+from pathlib import Path
+base_dir = Path.cwd()
+from graspness_unofficial.models.graspnet import GraspNet, pred_decode
+from graspness_unofficial.dataset.graspnet_dataset import minkowski_collate_fn
+from graspness_unofficial.utils.collision_detector import ModelFreeCollisionDetector
+from graspness_unofficial.utils.data_utils import CameraInfo, create_point_cloud_from_depth_image, get_workspace_mask
+from scipy.spatial.transform import Rotation as R
+
+grasp_cfgs = {
+    "save_files": False,
+    "checkpoint_path": "/home/isaac/third_party/graspness_unofficial/ckpt/minkuresunet_kinect.tar",  # hardcoded path
+    "seed_feat_dim": 512,
+    "camera": "kinect",
+    "num_point": 80000,
+    "batch_size": 1,
+    "voxel_size": 0.001,  # 0.005
+    "collision_thresh": 0.00001,
+    "voxel_size_cd": 0.01,  # 0.01
+    "infer": True,
+}
+
+def my_worker_init_fn(worker_id):
+    np.random.seed(np.random.get_state()[1][0] + worker_id)
+
+class MyGraspNet():
+    def __init__(self):
+        cfgs = grasp_cfgs
+        self.cfgs = cfgs
+        self.net = GraspNet(seed_feat_dim=cfgs["seed_feat_dim"], is_training=False)
+        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self.net.to(self.device)
+        # Load checkpoint
+        checkpoint = torch.load(cfgs["checkpoint_path"])
+        self.net.load_state_dict(checkpoint['model_state_dict'])
+        start_epoch = checkpoint['epoch']
+        print("-> loaded checkpoint %s (epoch: %d)" % (cfgs["checkpoint_path"], start_epoch))
+
+        self.net.eval()
+
+    def inference(self, pcs):
+        data_dict = {'point_clouds': pcs.astype(np.float32),
+                     'coors': pcs.astype(np.float32) / self.cfgs["voxel_size"],
+                     'feats': np.ones_like(pcs).astype(np.float32)}
+        batch_data = minkowski_collate_fn([data_dict])
+        tic = time.time()
+        for key in batch_data:
+            if 'list' in key:
+                for i in range(len(batch_data[key])):
+                    for j in range(len(batch_data[key][i])):
+                        batch_data[key][i][j] = batch_data[key][i][j].to(self.device)
+            else:
+                batch_data[key] = batch_data[key].to(self.device)
+
+        # Forward pass
+        with torch.no_grad():
+            end_points = self.net(batch_data)
grasp_preds = pred_decode(end_points) + + preds = grasp_preds[0].detach().cpu().numpy() + gg = GraspGroup(preds) + + # collision detection + if self.cfgs["collision_thresh"] > 0: + cloud = data_dict['point_clouds'] + mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=self.cfgs["voxel_size_cd"]) + collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=self.cfgs["collision_thresh"]) + gg = gg[~collision_mask] + + toc = time.time() + # print('inference time: %fs' % (toc - tic)) + return gg + +def inference(cfgs, data_input): + batch_data = minkowski_collate_fn([data_input]) + net = GraspNet(seed_feat_dim=cfgs["seed_feat_dim"], is_training=False) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + net.to(device) + + # Load checkpoint + checkpoint = torch.load(cfgs["checkpoint_path"]) + net.load_state_dict(checkpoint['model_state_dict']) + start_epoch = checkpoint['epoch'] + print("-> loaded checkpoint %s (epoch: %d)" % (cfgs["checkpoint_path"], start_epoch)) + + net.eval() + tic = time.time() + + for key in batch_data: + if 'list' in key: + for i in range(len(batch_data[key])): + for j in range(len(batch_data[key][i])): + batch_data[key][i][j] = batch_data[key][i][j].to(device) + else: + batch_data[key] = batch_data[key].to(device) + + # Forward pass + with torch.no_grad(): + end_points = net(batch_data) + grasp_preds = pred_decode(end_points) + + preds = grasp_preds[0].detach().cpu().numpy() + gg = GraspGroup(preds) + # collision detection + if cfgs["collision_thresh"] > 0: + cloud = data_input['point_clouds'] + mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs["voxel_size_cd"]) + collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=cfgs["collision_thresh"]) + gg = gg[~collision_mask] + + toc = time.time() + # print('inference time: %fs' % (toc - tic)) + return gg + +def pointcloud_to_grasp(cfgs, pcs): + data_dict = {'point_clouds': pcs.astype(np.float32), + 'coors': pcs.astype(np.float32) / cfgs["voxel_size"], + 'feats': np.ones_like(pcs).astype(np.float32)} + gg = inference(cfgs, data_dict) + return gg + +def vis_grasp(pcs, gg): + gg = gg.nms() + gg = gg.sort_by_score() + keep = 1 + if gg.__len__() > keep: + gg = gg[:keep] + grippers = gg.to_open3d_geometry_list() + cloud = o3d.geometry.PointCloud() + cloud.points = o3d.utility.Vector3dVector(pcs.astype(np.float32)) + o3d.visualization.draw_geometries([cloud, *grippers]) + +def grasp_to_pointcloud(grippers, gripper_points=1000, gripper_color=[1, 0, 0]): + grippers_pcd = o3d.geometry.PointCloud() + for gripper in grippers: + g_pcd = gripper.sample_points_uniformly(gripper_points) + grippers_pcd += g_pcd + grippers_pcd.colors = o3d.utility.Vector3dVector(np.tile(np.array([gripper_color]), (len(grippers_pcd.points), 1))) + return grippers_pcd + + +def vis_save_grasp(points, gg, best_grasp, save_path, colors=None, grasp_position=None, place_position=None): + # visualize grasp pos, place pos, grasp poses, and pcd + cloud = o3d.geometry.PointCloud() + cloud.points = o3d.utility.Vector3dVector(points.astype(np.float32)) + + if colors is None: + cloud.colors = o3d.utility.Vector3dVector(np.tile(np.array([[0, 1, 0]]), (len(cloud.points), 1))) + elif isinstance(colors, np.ndarray): + cloud.colors = o3d.utility.Vector3dVector(colors.astype(np.float32)) + elif isinstance(colors, list): + cloud.colors = o3d.utility.Vector3dVector(np.tile(np.array([colors]), (len(cloud.points), 1))) + + if type(gg) == GraspGroup: + pcd_w_grasp = 
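Putting the pieces together, a minimal end-to-end call of the wrapper above (assumes the hard-coded checkpoint exists and `graspness_unofficial` is importable; the cloud here is synthetic):

```python
import numpy as np

net = MyGraspNet()
pcs = (np.random.rand(80000, 3).astype(np.float32) - 0.5) * 0.3  # toy 30 cm cube
gg = net.inference(pcs)          # GraspGroup, already collision-filtered
gg = gg.nms().sort_by_score()    # dedupe, then best-first
if len(gg) > 0:
    print(f"{len(gg)} proposals, top score {gg[0].score:.3f}")
```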
grasp_to_pointcloud(gg.to_open3d_geometry_list()) + else: + pcd_w_grasp = grasp_to_pointcloud([gg.to_open3d_geometry()]) + + pcd_best_grasp = grasp_to_pointcloud([best_grasp.to_open3d_geometry()], gripper_color=[0, 0, 1]) + pcd_w_grasp += pcd_best_grasp + + if grasp_position is not None and place_position is not None: + pick_pcd = o3d.geometry.PointCloud() + place_pcd = o3d.geometry.PointCloud() + pick_pcd.points = o3d.utility.Vector3dVector(np.array(grasp_position).reshape(1,3).astype(np.float32)) + place_pcd.points = o3d.utility.Vector3dVector(np.array(place_position).reshape(1,3).astype(np.float32)) + pick_pcd.colors = place_pcd.colors = o3d.utility.Vector3dVector(np.array([[0,0,1]]).astype(np.float32)) + pcd_w_grasp = pcd_w_grasp + pick_pcd + place_pcd + + pcd_w_grasp += cloud + + o3d.io.write_point_cloud(save_path, pcd_w_grasp) + +def get_pose_from_grasp(best_grasp): + grasp_position = best_grasp.translation + # convert rotation to isaacgym convention + delta_m = np.array([[0, 0, 1], [0, -1, 0], [1, 0, 0]]) + rotation = np.dot(best_grasp.rotation_matrix, delta_m) + quaternion_grasp = R.from_matrix(rotation).as_quat() + quaternion_grasp = np.array([quaternion_grasp[3],quaternion_grasp[0],quaternion_grasp[1],quaternion_grasp[2]]) + rotation_unit_vect = rotation[:,2] + # grasp_position -= 0.03 * rotation_unit_vect + return grasp_position, quaternion_grasp, rotation_unit_vect + +def get_best_grasp(gg, position, max_dis=0.05): + # get best grasp pose around the grasp position + best_grasp = None + for g in gg: + if np.linalg.norm(g.translation - position) < max_dis: + if best_grasp is None: + best_grasp = g + else: + if g.score > best_grasp.score: + best_grasp = g + if best_grasp is None: + best_grasp = gg[0] + return best_grasp + +def get_best_grasp_z_aligned(gg): + # get best grasp pose that is facing the -z axis (for top-down grasping) + best_grasp = None + best_angle = np.inf + + gravity_direction = np.array([0, 0, -1]) # -Z direction in world coordinates + + for g in gg: + + # approach vector is +X axis of the grasp frame, make it close to the -Z axis in the world frame + approach_vector = g.rotation_matrix[:, 0] + approach_vector /= np.linalg.norm(approach_vector) + + # compute the angle between the approach vector and the gravity direction + angle = np.arccos(np.clip(np.dot(approach_vector, gravity_direction), -1.0, 1.0)) + + if angle < best_angle: + best_angle = angle + best_grasp = g + elif np.isclose(angle, best_angle) and g.score > best_grasp.score: + best_grasp = g + + if best_grasp is None: + print("No best grasp found, falling back to the first grasp.") + best_grasp = gg[0] # fallback + + return best_grasp + +def get_best_grasp_z_aligned_and_y_aligned(gg, + desired_approach_dir=np.array([0, 0, -1]), + desired_width_dir=np.array([0, 1, 0]), + angle_tolerance=0.2): + """ + Pick the grasp whose: + 1) X-axis (approach) is closest to desired_approach_dir (default -Z), + 2) Y-axis (gripper width) is closest to desired_width_dir (default +Y), + 3) Score is highest if angles tie. 
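The wxyz reordering done by hand in `get_pose_from_grasp` is equivalent to `np.roll` on SciPy's xyzw output; a small check:

```python
import numpy as np
from scipy.spatial.transform import Rotation as R

delta_m = np.array([[0, 0, 1], [0, -1, 0], [1, 0, 0]])  # remap to the IsaacGym gripper convention
rot = np.eye(3) @ delta_m        # identity grasp rotation, remapped

q_xyzw = R.from_matrix(rot).as_quat()  # SciPy returns (x, y, z, w)
q_wxyz = np.roll(q_xyzw, 1)            # reorder to (w, x, y, z), as used above
print(q_wxyz)
```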
+ + Grasp coordinate system: + - rotation_matrix[:, 0] → X-axis = Approach direction + - rotation_matrix[:, 1] → Y-axis = Gripper width direction + - rotation_matrix[:, 2] → Z-axis = Depth/thickness direction + """ + + best_grasp = None + # Track best angles and score + best_approach_angle = np.inf + best_width_angle = np.inf + best_score = -np.inf + + # Normalize desired directions + desired_approach_dir = desired_approach_dir / np.linalg.norm(desired_approach_dir) + desired_width_dir = desired_width_dir / np.linalg.norm(desired_width_dir) + + for g in gg: + # 1) Approach vector angle + approach_vec = g.rotation_matrix[:, 0] + approach_vec /= np.linalg.norm(approach_vec) + approach_angle = angle_between(approach_vec, desired_approach_dir) + + # 2) Width vector angle + width_vec = g.rotation_matrix[:, 1] + width_vec /= np.linalg.norm(width_vec) + width_angle = angle_between(width_vec, desired_width_dir) + + # 3) Compare to the "best" so far in a hierarchical manner + if approach_angle < best_approach_angle: + # Definitely better in terms of approach alignment => choose this + best_approach_angle = approach_angle + best_width_angle = width_angle + best_score = g.score + best_grasp = g + elif np.isclose(approach_angle, best_approach_angle, atol=angle_tolerance): + # Approach angles are essentially tied, compare width alignment + if width_angle < best_width_angle: + best_width_angle = width_angle + best_score = g.score + best_grasp = g + elif np.isclose(width_angle, best_width_angle, atol=angle_tolerance): + # Both angles tied, pick the higher score + if g.score > best_score: + best_score = g.score + best_grasp = g + + if best_grasp is None and len(gg) > 0: + print("No valid grasp found using angle criteria. Falling back to the first grasp.") + best_grasp = gg[0] + + return best_grasp + + +def get_best_grasp_with_hints(gg: GraspGroup, point: List[float] = None, direction: List[float] = None): + """ + Rescore all grasps using optional spatial and directional hints, then return a new + GraspGroup sorted by this custom score (best first). Does NOT mutate the original gg. + + Scoring terms in [0, 1], combined by a weighted sum: + - dir_term: alignment between grasp approach (+X axis) and `direction` + - pt_term : proximity to `point` (RBF over distance) + - net_term: original network score normalized over gg + + Args: + gg: GraspGroup from graspnetAPI. + point: (3,) world point. If provided, grasps closer to this point are preferred. + direction: (3,) world direction. If provided, grasps whose approach (+X) aligns + with this direction are preferred. + + Returns: + GraspGroup: a *new* group sorted by the custom score (descending). + The best guess is result[0]. + """ + # --- Early exits --- + if gg is None or len(gg) == 0: + return gg + + # Internal weights (you can tweak if needed) + w_dir = 1.0 + w_pt = 1.0 + w_net = 0 + + # If hints are missing, zero-out the corresponding weights + if point is None: + w_pt = 0.0 + if direction is None or (np.asarray(direction).shape != (3,)): + w_dir = 0.0 + + # Length-scale for the point proximity (meters). Similar to your 0.05 window. 
+ sigma = 0.05 + sigma2 = max(sigma * sigma, 1e-12) + + # --- Gather per-grasp attributes --- + translations = [] + approach_axes = [] # grasp frame +X + net_scores = [] + for g in gg: + translations.append(g.translation.astype(np.float64)) + # Normalize +X axis as approach direction + ax = g.rotation_matrix[:, 0].astype(np.float64) + n = np.linalg.norm(ax) + approach_axes.append(ax / n if n > 0 else np.array([1.0, 0.0, 0.0], dtype=np.float64)) + net_scores.append(float(g.score)) + + translations = np.vstack(translations) # (N,3) + approach_axes = np.vstack(approach_axes) # (N,3) + net_scores = np.asarray(net_scores, dtype=np.float64) # (N,) + + # --- Normalize original network scores to [0,1] --- + if np.isfinite(net_scores).all() and (net_scores.max() > net_scores.min()): + net_term = (net_scores - net_scores.min()) / (net_scores.max() - net_scores.min()) + else: + net_term = np.zeros_like(net_scores) + + # --- Direction alignment term (cosine mapped to [0,1]) --- + if w_dir > 0.0: + d = np.asarray(direction, dtype=np.float64) + nd = np.linalg.norm(d) + if nd > 0: + d = d / nd + cosv = np.clip((approach_axes * d[None, :]).sum(axis=1), -1.0, 1.0) + dir_term = 0.5 * (cosv + 1.0) # map [-1,1] -> [0,1] + else: + dir_term = np.zeros(len(gg), dtype=np.float64) + else: + dir_term = np.zeros(len(gg), dtype=np.float64) + + # --- Point proximity term (RBF over Euclidean distance) --- + if w_pt > 0.0: + p = np.asarray(point, dtype=np.float64).reshape(1, 3) + dists = np.linalg.norm(translations - p, axis=1) + pt_term = np.exp(-0.5 * (dists * dists) / sigma2) # in (0,1] + else: + pt_term = np.zeros(len(gg), dtype=np.float64) + + # --- Combine --- + total_score = w_dir * dir_term + w_pt * pt_term + w_net * net_term + gg.scores = total_score + gg.sort_by_score() # Sort in-place by the new score (best first) + return gg + + +def angle_between(v1, v2): + """Utility to compute the angle between two normalized vectors in [0, π].""" + dot_val = np.clip(np.dot(v1, v2), -1.0, 1.0) + return np.arccos(dot_val) diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/trajectory_smoothing.py b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/trajectory_smoothing.py new file mode 100755 index 0000000..7e309ed --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/trajectory_smoothing.py @@ -0,0 +1,190 @@ +""" +Trajectory smoothing with axis-flip/axis-permutation disambiguation in the object-local frame. +Note this is just an experimental feature and may not work well. +""" + +import numpy as np +import transforms3d +from pathlib import Path +import yaml +import json +import argparse + +def _proj_to_SO3(R: np.ndarray) -> np.ndarray: + """Project a near-rotation 3x3 matrix to the closest element in SO(3) via SVD.""" + U, _, Vt = np.linalg.svd(R) + R_star = U @ Vt + if np.linalg.det(R_star) < 0: + U[:, -1] *= -1 + R_star = U @ Vt + return R_star + +def _angle_between_rotations(Ra: np.ndarray, Rb: np.ndarray) -> float: + """Geodesic angle on SO(3): arccos( (trace(Ra^T Rb) - 1)/2 ).""" + M = Ra.T @ Rb + cos_th = (np.trace(M) - 1.0) * 0.5 + cos_th = np.clip(cos_th, -1.0, 1.0) + return float(np.arccos(cos_th)) + +def _quat_slerp(q0: np.ndarray, q1: np.ndarray, alpha: float) -> np.ndarray: + """Shortest-arc SLERP between unit quaternions q0 -> q1. 
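Typical use of `get_best_grasp_with_hints` defined above, with both hints active (coordinates illustrative; `gg` is a `GraspGroup` loaded from a proposal file):

```python
import numpy as np

# Prefer grasps near a world-frame point AND approaching top-down.
gg = get_best_grasp_with_hints(
    gg,                             # GraspGroup from the proposal .npy
    point=[0.45, 0.10, 0.05],       # meters, world frame
    direction=[0.0, 0.0, -1.0],     # desired approach (+X axis of grasp frame)
)
best = gg[0]                        # best-first after rescoring
print(best.translation, best.score)
```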
Quaternions are (w,x,y,z).""" + q0 = q0 / np.linalg.norm(q0) + q1 = q1 / np.linalg.norm(q1) + dot = float(np.dot(q0, q1)) + if dot < 0.0: + q1 = -q1 + dot = -dot + dot = np.clip(dot, -1.0, 1.0) + if dot > 0.9995: + q = (1.0 - alpha) * q0 + alpha * q1 + return q / np.linalg.norm(q) + theta0 = np.arccos(dot) + s0 = np.sin((1 - alpha) * theta0) / np.sin(theta0) + s1 = np.sin(alpha * theta0) / np.sin(theta0) + q = s0 * q0 + s1 * q1 + return q / np.linalg.norm(q) + +def _generate_group24() -> np.ndarray: + """ + Generate 24 orientation-preserving signed permutation matrices (cube rotations). + Each is 3x3, has one ±1 per row/column, det=+1. + """ + mats = [] + perms = [(0,1,2),(0,2,1),(1,0,2),(1,2,0),(2,0,1),(2,1,0)] + for p in perms: + for sx in (-1,1): + for sy in (-1,1): + for sz in (-1,1): + M = np.zeros((3,3), float) + M[0, p[0]] = sx + M[1, p[1]] = sy + M[2, p[2]] = sz + if np.linalg.det(M) > 0: + mats.append(M) + mats = np.stack(mats, axis=0) + assert mats.shape[0] == 24 + return mats + +def smooth_object_local_trajectory( + poses: np.ndarray, + rot_smooth_alpha: float = 0.2, + trans_smooth_alpha: float = 0.2, + repair_SO3: bool = True, +) -> np.ndarray: + """ + Smooth a (N,4,4) trajectory with axis-flip/axis-permutation disambiguation in the OBJECT-LOCAL frame. + + Assumptions: + - Each pose T_t = [R_t, t_t; 0 0 0 1], rotation acts actively on column vectors. + - Coordinate re-labeling / flips (right-handed only) may occur in the object's local frame. + - Therefore we correct R_meas by RIGHT-multiplying S in G24, i.e., R_corr = R_meas @ S. + + Args: + poses: np.ndarray of shape (N, 4, 4), the raw trajectory. + rot_smooth_alpha: rotation smoothing factor in [0,1] (higher -> less smoothing). + trans_smooth_alpha: translation smoothing factor in [0,1]. + repair_SO3: if True, project measured rotations to SO(3) per frame (robust to noise/reflection). + + Returns: + np.ndarray of shape (N, 4, 4): the smoothed trajectory. + """ + assert poses.ndim == 3 and poses.shape[1:] == (4,4), "poses must be (N,4,4)" + N = poses.shape[0] + if N == 0: + return poses.copy() + + G = _generate_group24() + + out = np.zeros_like(poses, dtype=float) + R_prev = None + q_prev = None + t_prev = None + + for i in range(N): + T_in = poses[i].astype(float) + R_meas = T_in[:3, :3] + t_meas = T_in[:3, 3] + + if repair_SO3: + R_meas = _proj_to_SO3(R_meas) + + # --- Disambiguation in object-local frame: choose S to minimize distance to previous rotation. + if R_prev is not None: + best_angle = np.inf + R_best = R_meas + for S in G: + Rc = R_meas @ S # RIGHT-multiply: local/object-frame relabel/flip + ang = _angle_between_rotations(R_prev, Rc) + if ang < best_angle: + best_angle = ang + R_best = Rc + else: + R_best = R_meas + + # --- Quaternion sign continuity + SLERP smoothing. + q_best = transforms3d.quaternions.mat2quat(R_best) # (w,x,y,z) + if q_prev is not None and float(np.dot(q_best, q_prev)) < 0.0: + q_best = -q_best + q_smooth = q_best if q_prev is None else _quat_slerp(q_prev, q_best, rot_smooth_alpha) + R_smooth = transforms3d.quaternions.quat2mat(q_smooth) + + # --- Translation EMA smoothing (object-local relabeling doesn't change translation component). + t_smooth = t_meas if t_prev is None else (1.0 - trans_smooth_alpha) * t_prev + trans_smooth_alpha * t_meas + + # --- Compose output pose. + T_out = np.eye(4, dtype=float) + T_out[:3, :3] = R_smooth + T_out[:3, 3] = t_smooth + out[i] = T_out + + # --- Update memory. 
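Usage sketch for the smoother above, run in the module's context: an identity trajectory whose middle frame was relabeled by a member of G24 should come back to (nearly) identity after disambiguation.

```python
import numpy as np

poses = np.tile(np.eye(4), (5, 1, 1))
S = np.array([[0, -1, 0], [1, 0, 0], [0, 0, 1]], dtype=float)  # det=+1, in G24
poses[2, :3, :3] = poses[2, :3, :3] @ S  # simulate an axis relabel at frame 2

smoothed = smooth_object_local_trajectory(poses,
                                          rot_smooth_alpha=0.2,
                                          trans_smooth_alpha=0.2,
                                          repair_SO3=True)
print(np.allclose(smoothed[2, :3, :3], np.eye(3), atol=1e-6))  # True
```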
+        R_prev = R_smooth
+        q_prev = q_smooth
+        t_prev = t_smooth
+
+    return out
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Smooth trajectory poses")
+    parser.add_argument("--traj", type=str, default="fdpose_trajs",
+                        help="scene.json object key holding the trajectory .npy path")
+    args = parser.parse_args()
+
+    cfg_path = Path.cwd() / "config" / "config.yaml"
+    cfg = yaml.safe_load(cfg_path.open("r"))
+    keys = cfg["keys"]
+
+    base_dir = Path.cwd()
+    out_dir = base_dir / "outputs"
+    for key in keys:
+        print(f"\n========== [Trajectory Smoothing] Processing key: {key} ==========")
+
+        scene_json = out_dir / key / "simulation" / "scene.json"
+        if not scene_json.exists():
+            raise FileNotFoundError(scene_json)
+
+        scene_dict = json.load(open(scene_json))
+        objects = scene_dict.get("objects", {})
+        if not isinstance(objects, dict) or len(objects) == 0:
+            print(f"[WARN][{key}] scene_dict['objects'] is empty.")
+            continue  # skip this key instead of aborting the whole batch
+
+        key_dir = out_dir / key
+        for i, obj in objects.items():
+            traj_path = obj[args.traj]
+            traj = np.load(traj_path).astype(np.float32)  # relative poses (N,4,4)
+
+            smoothed_trajs = smooth_object_local_trajectory(
+                traj,
+                rot_smooth_alpha=0.2,
+                trans_smooth_alpha=0.2,
+                repair_SO3=True,
+            ).astype(np.float32)
+            smoothed_path = key_dir / "reconstruction" / "motions" / f"{i}_{obj['name']}_smoothed_trajs.npy"
+            np.save(smoothed_path, smoothed_trajs)
+            scene_dict["objects"][i]["smooth_trajs"] = str(smoothed_path)
+
+        json.dump(scene_dict, open(scene_json, "w"), indent=2)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/usd_conversion.py b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/usd_conversion.py
new file mode 100755
index 0000000..49b2f50
--- /dev/null
+++ b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/usd_conversion.py
@@ -0,0 +1,584 @@
+########################################################
+## Commandline arguments (IsaacLab only; no --key)
+########################################################
+import argparse
+from isaaclab.app import AppLauncher
+
+parser = argparse.ArgumentParser()
+# NOTE: no --key here; we will batch keys from config/config.yaml
+AppLauncher.add_app_launcher_args(parser)
+args_isaaclab = parser.parse_args()
+# Force GUI unless you toggle via IsaacLab flags
+args_isaaclab.headless = False
+app_launcher = AppLauncher(args_isaaclab)
+simulation_app = app_launcher.app
+
+########################################################
+## Imports
+########################################################
+from dataclasses import dataclass
+from typing import Literal, List
+from pathlib import Path
+import numpy as np
+from PIL import Image
+import trimesh
+import json
+import yaml
+import contextlib
+import os
+import xml.etree.ElementTree as ET
+from collections import defaultdict
+
+import carb
+import isaacsim.core.utils.stage as stage_utils
+import omni.kit.app
+from loguru import logger as log
+from isaaclab.sim.converters import (
+    MeshConverter,
+    MeshConverterCfg,
+    UrdfConverter,
+    UrdfConverterCfg,
+)
+from isaaclab.sim.schemas import schemas_cfg
+from isaaclab.utils.assets import check_file_path
+from pxr import Usd, UsdPhysics
+from rich.logging import RichHandler
+
+# Extra USD/PhysX schemas + tokens
+from pxr import PhysxSchema, Sdf, UsdGeom
+
+log.configure(handlers=[{"sink": RichHandler(), "format": "{message}"}])
+
+########################################################
+## Small utilities
+######################################################## +base_dir = Path.cwd() +out_dir = base_dir / "outputs" + +def glb_to_usd(filename: str) -> str: + assert filename.endswith(".glb") + return filename.replace(".glb", ".usd") + +def convert_glb_to_obj(glb_path): + """ + Convert a GLB file to an OBJ file with texture. + (Kept from your original file; unused by the flow unless you point inputs to .glb->.obj) + """ + obj_name = str(glb_path).split("/")[-1].split(".")[0] + obj_path = Path(glb_path).parent / f"{obj_name}.obj" + mtl_path = Path(glb_path).parent / f"{obj_name}.mtl" + texture_path = Path(glb_path).parent / f"{obj_name}.jpg" + + mesh = trimesh.load(glb_path) + + if hasattr(mesh.visual, 'material') and hasattr(mesh.visual.material, 'image'): + image = mesh.visual.material.image + if image is not None: + Image.fromarray(np.asarray(image)).save(texture_path) + + with open(mtl_path, "w") as mtl_file: + mtl_file.write( + "newmtl material_0\nKa 1.000 1.000 1.000\nKd 1.000 1.000 1.000\n" + "Ks 0.000 0.000 0.000\nNs 10.000\nd 1.0\nillum 2\n" + ) + mtl_file.write(f"map_Kd {texture_path.name}\n") + + with open(obj_path, "w") as obj_file: + obj_file.write(f"mtllib {mtl_path.name}\n") + for v in mesh.vertices: + obj_file.write(f"v {v[0]} {v[1]} {v[2]}\n") + for uv in mesh.visual.uv: + obj_file.write(f"vt {uv[0]} {1 - uv[1]}\n") + for f in mesh.faces: + obj_file.write(f"f {f[0]+1}/{f[0]+1} {f[1]+1}/{f[1]+1} {f[2]+1}/{f[2]+1}\n") + + print(f"Converted GLB to OBJ: {obj_path}") + return str(obj_path) + +######################################################## +## Args container (same fields as before) +######################################################## +@dataclass +class Args: + input: List[str] + output: List[str] + make_instanceable: bool = False + # We will enforce convexDecomposition for both bg and objects in run_conversion() + collision_approximation: Literal["convexHull", "convexDecomposition", "meshSimplification", "none"] = "convexDecomposition" + mass: float | None = 1.0 + disable_gravity: bool = False + kinematic_enabled: bool = False + headless: bool = False + exit_on_finish: bool = True + +# We'll assign a per-key Args instance to this global so helper functions keep working unchanged. 
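For a one-off conversion outside the batch flow, the module-level `args` consumed by the helpers below can be populated directly (a sketch; paths illustrative):

```python
# Static background asset: no mass properties, gravity off, kinematic.
args = Args(
    input=["outputs/demo/simulation/background/registered.glb"],
    output=[glb_to_usd("outputs/demo/simulation/background/registered.glb")],
    mass=None,
    disable_gravity=True,
    kinematic_enabled=True,
)
```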
+args = None # type: ignore + +######################################################## +## Converters and USD post-processing (unchanged) +######################################################## +def convert_obj_to_usd(obj_path, usd_path): + log.info(f"Converting {obj_path}") + + if not os.path.isabs(obj_path): + obj_path = os.path.abspath(obj_path) + if not check_file_path(obj_path): + raise ValueError(f"Invalid mesh file path: {obj_path}") + + usd_path = os.path.abspath(usd_path) + + mass_props = schemas_cfg.MassPropertiesCfg(mass=args.mass) if args.mass is not None else None + rigid_props = schemas_cfg.RigidBodyPropertiesCfg( + disable_gravity=args.disable_gravity, + kinematic_enabled=args.kinematic_enabled, + ) + collision_props = schemas_cfg.CollisionPropertiesCfg( + collision_enabled=args.collision_approximation != "none", + contact_offset=0.006, + rest_offset=-0.002, + torsional_patch_radius=0.05, + min_torsional_patch_radius=0.005, + ) + + mesh_converter_cfg = MeshConverterCfg( + mass_props=mass_props, + rigid_props=rigid_props, + collision_props=collision_props, + asset_path=obj_path, + force_usd_conversion=True, + usd_dir=os.path.dirname(usd_path), + usd_file_name=os.path.basename(usd_path), + make_instanceable=args.make_instanceable, + collision_approximation="convexDecomposition", # enforce + ) + + log.info(f"Conversion configuration: {mesh_converter_cfg}") + MeshConverter(mesh_converter_cfg) + +def ensure_rigidbody_and_set_kinematic(usd_path: str, disable_gravity: bool = True): + stage = Usd.Stage.Open(usd_path) + prim = stage.GetDefaultPrim() + if not prim or not prim.IsValid(): + for p in stage.GetPseudoRoot().GetChildren(): + prim = p + break + if not prim or not prim.IsValid(): + raise RuntimeError(f"[ensure_rigidbody_and_set_kinematic] No valid prim in stage: {usd_path}") + + UsdPhysics.RigidBodyAPI.Apply(prim) + try: + rb = PhysxSchema.PhysxRigidBodyAPI.Apply(prim) + if hasattr(rb, "CreateKinematicEnabledAttr"): + rb.CreateKinematicEnabledAttr(True) + else: + prim.CreateAttribute("physxRigidBody:kinematicEnabled", Sdf.ValueTypeNames.Bool).Set(True) + if hasattr(rb, "CreateDisableGravityAttr"): + rb.CreateDisableGravityAttr(bool(disable_gravity)) + else: + prim.CreateAttribute("physxRigidBody:disableGravity", Sdf.ValueTypeNames.Bool).Set(bool(disable_gravity)) + except Exception: + prim.CreateAttribute("physxRigidBody:kinematicEnabled", Sdf.ValueTypeNames.Bool).Set(True) + prim.CreateAttribute("physxRigidBody:disableGravity", Sdf.ValueTypeNames.Bool).Set(bool(disable_gravity)) + try: + UsdPhysics.RigidBodyAPI(prim).CreateDisableGravityAttr(bool(disable_gravity)) + except Exception: + prim.CreateAttribute("physics:disableGravity", Sdf.ValueTypeNames.Bool).Set(bool(disable_gravity)) + stage.Save() + +def enable_ccd_and_iters(usd_path: str, pos_iters: int = 14, vel_iters: int = 4, + enable_ccd: bool = True, enable_speculative_ccd: bool = True): + stage = Usd.Stage.Open(usd_path) + prim = stage.GetDefaultPrim() + if not prim or not prim.IsValid(): + for p in stage.Traverse(): + prim = p + break + if not prim or not prim.IsValid(): + raise RuntimeError(f"[enable_ccd_and_iters] No valid prim in stage: {usd_path}") + + UsdPhysics.RigidBodyAPI.Apply(prim) + try: + rb = PhysxSchema.PhysxRigidBodyAPI.Apply(prim) + if enable_ccd: + (rb.CreateEnableCCDAttr(True) if hasattr(rb, "CreateEnableCCDAttr") + else prim.CreateAttribute("physxRigidBody:enableCCD", Sdf.ValueTypeNames.Bool).Set(True)) + if enable_speculative_ccd: + (rb.CreateEnableSpeculativeCCDAttr(True) if hasattr(rb, 
"CreateEnableSpeculativeCCDAttr") + else prim.CreateAttribute("physxRigidBody:enableSpeculativeCCD", Sdf.ValueTypeNames.Bool).Set(True)) + (rb.CreateSolverPositionIterationCountAttr(int(pos_iters)) if hasattr(rb, "CreateSolverPositionIterationCountAttr") + else prim.CreateAttribute("physxRigidBody:solverPositionIterationCount", Sdf.ValueTypeNames.Int).Set(int(pos_iters))) + (rb.CreateSolverVelocityIterationCountAttr(int(vel_iters)) if hasattr(rb, "CreateSolverVelocityIterationCountAttr") + else prim.CreateAttribute("physxRigidBody:solverVelocityIterationCount", Sdf.ValueTypeNames.Int).Set(int(vel_iters))) + except Exception: + if enable_ccd: + prim.CreateAttribute("physxRigidBody:enableCCD", Sdf.ValueTypeNames.Bool).Set(True) + if enable_speculative_ccd: + prim.CreateAttribute("physxRigidBody:enableSpeculativeCCD", Sdf.ValueTypeNames.Bool).Set(True) + prim.CreateAttribute("physxRigidBody:solverPositionIterationCount", Sdf.ValueTypeNames.Int).Set(int(pos_iters)) + prim.CreateAttribute("physxRigidBody:solverVelocityIterationCount", Sdf.ValueTypeNames.Int).Set(int(vel_iters)) + stage.Save() + +def set_convex_decomposition_params(usd_path: str, + voxel_resolution: int = 500_000, + max_convex_hulls: int = 24, + max_vertices_per_hull: int = 64, + concavity: float = 0.002, + shrink_wrap: bool = True, + overlap: bool = False, + contact_offset: float = 0.006, + rest_offset: float = -0.002): + stage = Usd.Stage.Open(usd_path) + root = stage.GetDefaultPrim() + if not root or not root.IsValid(): + return + + for prim in stage.Traverse(): + if not prim or not prim.IsValid(): + continue + if not prim.GetPath().HasPrefix(root.GetPath()): + continue + if not prim.IsA(UsdGeom.Mesh): + continue + + UsdPhysics.CollisionAPI.Apply(prim) + mesh_col = UsdPhysics.MeshCollisionAPI.Apply(prim) + + try: + mesh_col.CreateApproximationAttr(PhysxSchema.Tokens.convexDecomposition) + except Exception: + try: + mesh_col.GetApproximationAttr().Set("convexDecomposition") + except Exception: + prim.CreateAttribute("physics:approximation", Sdf.ValueTypeNames.Token).Set("convexDecomposition") + + try: + cd_api = PhysxSchema.PhysxConvexDecompositionCollisionAPI.Apply(prim) + if hasattr(cd_api, "CreateVoxelResolutionAttr"): + cd_api.CreateVoxelResolutionAttr(int(voxel_resolution)) + else: + prim.CreateAttribute("physxConvexDecomposition:voxelResolution", Sdf.ValueTypeNames.Int).Set(int(voxel_resolution)) + if hasattr(cd_api, "CreateMaxConvexHullsAttr"): + cd_api.CreateMaxConvexHullsAttr(int(max_convex_hulls)) + else: + prim.CreateAttribute("physxConvexDecomposition:maxConvexHulls", Sdf.ValueTypeNames.Int).Set(int(max_convex_hulls)) + if hasattr(cd_api, "CreateMaxNumVerticesPerCHAttr"): + cd_api.CreateMaxNumVerticesPerCHAttr(int(max_vertices_per_hull)) + else: + prim.CreateAttribute("physxConvexDecomposition:maxNumVerticesPerCH", Sdf.ValueTypeNames.Int).Set(int(max_vertices_per_hull)) + if hasattr(cd_api, "CreateConcavityAttr"): + cd_api.CreateConcavityAttr(float(concavity)) + else: + prim.CreateAttribute("physxConvexDecomposition:concavity", Sdf.ValueTypeNames.Float).Set(float(concavity)) + if hasattr(cd_api, "CreateShrinkWrapAttr"): + cd_api.CreateShrinkWrapAttr(bool(shrink_wrap)) + else: + prim.CreateAttribute("physxConvexDecomposition:shrinkWrap", Sdf.ValueTypeNames.Bool).Set(bool(shrink_wrap)) + if hasattr(cd_api, "CreateOverlapAttr"): + cd_api.CreateOverlapAttr(bool(overlap)) + else: + prim.CreateAttribute("physxConvexDecomposition:overlap", Sdf.ValueTypeNames.Bool).Set(bool(overlap)) + except Exception: + 
prim.CreateAttribute("physxConvexDecomposition:voxelResolution", Sdf.ValueTypeNames.Int).Set(int(voxel_resolution)) + prim.CreateAttribute("physxConvexDecomposition:maxConvexHulls", Sdf.ValueTypeNames.Int).Set(int(max_convex_hulls)) + prim.CreateAttribute("physxConvexDecomposition:maxNumVerticesPerCH", Sdf.ValueTypeNames.Int).Set(int(max_vertices_per_hull)) + prim.CreateAttribute("physxConvexDecomposition:concavity", Sdf.ValueTypeNames.Float).Set(float(concavity)) + prim.CreateAttribute("physxConvexDecomposition:shrinkWrap", Sdf.ValueTypeNames.Bool).Set(bool(shrink_wrap)) + prim.CreateAttribute("physxConvexDecomposition:overlap", Sdf.ValueTypeNames.Bool).Set(bool(overlap)) + + try: + col_api = PhysxSchema.PhysxCollisionAPI.Apply(prim) + if hasattr(col_api, "CreateContactOffsetAttr"): + col_api.CreateContactOffsetAttr(float(contact_offset)) + else: + prim.CreateAttribute("physxCollision:contactOffset", Sdf.ValueTypeNames.Float).Set(float(contact_offset)) + if hasattr(col_api, "CreateRestOffsetAttr"): + col_api.CreateRestOffsetAttr(float(rest_offset)) + else: + prim.CreateAttribute("physxCollision:restOffset", Sdf.ValueTypeNames.Float).Set(float(rest_offset)) + except Exception: + prim.CreateAttribute("physxCollision:contactOffset", Sdf.ValueTypeNames.Float).Set(float(contact_offset)) + prim.CreateAttribute("physxCollision:restOffset", Sdf.ValueTypeNames.Float).Set(float(rest_offset)) + + try: + UsdGeom.Mesh(prim).CreateDoubleSidedAttr(True) + except Exception: + pass + + stage.Save() + +def make_visual_names_unique(xml_string: str) -> str: + tree = ET.ElementTree(ET.fromstring(xml_string)) + root = tree.getroot() + elements_with_name = root.findall(".//visual") + name_counts = defaultdict(int) + for element in elements_with_name: + name = element.get("name") + name_counts[name] += 1 + for element in elements_with_name: + name = element.get("name") + if name_counts[name] > 1: + count = name_counts[name] + new_name = f"{name}{count}" + element.set("name", new_name) + name_counts[name] -= 1 + return ET.tostring(root, encoding="unicode") + +def make_urdf_with_unique_visual_names(urdf_path: str) -> str: + xml_str = open(urdf_path, "r").read() + xml_str = '' + "\n" + make_visual_names_unique(xml_str) + urdf_unique_path = urdf_path.replace(".urdf", "_unique.urdf") + with open(urdf_unique_path, "w") as f: + f.write(xml_str) + return urdf_unique_path + +def make_obj_without_slashes(obj_path: str) -> str: + obj_str = open(obj_path, "r").read() + obj_str = obj_str.replace("/", "") + obj_clean_path = obj_path.replace(".obj", "_clean.obj") + with open(obj_clean_path, "w") as f: + f.write(obj_str) + return obj_clean_path + +def convert_urdf_to_usd(urdf_path, usd_path): + log.info(f"Converting {urdf_path}") + if not os.path.isabs(urdf_path): + urdf_path = os.path.abspath(urdf_path) + urdf_converter_cfg = UrdfConverterCfg( + asset_path=urdf_path, + usd_dir=os.path.dirname(usd_path), + usd_file_name=os.path.basename(usd_path), + make_instanceable=args.make_instanceable, + force_usd_conversion=True, + fix_base=True, + ) + UrdfConverter(urdf_converter_cfg) + +def is_articulation(usd_path: str): + joint_count = 0 + stage = Usd.Stage.Open(usd_path) + for prim in stage.Traverse(): + if prim.IsA(UsdPhysics.Joint): + joint_count += 1 + return joint_count > 0 + +def apply_rigidbody_api(usd_path): + stage = Usd.Stage.Open(usd_path) + defaultPrim = stage.GetDefaultPrim() + UsdPhysics.RigidBodyAPI.Apply(defaultPrim) + stage.Save() + +def count_rigid_api(usd_path): + stage = Usd.Stage.Open(usd_path) + rigid_api_count = 
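After conversion, the schema helpers above can be re-applied to audit or retune a single asset (a sketch; path illustrative, run inside the IsaacLab container):

```python
usd_path = "outputs/demo/simulation/objects/0_mug/optimized.usd"

apply_rigidbody_api(usd_path)
enable_ccd_and_iters(usd_path, pos_iters=14, vel_iters=4)
set_convex_decomposition_params(usd_path,
                                max_convex_hulls=24,
                                max_vertices_per_hull=64,
                                concavity=0.002)
```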
0 + for prim in stage.Traverse(): + if prim.HasAPI(UsdPhysics.RigidBodyAPI): + rigid_api_count += 1 + return rigid_api_count + +######################################################## +## Per-key conversion (batch) +######################################################## +def run_conversion_for_key(key: str): + global args + + scene_json = out_dir / key / "simulation" / "scene.json" + assert scene_json.exists(), f"[{key}] scene.json not found: {scene_json}" + scene_dict = json.load(open(scene_json, "r")) + + # Build IO lists (background always first). Prefer "refined" if present. + input_list, output_list = [scene_dict["background"]["registered"]], [glb_to_usd(scene_dict["background"]["registered"])] + scene_dict["background"]["usd"] = glb_to_usd(scene_dict["background"]["registered"]) + for idx, obj in scene_dict["objects"].items(): + input_list.append(obj["optimized"]) + output_list.append(glb_to_usd(obj["optimized"])) + scene_dict["objects"][idx]["usd"] = glb_to_usd(obj["optimized"]) + + # Persist USD paths back to scene.json + with open(scene_json, 'w') as f: + json.dump(scene_dict, f, indent=2) + + # Create a fresh Args for this key + args = Args(input=input_list, output=output_list, make_instanceable=False, + collision_approximation="convexDecomposition", + mass=1.0, disable_gravity=False, kinematic_enabled=False, + headless=args_isaaclab.headless, exit_on_finish=True) + + # Run the original main conversion logic for this key + log.info(f"[{key}] converting {len(args.input)} assets...") + run_conversion(scene_dict) + +def run_conversion(scene_dict: dict): + """This is the body of your original `main()` but parameterized and reused per key.""" + if isinstance(args.input, list) and isinstance(args.output, list): + for input_path, output_path in zip(args.input, args.output): + is_background = (input_path == scene_dict["background"]["registered"]) + + if input_path.endswith(".obj") or input_path.endswith(".glb"): + # Save current, then enforce per-asset overrides + prev_mass = args.mass + prev_approx = args.collision_approximation + prev_disable_grav = args.disable_gravity + prev_kinematic = args.kinematic_enabled + + args.collision_approximation = "convexDecomposition" + + if is_background: + args.mass = None + args.disable_gravity = True + args.kinematic_enabled = True + else: + args.mass = 1.0 + args.disable_gravity = False + args.kinematic_enabled = False + + convert_obj_to_usd(input_path, output_path) + + if is_background: + apply_rigidbody_api(output_path) + ensure_rigidbody_and_set_kinematic(output_path, disable_gravity=True) + set_convex_decomposition_params( + output_path, + voxel_resolution=600_000, + max_convex_hulls=512, + max_vertices_per_hull=128, + concavity=0.0005, + shrink_wrap=True, + overlap=False, + contact_offset=0.006, + rest_offset=-0.002 + ) + else: + apply_rigidbody_api(output_path) + enable_ccd_and_iters(output_path, pos_iters=14, vel_iters=4, + enable_ccd=True, enable_speculative_ccd=True) + set_convex_decomposition_params( + output_path, + voxel_resolution=600_000, + max_convex_hulls=24, + max_vertices_per_hull=64, + concavity=0.002, + shrink_wrap=True, + overlap=False, + contact_offset=0.006, + rest_offset=-0.002 + ) + + # Restore global args + args.mass = prev_mass + args.collision_approximation = prev_approx + args.disable_gravity = prev_disable_grav + args.kinematic_enabled = prev_kinematic + + log.info(f'Rigid body count: {count_rigid_api(output_path)}') + log.info(f"Saved USD file to {os.path.abspath(output_path)}") + + elif 
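The IO pairing that `run_conversion_for_key` builds from `scene.json` can be previewed without launching Isaac Sim (a sketch; key illustrative):

```python
import json

scene = json.load(open("outputs/demo/simulation/scene.json"))
pairs = [(scene["background"]["registered"],
          glb_to_usd(scene["background"]["registered"]))]
pairs += [(o["optimized"], glb_to_usd(o["optimized"]))
          for o in scene["objects"].values()]
for src, dst in pairs:
    print(src, "->", dst)  # background first, then each object
```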
input_path.endswith(".urdf"): + urdf_unique_path = make_urdf_with_unique_visual_names(input_path) + convert_urdf_to_usd(urdf_unique_path, output_path) + + if is_background: + apply_rigidbody_api(output_path) + ensure_rigidbody_and_set_kinematic(output_path, disable_gravity=True) + set_convex_decomposition_params( + output_path, + voxel_resolution=600_000, + max_convex_hulls=28, + max_vertices_per_hull=64, + concavity=0.0015, + shrink_wrap=True, + overlap=False, + contact_offset=0.006, + rest_offset=-0.002 + ) + elif not is_articulation(output_path): + apply_rigidbody_api(output_path) + enable_ccd_and_iters(output_path, pos_iters=14, vel_iters=4, + enable_ccd=True, enable_speculative_ccd=True) + set_convex_decomposition_params( + output_path, + voxel_resolution=600_000, + max_convex_hulls=24, + max_vertices_per_hull=64, + concavity=0.002, + shrink_wrap=True, + overlap=False, + contact_offset=0.006, + rest_offset=-0.002 + ) + + log.info(f'Rigid body count: {count_rigid_api(output_path)}') + log.info(f"Saved USD file to {os.path.abspath(output_path)}") + + else: + # Single-path branches kept for completeness (not used in batch flow) + input_path = args.input + output_path = args.output + if input_path.endswith(".obj") or input_path.endswith(".glb"): + convert_obj_to_usd(input_path, output_path) + apply_rigidbody_api(output_path) + enable_ccd_and_iters(output_path, pos_iters=14, vel_iters=4, + enable_ccd=True, enable_speculative_ccd=True) + set_convex_decomposition_params( + output_path, + voxel_resolution=600_000, + max_convex_hulls=24, + max_vertices_per_hull=64, + concavity=0.002, + shrink_wrap=True, + overlap=False, + contact_offset=0.006, + rest_offset=-0.002 + ) + log.info(f'Rigid body count: {count_rigid_api(output_path)}') + log.info(f"Saved USD file to {os.path.abspath(output_path)}") + + elif input_path.endswith(".urdf"): + urdf_unique_path = make_urdf_with_unique_visual_names(input_path) + convert_urdf_to_usd(urdf_unique_path, output_path) + apply_rigidbody_api(output_path) + enable_ccd_and_iters(output_path, pos_iters=14, vel_iters=4, + enable_ccd=True, enable_speculative_ccd=True) + set_convex_decomposition_params( + output_path, + voxel_resolution=600_000, + max_convex_hulls=24, + max_vertices_per_hull=64, + concavity=0.002, + shrink_wrap=True, + overlap=False, + contact_offset=0.006, + rest_offset=-0.002 + ) + log.info(f'Rigid body count: {count_rigid_api(output_path)}') + log.info(f"Saved USD file to {os.path.abspath(output_path)}") + +######################################################## +## Batch entry +######################################################## +def main(): + # Load keys from config/config.yaml and run per key + cfg = yaml.safe_load((base_dir / "config" / "config.yaml").open("r")) + keys = cfg["keys"] + for key in keys: + log.info(f"\n========== [USD Convert] key: {key} ==========") + run_conversion_for_key(key) + + # Exit or keep GUI (same behavior as your original main) + if args and args.exit_on_finish: + return + + if args and not args.headless: + carb_settings_iface = carb.settings.get_settings() + local_gui = carb_settings_iface.get("/app/window/enabled") + livestream_gui = carb_settings_iface.get("/app/livestream/enabled") + if local_gui or livestream_gui: + # If you want to open the last output USD in a viewer, uncomment below + # stage_utils.open_stage(args.output[-1] if isinstance(args.output, list) else args.output) + app = omni.kit.app.get_app_interface() + with contextlib.suppress(KeyboardInterrupt): + while app.is_running(): + app.update() + 
+if __name__ == "__main__": + main() + simulation_app.close() diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_trajectory_replay.py b/openreal2sim/simulation/isaaclab/data_collection/sim_trajectory_replay.py new file mode 100755 index 0000000..1ed673c --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_trajectory_replay.py @@ -0,0 +1,143 @@ +""" +Replay existing trajectories in Isaac Lab simulation. +""" +from __future__ import annotations + +# ─────────── AppLauncher ─────────── +import argparse, os, json, random, transforms3d +from pathlib import Path +import numpy as np +import torch +import yaml +from isaaclab.app import AppLauncher + +# ─────────── CLI ─────────── +parser = argparse.ArgumentParser("sim_policy") +parser.add_argument("--demo_dir", type=str, help="directory of robot trajectory") +parser.add_argument("--key", type=str, default=None, help="scene key (outputs/)") +parser.add_argument("--robot", type=str, default="franka") +parser.add_argument("--num_envs", type=int, default=1) +parser.add_argument("--num_trials", type=int, default=1) +parser.add_argument("--teleop_device", type=str, default="keyboard") +parser.add_argument("--sensitivity", type=float, default=1.0) +AppLauncher.add_app_launcher_args(parser) +args_cli = parser.parse_args() +args_cli.enable_cameras = True +args_cli.headless = False # headless mode for batch execution +app_launcher = AppLauncher(vars(args_cli)) +simulation_app = app_launcher.app + +# ─────────── Runtime imports ─────────── +import isaaclab.sim as sim_utils +from isaaclab.utils.math import subtract_frame_transforms + +from graspnetAPI.grasp import GraspGroup + + +# ─────────── Simulation environments ─────────── +from sim_base import BaseSimulator, get_next_demo_id +from sim_env_factory import make_env +from sim_preprocess.grasp_utils import get_best_grasp_with_hints +from sim_utils.transform_utils import pose_to_mat, mat_to_pose, grasp_to_world, grasp_approach_axis_batch +from sim_utils.sim_utils import load_sim_parameters + +BASE_DIR = Path.cwd() +sim_cfgs = json.load(open(Path(args_cli.demo_dir) / "config.json", "r")) +args_cli.key = sim_cfgs["key"] +img_folder = args_cli.key +out_dir = BASE_DIR / "outputs" + + +# ──────────────────────────── Heuristic Manipulation ──────────────────────────── +class HeuristicManipulation(BaseSimulator): + """ + Physical trial-and-error grasping with approach-axis perturbation: + • Multiple grasp proposals executed in parallel; + • Every attempt does reset → pre → grasp → close → lift → check; + • Early stops when any env succeeds; then re-exec for logging. 
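For reference, the `replay_actions` method defined just below consumes an array with one row per step, laid out as `[p(3) | q_wxyz(4) | gripper(1)]`, where gripper < 0.5 means open. A hand-built two-step example (values illustrative):

```python
import numpy as np

actions = np.zeros((2, 8), dtype=np.float32)  # two steps, single env
actions[:, 3] = 1.0                           # identity quaternion (w = 1)
actions[0, 0:3] = [0.4, 0.0, 0.30]            # step 0: approach, gripper open (0)
actions[1, 0:3] = [0.4, 0.0, 0.15]            # step 1: descend...
actions[1, 7] = 1.0                           # ...with the gripper closed
np.save("actions.npy", actions)               # replayed via --demo_dir
```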
+ """ + def __init__(self, sim, scene, sim_cfgs: dict): + robot_pose = torch.tensor( + sim_cfgs["robot_cfg"]["robot_pose"], + dtype=torch.float32, + device=sim.device + ) + super().__init__( + sim=sim, sim_cfgs=sim_cfgs, scene=scene, args=args_cli, + robot_pose=robot_pose, cam_dict=sim_cfgs["cam_cfg"], + out_dir=out_dir, img_folder=img_folder, + enable_motion_planning=True, + set_physics_props=True, debug_level=0, + ) + + self.selected_object_id = sim_cfgs["demo_cfg"]["manip_object_id"] + self.traj_key = sim_cfgs["demo_cfg"]["traj_key"] + self.traj_path = sim_cfgs["demo_cfg"]["traj_path"] + self.goal_offset = [0, 0, sim_cfgs["demo_cfg"]["goal_offset"]] + self.grasp_path = sim_cfgs["demo_cfg"]["grasp_path"] + self.grasp_idx = sim_cfgs["demo_cfg"]["grasp_idx"] + self.grasp_pre = sim_cfgs["demo_cfg"]["grasp_pre"] + self.grasp_delta = sim_cfgs["demo_cfg"]["grasp_delta"] + + # ---------- Helpers ---------- + def _to_base(self, pos_w: np.ndarray | torch.Tensor, quat_w: np.ndarray | torch.Tensor): + """World → robot base frame for all envs.""" + root = self.robot.data.root_state_w[:, 0:7] # [B,7] + p_w, q_w = self._ensure_batch_pose(pos_w, quat_w) + pb, qb = subtract_frame_transforms( + root[:, 0:3], root[:, 3:7], p_w, q_w + ) + return pb, qb # [B,3], [B,4] + + def replay_actions(self, actions: np.ndarray): + """ + Replay a sequence of recorded actions: (p[B,3], q[B,4], gripper[B,1]) + """ + n_steps = actions.shape[0] + + self.reset() + self.wait(gripper_open=True, steps=10) + + for t in range(n_steps): + print(f"[INFO] replay step {t}/{n_steps}") + act = actions[t:t+1] + p_b = torch.as_tensor(act[:, 0:3], dtype=torch.float32, device=self.sim.device) + q_b = torch.as_tensor(act[:, 3:7], dtype=torch.float32, device=self.sim.device) + g_b = act[:, 7] < 0.5 + jp, success = self.move_to(p_b, q_b, gripper_open=g_b) + if torch.any(success==False): + print(f"[ERR] replay step {t} failed.") + return False + jp = self.wait(gripper_open=g_b, steps=3) + return True + +# ──────────────────────────── Entry Point ──────────────────────────── +def main(): + env, _ = make_env( + cfgs=sim_cfgs, num_envs=args_cli.num_envs, + device=args_cli.device, + bg_simplify=False, + ) + sim, scene = env.sim, env.scene + + my_sim = HeuristicManipulation(sim, scene, sim_cfgs=sim_cfgs) + + demo_root = (out_dir / img_folder / "demos").resolve() + + for _ in range(args_cli.num_trials): + + robot_pose = torch.tensor(sim_cfgs["robot_cfg"]["robot_pose"], dtype=torch.float32, device=my_sim.sim.device) # [7], pos(3)+quat(wxyz)(4) + my_sim.set_robot_pose(robot_pose) + my_sim.demo_id = get_next_demo_id(demo_root) + my_sim.reset() + print(f"[INFO] start simulation demo_{my_sim.demo_id}") + actions = np.load(Path(args_cli.demo_dir) / "actions.npy") + my_sim.replay_actions(actions) + + env.close() + simulation_app.close() + +if __name__ == "__main__": + main() + os.system("quit()") + simulation_app.close() diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_utils/calibration_utils.py b/openreal2sim/simulation/isaaclab/data_collection/sim_utils/calibration_utils.py new file mode 100755 index 0000000..e8c3666 --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_utils/calibration_utils.py @@ -0,0 +1,88 @@ +import numpy as np +import transforms3d + +def _as_homo(T): + A = np.asarray(T, dtype=np.float64) + if A.shape == (4, 4): + return A + if A.shape == (3, 4): + M = np.eye(4, dtype=np.float64); M[:3, :4] = A; return M + if A.shape == (3, 3): + M = np.eye(4, dtype=np.float64); M[:3, :3] = A; return M + raise 
ValueError(f"Unsupported shape: {A.shape}") + +def _blk3(R): + M = np.eye(4, dtype=np.float64); M[:3, :3] = R; return M + +def _quat_wxyz(R): + q = transforms3d.quaternions.mat2quat(R) # (w,x,y,z) + return -q if q[0] < 0 else q + +def load_extrinsics(config: dict, key: str) -> np.ndarray: + """ + Load the extrinsic parameters from the scene JSON. + """ + + E_list = config.get("extrinsics", None) + if E_list is None: + print(f"Warning: No calibrated extrinsics found for {key}. Use random robot poses.") + return None + E = np.array(E_list).reshape(4, 4) + print(f"Loaded extrinsics for {key}:") + print(E) + return E + + +def calibration_to_robot_pose(scene_js: dict, T_camopencv_to_robotbase_m) -> tuple[np.ndarray, np.ndarray]: + """ + Reproduce the real-world calibrated camera to robot transform into simulation + Inputs: + scene_js: The scene JSON containing camera extrinsics + T_camopencv_to_robotbase_m: The transform matrix from camera_optical (in opencv camera format) to robot_base + Outputs: + pos_w: The position of the robot base in world coordinates + quat_w: The orientation of the robot base in world coordinates (as a quaternion) + meta: A dictionary containing additional information about the calibration + """ + + T_camopencv_to_world = _as_homo(scene_js["camera"]["camera_opencv_to_world"]) + + T_camopencv_to_robotbase = _as_homo(T_camopencv_to_robotbase_m) + + T_robotbase_to_world = T_camopencv_to_world @ np.linalg.inv(T_camopencv_to_robotbase) + + pos_w = T_robotbase_to_world[:3, 3].copy() + quat_w = _quat_wxyz(T_robotbase_to_world[:3, :3]) + + return pos_w, quat_w + +def calibration_to_robot_pose_deprecated(scene_js: dict, T_camisaac_to_robotbase_m) -> tuple[np.ndarray, np.ndarray]: + """ + Reproduce the real-world calibrated camera to robot transform into simulation + Inputs: + scene_js: The scene JSON containing camera extrinsics + T_camisaac_to_robotbase_m: The transform matrix from camera_isaac to robot_base + Outputs: + pos_w: The position of the robot base in world coordinates + quat_w: The orientation of the robot base in world coordinates (as a quaternion) + meta: A dictionary containing additional information about the calibration + """ + + T_camopencv_to_world = _as_homo(scene_js["camera"]["camera_opencv_to_world"]) + + T_camisaac_to_robotbase = _as_homo(T_camisaac_to_robotbase_m) + + # This may be the true transformation + # REP-103: camera_opencv(OpenCV: +x right, +y down, +z forward) -> camera_isaac(IsaacSim: +x forward, +y left, +z up) + T_camopencv_to_camisaac = np.array([[ 0, 0, 1], + [-1, 0, 0], + [ 0, -1, 0]], dtype=np.float64) + + T_camopencv_to_robotbase = T_camisaac_to_robotbase @ _blk3(T_camopencv_to_camisaac) # base -> camera_link + + T_robotbase_to_world = T_camopencv_to_world @ np.linalg.inv(T_camopencv_to_robotbase) + + pos_w = T_robotbase_to_world[:3, 3].copy() + quat_w = _quat_wxyz(T_robotbase_to_world[:3, :3]) + + return pos_w, quat_w diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_utils/sim_utils.py b/openreal2sim/simulation/isaaclab/data_collection/sim_utils/sim_utils.py new file mode 100755 index 0000000..2ddd68b --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_utils/sim_utils.py @@ -0,0 +1,257 @@ +import json +from pathlib import Path +import yaml +import torch +import random +import numpy as np +import transforms3d + +from sim_utils.calibration_utils import calibration_to_robot_pose, load_extrinsics + +default_config = { + "physics": "default", + "extrinsics": None, + "goal_offset": 0, + "grasp_idx": -1, + 
"grasp_pre": None, + "grasp_delta": None, + "traj_key": "fdpose_trajs", # "fdpose_trajs", "simple_trajs", "hybrid_trajs" + "manip_object_id": "1", +} + +def compose_configs(key_name: str, config: dict) -> dict: + ret_key_config = {} + local_config = config["local"].get(key_name, {}) + local_config = local_config.get("simulation", {}) + global_config = config.get("global", {}) + global_config = global_config.get("simulation", {}) + for param in default_config.keys(): + value = local_config.get(param, global_config.get(param, default_config[param])) + ret_key_config[param] = value + print(f"[Info] Config for {key_name}: {ret_key_config}") + return ret_key_config + +def load_sim_parameters(basedir, key) -> dict: + scene_json_path = Path(basedir) / "outputs" / key / "simulation" / "scene.json" + scene_json = json.load(open(scene_json_path, "r")) + exp_config = yaml.load(open(Path(basedir) / "config/config.yaml"), Loader=yaml.FullLoader) + exp_config = compose_configs(key, exp_config) + + # cam configs + cam_cfg = { + "width": int(scene_json["camera"]["width"]), + "height": int(scene_json["camera"]["height"]), + "fx": float(scene_json["camera"]["fx"]), + "fy": float(scene_json["camera"]["fy"]), + "cx": float(scene_json["camera"]["cx"]), + "cy": float(scene_json["camera"]["cy"]), + "cam_orientation": tuple(scene_json["camera"]["camera_heading_wxyz"]), + "scene_info": { + "move_to": list(scene_json["camera"]["camera_position"]), + "scene_min": list(scene_json["aabb"]["scene_min"]), + "scene_max": list(scene_json["aabb"]["scene_max"]), + "object_center":list(scene_json["objects"]["1"]["object_center"]), + }, + } + + # robot configs + E = load_extrinsics(exp_config, key) + if E is None: + robot_cfg = random.choice(robot_placement_candidates_v2(cam_cfg["scene_info"])) + pos_w, quat_w = robot_cfg["position"], robot_cfg["rotation"] + else: + pos_w, quat_w = calibration_to_robot_pose(scene_json, E) + robot_pose = list(pos_w) + list(quat_w) + robot_cfg = { + "robot_pose": robot_pose, + } + + # demo configs + goal_offset = exp_config.get("goal_offset", 0) + grasp_idx = exp_config.get("grasp_idx", -1) + grasp_pre = exp_config.get("grasp_pre", None) + grasp_delta = exp_config.get("grasp_delta", None) + traj_key = exp_config.get("traj_key", "fdpose_trajs") # "fdpose_trajs", "simple_trajs", "hybrid_trajs" + manip_object_id = exp_config.get("manip_object_id", "1") + traj_path = scene_json["objects"][manip_object_id][traj_key] + grasp_path = scene_json["objects"][manip_object_id].get("grasps", None) + demo_cfg = { + "manip_object_id": manip_object_id, + "traj_key": traj_key, + "traj_path": traj_path, + "goal_offset": goal_offset, + "grasp_idx": grasp_idx, + "grasp_pre": grasp_pre, + "grasp_delta": grasp_delta, + "grasp_path": grasp_path, + } + + # physics configs + physics_key = exp_config.get("physics", "default") + physics_cfg = {} + if physics_key == "default": + physics_cfg["bg_physics"] = default_bg_physics + physics_cfg["obj_physics"] = default_obj_physics + elif physics_key == "viz": + physics_cfg["bg_physics"] = default_bg_physics + physics_cfg["obj_physics"] = viz_obj_physics + else: + print(f"[WARN] Unrecognized physics key '{physics_key}', no physics specified in exp configs.") + physics_cfg["bg_physics"] = None + physics_cfg["obj_physics"] = None + + return { + "key": key, + "scene_cfg": scene_json, + "exp_cfg": exp_config, + "cam_cfg": cam_cfg, + "robot_cfg": robot_cfg, + "demo_cfg": demo_cfg, + "physics_cfg": physics_cfg, + } + + +# Default physics for background and objects +default_bg_physics = 
{ + "mass_props": {"mass": 100.0}, + "rigid_props": {"disable_gravity": True, "kinematic_enabled": True}, + "collision_props": {"collision_enabled": True,}, +} + +default_obj_physics = { + "mass_props": {"mass": 0.2}, + "rigid_props": {"disable_gravity": False, "kinematic_enabled": False}, + "collision_props": {"collision_enabled": True,}, +} + +# For viz +viz_obj_physics = { + "mass_props": {"mass": 0.5}, + "rigid_props": {"disable_gravity": True, "kinematic_enabled": False}, + "collision_props": {"collision_enabled": False,}, +} + +def robot_placement_candidates_v2( + scene_info: dict, + reachability=(0.3, 0.65), + reachability_zcenter_offset=0.3, + robot_aabb=(0.12, 0.12, 0.72), + num_radius_steps=10, + num_angle_steps=36, + num_z_steps=5, + occlusion_clearance=(0.02, 0.02, 0.02), # inflated base AABB for occlusion checks (m) + obj_world_radius=0.02, # small ring near object center (m) + num_obj_rays=8, # # of rays from ring points (>=8) + min_cam_base_separation_deg=45.0, # minimum separation angle wrt camera direction +): + """ + Return robot base poses (position + wxyz rotation) satisfying: + (1) Reachability + (2) No overlap with background AABB + (3) Same half-space as camera w.r.t object & min separation angle + (4) No occlusion from camera to object (center ray + ring rays) + """ + object_center = np.array(scene_info["object_center"], dtype=float) + scene_min = np.array(scene_info["scene_min"], dtype=float) + scene_max = np.array(scene_info["scene_max"], dtype=float) + cam_pos = np.array(scene_info["move_to"], dtype=float) + + def segment_intersects_aabb(p0, p1, aabb_min, aabb_max, eps=1e-9): + d = p1 - p0 + t0, t1 = 0.0, 1.0 + for i in range(3): + if abs(d[i]) < eps: + if p0[i] < aabb_min[i] or p0[i] > aabb_max[i]: + return False + else: + inv_d = 1.0 / d[i] + t_near = (aabb_min[i] - p0[i]) * inv_d + t_far = (aabb_max[i] - p0[i]) * inv_d + if t_near > t_far: + t_near, t_far = t_far, t_near + t0 = max(t0, t_near) + t1 = min(t1, t_far) + if t0 > t1: + return False + return True + + def edge_ring_points(obj_center, cam_pos, radius, m): + if m <= 0 or radius <= 0.0: + return np.empty((0, 3), dtype=float) + view = obj_center - cam_pos + n = np.linalg.norm(view) + if n < 1e-9: + return np.empty((0, 3), dtype=float) + u = view / n + tmp = np.array([1.0, 0.0, 0.0], dtype=float) + if abs(np.dot(tmp, u)) > 0.9: + tmp = np.array([0.0, 1.0, 0.0], dtype=float) + e1 = np.cross(u, tmp); e1 /= (np.linalg.norm(e1) + 1e-12) + e2 = np.cross(u, e1); e2 /= (np.linalg.norm(e2) + 1e-12) + phis = np.linspace(0.0, 2.0 * np.pi, num=m, endpoint=False) + ring = [obj_center + radius * (np.cos(phi) * e1 + np.sin(phi) * e2) for phi in phis] + return np.asarray(ring, dtype=float) + + candidates = [] + radius_values = np.linspace(reachability[0], reachability[1], num=num_radius_steps) + angle_values = np.linspace(0, 2 * np.pi, num=num_angle_steps, endpoint=False) + z_min, z_max = 0.0, 2.0 * float(object_center[2]) + z_values = np.linspace(z_min, z_max, num=num_z_steps) + ring_pts = edge_ring_points(object_center, cam_pos, obj_world_radius, num_obj_rays) + + cos_min_sep = np.cos(np.deg2rad(float(min_cam_base_separation_deg))) + + for z in z_values: + reach_center_z = z + reachability_zcenter_offset + for r in radius_values: + for theta in angle_values: + dx = r * np.cos(theta); dy = r * np.sin(theta) + base_pos = np.array([object_center[0] - dx, object_center[1] - dy, z], dtype=float) + + # (1) reachability + reach_dist = np.linalg.norm(object_center - np.array([base_pos[0], base_pos[1], reach_center_z])) + if 
reach_dist > reachability[1] or reach_dist < reachability[0]: + continue + + # (2) no overlap with background AABB + half = 0.5 * np.array(robot_aabb, dtype=float) + base_min = base_pos - half + base_max = base_pos + half + if np.all(base_max > scene_min) and np.all(base_min < scene_max): + continue + + # (3) same-side + min angle + v_cam = cam_pos - object_center + v_base = base_pos - object_center + n1 = np.linalg.norm(v_cam); n2 = np.linalg.norm(v_base) + if n1 < 1e-9 or n2 < 1e-9: + continue + cos_val = float(np.dot(v_cam, v_base) / (n1 * n2)) + if (cos_val < 0.0) or (cos_val > cos_min_sep): + continue + + # (4a) no occlusion on center ray + extra = np.array(occlusion_clearance, dtype=float) + occ_min = base_min - extra + occ_max = base_max + extra + if segment_intersects_aabb(cam_pos, object_center, occ_min, occ_max): + continue + # (4b) ring rays + if any(segment_intersects_aabb(cam_pos, P, occ_min, occ_max) for P in ring_pts): + continue + + # (5) yaw towards object in XY plane + facing_vec = object_center[:2] - base_pos[:2] + yaw = np.arctan2(facing_vec[1], facing_vec[0]) + quat_wxyz = transforms3d.euler.euler2quat(0.0, 0.0, yaw) + + candidates.append({ + "position": base_pos.tolist(), + "rotation": [float(x) for x in quat_wxyz], # (w,x,y,z) + "yaw_deg": float(np.degrees(yaw)), + "base_z": float(z), + }) + + if not candidates: + raise RuntimeError("No valid base found under reach/collision/same-side(min-angle)/occlusion constraints.") + return candidates diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_utils/transform_utils.py b/openreal2sim/simulation/isaaclab/data_collection/sim_utils/transform_utils.py new file mode 100755 index 0000000..a8997e9 --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_utils/transform_utils.py @@ -0,0 +1,50 @@ +# transform_utils.py +import torch +import numpy as np +import transforms3d + +def grasp_to_world(grasp) -> tuple[np.ndarray, np.ndarray]: + """ + GraspNet -> EE: + X_grasp=approach, Y_grasp=width, Z_grasp=thickness + We want: Z_EE=approach, Y_EE=-width, X_EE=thickness + """ + Rg = grasp.rotation_matrix.astype(np.float32) + delta = np.array([[0, 0, 1], + [0, -1, 0], + [1, 0, 0]], dtype=np.float32) + Ree = Rg @ delta + q = transforms3d.quaternions.mat2quat(Ree).astype(np.float32) # wxyz + p = grasp.translation.astype(np.float32) + return p, q + +def grasp_approach_axis_batch(qw_batch: np.ndarray) -> np.ndarray: + """ + get the approach axis (Z axis) of each env in world frame + qw_batch: (B,4) quaternion (wxyz) + return: (B,3) each env's grasping approach axis (Z axis) in world frame + """ + qw_batch = np.asarray(qw_batch, dtype=np.float32) + a_list = [] + for q in qw_batch: + R = transforms3d.quaternions.quat2mat(q) + a = R[:, 2] + a_list.append((a / (np.linalg.norm(a) + 1e-8)).astype(np.float32)) + return np.stack(a_list, axis=0) + +def pose_to_mat(pos, quat): + if torch.is_tensor(pos): pos = pos.cpu().numpy() + if torch.is_tensor(quat): quat = quat.cpu().numpy() + m = np.eye(4, dtype=np.float32) + m[:3, :3] = transforms3d.quaternions.quat2mat(quat) + m[:3, 3] = pos + return m + +def mat_to_pose(mat): + pos = mat[:3, 3] + quat = transforms3d.quaternions.mat2quat(mat[:3, :3]) + return pos, quat + +def normalize_quaternion(q: np.ndarray) -> np.ndarray: + norm = np.linalg.norm(q, axis=1, keepdims=True) + 1e-9 + return q / norm From 6f21c3e9b2c98840219089258ce0b2d63118307d Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Mon, 10 Nov 2025 07:54:41 -0800 Subject: [PATCH 2/3] not saving failed traj --- 
docker/compose.yml | 1 - .../isaaclab/data_collection/sim_heuristic_manip.py | 8 ++++---- openreal2sim/simulation/isaaclab/sim_base.py | 2 +- openreal2sim/simulation/isaaclab/sim_heuristic_manip.py | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/docker/compose.yml b/docker/compose.yml index ac8d0e2..41d3486 100644 --- a/docker/compose.yml +++ b/docker/compose.yml @@ -33,7 +33,6 @@ services: context: .. dockerfile: docker/isaaclab/Dockerfile image: isaaclab:dev - user: "${HOST_UID}:${HOST_GID}" deploy: resources: reservations: diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py b/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py index 8f29fb3..d5c16d4 100755 --- a/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py @@ -219,7 +219,7 @@ def _to_base(self, pos_w: np.ndarray | torch.Tensor, quat_w: np.ndarray | torch. return pb, qb # [B,3], [B,4] # ---------- Batched execution & lift-check ---------- - def execute_and_lift_once_batch(self, info: dict, lift_height=0.06, position_threshold=0.1) -> tuple[np.ndarray, np.ndarray]: + def execute_and_lift_once_batch(self, info: dict, lift_height=0.06, position_threshold=0.2) -> tuple[np.ndarray, np.ndarray]: """ Reset → pre → grasp → close → lift → hold; return (success[B], score[B]). Now propagates motion planning failures by returning (None, None). @@ -692,9 +692,9 @@ def inference(self, std: float = 0.0, max_restart_attempts: int = 10, self.save_data() return True else: - print("[WARN] Task verification failed, but trajectory completed. Saving anyway...") - self.save_data() - return True + print("[WARN] Task verification failed, but trajectory completed") + print(f"[ERROR] Attempting restart") + self.clear_data() except RuntimeError as e: if "MotionPlanningFailure_RestartNeeded" in str(e): diff --git a/openreal2sim/simulation/isaaclab/sim_base.py b/openreal2sim/simulation/isaaclab/sim_base.py index ca01aa6..d3e95b7 100755 --- a/openreal2sim/simulation/isaaclab/sim_base.py +++ b/openreal2sim/simulation/isaaclab/sim_base.py @@ -948,4 +948,4 @@ def delete_data(self): os.system(f"mv {save_path} {failure_path}") for key in self.save_dict.keys(): self.save_dict[key] = [] - print("[INFO]: Clear up the folder: ", save_path) + print("[INFO]: Clear up the folder: ", save_path) \ No newline at end of file diff --git a/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py b/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py index 647af13..cf2f1d5 100755 --- a/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py +++ b/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py @@ -515,4 +515,4 @@ def main(): if __name__ == "__main__": main() os.system("quit()") - simulation_app.close() + simulation_app.close() \ No newline at end of file From f535e1e61ef4ae803aa6cae6584d9c44b31b9937 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Mon, 10 Nov 2025 11:31:49 -0800 Subject: [PATCH 3/3] current updates for pipeline --- .../data_collection/sim_heuristic_manip.py | 483 +++++++++++++----- 1 file changed, 350 insertions(+), 133 deletions(-) diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py b/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py index d5c16d4..e876e67 100755 --- a/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py @@ -16,7 +16,7 @@ 
parser = argparse.ArgumentParser("sim_policy") parser.add_argument("--key", type=str, default="demo_video", help="scene key (outputs/)") parser.add_argument("--robot", type=str, default="franka") -parser.add_argument("--num_envs", type=int, default=1) +parser.add_argument("--num_envs", type=int, default=4) parser.add_argument("--num_trials", type=int, default=1) parser.add_argument("--teleop_device", type=str, default="keyboard") parser.add_argument("--sensitivity", type=float, default=1.0) @@ -366,62 +366,101 @@ def build_grasp_info( def grasp_trials(self, gg, std: float = 0.0005, max_retries: int = 3): """ - Try grasp proposals with automatic recovery from motion planning failures. - Returns dict with success flag and chosen grasp parameters. + Async grasp trials: each env finds its own successful grasp independently. + Environments that succeed early keep their grasp while failed envs continue searching. - Args: - gg: GraspGroup with grasp proposals - std: Standard deviation for random perturbations - max_retries: Number of retry attempts if motion planning fails + Returns dict with per-env success flags, chosen grasp parameters, and skip mask. + Envs that exhaust retries are marked to skip in future processing. """ B = self.scene.num_envs idx_all = list(range(len(gg))) if len(idx_all) == 0: print("[ERR] empty grasp list.") return { - "success": False, - "chosen_pose_w": None, - "chosen_pre": None, - "chosen_delta": None, + "success": np.zeros(B, dtype=bool), + "chosen_pose_w": [None] * B, + "chosen_pre": [None] * B, + "chosen_delta": [None] * B, + "skip_envs": np.zeros(B, dtype=bool), } rng = np.random.default_rng() pre_dist_const = 0.12 # m + # Track per-environment success and chosen grasps + env_success = np.zeros(B, dtype=bool) + env_chosen_pose_w = [None] * B # List of (pos, quat) tuples per env + env_chosen_pre = np.zeros(B, dtype=np.float32) + env_chosen_delta = np.zeros(B, dtype=np.float32) + env_best_score = np.zeros(B, dtype=np.float32) + env_skip = np.zeros(B, dtype=bool) # Track envs to skip due to exhausted retries + # Retry loop for grasp selection phase for retry_attempt in range(max_retries): try: print(f"\n{'='*60}") - print(f"GRASP SELECTION ATTEMPT {retry_attempt + 1}/{max_retries}") + print(f"ASYNC GRASP SELECTION ATTEMPT {retry_attempt + 1}/{max_retries}") + print(f"Successful envs: {env_success.sum()}/{B}") print(f"{'='*60}\n") - success = False - chosen_pose_w = None # (p_w, q_w) - chosen_pre = None - chosen_delta = None - - # Assign different grasp proposals to different envs + # If all non-skipped envs succeeded, we're done + active_mask = ~env_skip + if np.all(env_success[active_mask]): + successful_envs = np.where(env_success & ~env_skip)[0] + skipped_envs = np.where(env_skip)[0] + print(f"[SUCCESS] All active environments found valid grasps!") + if len(skipped_envs) > 0: + print(f"[INFO] Skipped {len(skipped_envs)} failed environments: {skipped_envs.tolist()}") + break + + # Get mask of envs that still need to find a grasp (not successful and not skipped) + active_failed_mask = (~env_success) & (~env_skip) + failed_env_ids = np.where(active_failed_mask)[0] + n_failed = len(failed_env_ids) + + if n_failed == 0: + # All remaining envs are either successful or skipped + break + + print(f"[INFO] {n_failed} environments still searching for grasps: {failed_env_ids.tolist()}") + + # Iterate through grasp proposals for start in range(0, len(idx_all), B): block = idx_all[start : start + B] if len(block) < B: block = block + [block[-1]] * (B - len(block)) + # Build 
grasp proposals for ALL envs (even successful ones get dummy data) grasp_pos_w_batch, grasp_quat_w_batch = [], [] - for idx in block: - p_w, q_w = grasp_to_world(gg[int(idx)]) - grasp_pos_w_batch.append(p_w.astype(np.float32)) - grasp_quat_w_batch.append(q_w.astype(np.float32)) + for b in range(B): + if env_success[b] or env_skip[b]: + # Use existing successful grasp or dummy for skipped env + if env_success[b]: + p_w, q_w = env_chosen_pose_w[b] + else: + # Dummy grasp for skipped env + p_w, q_w = grasp_to_world(gg[0]) + grasp_pos_w_batch.append(p_w.astype(np.float32)) + grasp_quat_w_batch.append(q_w.astype(np.float32)) + else: + # New grasp proposal for failed env + idx = block[b % len(block)] + p_w, q_w = grasp_to_world(gg[int(idx)]) + grasp_pos_w_batch.append(p_w.astype(np.float32)) + grasp_quat_w_batch.append(q_w.astype(np.float32)) + grasp_pos_w_batch = np.stack(grasp_pos_w_batch, axis=0) # (B,3) grasp_quat_w_batch = np.stack(grasp_quat_w_batch, axis=0) # (B,4) self.show_goal(grasp_pos_w_batch, grasp_quat_w_batch) # Random disturbance along approach axis pre_dist_batch = np.full((B,), pre_dist_const, dtype=np.float32) - delta_batch = rng.normal(0.0, std, size=(B,)).astype(np.float32) + delta_batch = rng.normal(0.0, std, size=(B,)).astype(np.float32) info = self.build_grasp_info(grasp_pos_w_batch, grasp_quat_w_batch, pre_dist_batch, delta_batch) + # Execute grasp trial for ALL envs ok_batch, score_batch = self.execute_and_lift_once_batch(info) # Check if motion planning returned valid results @@ -429,36 +468,60 @@ def grasp_trials(self, gg, std: float = 0.0005, max_retries: int = 3): print(f"[CRITICAL] Motion planning failed during grasp trial at block[{start}:{start+B}]") raise RuntimeError("GraspTrialMotionPlanningFailure") - ok_cnt = int(ok_batch.sum()) - print(f"[SEARCH] block[{start}:{start+B}] -> success {ok_cnt}/{B}") + # Update success status ONLY for envs that were still searching + for b in range(B): + if not env_success[b] and not env_skip[b]: # Only update if active and not already successful + if ok_batch[b] and score_batch[b] > env_best_score[b]: + # This env found a better grasp + env_success[b] = True + env_chosen_pose_w[b] = (grasp_pos_w_batch[b], grasp_quat_w_batch[b]) + env_chosen_pre[b] = pre_dist_batch[b] + env_chosen_delta[b] = delta_batch[b] + env_best_score[b] = score_batch[b] + print(f"[SUCCESS] Env {b} found valid grasp (score: {score_batch[b]:.2f})") + + # Check if all active envs now succeeded + active_mask = ~env_skip + newly_successful = np.sum(env_success[active_failed_mask]) + print(f"[SEARCH] block[{start}:{start+B}] -> {newly_successful}/{n_failed} previously-failed envs now successful") - if ok_cnt > 0: - winner = int(np.argmax(score_batch)) - chosen_pose_w = (grasp_pos_w_batch[winner], grasp_quat_w_batch[winner]) - chosen_pre = float(pre_dist_batch[winner]) - chosen_delta = float(delta_batch[winner]) - success = True - print(f"[SUCCESS] Found valid grasp on attempt {retry_attempt + 1}") + if np.all(env_success[active_mask]): + successful_envs = np.where(env_success & ~env_skip)[0] + skipped_envs = np.where(env_skip)[0] + print(f"[SUCCESS] All active environments found valid grasps on attempt {retry_attempt + 1}!") + if len(skipped_envs) > 0: + print(f"[INFO] {len(skipped_envs)} environments skipped: {skipped_envs.tolist()}") return { - "success": success, - "chosen_pose_w": chosen_pose_w, - "chosen_pre": chosen_pre, - "chosen_delta": chosen_delta, + "success": env_success, + "chosen_pose_w": env_chosen_pose_w, + "chosen_pre": env_chosen_pre, + 
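                        # chosen_* are per-env results; entries for envs flagged in
+                        # skip_envs are unset placeholders and should be ignored.
+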
"chosen_delta": env_chosen_delta, + "skip_envs": env_skip, } - - # If we get here, no grasp succeeded in this attempt - if not success: - print(f"[WARN] No proposal succeeded in grasp trial attempt {retry_attempt + 1}/{max_retries}") + + # End of this attempt - check status + active_mask = ~env_skip + if not np.all(env_success[active_mask]): + still_failed = np.where((~env_success) & (~env_skip))[0] + print(f"[WARN] {len(still_failed)} envs still need grasps after attempt {retry_attempt + 1}: {still_failed.tolist()}") if retry_attempt < max_retries - 1: - print("[INFO] Retrying grasp selection...") + print("[INFO] Retrying grasp selection for failed environments...") continue else: - print("[ERR] No proposal succeeded to lift after all grasp trial attempts.") + # Last attempt exhausted - mark remaining failed envs as skipped + print(f"[WARN] Exhausted all {max_retries} grasp attempts") + print(f"[INFO] Marking {len(still_failed)} failed environments to skip: {still_failed.tolist()}") + env_skip[still_failed] = True + + successful_envs = np.where(env_success)[0] + print(f"[INFO] Proceeding with {len(successful_envs)} successful environments: {successful_envs.tolist()}") + return { - "success": False, - "chosen_pose_w": None, - "chosen_pre": None, - "chosen_delta": None, + "success": env_success, + "chosen_pose_w": env_chosen_pose_w, + "chosen_pre": env_chosen_pre, + "chosen_delta": env_chosen_delta, + "skip_envs": env_skip, } except RuntimeError as e: @@ -473,14 +536,19 @@ def grasp_trials(self, gg, std: float = 0.0005, max_retries: int = 3): continue else: print("[ERR] Grasp trial failed after all retry attempts") + # Mark all currently failed envs as skipped + failed_envs = np.where((~env_success) & (~env_skip))[0] + if len(failed_envs) > 0: + print(f"[INFO] Marking {len(failed_envs)} failed environments to skip: {failed_envs.tolist()}") + env_skip[failed_envs] = True return { - "success": False, - "chosen_pose_w": None, - "chosen_pre": None, - "chosen_delta": None, + "success": env_success, + "chosen_pose_w": env_chosen_pose_w, + "chosen_pre": env_chosen_pre, + "chosen_delta": env_chosen_delta, + "skip_envs": env_skip, } else: - # Unknown error, re-raise raise except Exception as e: @@ -490,20 +558,28 @@ def grasp_trials(self, gg, std: float = 0.0005, max_retries: int = 3): self.clear_data() continue else: + # Mark all currently failed envs as skipped + failed_envs = np.where((~env_success) & (~env_skip))[0] + if len(failed_envs) > 0: + print(f"[INFO] Marking {len(failed_envs)} failed environments to skip: {failed_envs.tolist()}") + env_skip[failed_envs] = True return { - "success": False, - "chosen_pose_w": None, - "chosen_pre": None, - "chosen_delta": None, + "success": env_success, + "chosen_pose_w": env_chosen_pose_w, + "chosen_pre": env_chosen_pre, + "chosen_delta": env_chosen_delta, + "skip_envs": env_skip, } - # Fallback (should not reach here) + # Return final status return { - "success": False, - "chosen_pose_w": None, - "chosen_pre": None, - "chosen_delta": None, + "success": env_success, + "chosen_pose_w": env_chosen_pose_w, + "chosen_pre": env_chosen_pre, + "chosen_delta": env_chosen_delta, + "skip_envs": env_skip, } + def is_success(self, position_threshold: float = 0.05) -> bool: """ @@ -557,16 +633,98 @@ def is_success(self, position_threshold: float = 0.05) -> bool: print("="*50 + "\n") return all_success + + def save_data_selective(self, skip_envs: np.ndarray, ignore_keys: List[str] = []): + """ + Save data only for non-skipped environments. 
+ + Args: + skip_envs: Boolean array [B] indicating which envs to skip + ignore_keys: List of keys to ignore during save + """ + save_root = self._demo_dir() + save_root.mkdir(parents=True, exist_ok=True) + + stacked = {k: np.array(v) for k, v in self.save_dict.items()} + + active_envs = np.where(~skip_envs)[0] + print(f"[INFO] Saving data for {len(active_envs)} active environments: {active_envs.tolist()}") + + for b in active_envs: + env_dir = self._env_dir(save_root, b) + for key, arr in stacked.items(): + if key in ignore_keys: + continue + if key == "rgb": + video_path = env_dir / "sim_video.mp4" + writer = imageio.get_writer( + video_path, fps=50, macro_block_size=None + ) + for t in range(arr.shape[0]): + writer.append_data(arr[t, b]) + writer.close() + elif key == "segmask": + video_path = env_dir / "mask_video.mp4" + writer = imageio.get_writer( + video_path, fps=50, macro_block_size=None + ) + for t in range(arr.shape[0]): + writer.append_data((arr[t, b].astype(np.uint8) * 255)) + writer.close() + elif key == "depth": + depth_seq = arr[:, b] + flat = depth_seq[depth_seq > 0] + max_depth = np.percentile(flat, 99) if flat.size > 0 else 1.0 + depth_norm = np.clip(depth_seq / max_depth * 255.0, 0, 255).astype( + np.uint8 + ) + video_path = env_dir / "depth_video.mp4" + writer = imageio.get_writer( + video_path, fps=50, macro_block_size=None + ) + for t in range(depth_norm.shape[0]): + writer.append_data(depth_norm[t]) + writer.close() + np.save(env_dir / f"{key}.npy", depth_seq) + else: + np.save(env_dir / f"{key}.npy", arr[:, b]) + json.dump(self.sim_cfgs, open(env_dir / "config.json", "w"), indent=2) + + print("[INFO]: Demonstration is saved at: ", save_root) + + # Compose real videos only for active environments + print("\n[INFO] Composing real videos with background...") + for b in active_envs: + print(f"[INFO] Processing environment {b}/{len(active_envs)}...") + success = self.compose_real_video(env_id=b) + if success: + print(f"[INFO] Real video composed successfully for env {b}") + else: + print(f"[WARN] Failed to compose real video for env {b}") + + demo_root = self.out_dir / "all_demos" + demo_root.mkdir(parents=True, exist_ok=True) + total_demo_id = get_next_demo_id(demo_root) + demo_save_path = demo_root / f"demo_{total_demo_id}" + demo_save_path.mkdir(parents=True, exist_ok=True) + meta_info = { + "path": str(save_root), + "fps": 50, + "active_envs": active_envs.tolist(), + "skipped_envs": np.where(skip_envs)[0].tolist(), + } + with open(demo_save_path / "meta_info.json", "w") as f: + json.dump(meta_info, f) + os.system(f"cp -r {save_root}/* {demo_save_path}") + print("[INFO]: Demonstration is saved at: ", demo_save_path) def inference(self, std: float = 0.0, max_restart_attempts: int = 10, - max_grasp_retries: int = 3, regrasp_after_failures: int = 3) -> bool: + max_grasp_retries: int = 1, regrasp_after_failures: int = 1) -> bool: """ - Main function with automatic restart on motion planning failures. - Now has separate retry loops for: - 1. Grasp selection phase (max_grasp_retries) - 2. Trajectory following phase (max_restart_attempts) - 3. Re-grasp after N consecutive trajectory failures (regrasp_after_failures) + Main function with async per-env grasp selection and trajectory following. + Each environment progresses independently through grasp selection, and only + moves to trajectory following once ALL envs have found successful grasps. 
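+        Envs that exhaust their grasp retries are excluded via skip_envs
+        rather than blocking the rest of the batch.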
""" B = self.scene.num_envs @@ -578,59 +736,94 @@ def inference(self, std: float = 0.0, max_restart_attempts: int = 10, gg = GraspGroup().from_npy(npy_file_path=npy_path) gg = get_best_grasp_with_hints(gg, point=None, direction=[0, 0, -1]) - # Track consecutive trajectory failures for re-grasping - consecutive_trajectory_failures = 0 + # Track per-environment state + env_grasp_success = np.zeros(B, dtype=bool) + env_traj_success = np.zeros(B, dtype=bool) + env_consecutive_failures = np.zeros(B, dtype=int) + env_skip = np.zeros(B, dtype=bool) # Track envs to skip entirely + total_grasp_attempts = 0 - max_total_grasp_attempts = 5 # Prevent infinite re-grasping loop + max_total_grasp_attempts = 5 # ========== OUTER LOOP: RE-GRASP IF NEEDED ========== while total_grasp_attempts < max_total_grasp_attempts: total_grasp_attempts += 1 - # ========== PHASE 1: GRASP SELECTION (with retries) ========== + # ========== PHASE 1: ASYNC GRASP SELECTION ========== if self.grasp_idx >= 0: - # Fixed grasp index mode + # Fixed grasp index mode - all envs use same grasp if self.grasp_idx >= len(gg): print(f"[ERR] grasp_idx {self.grasp_idx} out of range [0,{len(gg)})") return False print(f"[INFO] using fixed grasp index {self.grasp_idx} for all envs.") p_w, q_w = grasp_to_world(gg[int(self.grasp_idx)]) - ret = { - "success": True, - "chosen_pose_w": (p_w.astype(np.float32), q_w.astype(np.float32)), - "chosen_pre": self.grasp_pre if self.grasp_pre is not None else 0.12, - "chosen_delta": self.grasp_delta if self.grasp_delta is not None else 0.0, - } + + # Prepare per-env format + env_grasp_success = np.ones(B, dtype=bool) + env_chosen_pose_w = [(p_w.astype(np.float32), q_w.astype(np.float32))] * B + env_chosen_pre = np.full(B, self.grasp_pre if self.grasp_pre is not None else 0.12, dtype=np.float32) + env_chosen_delta = np.full(B, self.grasp_delta if self.grasp_delta is not None else 0.0, dtype=np.float32) + env_skip = np.zeros(B, dtype=bool) else: - # Automatic grasp selection with retries + # Automatic async grasp selection if total_grasp_attempts > 1: print(f"\n{'#'*60}") print(f"RE-GRASP ATTEMPT {total_grasp_attempts}/{max_total_grasp_attempts}") print(f"{'#'*60}\n") + ret = self.grasp_trials(gg, std=std, max_retries=max_grasp_retries) + + env_grasp_success = ret["success"] + env_chosen_pose_w = ret["chosen_pose_w"] + env_chosen_pre = ret["chosen_pre"] + env_chosen_delta = ret["chosen_delta"] + env_skip = ret["skip_envs"] + + # Check if any active envs found grasps + active_envs = np.where(~env_skip)[0] + successful_envs = np.where(env_grasp_success & ~env_skip)[0] + + if len(active_envs) == 0: + print("[ERR] All environments failed grasp selection and were skipped") + return False - if not ret["success"]: - print("[ERR] No successful grasp found after all attempts") + if len(successful_envs) == 0: + print(f"[ERR] None of the {len(active_envs)} active environments found grasps") return False - print(f"\n[SUCCESS] Grasp selected: delta={ret['chosen_delta']:.4f}m") + print(f"\n[SUCCESS] {len(successful_envs)}/{len(active_envs)} active environments found valid grasps!") + if len(successful_envs) < len(active_envs): + skipped_envs = np.where(env_skip)[0] + print(f"[INFO] {len(skipped_envs)} environments skipped: {skipped_envs.tolist()}") + + for b in successful_envs: + print(f" Env {b}: delta={env_chosen_delta[b]:.4f}m") - # Reset trajectory failure counter for new grasp - consecutive_trajectory_failures = 0 + # Reset trajectory failure counters for new grasps + env_consecutive_failures[:] = 0 + 
env_traj_success[:] = False - # ========== PHASE 2: TRAJECTORY FOLLOWING (with restarts) ========== + # ========== PHASE 2: TRAJECTORY FOLLOWING (with per-env restart) ========== for restart_attempt in range(max_restart_attempts): try: print(f"\n{'='*60}") print(f"TRAJECTORY EXECUTION ATTEMPT {restart_attempt + 1}/{max_restart_attempts}") + active_envs = np.where(~env_skip)[0] + successful_traj_envs = np.where(env_traj_success & ~env_skip)[0] + print(f"Active envs: {active_envs.tolist()}") + print(f"Successful envs: {len(successful_traj_envs)}/{len(active_envs)}") print(f"{'='*60}\n") - # Prepare grasp info - p_win, q_win = ret["chosen_pose_w"] - p_all = np.repeat(p_win.reshape(1, 3), B, axis=0) - q_all = np.repeat(q_win.reshape(1, 4), B, axis=0) - pre_all = np.full((B,), ret["chosen_pre"], dtype=np.float32) - del_all = np.full((B,), ret["chosen_delta"], dtype=np.float32) + # If all active envs succeeded in trajectory, we're done + if np.all(env_traj_success[~env_skip]): + print(f"[SUCCESS] All {len(active_envs)} active environments completed trajectories!") + break + + # Prepare grasp info for ALL envs using their individual grasps + p_all = np.stack([pose[0] for pose in env_chosen_pose_w], axis=0) # [B,3] + q_all = np.stack([pose[1] for pose in env_chosen_pose_w], axis=0) # [B,4] + pre_all = env_chosen_pre # [B] + del_all = env_chosen_delta # [B] info_all = self.build_grasp_info(p_all, q_all, pre_all, del_all) # Reset and execute: open → pre → grasp → close → follow_object_goals @@ -651,108 +844,132 @@ def inference(self, std: float = 0.0, max_restart_attempts: int = 10, self.wait(gripper_open=True, steps=4) # Pre-grasp - print("[INFO] Moving to pre-grasp position...") + print("[INFO] Moving to pre-grasp positions...") jp, success = self.move_to(info_all["pre_p_b"], info_all["pre_q_b"], gripper_open=True) if jp is None or torch.any(success == False): - print("[WARN] Pre-grasp motion failed, restarting...") - consecutive_trajectory_failures += 1 - if consecutive_trajectory_failures >= regrasp_after_failures: - break # Exit to re-grasp + print("[WARN] Pre-grasp motion failed for some envs") + failed_envs = torch.where(success == False)[0].cpu().numpy() if success is not None else np.arange(B) + # Only increment failure counter for ACTIVE envs that failed + for b in failed_envs: + if not env_skip[b]: # Only track failures for active envs + env_consecutive_failures[b] += 1 + + # Check if any ACTIVE env needs re-grasp + active_need_regrasp = np.where((env_consecutive_failures >= regrasp_after_failures) & ~env_skip)[0] + if len(active_need_regrasp) > 0: + print(f"[INFO] Active envs {active_need_regrasp.tolist()} need re-grasping") + break continue + self.save_dict["actions"].append(np.concatenate([info_all["pre_p_b"].cpu().numpy(), info_all["pre_q_b"].cpu().numpy(), np.zeros((B, 1))], axis=1)) jp = self.wait(gripper_open=True, steps=3) # Grasp - print("[INFO] Moving to grasp position...") + print("[INFO] Moving to grasp positions...") jp, success = self.move_to(info_all["p_b"], info_all["q_b"], gripper_open=True) if jp is None or torch.any(success == False): - print("[WARN] Grasp motion failed, restarting...") - consecutive_trajectory_failures += 1 - if consecutive_trajectory_failures >= regrasp_after_failures: - break # Exit to re-grasp + print("[WARN] Grasp motion failed for some envs") + failed_envs = torch.where(success == False)[0].cpu().numpy() if success is not None else np.arange(B) + # Only increment failure counter for ACTIVE envs that failed + for b in failed_envs: + if not 
env_skip[b]: # Only track failures for active envs + env_consecutive_failures[b] += 1 + + # Check if any ACTIVE env needs re-grasp + active_need_regrasp = np.where((env_consecutive_failures >= regrasp_after_failures) & ~env_skip)[0] + if len(active_need_regrasp) > 0: + print(f"[INFO] Active envs {active_need_regrasp.tolist()} need re-grasping") + break continue + self.save_dict["actions"].append(np.concatenate([info_all["p_b"].cpu().numpy(), info_all["q_b"].cpu().numpy(), np.zeros((B, 1))], axis=1)) # Close gripper - print("[INFO] Closing gripper...") + print("[INFO] Closing grippers...") jp = self.wait(gripper_open=False, steps=50) self.save_dict["actions"].append(np.concatenate([info_all["p_b"].cpu().numpy(), info_all["q_b"].cpu().numpy(), np.ones((B, 1))], axis=1)) - # Follow object trajectory - THIS CAN RAISE RuntimeError - print("[INFO] Following object trajectory...") + # Follow object trajectory + print("[INFO] Following object trajectories...") jp = self.follow_object_goals(jp, sample_step=5, visualize=True) - # If we reach here, trajectory following succeeded! - print("[SUCCESS] Trajectory completed successfully!") + # If we reach here, trajectory following succeeded for all! + print("[SUCCESS] All trajectories completed successfully!") + env_traj_success[:] = True - # Verify task completion + # Verify task completion (only for active envs) task_success = self.is_success(position_threshold=0.1) if task_success: - print("[SUCCESS] Task verification passed!") - self.save_data() + print(f"[SUCCESS] Task verification passed for all {len(active_envs)} active environments!") + self.save_data_selective(env_skip) return True else: - print("[WARN] Task verification failed, but trajectory completed") - print(f"[ERROR] Attempting restart") + print("[WARN] Task verification failed for some environments") self.clear_data() except RuntimeError as e: if "MotionPlanningFailure_RestartNeeded" in str(e): print(f"\n[RESTART] Motion planning failed mid-trajectory on attempt {restart_attempt + 1}") print("[RESTART] Object was likely dropped. 
Restarting from grasp phase...") - print(f"[RESTART] Remaining attempts: {max_restart_attempts - restart_attempt - 1}\n") - # Clear the corrupted trajectory data + # Increment failure counter ONLY for ACTIVE envs that haven't succeeded + for b in range(B): + if not env_traj_success[b] and not env_skip[b]: # Only active, unsuccessful envs + env_consecutive_failures[b] += 1 + self.clear_data() - consecutive_trajectory_failures += 1 - # Check if we need to re-grasp - if consecutive_trajectory_failures >= regrasp_after_failures: + # Check if any ACTIVE env needs re-grasp + active_need_regrasp = np.where((env_consecutive_failures >= regrasp_after_failures) & ~env_skip)[0] + if len(active_need_regrasp) > 0: print(f"\n{'!'*60}") - print(f"CONSECUTIVE TRAJECTORY FAILURES: {consecutive_trajectory_failures}") - print(f"RE-SELECTING GRASP (total attempt {total_grasp_attempts + 1}/{max_total_grasp_attempts})") + print(f"Active envs {active_need_regrasp.tolist()} need re-grasping") + print(f"RE-SELECTING GRASPS (total attempt {total_grasp_attempts + 1}/{max_total_grasp_attempts})") print(f"{'!'*60}\n") - break # Exit trajectory loop to re-grasp + break - # Continue to next restart attempt continue else: - # Unknown error, re-raise raise except Exception as e: print(f"[ERROR] Unexpected error during execution: {type(e).__name__}: {e}") - print(f"[ERROR] Attempting restart {restart_attempt + 1}/{max_restart_attempts}...") + + # Increment failure counter ONLY for ACTIVE envs that haven't succeeded + for b in range(B): + if not env_traj_success[b] and not env_skip[b]: # Only active, unsuccessful envs + env_consecutive_failures[b] += 1 + self.clear_data() - consecutive_trajectory_failures += 1 - # Check if we need to re-grasp - if consecutive_trajectory_failures >= regrasp_after_failures: + # Check if any ACTIVE env needs re-grasp + active_need_regrasp = np.where((env_consecutive_failures >= regrasp_after_failures) & ~env_skip)[0] + if len(active_need_regrasp) > 0: print(f"\n{'!'*60}") - print(f"CONSECUTIVE TRAJECTORY FAILURES: {consecutive_trajectory_failures}") - print(f"RE-SELECTING GRASP (total attempt {total_grasp_attempts + 1}/{max_total_grasp_attempts})") + print(f"Active envs {active_need_regrasp.tolist()} need re-grasping") + print(f"RE-SELECTING GRASPS (total attempt {total_grasp_attempts + 1}/{max_total_grasp_attempts})") print(f"{'!'*60}\n") - break # Exit trajectory loop to re-grasp + break continue # Check why we exited the trajectory loop - if consecutive_trajectory_failures >= regrasp_after_failures: - # We broke out to re-grasp + active_need_regrasp = np.where((env_consecutive_failures >= regrasp_after_failures) & ~env_skip)[0] + if len(active_need_regrasp) > 0: + # Some ACTIVE envs need new grasps if total_grasp_attempts >= max_total_grasp_attempts: - print(f"\n[FAILURE] Exhausted all grasp attempts ({max_total_grasp_attempts})") + print(f"\n[FAILURE] Active envs {active_need_regrasp.tolist()} exhausted all grasp attempts ({max_total_grasp_attempts})") return False - # Continue outer loop to select new grasp + # Continue outer loop to select new grasps for failed envs continue else: - # We exhausted all restart attempts without hitting re-grasp threshold + # We exhausted all restart attempts print(f"\n[FAILURE] Failed after {max_restart_attempts} trajectory restart attempts") return False # If we exit the outer loop, we exhausted all grasp attempts print(f"\n[FAILURE] Failed after {max_total_grasp_attempts} total grasp selection attempts") return False - # 
───────────────────────────────────────────────────────────────────────────── Entry Point ───────────────────────────────────────────────────────────────────────────── def main(): sim_cfgs = load_sim_parameters(BASE_DIR, args_cli.key)