From 90216c4f91eb21810b4391b6926fdf4ba2bca605 Mon Sep 17 00:00:00 2001
From: Utkarsh
Date: Fri, 7 Nov 2025 20:43:38 -0800
Subject: [PATCH 1/3] task verifier updates

---
 .../isaaclab/data_collection/README.md        |   52 +
 .../isaaclab/data_collection/__init__.py      |    0
 .../data_collection/learning/hybrid_ppo.py    |  533 +++++++++
 .../data_collection/learning/simple_ppo.py    |  331 ++++++
 .../isaaclab/data_collection/ply_to_depth.py  |  190 +++
 .../isaaclab/data_collection/sim_base.py      | 1033 +++++++++++++++++
 .../data_collection/sim_env_factory.py        |  573 +++++++++
 .../data_collection/sim_heuristic_manip.py    |  787 +++++++++++++
 .../sim_preprocess/grasp_generation.py        |  252 ++++
 .../sim_preprocess/grasp_utils.py             |  391 +++++++
 .../sim_preprocess/trajectory_smoothing.py    |  190 +++
 .../sim_preprocess/usd_conversion.py          |  584 ++++++++++
 .../data_collection/sim_trajectory_replay.py  |  143 +++
 .../sim_utils/calibration_utils.py            |   88 ++
 .../data_collection/sim_utils/sim_utils.py    |  257 ++++
 .../sim_utils/transform_utils.py              |   50 +
 16 files changed, 5454 insertions(+)
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/README.md
 create mode 100644 openreal2sim/simulation/isaaclab/data_collection/__init__.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/learning/hybrid_ppo.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/learning/simple_ppo.py
 create mode 100644 openreal2sim/simulation/isaaclab/data_collection/ply_to_depth.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_base.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_env_factory.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_generation.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_utils.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/trajectory_smoothing.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/usd_conversion.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_trajectory_replay.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_utils/calibration_utils.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_utils/sim_utils.py
 create mode 100755 openreal2sim/simulation/isaaclab/data_collection/sim_utils/transform_utils.py

diff --git a/openreal2sim/simulation/isaaclab/data_collection/README.md b/openreal2sim/simulation/isaaclab/data_collection/README.md
new file mode 100755
index 0000000..199f68d
--- /dev/null
+++ b/openreal2sim/simulation/isaaclab/data_collection/README.md
@@ -0,0 +1,52 @@
# IsaacLab

## Installation

**Pull the pre-built docker image**

If you want to use a pre-built image, you can pull it directly from the GitHub Container Registry:
 ```bash
 docker pull ghcr.io/pointscoder/isaaclab:dev
 docker tag ghcr.io/pointscoder/isaaclab:dev isaaclab:dev
 ```

**On the host machine and before entering the container**, run
```
xhost +local:
```
This allows the container to access the host's X server for the Isaac Sim GUI.
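(Optional) When you are done with the GUI, you can revoke this access again with the standard `xhost` counterpart:
```
xhost -local:
```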
Then, launch the container:
```
HOST_UID=$(id -u) HOST_GID=$(id -g) docker compose -p "$USER" -f docker/compose.yml up -d isaaclab
```
and enter it:
```
HOST_UID=$(id -u) HOST_GID=$(id -g) docker compose -p "$USER" -f docker/compose.yml exec isaaclab bash
```

## Preprocessing

We need to convert the textured meshes to USD format. This is done by running the following script:
```
python openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/usd_conversion.py
```

Then, we generate grasp proposals for each object in the scene:
```
python openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_generation.py
```

## Running the simulation

We provide heuristic policies using grasping and motion planning:
```
python openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py
```

## Replay Robot Trajectories

We can also replay the recorded robot trajectories in IsaacSim:
```
python openreal2sim/simulation/isaaclab/data_collection/sim_trajectory_replay.py --demo_dir <demo_dir>
```
\ No newline at end of file
diff --git a/openreal2sim/simulation/isaaclab/data_collection/__init__.py b/openreal2sim/simulation/isaaclab/data_collection/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/openreal2sim/simulation/isaaclab/data_collection/learning/hybrid_ppo.py b/openreal2sim/simulation/isaaclab/data_collection/learning/hybrid_ppo.py
new file mode 100755
index 0000000..a5a8ff0
--- /dev/null
+++ b/openreal2sim/simulation/isaaclab/data_collection/learning/hybrid_ppo.py
@@ -0,0 +1,533 @@
# learning/hybrid_ppo.py
# -*- coding: utf-8 -*-
"""
Hybrid PPO: Categorical (proposal index at t=0) + Gaussian (offset at t=0) + Gaussian (residual for t>0).
Action sent to env is always concatenated as:
    [ residual_6 | proposal_onehot_P | offset_6 ]
Env consumes:
    - t == 0: uses (proposal_onehot_P, offset_6); ignores residual_6
    - t > 0 : uses residual_6; ignores the rest
"""
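# Illustrative layout check (assumptions: P = 4 proposals, batch of 2; this
# snippet is not part of the module, it only demonstrates the action packing):
#   residual = torch.zeros(2, 6)
#   onehot   = torch.nn.functional.one_hot(torch.tensor([1, 3]), num_classes=4).float()
#   offset   = torch.zeros(2, 6)
#   env_action = torch.cat([residual, onehot, offset], dim=-1)   # shape (2, 6 + 4 + 6)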
from __future__ import annotations
from pathlib import Path
from typing import Tuple, Iterable, Dict

import torch
import torch.nn as nn
import torch.optim as optim


# ========================= Hybrid Actor-Critic =========================
class HybridActorCritic(nn.Module):
    """
    Policy head structure:
      - proposal_logits: (B, P)    [Categorical at t=0]
      - offset_mu/std:   (B, P, 6) [Gaussian at t=0, choose row by sampled idx]
      - residual_mu/std: (B, 6)    [Gaussian at t>0]
      - value: (B,)
    """
    def __init__(self, obs_dim: int, num_proposals: int, hidden_sizes: Tuple[int, ...] = (256, 256)):
        super().__init__()
        self.P = int(num_proposals)

        # Shared trunk
        layers = []
        last = obs_dim
        for hs in hidden_sizes:
            layers += [nn.Linear(last, hs), nn.ReLU(inplace=True)]
            last = hs
        self.shared = nn.Sequential(*layers)

        # Heads
        self.proposal_head = nn.Linear(last, self.P)                      # logits for Categorical
        self.offset_mu = nn.Linear(last, self.P * 6)                      # per-proposal 6D mean
        self.offset_logstd = nn.Parameter(torch.full((self.P, 6), -0.5))  # learnable, proposal-wise
        self.residual_mu = nn.Linear(last, 6)                             # 6D mean for residual
        self.residual_logstd = nn.Parameter(torch.full((6,), -0.5))       # learnable

        self.v_head = nn.Linear(last, 1)

    def forward(self, obs: torch.Tensor) -> Dict[str, torch.Tensor]:
        """
        Returns a dict of all distribution params and value.
        Shapes:
          - proposal_logits: (B,P)
          - offset_mu:  (B,P,6)
          - offset_std: (P,6)  (broadcast on batch)
          - residual_mu:  (B,6)
          - residual_std: (6,)  (broadcast on batch)
          - value: (B,)
        """
        x = self.shared(obs)
        proposal_logits = self.proposal_head(x)            # (B,P)
        offset_mu = self.offset_mu(x).view(-1, self.P, 6)  # (B,P,6)
        offset_std = torch.exp(self.offset_logstd)         # (P,6)
        residual_mu = self.residual_mu(x)                  # (B,6)
        residual_std = torch.exp(self.residual_logstd)     # (6,)
        value = self.v_head(x).squeeze(-1)
        return {
            "proposal_logits": proposal_logits,
            "offset_mu": offset_mu,
            "offset_std": offset_std,
            "residual_mu": residual_mu,
            "residual_std": residual_std,
            "value": value,
        }

    @staticmethod
    def _one_hot(indices: torch.Tensor, P: int) -> torch.Tensor:
        """indices: (B,), return onehot (B,P) float32."""
        B = indices.shape[0]
        onehot = torch.zeros(B, P, dtype=torch.float32, device=indices.device)
        onehot.scatter_(1, indices.view(-1, 1), 1.0)
        return onehot

    def act_sample(self, obs: torch.Tensor, is_step0: bool) -> Dict[str, torch.Tensor]:
        """
        Sample action and return everything needed for PPO rollout at this step.
        At t=0: sample idx ~ Categorical, offset ~ N(mu[idx], std[idx]), build env_action.
        At t>0: sample residual ~ N(mu, std), build env_action with zeros for the rest.
        Returns dict with keys:
          - env_action: (B, 6 + P + 6)
          - logp: (B,)
          - entropy: (B,)
          - value: (B,)
        """
        outs = self.forward(obs)
        P = outs["proposal_logits"].shape[1]
        B = obs.shape[0]

        if is_step0:
            # Categorical over proposals
            cat = torch.distributions.Categorical(logits=outs["proposal_logits"])
            idx = cat.sample()              # (B,)
            onehot = self._one_hot(idx, P)  # (B,P)

            # Gather the offset distribution of the selected proposal:
            # offset_mu is (B,P,6); offset_std is (P,6) and is indexed by the sampled idx.
            mu_sel = outs["offset_mu"][torch.arange(B, device=obs.device), idx, :]  # (B,6)
            std_sel = outs["offset_std"][idx, :]                                    # (B,6)

            gauss_off = torch.distributions.Normal(mu_sel, std_sel)
            u_off = gauss_off.rsample()     # (B,6)
            u_off_tanh = torch.tanh(u_off)  # keep action in [-1,1] space for env's clamp
            # Tanh correction
            logp_off = gauss_off.log_prob(u_off) - torch.log(1.0 - u_off_tanh.pow(2) + 1e-6)
            logp_off = logp_off.sum(-1)     # (B,)

            logp = cat.log_prob(idx) + logp_off

            # Entropy (encourage exploration at t=0): categorical + offset gaussian entropy
            ent = cat.entropy() + gauss_off.entropy().sum(-1)

            # Residual part zero at t=0 (env ignores it anyway)
            residual = torch.zeros(B, 6, dtype=torch.float32, device=obs.device)

            # Build env action: [residual_6 | onehot_P | offset_6]
            env_action = torch.cat([residual, onehot, u_off_tanh], dim=-1)  # (B, 6+P+6)

            return {
                "env_action": env_action,
                "logp": logp,
                "entropy": ent,
                "value": outs["value"],
            }
        else:
            # Only the residual Gaussian matters
            gauss_res = torch.distributions.Normal(outs["residual_mu"], outs["residual_std"])
            u_res = gauss_res.rsample()        # (B,6)
            a_res = torch.tanh(u_res)          # (B,6)
            logp_res = gauss_res.log_prob(u_res) - torch.log(1.0 - a_res.pow(2) + 1e-6)
            logp_res = logp_res.sum(-1)        # (B,)
            ent = gauss_res.entropy().sum(-1)  # (B,)

            # proposal onehot & offset set to zero placeholders
            onehot = torch.zeros(B, P, dtype=torch.float32, device=obs.device)
            off6 = torch.zeros(B, 6, dtype=torch.float32, device=obs.device)

            env_action = torch.cat([a_res, onehot, off6], dim=-1)  # (B,6+P+6)
            return {
                "env_action": env_action,
                "logp": logp_res,
                "entropy": ent,
                "value": outs["value"],
            }
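    # Note on the tanh squashing used above (and inverted in log_prob_recompute
    # below): for a = tanh(u) with u ~ N(mu, std), the change of variables gives
    #   log p(a) = log p(u) - sum log(1 - tanh(u)^2),
    # summed over the 6 action dims, and u is recovered from a stored action via
    # atanh(a) = 0.5 * log((1+a)/(1-a)); the 1e-6 terms are numerical guards.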
    def log_prob_recompute(self, obs: torch.Tensor, env_action: torch.Tensor, is_step0: bool) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Recompute log_prob and entropy under NEW network params for PPO update.
        env_action: (B, 6+P+6) = [res6 | onehot_P | off6]
        Returns (logp, entropy), both (B,)
        """
        outs = self.forward(obs)
        B = env_action.shape[0]
        P = outs["proposal_logits"].shape[1]

        res6 = env_action[:, :6]
        onehot = env_action[:, 6:6+P]
        off6 = env_action[:, 6+P:]

        if is_step0:
            # Recover idx from onehot
            idx = onehot.argmax(dim=-1)  # (B,)

            # logp for categorical
            cat = torch.distributions.Categorical(logits=outs["proposal_logits"])
            logp_cat = cat.log_prob(idx)  # (B,)

            # logp for selected offset
            mu_sel = outs["offset_mu"][torch.arange(B, device=obs.device), idx, :]  # (B,6)
            std_sel = outs["offset_std"][idx, :]                                    # (B,6)
            gauss_off = torch.distributions.Normal(mu_sel, std_sel)
            # Convert bounded action back to pre-squash space via atanh
            a = off6.clamp(-0.999999, 0.999999)
            atanh_a = 0.5 * (torch.log1p(a + 1e-6) - torch.log1p(-a + 1e-6))
            logp_off = gauss_off.log_prob(atanh_a) - torch.log(1.0 - a.pow(2) + 1e-6)
            logp_off = logp_off.sum(-1)

            logp = logp_cat + logp_off
            ent = cat.entropy() + gauss_off.entropy().sum(-1)
            return logp, ent
        else:
            gauss_res = torch.distributions.Normal(outs["residual_mu"], outs["residual_std"])
            a = res6.clamp(-0.999999, 0.999999)
            atanh_a = 0.5 * (torch.log1p(a + 1e-6) - torch.log1p(-a + 1e-6))
            logp_res = gauss_res.log_prob(atanh_a) - torch.log(1.0 - a.pow(2) + 1e-6)
            logp_res = logp_res.sum(-1)
            ent = gauss_res.entropy().sum(-1)
            return logp_res, ent
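# Minimal usage sketch (assumed sizes; `net` and `obs` are illustrative names,
# not part of this module):
#   net = HybridActorCritic(obs_dim=32, num_proposals=8)
#   obs = torch.randn(4, 32)
#   out0 = net.act_sample(obs, is_step0=True)    # Categorical + offset Gaussian
#   outk = net.act_sample(obs, is_step0=False)   # residual Gaussian only
#   out0["env_action"].shape                     # torch.Size([4, 6 + 8 + 6])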
+ """ + def __init__(self, horizon: int, num_envs: int, obs_dim: int, act_dim: int, device: torch.device): + self.h, self.n, self.d, self.k = horizon, num_envs, obs_dim, act_dim + self.device = device + self.reset() + + def reset(self): + H, N, D, K, dev = self.h, self.n, self.d, self.k, self.device + self.obs = torch.zeros(H, N, D, device=dev, dtype=torch.float32) + self.actions = torch.zeros(H, N, K, device=dev, dtype=torch.float32) # [res6|1hotP|off6] + self.logp = torch.zeros(H, N, device=dev, dtype=torch.float32) + self.rewards = torch.zeros(H, N, device=dev, dtype=torch.float32) + self.dones = torch.zeros(H, N, device=dev, dtype=torch.bool) + self.values = torch.zeros(H, N, device=dev, dtype=torch.float32) + self.timeouts = torch.zeros(H, N, device=dev, dtype=torch.bool) + self.ptr = 0 + + def add(self, *, obs, env_action, logp, rew, done, val, t_out): + i = self.ptr + self.obs[i] = obs.detach() + self.actions[i] = env_action.detach() + self.logp[i] = logp.detach() + self.rewards[i] = rew.detach().squeeze(-1) + self.dones[i] = done.detach().bool() + self.values[i] = val.detach() + self.timeouts[i] = t_out.detach().bool() + self.ptr += 1 + + @torch.no_grad() + def compute_gae(self, last_value: torch.Tensor, gamma=0.99, lam=0.95): + H, N = self.h, self.n + adv = torch.zeros(H, N, device=self.device, dtype=torch.float32) + last_gae = torch.zeros(N, device=self.device, dtype=torch.float32) + for t in reversed(range(H)): + if t == H - 1: + next_nonterminal = (~self.dones[t] | self.timeouts[t]).float() + next_values = last_value + else: + next_nonterminal = (~self.dones[t + 1] | self.timeouts[t + 1]).float() + next_values = self.values[t + 1] + delta = self.rewards[t] + gamma * next_values * next_nonterminal - self.values[t] + last_gae = delta + gamma * lam * next_nonterminal * last_gae + adv[t] = last_gae + ret = adv + self.values + return adv, ret + + def iterate_minibatches(self, batch_size: int, num_epochs: int) -> Iterable: + H, N = self.h, self.n + total = H * N + obs = self.obs.view(total, -1) + acts = self.actions.view(total, -1) + logp = self.logp.view(total) + values= self.values.view(total) + + def idx_batches(): + idx = torch.randperm(total, device=self.device) + for i in range(0, total, batch_size): + j = idx[i: i + batch_size] + yield j + + for _ in range(num_epochs): + for j in idx_batches(): + yield j, obs[j], acts[j], logp[j], values[j] + + +# ========================= Helpers ========================= +def _as_tensor(x, device: torch.device, dtype: torch.dtype | None = None) -> torch.Tensor: + if isinstance(x, torch.Tensor): + if dtype is not None and x.dtype != dtype: + x = x.to(dtype) + return x.to(device) + return torch.as_tensor(x, device=device, dtype=dtype) + +@torch.no_grad() +def _reset_env(env, device: torch.device): + rst = env.reset() + obs = rst["obs"] + return _as_tensor(obs, device=device, dtype=torch.float32) + +def _choose_minibatch_size(total_batch: int, num_envs: int) -> int: + mb = max(num_envs * 2, 32) + while total_batch % mb != 0 and mb > 8: + mb //= 2 + if total_batch % mb != 0: + mb = num_envs + return mb + + +@torch.no_grad() +def _partial_reset_and_merge_obs(env, next_obs: torch.Tensor, done_t: torch.Tensor, device: torch.device): + """ + If any env is done: + 1) Try env.reset_done(done_mask) which returns {"obs": (M,D)} or (N,D) + 2) Else try env.reset(env_ids=idxs) + 3) Else fallback to global reset() + Then merge the reset obs into next_obs only for those done envs and return merged tensor. 
+ """ + if not bool(done_t.any()): + return next_obs # nothing to do + + idxs = torch.nonzero(done_t, as_tuple=False).squeeze(-1) # (M,) + M = int(idxs.numel()) + if M == 0: + return next_obs + + rst = env.reset(env_ids=idxs) + new_obs = _as_tensor(rst["obs"], device=device, dtype=torch.float32) + if new_obs.shape[0] == next_obs.shape[0]: + return new_obs + merged = next_obs.clone() + merged[idxs] = new_obs + return merged + +# ========================= PPO Train (Hybrid) ========================= +def ppo_train_hybrid( + *, + env, + net: HybridActorCritic, + optimizer: optim.Optimizer, + device: torch.device, + total_epochs: int, + horizon: int, + mini_epochs: int, + batch_size_hint: int, + clip_eps: float, + value_coef: float, + entropy_coef: float, + gamma: float, + gae_lambda: float, + run_dir: Path, + save_every: int = 50, +): + """ + Hybrid PPO training loop: + - t=0 uses Categorical(index) + Gaussian(offset) + - t>0 uses Gaussian(residual) + Assumptions about env: + - action_space.shape[0] == (6 + P + 6) + - step() consumes the onehot & offset at t=0, and residual at t>0 + """ + from torch.utils.tensorboard import SummaryWriter + + print("#####################################horizon =", horizon) + + run_dir.mkdir(parents=True, exist_ok=True) + ckpt_best = run_dir / "best.pt" + + num_envs = getattr(env, "num_envs", None) + if num_envs is None: + raise ValueError("env.num_envs is required for batch sizing.") + obs_dim = env.observation_space.shape[0] + act_dim = env.action_space.shape[0] # 6 + P + 6 + + writer = SummaryWriter(log_dir=run_dir) + + print(f"[INFO][Hybrid PPO] epochs={total_epochs}, horizon={horizon}, num_envs={num_envs}, " + f"obs_dim={obs_dim}, act_dim={act_dim}") + buffer = HybridRolloutBuffer(horizon=horizon, num_envs=num_envs, obs_dim=obs_dim, act_dim=act_dim, device=device) + + total_batch = horizon * num_envs if batch_size_hint <= 0 else int(batch_size_hint) + minibatch_size = _choose_minibatch_size(total_batch, num_envs) + + # initial reset + obs = _reset_env(env, device) + best_mean_return = -1e9 + + for epoch in range(1, total_epochs + 1): + buffer.reset() + + ep_return_per_env = torch.zeros(num_envs, device=device, dtype=torch.float32) + finished_episode_returns = [] # Python list[float] + ep_track_return_per_env = torch.zeros(num_envs, device=device, dtype=torch.float32) + finished_episode_track_returns = [] # Python list[float] + + # ===== rollout ===== + for t in range(horizon): + is_step0 = (t == 0) # because your env uses fixed horizon episodes + out = net.act_sample(obs, is_step0=is_step0) + env_action = out["env_action"].detach() + logp = out["logp"].detach() + val = out["value"].detach() + + step_out = env.step(env_action) # action shape (N, 6+P+6) + next_obs_any, reward_any, done_any, infos = step_out[0]["obs"], step_out[1], step_out[2], step_out[3] + + next_obs = _as_tensor(next_obs_any, device=device, dtype=torch.float32) + rew_t = _as_tensor(reward_any, device=device, dtype=torch.float32) + done_t = _as_tensor(done_any, device=device, dtype=torch.bool) + t_out = _as_tensor(infos.get("time_outs", torch.zeros_like(done_t)), device=device, dtype=torch.bool) + + buffer.add(obs=obs, env_action=env_action, logp=logp, rew=rew_t, done=done_t, val=val, t_out=t_out) + + # episode return bookkeeping + r_track_t = _as_tensor(infos["r_track"], device=device, dtype=torch.float32).view_as(rew_t) + ep_return_per_env += rew_t.squeeze(1) # (N,) + ep_track_return_per_env += r_track_t.squeeze(1) # (N,) + + done_or_timeout = (done_t | t_out) # (N,) + if 
bool(done_or_timeout.any()): + finished_episode_returns.extend( + ep_return_per_env[done_or_timeout].detach().cpu().tolist() + ) + finished_episode_track_returns.extend( + ep_track_return_per_env[done_or_timeout].detach().cpu().tolist() + ) + ep_return_per_env[done_or_timeout] = 0.0 + ep_track_return_per_env[done_or_timeout] = 0.0 + + # reset env if done or timeout + next_obs = _partial_reset_and_merge_obs(env, next_obs, done_t, device) + obs = next_obs + + # ===== bootstrap value ===== + with torch.no_grad(): + last_v = net.forward(obs)["value"] # (N,) + + # ===== GAE / returns ===== + adv, ret = buffer.compute_gae(last_value=last_v.detach(), gamma=gamma, lam=gae_lambda) + + # normalize advantage + adv_flat = adv.view(-1) + ret_flat = ret.view(-1) + adv_flat = (adv_flat - torch.mean(adv_flat)) / (torch.std(adv_flat) + 1e-8) + + # ===== PPO updates ===== + policy_loss_epoch = 0.0 + value_loss_epoch = 0.0 + entropy_epoch = 0.0 + num_updates = 0 + + for idx, obs_b, act_b, old_logp_b, old_v_b in buffer.iterate_minibatches(minibatch_size, mini_epochs): + # Figure out which rows are step0 in this flattened view: + # Original layout is (H,N) -> flattened to (H*N,) + # Rows [0*N : 1*N) correspond to t=0 + H, N = buffer.h, buffer.n + is_step0_mask = (idx // N) == 0 # boolean per-sample + + # Recompute logp & entropy under NEW params (mixture by timestep type) + logp_new = torch.empty_like(old_logp_b) + ent_new = torch.empty_like(old_logp_b) + + if bool(is_step0_mask.any()): + mask = is_step0_mask + lp0, en0 = net.log_prob_recompute(obs_b[mask], act_b[mask], is_step0=True) + logp_new[mask] = lp0 + ent_new[mask] = en0 + if bool((~is_step0_mask).any()): + mask = ~is_step0_mask + lp1, en1 = net.log_prob_recompute(obs_b[mask], act_b[mask], is_step0=False) + logp_new[mask] = lp1 + ent_new[mask] = en1 + + ratio = torch.exp(logp_new - old_logp_b) + + adv_b = adv_flat[idx] + ret_b = ret_flat[idx] + + surrogate1 = ratio * adv_b + surrogate2 = torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * adv_b + policy_loss = -torch.min(surrogate1, surrogate2).mean() + + # Clipped value loss (same as your original) + # Re-run value head: + v_b = net.forward(obs_b)["value"] + value_clipped = old_v_b + (v_b - old_v_b).clamp(-clip_eps, clip_eps) + value_losses = (v_b - ret_b).pow(2) + value_losses_clipped = (value_clipped - ret_b).pow(2) + value_loss = 0.5 * torch.max(value_losses, value_losses_clipped).mean() + + loss = policy_loss + value_coef * value_loss - entropy_coef * ent_new.mean() + + optimizer.zero_grad(set_to_none=True) + loss.backward() + nn.utils.clip_grad_norm_(net.parameters(), 0.5) + optimizer.step() + + policy_loss_epoch += float(policy_loss.detach()) + value_loss_epoch += float(value_loss.detach()) + entropy_epoch += float(ent_new.mean().detach()) + num_updates += 1 + + mean_return = float(ret.mean().detach()) + print(f"[E{epoch:04d}] return={mean_return:7.3f} " + f"pi={policy_loss_epoch/max(1,num_updates):7.4f} " + f"v={value_loss_epoch/max(1,num_updates):7.4f} " + f"H={entropy_epoch/max(1,num_updates):7.4f}") + + # Explained variance + ev = float(1.0 - torch.var(ret_flat - buffer.values.view(-1)) / (torch.var(ret_flat) + 1e-12)) + + # TB logs + writer.add_scalar("return_mean", mean_return, epoch) + writer.add_scalar("policy_loss", policy_loss_epoch / max(1, num_updates), epoch) + writer.add_scalar("value_loss", value_loss_epoch / max(1, num_updates), epoch) + writer.add_scalar("entropy", entropy_epoch / max(1, num_updates), epoch) + writer.add_scalar("explained_variance", ev, epoch) + if 
len(finished_episode_returns) > 0:
            mean_ep_return = float(sum(finished_episode_returns) / len(finished_episode_returns))
            writer.add_scalar("episode_return_mean", mean_ep_return, epoch)
        if len(finished_episode_track_returns) > 0:
            mean_ep_track_return = float(sum(finished_episode_track_returns) / len(finished_episode_track_returns))
            writer.add_scalar("episode_tracking_return_mean", mean_ep_track_return, epoch)

        # Save best & periodic
        if mean_return > best_mean_return:
            best_mean_return = mean_return
            torch.save({
                "model": net.state_dict(),
                "optimizer": optimizer.state_dict(),
                "epoch": epoch,
                "best_return": best_mean_return
            }, ckpt_best)

        if (save_every > 0) and (epoch % save_every == 0):
            ckpt_path = run_dir / f"epoch_{epoch:04d}_ret_{mean_return:.2f}.pt"
            torch.save({
                "model": net.state_dict(),
                "optimizer": optimizer.state_dict(),
                "epoch": epoch,
                "return": mean_return
            }, ckpt_path)

    return {"best_mean_return": best_mean_return, "ckpt_best": ckpt_best}

diff --git a/openreal2sim/simulation/isaaclab/data_collection/learning/simple_ppo.py b/openreal2sim/simulation/isaaclab/data_collection/learning/simple_ppo.py
new file mode 100755
index 0000000..0cfbf76
--- /dev/null
+++ b/openreal2sim/simulation/isaaclab/data_collection/learning/simple_ppo.py
@@ -0,0 +1,331 @@
# learning/simple_ppo.py
from __future__ import annotations
from pathlib import Path
from typing import Tuple, Iterable
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

# ========================= Actor-Critic =========================
class ActorCritic(nn.Module):
    """Simple MLP actor-critic. Actor outputs mean in pre-squash space; action = tanh(N(mu, sigma))."""
    def __init__(self, obs_dim: int, act_dim: int, hidden_sizes: Tuple[int, ...] 
= (256, 256)): + super().__init__() + layers = [] + last = obs_dim + for hs in hidden_sizes: + layers += [nn.Linear(last, hs), nn.ReLU(inplace=True)] + last = hs + self.shared = nn.Sequential(*layers) + self.mu_head = nn.Linear(last, act_dim) # pre-squash mean + self.log_std = nn.Parameter(torch.full((act_dim,), -0.5)) # trainable log_std + self.v_head = nn.Linear(last, 1) + + def forward(self, obs: torch.Tensor): + """obs: (B, obs_dim) -> mu: (B, act_dim), std: (act_dim,), v: (B,)""" + x = self.shared(obs) + mu = self.mu_head(x) + v = self.v_head(x).squeeze(-1) + std = torch.exp(self.log_std) + return mu, std, v + + @staticmethod + def sample_tanh_gaussian(mu: torch.Tensor, std: torch.Tensor): + """Return action in [-1,1], log_prob with tanh correction, and pre-squash sample u.""" + dist = torch.distributions.Normal(mu, std) + u = dist.rsample() # reparameterized + a = torch.tanh(u) + # log|det(Jacobian of tanh)| = sum log(1 - tanh(u)^2) + log_prob = dist.log_prob(u) - torch.log(1.0 - a.pow(2) + 1e-6) + return a, log_prob.sum(-1), u + + @staticmethod + def log_prob_tanh_gaussian(mu: torch.Tensor, std: torch.Tensor, a: torch.Tensor): + """Compute log_prob(a) for tanh(N(mu,std)) using atanh(a).""" + a = a.clamp(min=-0.999999, max=0.999999) + atanh_a = 0.5 * (torch.log1p(a + 1e-6) - torch.log1p(-a + 1e-6)) + dist = torch.distributions.Normal(mu, std) + log_prob = dist.log_prob(atanh_a) - torch.log(1.0 - a.pow(2) + 1e-6) + return log_prob.sum(-1) + + +# ========================= Rollout Buffer ========================= +class RolloutBuffer: + """On-policy PPO rollout buffer with fixed horizon (all tensors on a single device).""" + def __init__(self, horizon: int, num_envs: int, obs_dim: int, act_dim: int, device: torch.device): + self.h, self.n, self.d, self.k = horizon, num_envs, obs_dim, act_dim + self.device = device + self.reset() + + def reset(self): + H, N, D, K, dev = self.h, self.n, self.d, self.k, self.device + self.obs = torch.zeros(H, N, D, device=dev, dtype=torch.float32) + self.actions = torch.zeros(H, N, K, device=dev, dtype=torch.float32) + self.logp = torch.zeros(H, N, device=dev, dtype=torch.float32) + self.rewards = torch.zeros(H, N, device=dev, dtype=torch.float32) + self.dones = torch.zeros(H, N, device=dev, dtype=torch.bool) + self.values = torch.zeros(H, N, device=dev, dtype=torch.float32) + self.timeouts = torch.zeros(H, N, device=dev, dtype=torch.bool) + self.ptr = 0 + + def add(self, *, obs, act, logp, rew, done, val, t_out): + """Store DETACHED copies to keep env and rollout outside the grad graph.""" + i = self.ptr + self.obs[i] = obs.detach() + self.actions[i] = act.detach() + self.logp[i] = logp.detach() + self.rewards[i] = rew.detach().squeeze(-1) # allow (B,1) or (B,) + self.dones[i] = done.detach().bool() + self.values[i] = val.detach() + self.timeouts[i] = t_out.detach().bool() + self.ptr += 1 + + @torch.no_grad() # make GAE graph-free + def compute_gae(self, last_value: torch.Tensor, gamma=0.99, lam=0.95): + """last_value: (N,)""" + H, N = self.h, self.n + adv = torch.zeros(H, N, device=self.device, dtype=torch.float32) + last_gae = torch.zeros(N, device=self.device, dtype=torch.float32) + for t in reversed(range(H)): + if t == H - 1: + next_nonterminal = (~self.dones[t] | self.timeouts[t]).float() + next_values = last_value + else: + next_nonterminal = (~self.dones[t + 1] | self.timeouts[t + 1]).float() + next_values = self.values[t + 1] + delta = self.rewards[t] + gamma * next_values * next_nonterminal - self.values[t] + last_gae = delta + gamma * lam * 
next_nonterminal * last_gae + adv[t] = last_gae + ret = adv + self.values + return adv, ret + + def iterate_minibatches(self, batch_size: int, num_epochs: int) -> Iterable: + H, N = self.h, self.n + total = H * N + obs = self.obs.view(total, -1) + acts = self.actions.view(total, -1) + logp = self.logp.view(total) + values = self.values.view(total) + + def idx_batches(): + idx = torch.randperm(total, device=self.device) + for i in range(0, total, batch_size): + j = idx[i : i + batch_size] + yield j + + for _ in range(num_epochs): + for j in idx_batches(): + yield j, obs[j], acts[j], logp[j], values[j] + + +# ========================= Helpers ========================= +def _as_tensor(x, device: torch.device, dtype: torch.dtype | None = None) -> torch.Tensor: + """Convert numpy/torch to torch on device with optional dtype, avoiding copies when possible.""" + if isinstance(x, torch.Tensor): + if dtype is not None and x.dtype != dtype: + x = x.to(dtype) + return x.to(device) + return torch.as_tensor(x, device=device, dtype=dtype) + + +@torch.no_grad() +def _reset_env(env, device: torch.device): + """Support env.reset() that returns torch or numpy.""" + rst = env.reset() + obs = rst["obs"] + return _as_tensor(obs, device=device, dtype=torch.float32) + + +def _choose_minibatch_size(total_batch: int, num_envs: int) -> int: + """Heuristic minibatch sizing that divides total_batch.""" + mb = max(num_envs * 2, 32) + while total_batch % mb != 0 and mb > 8: + mb //= 2 + if total_batch % mb != 0: + mb = num_envs + return mb + + +@torch.no_grad() +def _partial_reset_and_merge_obs(env, next_obs: torch.Tensor, done_t: torch.Tensor, device: torch.device): + """ + If any env is done: + 1) Try env.reset_done(done_mask) which returns {"obs": (M,D)} or (N,D) + 2) Else try env.reset(env_ids=idxs) + 3) Else fallback to global reset() + Then merge the reset obs into next_obs only for those done envs and return merged tensor. + """ + if not bool(done_t.any()): + return next_obs # nothing to do + + idxs = torch.nonzero(done_t, as_tuple=False).squeeze(-1) # (M,) + M = int(idxs.numel()) + if M == 0: + return next_obs + + rst = env.reset(env_ids=idxs) + new_obs = _as_tensor(rst["obs"], device=device, dtype=torch.float32) + if new_obs.shape[0] == next_obs.shape[0]: + return new_obs + merged = next_obs.clone() + merged[idxs] = new_obs + return merged + +# ========================= PPO Train ========================= +def ppo_train( + *, + env, + net: ActorCritic, + optimizer: optim.Optimizer, + device: torch.device, + total_epochs: int, + horizon: int, + mini_epochs: int, + batch_size_hint: int, + clip_eps: float, + value_coef: float, + entropy_coef: float, + gamma: float, + gae_lambda: float, + run_dir: Path, + save_every: int = 50, +): + """ + Generic PPO training loop. 
+ Assumes: + - env.reset() -> {"obs": torch or numpy, shape (N,D)} + - env.step(action) -> + ({"obs": torch/numpy (N,D)}, reward (N,1)/(N,), done (N,), {"time_outs": (N,)}) + - Partial reset: + env.reset_done(done_mask) OR env.reset(env_ids=idxs) OR fallback global reset() + """ + run_dir.mkdir(parents=True, exist_ok=True) + ckpt_best = run_dir / "best.pt" + + num_envs = getattr(env, "num_envs", None) + if num_envs is None: + raise ValueError("env.num_envs is required for batch sizing.") + obs_dim = env.observation_space.shape[0] + act_dim = env.action_space.shape[0] + + writer = SummaryWriter(log_dir=run_dir) + + print(f"[INFO] PPO training: epochs={total_epochs}, horizon={horizon}, num_envs={num_envs}, " + f"obs_dim={obs_dim}, act_dim={act_dim}") + buffer = RolloutBuffer(horizon=horizon, num_envs=num_envs, obs_dim=obs_dim, act_dim=act_dim, device=device) + + total_batch = horizon * num_envs if batch_size_hint <= 0 else int(batch_size_hint) + minibatch_size = _choose_minibatch_size(total_batch, num_envs) + + # initial reset + obs = _reset_env(env, device) + best_mean_return = -1e9 + + for epoch in range(1, total_epochs + 1): + buffer.reset() + + # ===== rollout ===== + for t in range(horizon): + mu, std, v = net(obs) + act, logp, _ = net.sample_tanh_gaussian(mu, std) # act in [-1,1], (N,K) + + # IMPORTANT: do not let env/sim see gradients + act_no_grad = act.detach() + + step_out = env.step(act_no_grad) + next_obs_any, reward_any, done_any, infos = step_out[0]["obs"], step_out[1], step_out[2], step_out[3] + + next_obs = _as_tensor(next_obs_any, device=device, dtype=torch.float32) + rew_t = _as_tensor(reward_any, device=device, dtype=torch.float32) # (N,1) or (N,) + done_t = _as_tensor(done_any, device=device, dtype=torch.bool) # (N,) + t_out = _as_tensor(infos.get("time_outs", torch.zeros_like(done_t)), device=device, dtype=torch.bool) + + buffer.add(obs=obs, act=act_no_grad, logp=logp, rew=rew_t, done=done_t, val=v, t_out=t_out) + + # partial reset (merge only done envs' obs) + next_obs = _partial_reset_and_merge_obs(env, next_obs, done_t, device) + + obs = next_obs + + # ===== bootstrap value ===== + _, _, last_v = net(obs) # (N,) + + # ===== GAE / returns (graph-free) ===== + adv, ret = buffer.compute_gae(last_value=last_v.detach(), gamma=gamma, lam=gae_lambda) + + # normalize advantage + adv_flat = adv.view(-1) + ret_flat = ret.view(-1) + adv_flat = (adv_flat - torch.mean(adv_flat)) / (torch.std(adv_flat) + 1e-8) + + # ===== PPO updates ===== + policy_loss_epoch = 0.0 + value_loss_epoch = 0.0 + entropy_epoch = 0.0 + num_updates = 0 + + for idx, obs_b, act_b, old_logp_b, old_v_b in buffer.iterate_minibatches(minibatch_size, mini_epochs): + adv_b = adv_flat[idx] + ret_b = ret_flat[idx] + + mu_b, std_b, v_b = net(obs_b) + new_logp_b = net.log_prob_tanh_gaussian(mu_b, std_b, act_b) + ratio = torch.exp(new_logp_b - old_logp_b) + + surrogate1 = ratio * adv_b + surrogate2 = torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * adv_b + policy_loss = -torch.min(surrogate1, surrogate2).mean() + + value_clipped = old_v_b + (v_b - old_v_b).clamp(-clip_eps, clip_eps) + value_losses = (v_b - ret_b).pow(2) + value_losses_clipped = (value_clipped - ret_b).pow(2) + value_loss = 0.5 * torch.max(value_losses, value_losses_clipped).mean() + + base_dist = torch.distributions.Normal(mu_b, std_b) + entropy = base_dist.entropy().sum(-1).mean() + + loss = policy_loss + value_coef * value_loss - entropy_coef * entropy + + optimizer.zero_grad(set_to_none=True) + loss.backward() + 
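            # Clip the global gradient norm (max 0.5) before the optimizer step;
            # a standard PPO stabilizer against occasional large policy ratios.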
nn.utils.clip_grad_norm_(net.parameters(), 0.5) + optimizer.step() + + policy_loss_epoch += float(policy_loss.detach()) + value_loss_epoch += float(value_loss.detach()) + entropy_epoch += float(entropy.detach()) + num_updates += 1 + + mean_return = float(ret.mean().detach()) + print(f"[E{epoch:04d}] return={mean_return:7.3f} " + f"pi={policy_loss_epoch/max(1,num_updates):7.4f} " + f"v={value_loss_epoch/max(1,num_updates):7.4f} " + f"H={entropy_epoch/max(1,num_updates):7.4f}") + + # Value head quality: explained variance on flattened batch + ev = float(1.0 - torch.var(ret_flat - buffer.values.view(-1)) / (torch.var(ret_flat) + 1e-12)) + + writer.add_scalar("return_mean", mean_return, epoch) + writer.add_scalar("policy_loss", policy_loss_epoch / max(1,num_updates), epoch) + writer.add_scalar("value_loss", value_loss_epoch / max(1,num_updates), epoch) + writer.add_scalar("entropy", entropy_epoch / max(1,num_updates), epoch) + writer.add_scalar("explained_variance", ev, epoch) + + # save best & periodic + if mean_return > best_mean_return: + best_mean_return = mean_return + torch.save({"model": net.state_dict(), + "optimizer": optimizer.state_dict(), + "epoch": epoch, + "best_return": best_mean_return}, + ckpt_best) + if (save_every > 0) and (epoch % save_every == 0): + ckpt_path = run_dir / f"epoch_{epoch:04d}_ret_{mean_return:.2f}.pt" + torch.save({"model": net.state_dict(), + "optimizer": optimizer.state_dict(), + "epoch": epoch, + "return": mean_return}, + ckpt_path) + + return {"best_mean_return": best_mean_return, "ckpt_best": ckpt_best} diff --git a/openreal2sim/simulation/isaaclab/data_collection/ply_to_depth.py b/openreal2sim/simulation/isaaclab/data_collection/ply_to_depth.py new file mode 100644 index 0000000..97c7579 --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/ply_to_depth.py @@ -0,0 +1,190 @@ +import numpy as np +import cv2 + +def load_ply(ply_path): + """Load PLY file and extract 3D points.""" + with open(ply_path, 'rb') as f: + # Read header + line = f.readline().decode('ascii').strip() + if line != 'ply': + raise ValueError("Not a valid PLY file") + + format_type = None + vertex_count = 0 + properties = [] + in_header = True + + while in_header: + line = f.readline().decode('ascii').strip() + + if line.startswith('format'): + format_type = line.split()[1] + elif line.startswith('element vertex'): + vertex_count = int(line.split()[2]) + elif line.startswith('property'): + prop_type = line.split()[1] + prop_name = line.split()[2] + properties.append((prop_name, prop_type)) + elif line == 'end_header': + in_header = False + + # Find x, y, z indices + prop_names = [p[0] for p in properties] + if 'x' not in prop_names or 'y' not in prop_names or 'z' not in prop_names: + raise ValueError("PLY file must contain x, y, z properties") + + x_idx = prop_names.index('x') + y_idx = prop_names.index('y') + z_idx = prop_names.index('z') + + # Read vertex data + points = [] + + if format_type == 'ascii': + # ASCII format + for _ in range(vertex_count): + line = f.readline().decode('ascii').strip().split() + x = float(line[x_idx]) + y = float(line[y_idx]) + z = float(line[z_idx]) + points.append([x, y, z]) + + elif format_type == 'binary_little_endian': + # Binary format + dtype_map = { + 'float': 'f4', 'double': 'f8', + 'uchar': 'u1', 'uint8': 'u1', + 'char': 'i1', 'int8': 'i1', + 'ushort': 'u2', 'uint16': 'u2', + 'short': 'i2', 'int16': 'i2', + 'uint': 'u4', 'uint32': 'u4', + 'int': 'i4', 'int32': 'i4' + } + + # Create numpy dtype for binary reading + dtype_list = 
[(p[0], dtype_map.get(p[1], 'f4')) for p in properties] + vertex_data = np.frombuffer(f.read(vertex_count * np.dtype(dtype_list).itemsize), + dtype=dtype_list, count=vertex_count) + + points = np.column_stack([vertex_data['x'], vertex_data['y'], vertex_data['z']]) + return points + + else: + raise ValueError(f"Unsupported PLY format: {format_type}") + + return np.array(points) + +def project_points_to_image(points, fx, fy, cx, cy, width, height): + """ + Project 3D points to 2D image plane using camera intrinsics. + + Args: + points: Nx3 array of 3D points in camera coordinates + fx, fy: Focal lengths in pixels + cx, cy: Principal point coordinates + width, height: Image dimensions + + Returns: + depth_image: 2D array containing depth values + """ + # Filter points behind the camera + valid_mask = points[:, 2] > 0 + points = points[valid_mask] + + if len(points) == 0: + print("Warning: No points in front of camera") + return np.zeros((height, width), dtype=np.float32) + + # Project 3D points to 2D using pinhole camera model + # u = fx * (X/Z) + cx + # v = fy * (Y/Z) + cy + u = (fx * points[:, 0] / points[:, 2] + cx).astype(int) + v = (fy * points[:, 1] / points[:, 2] + cy).astype(int) + depth = points[:, 2] + + # Filter points within image bounds + valid_idx = (u >= 0) & (u < width) & (v >= 0) & (v < height) + u = u[valid_idx] + v = v[valid_idx] + depth = depth[valid_idx] + + # Create depth image + depth_image = np.zeros((height, width), dtype=np.float32) + + # Handle multiple points projecting to same pixel (keep closest) + for i in range(len(u)): + if depth_image[v[i], u[i]] == 0 or depth[i] < depth_image[v[i], u[i]]: + depth_image[v[i], u[i]] = depth[i] + + return depth_image + +def save_depth_image(depth_image, output_path, normalize=True): + """Save depth image as TIFF, PNG, or NPY file.""" + if output_path.endswith('.npy'): + # Save raw depth values + np.save(output_path, depth_image) + print(f"Saved raw depth to {output_path}") + elif output_path.endswith(('.tiff', '.tif')): + # Save as 32-bit float TIFF with raw depth values + cv2.imwrite(output_path, depth_image.astype(np.float32)) + print(f"Saved depth as TIFF to {output_path}") + else: + # Normalize and save as grayscale image (PNG/JPG) + if normalize: + depth_vis = depth_image.copy() + mask = depth_vis > 0 + if mask.any(): + depth_vis[mask] = (depth_vis[mask] - depth_vis[mask].min()) / (depth_vis[mask].max() - depth_vis[mask].min()) + depth_vis = (depth_vis * 255).astype(np.uint8) + else: + depth_vis = np.zeros_like(depth_image, dtype=np.uint8) + else: + depth_vis = depth_image.astype(np.uint8) + + cv2.imwrite(output_path, depth_vis) + print(f"Saved depth visualization to {output_path}") + +def main(): + # ===== CONFIGURATION - EDIT THESE VALUES ===== + INPUT_PLY = '/app/outputs/demo_video/reconstruction/background_points.ply' # Path to input PLY file + OUTPUT_IMAGE = '/app/outputs/demo_video/reconstruction/image_background_depth.png' # Output path (.tiff, .tif, .png, .jpg, or .npy) + + # Camera intrinsic parameters + FX = 664.4203491210938 # Focal length x in pixels + FY = 663.5459594726562 # Focal length y in pixels + CX = 295.5 # Principal point x + CY = 166.0 # Principal point y + WIDTH = 584 # Image width + HEIGHT = 328 # Image height + # ============================================ + + # Load PLY file + print(f"Loading PLY file: {INPUT_PLY}") + points = load_ply(INPUT_PLY) + print(f"Loaded {len(points)} points") + + # Project to depth image + print("Projecting points to image plane...") + depth_image = 
project_points_to_image(
        points,
        FX, FY, CX, CY,
        WIDTH, HEIGHT
    )

    # Count valid pixels
    valid_pixels = np.sum(depth_image > 0)
    print(f"Valid depth pixels: {valid_pixels}/{WIDTH * HEIGHT}")

    # Save depth image
    save_depth_image(depth_image, OUTPUT_IMAGE)

    # Print depth statistics
    if valid_pixels > 0:
        print(f"\nDepth statistics:")
        print(f"  Min depth: {depth_image[depth_image > 0].min():.3f}")
        print(f"  Max depth: {depth_image.max():.3f}")
        print(f"  Mean depth: {depth_image[depth_image > 0].mean():.3f}")

if __name__ == '__main__':
    main()

diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_base.py b/openreal2sim/simulation/isaaclab/data_collection/sim_base.py
new file mode 100755
index 0000000..15b64d1
--- /dev/null
+++ b/openreal2sim/simulation/isaaclab/data_collection/sim_base.py
@@ -0,0 +1,1033 @@
from __future__ import annotations

import json
import os
import random
from pathlib import Path
from typing import Any, Dict, List, Optional


import curobo
import imageio

# Isaac Lab
import isaaclab.sim as sim_utils
import numpy as np
import torch
import time
from curobo.types.base import TensorDeviceType
from curobo.types.math import Pose
from curobo.types.robot import JointState, RobotConfig
from curobo.util.logger import setup_curobo_logger
from curobo.util_file import get_robot_configs_path, join_path, load_yaml
from curobo.wrap.reacher.motion_gen import (
    MotionGen,
    MotionGenConfig,
    MotionGenPlanConfig,
)
from isaaclab.controllers import DifferentialIKController, DifferentialIKControllerCfg
from isaaclab.devices import Se3Gamepad, Se3Keyboard, Se3SpaceMouse
from isaaclab.managers import SceneEntityCfg
from isaaclab.markers import VisualizationMarkers, VisualizationMarkersCfg
from isaaclab.sensors.camera import Camera
from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
from isaaclab.utils.math import (
    subtract_frame_transforms,
    transform_points,
    unproject_depth,
)

##-- for sim background to real background video composition--
from PIL import Image
import cv2

os.environ['CUROBO_TORCH_CUDA_GRAPH_RESET'] = '1'


def get_next_demo_id(demo_root: Path) -> int:
    if not demo_root.exists():
        return 0
    demo_ids = []
    for name in os.listdir(demo_root):
        if name.startswith("demo_"):
            try:
                demo_ids.append(int(name.split("_")[1]))
            except Exception:
                pass
    return max(demo_ids) + 1 if demo_ids else 0


class BaseSimulator:
    """
    Base class for robot simulation.
+ + Attributes: + self.sim, self.scene, self.sim_dt + self.robot, self.object_prim, self.background_prim + self.teleop_interface, self.sim_state_machine + self.diff_ik_cfg, self.diff_ik_controller + self.ee_goals, self.current_goal_idx, self.ik_commands + self.robot_entity_cfg, self.robot_gripper_cfg + self.gripper_open_tensor, self.gripper_close_tensor + self.ee_jacobi_idx, self.count, self.demo_id + self.camera, self.save_dict + self.selected_object_id, self.obj_rel_traj, self.debug_level + self._goal_vis, self._traj_vis + """ + + def __init__( + self, + sim: sim_utils.SimulationContext, + scene: Any, # InteractiveScene + *, + args, + sim_cfgs: Dict, + robot_pose: torch.Tensor, + cam_dict: Dict, + out_dir: Path, + img_folder: str, + set_physics_props: bool = True, + enable_motion_planning: bool = True, + debug_level: int = 1, + ) -> None: + # basic simulation setup + self.sim: sim_utils.SimulationContext = sim + self.sim_cfgs = sim_cfgs + self.scene = scene + self.sim_dt = sim.get_physics_dt() + + self.num_envs: int = int(scene.num_envs) + self._all_env_ids = torch.arange( + self.num_envs, device=sim.device, dtype=torch.long + ) + + self.cam_dict = cam_dict + self.out_dir: Path = out_dir + self.img_folder: str = img_folder + + # scene entities + self.robot = scene["robot"] + if robot_pose.ndim == 1: + self.robot_pose = ( + robot_pose.view(1, -1).repeat(self.num_envs, 1).to(self.robot.device) + ) + else: + assert robot_pose.shape[0] == self.num_envs and robot_pose.shape[1] == 7, ( + f"robot_pose must be [B,7], got {robot_pose.shape}" + ) + self.robot_pose = robot_pose.to(self.robot.device).contiguous() + + self.object_prim = scene["object_00"] + self.other_object_prims = [ + scene[key] + for key in scene.keys() + if f"object_" in key and key != "object_00" + ] + self.background_prim = scene["background"] + self.camera: Camera = scene["camera"] + + # physics properties + if set_physics_props: + static_friction = 5.0 + dynamic_friction = 5.0 + restitution = 0.0 + + # object: rigid prim -> has root_physx_view + if ( + hasattr(self.object_prim, "root_physx_view") + and self.object_prim.root_physx_view is not None + ): + obj_view = self.object_prim.root_physx_view + obj_mats = obj_view.get_material_properties() + vals_obj = torch.tensor( + [static_friction, dynamic_friction, restitution], + device=obj_mats.device, + dtype=obj_mats.dtype, + ) + obj_mats[:] = vals_obj + obj_view.set_material_properties( + obj_mats, self._all_env_ids.to(obj_mats.device) + ) + + # background: GroundPlaneCfg -> XFormPrim (no root_physx_view); skip if unavailable + if ( + hasattr(self.background_prim, "root_physx_view") + and self.background_prim.root_physx_view is not None + ): + bg_view = self.background_prim.root_physx_view + bg_mats = bg_view.get_material_properties() + vals_bg = torch.tensor( + [static_friction, dynamic_friction, restitution], + device=bg_mats.device, + dtype=bg_mats.dtype, + ) + bg_mats[:] = vals_bg + bg_view.set_material_properties( + bg_mats, self._all_env_ids.to(bg_mats.device) + ) + + # ik controller + self.diff_ik_cfg = DifferentialIKControllerCfg( + command_type="pose", use_relative_mode=False, ik_method="dls" + ) + self.diff_ik_controller = DifferentialIKController( + self.diff_ik_cfg, num_envs=self.num_envs, device=sim.device + ) + + # robot: joints / gripper / jacobian indices + self.robot_entity_cfg = SceneEntityCfg( + "robot", joint_names=["panda_joint.*"], body_names=["panda_hand"] + ) + self.robot_gripper_cfg = SceneEntityCfg( + "robot", joint_names=["panda_finger_joint.*"], 
body_names=["panda_hand"] + ) + self.robot_entity_cfg.resolve(scene) + self.robot_gripper_cfg.resolve(scene) + self.gripper_open_tensor = 0.04 * torch.ones( + (self.num_envs, len(self.robot_gripper_cfg.joint_ids)), + device=self.robot.device, + ) + self.gripper_close_tensor = torch.zeros( + (self.num_envs, len(self.robot_gripper_cfg.joint_ids)), + device=self.robot.device, + ) + if self.robot.is_fixed_base: + self.ee_jacobi_idx = self.robot_entity_cfg.body_ids[0] - 1 + else: + self.ee_jacobi_idx = self.robot_entity_cfg.body_ids[0] + + # demo count and data saving + self.count = 0 + self.demo_id = 0 + self.save_dict = { + "rgb": [], + "depth": [], + "segmask": [], + "joint_pos": [], + "joint_vel": [], + "actions": [], + "gripper_pos": [], + "gripper_cmd": [], + } + + # visualization + self.selected_object_id = 0 + self.debug_level = debug_level + + self.goal_vis_list = [] + if self.debug_level > 0: + for b in range(self.num_envs): + cfg = VisualizationMarkersCfg( + prim_path=f"/Visuals/ee_goal/env_{b:03d}", + markers={ + "frame": sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/UIElements/frame_prim.usd", + scale=(0.06, 0.06, 0.06), + visual_material=sim_utils.PreviewSurfaceCfg( + diffuse_color=(1.0, 0.0, 0.0) + ), + ), + }, + ) + self.goal_vis_list.append(VisualizationMarkers(cfg)) + + # curobo motion planning + self.enable_motion_planning = enable_motion_planning + if self.enable_motion_planning: + print(f"prepare curobo motion planning: {enable_motion_planning}") + self.prepare_curobo() + print("curobo motion planning ready.") + + # -------- Curobo Motion Planning ---------- + def prepare_curobo(self): + setup_curobo_logger("error") + # tensor_args = TensorDeviceType() + tensor_args = TensorDeviceType(device=self.sim.device, dtype=torch.float32) + curobo_path = curobo.__file__.split("/__init__")[0] + robot_file = f"{curobo_path}/content/configs/robot/franka.yml" + motion_gen_config = MotionGenConfig.load_from_robot_config( + robot_cfg=robot_file, + world_model=None, + tensor_args=tensor_args, + interpolation_dt=self.sim_dt, + use_cuda_graph=True if self.num_envs == 1 else False, + ) + self.motion_gen = MotionGen(motion_gen_config) + if self.num_envs == 1: + self.motion_gen.warmup(enable_graph=True) + _ = RobotConfig.from_dict( + load_yaml(join_path(get_robot_configs_path(), robot_file))["robot_cfg"], + tensor_args, + ) + + # ---------- Helpers ---------- + def _ensure_batch_pose(self, p, q): + """Ensure position [B,3], quaternion [B,4] on device.""" + B = self.scene.num_envs + p = torch.as_tensor(p, dtype=torch.float32, device=self.sim.device) + q = torch.as_tensor(q, dtype=torch.float32, device=self.sim.device) + if p.ndim == 1: + p = p.view(1, -1).repeat(B, 1) + if q.ndim == 1: + q = q.view(1, -1).repeat(B, 1) + return p.contiguous(), q.contiguous() + + def _traj_to_BT7(self, traj): + """Normalize various curobo traj.position shapes to [B, T, 7].""" + B = self.scene.num_envs + pos = traj.position # torch or numpy + pos = torch.as_tensor(pos, device=self.sim.device, dtype=torch.float32) + + if pos.ndim == 3: + # candidate shapes: [B,T,7] or [T,B,7] + if pos.shape[0] == B and pos.shape[-1] == 7: + return pos # [B,T,7] + if pos.shape[1] == B and pos.shape[-1] == 7: + return pos.permute(1, 0, 2).contiguous() # [B,T,7] + elif pos.ndim == 2 and pos.shape[-1] == 7: + # [T,7] → broadcast to all envs + return pos.unsqueeze(0).repeat(B, 1, 1) + # Fallback: flatten and infer + flat = pos.reshape(-1, 7) # [B*T,7] + T = flat.shape[0] // B + return flat.view(B, T, 7).contiguous() + + # 
---------- Planning / Execution (Single) ---------- + def reinitialize_motion_gen(self): + """ + Reinitialize the motion generation object. + Call this after a crash to restore a clean state. + """ + print("[INFO] Reinitializing motion planner...") + try: + # Clear CUDA cache + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + # Recreate the motion planner + from curobo.types.base import TensorDeviceType + from curobo.wrap.reacher.motion_gen import MotionGen, MotionGenConfig + from curobo.util_file import get_robot_configs_path, join_path, load_yaml + from curobo.types.robot import RobotConfig + import curobo + + tensor_args = TensorDeviceType(device=self.sim.device, dtype=torch.float32) + curobo_path = curobo.__file__.split("/__init__")[0] + robot_file = f"{curobo_path}/content/configs/robot/franka.yml" + + motion_gen_config = MotionGenConfig.load_from_robot_config( + robot_cfg=robot_file, + world_model=None, + tensor_args=tensor_args, + interpolation_dt=self.sim_dt, + use_cuda_graph=True if self.num_envs == 1 else False, + ) + + self.motion_gen = MotionGen(motion_gen_config) + + if self.num_envs == 1: + self.motion_gen.warmup(enable_graph=True) + + print("[INFO] Motion planner reinitialized successfully") + return True + + except Exception as e: + print(f"[ERROR] Failed to reinitialize motion planner: {e}") + return False + + + def motion_planning_batch( + self, position, quaternion, max_attempts=1, allow_graph=False, max_retries=1 + ): + """ + Multi-environment planning with automatic recovery from crashes. + Returns None on complete failure to signal restart needed. + """ + B = self.scene.num_envs + joint_pos = self.robot.data.joint_pos[ + :, self.robot_entity_cfg.joint_ids + ].contiguous() + + pos_b, quat_b = self._ensure_batch_pose(position, quaternion) + + for retry in range(max_retries): + try: + # Attempt planning + start_state = JointState.from_position(joint_pos) + goal_pose = Pose(position=pos_b, quaternion=quat_b) + plan_cfg = MotionGenPlanConfig( + max_attempts=max_attempts, enable_graph=allow_graph + ) + + result = self.motion_gen.plan_batch(start_state, goal_pose, plan_cfg) + + # Process results + paths = result.get_paths() + T_max = 1 + + for i, p in enumerate(paths): + if not result.success[i]: + print(f"[WARN] Motion planning failed for env {i}.") + else: + T_max = max(T_max, int(p.position.shape[-2])) + + dof = joint_pos.shape[-1] + BT7 = torch.zeros( + (B, T_max, dof), device=self.sim.device, dtype=torch.float32 + ) + + for i, p in enumerate(paths): + if result.success[i] == False: + BT7[i, :, :] = ( + joint_pos[i : i + 1, :].unsqueeze(1).repeat(1, T_max, 1) + ) + else: + Ti = p.position.shape[-2] + BT7[i, :Ti, :] = p.position.to(self.sim.device).to(torch.float32) + if Ti < T_max: + BT7[i, Ti:, :] = BT7[i, Ti - 1 : Ti, :] + + success = result.success if result.success is not None else torch.zeros( + B, dtype=torch.bool, device=self.sim.device + ) + + # Success! 
Return the trajectory + return BT7, success + + except AttributeError as e: + print(f"[ERROR] Motion planner crash on attempt {retry+1}/{max_retries}: {e}") + + if retry < max_retries - 1: + if self.reinitialize_motion_gen(): + print(f"[INFO] Retrying motion planning (attempt {retry+2}/{max_retries})...") + continue + else: + print("[ERROR] Failed to recover motion planner") + break + else: + print("[ERROR] Max retries reached, motion planning failed critically") + + except Exception as e: + print(f"[ERROR] Unexpected error in motion planning: {type(e).__name__}: {e}") + + if retry < max_retries - 1 and ("cuda graph" in str(e).lower() or "NoneType" in str(e)): + if self.reinitialize_motion_gen(): + print(f"[INFO] Retrying after CUDA error (attempt {retry+2}/{max_retries})...") + continue + break + + # If we get here, all retries failed - return None to signal complete failure + print("[ERROR] Motion planning failed critically - returning None to trigger restart") + return None, None + + + def motion_planning_single( + self, position, quaternion, max_attempts=1, use_graph=True, max_retries=1 + ): + """ + Single environment planning with automatic recovery from crashes. + Returns None on complete failure to signal restart needed. + """ + joint_pos0 = self.robot.data.joint_pos[:, self.robot_entity_cfg.joint_ids][ + 0:1 + ].contiguous() + + pos_b, quat_b = self._ensure_batch_pose(position, quaternion) + pos_b = pos_b[0:1] + quat_b = quat_b[0:1] + + for retry in range(max_retries): + try: + start_state = JointState.from_position(joint_pos0) + goal_pose = Pose(position=pos_b, quaternion=quat_b) + plan_cfg = MotionGenPlanConfig( + max_attempts=max_attempts, enable_graph=use_graph + ) + + result = self.motion_gen.plan_single(start_state, goal_pose, plan_cfg) + traj = result.get_interpolated_plan() + + if result.success[0] == True: + BT7 = ( + traj.position.to(self.sim.device).to(torch.float32).unsqueeze(0) + ) + else: + print(f"[WARN] Motion planning failed.") + BT7 = joint_pos0.unsqueeze(1) + + return BT7, result.success + + except AttributeError as e: + print(f"[ERROR] Motion planner crash on attempt {retry+1}/{max_retries}: {e}") + + if retry < max_retries - 1: + if self.reinitialize_motion_gen(): + print(f"[INFO] Retrying motion planning (attempt {retry+2}/{max_retries})...") + continue + else: + break + else: + print("[ERROR] Max retries reached") + + except Exception as e: + print(f"[ERROR] Unexpected error: {type(e).__name__}: {e}") + + if retry < max_retries - 1 and ("cuda graph" in str(e).lower() or "NoneType" in str(e)): + if self.reinitialize_motion_gen(): + continue + break + + # Complete failure - return None to signal restart + print("[ERROR] Motion planning failed critically - returning None to trigger restart") + return None, None + + def motion_planning(self, position, quaternion, max_attempts=1): + if self.scene.num_envs == 1: + return self.motion_planning_single( + position, quaternion, max_attempts=max_attempts, use_graph=True + ) + else: + return self.motion_planning_batch( + position, quaternion, max_attempts=max_attempts, allow_graph=False + ) + + def move_to_motion_planning( + self, + position: torch.Tensor, + quaternion: torch.Tensor, + gripper_open: bool = True, + record: bool = True, + ) -> torch.Tensor: + """ + Cartesian space control: Move the end effector to the desired position and orientation using motion planning. + Works with batched envs. If inputs are 1D, they will be broadcast to all envs. 
+        """
+        traj, success = self.motion_planning(position, quaternion)
+        if traj is None:
+            # Planner failed past recovery; propagate the (None, None) sentinel
+            # so callers can trigger a restart instead of crashing on traj.shape.
+            return None, None
+        BT7 = traj
+        T = BT7.shape[1]
+        last = None
+        for i in range(T):
+            joint_pos_des = BT7[:, i, :]  # [B,7]
+            self.apply_actions(joint_pos_des, gripper_open=gripper_open)
+            obs = self.get_observation(gripper_open=gripper_open)
+            if record:
+                self.record_data(obs)
+            last = joint_pos_des
+        return last, success
+
+    def compose_real_video(self, env_id: int = 0):
+        """
+        Composite the simulated video onto the real background using mask-based rendering.
+
+        Args:
+            env_id: Environment ID to process
+        """
+
+        def pad_to_even(frame):
+            """Pad frame to even dimensions for video encoding."""
+            H, W = frame.shape[:2]
+            pad_h = H % 2
+            pad_w = W % 2
+            if pad_h or pad_w:
+                pad = ((0, pad_h), (0, pad_w)) if frame.ndim == 2 else ((0, pad_h), (0, pad_w), (0, 0))
+                frame = np.pad(frame, pad, mode='edge')
+            return frame
+
+        # Construct paths
+        base_path = self.out_dir / self.img_folder
+        demo_path = self._demo_dir() / f"env_{env_id:03d}"
+
+        SIM_VIDEO_PATH = demo_path / "sim_video.mp4"
+        MASK_VIDEO_PATH = demo_path / "mask_video.mp4"
+        REAL_BACKGROUND_PATH = base_path / "reconstruction" / "background.jpg"
+        OUTPUT_PATH = demo_path / "real_video.mp4"
+
+        # Check that required files exist
+        if not SIM_VIDEO_PATH.exists():
+            print(f"[WARN] Simulated video not found: {SIM_VIDEO_PATH}")
+            return False
+        if not MASK_VIDEO_PATH.exists():
+            print(f"[WARN] Mask video not found: {MASK_VIDEO_PATH}")
+            return False
+        if not REAL_BACKGROUND_PATH.exists():
+            print(f"[WARN] Real background not found: {REAL_BACKGROUND_PATH}")
+            return False
+
+        # Load real background image
+        print(f"[INFO] Loading real background: {REAL_BACKGROUND_PATH}")
+        real_img = np.array(Image.open(REAL_BACKGROUND_PATH))
+
+        H, W, _ = real_img.shape
+        print(f"[INFO] Background size: {W}x{H}")
+
+        # Open videos
+        print(f"[INFO] Loading simulated video: {SIM_VIDEO_PATH}")
+        rgb_reader = imageio.get_reader(SIM_VIDEO_PATH)
+        print(f"[INFO] Loading mask video: {MASK_VIDEO_PATH}")
+        mask_reader = imageio.get_reader(MASK_VIDEO_PATH)
+
+        # Get metadata from the original video
+        N = rgb_reader.count_frames()
+        fps = rgb_reader.get_meta_data()['fps']
+        print(f"[INFO] Processing {N} frames at {fps} FPS...")
+
+        composed_images = []
+
+        for i in range(N):
+            # Read frames
+            sim_rgb = rgb_reader.get_data(i)
+            sim_mask = mask_reader.get_data(i)
+
+            # Convert mask to binary (grayscale > 127 = foreground)
+            if sim_mask.ndim == 3:
+                sim_mask = cv2.cvtColor(sim_mask, cv2.COLOR_RGB2GRAY)
+            sim_mask = sim_mask > 127
+
+            # Resize sim frames to match the real background if needed
+            if sim_rgb.shape[:2] != (H, W):
+                sim_rgb = cv2.resize(sim_rgb, (W, H))
+                sim_mask = cv2.resize(sim_mask.astype(np.uint8), (W, H)) > 0
+
+            # Compose: real background + simulated foreground (where mask is True)
+            composed = real_img.copy()
+            composed[sim_mask] = sim_rgb[sim_mask]
+            # Pad only after compositing; padding first would desynchronize the
+            # mask shape from the image and break the boolean indexing above.
+            composed = pad_to_even(composed)
+
+            composed_images.append(composed)
+
+            if (i + 1) % 10 == 0:
+                print(f"[INFO] Processed {i + 1}/{N} frames")
+
+        # Save composed video
+        print(f"[INFO] Saving composed video to: {OUTPUT_PATH}")
+        writer = imageio.get_writer(OUTPUT_PATH, fps=fps, macro_block_size=None)
+        for frame in composed_images:
+            writer.append_data(frame)
+        writer.close()
+
+        print(f"[INFO] Done! Saved {len(composed_images)} frames to {OUTPUT_PATH} at {fps} FPS")
+        return True
+
+    # ---------- Visualization ----------
+    def show_goal(self, pos, quat):
+        """
+        Show a pose with visual marker(s).
+ - if [B,3]/[B,4], update all envs; + - if [3]/[4] or [1,3]/[1,4], default to update env 0; + - optional env_ids specify a subset of envs to update; when a single pose is input, it will be broadcast to these envs. + """ + if self.debug_level == 0: + print("debug_level=0, skip visualization.") + return + + if not isinstance(pos, torch.Tensor): + pos_t = torch.tensor(pos, dtype=torch.float32, device=self.sim.device) + quat_t = torch.tensor(quat, dtype=torch.float32, device=self.sim.device) + else: + pos_t = pos.to(self.sim.device) + quat_t = quat.to(self.sim.device) + + if pos_t.ndim == 1: + pos_t = pos_t.view(1, -1) + if quat_t.ndim == 1: + quat_t = quat_t.view(1, -1) + + B = self.num_envs + + if pos_t.shape[0] == B: + for b in range(B): + self.goal_vis_list[b].visualize(pos_t[b : b + 1], quat_t[b : b + 1]) + else: + self.goal_vis_list[0].visualize(pos_t, quat_t) + + def set_robot_pose(self, robot_pose: torch.Tensor): + if robot_pose.ndim == 1: + self.robot_pose = ( + robot_pose.view(1, -1).repeat(self.num_envs, 1).to(self.robot.device) + ) + else: + assert robot_pose.shape[0] == self.num_envs and robot_pose.shape[1] == 7, ( + f"robot_pose must be [B,7], got {robot_pose.shape}" + ) + self.robot_pose = robot_pose.to(self.robot.device).contiguous() + + + # ---------- Environment Step ---------- + def step(self): + self.scene.write_data_to_sim() + self.sim.step() + self.camera.update(dt=self.sim_dt) + self.count += 1 + self.scene.update(self.sim_dt) + + # ---------- Apply actions to robot joints ---------- + def apply_actions(self, joint_pos_des, gripper_open: bool = True): + # joint_pos_des: [B, n_joints] + self.robot.set_joint_position_target( + joint_pos_des, joint_ids=self.robot_entity_cfg.joint_ids + ) + if gripper_open: + self.robot.set_joint_position_target( + self.gripper_open_tensor, joint_ids=self.robot_gripper_cfg.joint_ids + ) + else: + self.robot.set_joint_position_target( + self.gripper_close_tensor, joint_ids=self.robot_gripper_cfg.joint_ids + ) + self.step() + + # ---------- EE control ---------- + def move_to( + self, + position: torch.Tensor, + quaternion: torch.Tensor, + gripper_open: bool = True, + record: bool = True, + ) -> torch.Tensor: + if self.enable_motion_planning: + return self.move_to_motion_planning( + position, quaternion, gripper_open=gripper_open, record=record + ) + else: + return self.move_to_ik( + position, quaternion, gripper_open=gripper_open, record=record + ) + + def move_to_ik( + self, + position: torch.Tensor, + quaternion: torch.Tensor, + steps: int = 50, + gripper_open: bool = True, + record: bool = True, + ) -> torch.Tensor: + """ + Cartesian space control: Move the end effector to the desired position and orientation using inverse kinematics. + Works with batched envs. If inputs are 1D, they will be broadcast to all envs. + + Early-stop when both position and orientation errors are within tolerances. + 'steps' now acts as a max-iteration cap; the loop breaks earlier on convergence. 
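+
+        Convergence test per env (matching the tolerances set below):
+            pos_err = ||p_des - p_cur||              <= 3e-3 m
+            ori_err = 2 * arccos(|<q_des, q_cur>|)   <= ~3 deg
+        The absolute value of the quaternion dot product handles the q / -q
+        double cover of rotations.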
+ """ + # Ensure [B,3]/[B,4] tensors on device + position, quaternion = self._ensure_batch_pose(position, quaternion) + + # IK command (world frame goals) + ee_goals = torch.cat([position, quaternion], dim=1).to(self.sim.device).float() + self.diff_ik_controller.reset() + self.diff_ik_controller.set_command(ee_goals) + + # Tolerances (you can tune if needed) + pos_tol = 3e-3 # meters + ori_tol = 3.0 * np.pi / 180.0 # radians (~3 degrees) + + # Interpret 'steps' as max iterations; early-stop on convergence + max_steps = int(steps) if steps is not None and steps > 0 else 10_000 + + joint_pos_des = None + for _ in range(max_steps): + # Current EE pose (world) and Jacobian + jacobian = self.robot.root_physx_view.get_jacobians()[ + :, self.ee_jacobi_idx, :, self.robot_entity_cfg.joint_ids + ] + ee_pose_w = self.robot.data.body_state_w[ + :, self.robot_entity_cfg.body_ids[0], 0:7 + ] + root_pose_w = self.robot.data.root_state_w[:, 0:7] + joint_pos = self.robot.data.joint_pos[:, self.robot_entity_cfg.joint_ids] + + # Current EE pose expressed in robot base + ee_pos_b, ee_quat_b = subtract_frame_transforms( + root_pose_w[:, 0:3], + root_pose_w[:, 3:7], + ee_pose_w[:, 0:3], + ee_pose_w[:, 3:7], + ) + + # Compute next joint command + joint_pos_des = self.diff_ik_controller.compute( + ee_pos_b, ee_quat_b, jacobian, joint_pos + ) + + # Apply + self.apply_actions(joint_pos_des, gripper_open=gripper_open) + + # Optional recording + if record: + obs = self.get_observation(gripper_open=gripper_open) + self.record_data(obs) + + # --- Early-stop check --- + # Desired EE pose in base frame (convert world goal -> base) + des_pos_b, des_quat_b = subtract_frame_transforms( + root_pose_w[:, 0:3], root_pose_w[:, 3:7], position, quaternion + ) + # Position error [B] + pos_err = torch.norm(des_pos_b - ee_pos_b, dim=1) + # Orientation error [B]: angle between quaternions + # Note: q and -q are equivalent -> take |dot| + dot = torch.sum(des_quat_b * ee_quat_b, dim=1).abs().clamp(-1.0, 1.0) + ori_err = 2.0 * torch.acos(dot) + + done = (pos_err <= pos_tol) & (ori_err <= ori_tol) + if bool(torch.all(done)): + break + + return joint_pos_des + + # ---------- Robot Waiting ---------- + def wait(self, gripper_open, steps: int, record: bool = True): + joint_pos_des = self.robot.data.joint_pos[ + :, self.robot_entity_cfg.joint_ids + ].clone() + for _ in range(steps): + self.apply_actions(joint_pos_des, gripper_open=gripper_open) + obs = self.get_observation(gripper_open=gripper_open) + if record: + self.record_data(obs) + return joint_pos_des + + # ---------- Reset Envs ---------- + def reset(self, env_ids=None): + """ + Reset all envs or only those in env_ids. 
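+
+        Example (illustrative): ``self.reset()`` resets every env, while
+        ``self.reset(env_ids=[0, 2])`` resets only envs 0 and 2.
+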
+ Assumptions: + - self.robot_pose.shape == (B, 7) # base pose per env (wxyz in [:,3:]) + - self.robot.data.default_joint_pos == (B, 7) + - self.robot.data.default_joint_vel == (B, 7) + """ + device = self.object_prim.device + if env_ids is None: + env_ids_t = self._all_env_ids.to(device) # (B,) + else: + env_ids_t = torch.as_tensor(env_ids, device=device, dtype=torch.long).view( + -1 + ) # (M,) + M = int(env_ids_t.shape[0]) + + # --- object pose/vel: set object at env origins with identity quat --- + env_origins = self.scene.env_origins.to(device)[env_ids_t] # (M,3) + object_pose = torch.zeros((M, 7), device=device, dtype=torch.float32) + object_pose[:, :3] = env_origins + object_pose[:, 3] = 1.0 # wxyz = [1,0,0,0] + self.object_prim.write_root_pose_to_sim(object_pose, env_ids=env_ids_t) + self.object_prim.write_root_velocity_to_sim( + torch.zeros((M, 6), device=device, dtype=torch.float32), env_ids=env_ids_t + ) + self.object_prim.write_data_to_sim() + for prim in self.other_object_prims: + prim.write_root_pose_to_sim(object_pose, env_ids=env_ids_t) + prim.write_root_velocity_to_sim( + torch.zeros((M, 6), device=device, dtype=torch.float32), + env_ids=env_ids_t, + ) + prim.write_data_to_sim() + + # --- robot base pose/vel --- + # robot_pose is (B,7) in *local* base frame; add env origin offset per env + rp_local = self.robot_pose.to(self.robot.device)[env_ids_t] # (M,7) + env_origins_robot = env_origins.to(self.robot.device) # (M,3) + robot_pose_world = rp_local.clone() + robot_pose_world[:, :3] = env_origins_robot + robot_pose_world[:, :3] + self.robot.write_root_pose_to_sim(robot_pose_world, env_ids=env_ids_t) + self.robot.write_root_velocity_to_sim( + torch.zeros((M, 6), device=self.robot.device, dtype=torch.float32), + env_ids=env_ids_t, + ) + + # --- joints (B,7) -> select ids (M,7) --- + joint_pos = self.robot.data.default_joint_pos.to(self.robot.device)[ + env_ids_t + ] # (M,7) + joint_vel = self.robot.data.default_joint_vel.to(self.robot.device)[ + env_ids_t + ] # (M,7) + self.robot.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids_t) + self.robot.write_data_to_sim() + + # housekeeping + self.clear_data() + + # ---------- Get Observations ---------- + def get_observation(self, gripper_open) -> Dict[str, torch.Tensor]: + # camera outputs (already batched) + rgb = self.camera.data.output["rgb"] # [B,H,W,3] + depth = self.camera.data.output["distance_to_image_plane"] # [B,H,W] + ins_all = self.camera.data.output["instance_id_segmentation_fast"] # [B,H,W] + + B, H, W, _ = ins_all.shape + fg_mask_list = [] + obj_mask_list = [] + for b in range(B): + ins_id_seg = ins_all[b] + id_mapping = self.camera.data.info[b]["instance_id_segmentation_fast"][ + "idToLabels" + ] + fg_mask_b = torch.zeros_like( + ins_id_seg, dtype=torch.bool, device=ins_id_seg.device + ) + obj_mask_b = torch.zeros_like( + ins_id_seg, dtype=torch.bool, device=ins_id_seg.device + ) + for key, value in id_mapping.items(): + if "object" in value: + fg_mask_b |= ins_id_seg == key + obj_mask_b |= ins_id_seg == key + if "Robot" in value: + fg_mask_b |= ins_id_seg == key + fg_mask_list.append(fg_mask_b) + obj_mask_list.append(obj_mask_b) + fg_mask = torch.stack(fg_mask_list, dim=0) # [B,H,W] + obj_mask = torch.stack(obj_mask_list, dim=0) # [B,H,W] + + ee_pose_w = self.robot.data.body_state_w[ + :, self.robot_entity_cfg.body_ids[0], 0:7 + ] + joint_pos = self.robot.data.joint_pos[:, self.robot_entity_cfg.joint_ids] + joint_vel = self.robot.data.joint_vel[:, self.robot_entity_cfg.joint_ids] + gripper_pos = 
self.robot.data.joint_pos[:, self.robot_gripper_cfg.joint_ids] + gripper_cmd = ( + self.gripper_open_tensor if gripper_open else self.gripper_close_tensor + ) + + cam_pos_w = self.camera.data.pos_w + cam_quat_w = self.camera.data.quat_w_ros + ee_pos_cam, ee_quat_cam = subtract_frame_transforms( + cam_pos_w, cam_quat_w, ee_pose_w[:, 0:3], ee_pose_w[:, 3:7] + ) + ee_pose_cam = torch.cat([ee_pos_cam, ee_quat_cam], dim=1) + + points_3d_cam = unproject_depth( + self.camera.data.output["distance_to_image_plane"], + self.camera.data.intrinsic_matrices, + ) + points_3d_world = transform_points( + points_3d_cam, self.camera.data.pos_w, self.camera.data.quat_w_ros + ) + + object_center = self.object_prim.data.root_com_pos_w[:, :3] + + return { + "rgb": rgb, + "depth": depth, + "fg_mask": fg_mask, + "joint_pos": joint_pos, + "gripper_pos": gripper_pos, + "gripper_cmd": gripper_cmd, + "joint_vel": joint_vel, + "ee_pose_cam": ee_pose_cam, + "ee_pose_w": ee_pose_w, + "object_mask": obj_mask, + "points_cam": points_3d_cam, + "points_world": points_3d_world, + "object_center": object_center, + } + + # ---------- Task Completion Verifier ---------- + def is_success(self) -> bool: + raise NotImplementedError( + "BaseSimulator.is_success() should be implemented in subclass." + ) + + # ---------- Data Recording & Saving & Clearing ---------- + def record_data(self, obs: Dict[str, torch.Tensor]): + self.save_dict["rgb"].append(obs["rgb"].cpu().numpy()) # [B,H,W,3] + self.save_dict["depth"].append(obs["depth"].cpu().numpy()) # [B,H,W] + self.save_dict["segmask"].append(obs["fg_mask"].cpu().numpy()) # [B,H,W] + self.save_dict["joint_pos"].append(obs["joint_pos"].cpu().numpy()) # [B,nJ] + self.save_dict["gripper_pos"].append(obs["gripper_pos"].cpu().numpy()) # [B,3] + self.save_dict["gripper_cmd"].append(obs["gripper_cmd"].cpu().numpy()) # [B,1] + self.save_dict["joint_vel"].append(obs["joint_vel"].cpu().numpy()) + + def clear_data(self): + for key in self.save_dict.keys(): + self.save_dict[key] = [] + + def _demo_dir(self) -> Path: + return self.out_dir / self.img_folder / "demos" / f"demo_{self.demo_id}" + + def _env_dir(self, base: Path, b: int) -> Path: + d = base / f"env_{b:03d}" + d.mkdir(parents=True, exist_ok=True) + return d + + def save_data(self, ignore_keys: List[str] = []): + save_root = self._demo_dir() + save_root.mkdir(parents=True, exist_ok=True) + + stacked = {k: np.array(v) for k, v in self.save_dict.items()} + + for b in range(self.num_envs): + env_dir = self._env_dir(save_root, b) + for key, arr in stacked.items(): + if key in ignore_keys: # skip the keys for storage + continue + if key == "rgb": + video_path = env_dir / "sim_video.mp4" + writer = imageio.get_writer( + video_path, fps=50, macro_block_size=None + ) + for t in range(arr.shape[0]): + writer.append_data(arr[t, b]) + writer.close() + elif key == "segmask": + video_path = env_dir / "mask_video.mp4" + writer = imageio.get_writer( + video_path, fps=50, macro_block_size=None + ) + for t in range(arr.shape[0]): + writer.append_data((arr[t, b].astype(np.uint8) * 255)) + writer.close() + elif key == "depth": + depth_seq = arr[:, b] + flat = depth_seq[depth_seq > 0] + max_depth = np.percentile(flat, 99) if flat.size > 0 else 1.0 + depth_norm = np.clip(depth_seq / max_depth * 255.0, 0, 255).astype( + np.uint8 + ) + video_path = env_dir / "depth_video.mp4" + writer = imageio.get_writer( + video_path, fps=50, macro_block_size=None + ) + for t in range(depth_norm.shape[0]): + writer.append_data(depth_norm[t]) + writer.close() + 
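+                    # Keep the raw metric depth next to the preview video: the mp4
+                    # above is percentile-normalized to 8-bit and only for viewing.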
np.save(env_dir / f"{key}.npy", depth_seq) + else: + np.save(env_dir / f"{key}.npy", arr[:, b]) + json.dump(self.sim_cfgs, open(env_dir / "config.json", "w"), indent=2) + + print("[INFO]: Demonstration is saved at: ", save_root) + + # Compose real videos for all environments + print("\n[INFO] Composing real videos with background...") + for b in range(self.num_envs): + print(f"[INFO] Processing environment {b}/{self.num_envs}...") + success = self.compose_real_video(env_id=b) + if success: + print(f"[INFO] Real video composed successfully for env {b}") + else: + print(f"[WARN] Failed to compose real video for env {b}") + + demo_root = self.out_dir / "all_demos" + demo_root.mkdir(parents=True, exist_ok=True) + total_demo_id = get_next_demo_id(demo_root) + demo_save_path = demo_root / f"demo_{total_demo_id}" + demo_save_path.mkdir(parents=True, exist_ok=True) + meta_info = { + "path": str(save_root), + "fps": 50, + } + with open(demo_save_path / "meta_info.json", "w") as f: + json.dump(meta_info, f) + os.system(f"cp -r {save_root}/* {demo_save_path}") + print("[INFO]: Demonstration is saved at: ", demo_save_path) + + def delete_data(self): + save_path = self._demo_dir() + failure_root = self.out_dir / self.img_folder / "demos_failures" + failure_root.mkdir(parents=True, exist_ok=True) + fail_demo_id = get_next_demo_id(failure_root) + failure_path = failure_root / f"demo_{fail_demo_id}" + os.system(f"mv {save_path} {failure_path}") + for key in self.save_dict.keys(): + self.save_dict[key] = [] + print("[INFO]: Clear up the folder: ", save_path) diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_env_factory.py b/openreal2sim/simulation/isaaclab/data_collection/sim_env_factory.py new file mode 100755 index 0000000..073668b --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_env_factory.py @@ -0,0 +1,573 @@ +# -*- coding: utf-8 -*- +""" +Isaac Lab-based simulation environment factory. 
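+
+Typical usage (illustrative; mirrors sim_heuristic_manip.py in this patch):
+
+    from sim_utils.sim_utils import load_sim_parameters
+    cfgs = load_sim_parameters(Path.cwd(), "demo_video")
+    env, env_cfg = make_env(cfgs, num_envs=1, device="cuda:0")
+    sim, scene = env.sim, env.scene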
+"""
+
+from __future__ import annotations
+import copy
+import json
+import random
+from pathlib import Path
+from typing import Tuple, Optional, List, Dict
+
+import numpy as np
+import transforms3d
+
+# Isaac Lab core
+from isaaclab.utils import configclass
+from isaaclab.scene import InteractiveSceneCfg
+from isaaclab.assets import AssetBaseCfg, RigidObjectCfg
+from isaaclab.sim.schemas import schemas_cfg
+import isaaclab.sim as sim_utils
+from isaaclab.sensors.camera import CameraCfg
+from isaaclab_assets import FRANKA_PANDA_HIGH_PD_CFG
+
+# Manager-based API (terms/configs)
+from isaaclab.managers import (
+    TerminationTermCfg as DoneTerm,
+    EventTermCfg as EventTerm,
+    ObservationGroupCfg as ObsGroup,
+    ObservationTermCfg as ObsTerm,
+    RewardTermCfg as RewTerm,
+    CurriculumTermCfg as CurrTerm,
+    SceneEntityCfg,
+)
+
+# Task-specific MDP helpers (adjust path if needed)
+import isaaclab_tasks.manager_based.manipulation.lift.mdp as mdp
+
+from dataclasses import dataclass, MISSING
+
+
+@dataclass
+class SceneCtx:
+    cam_dict: Dict
+    obj_paths: List[str]
+    background_path: str
+    robot_pos: List[float]
+    robot_rot: List[float]
+    bg_physics: Optional[Dict] = None
+    obj_physics: Optional[List[Dict]] = None
+    use_ground_plane: bool = False
+    ground_z: Optional[float] = None
+
+
+_SCENE_CTX: Optional[SceneCtx] = None
+
+# ---- default physx presets ----
+DEFAULT_BG_PHYSICS = {
+    "mass_props": {"mass": 100.0},
+    "rigid_props": {"disable_gravity": True, "kinematic_enabled": True},
+    "collision_props": {
+        "collision_enabled": True,
+        "contact_offset": 0.0015,
+        "rest_offset": 0.0003,
+        "torsional_patch_radius": 0.02,
+        "min_torsional_patch_radius": 0.005,
+    },
+}
+DEFAULT_OBJ_PHYSICS = {
+    "mass_props": {"mass": 0.5},
+    "rigid_props": {"disable_gravity": False, "kinematic_enabled": False},
+    "collision_props": {
+        "collision_enabled": True,
+        "contact_offset": 0.0015,
+        "rest_offset": 0.0003,
+        "torsional_patch_radius": 0.02,
+        "min_torsional_patch_radius": 0.005,
+    },
+}
+
+
+def _deep_update(dst: dict, src: dict | None) -> dict:
+    """Recursive dict update without touching the original."""
+    out = copy.deepcopy(dst)
+    if not src:
+        return out
+    for k, v in src.items():
+        if isinstance(v, dict) and isinstance(out.get(k), dict):
+            out[k] = _deep_update(out[k], v)
+        else:
+            out[k] = v
+    return out
+
+
+# --------------------------------------------------------------------------------------
+# Camera / Robot builders (use _SCENE_CTX)
+# --------------------------------------------------------------------------------------
+def create_camera():
+    """Return a CameraCfg using the global SceneCtx."""
+    assert _SCENE_CTX is not None, (
+        "init_scene_configs/init_scene_from_scene_dict must be called first."
+ ) + C = _SCENE_CTX.cam_dict + width = int(C["width"]) + height = int(C["height"]) + fx, fy, cx, cy = C["fx"], C["fy"], C["cx"], C["cy"] + cam_orientation = tuple(C["cam_orientation"]) + cam_pos = tuple(C["scene_info"]["move_to"]) + return CameraCfg( + prim_path="{ENV_REGEX_NS}/Camera", + offset=CameraCfg.OffsetCfg(pos=cam_pos, rot=cam_orientation, convention="ros"), + data_types=[ + "rgb", + "distance_to_image_plane", + "distance_to_camera", + "instance_id_segmentation_fast", + ], + colorize_instance_id_segmentation=False, + spawn=sim_utils.PinholeCameraCfg.from_intrinsic_matrix( + intrinsic_matrix=[fx, 0, cx, 0, fy, cy, 0, 0, 1], + width=width, + height=height, + ), + width=width, + height=height, + ) + + +def create_robot(): + """Return a configured Franka Panda config using the global SceneCtx.""" + assert _SCENE_CTX is not None, ( + "init_scene_configs/init_scene_from_scene_dict must be called first." + ) + robot = FRANKA_PANDA_HIGH_PD_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + robot.init_state.pos = tuple(_SCENE_CTX.robot_pos) + robot.init_state.rot = tuple(_SCENE_CTX.robot_rot) + return robot + + +# -------------------------------------------------------------------------------------- +# Dynamic InteractiveSceneCfg builder +# -------------------------------------------------------------------------------------- +def build_tabletop_scene_cfg(): + """ + Auto-generate a multi-object InteractiveSceneCfg subclass: + - background, camera, robot + - object_00, object_01, ... based on _SCENE_CTX.obj_paths + """ + assert _SCENE_CTX is not None, ( + "init_scene_configs/init_scene_from_scene_dict must be called first." + ) + C = _SCENE_CTX + + base_attrs = {} + + # Light + base_attrs["light"] = AssetBaseCfg( + prim_path="/World/lightDome", + spawn=sim_utils.DomeLightCfg(intensity=4000.0, color=(1.0, 1.0, 1.0)), + ) + + _bg = _deep_update(DEFAULT_BG_PHYSICS, C.bg_physics) + _objs = [ + _deep_update(DEFAULT_OBJ_PHYSICS, obj_physics) for obj_physics in C.obj_physics + ] + + bg_mass_cfg = schemas_cfg.MassPropertiesCfg(**_bg["mass_props"]) + bg_rigid_cfg = schemas_cfg.RigidBodyPropertiesCfg(**_bg["rigid_props"]) + bg_colli_cfg = schemas_cfg.CollisionPropertiesCfg(**_bg["collision_props"]) + + # add another ground plane (mainly for better visualization) + base_attrs["backgroundn"] = AssetBaseCfg( + prim_path="/World/defaultGroundPlane", + spawn=sim_utils.GroundPlaneCfg(), + ) + z = float(C.ground_z if C.ground_z is not None else 0.0) + base_attrs["backgroundn"].init_state.pos = (0.0, 0.0, z - 0.2) + + # ---------- Background ---------- + if C.use_ground_plane: + # Simple horizontal ground; only z is customized. + # Note: GroundPlaneCfg doesn't take mass/rigid/collision configs (it's a helper), + # so we only set pose here. 
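+        # NOTE: this prim path matches the "/World/defaultGroundPlane" used by the
+        # visualization plane ("backgroundn") above; if Isaac Lab rejects the
+        # duplicate prim, give one of the two planes a distinct prim_path.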
+ base_attrs["background"] = AssetBaseCfg( + prim_path="/World/defaultGroundPlane", + spawn=sim_utils.GroundPlaneCfg(), + ) + z = float(C.ground_z if C.ground_z is not None else 0.0) + base_attrs["background"].init_state.pos = (0.0, 0.0, z) + # no usd_path assignment in __post_init__ when using ground + else: + # original USD background + base_attrs["background"] = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Background", + spawn=sim_utils.UsdFileCfg( + usd_path="", + mass_props=bg_mass_cfg, + rigid_props=bg_rigid_cfg, + collision_props=bg_colli_cfg, + ), + ) + + # Placeholder entries to be replaced in __post_init__ + base_attrs["camera"] = CameraCfg(prim_path="{ENV_REGEX_NS}/Camera") + base_attrs["robot"] = FRANKA_PANDA_HIGH_PD_CFG.replace( + prim_path="{ENV_REGEX_NS}/Robot" + ) + + # Instantiate objects + for i, usd_path in enumerate(C.obj_paths): + obj_mass_cfg = schemas_cfg.MassPropertiesCfg(**_objs[i]["mass_props"]) + obj_rigid_cfg = schemas_cfg.RigidBodyPropertiesCfg(**_objs[i]["rigid_props"]) + obj_colli_cfg = schemas_cfg.CollisionPropertiesCfg( + **_objs[i]["collision_props"] + ) + + obj_template = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Object", + spawn=sim_utils.UsdFileCfg( + usd_path="", + mass_props=obj_mass_cfg, + rigid_props=obj_rigid_cfg, + collision_props=obj_colli_cfg, + ), + ) + + name = f"object_{i:02d}" + cfg_i = copy.deepcopy(obj_template) + cfg_i.prim_path = f"{{ENV_REGEX_NS}}/{name}" + cfg_i.spawn.usd_path = usd_path + base_attrs[name] = cfg_i + + # Inject background path + replace camera/robot on finalize + def __post_init__(self): + if not C.use_ground_plane: + self.background.spawn.usd_path = C.background_path + self.camera = create_camera() + self.robot = create_robot() + + attrs = dict(base_attrs) + attrs["__doc__"] = "Auto-generated multi-object TableTop scene cfg." + attrs["__post_init__"] = __post_init__ + + DynamicSceneCfg = configclass( + type("TableTopSceneCfgAuto", (InteractiveSceneCfg,), attrs) + ) + return DynamicSceneCfg + + +# -------------------------------------------------------------------------------------- +# Build cam_dict & init directly from a raw scene dict +# -------------------------------------------------------------------------------------- +def init_scene_from_scene_dict( + scene: dict, + cfgs: dict, + *, + use_ground_plane: bool = False, +): + """ + Initialize SceneCtx directly from a raw scene dict. + If robot pose not provided, sample one with robot_placement_candidates_v2(). + """ + cam_dict = cfgs["cam_cfg"] + obj_paths = [o["usd"] for o in scene["objects"].values()] + background_path = scene["background"]["usd"] + + # overwrite physics + # Priority: args > scene > default + obj_physics = cfgs["physics_cfg"]["obj_physics"] + bg_physics = cfgs["physics_cfg"]["bg_physics"] + if obj_physics is None: + obj_physics = [o.get("physics", None) for o in scene["objects"].values()] + elif isinstance(obj_physics, dict): + obj_physics = [obj_physics for _ in scene["objects"].values()] + elif isinstance(obj_physics, list): + assert len(obj_physics) == len(scene["objects"]), ( + "obj_physics must be a list of the same length as scene['objects'] if provided." 
+ ) + pass + else: + raise TypeError("obj_physics must be None, a dict, or a list of dicts.") + bg_physics = ( + scene["background"].get("physics", None) if bg_physics is None else bg_physics + ) + + robot_pos = cfgs["robot_cfg"]["robot_pose"][:3] + robot_rot = cfgs["robot_cfg"]["robot_pose"][3:] + + ground_z = None + if use_ground_plane: + try: + ground_z = float(scene["plane"]["simulation"]["point"][2]) + except Exception as e: + raise ValueError( + f"use_ground_plane=True but scene['plane']['simulation'] missing/invalid: {e}" + ) + + # write global ctx (keep old fields the same) + global _SCENE_CTX + _SCENE_CTX = SceneCtx( + cam_dict=cam_dict, + obj_paths=obj_paths, + background_path=background_path, + robot_pos=list(robot_pos), + robot_rot=list(robot_rot), + bg_physics=bg_physics, + obj_physics=list(obj_physics), + use_ground_plane=use_ground_plane, + ground_z=ground_z, + ) + + return { + "cam_dict": cam_dict, + "obj_usd_paths": obj_paths, + "background_usd": background_path, + "robot_pos": robot_pos, + "robot_rot": robot_rot, + "use_ground_plane": use_ground_plane, + "ground_z": ground_z, + } + + +# -------------------------------------------------------------------------------------- +# Env factory +# -------------------------------------------------------------------------------------- +def _build_manip_env_cfg(scene_cfg_cls, *, num_envs: int, env_spacing: float = 2.5): + """Return a ManagerBasedRLEnvCfg subclass stitched together from sub-Cfgs.""" + from isaaclab.envs import ManagerBasedRLEnvCfg + from isaaclab.controllers.differential_ik_cfg import DifferentialIKControllerCfg + from isaaclab.envs.mdp.actions.actions_cfg import ( + DifferentialInverseKinematicsActionCfg, + ) + + @configclass + class ManipEnvCfg(ManagerBasedRLEnvCfg): + scene = scene_cfg_cls(num_envs=num_envs, env_spacing=env_spacing) + commands = CommandsCfg() + actions = ActionsCfg() + observations = ObservationsCfg() + events = EventCfg() + rewards = RewardsCfg() + terminations = TerminationsCfg() + curriculum = CurriculumCfg() + + def __post_init__(self): + # ---- Sim & PhysX ---- + self.decimation = 2 # 4 + self.episode_length_s = 5.0 + self.sim.dt = 0.01 + self.sim.render_interval = self.decimation + + physx = self.sim.physx + physx.enable_ccd = True + physx.solver_type = 1 # TGS + physx.num_position_iterations = 16 + physx.num_velocity_iterations = 2 + physx.contact_offset = 0.003 + physx.rest_offset = 0.0 + physx.max_depenetration_velocity = 0.5 + physx.enable_stabilization = True + physx.enable_sleeping = True + + # ---- IK arm & binary gripper ---- + self.actions.arm_action = DifferentialInverseKinematicsActionCfg( + asset_name="robot", + joint_names=["panda_joint.*"], + body_name="panda_hand", + controller=DifferentialIKControllerCfg( + command_type="pose", use_relative_mode=True, ik_method="dls" + ), + scale=0.5, + body_offset=DifferentialInverseKinematicsActionCfg.OffsetCfg( + pos=[0.0, 0.0, 0.107] + ), + ) + self.actions.gripper_action = mdp.BinaryJointPositionActionCfg( + asset_name="robot", + joint_names=["panda_finger.*"], + open_command_expr={"panda_finger_.*": 0.04}, + close_command_expr={"panda_finger_.*": 0.0}, + ) + self.commands.object_pose.body_name = "panda_hand" + + return ManipEnvCfg + + +def load_scene_json(key: str) -> dict: + """Return the raw scene dict from outputs//simulation/scene.json.""" + scene_path = Path.cwd() / "outputs" / key / "simulation" / "scene.json" + if not scene_path.exists(): + raise FileNotFoundError(scene_path) + return json.load(open(scene_path)) + + +def 
make_env( + cfgs: dict, + num_envs: int = 1, + device: str = "cuda:0", + bg_simplify: bool = False, +) -> Tuple["ManagerBasedRLEnv", "ManagerBasedRLEnvCfg"]: + """ + Public entry to construct a ManagerBasedRLEnv from outputs//simulation/scene.json. + Returns: (env, env_cfg) + """ + from isaaclab.envs import ManagerBasedRLEnv + + # Load scene json and initialize global SceneCtx + scene = cfgs["scene_cfg"] + init_scene_from_scene_dict( + scene, + cfgs=cfgs, + use_ground_plane=bg_simplify, + ) + + # Build scene & env cfg + SceneCfg = build_tabletop_scene_cfg() + ManipEnvCfg = _build_manip_env_cfg(SceneCfg, num_envs=num_envs, env_spacing=2.5) + env_cfg = ManipEnvCfg() + env_cfg.sim.device = device + env_cfg.scene.num_envs = num_envs # double safety + + env = ManagerBasedRLEnv(cfg=env_cfg) + return env, env_cfg + + +# -------------------------------------------------------------------------------------- +# Observation/Action/Reward/Termination/Curriculum config classes +# -------------------------------------------------------------------------------------- +@configclass +class CommandsCfg: + """Command terms for the MDP.""" + + object_pose = mdp.UniformPoseCommandCfg( + asset_name="robot", + body_name=MISSING, + resampling_time_range=(5.0, 5.0), + debug_vis=False, + ranges=mdp.UniformPoseCommandCfg.Ranges( + pos_x=(0.4, 0.6), + pos_y=(-0.25, 0.25), + pos_z=(0.25, 0.5), + roll=(0.0, 0.0), + pitch=(0.0, 0.0), + yaw=(0.0, 0.0), + ), + ) + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + arm_action: ( + mdp.JointPositionActionCfg | mdp.DifferentialInverseKinematicsActionCfg + ) = MISSING # type: ignore + gripper_action: mdp.BinaryJointPositionActionCfg = MISSING # type: ignore + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel) + joint_pos = ObsTerm(func=mdp.joint_pos) + joint_vel_rel = ObsTerm(func=mdp.joint_vel_rel) + joint_vel = ObsTerm(func=mdp.joint_vel) + target_object_position = ObsTerm( + func=mdp.generated_commands, params={"command_name": "object_pose"} + ) + actions = ObsTerm(func=mdp.last_action) + rgb = ObsTerm( + func=mdp.image, + params={ + "sensor_cfg": SceneEntityCfg(name="camera"), + "data_type": "rgb", + "convert_perspective_to_orthogonal": False, + "normalize": False, + }, + ) + depth = ObsTerm( + func=mdp.image, + params={ + "sensor_cfg": SceneEntityCfg(name="camera"), + "data_type": "distance_to_image_plane", + "convert_perspective_to_orthogonal": False, + "normalize": False, + }, + ) + segmask = ObsTerm( + func=mdp.image, + params={ + "sensor_cfg": SceneEntityCfg(name="camera"), + "data_type": "instance_id_segmentation_fast", + "convert_perspective_to_orthogonal": False, + "normalize": False, + }, + ) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = False + + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset") + + +@configclass +class RewardsCfg: + """Reward terms for the MDP (kept simple).""" + + action_rate = RewTerm(func=mdp.action_rate_l2, weight=-1e-4) + joint_vel = RewTerm( + func=mdp.joint_vel_l2, + weight=-1e-4, + params={"asset_cfg": SceneEntityCfg("robot")}, + ) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + +@configclass +class CurriculumCfg: + 
"""Curriculum terms for the MDP.""" + + action_rate = CurrTerm( + func=mdp.modify_reward_weight, + params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000}, + ) + joint_vel = CurrTerm( + func=mdp.modify_reward_weight, + params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000}, + ) + + +# Public symbols +__all__ = [ + # Scene init + "init_scene_configs", + "init_scene_from_scene_dict", + # Scene builders + "create_camera", + "create_robot", + "build_tabletop_scene_cfg", + # Placement samplers + "robot_placement_candidates", + "robot_placement_candidates_v2", + # Manager config groups + "CommandsCfg", + "ActionsCfg", + "ObservationsCfg", + "EventCfg", + "RewardsCfg", + "TerminationsCfg", + "CurriculumCfg", + # Env factory + "make_env", +] diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py b/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py new file mode 100755 index 0000000..8f29fb3 --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py @@ -0,0 +1,787 @@ +""" +Heuristic manipulation policy in Isaac Lab simulation. +Using grasping and motion planning to perform object manipulation tasks. +""" +from __future__ import annotations + +# ───────────────────────────────────────────────────────────────────────────── AppLauncher ───────────────────────────────────────────────────────────────────────────── +import argparse, os, json, random, transforms3d +from pathlib import Path +import numpy as np +import torch +import yaml +from isaaclab.app import AppLauncher + +# ───────────────────────────────────────────────────────────────────────────── CLI ───────────────────────────────────────────────────────────────────────────── +parser = argparse.ArgumentParser("sim_policy") +parser.add_argument("--key", type=str, default="demo_video", help="scene key (outputs/)") +parser.add_argument("--robot", type=str, default="franka") +parser.add_argument("--num_envs", type=int, default=1) +parser.add_argument("--num_trials", type=int, default=1) +parser.add_argument("--teleop_device", type=str, default="keyboard") +parser.add_argument("--sensitivity", type=float, default=1.0) +AppLauncher.add_app_launcher_args(parser) +args_cli = parser.parse_args() +args_cli.enable_cameras = True +args_cli.headless = False # headless mode for batch execution +app_launcher = AppLauncher(vars(args_cli)) +simulation_app = app_launcher.app + +# ───────────────────────────────────────────────────────────────────────────── Runtime imports ───────────────────────────────────────────────────────────────────────────── +import isaaclab.sim as sim_utils +from isaaclab.utils.math import subtract_frame_transforms + +from graspnetAPI.grasp import GraspGroup + + +# ───────────────────────────────────────────────────────────────────────────── Simulation environments ───────────────────────────────────────────────────────────────────────────── +from sim_base import BaseSimulator, get_next_demo_id +from sim_env_factory import make_env +from sim_preprocess.grasp_utils import get_best_grasp_with_hints +from sim_utils.transform_utils import pose_to_mat, mat_to_pose, grasp_to_world, grasp_approach_axis_batch +from sim_utils.sim_utils import load_sim_parameters + +BASE_DIR = Path.cwd() +img_folder = args_cli.key +out_dir = BASE_DIR / "outputs" + + +# ───────────────────────────────────────────────────────────────────────────── Heuristic Manipulation ───────────────────────────────────────────────────────────────────────────── +class 
HeuristicManipulation(BaseSimulator): + """ + Physical trial-and-error grasping with approach-axis perturbation: + • Multiple grasp proposals executed in parallel; + • Every attempt does reset → pre → grasp → close → lift → check; + • Early stops when any env succeeds; then re-exec for logging. + """ + def __init__(self, sim, scene, sim_cfgs: dict): + robot_pose = torch.tensor( + sim_cfgs["robot_cfg"]["robot_pose"], + dtype=torch.float32, + device=sim.device + ) + super().__init__( + sim=sim, sim_cfgs=sim_cfgs, scene=scene, args=args_cli, + robot_pose=robot_pose, cam_dict=sim_cfgs["cam_cfg"], + out_dir=out_dir, img_folder=img_folder, + enable_motion_planning=True, + set_physics_props=True, debug_level=0, + ) + + self.selected_object_id = sim_cfgs["demo_cfg"]["manip_object_id"] + self.traj_key = sim_cfgs["demo_cfg"]["traj_key"] + self.traj_path = sim_cfgs["demo_cfg"]["traj_path"] + self.goal_offset = [0, 0, sim_cfgs["demo_cfg"]["goal_offset"]] + self.grasp_path = sim_cfgs["demo_cfg"]["grasp_path"] + self.grasp_idx = sim_cfgs["demo_cfg"]["grasp_idx"] + self.grasp_pre = sim_cfgs["demo_cfg"]["grasp_pre"] + self.grasp_delta = sim_cfgs["demo_cfg"]["grasp_delta"] + self.load_obj_goal_traj() + + def load_obj_goal_traj(self): + """ + Load the relative trajectory Δ_w (T,4,4) and precompute the absolute + object goal trajectory for each env using the *actual current* object pose + in the scene as T_obj_init (not env_origin). + T_obj_goal[t] = Δ_w[t] @ T_obj_init + + Sets: + self.obj_rel_traj : np.ndarray or None, shape (T,4,4) + self.obj_goal_traj_w: np.ndarray or None, shape (B,T,4,4) + """ + # —— 1) Load Δ_w —— + rel = np.load(self.traj_path).astype(np.float32) + self.obj_rel_traj = rel # (T,4,4) + + self.reset() + + # —— 2) Read current object initial pose per env as T_obj_init —— + B = self.scene.num_envs + # obj_state = self.object_prim.data.root_com_state_w[:, :7] # [B,7], pos(3)+quat(wxyz)(4) + obj_state = self.object_prim.data.root_state_w[:, :7] # [B,7], pos(3)+quat(wxyz)(4) + self.show_goal(obj_state[:, :3], obj_state[:, 3:7]) + + obj_state_np = obj_state.detach().cpu().numpy().astype(np.float32) + offset_np = np.asarray(self.goal_offset, dtype=np.float32).reshape(3) + obj_state_np[:, :3] += offset_np # raise a bit to avoid collision + + # Note: here the relative traj Δ_w is defined in world frame with origin (0,0,0), + # Hence, we need to normalize it to each env's origin frame. + origins = self.scene.env_origins.detach().cpu().numpy().astype(np.float32) # (B,3) + obj_state_np[:, :3] -= origins # normalize to env origin frame + + # —— 3) Precompute absolute object goals for all envs —— + T = rel.shape[0] + obj_goal = np.zeros((B, T, 4, 4), dtype=np.float32) + for b in range(B): + T_init = pose_to_mat(obj_state_np[b, :3], obj_state_np[b, 3:7]) # (4,4) + for t in range(T): + goal = rel[t] @ T_init + goal[:3, 3] += origins[b] # back to world frame + obj_goal[b, t] = goal + + self.obj_goal_traj_w = obj_goal # [B, T, 4, 4] + + def follow_object_goals(self, start_joint_pos, sample_step=1, visualize=True): + """ + Follow precomputed object absolute trajectory with automatic restart on failure. + If motion planning fails, raises an exception to trigger re-grasp from step 0. 
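+
+        Per env b and step t, the EE goal is derived under a rigid-grasp
+        assumption:
+            T_ee_in_obj = inv(T_obj_w) @ T_ee_w          # fixed at grasp time
+            T_ee_goal   = T_obj_goal[b, t] @ T_ee_in_obj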
+ """ + B = self.scene.num_envs + obj_goal_all = self.obj_goal_traj_w # [B, T, 4, 4] + T = obj_goal_all.shape[1] + + ee_w = self.robot.data.body_state_w[:, self.robot_entity_cfg.body_ids[0], 0:7] # [B,7] + obj_w = self.object_prim.data.root_state_w[:, :7] + + T_ee_in_obj = [] + for b in range(B): + T_ee_w = pose_to_mat(ee_w[b, :3], ee_w[b, 3:7]) + T_obj_w = pose_to_mat(obj_w[b, :3], obj_w[b, 3:7]) + T_ee_in_obj.append((np.linalg.inv(T_obj_w) @ T_ee_w).astype(np.float32)) + + joint_pos = start_joint_pos + root_w = self.robot.data.root_state_w[:, 0:7] + + t_iter = list(range(0, T, sample_step)) + t_iter = t_iter + [T-1] if t_iter[-1] != T-1 else t_iter + + for t in t_iter: + goal_pos_list, goal_quat_list = [], [] + print(f"[INFO] follow object goal step {t}/{T}") + for b in range(B): + T_obj_goal = obj_goal_all[b, t] + T_ee_goal = T_obj_goal @ T_ee_in_obj[b] + pos_b, quat_b = mat_to_pose(T_ee_goal) + goal_pos_list.append(pos_b.astype(np.float32)) + goal_quat_list.append(quat_b.astype(np.float32)) + + goal_pos = torch.as_tensor(np.stack(goal_pos_list), dtype=torch.float32, device=self.sim.device) + goal_quat = torch.as_tensor(np.stack(goal_quat_list), dtype=torch.float32, device=self.sim.device) + + if visualize: + self.show_goal(goal_pos, goal_quat) + ee_pos_b, ee_quat_b = subtract_frame_transforms( + root_w[:, :3], root_w[:, 3:7], goal_pos, goal_quat + ) + + joint_pos, success = self.move_to(ee_pos_b, ee_quat_b, gripper_open=False) + + # Check for critical motion planning failure + if joint_pos is None or success is None: + print(f"[CRITICAL] Motion planning failed at step {t}/{T}") + print("[CRITICAL] Object likely dropped - need to restart from grasp") + raise RuntimeError("MotionPlanningFailure_RestartNeeded") + + if torch.any(success == False): + print(f"[WARN] Some envs failed motion planning at step {t}/{T}, but continuing...") + + self.save_dict["actions"].append(np.concatenate([ee_pos_b.cpu().numpy(), ee_quat_b.cpu().numpy(), np.ones((B, 1))], axis=1)) + + joint_pos = self.wait(gripper_open=True, steps=10) + return joint_pos + + + def viz_object_goals(self, sample_step=1, hold_steps=20): + self.reset() + self.wait(gripper_open=True, steps=10) + B = self.scene.num_envs + env_ids = torch.arange(B, device=self.object_prim.device, dtype=torch.long) + goals = self.obj_goal_traj_w + t_iter = list(range(0, goals.shape[1], sample_step)) + t_iter = t_iter + [goals.shape[1]-1] if t_iter[-1] != goals.shape[1]-1 else t_iter + for t in t_iter: + print(f"[INFO] viz object goal step {t}/{goals.shape[1]}") + pos_list, quat_list = [], [] + for b in range(B): + pos, quat = mat_to_pose(goals[b, t]) + pos_list.append(pos.astype(np.float32)) + quat_list.append(quat.astype(np.float32)) + pose = self.object_prim.data.root_state_w[:, :7] + # pose = self.object_prim.data.root_com_state_w[:, :7] + pose[:, :3] = torch.tensor(np.stack(pos_list), dtype=torch.float32, device=pose.device) + pose[:, 3:7] = torch.tensor(np.stack(quat_list), dtype=torch.float32, device=pose.device) + self.show_goal(pose[:, :3], pose[:, 3:7]) + + for _ in range(hold_steps): + self.object_prim.write_root_pose_to_sim(pose, env_ids=env_ids) + self.object_prim.write_data_to_sim() + self.step() + + # ---------- Helpers ---------- + def _to_base(self, pos_w: np.ndarray | torch.Tensor, quat_w: np.ndarray | torch.Tensor): + """World → robot base frame for all envs.""" + root = self.robot.data.root_state_w[:, 0:7] # [B,7] + p_w, q_w = self._ensure_batch_pose(pos_w, quat_w) + pb, qb = subtract_frame_transforms( + root[:, 0:3], root[:, 3:7], 
p_w, q_w + ) + return pb, qb # [B,3], [B,4] + + # ---------- Batched execution & lift-check ---------- + def execute_and_lift_once_batch(self, info: dict, lift_height=0.06, position_threshold=0.1) -> tuple[np.ndarray, np.ndarray]: + """ + Reset → pre → grasp → close → lift → hold; return (success[B], score[B]). + Now propagates motion planning failures by returning (None, None). + """ + B = self.scene.num_envs + self.reset() + + # open gripper buffer + jp = self.robot.data.joint_pos[:, self.robot_entity_cfg.joint_ids] + self.wait(gripper_open=True, steps=4) + + # pre-grasp + jp, success = self.move_to(info["pre_p_b"], info["pre_q_b"], gripper_open=True) + if jp is None or success is None: + print("[CRITICAL] Motion planning failed during pre-grasp in grasp trial") + return None, None + if torch.any(success==False): + return np.zeros(B, bool), np.zeros(B, np.float32) + jp = self.wait(gripper_open=True, steps=3) + + # grasp + jp, success = self.move_to(info["p_b"], info["q_b"], gripper_open=True) + if jp is None or success is None: + print("[CRITICAL] Motion planning failed during grasp approach in grasp trial") + return None, None + if torch.any(success==False): + return np.zeros(B, bool), np.zeros(B, np.float32) + jp = self.wait(gripper_open=True, steps=2) + + # close + jp = self.wait(gripper_open=False, steps=6) + + # initial heights + obj0 = self.object_prim.data.root_com_pos_w[:, 0:3] + ee_w = self.robot.data.body_state_w[:, self.robot_entity_cfg.body_ids[0], 0:7] + ee_p0 = ee_w[:, :3] + robot_ee_z0 = ee_p0[:, 2].clone() + obj_z0 = obj0[:, 2].clone() + print(f"[INFO] mean object z0={obj_z0.mean().item():.3f} m, mean EE z0={robot_ee_z0.mean().item():.3f} m") # ADD THIS BACK + + # lift: keep orientation, add height + ee_q = ee_w[:, 3:7] + target_p = ee_p0.clone() + target_p[:, 2] += lift_height + + root = self.robot.data.root_state_w[:, 0:7] + p_lift_b, q_lift_b = subtract_frame_transforms( + root[:, 0:3], root[:, 3:7], + target_p, ee_q + ) + jp, success = self.move_to(p_lift_b, q_lift_b, gripper_open=False) + + # Check for motion planning failure + if jp is None or success is None: + print("[CRITICAL] Motion planning failed during lift in grasp trial") + return None, None + + if torch.any(success==False): + return np.zeros(B, bool), np.zeros(B, np.float32) + jp = self.wait(gripper_open=False, steps=8) + + # final heights and success checking + obj1 = self.object_prim.data.root_com_pos_w[:, 0:3] + ee_w1 = self.robot.data.body_state_w[:, self.robot_entity_cfg.body_ids[0], 0:7] + robot_ee_z1 = ee_w1[:, 2] + obj_z1 = obj1[:, 2] + print(f"[INFO] mean object z1={obj_z1.mean().item():.3f} m, mean EE z1={robot_ee_z1.mean().item():.3f} m") # ADD THIS BACK + + # Lift check + ee_diff = robot_ee_z1 - robot_ee_z0 + obj_diff = obj_z1 - obj_z0 + lift_check = (torch.abs(ee_diff - obj_diff) <= 0.01) & \ + (torch.abs(ee_diff) >= 0.5 * lift_height) & \ + (torch.abs(obj_diff) >= 0.5 * lift_height) + + # Goal proximity check + final_goal_matrices = self.obj_goal_traj_w[:, -1, :, :] + goal_positions_np = final_goal_matrices[:, :3, 3] + goal_positions = torch.tensor(goal_positions_np, dtype=torch.float32, device=self.sim.device) + current_obj_pos = self.object_prim.data.root_state_w[:, :3] + distances = torch.norm(current_obj_pos - goal_positions, dim=1) + proximity_check = distances <= position_threshold + + # Combined check + lifted = lift_check & proximity_check + + # Score calculation + score = torch.zeros_like(ee_diff) + if torch.any(lifted): + base_score = 1000.0 + epsilon = 0.001 + score[lifted] = 
base_score / (distances[lifted] + epsilon) + + # ADD THESE DEBUG PRINTS BACK + print(f"[INFO] Lift check passed: {lift_check.sum().item()}/{B}") + print(f"[INFO] Proximity check passed: {proximity_check.sum().item()}/{B}") + print(f"[INFO] Final lifted flag (both checks): {lifted.sum().item()}/{B}") + + if B <= 10: # Detailed output for small batches + for b in range(B): + print(f" Env {b}: lift={lift_check[b].item()}, prox={proximity_check[b].item()}, " + f"dist={distances[b].item():.4f}m, score={score[b].item():.2f}") + else: + print(f"[INFO] Distance to goal - mean: {distances.mean().item():.4f}m, " + f"min: {distances.min().item():.4f}m, max: {distances.max().item():.4f}m") + + return lifted.detach().cpu().numpy().astype(bool), score.detach().cpu().numpy().astype(np.float32) + + def build_grasp_info( + self, + grasp_pos_w_batch: np.ndarray, # (B,3) GraspNet proposal in world frame + grasp_quat_w_batch: np.ndarray, # (B,4) wxyz + pre_dist_batch: np.ndarray, # (B,) + delta_batch: np.ndarray # (B,) + ) -> dict: + """ + return grasp info dict for all envs in batch. + """ + B = self.scene.num_envs + p_w = np.asarray(grasp_pos_w_batch, dtype=np.float32).reshape(B, 3) + q_w = np.asarray(grasp_quat_w_batch, dtype=np.float32).reshape(B, 4) + pre_d = np.asarray(pre_dist_batch, dtype=np.float32).reshape(B) + delt = np.asarray(delta_batch, dtype=np.float32).reshape(B) + + a_batch = grasp_approach_axis_batch(q_w) # (B,3) + + pre_p_w = (p_w - pre_d[:, None] * a_batch).astype(np.float32) + gra_p_w = (p_w + delt[:, None] * a_batch).astype(np.float32) + + origins = self.scene.env_origins.detach().cpu().numpy().astype(np.float32) # (B,3) + pre_p_w = pre_p_w + origins + gra_p_w = gra_p_w + origins + + pre_pb, pre_qb = self._to_base(pre_p_w, q_w) + gra_pb, gra_qb = self._to_base(gra_p_w, q_w) + + return { + "pre_p_w": pre_p_w, "p_w": gra_p_w, "q_w": q_w, + "pre_p_b": pre_pb, "pre_q_b": pre_qb, + "p_b": gra_pb, "q_b": gra_qb, + "pre_dist": pre_d, "delta": delt, + } + + + def grasp_trials(self, gg, std: float = 0.0005, max_retries: int = 3): + """ + Try grasp proposals with automatic recovery from motion planning failures. + Returns dict with success flag and chosen grasp parameters. 
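+        A successful trial returns, for example (illustrative values):
+            {"success": True, "chosen_pose_w": (p_w, q_w),
+             "chosen_pre": 0.12, "chosen_delta": 0.0004}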
+ + Args: + gg: GraspGroup with grasp proposals + std: Standard deviation for random perturbations + max_retries: Number of retry attempts if motion planning fails + """ + B = self.scene.num_envs + idx_all = list(range(len(gg))) + if len(idx_all) == 0: + print("[ERR] empty grasp list.") + return { + "success": False, + "chosen_pose_w": None, + "chosen_pre": None, + "chosen_delta": None, + } + + rng = np.random.default_rng() + pre_dist_const = 0.12 # m + + # Retry loop for grasp selection phase + for retry_attempt in range(max_retries): + try: + print(f"\n{'='*60}") + print(f"GRASP SELECTION ATTEMPT {retry_attempt + 1}/{max_retries}") + print(f"{'='*60}\n") + + success = False + chosen_pose_w = None # (p_w, q_w) + chosen_pre = None + chosen_delta = None + + # Assign different grasp proposals to different envs + for start in range(0, len(idx_all), B): + block = idx_all[start : start + B] + if len(block) < B: + block = block + [block[-1]] * (B - len(block)) + + grasp_pos_w_batch, grasp_quat_w_batch = [], [] + for idx in block: + p_w, q_w = grasp_to_world(gg[int(idx)]) + grasp_pos_w_batch.append(p_w.astype(np.float32)) + grasp_quat_w_batch.append(q_w.astype(np.float32)) + grasp_pos_w_batch = np.stack(grasp_pos_w_batch, axis=0) # (B,3) + grasp_quat_w_batch = np.stack(grasp_quat_w_batch, axis=0) # (B,4) + self.show_goal(grasp_pos_w_batch, grasp_quat_w_batch) + + # Random disturbance along approach axis + pre_dist_batch = np.full((B,), pre_dist_const, dtype=np.float32) + delta_batch = rng.normal(0.0, std, size=(B,)).astype(np.float32) + + info = self.build_grasp_info(grasp_pos_w_batch, grasp_quat_w_batch, + pre_dist_batch, delta_batch) + + ok_batch, score_batch = self.execute_and_lift_once_batch(info) + + # Check if motion planning returned valid results + if ok_batch is None or score_batch is None: + print(f"[CRITICAL] Motion planning failed during grasp trial at block[{start}:{start+B}]") + raise RuntimeError("GraspTrialMotionPlanningFailure") + + ok_cnt = int(ok_batch.sum()) + print(f"[SEARCH] block[{start}:{start+B}] -> success {ok_cnt}/{B}") + + if ok_cnt > 0: + winner = int(np.argmax(score_batch)) + chosen_pose_w = (grasp_pos_w_batch[winner], grasp_quat_w_batch[winner]) + chosen_pre = float(pre_dist_batch[winner]) + chosen_delta = float(delta_batch[winner]) + success = True + print(f"[SUCCESS] Found valid grasp on attempt {retry_attempt + 1}") + return { + "success": success, + "chosen_pose_w": chosen_pose_w, + "chosen_pre": chosen_pre, + "chosen_delta": chosen_delta, + } + + # If we get here, no grasp succeeded in this attempt + if not success: + print(f"[WARN] No proposal succeeded in grasp trial attempt {retry_attempt + 1}/{max_retries}") + if retry_attempt < max_retries - 1: + print("[INFO] Retrying grasp selection...") + continue + else: + print("[ERR] No proposal succeeded to lift after all grasp trial attempts.") + return { + "success": False, + "chosen_pose_w": None, + "chosen_pre": None, + "chosen_delta": None, + } + + except RuntimeError as e: + if "GraspTrialMotionPlanningFailure" in str(e): + print(f"\n[RESTART] Motion planning failed during grasp trial on attempt {retry_attempt + 1}") + print(f"[RESTART] Remaining grasp trial attempts: {max_retries - retry_attempt - 1}\n") + + # Clear corrupted data and continue to next retry + self.clear_data() + + if retry_attempt < max_retries - 1: + continue + else: + print("[ERR] Grasp trial failed after all retry attempts") + return { + "success": False, + "chosen_pose_w": None, + "chosen_pre": None, + "chosen_delta": None, + } + else: + 
# Unknown error, re-raise + raise + + except Exception as e: + print(f"[ERROR] Unexpected error during grasp trial: {type(e).__name__}: {e}") + if retry_attempt < max_retries - 1: + print(f"[ERROR] Attempting grasp trial retry {retry_attempt + 1}/{max_retries}...") + self.clear_data() + continue + else: + return { + "success": False, + "chosen_pose_w": None, + "chosen_pre": None, + "chosen_delta": None, + } + + # Fallback (should not reach here) + return { + "success": False, + "chosen_pose_w": None, + "chosen_pre": None, + "chosen_delta": None, + } + + def is_success(self, position_threshold: float = 0.05) -> bool: + """ + Verify if the manipulation task succeeded by comparing final object position + with the goal position from the trajectory. + + Args: + position_threshold: Distance threshold in meters (default: 0.05m = 5cm) + + Returns: + bool: True if task succeeded, False otherwise + """ + # Get the final goal position from the precomputed trajectory + # self.obj_goal_traj_w has shape [B, T, 4, 4] + B = self.scene.num_envs + final_goal_matrices = self.obj_goal_traj_w[:, -1, :, :] # [B, 4, 4] - last timestep + + # Extract goal positions (translation part of the transform matrix) + goal_positions_np = final_goal_matrices[:, :3, 3] # [B, 3] - xyz positions (numpy) + + # Convert to torch tensor + goal_positions = torch.tensor(goal_positions_np, dtype=torch.float32, device=self.sim.device) + + # Get current object positions + current_obj_pos = self.object_prim.data.root_state_w[:, :3] # [B, 3] + + # Calculate distances + distances = torch.norm(current_obj_pos - goal_positions, dim=1) # [B] + + # Check if all environments succeeded + success_mask = distances <= position_threshold + + # Print results for each environment + print("\n" + "="*50) + print("TASK VERIFICATION RESULTS") + print("="*50) + for b in range(B): + status = "SUCCESS" if success_mask[b] else "FAILURE" + print(f"Env {b}: {status} (distance: {distances[b].item():.4f}m, threshold: {position_threshold}m)") + print(f" Goal position: [{goal_positions[b, 0].item():.3f}, {goal_positions[b, 1].item():.3f}, {goal_positions[b, 2].item():.3f}]") + print(f" Final position: [{current_obj_pos[b, 0].item():.3f}, {current_obj_pos[b, 1].item():.3f}, {current_obj_pos[b, 2].item():.3f}]") + + # Overall result + all_success = torch.all(success_mask).item() + print("="*50) + if all_success: + print("TASK VERIFIER: SUCCESS - All environments completed successfully!") + else: + success_count = torch.sum(success_mask).item() + print(f"TASK VERIFIER: FAILURE - {success_count}/{B} environments succeeded") + print("="*50 + "\n") + + return all_success + + + def inference(self, std: float = 0.0, max_restart_attempts: int = 10, + max_grasp_retries: int = 3, regrasp_after_failures: int = 3) -> bool: + """ + Main function with automatic restart on motion planning failures. + Now has separate retry loops for: + 1. Grasp selection phase (max_grasp_retries) + 2. Trajectory following phase (max_restart_attempts) + 3. 
Re-grasp after N consecutive trajectory failures (regrasp_after_failures) + """ + B = self.scene.num_envs + + # Read grasp proposals (only once) + npy_path = self.grasp_path + if npy_path is None or (not os.path.exists(npy_path)): + print(f"[ERR] grasps npy not found: {npy_path}") + return False + gg = GraspGroup().from_npy(npy_file_path=npy_path) + gg = get_best_grasp_with_hints(gg, point=None, direction=[0, 0, -1]) + + # Track consecutive trajectory failures for re-grasping + consecutive_trajectory_failures = 0 + total_grasp_attempts = 0 + max_total_grasp_attempts = 5 # Prevent infinite re-grasping loop + + # ========== OUTER LOOP: RE-GRASP IF NEEDED ========== + while total_grasp_attempts < max_total_grasp_attempts: + total_grasp_attempts += 1 + + # ========== PHASE 1: GRASP SELECTION (with retries) ========== + if self.grasp_idx >= 0: + # Fixed grasp index mode + if self.grasp_idx >= len(gg): + print(f"[ERR] grasp_idx {self.grasp_idx} out of range [0,{len(gg)})") + return False + print(f"[INFO] using fixed grasp index {self.grasp_idx} for all envs.") + p_w, q_w = grasp_to_world(gg[int(self.grasp_idx)]) + ret = { + "success": True, + "chosen_pose_w": (p_w.astype(np.float32), q_w.astype(np.float32)), + "chosen_pre": self.grasp_pre if self.grasp_pre is not None else 0.12, + "chosen_delta": self.grasp_delta if self.grasp_delta is not None else 0.0, + } + else: + # Automatic grasp selection with retries + if total_grasp_attempts > 1: + print(f"\n{'#'*60}") + print(f"RE-GRASP ATTEMPT {total_grasp_attempts}/{max_total_grasp_attempts}") + print(f"{'#'*60}\n") + ret = self.grasp_trials(gg, std=std, max_retries=max_grasp_retries) + + if not ret["success"]: + print("[ERR] No successful grasp found after all attempts") + return False + + print(f"\n[SUCCESS] Grasp selected: delta={ret['chosen_delta']:.4f}m") + + # Reset trajectory failure counter for new grasp + consecutive_trajectory_failures = 0 + + # ========== PHASE 2: TRAJECTORY FOLLOWING (with restarts) ========== + for restart_attempt in range(max_restart_attempts): + try: + print(f"\n{'='*60}") + print(f"TRAJECTORY EXECUTION ATTEMPT {restart_attempt + 1}/{max_restart_attempts}") + print(f"{'='*60}\n") + + # Prepare grasp info + p_win, q_win = ret["chosen_pose_w"] + p_all = np.repeat(p_win.reshape(1, 3), B, axis=0) + q_all = np.repeat(q_win.reshape(1, 4), B, axis=0) + pre_all = np.full((B,), ret["chosen_pre"], dtype=np.float32) + del_all = np.full((B,), ret["chosen_delta"], dtype=np.float32) + info_all = self.build_grasp_info(p_all, q_all, pre_all, del_all) + + # Reset and execute: open → pre → grasp → close → follow_object_goals + self.reset() + + # Save grasp poses relative to camera + cam_p = self.camera.data.pos_w + cam_q = self.camera.data.quat_w_ros + gp_w = torch.as_tensor(info_all["p_w"], dtype=torch.float32, device=self.sim.device) + gq_w = torch.as_tensor(info_all["q_w"], dtype=torch.float32, device=self.sim.device) + pre_w = torch.as_tensor(info_all["pre_p_w"], dtype=torch.float32, device=self.sim.device) + gp_cam, gq_cam = subtract_frame_transforms(cam_p, cam_q, gp_w, gq_w) + pre_cam, pre_qcm = subtract_frame_transforms(cam_p, cam_q, pre_w, gq_w) + self.save_dict["grasp_pose_cam"] = torch.cat([gp_cam, gq_cam], dim=1).unsqueeze(0).cpu().numpy() + self.save_dict["pregrasp_pose_cam"] = torch.cat([pre_cam, pre_qcm], dim=1).unsqueeze(0).cpu().numpy() + + jp = self.robot.data.joint_pos[:, self.robot_entity_cfg.joint_ids] + self.wait(gripper_open=True, steps=4) + + # Pre-grasp + print("[INFO] Moving to pre-grasp position...") + jp, 
success = self.move_to(info_all["pre_p_b"], info_all["pre_q_b"], gripper_open=True) + if jp is None or torch.any(success == False): + print("[WARN] Pre-grasp motion failed, restarting...") + consecutive_trajectory_failures += 1 + if consecutive_trajectory_failures >= regrasp_after_failures: + break # Exit to re-grasp + continue + self.save_dict["actions"].append(np.concatenate([info_all["pre_p_b"].cpu().numpy(), info_all["pre_q_b"].cpu().numpy(), np.zeros((B, 1))], axis=1)) + jp = self.wait(gripper_open=True, steps=3) + + # Grasp + print("[INFO] Moving to grasp position...") + jp, success = self.move_to(info_all["p_b"], info_all["q_b"], gripper_open=True) + if jp is None or torch.any(success == False): + print("[WARN] Grasp motion failed, restarting...") + consecutive_trajectory_failures += 1 + if consecutive_trajectory_failures >= regrasp_after_failures: + break # Exit to re-grasp + continue + self.save_dict["actions"].append(np.concatenate([info_all["p_b"].cpu().numpy(), info_all["q_b"].cpu().numpy(), np.zeros((B, 1))], axis=1)) + + # Close gripper + print("[INFO] Closing gripper...") + jp = self.wait(gripper_open=False, steps=50) + self.save_dict["actions"].append(np.concatenate([info_all["p_b"].cpu().numpy(), info_all["q_b"].cpu().numpy(), np.ones((B, 1))], axis=1)) + + # Follow object trajectory - THIS CAN RAISE RuntimeError + print("[INFO] Following object trajectory...") + jp = self.follow_object_goals(jp, sample_step=5, visualize=True) + + # If we reach here, trajectory following succeeded! + print("[SUCCESS] Trajectory completed successfully!") + + # Verify task completion + task_success = self.is_success(position_threshold=0.1) + if task_success: + print("[SUCCESS] Task verification passed!") + self.save_data() + return True + else: + print("[WARN] Task verification failed, but trajectory completed. Saving anyway...") + self.save_data() + return True + + except RuntimeError as e: + if "MotionPlanningFailure_RestartNeeded" in str(e): + print(f"\n[RESTART] Motion planning failed mid-trajectory on attempt {restart_attempt + 1}") + print("[RESTART] Object was likely dropped. 
Restarting from grasp phase...") + print(f"[RESTART] Remaining attempts: {max_restart_attempts - restart_attempt - 1}\n") + + # Clear the corrupted trajectory data + self.clear_data() + consecutive_trajectory_failures += 1 + + # Check if we need to re-grasp + if consecutive_trajectory_failures >= regrasp_after_failures: + print(f"\n{'!'*60}") + print(f"CONSECUTIVE TRAJECTORY FAILURES: {consecutive_trajectory_failures}") + print(f"RE-SELECTING GRASP (total attempt {total_grasp_attempts + 1}/{max_total_grasp_attempts})") + print(f"{'!'*60}\n") + break # Exit trajectory loop to re-grasp + + # Continue to next restart attempt + continue + else: + # Unknown error, re-raise + raise + + except Exception as e: + print(f"[ERROR] Unexpected error during execution: {type(e).__name__}: {e}") + print(f"[ERROR] Attempting restart {restart_attempt + 1}/{max_restart_attempts}...") + self.clear_data() + consecutive_trajectory_failures += 1 + + # Check if we need to re-grasp + if consecutive_trajectory_failures >= regrasp_after_failures: + print(f"\n{'!'*60}") + print(f"CONSECUTIVE TRAJECTORY FAILURES: {consecutive_trajectory_failures}") + print(f"RE-SELECTING GRASP (total attempt {total_grasp_attempts + 1}/{max_total_grasp_attempts})") + print(f"{'!'*60}\n") + break # Exit trajectory loop to re-grasp + + continue + + # Check why we exited the trajectory loop + if consecutive_trajectory_failures >= regrasp_after_failures: + # We broke out to re-grasp + if total_grasp_attempts >= max_total_grasp_attempts: + print(f"\n[FAILURE] Exhausted all grasp attempts ({max_total_grasp_attempts})") + return False + # Continue outer loop to select new grasp + continue + else: + # We exhausted all restart attempts without hitting re-grasp threshold + print(f"\n[FAILURE] Failed after {max_restart_attempts} trajectory restart attempts") + return False + + # If we exit the outer loop, we exhausted all grasp attempts + print(f"\n[FAILURE] Failed after {max_total_grasp_attempts} total grasp selection attempts") + return False + +# ───────────────────────────────────────────────────────────────────────────── Entry Point ───────────────────────────────────────────────────────────────────────────── +def main(): + sim_cfgs = load_sim_parameters(BASE_DIR, args_cli.key) + env, _ = make_env( + cfgs=sim_cfgs, num_envs=args_cli.num_envs, + device=args_cli.device, + bg_simplify=False, + ) + sim, scene = env.sim, env.scene + + my_sim = HeuristicManipulation(sim, scene, sim_cfgs=sim_cfgs) + + demo_root = (out_dir / img_folder / "demos").resolve() + + for _ in range(args_cli.num_trials): + + robot_pose = torch.tensor(sim_cfgs["robot_cfg"]["robot_pose"], dtype=torch.float32, device=my_sim.sim.device) # [7], pos(3)+quat(wxyz)(4) + my_sim.set_robot_pose(robot_pose) + my_sim.demo_id = get_next_demo_id(demo_root) + my_sim.reset() + print(f"[INFO] start simulation demo_{my_sim.demo_id}") + # Note: if you set viz_object_goals(), remember to disable gravity and collision for object + # my_sim.viz_object_goals(sample_step=10, hold_steps=40) + my_sim.inference() + + env.close() + simulation_app.close() + +if __name__ == "__main__": + main() + os.system("quit()") + simulation_app.close() \ No newline at end of file diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_generation.py b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_generation.py new file mode 100755 index 0000000..f932334 --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_generation.py @@ -0,0 
+1,252 @@ +# grasp_det_full_all.py (batch keys) +from __future__ import annotations +import argparse, json, os +from pathlib import Path +import numpy as np +import trimesh +import open3d as o3d +import yaml + +from grasp_utils import MyGraspNet + + +def sample_surface(glb_path: str, n_points: int = 5000) -> np.ndarray: + """Load mesh and sample surface points.""" + mesh = trimesh.load_mesh(glb_path) + if isinstance(mesh, trimesh.Scene): + mesh = list(mesh.geometry.values())[0] + pts, _ = trimesh.sample.sample_surface(mesh, n_points) + return np.array(pts, dtype=np.float32) + + +def crop_pointcloud_quadrant( + points, + quadrant=1, + axes="xy", +): + # Map axis letters to indices + axis_index = {"x": 0, "y": 1, "z": 2} + a1, a2 = axes[0], axes[1] + idx1, idx2 = axis_index[a1], axis_index[a2] + + center_full = points.mean(axis=0) + + c1, c2 = center_full[idx1], center_full[idx2] + + ge = np.greater # > + + # Build boolean masks for the four quadrants + # Right/Left split along axis1, Top/Bottom split along axis2 + right = ge(points[:, idx1], c1) # axis1 >= c1 (or > c1) + top = ge(points[:, idx2], c2) # axis2 >= c2 (or > c2) + + # Quadrant mapping per convention documented above + if quadrant == 1: + mask = right & top + elif quadrant == 2: + mask = (~right) & top + elif quadrant == 3: + mask = (~right) & (~top) + elif quadrant == 4: + mask = right & (~top) + else: + raise ValueError("`quadrant` must be an integer in {1,2,3,4}.") + + cropped_points = points[mask] + return cropped_points, mask + + +def read_score_safe(gg, i: int, fallback: float) -> float: + """Try to read candidate score; fall back to a synthetic rank-based value.""" + try: + return float(getattr(gg[i], "score")) + except Exception: + pass + try: + return float(gg.grasp_group_array[i][0]) + except Exception: + return fallback + + +def grasps_to_pointcloud( + gg, pts_per_gripper: int = 400, color=(1.0, 0.0, 0.0) +) -> o3d.geometry.PointCloud: + """Sample each gripper mesh to points and tint with color.""" + geoms = gg.to_open3d_geometry_list() # list of TriangleMesh + out = o3d.geometry.PointCloud() + for g in geoms: + out += g.sample_points_uniformly(pts_per_gripper) + if len(out.points) > 0: + out.colors = o3d.utility.Vector3dVector( + np.tile(np.array(color, dtype=np.float32), (len(out.points), 1)) + ) + return out + + +def save_vis_ply( + points_xyz: np.ndarray, + gg, + save_path: Path, + pts_per_gripper: int = 400, + cloud_color=(0.0, 1.0, 0.0), +): + """Write a PLY: green object cloud + red ALL grasp candidates.""" + cloud = o3d.geometry.PointCloud() + cloud.points = o3d.utility.Vector3dVector(points_xyz.astype(np.float32)) + cloud.colors = o3d.utility.Vector3dVector( + np.tile(np.array(cloud_color, dtype=np.float32), (len(cloud.points), 1)) + ) + grasp_pcd = grasps_to_pointcloud( + gg, pts_per_gripper=pts_per_gripper, color=(1.0, 0.0, 0.0) + ) + merged = cloud + grasp_pcd + save_path.parent.mkdir(parents=True, exist_ok=True) + o3d.io.write_point_cloud(str(save_path), merged) + print(f"[VIS] saved {save_path}") + + +def process_one_object( + gg_net: MyGraspNet, + obj_idx: int, + obj_dict: dict, + proposals_dir: Path, + keep: int | None, + nms: bool, + n_points: int, + overwrite: bool, + vis_pts_per_gripper: int = 400, +) -> str | None: + """ + Build full point cloud for one object, run GraspNet, export all candidates to NPZ, + save a PLY visualization, and return the NPZ path (string). 
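The quadrant crop above splits the cloud at its centroid along two chosen axes (it is only exercised via the commented-out call in `process_one_object` below). A toy check of the masking convention:

```python
import numpy as np

# Four points, one per XY quadrant around the centroid (the origin here).
pts = np.array([[ 1.0,  1.0, 0.0],
                [-1.0,  1.0, 0.0],
                [-1.0, -1.0, 0.0],
                [ 1.0, -1.0, 0.0]], dtype=np.float32)

cropped, mask = crop_pointcloud_quadrant(pts, quadrant=2, axes="xy")
print(mask)     # [False  True False False] -> axis1 < center, axis2 >= center
print(cropped)  # [[-1.  1.  0.]]
```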
+ """ + glb_path = obj_dict["optimized"] + if glb_path is None or (not os.path.exists(glb_path)): + print( + f"[WARN][{obj_dict['oid']}_{obj_dict['name']}] mesh not found -> {glb_path}" + ) + return None + + npy_path = proposals_dir / f"{obj_dict['oid']}_{obj_dict['name']}_grasp.npy" + vis_path = proposals_dir / f"{obj_dict['oid']}_{obj_dict['name']}_grasp_viz.ply" + + # Build cloud & inference + pcs = sample_surface(glb_path, n_points=n_points) + # pcs = crop_pointcloud_quadrant(pcs, quadrant=3, axes="xy")[0] + pcs[..., 2] *= -1.0 # keep identical convention + gg = gg_net.inference(pcs) + if nms: + gg = gg.nms() + gg = gg.sort_by_score() + if keep is not None and len(gg) > keep: + gg = gg[:keep] + if len(gg) == 0: + print(f"[WARN][{obj_dict['oid']}_{obj_dict['name']}] zero proposals") + return None + + # Save visualization + save_vis_ply(pcs, gg, vis_path, pts_per_gripper=vis_pts_per_gripper) + + for g_i in range(len(gg)): + translation = gg[g_i].translation + rotation = gg[g_i].rotation_matrix + translation = np.array([translation[0], translation[1], -translation[2]]) + rotation[2, :] = -rotation[2, :] + rotation[:, 2] = -rotation[:, 2] + gg.grasp_group_array[g_i][13:16] = translation + gg.grasp_group_array[g_i][4:13] = rotation.reshape(-1) + + gg.save_npy(npy_path) + print( + f"[OK][{obj_dict['oid']}_{obj_dict['name']}] saved {len(gg.grasp_group_array)} -> {npy_path.name}" + ) + + return str(npy_path.resolve()) + + +def run_for_key( + key: str, + n_points: int, + keep: int | None, + nms: bool, + overwrite: bool, + vis_pts_per_gripper: int, +): + base_dir = Path.cwd() + out_dir = base_dir / "outputs" + scene_json = out_dir / key / "simulation" / "scene.json" + if not scene_json.exists(): + raise FileNotFoundError(scene_json) + + # Load scene.json once + scene_dict = json.load(open(scene_json)) + objects = scene_dict.get("objects", {}) + if not isinstance(objects, dict) or len(objects) == 0: + print(f"[WARN][{key}] scene_dict['objects'] is empty.") + return + + # Prepare save dir + key_dir = out_dir / key + proposals_dir = key_dir / "grasps" + proposals_dir.mkdir(parents=True, exist_ok=True) + + # Single GraspNet instance reused for all objects + net = MyGraspNet() + + # Loop all objects + for i, obj in objects.items(): + npy_path = process_one_object( + gg_net=net, + obj_idx=i, + obj_dict=obj, + proposals_dir=proposals_dir, + keep=keep, + nms=nms, + n_points=n_points, + overwrite=overwrite, + vis_pts_per_gripper=vis_pts_per_gripper, + ) + if npy_path is not None: + scene_dict["objects"][i]["grasps"] = npy_path + with open(scene_json, "w") as f: + json.dump(scene_dict, f, indent=2) + print( + f"[OK][{key}] scene.json updated with 'grasps' for {obj['oid']}_{obj['name']}." 
+            )


+def main():
+    parser = argparse.ArgumentParser(
+        "Export grasp proposals for ALL objects (batch over keys) and write paths into scene.json"
+    )
+    parser.add_argument("--n_points", type=int, default=100000)
+    parser.add_argument("--keep", type=int, default=None)  # set None to keep all
+    # NOTE: argparse's type=bool treats any non-empty string as True ("--nms False"
+    # would still enable NMS), so parse the flag value explicitly.
+    parser.add_argument("--nms", type=lambda s: str(s).lower() in ("1", "true", "yes"), default=True)
+    parser.add_argument("--overwrite", action="store_true")
+    parser.add_argument(
+        "--vis_pts_per_gripper",
+        type=int,
+        default=400,
+        help="points sampled per gripper mesh for PLY",
+    )
+    args = parser.parse_args()
+
+    # load keys from YAML
+    cfg_path = Path.cwd() / "config" / "config.yaml"
+    cfg = yaml.safe_load(cfg_path.open("r"))
+    keys = cfg["keys"]
+
+    for key in keys:
+        print(f"\n========== [GraspDet] Processing key: {key} ==========")
+        run_for_key(
+            key=key,
+            n_points=args.n_points,
+            keep=args.keep,
+            nms=args.nms,
+            overwrite=args.overwrite,
+            vis_pts_per_gripper=args.vis_pts_per_gripper,
+        )


+if __name__ == "__main__":
+    main()
diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_utils.py b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_utils.py
new file mode 100755
index 0000000..2ceea0d
--- /dev/null
+++ b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/grasp_utils.py
@@ -0,0 +1,391 @@
+import os
+import sys
+import numpy as np
+import time
+import torch
+from typing import List
+import open3d as o3d
+from graspnetAPI import GraspGroup
+from pathlib import Path
+base_dir = Path.cwd()
+from graspness_unofficial.models.graspnet import GraspNet, pred_decode
+from graspness_unofficial.dataset.graspnet_dataset import minkowski_collate_fn
+from graspness_unofficial.utils.collision_detector import ModelFreeCollisionDetector
+from graspness_unofficial.utils.data_utils import CameraInfo, create_point_cloud_from_depth_image, get_workspace_mask
+from scipy.spatial.transform import Rotation as R
+
+grasp_cfgs = {
+    "save_files": False,
+    "checkpoint_path": "/home/isaac/third_party/graspness_unofficial/ckpt/minkuresunet_kinect.tar",  # hardcoded path
+    "seed_feat_dim": 512,
+    "camera": "kinect",
+    "num_point": 80000,
+    "batch_size": 1,
+    "voxel_size": 0.001,  # 0.005
+    "collision_thresh": 0.00001,
+    "voxel_size_cd": 0.01,  # 0.01
+    "infer": True,
+}
+
+def my_worker_init_fn(worker_id):
+    np.random.seed(np.random.get_state()[1][0] + worker_id)
+
+class MyGraspNet():
+    def __init__(self):
+        cfgs = grasp_cfgs
+        self.cfgs = cfgs
+        self.net = GraspNet(seed_feat_dim=cfgs["seed_feat_dim"], is_training=False)
+        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self.net.to(self.device)
+        # Load checkpoint
+        checkpoint = torch.load(cfgs["checkpoint_path"])
+        self.net.load_state_dict(checkpoint['model_state_dict'])
+        start_epoch = checkpoint['epoch']
+        print("-> loaded checkpoint %s (epoch: %d)" % (cfgs["checkpoint_path"], start_epoch))
+
+        self.net.eval()
+
+    def inference(self, pcs):
+        data_dict = {'point_clouds': pcs.astype(np.float32),
+                     'coors': pcs.astype(np.float32) / self.cfgs["voxel_size"],
+                     'feats': np.ones_like(pcs).astype(np.float32)}
+        batch_data = minkowski_collate_fn([data_dict])
+        tic = time.time()
+        for key in batch_data:
+            if 'list' in key:
+                for i in range(len(batch_data[key])):
+                    for j in range(len(batch_data[key][i])):
+                        batch_data[key][i][j] = batch_data[key][i][j].to(self.device)
+            else:
+                batch_data[key] = batch_data[key].to(self.device)
+
+        # Forward pass
+        with torch.no_grad():
+            end_points = self.net(batch_data)
grasp_preds = pred_decode(end_points) + + preds = grasp_preds[0].detach().cpu().numpy() + gg = GraspGroup(preds) + + # collision detection + if self.cfgs["collision_thresh"] > 0: + cloud = data_dict['point_clouds'] + mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=self.cfgs["voxel_size_cd"]) + collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=self.cfgs["collision_thresh"]) + gg = gg[~collision_mask] + + toc = time.time() + # print('inference time: %fs' % (toc - tic)) + return gg + +def inference(cfgs, data_input): + batch_data = minkowski_collate_fn([data_input]) + net = GraspNet(seed_feat_dim=cfgs["seed_feat_dim"], is_training=False) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + net.to(device) + + # Load checkpoint + checkpoint = torch.load(cfgs["checkpoint_path"]) + net.load_state_dict(checkpoint['model_state_dict']) + start_epoch = checkpoint['epoch'] + print("-> loaded checkpoint %s (epoch: %d)" % (cfgs["checkpoint_path"], start_epoch)) + + net.eval() + tic = time.time() + + for key in batch_data: + if 'list' in key: + for i in range(len(batch_data[key])): + for j in range(len(batch_data[key][i])): + batch_data[key][i][j] = batch_data[key][i][j].to(device) + else: + batch_data[key] = batch_data[key].to(device) + + # Forward pass + with torch.no_grad(): + end_points = net(batch_data) + grasp_preds = pred_decode(end_points) + + preds = grasp_preds[0].detach().cpu().numpy() + gg = GraspGroup(preds) + # collision detection + if cfgs["collision_thresh"] > 0: + cloud = data_input['point_clouds'] + mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs["voxel_size_cd"]) + collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=cfgs["collision_thresh"]) + gg = gg[~collision_mask] + + toc = time.time() + # print('inference time: %fs' % (toc - tic)) + return gg + +def pointcloud_to_grasp(cfgs, pcs): + data_dict = {'point_clouds': pcs.astype(np.float32), + 'coors': pcs.astype(np.float32) / cfgs["voxel_size"], + 'feats': np.ones_like(pcs).astype(np.float32)} + gg = inference(cfgs, data_dict) + return gg + +def vis_grasp(pcs, gg): + gg = gg.nms() + gg = gg.sort_by_score() + keep = 1 + if gg.__len__() > keep: + gg = gg[:keep] + grippers = gg.to_open3d_geometry_list() + cloud = o3d.geometry.PointCloud() + cloud.points = o3d.utility.Vector3dVector(pcs.astype(np.float32)) + o3d.visualization.draw_geometries([cloud, *grippers]) + +def grasp_to_pointcloud(grippers, gripper_points=1000, gripper_color=[1, 0, 0]): + grippers_pcd = o3d.geometry.PointCloud() + for gripper in grippers: + g_pcd = gripper.sample_points_uniformly(gripper_points) + grippers_pcd += g_pcd + grippers_pcd.colors = o3d.utility.Vector3dVector(np.tile(np.array([gripper_color]), (len(grippers_pcd.points), 1))) + return grippers_pcd + + +def vis_save_grasp(points, gg, best_grasp, save_path, colors=None, grasp_position=None, place_position=None): + # visualize grasp pos, place pos, grasp poses, and pcd + cloud = o3d.geometry.PointCloud() + cloud.points = o3d.utility.Vector3dVector(points.astype(np.float32)) + + if colors is None: + cloud.colors = o3d.utility.Vector3dVector(np.tile(np.array([[0, 1, 0]]), (len(cloud.points), 1))) + elif isinstance(colors, np.ndarray): + cloud.colors = o3d.utility.Vector3dVector(colors.astype(np.float32)) + elif isinstance(colors, list): + cloud.colors = o3d.utility.Vector3dVector(np.tile(np.array([colors]), (len(cloud.points), 1))) + + if type(gg) == GraspGroup: + pcd_w_grasp = 
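Putting the pieces together, a minimal end-to-end call of the wrapper above (assumes the hard-coded checkpoint exists and `graspness_unofficial` is importable; the cloud here is synthetic):

```python
import numpy as np

net = MyGraspNet()
pcs = (np.random.rand(80000, 3).astype(np.float32) - 0.5) * 0.3  # toy 30 cm cube
gg = net.inference(pcs)          # GraspGroup, already collision-filtered
gg = gg.nms().sort_by_score()    # dedupe, then best-first
if len(gg) > 0:
    print(f"{len(gg)} proposals, top score {gg[0].score:.3f}")
```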
grasp_to_pointcloud(gg.to_open3d_geometry_list()) + else: + pcd_w_grasp = grasp_to_pointcloud([gg.to_open3d_geometry()]) + + pcd_best_grasp = grasp_to_pointcloud([best_grasp.to_open3d_geometry()], gripper_color=[0, 0, 1]) + pcd_w_grasp += pcd_best_grasp + + if grasp_position is not None and place_position is not None: + pick_pcd = o3d.geometry.PointCloud() + place_pcd = o3d.geometry.PointCloud() + pick_pcd.points = o3d.utility.Vector3dVector(np.array(grasp_position).reshape(1,3).astype(np.float32)) + place_pcd.points = o3d.utility.Vector3dVector(np.array(place_position).reshape(1,3).astype(np.float32)) + pick_pcd.colors = place_pcd.colors = o3d.utility.Vector3dVector(np.array([[0,0,1]]).astype(np.float32)) + pcd_w_grasp = pcd_w_grasp + pick_pcd + place_pcd + + pcd_w_grasp += cloud + + o3d.io.write_point_cloud(save_path, pcd_w_grasp) + +def get_pose_from_grasp(best_grasp): + grasp_position = best_grasp.translation + # convert rotation to isaacgym convention + delta_m = np.array([[0, 0, 1], [0, -1, 0], [1, 0, 0]]) + rotation = np.dot(best_grasp.rotation_matrix, delta_m) + quaternion_grasp = R.from_matrix(rotation).as_quat() + quaternion_grasp = np.array([quaternion_grasp[3],quaternion_grasp[0],quaternion_grasp[1],quaternion_grasp[2]]) + rotation_unit_vect = rotation[:,2] + # grasp_position -= 0.03 * rotation_unit_vect + return grasp_position, quaternion_grasp, rotation_unit_vect + +def get_best_grasp(gg, position, max_dis=0.05): + # get best grasp pose around the grasp position + best_grasp = None + for g in gg: + if np.linalg.norm(g.translation - position) < max_dis: + if best_grasp is None: + best_grasp = g + else: + if g.score > best_grasp.score: + best_grasp = g + if best_grasp is None: + best_grasp = gg[0] + return best_grasp + +def get_best_grasp_z_aligned(gg): + # get best grasp pose that is facing the -z axis (for top-down grasping) + best_grasp = None + best_angle = np.inf + + gravity_direction = np.array([0, 0, -1]) # -Z direction in world coordinates + + for g in gg: + + # approach vector is +X axis of the grasp frame, make it close to the -Z axis in the world frame + approach_vector = g.rotation_matrix[:, 0] + approach_vector /= np.linalg.norm(approach_vector) + + # compute the angle between the approach vector and the gravity direction + angle = np.arccos(np.clip(np.dot(approach_vector, gravity_direction), -1.0, 1.0)) + + if angle < best_angle: + best_angle = angle + best_grasp = g + elif np.isclose(angle, best_angle) and g.score > best_grasp.score: + best_grasp = g + + if best_grasp is None: + print("No best grasp found, falling back to the first grasp.") + best_grasp = gg[0] # fallback + + return best_grasp + +def get_best_grasp_z_aligned_and_y_aligned(gg, + desired_approach_dir=np.array([0, 0, -1]), + desired_width_dir=np.array([0, 1, 0]), + angle_tolerance=0.2): + """ + Pick the grasp whose: + 1) X-axis (approach) is closest to desired_approach_dir (default -Z), + 2) Y-axis (gripper width) is closest to desired_width_dir (default +Y), + 3) Score is highest if angles tie. 
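The wxyz reordering done by hand in `get_pose_from_grasp` is equivalent to `np.roll` on SciPy's xyzw output; a small check:

```python
import numpy as np
from scipy.spatial.transform import Rotation as R

delta_m = np.array([[0, 0, 1], [0, -1, 0], [1, 0, 0]])  # remap to the IsaacGym gripper convention
rot = np.eye(3) @ delta_m        # identity grasp rotation, remapped

q_xyzw = R.from_matrix(rot).as_quat()  # SciPy returns (x, y, z, w)
q_wxyz = np.roll(q_xyzw, 1)            # reorder to (w, x, y, z), as used above
print(q_wxyz)
```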
+ + Grasp coordinate system: + - rotation_matrix[:, 0] → X-axis = Approach direction + - rotation_matrix[:, 1] → Y-axis = Gripper width direction + - rotation_matrix[:, 2] → Z-axis = Depth/thickness direction + """ + + best_grasp = None + # Track best angles and score + best_approach_angle = np.inf + best_width_angle = np.inf + best_score = -np.inf + + # Normalize desired directions + desired_approach_dir = desired_approach_dir / np.linalg.norm(desired_approach_dir) + desired_width_dir = desired_width_dir / np.linalg.norm(desired_width_dir) + + for g in gg: + # 1) Approach vector angle + approach_vec = g.rotation_matrix[:, 0] + approach_vec /= np.linalg.norm(approach_vec) + approach_angle = angle_between(approach_vec, desired_approach_dir) + + # 2) Width vector angle + width_vec = g.rotation_matrix[:, 1] + width_vec /= np.linalg.norm(width_vec) + width_angle = angle_between(width_vec, desired_width_dir) + + # 3) Compare to the "best" so far in a hierarchical manner + if approach_angle < best_approach_angle: + # Definitely better in terms of approach alignment => choose this + best_approach_angle = approach_angle + best_width_angle = width_angle + best_score = g.score + best_grasp = g + elif np.isclose(approach_angle, best_approach_angle, atol=angle_tolerance): + # Approach angles are essentially tied, compare width alignment + if width_angle < best_width_angle: + best_width_angle = width_angle + best_score = g.score + best_grasp = g + elif np.isclose(width_angle, best_width_angle, atol=angle_tolerance): + # Both angles tied, pick the higher score + if g.score > best_score: + best_score = g.score + best_grasp = g + + if best_grasp is None and len(gg) > 0: + print("No valid grasp found using angle criteria. Falling back to the first grasp.") + best_grasp = gg[0] + + return best_grasp + + +def get_best_grasp_with_hints(gg: GraspGroup, point: List[float] = None, direction: List[float] = None): + """ + Rescore all grasps using optional spatial and directional hints, then return a new + GraspGroup sorted by this custom score (best first). Does NOT mutate the original gg. + + Scoring terms in [0, 1], combined by a weighted sum: + - dir_term: alignment between grasp approach (+X axis) and `direction` + - pt_term : proximity to `point` (RBF over distance) + - net_term: original network score normalized over gg + + Args: + gg: GraspGroup from graspnetAPI. + point: (3,) world point. If provided, grasps closer to this point are preferred. + direction: (3,) world direction. If provided, grasps whose approach (+X) aligns + with this direction are preferred. + + Returns: + GraspGroup: a *new* group sorted by the custom score (descending). + The best guess is result[0]. + """ + # --- Early exits --- + if gg is None or len(gg) == 0: + return gg + + # Internal weights (you can tweak if needed) + w_dir = 1.0 + w_pt = 1.0 + w_net = 0 + + # If hints are missing, zero-out the corresponding weights + if point is None: + w_pt = 0.0 + if direction is None or (np.asarray(direction).shape != (3,)): + w_dir = 0.0 + + # Length-scale for the point proximity (meters). Similar to your 0.05 window. 
+ sigma = 0.05 + sigma2 = max(sigma * sigma, 1e-12) + + # --- Gather per-grasp attributes --- + translations = [] + approach_axes = [] # grasp frame +X + net_scores = [] + for g in gg: + translations.append(g.translation.astype(np.float64)) + # Normalize +X axis as approach direction + ax = g.rotation_matrix[:, 0].astype(np.float64) + n = np.linalg.norm(ax) + approach_axes.append(ax / n if n > 0 else np.array([1.0, 0.0, 0.0], dtype=np.float64)) + net_scores.append(float(g.score)) + + translations = np.vstack(translations) # (N,3) + approach_axes = np.vstack(approach_axes) # (N,3) + net_scores = np.asarray(net_scores, dtype=np.float64) # (N,) + + # --- Normalize original network scores to [0,1] --- + if np.isfinite(net_scores).all() and (net_scores.max() > net_scores.min()): + net_term = (net_scores - net_scores.min()) / (net_scores.max() - net_scores.min()) + else: + net_term = np.zeros_like(net_scores) + + # --- Direction alignment term (cosine mapped to [0,1]) --- + if w_dir > 0.0: + d = np.asarray(direction, dtype=np.float64) + nd = np.linalg.norm(d) + if nd > 0: + d = d / nd + cosv = np.clip((approach_axes * d[None, :]).sum(axis=1), -1.0, 1.0) + dir_term = 0.5 * (cosv + 1.0) # map [-1,1] -> [0,1] + else: + dir_term = np.zeros(len(gg), dtype=np.float64) + else: + dir_term = np.zeros(len(gg), dtype=np.float64) + + # --- Point proximity term (RBF over Euclidean distance) --- + if w_pt > 0.0: + p = np.asarray(point, dtype=np.float64).reshape(1, 3) + dists = np.linalg.norm(translations - p, axis=1) + pt_term = np.exp(-0.5 * (dists * dists) / sigma2) # in (0,1] + else: + pt_term = np.zeros(len(gg), dtype=np.float64) + + # --- Combine --- + total_score = w_dir * dir_term + w_pt * pt_term + w_net * net_term + gg.scores = total_score + gg.sort_by_score() # Sort in-place by the new score (best first) + return gg + + +def angle_between(v1, v2): + """Utility to compute the angle between two normalized vectors in [0, π].""" + dot_val = np.clip(np.dot(v1, v2), -1.0, 1.0) + return np.arccos(dot_val) diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/trajectory_smoothing.py b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/trajectory_smoothing.py new file mode 100755 index 0000000..7e309ed --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/trajectory_smoothing.py @@ -0,0 +1,190 @@ +""" +Trajectory smoothing with axis-flip/axis-permutation disambiguation in the object-local frame. +Note this is just an experimental feature and may not work well. +""" + +import numpy as np +import transforms3d +from pathlib import Path +import yaml +import json +import argparse + +def _proj_to_SO3(R: np.ndarray) -> np.ndarray: + """Project a near-rotation 3x3 matrix to the closest element in SO(3) via SVD.""" + U, _, Vt = np.linalg.svd(R) + R_star = U @ Vt + if np.linalg.det(R_star) < 0: + U[:, -1] *= -1 + R_star = U @ Vt + return R_star + +def _angle_between_rotations(Ra: np.ndarray, Rb: np.ndarray) -> float: + """Geodesic angle on SO(3): arccos( (trace(Ra^T Rb) - 1)/2 ).""" + M = Ra.T @ Rb + cos_th = (np.trace(M) - 1.0) * 0.5 + cos_th = np.clip(cos_th, -1.0, 1.0) + return float(np.arccos(cos_th)) + +def _quat_slerp(q0: np.ndarray, q1: np.ndarray, alpha: float) -> np.ndarray: + """Shortest-arc SLERP between unit quaternions q0 -> q1. 
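Typical use of `get_best_grasp_with_hints` defined above, with both hints active (coordinates illustrative; `gg` is a `GraspGroup` loaded from a proposal file):

```python
import numpy as np

# Prefer grasps near a world-frame point AND approaching top-down.
gg = get_best_grasp_with_hints(
    gg,                             # GraspGroup from the proposal .npy
    point=[0.45, 0.10, 0.05],       # meters, world frame
    direction=[0.0, 0.0, -1.0],     # desired approach (+X axis of grasp frame)
)
best = gg[0]                        # best-first after rescoring
print(best.translation, best.score)
```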
Quaternions are (w,x,y,z).""" + q0 = q0 / np.linalg.norm(q0) + q1 = q1 / np.linalg.norm(q1) + dot = float(np.dot(q0, q1)) + if dot < 0.0: + q1 = -q1 + dot = -dot + dot = np.clip(dot, -1.0, 1.0) + if dot > 0.9995: + q = (1.0 - alpha) * q0 + alpha * q1 + return q / np.linalg.norm(q) + theta0 = np.arccos(dot) + s0 = np.sin((1 - alpha) * theta0) / np.sin(theta0) + s1 = np.sin(alpha * theta0) / np.sin(theta0) + q = s0 * q0 + s1 * q1 + return q / np.linalg.norm(q) + +def _generate_group24() -> np.ndarray: + """ + Generate 24 orientation-preserving signed permutation matrices (cube rotations). + Each is 3x3, has one ±1 per row/column, det=+1. + """ + mats = [] + perms = [(0,1,2),(0,2,1),(1,0,2),(1,2,0),(2,0,1),(2,1,0)] + for p in perms: + for sx in (-1,1): + for sy in (-1,1): + for sz in (-1,1): + M = np.zeros((3,3), float) + M[0, p[0]] = sx + M[1, p[1]] = sy + M[2, p[2]] = sz + if np.linalg.det(M) > 0: + mats.append(M) + mats = np.stack(mats, axis=0) + assert mats.shape[0] == 24 + return mats + +def smooth_object_local_trajectory( + poses: np.ndarray, + rot_smooth_alpha: float = 0.2, + trans_smooth_alpha: float = 0.2, + repair_SO3: bool = True, +) -> np.ndarray: + """ + Smooth a (N,4,4) trajectory with axis-flip/axis-permutation disambiguation in the OBJECT-LOCAL frame. + + Assumptions: + - Each pose T_t = [R_t, t_t; 0 0 0 1], rotation acts actively on column vectors. + - Coordinate re-labeling / flips (right-handed only) may occur in the object's local frame. + - Therefore we correct R_meas by RIGHT-multiplying S in G24, i.e., R_corr = R_meas @ S. + + Args: + poses: np.ndarray of shape (N, 4, 4), the raw trajectory. + rot_smooth_alpha: rotation smoothing factor in [0,1] (higher -> less smoothing). + trans_smooth_alpha: translation smoothing factor in [0,1]. + repair_SO3: if True, project measured rotations to SO(3) per frame (robust to noise/reflection). + + Returns: + np.ndarray of shape (N, 4, 4): the smoothed trajectory. + """ + assert poses.ndim == 3 and poses.shape[1:] == (4,4), "poses must be (N,4,4)" + N = poses.shape[0] + if N == 0: + return poses.copy() + + G = _generate_group24() + + out = np.zeros_like(poses, dtype=float) + R_prev = None + q_prev = None + t_prev = None + + for i in range(N): + T_in = poses[i].astype(float) + R_meas = T_in[:3, :3] + t_meas = T_in[:3, 3] + + if repair_SO3: + R_meas = _proj_to_SO3(R_meas) + + # --- Disambiguation in object-local frame: choose S to minimize distance to previous rotation. + if R_prev is not None: + best_angle = np.inf + R_best = R_meas + for S in G: + Rc = R_meas @ S # RIGHT-multiply: local/object-frame relabel/flip + ang = _angle_between_rotations(R_prev, Rc) + if ang < best_angle: + best_angle = ang + R_best = Rc + else: + R_best = R_meas + + # --- Quaternion sign continuity + SLERP smoothing. + q_best = transforms3d.quaternions.mat2quat(R_best) # (w,x,y,z) + if q_prev is not None and float(np.dot(q_best, q_prev)) < 0.0: + q_best = -q_best + q_smooth = q_best if q_prev is None else _quat_slerp(q_prev, q_best, rot_smooth_alpha) + R_smooth = transforms3d.quaternions.quat2mat(q_smooth) + + # --- Translation EMA smoothing (object-local relabeling doesn't change translation component). + t_smooth = t_meas if t_prev is None else (1.0 - trans_smooth_alpha) * t_prev + trans_smooth_alpha * t_meas + + # --- Compose output pose. + T_out = np.eye(4, dtype=float) + T_out[:3, :3] = R_smooth + T_out[:3, 3] = t_smooth + out[i] = T_out + + # --- Update memory. 
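Usage sketch for the smoother above, run in the module's context: an identity trajectory whose middle frame was relabeled by a member of G24 should come back to (nearly) identity after disambiguation.

```python
import numpy as np

poses = np.tile(np.eye(4), (5, 1, 1))
S = np.array([[0, -1, 0], [1, 0, 0], [0, 0, 1]], dtype=float)  # det=+1, in G24
poses[2, :3, :3] = poses[2, :3, :3] @ S  # simulate an axis relabel at frame 2

smoothed = smooth_object_local_trajectory(poses,
                                          rot_smooth_alpha=0.2,
                                          trans_smooth_alpha=0.2,
                                          repair_SO3=True)
print(np.allclose(smoothed[2, :3, :3], np.eye(3), atol=1e-6))  # True
```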
+        R_prev = R_smooth
+        q_prev = q_smooth
+        t_prev = t_smooth
+
+    return out
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Smooth trajectory poses")
+    parser.add_argument("--traj", type=str, default="fdpose_trajs",
+                        help="scene.json object key holding the trajectory .npy path")
+    args = parser.parse_args()
+
+    cfg_path = Path.cwd() / "config" / "config.yaml"
+    cfg = yaml.safe_load(cfg_path.open("r"))
+    keys = cfg["keys"]
+
+    base_dir = Path.cwd()
+    out_dir = base_dir / "outputs"
+    for key in keys:
+        print(f"\n========== [Trajectory Smoothing] Processing key: {key} ==========")
+
+        scene_json = out_dir / key / "simulation" / "scene.json"
+        if not scene_json.exists():
+            raise FileNotFoundError(scene_json)
+
+        scene_dict = json.load(open(scene_json))
+        objects = scene_dict.get("objects", {})
+        if not isinstance(objects, dict) or len(objects) == 0:
+            print(f"[WARN][{key}] scene_dict['objects'] is empty.")
+            continue  # skip this key instead of aborting the whole batch
+
+        key_dir = out_dir / key
+        for i, obj in objects.items():
+            traj_path = obj[args.traj]
+            traj = np.load(traj_path).astype(np.float32)  # relative poses (N,4,4)
+
+            smoothed_trajs = smooth_object_local_trajectory(
+                traj,
+                rot_smooth_alpha=0.2,
+                trans_smooth_alpha=0.2,
+                repair_SO3=True,
+            ).astype(np.float32)
+            smoothed_path = key_dir / "reconstruction" / "motions" / f"{i}_{obj['name']}_smoothed_trajs.npy"
+            np.save(smoothed_path, smoothed_trajs)
+            scene_dict["objects"][i]["smooth_trajs"] = str(smoothed_path)
+
+        json.dump(scene_dict, open(scene_json, "w"), indent=2)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/usd_conversion.py b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/usd_conversion.py
new file mode 100755
index 0000000..49b2f50
--- /dev/null
+++ b/openreal2sim/simulation/isaaclab/data_collection/sim_preprocess/usd_conversion.py
@@ -0,0 +1,584 @@
+########################################################
+## Commandline arguments (IsaacLab only; no --key)
+########################################################
+import argparse
+from isaaclab.app import AppLauncher
+
+parser = argparse.ArgumentParser()
+# NOTE: no --key here; we will batch keys from config/config.yaml
+AppLauncher.add_app_launcher_args(parser)
+args_isaaclab = parser.parse_args()
+# Force GUI unless you toggle via IsaacLab flags
+args_isaaclab.headless = False
+app_launcher = AppLauncher(args_isaaclab)
+simulation_app = app_launcher.app
+
+########################################################
+## Imports
+########################################################
+from dataclasses import dataclass
+from typing import Literal, List
+from pathlib import Path
+import numpy as np
+from PIL import Image
+import trimesh
+import json
+import yaml
+import contextlib
+import os
+import xml.etree.ElementTree as ET
+from collections import defaultdict
+
+import carb
+import isaacsim.core.utils.stage as stage_utils
+import omni.kit.app
+from loguru import logger as log
+from isaaclab.sim.converters import (
+    MeshConverter,
+    MeshConverterCfg,
+    UrdfConverter,
+    UrdfConverterCfg,
+)
+from isaaclab.sim.schemas import schemas_cfg
+from isaaclab.utils.assets import check_file_path
+from pxr import Usd, UsdPhysics
+from rich.logging import RichHandler
+
+# Extra USD/PhysX schemas + tokens
+from pxr import PhysxSchema, Sdf, UsdGeom
+
+log.configure(handlers=[{"sink": RichHandler(), "format": "{message}"}])
+
+########################################################
+## Small utilities
+######################################################## +base_dir = Path.cwd() +out_dir = base_dir / "outputs" + +def glb_to_usd(filename: str) -> str: + assert filename.endswith(".glb") + return filename.replace(".glb", ".usd") + +def convert_glb_to_obj(glb_path): + """ + Convert a GLB file to an OBJ file with texture. + (Kept from your original file; unused by the flow unless you point inputs to .glb->.obj) + """ + obj_name = str(glb_path).split("/")[-1].split(".")[0] + obj_path = Path(glb_path).parent / f"{obj_name}.obj" + mtl_path = Path(glb_path).parent / f"{obj_name}.mtl" + texture_path = Path(glb_path).parent / f"{obj_name}.jpg" + + mesh = trimesh.load(glb_path) + + if hasattr(mesh.visual, 'material') and hasattr(mesh.visual.material, 'image'): + image = mesh.visual.material.image + if image is not None: + Image.fromarray(np.asarray(image)).save(texture_path) + + with open(mtl_path, "w") as mtl_file: + mtl_file.write( + "newmtl material_0\nKa 1.000 1.000 1.000\nKd 1.000 1.000 1.000\n" + "Ks 0.000 0.000 0.000\nNs 10.000\nd 1.0\nillum 2\n" + ) + mtl_file.write(f"map_Kd {texture_path.name}\n") + + with open(obj_path, "w") as obj_file: + obj_file.write(f"mtllib {mtl_path.name}\n") + for v in mesh.vertices: + obj_file.write(f"v {v[0]} {v[1]} {v[2]}\n") + for uv in mesh.visual.uv: + obj_file.write(f"vt {uv[0]} {1 - uv[1]}\n") + for f in mesh.faces: + obj_file.write(f"f {f[0]+1}/{f[0]+1} {f[1]+1}/{f[1]+1} {f[2]+1}/{f[2]+1}\n") + + print(f"Converted GLB to OBJ: {obj_path}") + return str(obj_path) + +######################################################## +## Args container (same fields as before) +######################################################## +@dataclass +class Args: + input: List[str] + output: List[str] + make_instanceable: bool = False + # We will enforce convexDecomposition for both bg and objects in run_conversion() + collision_approximation: Literal["convexHull", "convexDecomposition", "meshSimplification", "none"] = "convexDecomposition" + mass: float | None = 1.0 + disable_gravity: bool = False + kinematic_enabled: bool = False + headless: bool = False + exit_on_finish: bool = True + +# We'll assign a per-key Args instance to this global so helper functions keep working unchanged. 
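For a one-off conversion outside the batch flow, the module-level `args` consumed by the helpers below can be populated directly (a sketch; paths illustrative):

```python
# Static background asset: no mass properties, gravity off, kinematic.
args = Args(
    input=["outputs/demo/simulation/background/registered.glb"],
    output=[glb_to_usd("outputs/demo/simulation/background/registered.glb")],
    mass=None,
    disable_gravity=True,
    kinematic_enabled=True,
)
```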
+args = None # type: ignore + +######################################################## +## Converters and USD post-processing (unchanged) +######################################################## +def convert_obj_to_usd(obj_path, usd_path): + log.info(f"Converting {obj_path}") + + if not os.path.isabs(obj_path): + obj_path = os.path.abspath(obj_path) + if not check_file_path(obj_path): + raise ValueError(f"Invalid mesh file path: {obj_path}") + + usd_path = os.path.abspath(usd_path) + + mass_props = schemas_cfg.MassPropertiesCfg(mass=args.mass) if args.mass is not None else None + rigid_props = schemas_cfg.RigidBodyPropertiesCfg( + disable_gravity=args.disable_gravity, + kinematic_enabled=args.kinematic_enabled, + ) + collision_props = schemas_cfg.CollisionPropertiesCfg( + collision_enabled=args.collision_approximation != "none", + contact_offset=0.006, + rest_offset=-0.002, + torsional_patch_radius=0.05, + min_torsional_patch_radius=0.005, + ) + + mesh_converter_cfg = MeshConverterCfg( + mass_props=mass_props, + rigid_props=rigid_props, + collision_props=collision_props, + asset_path=obj_path, + force_usd_conversion=True, + usd_dir=os.path.dirname(usd_path), + usd_file_name=os.path.basename(usd_path), + make_instanceable=args.make_instanceable, + collision_approximation="convexDecomposition", # enforce + ) + + log.info(f"Conversion configuration: {mesh_converter_cfg}") + MeshConverter(mesh_converter_cfg) + +def ensure_rigidbody_and_set_kinematic(usd_path: str, disable_gravity: bool = True): + stage = Usd.Stage.Open(usd_path) + prim = stage.GetDefaultPrim() + if not prim or not prim.IsValid(): + for p in stage.GetPseudoRoot().GetChildren(): + prim = p + break + if not prim or not prim.IsValid(): + raise RuntimeError(f"[ensure_rigidbody_and_set_kinematic] No valid prim in stage: {usd_path}") + + UsdPhysics.RigidBodyAPI.Apply(prim) + try: + rb = PhysxSchema.PhysxRigidBodyAPI.Apply(prim) + if hasattr(rb, "CreateKinematicEnabledAttr"): + rb.CreateKinematicEnabledAttr(True) + else: + prim.CreateAttribute("physxRigidBody:kinematicEnabled", Sdf.ValueTypeNames.Bool).Set(True) + if hasattr(rb, "CreateDisableGravityAttr"): + rb.CreateDisableGravityAttr(bool(disable_gravity)) + else: + prim.CreateAttribute("physxRigidBody:disableGravity", Sdf.ValueTypeNames.Bool).Set(bool(disable_gravity)) + except Exception: + prim.CreateAttribute("physxRigidBody:kinematicEnabled", Sdf.ValueTypeNames.Bool).Set(True) + prim.CreateAttribute("physxRigidBody:disableGravity", Sdf.ValueTypeNames.Bool).Set(bool(disable_gravity)) + try: + UsdPhysics.RigidBodyAPI(prim).CreateDisableGravityAttr(bool(disable_gravity)) + except Exception: + prim.CreateAttribute("physics:disableGravity", Sdf.ValueTypeNames.Bool).Set(bool(disable_gravity)) + stage.Save() + +def enable_ccd_and_iters(usd_path: str, pos_iters: int = 14, vel_iters: int = 4, + enable_ccd: bool = True, enable_speculative_ccd: bool = True): + stage = Usd.Stage.Open(usd_path) + prim = stage.GetDefaultPrim() + if not prim or not prim.IsValid(): + for p in stage.Traverse(): + prim = p + break + if not prim or not prim.IsValid(): + raise RuntimeError(f"[enable_ccd_and_iters] No valid prim in stage: {usd_path}") + + UsdPhysics.RigidBodyAPI.Apply(prim) + try: + rb = PhysxSchema.PhysxRigidBodyAPI.Apply(prim) + if enable_ccd: + (rb.CreateEnableCCDAttr(True) if hasattr(rb, "CreateEnableCCDAttr") + else prim.CreateAttribute("physxRigidBody:enableCCD", Sdf.ValueTypeNames.Bool).Set(True)) + if enable_speculative_ccd: + (rb.CreateEnableSpeculativeCCDAttr(True) if hasattr(rb, 
"CreateEnableSpeculativeCCDAttr") + else prim.CreateAttribute("physxRigidBody:enableSpeculativeCCD", Sdf.ValueTypeNames.Bool).Set(True)) + (rb.CreateSolverPositionIterationCountAttr(int(pos_iters)) if hasattr(rb, "CreateSolverPositionIterationCountAttr") + else prim.CreateAttribute("physxRigidBody:solverPositionIterationCount", Sdf.ValueTypeNames.Int).Set(int(pos_iters))) + (rb.CreateSolverVelocityIterationCountAttr(int(vel_iters)) if hasattr(rb, "CreateSolverVelocityIterationCountAttr") + else prim.CreateAttribute("physxRigidBody:solverVelocityIterationCount", Sdf.ValueTypeNames.Int).Set(int(vel_iters))) + except Exception: + if enable_ccd: + prim.CreateAttribute("physxRigidBody:enableCCD", Sdf.ValueTypeNames.Bool).Set(True) + if enable_speculative_ccd: + prim.CreateAttribute("physxRigidBody:enableSpeculativeCCD", Sdf.ValueTypeNames.Bool).Set(True) + prim.CreateAttribute("physxRigidBody:solverPositionIterationCount", Sdf.ValueTypeNames.Int).Set(int(pos_iters)) + prim.CreateAttribute("physxRigidBody:solverVelocityIterationCount", Sdf.ValueTypeNames.Int).Set(int(vel_iters)) + stage.Save() + +def set_convex_decomposition_params(usd_path: str, + voxel_resolution: int = 500_000, + max_convex_hulls: int = 24, + max_vertices_per_hull: int = 64, + concavity: float = 0.002, + shrink_wrap: bool = True, + overlap: bool = False, + contact_offset: float = 0.006, + rest_offset: float = -0.002): + stage = Usd.Stage.Open(usd_path) + root = stage.GetDefaultPrim() + if not root or not root.IsValid(): + return + + for prim in stage.Traverse(): + if not prim or not prim.IsValid(): + continue + if not prim.GetPath().HasPrefix(root.GetPath()): + continue + if not prim.IsA(UsdGeom.Mesh): + continue + + UsdPhysics.CollisionAPI.Apply(prim) + mesh_col = UsdPhysics.MeshCollisionAPI.Apply(prim) + + try: + mesh_col.CreateApproximationAttr(PhysxSchema.Tokens.convexDecomposition) + except Exception: + try: + mesh_col.GetApproximationAttr().Set("convexDecomposition") + except Exception: + prim.CreateAttribute("physics:approximation", Sdf.ValueTypeNames.Token).Set("convexDecomposition") + + try: + cd_api = PhysxSchema.PhysxConvexDecompositionCollisionAPI.Apply(prim) + if hasattr(cd_api, "CreateVoxelResolutionAttr"): + cd_api.CreateVoxelResolutionAttr(int(voxel_resolution)) + else: + prim.CreateAttribute("physxConvexDecomposition:voxelResolution", Sdf.ValueTypeNames.Int).Set(int(voxel_resolution)) + if hasattr(cd_api, "CreateMaxConvexHullsAttr"): + cd_api.CreateMaxConvexHullsAttr(int(max_convex_hulls)) + else: + prim.CreateAttribute("physxConvexDecomposition:maxConvexHulls", Sdf.ValueTypeNames.Int).Set(int(max_convex_hulls)) + if hasattr(cd_api, "CreateMaxNumVerticesPerCHAttr"): + cd_api.CreateMaxNumVerticesPerCHAttr(int(max_vertices_per_hull)) + else: + prim.CreateAttribute("physxConvexDecomposition:maxNumVerticesPerCH", Sdf.ValueTypeNames.Int).Set(int(max_vertices_per_hull)) + if hasattr(cd_api, "CreateConcavityAttr"): + cd_api.CreateConcavityAttr(float(concavity)) + else: + prim.CreateAttribute("physxConvexDecomposition:concavity", Sdf.ValueTypeNames.Float).Set(float(concavity)) + if hasattr(cd_api, "CreateShrinkWrapAttr"): + cd_api.CreateShrinkWrapAttr(bool(shrink_wrap)) + else: + prim.CreateAttribute("physxConvexDecomposition:shrinkWrap", Sdf.ValueTypeNames.Bool).Set(bool(shrink_wrap)) + if hasattr(cd_api, "CreateOverlapAttr"): + cd_api.CreateOverlapAttr(bool(overlap)) + else: + prim.CreateAttribute("physxConvexDecomposition:overlap", Sdf.ValueTypeNames.Bool).Set(bool(overlap)) + except Exception: + 
prim.CreateAttribute("physxConvexDecomposition:voxelResolution", Sdf.ValueTypeNames.Int).Set(int(voxel_resolution)) + prim.CreateAttribute("physxConvexDecomposition:maxConvexHulls", Sdf.ValueTypeNames.Int).Set(int(max_convex_hulls)) + prim.CreateAttribute("physxConvexDecomposition:maxNumVerticesPerCH", Sdf.ValueTypeNames.Int).Set(int(max_vertices_per_hull)) + prim.CreateAttribute("physxConvexDecomposition:concavity", Sdf.ValueTypeNames.Float).Set(float(concavity)) + prim.CreateAttribute("physxConvexDecomposition:shrinkWrap", Sdf.ValueTypeNames.Bool).Set(bool(shrink_wrap)) + prim.CreateAttribute("physxConvexDecomposition:overlap", Sdf.ValueTypeNames.Bool).Set(bool(overlap)) + + try: + col_api = PhysxSchema.PhysxCollisionAPI.Apply(prim) + if hasattr(col_api, "CreateContactOffsetAttr"): + col_api.CreateContactOffsetAttr(float(contact_offset)) + else: + prim.CreateAttribute("physxCollision:contactOffset", Sdf.ValueTypeNames.Float).Set(float(contact_offset)) + if hasattr(col_api, "CreateRestOffsetAttr"): + col_api.CreateRestOffsetAttr(float(rest_offset)) + else: + prim.CreateAttribute("physxCollision:restOffset", Sdf.ValueTypeNames.Float).Set(float(rest_offset)) + except Exception: + prim.CreateAttribute("physxCollision:contactOffset", Sdf.ValueTypeNames.Float).Set(float(contact_offset)) + prim.CreateAttribute("physxCollision:restOffset", Sdf.ValueTypeNames.Float).Set(float(rest_offset)) + + try: + UsdGeom.Mesh(prim).CreateDoubleSidedAttr(True) + except Exception: + pass + + stage.Save() + +def make_visual_names_unique(xml_string: str) -> str: + tree = ET.ElementTree(ET.fromstring(xml_string)) + root = tree.getroot() + elements_with_name = root.findall(".//visual") + name_counts = defaultdict(int) + for element in elements_with_name: + name = element.get("name") + name_counts[name] += 1 + for element in elements_with_name: + name = element.get("name") + if name_counts[name] > 1: + count = name_counts[name] + new_name = f"{name}{count}" + element.set("name", new_name) + name_counts[name] -= 1 + return ET.tostring(root, encoding="unicode") + +def make_urdf_with_unique_visual_names(urdf_path: str) -> str: + xml_str = open(urdf_path, "r").read() + xml_str = '' + "\n" + make_visual_names_unique(xml_str) + urdf_unique_path = urdf_path.replace(".urdf", "_unique.urdf") + with open(urdf_unique_path, "w") as f: + f.write(xml_str) + return urdf_unique_path + +def make_obj_without_slashes(obj_path: str) -> str: + obj_str = open(obj_path, "r").read() + obj_str = obj_str.replace("/", "") + obj_clean_path = obj_path.replace(".obj", "_clean.obj") + with open(obj_clean_path, "w") as f: + f.write(obj_str) + return obj_clean_path + +def convert_urdf_to_usd(urdf_path, usd_path): + log.info(f"Converting {urdf_path}") + if not os.path.isabs(urdf_path): + urdf_path = os.path.abspath(urdf_path) + urdf_converter_cfg = UrdfConverterCfg( + asset_path=urdf_path, + usd_dir=os.path.dirname(usd_path), + usd_file_name=os.path.basename(usd_path), + make_instanceable=args.make_instanceable, + force_usd_conversion=True, + fix_base=True, + ) + UrdfConverter(urdf_converter_cfg) + +def is_articulation(usd_path: str): + joint_count = 0 + stage = Usd.Stage.Open(usd_path) + for prim in stage.Traverse(): + if prim.IsA(UsdPhysics.Joint): + joint_count += 1 + return joint_count > 0 + +def apply_rigidbody_api(usd_path): + stage = Usd.Stage.Open(usd_path) + defaultPrim = stage.GetDefaultPrim() + UsdPhysics.RigidBodyAPI.Apply(defaultPrim) + stage.Save() + +def count_rigid_api(usd_path): + stage = Usd.Stage.Open(usd_path) + rigid_api_count = 
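After conversion, the schema helpers above can be re-applied to audit or retune a single asset (a sketch; path illustrative, run inside the IsaacLab container):

```python
usd_path = "outputs/demo/simulation/objects/0_mug/optimized.usd"

apply_rigidbody_api(usd_path)
enable_ccd_and_iters(usd_path, pos_iters=14, vel_iters=4)
set_convex_decomposition_params(usd_path,
                                max_convex_hulls=24,
                                max_vertices_per_hull=64,
                                concavity=0.002)
```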
0 + for prim in stage.Traverse(): + if prim.HasAPI(UsdPhysics.RigidBodyAPI): + rigid_api_count += 1 + return rigid_api_count + +######################################################## +## Per-key conversion (batch) +######################################################## +def run_conversion_for_key(key: str): + global args + + scene_json = out_dir / key / "simulation" / "scene.json" + assert scene_json.exists(), f"[{key}] scene.json not found: {scene_json}" + scene_dict = json.load(open(scene_json, "r")) + + # Build IO lists (background always first). Prefer "refined" if present. + input_list, output_list = [scene_dict["background"]["registered"]], [glb_to_usd(scene_dict["background"]["registered"])] + scene_dict["background"]["usd"] = glb_to_usd(scene_dict["background"]["registered"]) + for idx, obj in scene_dict["objects"].items(): + input_list.append(obj["optimized"]) + output_list.append(glb_to_usd(obj["optimized"])) + scene_dict["objects"][idx]["usd"] = glb_to_usd(obj["optimized"]) + + # Persist USD paths back to scene.json + with open(scene_json, 'w') as f: + json.dump(scene_dict, f, indent=2) + + # Create a fresh Args for this key + args = Args(input=input_list, output=output_list, make_instanceable=False, + collision_approximation="convexDecomposition", + mass=1.0, disable_gravity=False, kinematic_enabled=False, + headless=args_isaaclab.headless, exit_on_finish=True) + + # Run the original main conversion logic for this key + log.info(f"[{key}] converting {len(args.input)} assets...") + run_conversion(scene_dict) + +def run_conversion(scene_dict: dict): + """This is the body of your original `main()` but parameterized and reused per key.""" + if isinstance(args.input, list) and isinstance(args.output, list): + for input_path, output_path in zip(args.input, args.output): + is_background = (input_path == scene_dict["background"]["registered"]) + + if input_path.endswith(".obj") or input_path.endswith(".glb"): + # Save current, then enforce per-asset overrides + prev_mass = args.mass + prev_approx = args.collision_approximation + prev_disable_grav = args.disable_gravity + prev_kinematic = args.kinematic_enabled + + args.collision_approximation = "convexDecomposition" + + if is_background: + args.mass = None + args.disable_gravity = True + args.kinematic_enabled = True + else: + args.mass = 1.0 + args.disable_gravity = False + args.kinematic_enabled = False + + convert_obj_to_usd(input_path, output_path) + + if is_background: + apply_rigidbody_api(output_path) + ensure_rigidbody_and_set_kinematic(output_path, disable_gravity=True) + set_convex_decomposition_params( + output_path, + voxel_resolution=600_000, + max_convex_hulls=512, + max_vertices_per_hull=128, + concavity=0.0005, + shrink_wrap=True, + overlap=False, + contact_offset=0.006, + rest_offset=-0.002 + ) + else: + apply_rigidbody_api(output_path) + enable_ccd_and_iters(output_path, pos_iters=14, vel_iters=4, + enable_ccd=True, enable_speculative_ccd=True) + set_convex_decomposition_params( + output_path, + voxel_resolution=600_000, + max_convex_hulls=24, + max_vertices_per_hull=64, + concavity=0.002, + shrink_wrap=True, + overlap=False, + contact_offset=0.006, + rest_offset=-0.002 + ) + + # Restore global args + args.mass = prev_mass + args.collision_approximation = prev_approx + args.disable_gravity = prev_disable_grav + args.kinematic_enabled = prev_kinematic + + log.info(f'Rigid body count: {count_rigid_api(output_path)}') + log.info(f"Saved USD file to {os.path.abspath(output_path)}") + + elif 
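The IO pairing that `run_conversion_for_key` builds from `scene.json` can be previewed without launching Isaac Sim (a sketch; key illustrative):

```python
import json

scene = json.load(open("outputs/demo/simulation/scene.json"))
pairs = [(scene["background"]["registered"],
          glb_to_usd(scene["background"]["registered"]))]
pairs += [(o["optimized"], glb_to_usd(o["optimized"]))
          for o in scene["objects"].values()]
for src, dst in pairs:
    print(src, "->", dst)  # background first, then each object
```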
input_path.endswith(".urdf"): + urdf_unique_path = make_urdf_with_unique_visual_names(input_path) + convert_urdf_to_usd(urdf_unique_path, output_path) + + if is_background: + apply_rigidbody_api(output_path) + ensure_rigidbody_and_set_kinematic(output_path, disable_gravity=True) + set_convex_decomposition_params( + output_path, + voxel_resolution=600_000, + max_convex_hulls=28, + max_vertices_per_hull=64, + concavity=0.0015, + shrink_wrap=True, + overlap=False, + contact_offset=0.006, + rest_offset=-0.002 + ) + elif not is_articulation(output_path): + apply_rigidbody_api(output_path) + enable_ccd_and_iters(output_path, pos_iters=14, vel_iters=4, + enable_ccd=True, enable_speculative_ccd=True) + set_convex_decomposition_params( + output_path, + voxel_resolution=600_000, + max_convex_hulls=24, + max_vertices_per_hull=64, + concavity=0.002, + shrink_wrap=True, + overlap=False, + contact_offset=0.006, + rest_offset=-0.002 + ) + + log.info(f'Rigid body count: {count_rigid_api(output_path)}') + log.info(f"Saved USD file to {os.path.abspath(output_path)}") + + else: + # Single-path branches kept for completeness (not used in batch flow) + input_path = args.input + output_path = args.output + if input_path.endswith(".obj") or input_path.endswith(".glb"): + convert_obj_to_usd(input_path, output_path) + apply_rigidbody_api(output_path) + enable_ccd_and_iters(output_path, pos_iters=14, vel_iters=4, + enable_ccd=True, enable_speculative_ccd=True) + set_convex_decomposition_params( + output_path, + voxel_resolution=600_000, + max_convex_hulls=24, + max_vertices_per_hull=64, + concavity=0.002, + shrink_wrap=True, + overlap=False, + contact_offset=0.006, + rest_offset=-0.002 + ) + log.info(f'Rigid body count: {count_rigid_api(output_path)}') + log.info(f"Saved USD file to {os.path.abspath(output_path)}") + + elif input_path.endswith(".urdf"): + urdf_unique_path = make_urdf_with_unique_visual_names(input_path) + convert_urdf_to_usd(urdf_unique_path, output_path) + apply_rigidbody_api(output_path) + enable_ccd_and_iters(output_path, pos_iters=14, vel_iters=4, + enable_ccd=True, enable_speculative_ccd=True) + set_convex_decomposition_params( + output_path, + voxel_resolution=600_000, + max_convex_hulls=24, + max_vertices_per_hull=64, + concavity=0.002, + shrink_wrap=True, + overlap=False, + contact_offset=0.006, + rest_offset=-0.002 + ) + log.info(f'Rigid body count: {count_rigid_api(output_path)}') + log.info(f"Saved USD file to {os.path.abspath(output_path)}") + +######################################################## +## Batch entry +######################################################## +def main(): + # Load keys from config/config.yaml and run per key + cfg = yaml.safe_load((base_dir / "config" / "config.yaml").open("r")) + keys = cfg["keys"] + for key in keys: + log.info(f"\n========== [USD Convert] key: {key} ==========") + run_conversion_for_key(key) + + # Exit or keep GUI (same behavior as your original main) + if args and args.exit_on_finish: + return + + if args and not args.headless: + carb_settings_iface = carb.settings.get_settings() + local_gui = carb_settings_iface.get("/app/window/enabled") + livestream_gui = carb_settings_iface.get("/app/livestream/enabled") + if local_gui or livestream_gui: + # If you want to open the last output USD in a viewer, uncomment below + # stage_utils.open_stage(args.output[-1] if isinstance(args.output, list) else args.output) + app = omni.kit.app.get_app_interface() + with contextlib.suppress(KeyboardInterrupt): + while app.is_running(): + app.update() + 
+if __name__ == "__main__": + main() + simulation_app.close() diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_trajectory_replay.py b/openreal2sim/simulation/isaaclab/data_collection/sim_trajectory_replay.py new file mode 100755 index 0000000..1ed673c --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_trajectory_replay.py @@ -0,0 +1,143 @@ +""" +Replay existing trajectories in Isaac Lab simulation. +""" +from __future__ import annotations + +# ─────────── AppLauncher ─────────── +import argparse, os, json, random, transforms3d +from pathlib import Path +import numpy as np +import torch +import yaml +from isaaclab.app import AppLauncher + +# ─────────── CLI ─────────── +parser = argparse.ArgumentParser("sim_policy") +parser.add_argument("--demo_dir", type=str, help="directory of robot trajectory") +parser.add_argument("--key", type=str, default=None, help="scene key (outputs/)") +parser.add_argument("--robot", type=str, default="franka") +parser.add_argument("--num_envs", type=int, default=1) +parser.add_argument("--num_trials", type=int, default=1) +parser.add_argument("--teleop_device", type=str, default="keyboard") +parser.add_argument("--sensitivity", type=float, default=1.0) +AppLauncher.add_app_launcher_args(parser) +args_cli = parser.parse_args() +args_cli.enable_cameras = True +args_cli.headless = False # headless mode for batch execution +app_launcher = AppLauncher(vars(args_cli)) +simulation_app = app_launcher.app + +# ─────────── Runtime imports ─────────── +import isaaclab.sim as sim_utils +from isaaclab.utils.math import subtract_frame_transforms + +from graspnetAPI.grasp import GraspGroup + + +# ─────────── Simulation environments ─────────── +from sim_base import BaseSimulator, get_next_demo_id +from sim_env_factory import make_env +from sim_preprocess.grasp_utils import get_best_grasp_with_hints +from sim_utils.transform_utils import pose_to_mat, mat_to_pose, grasp_to_world, grasp_approach_axis_batch +from sim_utils.sim_utils import load_sim_parameters + +BASE_DIR = Path.cwd() +sim_cfgs = json.load(open(Path(args_cli.demo_dir) / "config.json", "r")) +args_cli.key = sim_cfgs["key"] +img_folder = args_cli.key +out_dir = BASE_DIR / "outputs" + + +# ──────────────────────────── Heuristic Manipulation ──────────────────────────── +class HeuristicManipulation(BaseSimulator): + """ + Physical trial-and-error grasping with approach-axis perturbation: + • Multiple grasp proposals executed in parallel; + • Every attempt does reset → pre → grasp → close → lift → check; + • Early stops when any env succeeds; then re-exec for logging. 
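For reference, the `replay_actions` method defined just below consumes an array with one row per step, laid out as `[p(3) | q_wxyz(4) | gripper(1)]`, where gripper < 0.5 means open. A hand-built two-step example (values illustrative):

```python
import numpy as np

actions = np.zeros((2, 8), dtype=np.float32)  # two steps, single env
actions[:, 3] = 1.0                           # identity quaternion (w = 1)
actions[0, 0:3] = [0.4, 0.0, 0.30]            # step 0: approach, gripper open (0)
actions[1, 0:3] = [0.4, 0.0, 0.15]            # step 1: descend...
actions[1, 7] = 1.0                           # ...with the gripper closed
np.save("actions.npy", actions)               # replayed via --demo_dir
```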
+ """ + def __init__(self, sim, scene, sim_cfgs: dict): + robot_pose = torch.tensor( + sim_cfgs["robot_cfg"]["robot_pose"], + dtype=torch.float32, + device=sim.device + ) + super().__init__( + sim=sim, sim_cfgs=sim_cfgs, scene=scene, args=args_cli, + robot_pose=robot_pose, cam_dict=sim_cfgs["cam_cfg"], + out_dir=out_dir, img_folder=img_folder, + enable_motion_planning=True, + set_physics_props=True, debug_level=0, + ) + + self.selected_object_id = sim_cfgs["demo_cfg"]["manip_object_id"] + self.traj_key = sim_cfgs["demo_cfg"]["traj_key"] + self.traj_path = sim_cfgs["demo_cfg"]["traj_path"] + self.goal_offset = [0, 0, sim_cfgs["demo_cfg"]["goal_offset"]] + self.grasp_path = sim_cfgs["demo_cfg"]["grasp_path"] + self.grasp_idx = sim_cfgs["demo_cfg"]["grasp_idx"] + self.grasp_pre = sim_cfgs["demo_cfg"]["grasp_pre"] + self.grasp_delta = sim_cfgs["demo_cfg"]["grasp_delta"] + + # ---------- Helpers ---------- + def _to_base(self, pos_w: np.ndarray | torch.Tensor, quat_w: np.ndarray | torch.Tensor): + """World → robot base frame for all envs.""" + root = self.robot.data.root_state_w[:, 0:7] # [B,7] + p_w, q_w = self._ensure_batch_pose(pos_w, quat_w) + pb, qb = subtract_frame_transforms( + root[:, 0:3], root[:, 3:7], p_w, q_w + ) + return pb, qb # [B,3], [B,4] + + def replay_actions(self, actions: np.ndarray): + """ + Replay a sequence of recorded actions: (p[B,3], q[B,4], gripper[B,1]) + """ + n_steps = actions.shape[0] + + self.reset() + self.wait(gripper_open=True, steps=10) + + for t in range(n_steps): + print(f"[INFO] replay step {t}/{n_steps}") + act = actions[t:t+1] + p_b = torch.as_tensor(act[:, 0:3], dtype=torch.float32, device=self.sim.device) + q_b = torch.as_tensor(act[:, 3:7], dtype=torch.float32, device=self.sim.device) + g_b = act[:, 7] < 0.5 + jp, success = self.move_to(p_b, q_b, gripper_open=g_b) + if torch.any(success==False): + print(f"[ERR] replay step {t} failed.") + return False + jp = self.wait(gripper_open=g_b, steps=3) + return True + +# ──────────────────────────── Entry Point ──────────────────────────── +def main(): + env, _ = make_env( + cfgs=sim_cfgs, num_envs=args_cli.num_envs, + device=args_cli.device, + bg_simplify=False, + ) + sim, scene = env.sim, env.scene + + my_sim = HeuristicManipulation(sim, scene, sim_cfgs=sim_cfgs) + + demo_root = (out_dir / img_folder / "demos").resolve() + + for _ in range(args_cli.num_trials): + + robot_pose = torch.tensor(sim_cfgs["robot_cfg"]["robot_pose"], dtype=torch.float32, device=my_sim.sim.device) # [7], pos(3)+quat(wxyz)(4) + my_sim.set_robot_pose(robot_pose) + my_sim.demo_id = get_next_demo_id(demo_root) + my_sim.reset() + print(f"[INFO] start simulation demo_{my_sim.demo_id}") + actions = np.load(Path(args_cli.demo_dir) / "actions.npy") + my_sim.replay_actions(actions) + + env.close() + simulation_app.close() + +if __name__ == "__main__": + main() + os.system("quit()") + simulation_app.close() diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_utils/calibration_utils.py b/openreal2sim/simulation/isaaclab/data_collection/sim_utils/calibration_utils.py new file mode 100755 index 0000000..e8c3666 --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_utils/calibration_utils.py @@ -0,0 +1,88 @@ +import numpy as np +import transforms3d + +def _as_homo(T): + A = np.asarray(T, dtype=np.float64) + if A.shape == (4, 4): + return A + if A.shape == (3, 4): + M = np.eye(4, dtype=np.float64); M[:3, :4] = A; return M + if A.shape == (3, 3): + M = np.eye(4, dtype=np.float64); M[:3, :3] = A; return M + raise 
ValueError(f"Unsupported shape: {A.shape}") + +def _blk3(R): + M = np.eye(4, dtype=np.float64); M[:3, :3] = R; return M + +def _quat_wxyz(R): + q = transforms3d.quaternions.mat2quat(R) # (w,x,y,z) + return -q if q[0] < 0 else q + +def load_extrinsics(config: dict, key: str) -> np.ndarray: + """ + Load the extrinsic parameters from the scene JSON. + """ + + E_list = config.get("extrinsics", None) + if E_list is None: + print(f"Warning: No calibrated extrinsics found for {key}. Use random robot poses.") + return None + E = np.array(E_list).reshape(4, 4) + print(f"Loaded extrinsics for {key}:") + print(E) + return E + + +def calibration_to_robot_pose(scene_js: dict, T_camopencv_to_robotbase_m) -> tuple[np.ndarray, np.ndarray]: + """ + Reproduce the real-world calibrated camera to robot transform into simulation + Inputs: + scene_js: The scene JSON containing camera extrinsics + T_camopencv_to_robotbase_m: The transform matrix from camera_optical (in opencv camera format) to robot_base + Outputs: + pos_w: The position of the robot base in world coordinates + quat_w: The orientation of the robot base in world coordinates (as a quaternion) + meta: A dictionary containing additional information about the calibration + """ + + T_camopencv_to_world = _as_homo(scene_js["camera"]["camera_opencv_to_world"]) + + T_camopencv_to_robotbase = _as_homo(T_camopencv_to_robotbase_m) + + T_robotbase_to_world = T_camopencv_to_world @ np.linalg.inv(T_camopencv_to_robotbase) + + pos_w = T_robotbase_to_world[:3, 3].copy() + quat_w = _quat_wxyz(T_robotbase_to_world[:3, :3]) + + return pos_w, quat_w + +def calibration_to_robot_pose_deprecated(scene_js: dict, T_camisaac_to_robotbase_m) -> tuple[np.ndarray, np.ndarray]: + """ + Reproduce the real-world calibrated camera to robot transform into simulation + Inputs: + scene_js: The scene JSON containing camera extrinsics + T_camisaac_to_robotbase_m: The transform matrix from camera_isaac to robot_base + Outputs: + pos_w: The position of the robot base in world coordinates + quat_w: The orientation of the robot base in world coordinates (as a quaternion) + meta: A dictionary containing additional information about the calibration + """ + + T_camopencv_to_world = _as_homo(scene_js["camera"]["camera_opencv_to_world"]) + + T_camisaac_to_robotbase = _as_homo(T_camisaac_to_robotbase_m) + + # This may be the true transformation + # REP-103: camera_opencv(OpenCV: +x right, +y down, +z forward) -> camera_isaac(IsaacSim: +x forward, +y left, +z up) + T_camopencv_to_camisaac = np.array([[ 0, 0, 1], + [-1, 0, 0], + [ 0, -1, 0]], dtype=np.float64) + + T_camopencv_to_robotbase = T_camisaac_to_robotbase @ _blk3(T_camopencv_to_camisaac) # base -> camera_link + + T_robotbase_to_world = T_camopencv_to_world @ np.linalg.inv(T_camopencv_to_robotbase) + + pos_w = T_robotbase_to_world[:3, 3].copy() + quat_w = _quat_wxyz(T_robotbase_to_world[:3, :3]) + + return pos_w, quat_w diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_utils/sim_utils.py b/openreal2sim/simulation/isaaclab/data_collection/sim_utils/sim_utils.py new file mode 100755 index 0000000..2ddd68b --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_utils/sim_utils.py @@ -0,0 +1,257 @@ +import json +from pathlib import Path +import yaml +import torch +import random +import numpy as np +import transforms3d + +from sim_utils.calibration_utils import calibration_to_robot_pose, load_extrinsics + +default_config = { + "physics": "default", + "extrinsics": None, + "goal_offset": 0, + "grasp_idx": -1, + 
"grasp_pre": None, + "grasp_delta": None, + "traj_key": "fdpose_trajs", # "fdpose_trajs", "simple_trajs", "hybrid_trajs" + "manip_object_id": "1", +} + +def compose_configs(key_name: str, config: dict) -> dict: + ret_key_config = {} + local_config = config["local"].get(key_name, {}) + local_config = local_config.get("simulation", {}) + global_config = config.get("global", {}) + global_config = global_config.get("simulation", {}) + for param in default_config.keys(): + value = local_config.get(param, global_config.get(param, default_config[param])) + ret_key_config[param] = value + print(f"[Info] Config for {key_name}: {ret_key_config}") + return ret_key_config + +def load_sim_parameters(basedir, key) -> dict: + scene_json_path = Path(basedir) / "outputs" / key / "simulation" / "scene.json" + scene_json = json.load(open(scene_json_path, "r")) + exp_config = yaml.load(open(Path(basedir) / "config/config.yaml"), Loader=yaml.FullLoader) + exp_config = compose_configs(key, exp_config) + + # cam configs + cam_cfg = { + "width": int(scene_json["camera"]["width"]), + "height": int(scene_json["camera"]["height"]), + "fx": float(scene_json["camera"]["fx"]), + "fy": float(scene_json["camera"]["fy"]), + "cx": float(scene_json["camera"]["cx"]), + "cy": float(scene_json["camera"]["cy"]), + "cam_orientation": tuple(scene_json["camera"]["camera_heading_wxyz"]), + "scene_info": { + "move_to": list(scene_json["camera"]["camera_position"]), + "scene_min": list(scene_json["aabb"]["scene_min"]), + "scene_max": list(scene_json["aabb"]["scene_max"]), + "object_center":list(scene_json["objects"]["1"]["object_center"]), + }, + } + + # robot configs + E = load_extrinsics(exp_config, key) + if E is None: + robot_cfg = random.choice(robot_placement_candidates_v2(cam_cfg["scene_info"])) + pos_w, quat_w = robot_cfg["position"], robot_cfg["rotation"] + else: + pos_w, quat_w = calibration_to_robot_pose(scene_json, E) + robot_pose = list(pos_w) + list(quat_w) + robot_cfg = { + "robot_pose": robot_pose, + } + + # demo configs + goal_offset = exp_config.get("goal_offset", 0) + grasp_idx = exp_config.get("grasp_idx", -1) + grasp_pre = exp_config.get("grasp_pre", None) + grasp_delta = exp_config.get("grasp_delta", None) + traj_key = exp_config.get("traj_key", "fdpose_trajs") # "fdpose_trajs", "simple_trajs", "hybrid_trajs" + manip_object_id = exp_config.get("manip_object_id", "1") + traj_path = scene_json["objects"][manip_object_id][traj_key] + grasp_path = scene_json["objects"][manip_object_id].get("grasps", None) + demo_cfg = { + "manip_object_id": manip_object_id, + "traj_key": traj_key, + "traj_path": traj_path, + "goal_offset": goal_offset, + "grasp_idx": grasp_idx, + "grasp_pre": grasp_pre, + "grasp_delta": grasp_delta, + "grasp_path": grasp_path, + } + + # physics configs + physics_key = exp_config.get("physics", "default") + physics_cfg = {} + if physics_key == "default": + physics_cfg["bg_physics"] = default_bg_physics + physics_cfg["obj_physics"] = default_obj_physics + elif physics_key == "viz": + physics_cfg["bg_physics"] = default_bg_physics + physics_cfg["obj_physics"] = viz_obj_physics + else: + print(f"[WARN] Unrecognized physics key '{physics_key}', no physics specified in exp configs.") + physics_cfg["bg_physics"] = None + physics_cfg["obj_physics"] = None + + return { + "key": key, + "scene_cfg": scene_json, + "exp_cfg": exp_config, + "cam_cfg": cam_cfg, + "robot_cfg": robot_cfg, + "demo_cfg": demo_cfg, + "physics_cfg": physics_cfg, + } + + +# Default physics for background and objects +default_bg_physics = 
{ + "mass_props": {"mass": 100.0}, + "rigid_props": {"disable_gravity": True, "kinematic_enabled": True}, + "collision_props": {"collision_enabled": True,}, +} + +default_obj_physics = { + "mass_props": {"mass": 0.2}, + "rigid_props": {"disable_gravity": False, "kinematic_enabled": False}, + "collision_props": {"collision_enabled": True,}, +} + +# For viz +viz_obj_physics = { + "mass_props": {"mass": 0.5}, + "rigid_props": {"disable_gravity": True, "kinematic_enabled": False}, + "collision_props": {"collision_enabled": False,}, +} + +def robot_placement_candidates_v2( + scene_info: dict, + reachability=(0.3, 0.65), + reachability_zcenter_offset=0.3, + robot_aabb=(0.12, 0.12, 0.72), + num_radius_steps=10, + num_angle_steps=36, + num_z_steps=5, + occlusion_clearance=(0.02, 0.02, 0.02), # inflated base AABB for occlusion checks (m) + obj_world_radius=0.02, # small ring near object center (m) + num_obj_rays=8, # # of rays from ring points (>=8) + min_cam_base_separation_deg=45.0, # minimum separation angle wrt camera direction +): + """ + Return robot base poses (position + wxyz rotation) satisfying: + (1) Reachability + (2) No overlap with background AABB + (3) Same half-space as camera w.r.t object & min separation angle + (4) No occlusion from camera to object (center ray + ring rays) + """ + object_center = np.array(scene_info["object_center"], dtype=float) + scene_min = np.array(scene_info["scene_min"], dtype=float) + scene_max = np.array(scene_info["scene_max"], dtype=float) + cam_pos = np.array(scene_info["move_to"], dtype=float) + + def segment_intersects_aabb(p0, p1, aabb_min, aabb_max, eps=1e-9): + d = p1 - p0 + t0, t1 = 0.0, 1.0 + for i in range(3): + if abs(d[i]) < eps: + if p0[i] < aabb_min[i] or p0[i] > aabb_max[i]: + return False + else: + inv_d = 1.0 / d[i] + t_near = (aabb_min[i] - p0[i]) * inv_d + t_far = (aabb_max[i] - p0[i]) * inv_d + if t_near > t_far: + t_near, t_far = t_far, t_near + t0 = max(t0, t_near) + t1 = min(t1, t_far) + if t0 > t1: + return False + return True + + def edge_ring_points(obj_center, cam_pos, radius, m): + if m <= 0 or radius <= 0.0: + return np.empty((0, 3), dtype=float) + view = obj_center - cam_pos + n = np.linalg.norm(view) + if n < 1e-9: + return np.empty((0, 3), dtype=float) + u = view / n + tmp = np.array([1.0, 0.0, 0.0], dtype=float) + if abs(np.dot(tmp, u)) > 0.9: + tmp = np.array([0.0, 1.0, 0.0], dtype=float) + e1 = np.cross(u, tmp); e1 /= (np.linalg.norm(e1) + 1e-12) + e2 = np.cross(u, e1); e2 /= (np.linalg.norm(e2) + 1e-12) + phis = np.linspace(0.0, 2.0 * np.pi, num=m, endpoint=False) + ring = [obj_center + radius * (np.cos(phi) * e1 + np.sin(phi) * e2) for phi in phis] + return np.asarray(ring, dtype=float) + + candidates = [] + radius_values = np.linspace(reachability[0], reachability[1], num=num_radius_steps) + angle_values = np.linspace(0, 2 * np.pi, num=num_angle_steps, endpoint=False) + z_min, z_max = 0.0, 2.0 * float(object_center[2]) + z_values = np.linspace(z_min, z_max, num=num_z_steps) + ring_pts = edge_ring_points(object_center, cam_pos, obj_world_radius, num_obj_rays) + + cos_min_sep = np.cos(np.deg2rad(float(min_cam_base_separation_deg))) + + for z in z_values: + reach_center_z = z + reachability_zcenter_offset + for r in radius_values: + for theta in angle_values: + dx = r * np.cos(theta); dy = r * np.sin(theta) + base_pos = np.array([object_center[0] - dx, object_center[1] - dy, z], dtype=float) + + # (1) reachability + reach_dist = np.linalg.norm(object_center - np.array([base_pos[0], base_pos[1], reach_center_z])) + if 
reach_dist > reachability[1] or reach_dist < reachability[0]: + continue + + # (2) no overlap with background AABB + half = 0.5 * np.array(robot_aabb, dtype=float) + base_min = base_pos - half + base_max = base_pos + half + if np.all(base_max > scene_min) and np.all(base_min < scene_max): + continue + + # (3) same-side + min angle + v_cam = cam_pos - object_center + v_base = base_pos - object_center + n1 = np.linalg.norm(v_cam); n2 = np.linalg.norm(v_base) + if n1 < 1e-9 or n2 < 1e-9: + continue + cos_val = float(np.dot(v_cam, v_base) / (n1 * n2)) + if (cos_val < 0.0) or (cos_val > cos_min_sep): + continue + + # (4a) no occlusion on center ray + extra = np.array(occlusion_clearance, dtype=float) + occ_min = base_min - extra + occ_max = base_max + extra + if segment_intersects_aabb(cam_pos, object_center, occ_min, occ_max): + continue + # (4b) ring rays + if any(segment_intersects_aabb(cam_pos, P, occ_min, occ_max) for P in ring_pts): + continue + + # (5) yaw towards object in XY plane + facing_vec = object_center[:2] - base_pos[:2] + yaw = np.arctan2(facing_vec[1], facing_vec[0]) + quat_wxyz = transforms3d.euler.euler2quat(0.0, 0.0, yaw) + + candidates.append({ + "position": base_pos.tolist(), + "rotation": [float(x) for x in quat_wxyz], # (w,x,y,z) + "yaw_deg": float(np.degrees(yaw)), + "base_z": float(z), + }) + + if not candidates: + raise RuntimeError("No valid base found under reach/collision/same-side(min-angle)/occlusion constraints.") + return candidates diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_utils/transform_utils.py b/openreal2sim/simulation/isaaclab/data_collection/sim_utils/transform_utils.py new file mode 100755 index 0000000..a8997e9 --- /dev/null +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_utils/transform_utils.py @@ -0,0 +1,50 @@ +# transform_utils.py +import torch +import numpy as np +import transforms3d + +def grasp_to_world(grasp) -> tuple[np.ndarray, np.ndarray]: + """ + GraspNet -> EE: + X_grasp=approach, Y_grasp=width, Z_grasp=thickness + We want: Z_EE=approach, Y_EE=-width, X_EE=thickness + """ + Rg = grasp.rotation_matrix.astype(np.float32) + delta = np.array([[0, 0, 1], + [0, -1, 0], + [1, 0, 0]], dtype=np.float32) + Ree = Rg @ delta + q = transforms3d.quaternions.mat2quat(Ree).astype(np.float32) # wxyz + p = grasp.translation.astype(np.float32) + return p, q + +def grasp_approach_axis_batch(qw_batch: np.ndarray) -> np.ndarray: + """ + get the approach axis (Z axis) of each env in world frame + qw_batch: (B,4) quaternion (wxyz) + return: (B,3) each env's grasping approach axis (Z axis) in world frame + """ + qw_batch = np.asarray(qw_batch, dtype=np.float32) + a_list = [] + for q in qw_batch: + R = transforms3d.quaternions.quat2mat(q) + a = R[:, 2] + a_list.append((a / (np.linalg.norm(a) + 1e-8)).astype(np.float32)) + return np.stack(a_list, axis=0) + +def pose_to_mat(pos, quat): + if torch.is_tensor(pos): pos = pos.cpu().numpy() + if torch.is_tensor(quat): quat = quat.cpu().numpy() + m = np.eye(4, dtype=np.float32) + m[:3, :3] = transforms3d.quaternions.quat2mat(quat) + m[:3, 3] = pos + return m + +def mat_to_pose(mat): + pos = mat[:3, 3] + quat = transforms3d.quaternions.mat2quat(mat[:3, :3]) + return pos, quat + +def normalize_quaternion(q: np.ndarray) -> np.ndarray: + norm = np.linalg.norm(q, axis=1, keepdims=True) + 1e-9 + return q / norm From 6f21c3e9b2c98840219089258ce0b2d63118307d Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Mon, 10 Nov 2025 07:54:41 -0800 Subject: [PATCH 2/3] not saving failed traj --- 
docker/compose.yml | 1 - .../isaaclab/data_collection/sim_heuristic_manip.py | 8 ++++---- openreal2sim/simulation/isaaclab/sim_base.py | 2 +- openreal2sim/simulation/isaaclab/sim_heuristic_manip.py | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/docker/compose.yml b/docker/compose.yml index ac8d0e2..41d3486 100644 --- a/docker/compose.yml +++ b/docker/compose.yml @@ -33,7 +33,6 @@ services: context: .. dockerfile: docker/isaaclab/Dockerfile image: isaaclab:dev - user: "${HOST_UID}:${HOST_GID}" deploy: resources: reservations: diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py b/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py index 8f29fb3..d5c16d4 100755 --- a/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py @@ -219,7 +219,7 @@ def _to_base(self, pos_w: np.ndarray | torch.Tensor, quat_w: np.ndarray | torch. return pb, qb # [B,3], [B,4] # ---------- Batched execution & lift-check ---------- - def execute_and_lift_once_batch(self, info: dict, lift_height=0.06, position_threshold=0.1) -> tuple[np.ndarray, np.ndarray]: + def execute_and_lift_once_batch(self, info: dict, lift_height=0.06, position_threshold=0.2) -> tuple[np.ndarray, np.ndarray]: """ Reset → pre → grasp → close → lift → hold; return (success[B], score[B]). Now propagates motion planning failures by returning (None, None). @@ -692,9 +692,9 @@ def inference(self, std: float = 0.0, max_restart_attempts: int = 10, self.save_data() return True else: - print("[WARN] Task verification failed, but trajectory completed. Saving anyway...") - self.save_data() - return True + print("[WARN] Task verification failed, but trajectory completed") + print(f"[ERROR] Attempting restart") + self.clear_data() except RuntimeError as e: if "MotionPlanningFailure_RestartNeeded" in str(e): diff --git a/openreal2sim/simulation/isaaclab/sim_base.py b/openreal2sim/simulation/isaaclab/sim_base.py index ca01aa6..d3e95b7 100755 --- a/openreal2sim/simulation/isaaclab/sim_base.py +++ b/openreal2sim/simulation/isaaclab/sim_base.py @@ -948,4 +948,4 @@ def delete_data(self): os.system(f"mv {save_path} {failure_path}") for key in self.save_dict.keys(): self.save_dict[key] = [] - print("[INFO]: Clear up the folder: ", save_path) + print("[INFO]: Clear up the folder: ", save_path) \ No newline at end of file diff --git a/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py b/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py index 647af13..cf2f1d5 100755 --- a/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py +++ b/openreal2sim/simulation/isaaclab/sim_heuristic_manip.py @@ -515,4 +515,4 @@ def main(): if __name__ == "__main__": main() os.system("quit()") - simulation_app.close() + simulation_app.close() \ No newline at end of file From f535e1e61ef4ae803aa6cae6584d9c44b31b9937 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Mon, 10 Nov 2025 11:31:49 -0800 Subject: [PATCH 3/3] current updates for pipeline --- .../data_collection/sim_heuristic_manip.py | 483 +++++++++++++----- 1 file changed, 350 insertions(+), 133 deletions(-) diff --git a/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py b/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py index d5c16d4..e876e67 100755 --- a/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py +++ b/openreal2sim/simulation/isaaclab/data_collection/sim_heuristic_manip.py @@ -16,7 +16,7 @@ 
parser = argparse.ArgumentParser("sim_policy") parser.add_argument("--key", type=str, default="demo_video", help="scene key (outputs/)") parser.add_argument("--robot", type=str, default="franka") -parser.add_argument("--num_envs", type=int, default=1) +parser.add_argument("--num_envs", type=int, default=4) parser.add_argument("--num_trials", type=int, default=1) parser.add_argument("--teleop_device", type=str, default="keyboard") parser.add_argument("--sensitivity", type=float, default=1.0) @@ -366,62 +366,101 @@ def build_grasp_info( def grasp_trials(self, gg, std: float = 0.0005, max_retries: int = 3): """ - Try grasp proposals with automatic recovery from motion planning failures. - Returns dict with success flag and chosen grasp parameters. + Async grasp trials: each env finds its own successful grasp independently. + Environments that succeed early keep their grasp while failed envs continue searching. - Args: - gg: GraspGroup with grasp proposals - std: Standard deviation for random perturbations - max_retries: Number of retry attempts if motion planning fails + Returns dict with per-env success flags, chosen grasp parameters, and skip mask. + Envs that exhaust retries are marked to skip in future processing. """ B = self.scene.num_envs idx_all = list(range(len(gg))) if len(idx_all) == 0: print("[ERR] empty grasp list.") return { - "success": False, - "chosen_pose_w": None, - "chosen_pre": None, - "chosen_delta": None, + "success": np.zeros(B, dtype=bool), + "chosen_pose_w": [None] * B, + "chosen_pre": [None] * B, + "chosen_delta": [None] * B, + "skip_envs": np.zeros(B, dtype=bool), } rng = np.random.default_rng() pre_dist_const = 0.12 # m + # Track per-environment success and chosen grasps + env_success = np.zeros(B, dtype=bool) + env_chosen_pose_w = [None] * B # List of (pos, quat) tuples per env + env_chosen_pre = np.zeros(B, dtype=np.float32) + env_chosen_delta = np.zeros(B, dtype=np.float32) + env_best_score = np.zeros(B, dtype=np.float32) + env_skip = np.zeros(B, dtype=bool) # Track envs to skip due to exhausted retries + # Retry loop for grasp selection phase for retry_attempt in range(max_retries): try: print(f"\n{'='*60}") - print(f"GRASP SELECTION ATTEMPT {retry_attempt + 1}/{max_retries}") + print(f"ASYNC GRASP SELECTION ATTEMPT {retry_attempt + 1}/{max_retries}") + print(f"Successful envs: {env_success.sum()}/{B}") print(f"{'='*60}\n") - success = False - chosen_pose_w = None # (p_w, q_w) - chosen_pre = None - chosen_delta = None - - # Assign different grasp proposals to different envs + # If all non-skipped envs succeeded, we're done + active_mask = ~env_skip + if np.all(env_success[active_mask]): + successful_envs = np.where(env_success & ~env_skip)[0] + skipped_envs = np.where(env_skip)[0] + print(f"[SUCCESS] All active environments found valid grasps!") + if len(skipped_envs) > 0: + print(f"[INFO] Skipped {len(skipped_envs)} failed environments: {skipped_envs.tolist()}") + break + + # Get mask of envs that still need to find a grasp (not successful and not skipped) + active_failed_mask = (~env_success) & (~env_skip) + failed_env_ids = np.where(active_failed_mask)[0] + n_failed = len(failed_env_ids) + + if n_failed == 0: + # All remaining envs are either successful or skipped + break + + print(f"[INFO] {n_failed} environments still searching for grasps: {failed_env_ids.tolist()}") + + # Iterate through grasp proposals for start in range(0, len(idx_all), B): block = idx_all[start : start + B] if len(block) < B: block = block + [block[-1]] * (B - len(block)) + # Build 
grasp proposals for ALL envs (even successful ones get dummy data) grasp_pos_w_batch, grasp_quat_w_batch = [], [] - for idx in block: - p_w, q_w = grasp_to_world(gg[int(idx)]) - grasp_pos_w_batch.append(p_w.astype(np.float32)) - grasp_quat_w_batch.append(q_w.astype(np.float32)) + for b in range(B): + if env_success[b] or env_skip[b]: + # Use existing successful grasp or dummy for skipped env + if env_success[b]: + p_w, q_w = env_chosen_pose_w[b] + else: + # Dummy grasp for skipped env + p_w, q_w = grasp_to_world(gg[0]) + grasp_pos_w_batch.append(p_w.astype(np.float32)) + grasp_quat_w_batch.append(q_w.astype(np.float32)) + else: + # New grasp proposal for failed env + idx = block[b % len(block)] + p_w, q_w = grasp_to_world(gg[int(idx)]) + grasp_pos_w_batch.append(p_w.astype(np.float32)) + grasp_quat_w_batch.append(q_w.astype(np.float32)) + grasp_pos_w_batch = np.stack(grasp_pos_w_batch, axis=0) # (B,3) grasp_quat_w_batch = np.stack(grasp_quat_w_batch, axis=0) # (B,4) self.show_goal(grasp_pos_w_batch, grasp_quat_w_batch) # Random disturbance along approach axis pre_dist_batch = np.full((B,), pre_dist_const, dtype=np.float32) - delta_batch = rng.normal(0.0, std, size=(B,)).astype(np.float32) + delta_batch = rng.normal(0.0, std, size=(B,)).astype(np.float32) info = self.build_grasp_info(grasp_pos_w_batch, grasp_quat_w_batch, pre_dist_batch, delta_batch) + # Execute grasp trial for ALL envs ok_batch, score_batch = self.execute_and_lift_once_batch(info) # Check if motion planning returned valid results @@ -429,36 +468,60 @@ def grasp_trials(self, gg, std: float = 0.0005, max_retries: int = 3): print(f"[CRITICAL] Motion planning failed during grasp trial at block[{start}:{start+B}]") raise RuntimeError("GraspTrialMotionPlanningFailure") - ok_cnt = int(ok_batch.sum()) - print(f"[SEARCH] block[{start}:{start+B}] -> success {ok_cnt}/{B}") + # Update success status ONLY for envs that were still searching + for b in range(B): + if not env_success[b] and not env_skip[b]: # Only update if active and not already successful + if ok_batch[b] and score_batch[b] > env_best_score[b]: + # This env found a better grasp + env_success[b] = True + env_chosen_pose_w[b] = (grasp_pos_w_batch[b], grasp_quat_w_batch[b]) + env_chosen_pre[b] = pre_dist_batch[b] + env_chosen_delta[b] = delta_batch[b] + env_best_score[b] = score_batch[b] + print(f"[SUCCESS] Env {b} found valid grasp (score: {score_batch[b]:.2f})") + + # Check if all active envs now succeeded + active_mask = ~env_skip + newly_successful = np.sum(env_success[active_failed_mask]) + print(f"[SEARCH] block[{start}:{start+B}] -> {newly_successful}/{n_failed} previously-failed envs now successful") - if ok_cnt > 0: - winner = int(np.argmax(score_batch)) - chosen_pose_w = (grasp_pos_w_batch[winner], grasp_quat_w_batch[winner]) - chosen_pre = float(pre_dist_batch[winner]) - chosen_delta = float(delta_batch[winner]) - success = True - print(f"[SUCCESS] Found valid grasp on attempt {retry_attempt + 1}") + if np.all(env_success[active_mask]): + successful_envs = np.where(env_success & ~env_skip)[0] + skipped_envs = np.where(env_skip)[0] + print(f"[SUCCESS] All active environments found valid grasps on attempt {retry_attempt + 1}!") + if len(skipped_envs) > 0: + print(f"[INFO] {len(skipped_envs)} environments skipped: {skipped_envs.tolist()}") return { - "success": success, - "chosen_pose_w": chosen_pose_w, - "chosen_pre": chosen_pre, - "chosen_delta": chosen_delta, + "success": env_success, + "chosen_pose_w": env_chosen_pose_w, + "chosen_pre": env_chosen_pre, + 
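                        # chosen_* are per-env results; entries for envs flagged in
+                        # skip_envs are unset placeholders and should be ignored.
+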
"chosen_delta": env_chosen_delta, + "skip_envs": env_skip, } - - # If we get here, no grasp succeeded in this attempt - if not success: - print(f"[WARN] No proposal succeeded in grasp trial attempt {retry_attempt + 1}/{max_retries}") + + # End of this attempt - check status + active_mask = ~env_skip + if not np.all(env_success[active_mask]): + still_failed = np.where((~env_success) & (~env_skip))[0] + print(f"[WARN] {len(still_failed)} envs still need grasps after attempt {retry_attempt + 1}: {still_failed.tolist()}") if retry_attempt < max_retries - 1: - print("[INFO] Retrying grasp selection...") + print("[INFO] Retrying grasp selection for failed environments...") continue else: - print("[ERR] No proposal succeeded to lift after all grasp trial attempts.") + # Last attempt exhausted - mark remaining failed envs as skipped + print(f"[WARN] Exhausted all {max_retries} grasp attempts") + print(f"[INFO] Marking {len(still_failed)} failed environments to skip: {still_failed.tolist()}") + env_skip[still_failed] = True + + successful_envs = np.where(env_success)[0] + print(f"[INFO] Proceeding with {len(successful_envs)} successful environments: {successful_envs.tolist()}") + return { - "success": False, - "chosen_pose_w": None, - "chosen_pre": None, - "chosen_delta": None, + "success": env_success, + "chosen_pose_w": env_chosen_pose_w, + "chosen_pre": env_chosen_pre, + "chosen_delta": env_chosen_delta, + "skip_envs": env_skip, } except RuntimeError as e: @@ -473,14 +536,19 @@ def grasp_trials(self, gg, std: float = 0.0005, max_retries: int = 3): continue else: print("[ERR] Grasp trial failed after all retry attempts") + # Mark all currently failed envs as skipped + failed_envs = np.where((~env_success) & (~env_skip))[0] + if len(failed_envs) > 0: + print(f"[INFO] Marking {len(failed_envs)} failed environments to skip: {failed_envs.tolist()}") + env_skip[failed_envs] = True return { - "success": False, - "chosen_pose_w": None, - "chosen_pre": None, - "chosen_delta": None, + "success": env_success, + "chosen_pose_w": env_chosen_pose_w, + "chosen_pre": env_chosen_pre, + "chosen_delta": env_chosen_delta, + "skip_envs": env_skip, } else: - # Unknown error, re-raise raise except Exception as e: @@ -490,20 +558,28 @@ def grasp_trials(self, gg, std: float = 0.0005, max_retries: int = 3): self.clear_data() continue else: + # Mark all currently failed envs as skipped + failed_envs = np.where((~env_success) & (~env_skip))[0] + if len(failed_envs) > 0: + print(f"[INFO] Marking {len(failed_envs)} failed environments to skip: {failed_envs.tolist()}") + env_skip[failed_envs] = True return { - "success": False, - "chosen_pose_w": None, - "chosen_pre": None, - "chosen_delta": None, + "success": env_success, + "chosen_pose_w": env_chosen_pose_w, + "chosen_pre": env_chosen_pre, + "chosen_delta": env_chosen_delta, + "skip_envs": env_skip, } - # Fallback (should not reach here) + # Return final status return { - "success": False, - "chosen_pose_w": None, - "chosen_pre": None, - "chosen_delta": None, + "success": env_success, + "chosen_pose_w": env_chosen_pose_w, + "chosen_pre": env_chosen_pre, + "chosen_delta": env_chosen_delta, + "skip_envs": env_skip, } + def is_success(self, position_threshold: float = 0.05) -> bool: """ @@ -557,16 +633,98 @@ def is_success(self, position_threshold: float = 0.05) -> bool: print("="*50 + "\n") return all_success + + def save_data_selective(self, skip_envs: np.ndarray, ignore_keys: List[str] = []): + """ + Save data only for non-skipped environments. 
+ + Args: + skip_envs: Boolean array [B] indicating which envs to skip + ignore_keys: List of keys to ignore during save + """ + save_root = self._demo_dir() + save_root.mkdir(parents=True, exist_ok=True) + + stacked = {k: np.array(v) for k, v in self.save_dict.items()} + + active_envs = np.where(~skip_envs)[0] + print(f"[INFO] Saving data for {len(active_envs)} active environments: {active_envs.tolist()}") + + for b in active_envs: + env_dir = self._env_dir(save_root, b) + for key, arr in stacked.items(): + if key in ignore_keys: + continue + if key == "rgb": + video_path = env_dir / "sim_video.mp4" + writer = imageio.get_writer( + video_path, fps=50, macro_block_size=None + ) + for t in range(arr.shape[0]): + writer.append_data(arr[t, b]) + writer.close() + elif key == "segmask": + video_path = env_dir / "mask_video.mp4" + writer = imageio.get_writer( + video_path, fps=50, macro_block_size=None + ) + for t in range(arr.shape[0]): + writer.append_data((arr[t, b].astype(np.uint8) * 255)) + writer.close() + elif key == "depth": + depth_seq = arr[:, b] + flat = depth_seq[depth_seq > 0] + max_depth = np.percentile(flat, 99) if flat.size > 0 else 1.0 + depth_norm = np.clip(depth_seq / max_depth * 255.0, 0, 255).astype( + np.uint8 + ) + video_path = env_dir / "depth_video.mp4" + writer = imageio.get_writer( + video_path, fps=50, macro_block_size=None + ) + for t in range(depth_norm.shape[0]): + writer.append_data(depth_norm[t]) + writer.close() + np.save(env_dir / f"{key}.npy", depth_seq) + else: + np.save(env_dir / f"{key}.npy", arr[:, b]) + json.dump(self.sim_cfgs, open(env_dir / "config.json", "w"), indent=2) + + print("[INFO]: Demonstration is saved at: ", save_root) + + # Compose real videos only for active environments + print("\n[INFO] Composing real videos with background...") + for b in active_envs: + print(f"[INFO] Processing environment {b}/{len(active_envs)}...") + success = self.compose_real_video(env_id=b) + if success: + print(f"[INFO] Real video composed successfully for env {b}") + else: + print(f"[WARN] Failed to compose real video for env {b}") + + demo_root = self.out_dir / "all_demos" + demo_root.mkdir(parents=True, exist_ok=True) + total_demo_id = get_next_demo_id(demo_root) + demo_save_path = demo_root / f"demo_{total_demo_id}" + demo_save_path.mkdir(parents=True, exist_ok=True) + meta_info = { + "path": str(save_root), + "fps": 50, + "active_envs": active_envs.tolist(), + "skipped_envs": np.where(skip_envs)[0].tolist(), + } + with open(demo_save_path / "meta_info.json", "w") as f: + json.dump(meta_info, f) + os.system(f"cp -r {save_root}/* {demo_save_path}") + print("[INFO]: Demonstration is saved at: ", demo_save_path) def inference(self, std: float = 0.0, max_restart_attempts: int = 10, - max_grasp_retries: int = 3, regrasp_after_failures: int = 3) -> bool: + max_grasp_retries: int = 1, regrasp_after_failures: int = 1) -> bool: """ - Main function with automatic restart on motion planning failures. - Now has separate retry loops for: - 1. Grasp selection phase (max_grasp_retries) - 2. Trajectory following phase (max_restart_attempts) - 3. Re-grasp after N consecutive trajectory failures (regrasp_after_failures) + Main function with async per-env grasp selection and trajectory following. + Each environment progresses independently through grasp selection, and only + moves to trajectory following once ALL envs have found successful grasps. 
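+        Envs that exhaust their grasp retries are excluded via skip_envs
+        rather than blocking the rest of the batch.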
""" B = self.scene.num_envs @@ -578,59 +736,94 @@ def inference(self, std: float = 0.0, max_restart_attempts: int = 10, gg = GraspGroup().from_npy(npy_file_path=npy_path) gg = get_best_grasp_with_hints(gg, point=None, direction=[0, 0, -1]) - # Track consecutive trajectory failures for re-grasping - consecutive_trajectory_failures = 0 + # Track per-environment state + env_grasp_success = np.zeros(B, dtype=bool) + env_traj_success = np.zeros(B, dtype=bool) + env_consecutive_failures = np.zeros(B, dtype=int) + env_skip = np.zeros(B, dtype=bool) # Track envs to skip entirely + total_grasp_attempts = 0 - max_total_grasp_attempts = 5 # Prevent infinite re-grasping loop + max_total_grasp_attempts = 5 # ========== OUTER LOOP: RE-GRASP IF NEEDED ========== while total_grasp_attempts < max_total_grasp_attempts: total_grasp_attempts += 1 - # ========== PHASE 1: GRASP SELECTION (with retries) ========== + # ========== PHASE 1: ASYNC GRASP SELECTION ========== if self.grasp_idx >= 0: - # Fixed grasp index mode + # Fixed grasp index mode - all envs use same grasp if self.grasp_idx >= len(gg): print(f"[ERR] grasp_idx {self.grasp_idx} out of range [0,{len(gg)})") return False print(f"[INFO] using fixed grasp index {self.grasp_idx} for all envs.") p_w, q_w = grasp_to_world(gg[int(self.grasp_idx)]) - ret = { - "success": True, - "chosen_pose_w": (p_w.astype(np.float32), q_w.astype(np.float32)), - "chosen_pre": self.grasp_pre if self.grasp_pre is not None else 0.12, - "chosen_delta": self.grasp_delta if self.grasp_delta is not None else 0.0, - } + + # Prepare per-env format + env_grasp_success = np.ones(B, dtype=bool) + env_chosen_pose_w = [(p_w.astype(np.float32), q_w.astype(np.float32))] * B + env_chosen_pre = np.full(B, self.grasp_pre if self.grasp_pre is not None else 0.12, dtype=np.float32) + env_chosen_delta = np.full(B, self.grasp_delta if self.grasp_delta is not None else 0.0, dtype=np.float32) + env_skip = np.zeros(B, dtype=bool) else: - # Automatic grasp selection with retries + # Automatic async grasp selection if total_grasp_attempts > 1: print(f"\n{'#'*60}") print(f"RE-GRASP ATTEMPT {total_grasp_attempts}/{max_total_grasp_attempts}") print(f"{'#'*60}\n") + ret = self.grasp_trials(gg, std=std, max_retries=max_grasp_retries) + + env_grasp_success = ret["success"] + env_chosen_pose_w = ret["chosen_pose_w"] + env_chosen_pre = ret["chosen_pre"] + env_chosen_delta = ret["chosen_delta"] + env_skip = ret["skip_envs"] + + # Check if any active envs found grasps + active_envs = np.where(~env_skip)[0] + successful_envs = np.where(env_grasp_success & ~env_skip)[0] + + if len(active_envs) == 0: + print("[ERR] All environments failed grasp selection and were skipped") + return False - if not ret["success"]: - print("[ERR] No successful grasp found after all attempts") + if len(successful_envs) == 0: + print(f"[ERR] None of the {len(active_envs)} active environments found grasps") return False - print(f"\n[SUCCESS] Grasp selected: delta={ret['chosen_delta']:.4f}m") + print(f"\n[SUCCESS] {len(successful_envs)}/{len(active_envs)} active environments found valid grasps!") + if len(successful_envs) < len(active_envs): + skipped_envs = np.where(env_skip)[0] + print(f"[INFO] {len(skipped_envs)} environments skipped: {skipped_envs.tolist()}") + + for b in successful_envs: + print(f" Env {b}: delta={env_chosen_delta[b]:.4f}m") - # Reset trajectory failure counter for new grasp - consecutive_trajectory_failures = 0 + # Reset trajectory failure counters for new grasps + env_consecutive_failures[:] = 0 + 
env_traj_success[:] = False - # ========== PHASE 2: TRAJECTORY FOLLOWING (with restarts) ========== + # ========== PHASE 2: TRAJECTORY FOLLOWING (with per-env restart) ========== for restart_attempt in range(max_restart_attempts): try: print(f"\n{'='*60}") print(f"TRAJECTORY EXECUTION ATTEMPT {restart_attempt + 1}/{max_restart_attempts}") + active_envs = np.where(~env_skip)[0] + successful_traj_envs = np.where(env_traj_success & ~env_skip)[0] + print(f"Active envs: {active_envs.tolist()}") + print(f"Successful envs: {len(successful_traj_envs)}/{len(active_envs)}") print(f"{'='*60}\n") - # Prepare grasp info - p_win, q_win = ret["chosen_pose_w"] - p_all = np.repeat(p_win.reshape(1, 3), B, axis=0) - q_all = np.repeat(q_win.reshape(1, 4), B, axis=0) - pre_all = np.full((B,), ret["chosen_pre"], dtype=np.float32) - del_all = np.full((B,), ret["chosen_delta"], dtype=np.float32) + # If all active envs succeeded in trajectory, we're done + if np.all(env_traj_success[~env_skip]): + print(f"[SUCCESS] All {len(active_envs)} active environments completed trajectories!") + break + + # Prepare grasp info for ALL envs using their individual grasps + p_all = np.stack([pose[0] for pose in env_chosen_pose_w], axis=0) # [B,3] + q_all = np.stack([pose[1] for pose in env_chosen_pose_w], axis=0) # [B,4] + pre_all = env_chosen_pre # [B] + del_all = env_chosen_delta # [B] info_all = self.build_grasp_info(p_all, q_all, pre_all, del_all) # Reset and execute: open → pre → grasp → close → follow_object_goals @@ -651,108 +844,132 @@ def inference(self, std: float = 0.0, max_restart_attempts: int = 10, self.wait(gripper_open=True, steps=4) # Pre-grasp - print("[INFO] Moving to pre-grasp position...") + print("[INFO] Moving to pre-grasp positions...") jp, success = self.move_to(info_all["pre_p_b"], info_all["pre_q_b"], gripper_open=True) if jp is None or torch.any(success == False): - print("[WARN] Pre-grasp motion failed, restarting...") - consecutive_trajectory_failures += 1 - if consecutive_trajectory_failures >= regrasp_after_failures: - break # Exit to re-grasp + print("[WARN] Pre-grasp motion failed for some envs") + failed_envs = torch.where(success == False)[0].cpu().numpy() if success is not None else np.arange(B) + # Only increment failure counter for ACTIVE envs that failed + for b in failed_envs: + if not env_skip[b]: # Only track failures for active envs + env_consecutive_failures[b] += 1 + + # Check if any ACTIVE env needs re-grasp + active_need_regrasp = np.where((env_consecutive_failures >= regrasp_after_failures) & ~env_skip)[0] + if len(active_need_regrasp) > 0: + print(f"[INFO] Active envs {active_need_regrasp.tolist()} need re-grasping") + break continue + self.save_dict["actions"].append(np.concatenate([info_all["pre_p_b"].cpu().numpy(), info_all["pre_q_b"].cpu().numpy(), np.zeros((B, 1))], axis=1)) jp = self.wait(gripper_open=True, steps=3) # Grasp - print("[INFO] Moving to grasp position...") + print("[INFO] Moving to grasp positions...") jp, success = self.move_to(info_all["p_b"], info_all["q_b"], gripper_open=True) if jp is None or torch.any(success == False): - print("[WARN] Grasp motion failed, restarting...") - consecutive_trajectory_failures += 1 - if consecutive_trajectory_failures >= regrasp_after_failures: - break # Exit to re-grasp + print("[WARN] Grasp motion failed for some envs") + failed_envs = torch.where(success == False)[0].cpu().numpy() if success is not None else np.arange(B) + # Only increment failure counter for ACTIVE envs that failed + for b in failed_envs: + if not 
env_skip[b]: # Only track failures for active envs + env_consecutive_failures[b] += 1 + + # Check if any ACTIVE env needs re-grasp + active_need_regrasp = np.where((env_consecutive_failures >= regrasp_after_failures) & ~env_skip)[0] + if len(active_need_regrasp) > 0: + print(f"[INFO] Active envs {active_need_regrasp.tolist()} need re-grasping") + break continue + self.save_dict["actions"].append(np.concatenate([info_all["p_b"].cpu().numpy(), info_all["q_b"].cpu().numpy(), np.zeros((B, 1))], axis=1)) # Close gripper - print("[INFO] Closing gripper...") + print("[INFO] Closing grippers...") jp = self.wait(gripper_open=False, steps=50) self.save_dict["actions"].append(np.concatenate([info_all["p_b"].cpu().numpy(), info_all["q_b"].cpu().numpy(), np.ones((B, 1))], axis=1)) - # Follow object trajectory - THIS CAN RAISE RuntimeError - print("[INFO] Following object trajectory...") + # Follow object trajectory + print("[INFO] Following object trajectories...") jp = self.follow_object_goals(jp, sample_step=5, visualize=True) - # If we reach here, trajectory following succeeded! - print("[SUCCESS] Trajectory completed successfully!") + # If we reach here, trajectory following succeeded for all! + print("[SUCCESS] All trajectories completed successfully!") + env_traj_success[:] = True - # Verify task completion + # Verify task completion (only for active envs) task_success = self.is_success(position_threshold=0.1) if task_success: - print("[SUCCESS] Task verification passed!") - self.save_data() + print(f"[SUCCESS] Task verification passed for all {len(active_envs)} active environments!") + self.save_data_selective(env_skip) return True else: - print("[WARN] Task verification failed, but trajectory completed") - print(f"[ERROR] Attempting restart") + print("[WARN] Task verification failed for some environments") self.clear_data() except RuntimeError as e: if "MotionPlanningFailure_RestartNeeded" in str(e): print(f"\n[RESTART] Motion planning failed mid-trajectory on attempt {restart_attempt + 1}") print("[RESTART] Object was likely dropped. 
Restarting from grasp phase...") - print(f"[RESTART] Remaining attempts: {max_restart_attempts - restart_attempt - 1}\n") - # Clear the corrupted trajectory data + # Increment failure counter ONLY for ACTIVE envs that haven't succeeded + for b in range(B): + if not env_traj_success[b] and not env_skip[b]: # Only active, unsuccessful envs + env_consecutive_failures[b] += 1 + self.clear_data() - consecutive_trajectory_failures += 1 - # Check if we need to re-grasp - if consecutive_trajectory_failures >= regrasp_after_failures: + # Check if any ACTIVE env needs re-grasp + active_need_regrasp = np.where((env_consecutive_failures >= regrasp_after_failures) & ~env_skip)[0] + if len(active_need_regrasp) > 0: print(f"\n{'!'*60}") - print(f"CONSECUTIVE TRAJECTORY FAILURES: {consecutive_trajectory_failures}") - print(f"RE-SELECTING GRASP (total attempt {total_grasp_attempts + 1}/{max_total_grasp_attempts})") + print(f"Active envs {active_need_regrasp.tolist()} need re-grasping") + print(f"RE-SELECTING GRASPS (total attempt {total_grasp_attempts + 1}/{max_total_grasp_attempts})") print(f"{'!'*60}\n") - break # Exit trajectory loop to re-grasp + break - # Continue to next restart attempt continue else: - # Unknown error, re-raise raise except Exception as e: print(f"[ERROR] Unexpected error during execution: {type(e).__name__}: {e}") - print(f"[ERROR] Attempting restart {restart_attempt + 1}/{max_restart_attempts}...") + + # Increment failure counter ONLY for ACTIVE envs that haven't succeeded + for b in range(B): + if not env_traj_success[b] and not env_skip[b]: # Only active, unsuccessful envs + env_consecutive_failures[b] += 1 + self.clear_data() - consecutive_trajectory_failures += 1 - # Check if we need to re-grasp - if consecutive_trajectory_failures >= regrasp_after_failures: + # Check if any ACTIVE env needs re-grasp + active_need_regrasp = np.where((env_consecutive_failures >= regrasp_after_failures) & ~env_skip)[0] + if len(active_need_regrasp) > 0: print(f"\n{'!'*60}") - print(f"CONSECUTIVE TRAJECTORY FAILURES: {consecutive_trajectory_failures}") - print(f"RE-SELECTING GRASP (total attempt {total_grasp_attempts + 1}/{max_total_grasp_attempts})") + print(f"Active envs {active_need_regrasp.tolist()} need re-grasping") + print(f"RE-SELECTING GRASPS (total attempt {total_grasp_attempts + 1}/{max_total_grasp_attempts})") print(f"{'!'*60}\n") - break # Exit trajectory loop to re-grasp + break continue # Check why we exited the trajectory loop - if consecutive_trajectory_failures >= regrasp_after_failures: - # We broke out to re-grasp + active_need_regrasp = np.where((env_consecutive_failures >= regrasp_after_failures) & ~env_skip)[0] + if len(active_need_regrasp) > 0: + # Some ACTIVE envs need new grasps if total_grasp_attempts >= max_total_grasp_attempts: - print(f"\n[FAILURE] Exhausted all grasp attempts ({max_total_grasp_attempts})") + print(f"\n[FAILURE] Active envs {active_need_regrasp.tolist()} exhausted all grasp attempts ({max_total_grasp_attempts})") return False - # Continue outer loop to select new grasp + # Continue outer loop to select new grasps for failed envs continue else: - # We exhausted all restart attempts without hitting re-grasp threshold + # We exhausted all restart attempts print(f"\n[FAILURE] Failed after {max_restart_attempts} trajectory restart attempts") return False # If we exit the outer loop, we exhausted all grasp attempts print(f"\n[FAILURE] Failed after {max_total_grasp_attempts} total grasp selection attempts") return False - # 
───────────────────────────────────────────────────────────────────────────── Entry Point ───────────────────────────────────────────────────────────────────────────── def main(): sim_cfgs = load_sim_parameters(BASE_DIR, args_cli.key)