In [1]:
from pathlib import Path

# Pretrained SAC checkpoint for PlaceCubeInBowlXArm-v5 (step 3,200,000).
# Path() joins its positional segments exactly like the `/` operator does.
model_ckpt = Path(
    "/rl_benchmark/real_robot/model_checkpoints/XArm_pretrained_ckpt",
    "PlaceCubeInBowlXArm-v5/sac/0-nobboxobs-eepctl-tongzhou8s4u1p-g90/models/model_3200000.ckpt",
)
model_ckpt

PosixPath('/rl_benchmark/real_robot/model_checkpoints/XArm_pretrained_ckpt/PlaceCubeInBowlXArm-v5/sac/0-nobboxobs-eepctl-tongzhou8s4u1p-g90/models/model_3200000.ckpt')

In [2]:
# Command-line style arguments handed to pyrl_init(): the config file, the
# evaluation flag, then key=value overrides consumed by --cfg-options.
args = (
    [
        '/rl_benchmark/pyrl/configs/mfrl/sac/maniskill2/maniskill2_state.py',
        '--eval',
        '--cfg-options',
    ]
    # Environment overrides.
    + [
        "env_cfg.env_name=PlaceCubeInBowlXArm-v5",
        "env_cfg.remove_obs_extra=cube_bbox,bowl_bbox",
        "env_cfg.control_mode=pd_ee_delta_pos",
        "env_cfg.horizon=50",
    ]
    # Agent (network / SAC) overrides.
    + [
        "agent_cfg.actor_cfg.nn_cfg.mlp_spec=obs_shape,256,256,256,action_shape*2",
        "agent_cfg.critic_cfg.nn_cfg.mlp_spec=obs_shape+action_shape,256,256,256,1",
        "agent_cfg.actor_cfg.head_cfg.log_std_clip_tanh=True",
        "agent_cfg.actor_cfg.head_cfg.log_std_bound=-5,2",
        "agent_cfg.gamma=0.9",
    ]
)

In [3]:
import open3d as o3d

def pyrl_init(args, device="cuda"):
    """Build a pyrl agent (plus optional rollout / evaluator / replay) inside a notebook.

    This mirrors the set-up phase of a pyrl command-line run without starting
    training or evaluation: it parses ``args``, merges the ``--cfg-options``
    overrides into the config file, prepares the work directory and logger,
    builds the replay buffer / rollout / evaluator that the config asks for,
    resolves the shape placeholders in the config, and builds the agent.

    Args:
        args: list of command-line style strings (config path followed by flags),
            parsed with argparse exactly as a script would parse ``sys.argv[1:]``.
        device: device the built agent is moved to via ``agent.to(device)``.
            Defaults to ``"cuda"``, the value that used to be hard-coded.

    Returns:
        Tuple ``(agent, rollout, evaluator, replay)``. ``rollout`` is ``None`` in
        evaluation mode or without ``rollout_cfg``; ``evaluator`` is ``None``
        without ``eval_cfg``; ``replay`` is ``None`` when no replay buffer is built.

    Side effects:
        Creates (and in evaluation mode wipes) directories under the work dir,
        dumps the merged config there, sets the ``PYRL_DEBUG`` and
        ``PYRL_LOGGER_NAME`` environment variables, seeds the CPU RNGs and
        removes a stale mujoco_py build lock if present.
    """

    # Imports
    import argparse
    import glob
    import os
    import os.path as osp
    import shutil
    import time
    import warnings
    from copy import deepcopy
    from pathlib import Path
    import gym
    import numpy as np

    from pyrl.utils.meta import (
        Config,
        DictAction,
        add_dist_var,
        add_env_var,
        colored_print,
        get_logger,
        is_debug_mode,
        set_cpu_random_seed,
        log_meta_info,
        get_total_memory,
    )

    warnings.simplefilter(action="ignore")

    from pyrl.utils.data import is_not_null, is_null

    def parse_args(cmd_args=None):
        """Parse ``cmd_args`` and return ``(args, cfg)`` with the cfg overrides merged in."""
        # None must not fall through to argparse, which would then read sys.argv.
        if cmd_args is None:
            cmd_args = []

        parser = argparse.ArgumentParser(description="Unified API for Training and Evaluation")
        # Configurations
        parser.add_argument("config", help="Configuration file path")
        parser.add_argument(
            "--cfg-options",
            "--opt",
            nargs="+",
            action=DictAction,
            help="Override some settings in the configuration file. The key-value pair "
            "in xxx=yyy format will be merged into config file. If the value to "
            'be overridden is a list, it should be like key="[a,b]" or key=a,b '
            'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
            "Note that the quotation marks are necessary and that no white space "
            "is allowed.",
        )
        parser.add_argument("--debug", action="store_true", default=False)

        # Parameters for log dir
        parser.add_argument("--work-dir", help="The directory to save logs and models")
        parser.add_argument("--dev", action="store_true", default=False, help="Add timestamp to the name of work-dir")
        parser.add_argument("--with-agent-type", default=False, action="store_true", help="Add agent type to work-dir")
        parser.add_argument(
            "--agent-type-first",
            default=False,
            action="store_true",
            help="When work-dir is None, we will use agent_type/config_name or config_name/agent_type as work-dir",
        )
        parser.add_argument("--clean-up", help="Clean up the work-dir", action="store_true")

        # Evaluation mode
        parser.add_argument("--evaluation", "--eval", help="Evaluate a model, instead of training it", action="store_true")
        parser.add_argument("--reg-loss", help="Measure regression loss during evaluation", action="store_true")
        parser.add_argument("--test-name", help="Subdirectory name under work-dir to save the test result (if None, use {work-dir}/test)", default=None)

        # Resume checkpoint model
        parser.add_argument("--resume-from", default=None, nargs="+", help="A specific checkpoint file to resume from")
        parser.add_argument(
            "--auto-resume",
            help="Auto-resume the checkpoint under work-dir. If --resume-from is not specified, --auto-resume is set to True",
            action="store_true",
        )
        parser.add_argument("--resume-keys-map", default=None, nargs="+", action=DictAction, help="Specify how to change the model keys in checkpoints")

        # Specify GPU
        group_gpus = parser.add_mutually_exclusive_group()
        group_gpus.add_argument("--num-cpus", default=None, type=int, help="Number of cpus to use")
        group_gpus.add_argument("--num-gpus", default=None, type=int, help="Number of gpus to use")
        group_gpus.add_argument("--gpu-ids", default=None, type=int, nargs="+", help="ids of gpus to use")
        parser.add_argument(
            "--env-gpu-ids", default=None, type=int, nargs="+", help="ids of gpus for environment simulation; if not specified, this equals --gpu-ids"
        )

        # Torch and reproducibility settings
        parser.add_argument("--seed", type=int, default=None, help="Set torch and numpy random seed")
        parser.add_argument("--cudnn-benchmark", action="store_true", help="Whether to use benchmark mode in cudnn.")

        parser.add_argument("--deterministic", action="store_true", help="Whether to use deterministic mode for torch.")
        parser.add_argument(
            "--reproducible", action="store_true", help="Use deterministic mode also, the program will check the if the code is committed with git!"
        )

        args = parser.parse_args(cmd_args)

        # Merge cfg with args.cfg_options
        cfg = Config.fromfile(args.config)
        if args.cfg_options is not None:
            for key, value in args.cfg_options.items():
                # SECURITY NOTE: eval() executes arbitrary expressions; only pass
                # trusted override strings. Values that are not evaluable (plain
                # names, already-parsed lists, ...) are deliberately kept as they are.
                try:
                    args.cfg_options[key] = eval(value)
                except Exception:
                    pass
            cfg.merge_from_dict(args.cfg_options)

        args.with_agent_type = args.with_agent_type or args.agent_type_first
        # Make sure the optional sections exist so later code can test them for None.
        for key in ["work_dir", "env_cfg", "resume_from", "eval_cfg", "replay_cfg", "expert_replay_cfg", "recent_traj_replay_cfg", "rollout_cfg"]:
            cfg[key] = cfg.get(key, None)
        if args.debug:
            os.environ["PYRL_DEBUG"] = "True"
        elif "PYRL_DEBUG" not in os.environ:
            os.environ["PYRL_DEBUG"] = "False"

        if args.seed is None:
            # No seed given: draw one, leave the --deterministic flag as passed.
            args.seed = np.random.randint(2**32 - int(1e8))
        else:
            # An explicit seed implies deterministic mode.
            args.deterministic = True

        if args.reproducible:
            args.deterministic = True

        if args.evaluation:
            from pyrl.methods.builder import SL

            args.reg_loss = args.reg_loss or cfg.agent_cfg.type in SL
        args.mode = "eval" if args.evaluation else "train"

        return args, cfg

    def get_python_env_info():
        """Normalise the GPU selection on ``args`` into a (possibly empty) ``gpu_ids`` list."""
        if is_not_null(args.num_gpus) and is_not_null(args.gpu_ids):
            colored_print("Please use either 'num-gpus' or 'gpu-ids'!", level="error")
            exit(0)

        if is_not_null(args.num_gpus):
            args.gpu_ids = list(range(args.num_gpus))
            args.num_gpus = None
        if args.gpu_ids is None:
            args.gpu_ids = []

        if args.evaluation and len(args.gpu_ids) > 1:
            colored_print("Multiple GPU evaluation is not supported; we will use the first GPU to do evaluation!", level="warning")
            args.gpu_ids = args.gpu_ids[:1]

    def build_work_dir():
        """Resolve ``args.work_dir`` (default ./work_dirs/<env or config name>) and create it."""
        if is_null(args.work_dir):
            root_dir = "./work_dirs"
            env_name = cfg.env_cfg.get("env_name", None) if is_not_null(cfg.env_cfg) else None
            config_name = osp.splitext(osp.basename(args.config))[0]
            folder_name = env_name if is_not_null(env_name) else config_name
            if args.with_agent_type:
                if args.agent_type_first:
                    args.work_dir = osp.join(root_dir, agent_type, folder_name)
                else:
                    args.work_dir = osp.join(root_dir, folder_name, agent_type)
            else:
                args.work_dir = osp.join(root_dir, folder_name)
        elif args.with_agent_type:
            if args.agent_type_first:
                colored_print("When you specify the work dir path, the agent type cannot be at the beginning of the path!", level="warning")
            args.work_dir = osp.join(args.work_dir, agent_type)

        if args.dev:
            # "<dir>-dev/<timestamp>" keeps development runs apart.
            splits = list(osp.split(args.work_dir))
            splits[1] += "-dev"
            args.work_dir = osp.join(*splits)
            args.work_dir = osp.join(args.work_dir, args.timestamp)

        if args.clean_up:
            if args.evaluation or args.auto_resume or (is_not_null(args.resume_from) and os.path.commonprefix(args.resume_from) == args.work_dir):
                colored_print(
                    "We will ignore the clean-up flag, since we are either in the evaluation mode or resuming from the directory!", level="warning"
                )
            else:
                shutil.rmtree(args.work_dir, ignore_errors=True)
        os.makedirs(osp.abspath(args.work_dir), exist_ok=True)

    def find_checkpoint():
        """Fill ``cfg.resume_from`` from the CLI or the newest ``models/*.ckpt`` in the work dir."""
        logger = get_logger()
        if is_not_null(args.resume_from):
            if is_not_null(cfg.resume_from):
                colored_print(f"The resumed checkpoint from the config file is overwritten by {args.resume_from}!", level="warning")
            cfg.resume_from = args.resume_from

        if args.auto_resume or (args.evaluation and is_null(cfg.resume_from)):
            logger.info(f"Search model under {args.work_dir}.")
            model_names = list(glob.glob(osp.join(args.work_dir, "models", "*.ckpt")))
            latest_index = -1
            latest_name = None
            for model_i in model_names:
                # Checkpoints are named "<prefix>_<steps>.ckpt". Anything whose
                # second field is not an integer ("..._final", unrelated files)
                # is skipped. int() replaces the former eval() on a file name.
                name_parts = osp.basename(model_i).split(".")[0].split("_")
                try:
                    index = int(name_parts[1])
                except (IndexError, ValueError):
                    continue
                if index > latest_index:
                    latest_index = index
                    latest_name = model_i

            if is_null(latest_name):
                colored_print(f"Find no checkpoints under {args.work_dir}!", level="warning")
            else:
                cfg.resume_from = latest_name
                cfg.train_cfg["resume_steps"] = latest_index
        if is_not_null(cfg.resume_from):
            if isinstance(cfg.resume_from, str):
                cfg.resume_from = [
                    cfg.resume_from,
                ]
            logger.info(f"Get {len(cfg.resume_from)} checkpoint {cfg.resume_from}.")
            logger.info(f"Check checkpoint {cfg.resume_from}!")

            for file in cfg.resume_from:
                if not (osp.exists(file) and osp.isfile(file)):
                    logger.error(f"Checkpoint file {file} does not exist!")
                    exit(-1)

    # Remove mujoco_py lock
    mjpy_lock = Path(gym.__file__).parent.parent / "mujoco_py/generated/mujocopy-buildlock.lock"
    if mjpy_lock.exists():
        os.remove(str(mjpy_lock))

    add_env_var()

    # NOTE: `args` is rebound from the raw string list to the parsed namespace;
    # the nested helpers above read `args`, `cfg` and `agent_type` through closure.
    args, cfg = parse_args(args)
    args.timestamp = time.strftime("%Y%m%d_%H%M%S", time.localtime())
    agent_type = cfg.agent_cfg.type

    if args.reproducible:
        from pyrl.utils.meta import check_reproducibility

        check_reproducibility()

    get_python_env_info()

    build_work_dir()
    find_checkpoint()

    work_dir = args.work_dir
    if args.evaluation:
        test_name = args.test_name if args.test_name is not None else "test"
        work_dir = osp.join(work_dir, test_name)
        # Always clean up for evaluation
        shutil.rmtree(work_dir, ignore_errors=True)
        os.makedirs(work_dir, exist_ok=True)
    args.work_dir = work_dir

    logger_name = cfg.env_cfg.env_name if is_not_null(cfg.env_cfg) else cfg.agent_cfg.type
    args.name_suffix = f"{args.mode}"
    if args.test_name is not None:
        args.name_suffix += f"-{args.test_name}"
    os.environ["PYRL_LOGGER_NAME"] = f"{logger_name}-{args.name_suffix}"
    cfg.dump(osp.join(work_dir, f"{args.timestamp}-{args.name_suffix}.py"))

    # Single-process set-up: no distributed launcher is used in the notebook.
    rank, world_size = 0, 1

    args.seed += rank

    add_dist_var(rank, world_size)
    set_cpu_random_seed(args.seed)
    np.set_printoptions(precision=5, suppress=True)

    if is_not_null(cfg.env_cfg) and len(args.gpu_ids) > 0:
        if args.env_gpu_ids is not None:
            assert len(args.env_gpu_ids) == len(args.gpu_ids), "Number of simulation gpus should be the same as the number of training gpus!"
        else:
            args.env_gpu_ids = args.gpu_ids
        cfg.env_cfg.device = f"cuda:{args.env_gpu_ids[rank]}"

    work_dir = args.work_dir
    logger_file = osp.join(work_dir, f"{args.timestamp}-{args.name_suffix}.log")
    logger = get_logger(name=None, log_file=logger_file, log_level=cfg.get("log_level", "INFO"))

    if is_debug_mode():
        dash_line = "-" * 60 + "\n"
        # env_info is never collected in this notebook port, so fall back to a
        # placeholder instead of raising AttributeError in debug mode.
        env_info = getattr(args, "env_info", "<environment info not collected>")
        logger.info("Environment info:\n" + dash_line + env_info + "\n" + dash_line)

    if args.cfg_options is not None:
        logger.info("Extra arguments that replace the default setting from the config file.")
        for key, value in args.cfg_options.items():
            logger.info(f"{key} {value}")

    logger.info(f"Config:\n{cfg.pretty_text}")
    logger.info(f"Set random seed to {args.seed}")

    # Create replay buffer for RL
    if is_not_null(cfg.replay_cfg) and (not args.evaluation or (args.reg_loss and cfg.replay_cfg.get("buffer_filenames", None) is not None)):
        logger.info("Build replay buffer!")
        from pyrl.env import build_replay

        replay = build_replay(cfg.replay_cfg)
        expert_replay, recent_traj_replay = None, None
        if is_not_null(cfg.expert_replay_cfg):
            assert cfg.expert_replay_cfg.buffer_filenames is not None
            expert_replay = build_replay(cfg.expert_replay_cfg)
        if is_not_null(cfg.recent_traj_replay_cfg):
            recent_traj_replay = build_replay(cfg.recent_traj_replay_cfg)
    else:
        replay = None
        expert_replay = None
        recent_traj_replay = None

    # Create rollout module for online methods
    if not args.evaluation and is_not_null(cfg.rollout_cfg):
        from pyrl.env import build_rollout

        logger.info(f"Build rollout! Total memory before build rollout: {get_total_memory()}!")
        rollout_cfg = cfg.rollout_cfg
        rollout_cfg["env_cfg"] = deepcopy(cfg.env_cfg)
        rollout = build_rollout(rollout_cfg)
    else:
        rollout = None

    # Build evaluation module
    if is_not_null(cfg.eval_cfg) and rank == 0:
        # Only the first process will do evaluation
        from pyrl.env import build_evaluation

        logger.info("Build evaluation!")
        eval_cfg = cfg.eval_cfg
        # Evaluation environment setup can be different from the training setup. (Like early-stop or object sets)
        if eval_cfg.get("env_cfg", None) is None:
            eval_cfg["env_cfg"] = deepcopy(cfg.env_cfg)
        else:
            # Start from the training env config and overlay the eval-specific keys.
            tmp = eval_cfg["env_cfg"]
            eval_cfg["env_cfg"] = deepcopy(cfg.env_cfg)
            eval_cfg["env_cfg"].update(tmp)
        get_logger().info(f"Building evaluation: eval_cfg: {eval_cfg}")
        evaluator = build_evaluation(eval_cfg)
    else:
        evaluator = None

    # Get environments information for agents
    obs_shape, action_shape = None, None
    if is_not_null(cfg.env_cfg):
        # For RL which needs environments
        logger.info("Get obs shape!")
        from pyrl.env import get_env_info

        if rollout is not None:
            env_params = get_env_info(cfg.env_cfg, rollout.vec_env)
        elif hasattr(evaluator, "vec_env"):
            env_params = get_env_info(cfg.env_cfg, evaluator.vec_env)
        else:
            env_params = get_env_info(cfg.env_cfg)
        cfg.agent_cfg["env_params"] = env_params
        obs_shape = env_params["obs_shape"]
        action_shape = env_params["action_shape"]
        logger.info(f'State shape:{env_params["obs_shape"]}, action shape:{env_params["action_shape"]}')
        logger.info(env_params["message"])
    elif is_not_null(replay):
        obs_shape = None
        for obs_key in ["inputs", "obs"]:
            if obs_key in replay.memory:
                obs_shape = replay.memory.slice(0).shape[obs_key]
                break

    # Imported unconditionally: replace_placeholder_with_args is needed below
    # even when neither shape is known (it used to be imported only inside the
    # `if`, which made the constraint substitution raise NameError in that case).
    from pyrl.networks.utils import get_kwargs_from_shape, replace_placeholder_with_args

    if is_not_null(obs_shape) or is_not_null(action_shape):
        replaceable_kwargs = get_kwargs_from_shape(obs_shape, action_shape)
        cfg = replace_placeholder_with_args(cfg, **replaceable_kwargs)
    from pyrl.methods.mfrl.constraint import get_kwargs_for_constraint
    cfg = replace_placeholder_with_args(cfg, **get_kwargs_for_constraint(cfg))
    logger.info(f"Final agent config:\n{cfg.agent_cfg}")

    # Output version of important packages
    log_meta_info(logger)

    # The training / evaluation main loops and the closing of evaluator, rollout
    # and replay are intentionally not run here: the caller receives the objects
    # and drives (and closes) them from the notebook.

    from pyrl.methods.builder import build_agent
    agent = build_agent(cfg.agent_cfg)
    agent = agent.to(device)
    return agent, rollout, evaluator, replay

# Build the agent (and, depending on the config, rollout / evaluator / replay)
# from the command-line style `args` list defined in an earlier cell.
agent, rollout, evaluator, replay = pyrl_init(args)

# --- Scratch experiments kept for reference (all disabled) -------------------
# trajectories = rollout.forward_with_policy(None, 5000, replay=replay)
# memory = replay
# self = agent

# sampled_batch = memory.sample(self.batch_size).to_torch(
#     device=self.device, non_blocking=True
# )
# sampled_batch = self.process_obs(sampled_batch)

# prev_actions = sampled_batch["prev_actions"]
# obs, actions = sampled_batch["obs"], sampled_batch["actions"]
# rewards, dones = sampled_batch["rewards"], sampled_batch["dones"].float()
# next_obs, infos = sampled_batch["next_obs"], sampled_batch["infos"]

#ret = rollout.vec_env.step_random_actions(10)
#ret

#trajectories = rollout.forward_with_policy(None, 50, replay=replay)

# Load the pretrained weights from `model_ckpt` (defined in the first cell)
# into the freshly built agent.
# NOTE(review): the third argument looks like the device/map location for the
# checkpoint tensors -- confirm against pyrl's load_checkpoint signature.
from pyrl.utils.torch import BaseAgent, load_checkpoint
print("Resume agent with checkpoint!")
load_checkpoint(agent, model_ckpt, "cuda")

# Switch the agent to evaluation mode before using it for inference.
agent = agent.eval()

print("Done")

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
torchsparse is not installed correctly!
No module named 'torchsparse'
Pointnet++ is not compiled
cannot import name 'ball_query_ext' from partially initialized module 'pyrl.utils.cpp_ops.ops_3d.ball_query' (most likely due to a circular import) (/usr/local/lib/python3.10/dist-packages/pyrl/utils/cpp_ops/ops_3d/ball_query/__init__.py)
Piontnet++ is not supported
cannot import name 'PointFPModule' from 'pyrl.networks.modules' (/usr/local/lib/python3.10/dist-packages/pyrl/networks/modules/__init__.py)
SparseConv is not supported
No module named 'torchsparse'


[38;21mpyrl - (3636172862.py:195) - INFO - 2023-05-19,11:18:35 - Search model under ./work_dirs/PlaceCubeInBowlXArm-v5.[0m
[38;21mPlaceCubeInBowlXArm-v5-eval - (3636172862.py:288) - INFO - 2023-05-19,11:18:35 - Extra arguments that replace the default setting from the config file.[0m
[38;21mPlaceCubeInBowlXArm-v5-eval - (3636172862.py:290) - INFO - 2023-05-19,11:18:35 - env_cfg.env_name PlaceCubeInBowlXArm-v5[0m
[38;21mPlaceCubeInBowlXArm-v5-eval - (3636172862.py:290) - INFO - 2023-05-19,11:18:35 - env_cfg.remove_obs_extra ['cube_bbox', 'bowl_bbox'][0m
[38;21mPlaceCubeInBowlXArm-v5-eval - (3636172862.py:290) - INFO - 2023-05-19,11:18:35 - env_cfg.control_mode pd_ee_delta_pos[0m
[38;21mPlaceCubeInBowlXArm-v5-eval - (3636172862.py:290) - INFO - 2023-05-19,11:18:35 - env_cfg.horizon 50[0m
[38;21mPlaceCubeInBowlXArm-v5-eval - (3636172862.py:290) - INFO - 2023-05-19,11:18:35 - agent_cfg.actor_cfg.nn_cfg.mlp_spec ['obs_shape', 256, 256, 256, 'action_shape*2'][0m
[38;21mPlaceCu



[38;21mPlaceCubeInBowlXArm-v5-eval - (3636172862.py:292) - INFO - 2023-05-19,11:18:35 - Config:
agent_cfg = dict(
    type='SAC',
    batch_size=1024,
    gamma=0.9,
    update_coeff=0.005,
    alpha=0.2,
    target_update_interval=1,
    automatic_alpha_tuning=True,
    alpha_optim_cfg=dict(type='Adam', lr=0.0003),
    actor_cfg=dict(
        type='ContinuousActor',
        head_cfg=dict(
            type='TanhGaussianHead',
            log_std_bound=[-5, 2],
            log_std_clip_tanh=True),
        nn_cfg=dict(
            type='LinearMLP',
            norm_cfg=None,
            mlp_spec=['obs_shape', 256, 256, 256, 'action_shape*2'],
            bias='auto',
            inactivated_output=True,
            dense_init_cfg=dict(type='xavier_init', gain=1, bias=0)),
        optim_cfg=dict(type='Adam', lr=0.0003)),
    critic_cfg=dict(
        type='ContinuousCritic',
        num_heads='2 + num_constraints',
        nn_cfg=dict(
            type='LinearMLP',
            norm_cfg=No

[ENV] No successful grasp pose found!
Resume agent with checkpoint!
Done


In [4]:
import numpy as np
from grounded_sam_track import GroundedSAMTrack
# Segmentation / tracking model used to find the task objects in camera images.
# NOTE(review): the meaning of aot_max_len_long_term, predict_gap and
# prompt_with_robot_arm is defined by GroundedSAMTrack -- confirm there.
# It runs on the second GPU ("cuda:1"); the agent is loaded on "cuda".
grounded_sam_track = GroundedSAMTrack(
    aot_max_len_long_term=2,
    predict_gap=9999,
    prompt_with_robot_arm=True,
    device="cuda:1",
)
# Text prompts for the objects to segment. Their order matters: later cells
# match mask label i + 1 to env_object_texts[i].
env_object_texts = ["red cube", "green bowl"]

import pyrealsense2 as rs
from real_robot.utils.realsense import RealSenseAPI
from real_robot.utils.camera import depth2xyz, transform_points

# Camera pose loaded from disk and applied to the camera-frame points below.
# NOTE(review): file name suggests a base-to-camera transform ("b2c") -- confirm
# the convention expected by transform_points.
camera_pose = np.load("/rl_benchmark/real_robot/notebooks/Tb_b2c.npy")

# Open the RealSense camera with the "High Accuracy" preset and a fixed depth
# exposure of 1500.
realsense = RealSenseAPI(preset="High Accuracy",
                         depth_option_kwargs={
                             rs.option.exposure: 1500
                         })
# Grab one frame: color image, depth image and the intrinsics that are
# unpacked into depth2xyz.
color_image, depth_image, intr_array = realsense.capture()

# Convert the depth image to per-pixel xyz, then map it through camera_pose.
xyz_image = depth2xyz(depth_image, *intr_array)
world_xyz_image = transform_points(xyz_image, camera_pose)

# Displayed as the cell output; the recorded run shows (480, 848, 3).
world_xyz_image.shape

final text_encoder_type: bert-base-uncased


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


_IncompatibleKeys(missing_keys=[], unexpected_keys=['label_enc.weight'])

Loading GroundingDINO: Took 2.906 seconds

Loading SAM: Took 4.199 seconds


[38;21mRealSenseAPI - (realsense.py:161) - INFO - 2023-05-19 11:18 - Found Intel RealSense D435 (S/N: 146322072630 FW: 05.12.09.00 on USB 3.2)[0m
[38;21mRSDevice - (realsense.py:88) - INFO - 2023-05-19 11:18 - Loaded "High Accuracy" preset for <RSDevice: Intel RealSense D435 (S/N: 146322072630)>[0m
[38;21mRSDevice - (realsense.py:37) - INFO - 2023-05-19 11:18 - Setting Depth "option.exposure" to 1500[0m
[38;21mRealSenseAPI - (realsense.py:167) - INFO - 2023-05-19 11:18 - Loading finished: found 1 devices[0m
[38;21mRSDevice - (realsense.py:100) - INFO - 2023-05-19 11:18 - Started device <RSDevice: Intel RealSense D435 (S/N: 146322072630)> with 2 streams[0m
[38;21mRSDevice - (realsense.py:102) - INFO - 2023-05-19 11:18 - <pyrealsense2.[video_]stream_profile: Depth(0) 848x480 @ 30fps Z16>[0m
[38;21mRSDevice - (realsense.py:102) - INFO - 2023-05-19 11:18 - <pyrealsense2.[video_]stream_profile: Color(0) 848x480 @ 30fps RGB8>[0m


(480, 848, 3)

In [5]:
import numpy as np
import torch
import cv2
import open3d as o3d

from real_robot.agents.xarm import XArm7

# Handle to the real xArm7; used below for get_qpos / get_qvel / get_tcp_pose,
# set_action, reset and the raw `robot.arm` SDK calls.
# NOTE(review): construction presumably connects to the physical arm -- confirm.
robot = XArm7()

from pyrl.utils.lib3d import np2pcd
def _process_pts(
        pts_lst,
        voxel_downsample_size, nb_neighbors, std_ratio
    ):
    """Downsample and outlier-filter one or several point sets.

    Args:
        pts_lst: a single point array, or a list of point arrays.
        voxel_downsample_size: voxel size passed to ``voxel_down_sample``;
            ``None`` skips the downsampling step.
        nb_neighbors: forwarded to ``remove_statistical_outlier``.
        std_ratio: forwarded to ``remove_statistical_outlier``.

    Returns:
        The filtered points as a numpy array when exactly one point set was
        processed (also for a one-element list), otherwise a list of arrays.
    """
    from pyrl.utils.lib3d import np2pcd

    clouds = [pts_lst] if isinstance(pts_lst, np.ndarray) else pts_lst

    filtered = []
    for cloud_pts in clouds:
        cloud = np2pcd(cloud_pts)
        if voxel_downsample_size is not None:
            cloud = cloud.voxel_down_sample(voxel_size=voxel_downsample_size)
        # Only the filtered cloud is needed; the inlier indices are discarded.
        inlier_cloud, _ = cloud.remove_statistical_outlier(
            nb_neighbors=nb_neighbors, std_ratio=std_ratio
        )
        filtered.append(np.asarray(inlier_cloud.points))

    return filtered[0] if len(filtered) == 1 else filtered

from collections import OrderedDict
def get_obs(object_filt_pcds):
    """Build the flattened state observation for the policy from real-world data.

    Reads the module-level ``env_object_texts`` (index 0 is the cube, index 1
    the bowl) and ``robot``.

    Args:
        object_filt_pcds: dict mapping each entry of ``env_object_texts`` to
            that object's filtered points (reduced with ``.min(0)`` /
            ``.max(0)``, so presumably an (N, 3) array -- TODO confirm).

    Returns:
        The nested ``agent`` / ``extra`` observation dict flattened by
        ``mani_skill2.utils.common.flatten_state_dict``.

    Raises:
        ValueError: if the point set of the cube or the bowl is empty, i.e. the
            object was not segmented in the current frame.

    Side effects:
        Prints every observation entry with 3-digit precision.
    """
    cube_pts = object_filt_pcds[env_object_texts[0]]
    bowl_pts = object_filt_pcds[env_object_texts[1]]

    # Fail with a readable message instead of numpy's "zero-size array to
    # reduction operation" error when an object was not detected.
    for label, pts in ((env_object_texts[0], cube_pts), (env_object_texts[1], bowl_pts)):
        if np.size(pts) == 0:
            raise ValueError(
                f"No points found for '{label}'; cannot compute its bounding box. "
                "Check the segmentation result for the current frame."
            )

    # Object positions are the centers of the axis-aligned bounding boxes
    # (not the mean of the points).
    bowl_mins, bowl_maxs = bowl_pts.min(0), bowl_pts.max(0)
    cube_mins, cube_maxs = cube_pts.min(0), cube_pts.max(0)

    cube_pos = np.mean([cube_mins, cube_maxs], axis=0)
    bowl_pos = np.mean([bowl_mins, bowl_maxs], axis=0)

    # Goal is offset by 0.05 along +z from the bowl center.
    goal_pos = bowl_pos + [0, 0, 0.05]
    tcp_pose = robot.get_tcp_pose()

    obs = OrderedDict()
    obs["agent"] = OrderedDict(
        qpos=robot.get_qpos(),
        qvel=robot.get_qvel(),
        base_pose=np.array([0., 0, 0, 1, 0, 0, 0]),
    )
    # Object orientations are not estimated; a constant [1, 0, 0, 0] is used
    # (presumably the identity quaternion in wxyz order -- TODO confirm).
    obs["extra"] = OrderedDict(
        tcp_pose=tcp_pose,
        goal_pos=goal_pos,
        tcp_to_goal_pos=goal_pos - tcp_pose[:3],
        cube_pose=np.hstack([cube_pos, [1, 0, 0, 0]]),
        tcp_to_cube_pos=cube_pos - tcp_pose[:3],
        cube_to_goal_pos=goal_pos - cube_pos,
        bowl_pose=np.hstack([bowl_pos, [1, 0, 0, 0]]),
        tcp_to_bowl_pos=bowl_pos - tcp_pose[:3],
        cube_to_bowl_pos=bowl_pos - cube_pos,
    )

    with np.printoptions(suppress=True, precision=3):
        for k, v in obs.items():
            if isinstance(v, dict):
                for kk, vv in v.items():
                    print(f"{kk}: {vv}")

    from mani_skill2.utils.common import flatten_state_dict
    obs = flatten_state_dict(obs)
    return obs

# Interactive real-robot stepping. With the OpenCV window focused:
#   ESC  quit
#   s    execute the currently planned action, then re-observe and re-plan
#   r    reset the robot, then re-observe and re-plan
#   c    show and clear xArm errors/warnings and re-enable motion
# Uses objects created in earlier cells: color_image / depth_image /
# world_xyz_image, realsense, camera_pose, grounded_sam_track,
# env_object_texts, robot and agent.

voxel_downsample_size, nb_neighbors, std_ratio = 0.005, 20, 0.005


def _segment_objects(color, world_xyz, frame_index):
    """Segment the task objects in one frame and collect their world points.

    Mask label ``i + 1`` is matched to ``env_object_texts[i]``.

    Returns:
        ``(ret_dict, pred_masks, object_pcds, object_filt_pcds)`` where the two
        dicts map each object text to its raw / filtered points.
    """
    ret = grounded_sam_track.predict_and_track_batch([color], [frame_index], env_object_texts)
    ret["pred_masks"] = np.stack(ret["pred_masks"], axis=0)
    masks = ret["pred_masks"][0]  # [H, W]

    raw_pcds, filt_pcds = {}, {}
    for i, object_text in enumerate(env_object_texts):
        object_pts = world_xyz[masks == i + 1]
        raw_pcds[object_text] = object_pts
        filt_pcds[object_text] = _process_pts(
            object_pts, voxel_downsample_size, nb_neighbors, std_ratio
        )
    return ret, masks, raw_pcds, filt_pcds


def _object_aabb(points):
    """Return the axis-aligned bounding box of ``points`` as an Open3D geometry."""
    return np2pcd(points).get_axis_aligned_bounding_box()


def _plan_action(filtered_pcds):
    """Build the observation, query the policy, print and return ``(obs, action)``."""
    observation = get_obs(filtered_pcds)
    policy_action = agent(torch.Tensor(observation)).cpu().numpy()
    with np.printoptions(suppress=True, precision=3):
        print(f"\naction: {policy_action}")
    return observation, policy_action


cv2.namedWindow("Color / Depth")
# The camera delivers RGB while OpenCV displays BGR: convert, as the loop does.
cv2.imshow("Color / Depth", cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR))
cv2.waitKey(1)

pcd_vis = o3d.visualization.Visualizer()
pcd_vis.create_window("Point Cloud", width=1280, height=720)

try:
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(world_xyz_image.reshape(-1, 3))
    pcd.colors = o3d.utility.Vector3dVector(color_image.reshape(-1, 3) / 255.0)
    coord_frame = o3d.geometry.TriangleMesh().create_coordinate_frame()
    pcd_vis.add_geometry(coord_frame)
    pcd_vis.add_geometry(pcd)

    # Initial perception pass on the frame captured in the previous cell.
    frame_i = 0
    ret_dict, pred_masks, object_pcds, object_filt_pcds = _segment_objects(
        color_image, world_xyz_image, frame_i
    )

    cube_aabb = _object_aabb(object_filt_pcds["red cube"])
    bowl_aabb = _object_aabb(object_filt_pcds["green bowl"])
    pcd_vis.add_geometry(cube_aabb)
    pcd_vis.add_geometry(bowl_aabb)

    obs, action = _plan_action(object_filt_pcds)

    while True:
        pcd_vis.poll_events()
        pcd_vis.update_renderer()

        depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)
        cv2.imshow("Color / Depth", np.hstack([cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR), depth_colormap]))

        key = cv2.waitKey(1)
        if key == 27:  # ESC
            break
        elif key == ord('s'):
            print("Stepping action ...")
            robot.set_action(action, wait=True, action_scale=50)
        elif key == ord('r'):
            robot.reset()
        elif key == ord('c'):
            robot.arm.get_err_warn_code(show=True)
            robot.arm.clean_warn()
            robot.arm.clean_error()
            robot.arm.motion_enable(enable=True)
            robot.arm.set_state(state=0)
            robot.arm.set_mode(0)
            continue  # clearing errors does not move the robot: nothing to re-plan
        else:
            continue  # no key / unhandled key

        # Reached only after 's' or 'r': the scene changed, so re-capture,
        # refresh the visualisation, re-segment and re-plan.
        color_image, depth_image, intr_array = realsense.capture()
        xyz_image = depth2xyz(depth_image, *intr_array)
        world_xyz_image = transform_points(xyz_image, camera_pose)
        pcd.points = o3d.utility.Vector3dVector(world_xyz_image.reshape(-1, 3))
        pcd.colors = o3d.utility.Vector3dVector(color_image.reshape(-1, 3) / 255.0)

        pcd_vis.update_geometry(pcd)

        frame_i += 1
        ret_dict, pred_masks, object_pcds, object_filt_pcds = _segment_objects(
            color_image, world_xyz_image, frame_i
        )

        # Update the displayed boxes in place so the visualizer keeps its geometry handles.
        cube_aabb_new = _object_aabb(object_filt_pcds["red cube"])
        bowl_aabb_new = _object_aabb(object_filt_pcds["green bowl"])
        cube_aabb.max_bound = cube_aabb_new.max_bound
        cube_aabb.min_bound = cube_aabb_new.min_bound
        bowl_aabb.max_bound = bowl_aabb_new.max_bound
        bowl_aabb.min_bound = bowl_aabb_new.min_bound
        pcd_vis.update_geometry(cube_aabb)
        pcd_vis.update_geometry(bowl_aabb)

        obs, action = _plan_action(object_filt_pcds)
finally:
    # Always release the GUI windows and the camera, even if perception or the
    # robot raised. After this the `realsense` name is gone, so the camera
    # set-up cell must be re-run before running this cell again.
    # NOTE(review): `del` presumably releases the device through the object's
    # destructor -- confirm in RealSenseAPI.
    cv2.destroyAllWindows()
    pcd_vis.destroy_window()
    del realsense

SDK_VERSION: 1.11.6
ROBOT_IP: 192.168.1.209, VERSION: v1.12.0, PROTOCOL: V1, DETAIL: 7,7,XS1203,XX0000,v1.12.0, TYPE1300: [0, 0]
change prot_flag to 3
************* GetErrorWarnCode, Status: 0 **************
* ErrorCode: 0, Info: Normal
* WarnCode: 0, Info: Normal
**************************************************


2023-05-19 11:18:56.508943: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


qpos: [ 0.     0.     0.     1.047  0.     1.047 -1.571  0.001  0.001]
qvel: [0. 0. 0. 0. 0. 0. 0. 0. 0.]
base_pose: [0. 0. 0. 1. 0. 0. 0.]
tcp_pose: [ 0.464 -0.     0.187 -0.    -0.707 -0.707 -0.   ]
goal_pos: [ 0.566 -0.21   0.083]
tcp_to_goal_pos: [ 0.102 -0.21  -0.104]
cube_pose: [ 0.344 -0.264  0.024  1.     0.     0.     0.   ]
tcp_to_cube_pos: [-0.12  -0.264 -0.163]
cube_to_goal_pos: [0.222 0.055 0.059]
bowl_pose: [ 0.566 -0.21   0.033  1.     0.     0.     0.   ]
tcp_to_bowl_pos: [ 0.102 -0.21  -0.154]
cube_to_bowl_pos: [0.222 0.055 0.009]

action: [-0.99  -0.996 -0.187  0.142]
Stepping action ...
delta xyz [-49.51439 -49.79398  -9.35998] gripper_action 358.9745408296585
qpos: [-0.041 -0.164 -0.074  0.898 -0.014  1.061 -1.679  0.025  0.025]
qvel: [0. 0. 0. 0. 0. 0. 0. 0. 0.]
base_pose: [0. 0. 0. 1. 0. 0. 0.]
tcp_pose: [ 0.414 -0.05   0.196  0.    -0.707 -0.707  0.   ]
goal_pos: [ 0.564 -0.211  0.079]
tcp_to_goal_pos: [ 0.149 -0.162 -0.117]
cube_pose: [ 0.343 -0.264  0.023  1.  

In [9]:
# Inspect the arm's current TCP pose. The recorded output is a 7-element
# float32 array; get_obs() uses its first three entries as the position.
robot.get_tcp_pose()

array([ 0.32831, -0.01156, -0.01011,  0.0036 , -0.7189 , -0.69509,
        0.00473], dtype=float32)

In [4]:
# NOTE(review): importing mani_skill2.envs presumably registers the ManiSkill2
# environments with gym so that gym.make can find them -- confirm.
import mani_skill2.envs
import gym
import numpy as np
import torch

# Simulated counterpart of the real-robot task, created with the same control
# mode and removed observation keys as the overrides given to pyrl_init.
env = gym.make("PlaceCubeInBowlXArm-v5", control_mode="pd_ee_delta_pos",
                remove_obs_extra=["cube_bbox","bowl_bbox"])
# Sets the step-limit attribute to 200; the policy config above uses
# env_cfg.horizon=50.
# NOTE(review): presumably read by gym's TimeLimit wrapper -- confirm.
env._max_episode_steps = 200



In [7]:
# Roll the loaded policy out for one episode in simulation (seed 0).
obs = env.reset(seed=0)

# Open the viewer and start paused so the initial scene can be inspected.
env.render("human")
env.unwrapped._viewer.toggle_pause(True)

done = False
info = {}
while not done:
    env.render("human")
    action = agent(torch.Tensor(obs)).cpu().numpy()
    obs, reward, done, info = env.step(action)
    print(f"Stepping with action {action}")

# `done` is also raised when the step limit is hit, so report the task
# outcome from the env's info dict instead of assuming success.
# NOTE(review): relies on ManiSkill2 exposing a boolean "success" entry in
# `info`; a missing key is treated as failure.
if info.get("success", False):
    print("Success")
else:
    print("Episode ended without success")

[W] Mouse not available
Stepping with action [-0.99154 -0.99463  0.84437 -0.79757]
Stepping with action [-0.99911 -0.99794  0.71402 -0.5568 ]
Stepping with action [-0.99321 -0.9906   0.74618 -0.76542]
Stepping with action [-0.99212  0.35183  0.92736 -0.86966]
Stepping with action [-0.99995  0.73728  0.19965 -0.90442]
Stepping with action [-0.9987   0.19392  0.97342 -0.44084]
Stepping with action [-0.9753   0.22705  0.74432  0.99984]
Stepping with action [ 0.99954  0.98037 -0.99436  0.98153]
Stepping with action [ 0.99961  0.17239 -0.97624  0.99058]
Stepping with action [ 0.99844  0.08987 -0.73548  0.99543]
Stepping with action [ 0.99972 -0.45171  0.93234  0.9974 ]
Stepping with action [ 0.83907 -0.61358  0.96607 -0.28416]
Success
