In [19]:
import numpy as np
from tqdm import tqdm

# Make sure this matches your worker observation builder
from hrl.worker_env import TableTennisWorker
from stable_baselines3.common.vec_env import DummyVecEnv
from config import Config

In [10]:
import numpy as np
from tqdm import tqdm
from stable_baselines3.common.vec_env import DummyVecEnv

from config import Config
from hrl.worker_env import TableTennisWorker

def reset_env(venv):
    result = venv.reset()
    return result[0] if isinstance(result, tuple) else result

def get_base_env(venv):
    """
    Unwrap DummyVecEnv / Monitor / VecNormalize
    down to the base gym env that has obs_dict.
    """
    env = venv.envs[0]
    while hasattr(env, "env"):
        env = env.env
    return env.unwrapped

def collect_relative_offsets(cfg, num_steps=30000):
    venv = DummyVecEnv([lambda: TableTennisWorker(cfg)])
    base_env = get_base_env(venv)

    dxs, dys, dzs = [], [], []
    dpxs, dpys = [], []
    dts = []

    reset_env(venv)
    for _ in tqdm(range(num_steps)):
        action = [venv.action_space.sample()]
        out = venv.step(action)

        obs_dict = base_env.obs_dict  # raw MyoSuite obs
        ball_pos = np.asarray(obs_dict["ball_pos"], np.float32)
        paddle_pos = np.asarray(obs_dict["paddle_pos"], np.float32)
        pelvis_xy = np.asarray(obs_dict["pelvis_pos"][:2], np.float32)
        t = float(obs_dict["time"])

        # Relative differences
        dx, dy, dz = paddle_pos - ball_pos
        dpx, dpy = pelvis_xy - ball_pos[:2]

        dxs.append(dx)
        dys.append(dy)
        dzs.append(dz)
        dpxs.append(dpx)
        dpys.append(dpy)
        dts.append(t)

        done = bool(out[2][0] or out[3][0])
        if done:
            reset_env(venv)

    venv.close()

    return {
        "dx": np.array(dxs),
        "dy": np.array(dys),
        "dz": np.array(dzs),
        "dpx": np.array(dpxs),
        "dpy": np.array(dpys),
        "dt": np.array(dts),
    }

def suggest_goal_bounds(data, pct_min=1, pct_max=99, slack=1.2):
    low, high = {}, {}
    for key, values in data.items():
        lo = np.percentile(values, pct_min)
        hi = np.percentile(values, pct_max)
        center = 0.5 * (lo + hi)
        width = (hi - lo) * slack
        low[key]  = center - 0.5 * width
        high[key] = center + 0.5 * width
    return low, high

if __name__ == "__main__":
    cfg = Config()
    print("Collecting relative offsets …")
    data = collect_relative_offsets(cfg, num_steps=30000)

    low, high = suggest_goal_bounds(data, pct_min=1, pct_max=99, slack=1.2)
    print("=== Suggested goal bounds ===")
    print("goal_low  =", [
        low["dx"], low["dy"], low["dz"],
        low["dpx"], low["dpy"], 0.2
    ])
    print("goal_high =", [
        high["dx"], high["dy"], high["dz"],
        high["dpx"], high["dpy"], 0.8
    ])

Collecting relative offsets …


100%|██████████| 30000/30000 [05:33<00:00, 90.02it/s]

=== Suggested goal bounds ===
goal_low  = [2.6226827461719515, 0.09191729348897937, -0.27048435640335083, 2.733618104457855, -0.573699263960123, 0.2]
goal_high = [3.141523899316788, 1.236468253314495, -0.15279872941970826, 3.25203111410141, 0.5706091401278972, 0.8]





In [None]:
from stable_baselines3.common.vec_env import DummyVecEnv
from hrl.worker_env import TableTennisWorker
from config import Config

venv = DummyVecEnv([lambda: TableTennisWorker(Config())])
obs = venv.reset()

  logger.warn(


{'time': array([0.]),
 'pelvis_pos': array([ 2.0205e+00, -6.6971e-04,  9.5000e-01]),
 'body_qpos': array([-0.4205,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    , -0.0972,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
         0.    ,  0.1368,  0.1484,  0.7069, -0.7405, -0.3432,  0.8055,
        -0.1028, -0.0829, -0.7304,  0.0632,  0.7   ,  0.075 , -0.2967,
         0.7227, -0.1361,  0.2671,  0.3535,  0.6598, -0.1518,  0.4242,
         0.5184,  0.919 , -0.2094,  0.2592,  0.5106,  0.7934, -0.2042,
         0.2278,  0.    ]),
 'body_qvel': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.]),
 'ball_pos': array([-0.9342

In [13]:
from myosuite.utils import gym


env = gym.make("myoChallengeTableTennisP2-v0", max_episode_steps=1000)
obs = env.reset()
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())

In [17]:
for k, v in info['obs_dict'].items():
    try:
        print(f"{k:20s} shape={np.asarray(v).shape}")
    except Exception as e:
        print(f"{k:20s} ERROR {e}")

time                 shape=()
pelvis_pos           shape=(3,)
body_qpos            shape=(58,)
body_qvel            shape=(58,)
ball_pos             shape=(3,)
ball_vel             shape=(3,)
paddle_pos           shape=(3,)
paddle_vel           shape=(3,)
paddle_ori           shape=(4,)
padde_ori_err        shape=(4,)
reach_err            shape=(3,)
palm_pos             shape=(3,)
palm_err             shape=(3,)
touching_info        shape=(6,)
act                  shape=(273,)


In [18]:
info['obs_dict']['time']

array(0.01)