In [51]:
from legged_gym.dataset.replay_buffer import ReplayBuffer
import numpy as np

def get_rb_structure(rb_path):
    print("="*50)
    print(rb_path)
    rb = ReplayBuffer.create_from_path(rb_path)
    print(rb)
    print(f"Rewards, max: {rb.meta.episode_reward[:].max()}, min: {rb.meta.episode_reward[:].min()}, mean: {rb.meta.episode_reward[:].mean()}, std: {rb.meta.episode_reward[:].std()}")
    ep_ends = rb.meta.episode_ends[:]
    ep_lengths = np.diff(ep_ends)
    print(f"Episode lengths, max: {ep_lengths.max()}, min: {ep_lengths.min()}, mean: {ep_lengths.mean()}, std: {ep_lengths.std()}")
    actions = rb.data.actions[:]
    print(f"Actions, max: {np.max(actions, axis=0)}\nmin: {np.min(actions, axis=0)}\nmean: {np.mean(actions, axis=0)}\nstd: {np.std(actions, axis=0)}")
    print("="*50)
    return rb

# get_rb_structure("dataset/collected_single/switch.zarr")
# get_rb_structure("dataset/collected_single/constant.zarr")
# # get_rb_structure("dataset/collected_large/switch.zarr")
get_rb_structure("dataset/collected_trajectories_v2/constant.zarr")
# # get_rb_structure("dataset/collected_single_short/switch.zarr")
# get_rb_structure("dataset/new_ds_test/constant.zarr")
# get_rb_structure("dataset/collected_trajectories_v2/switch.zarr")
# get_rb_structure("dataset/collected_trajectories_v2/constant.zarr")


dataset/collected_trajectories_v2/constant.zarr
/
 ├── data
 │   ├── actions (9975546, 19) float32
 │   ├── clock (9975546, 5, 2) float32
 │   ├── commands (9975546, 5, 11) float32
 │   ├── critic_obs (9975546, 321) float32
 │   ├── dones (9975546,) bool
 │   ├── proprio (9975546, 5, 63) float32
 │   ├── rewards (9975546,) float32
 │   └── root_states (9975546, 13) float32
 └── meta
     ├── episode_command (20000, 11) float32
     ├── episode_ends (20000,) int64
     └── episode_reward (20000,) float32
Rewards, max: 61.48185729980469, min: -13.266547203063965, mean: 43.4718132019043, std: 3.2603378295898438
Episode lengths, max: 499, min: 24, mean: 498.7772888644432, std: 8.773650954857128
Actions, max: [ 2.400217   2.9353268  3.5993326  6.4726024 14.767946   1.4442703
  3.4785094  3.8924353  5.9731183 15.682708   4.7368326  9.729494
  2.474783   0.9053369  0.2897315  9.024212   2.9907978  0.6573744
  0.6608664]
min: [ -1.5382621   -4.1886096   -7.7008653  -13.434552   -11.393337
  -2

/
 ├── data
 │   ├── actions (9975546, 19) float32
 │   ├── clock (9975546, 5, 2) float32
 │   ├── commands (9975546, 5, 11) float32
 │   ├── critic_obs (9975546, 321) float32
 │   ├── dones (9975546,) bool
 │   ├── proprio (9975546, 5, 63) float32
 │   ├── rewards (9975546,) float32
 │   └── root_states (9975546, 13) float32
 └── meta
     ├── episode_command (20000, 11) float32
     ├── episode_ends (20000,) int64
     └── episode_reward (20000,) float32

In [None]:
import os
import shutil
from legged_gym.dataset.replay_buffer import ReplayBuffer
import numpy as np

def dataset_process(rb):
    data = {key: rb.data[key][:] for key in rb.data.keys()}
    meta = {key: rb.meta[key][:] for key in rb.meta.keys()}
    new_data = {}
    new_data['clock'] = data['clock'][:, -1]
    new_data['commands'] = data['commands'][:, -1]
    new_data['proprio'] = data['proprio'][:, -1, :44]
    new_data['privileged'] = data['critic_obs'][:, -221-24:-221]
    new_data['terrain'] = data['critic_obs'][:, -221:]
    new_data['actions'] = data['actions']
    new_data['rewards'] = data['rewards']
    new_data['dones'] = data['dones']
    return new_data, meta
    

def dataset_convert(rb_path):
    rb = ReplayBuffer.create_from_path(rb_path)
    print(rb)
    converted_path = rb_path.replace("dataset", "converted_dataset")
    if os.path.exists(converted_path):
        shutil.rmtree(converted_path)
    os.makedirs(converted_path, exist_ok=True)
    converted_rb = ReplayBuffer.create_empty_zarr(storage=converted_path)
    print("Loading data...")
    new_data, new_meta = dataset_process(rb)
    print("Writing data...")
    converted_rb.add_chunked_data(new_data, target_chunk_bytes=128 * 1024 * 1024)
    converted_rb.add_chunked_meta(new_meta, target_chunk_bytes=64 * 1024 * 1024)
    print(converted_rb)
    assert np.all(converted_rb.data['actions'].shape == rb.data['actions'].shape)
    assert np.all(converted_rb.meta['episode_ends'].shape == rb.meta['episode_ends'].shape)
    print("Conversion completed")
    shutil.rmtree(rb_path)
    return rb

In [53]:
dataset_convert("dataset/collected_single/constant.zarr")  
dataset_convert("dataset/collected_single_short/constant.zarr")
dataset_convert("dataset/collected_trajectories_v2/constant.zarr")
dataset_convert("dataset/collected_trajectories_v2/switch.zarr")


/
 ├── data
 │   ├── actions (49970512, 19) float32
 │   ├── clock (49970512, 5, 2) float32
 │   ├── commands (49970512, 5, 11) float32
 │   ├── critic_obs (49970512, 321) float32
 │   ├── dones (49970512,) bool
 │   ├── proprio (49970512, 5, 63) float32
 │   ├── rewards (49970512,) float32
 │   └── root_states (49970512, 13) float32
 └── meta
     ├── episode_command (100000, 11) float32
     ├── episode_ends (100000,) int64
     └── episode_reward (100000,) float32


KeyboardInterrupt: 

In [None]:
from torch.utils.data import Dataset, DataLoader
from legged_gym.dataset.replay_buffer import ReplayBuffer
import numpy as np

class RBDataset(Dataset):
    def __init__(self, rb_path):
        self.rb = ReplayBuffer.create_from_path(rb_path)
        print(self.rb)
        self.data_dict = {}
        preload_keys = ["actions", "commands"]
        disk_keys = ["proprio", "critic_obs"]
        for key in preload_keys:
            self.data_dict[key] = self.rb.data[key][:]
        for key in disk_keys:
            self.data_dict[key] = self.rb.data[key]
    
    def __len__(self):
        return len(self.data_dict['actions'])
    
    def __getitem__(self, idx):
        return {key: self.data_dict[key][idx] for key in self.data_dict.keys()}

rb_dataset = RBDataset("dataset/small_chunk4kb/constant.zarr")
# print(rb_dataset[0])
dataloader = DataLoader(rb_dataset, batch_size=1024, shuffle=True, num_workers=4)
import tqdm
import time
pbar = tqdm.tqdm(dataloader)
start_time = time.time()
for idx, batch in enumerate(pbar):
    time_taken = time.time() - start_time
    pbar.set_description(f"time per batch: {time_taken / (idx + 1):.3f}s")

/
 ├── data
 │   ├── actions (125000, 19) float32
 │   ├── clock (125000, 5, 2) float32
 │   ├── commands (125000, 5, 11) float32
 │   ├── critic_obs (16000, 321) float32
 │   ├── dones (125000,) bool
 │   ├── proprio (16125, 5, 63) float32
 │   ├── rewards (125000,) float32
 │   └── root_states (125000, 13) float32
 └── meta


  0%|          | 0/123 [00:00<?, ?it/s]

  0%|          | 0/123 [00:00<?, ?it/s]


BoundsCheckError: index out of bounds for dimension with length Caught BoundsCheckError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 309, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_120271/1397987268.py", line 21, in __getitem__
    return {key: self.data_dict[key][idx] for key in self.data_dict.keys()}
  File "/tmp/ipykernel_120271/1397987268.py", line 21, in <dictcomp>
    return {key: self.data_dict[key][idx] for key in self.data_dict.keys()}
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/site-packages/zarr/core.py", line 844, in __getitem__
    result = self.get_basic_selection(pure_selection, fields=fields)
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/site-packages/zarr/core.py", line 970, in get_basic_selection
    return self._get_basic_selection_nd(selection=selection, out=out, fields=fields)
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/site-packages/zarr/core.py", line 1010, in _get_basic_selection_nd
    indexer = BasicIndexer(selection, self)
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/site-packages/zarr/indexing.py", line 342, in __init__
    dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len)
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/site-packages/zarr/indexing.py", line 150, in __init__
    dim_sel = normalize_integer_selection(dim_sel, dim_len)
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/site-packages/zarr/indexing.py", line 124, in normalize_integer_selection
    raise BoundsCheckError(dim_len)
zarr.errors.BoundsCheckError: index out of bounds for dimension with length 16125


Process Process-32:
Process Process-29:
Process Process-31:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/multiprocessing/process.py", line 318, in _bootstrap
    util._exit_function()
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/multiprocessing/process.py", line 318, in _bootstrap
    util._exit_function()
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/multiprocessing/process.py", line 318, in _bootstrap
    util._exit_function()
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/multiprocessing/util.py", line 360, in _exit_function
    _run_finalizers()
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/multiprocessing/util.py", line 360, in _exit_function
    _run_finalizers()
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/multiprocessing/util.py", line 360, in _exit_function
    _run_finalizers()
  File "/root/miniconda3/envs/hugwbc/lib/python3.8/multiprocessi

In [38]:
from torch.utils.data import Dataset, DataLoader
from legged_gym.dataset.replay_buffer import ReplayBuffer
import numpy as np

class RBDataset(Dataset):
    def __init__(self, rb_path):
        self.rb = ReplayBuffer.create_from_path(rb_path)
        self.data_dict = {}
        preload_keys = ["actions", "commands"]
        disk_keys = ["proprio", "critic_obs"]
        preload_keys.extend(disk_keys)
        for key in disk_keys:
            self.data_dict[key] = self.rb.data[key]
        for key in preload_keys:
            self.data_dict[key] = self.rb.data[key][:]

    
    def __len__(self):
        return len(self.data_dict['actions'])
    
    def __getitem__(self, idx):
        return {key: self.data_dict[key][idx] for key in self.data_dict.keys()}

# rb_dataset = RBDataset("dataset/small_chunk_large/constant.zarr")
rb = ReplayBuffer.create_from_path("dataset/small_chunk_large/constant.zarr")
# print(rb_dataset[0])
# dataloader = DataLoader(rb_dataset, batch_size=1024, shuffle=True, num_workers=4)
# import tqdm
# import time
# pbar = tqdm.tqdm(dataloader)
# start_time = time.time()
# for idx, batch in enumerate(pbar):
#     time_taken = time.time() - start_time
#     pbar.set_description(f"time per batch: {time_taken / (idx + 1):.3f}s")


In [49]:

proprio_dim = 44
action_dim = 19
cmd_dim = 11
clock_dim = 2
privileged_dim = 24
terrain_dim = 221

obs = rb.data['proprio'][1]
critic_obs = rb.data['critic_obs'][1]
action = rb.data['actions'][0]
cmd = rb.data['commands'][1]
print(obs.shape)
print(critic_obs.shape)
print(np.all((obs[-1, :proprio_dim] - critic_obs[:proprio_dim]) < 1e-6))
print(np.all((obs[-1, proprio_dim:proprio_dim + action_dim] - action) < 1e-6))
print(np.all((cmd[-1] - critic_obs[proprio_dim + action_dim:proprio_dim + action_dim + cmd_dim]) < 1e-6))


(5, 63)
(321,)
True
True
True


In [None]:
import os
import shutil
from legged_gym.dataset.replay_buffer import ReplayBuffer
import numpy as np
from tqdm import tqdm

def dataset_repair(raw_dataset_path):
    repaired_path = raw_dataset_path.replace(".zarr", "_repaired.zarr")
    if os.path.exists(repaired_path):
        shutil.rmtree(repaired_path)
    repaired_rb = ReplayBuffer.create_empty_zarr(repaired_path)
    rb = ReplayBuffer.create_from_path(raw_dataset_path)
    print(rb)
    rb_data = {key: rb.data[key][:] for key in rb.data.keys()}
    meta_data = {key: rb.meta[key][:] for key in rb.meta.keys()}
    data_buffers = {key: [] for key in rb.data.keys()}
    new_episode_ends = []
    mis_matched_keys = set(['proprio', 'commands', 'clock', 'critic_obs', 'root_states'])
    for ep_idx, ep_ends in tqdm(enumerate(rb.meta.episode_ends)):
        if ep_idx == 0:
            ep_start = 0
        else:
            ep_start = rb.meta.episode_ends[ep_idx - 1]
        ep_end = rb.meta.episode_ends[ep_idx]
        ep_len = ep_end - ep_start
        if len(new_episode_ends) == 0:
            new_episode_ends.append(ep_len - 1)
        else:
            new_episode_ends.append(new_episode_ends[-1] + ep_len - 1)
        ep_data = {}
        for key in rb_data.keys():
            if key not in mis_matched_keys:
                ep_data[key] = rb_data[key][ep_start:ep_end][1:]
            else:
                ep_data[key] = rb_data[key][ep_start:ep_end][:-1]
        for key, val in ep_data.items():
            data_buffers[key].append(val)
        assert ep_len - 1 == len(ep_data['actions'])
        if (ep_idx + 1) % 1000 == 0:
            # print(f"Processed {ep_idx + 1} episodes, saving to {repaired_path}")
            for key, val in data_buffers.items():
                data_buffers[key] = np.concatenate(val, axis=0)
            repaired_rb.add_chunked_data(data_buffers, target_chunk_bytes=1024 * 1024 * 1024 * 2)
            data_buffers = {key: [] for key in rb.data.keys()}
    if len(data_buffers['actions']) > 0:
        for key, val in data_buffers.items():
            data_buffers[key] = np.concatenate(val, axis=0)
        repaired_rb.add_chunked_data(data_buffers, target_chunk_bytes=1024 * 1024 * 1024 * 2)
    meta_data['episode_ends'] = np.array(new_episode_ends)
    repaired_rb.add_chunked_meta(meta_data, target_chunk_bytes=1024 * 1024 * 1024 * 2)
    print(repaired_rb)
    return repaired_rb

/

In [1]:
import os
import shutil
from legged_gym.dataset.replay_buffer import ReplayBuffer
import numpy as np
from tqdm import tqdm

def dataset_transfer_chunked(raw_dataset_path):
    repaired_path = raw_dataset_path.replace(".zarr", "_transferred.zarr")
    if os.path.exists(repaired_path):
        shutil.rmtree(repaired_path)
    repaired_rb = ReplayBuffer.create_empty_zarr(repaired_path)
    rb = ReplayBuffer.create_from_path(raw_dataset_path)
    print(rb)
    save_interval = 1000
    meta_data = {key: rb.meta[key][:] for key in rb.meta.keys()}
    large_keys = set(['proprio', 'critic_obs'])
    for ep_idx in tqdm(range(0, len(rb.meta.episode_ends), save_interval)):
        pre_load_start = 0 if ep_idx == 0 else rb.meta.episode_ends[ep_idx - 1]
        pre_load_end = rb.meta.episode_ends[ep_idx + save_interval - 1]
        rb_data = {key: rb.data[key][pre_load_start:pre_load_end] for key in rb.data.keys()}
        ep_data = {}
        large_ep_data = {}
        for key in rb_data.keys():
            if key not in large_keys:
                ep_data[key] = rb_data[key]
            else:
                large_ep_data[key] = rb_data[key]
        repaired_rb.add_chunked_data_encoded(ep_data, target_chunk_bytes=1024 * 1024 * 1024 * 2)
        repaired_rb.add_chunked_data_encoded(large_ep_data, storage="sharded")   
    repaired_rb.add_chunked_meta_encoded(meta_data, target_chunk_bytes=1024 * 1024 * 1024 * 1024 * 2)
    print(repaired_rb)
    return repaired_rb


In [2]:
dataset_transfer_chunked("collected_large/constant.zarr")
dataset_transfer_chunked("collected_large/switch.zarr")

/
 ├── data
 │   ├── actions (81784864, 19) float32
 │   ├── clock (81784864, 5, 2) float32
 │   ├── commands (81784864, 5, 11) float32
 │   ├── critic_obs (81784864, 321) float32
 │   ├── dones (81784864,) bool
 │   ├── proprio (81784864, 5, 63) float32
 │   ├── rewards (81784864,) float32
 │   └── root_states (81784864, 13) float32
 └── meta
     ├── episode_command (164000, 11) float32
     ├── episode_ends (164000,) int64
     └── episode_reward (164000,) float32


  0%|          | 0/164 [00:01<?, ?it/s]

cannot import name 'ShardingCodec' from 'numcodecs' (/root/miniconda3/envs/hugwbc/lib/python3.8/site-packages/numcodecs/__init__.py)





RuntimeError: Requested storage='sharded' but numcodecs.ShardingCodec is unavailable (need numcodecs>=0.11). Try `pip install -U numcodecs` or use storage='zstd_bitshuffle'.

In [15]:
command_scales = np.array(
    [2, 2, 0.25, 1, 1, 1, 0.15, 2.0, 0.5, 0.5, 1]
)

In [14]:
print("proprio", rb.data['proprio'][0, -1])
print("history_action", rb.data['history_action'][0, -1])
print("commands", rb.data['commands'][0, -1])
print("commands", rb.data['commands'][0, -2])
print("clock", rb.data['clock'][0, -1])
print("Actual commands", rb.meta['episode_command_A'][0])


proprio [-8.2740314e-02  1.2649980e-01  1.4966875e-01  1.6320884e-02
 -1.5481804e-03 -1.0315908e+00  7.1848519e-03  7.0408601e-03
  1.3767800e-01  4.2155340e-02 -7.5490206e-02  2.3022011e-02
  1.5871401e-04 -6.4363830e-02  2.0078197e-02  2.5046857e-02
 -1.2482957e-02  3.1448044e-02  3.2764327e-02  2.1609386e-02
 -6.0810940e-04 -1.3010241e-02 -3.2440796e-02  1.4345683e-02
  2.9225057e-02  4.2959228e-02 -7.5227008e-03 -1.3294491e-01
  6.4396299e-02 -1.3069978e-01  4.2481008e-03 -4.0397819e-02
  6.8747178e-02 -1.1066597e-01 -1.1843402e-01 -3.3626080e-02
  2.8691573e-02  6.9325961e-02  6.8520859e-02 -4.7457766e-02
 -2.6354637e-02 -4.9802817e-02 -1.8618196e-02 -2.5761236e-02]
history_action [ 0.31945917  0.04709061  0.11344864 -0.43507007 -0.9177139   0.18261054
  0.21388778  0.2092223   0.29703414 -0.70665544 -0.33453184  0.9841815
  0.3678542   0.16917886 -0.2781665   0.49742576 -0.23865336 -0.00202002
 -0.26939726]
commands [ 1.0143085  -0.19354099 -0.085485    2.9061322   0.          0.

In [16]:
print(rb.meta['episode_command_A'][0] * command_scales)

[-0.95385414  0.47704041  0.          2.5704217   0.5         0.5
  0.04543421 -0.05299602  0.10762804 -0.18675983  0.        ]


In [7]:
from legged_gym.dataset.replay_buffer import ReplayBuffer
import numpy as np

rb = ReplayBuffer.create_from_path("/root/workspace/HugWBC/collected_trajectories_v2/constant.zarr", mode='a')
print(rb)
# print(rb.meta.episode_reward[:].max())
# print(rb.meta.episode_reward[:].min())
# print(rb.meta.episode_reward[:].mean())
# print(rb.meta.episode_reward[:].std())
# episode_ends = rb.meta.episode_ends[:]
# rb.new_meta_key("episode_ends_new", shape=(0,), dtype=np.int64, compressor=None, overwrite=False)
# rb.add_chunked_meta({"episode_ends_new": episode_ends}, target_chunk_bytes=1024 * 1024 * 1024 * 2)
# import numpy as np
# rb.new_meta_key("episode_ends_new", shape=(0,), dtype=np.int64, compressor=None, overwrite=False)
# rb.add_chunked_meta({"episode_ends_new": episode_ends}, target_chunk_bytes=1024 * 1024 * 1024 * 2)


/
 ├── data
 │   ├── actions (9995546, 19) float32
 │   ├── clock (9995546, 5, 2) float32
 │   ├── commands (9995546, 5, 11) float32
 │   ├── critic_obs (9995546, 321) float32
 │   ├── dones (9995546,) bool
 │   ├── proprio (9995546, 5, 63) float32
 │   ├── rewards (9995546,) float32
 │   └── root_states (9995546, 13) float32
 └── meta
     ├── episode_command (20000, 11) float32
     ├── episode_ends (20000,) int64
     └── episode_reward (20000,) float32


In [4]:
rb.episode_ends[:]

array([   500,   1000,   1500,   2000,   2500,   3000,   3500,   4000,
         4500,   5000,   5500,   6000,   6500,   7000,   7500,   8000,
         8500,   9000,   9500,  10000,  10500,  11000,  11500,  12000,
        12500,  13000,  13500,  14000,  14500,  15000,  15500,  16000,
        16500,  17000,  17500,  18000,  18500,  19000,  19500,  20000,
        20500,  21000,  21500,  22000,  22500,  23000,  23500,  24000,
        24500,  25000,  25500,  26000,  26500,  27000,  27500,  28000,
        28500,  29000,  29500,  30000,  30500,  31000,  31500,  32000,
        32500,  33000,  33500,  34000,  34500,  35000,  35500,  36000,
        36500,  37000,  37500,  38000,  38500,  39000,  39500,  40000,
        40500,  41000,  41500,  42000,  42500,  43000,  43500,  44000,
        44500,  45000,  45500,  46000,  46500,  47000,  47500,  48000,
        48500,  49000,  49500,  50000,  50500,  51000,  51500,  52000,
        52500,  53000,  53500,  54000,  54500,  55000,  55500,  56000,
      

In [12]:
parent_path = "/root/workspace/HugWBC/example_trajectories"
import os
import shutil
for dirn in os.listdir(parent_path):
    if os.path.isdir(os.path.join(parent_path, dirn)):
        # assert os.path.exists(os.path.join(parent_path, dirn, "data", "obs")), f"obs not found in {os.path.join(parent_path, dirn)}"
        # shutil.rmtree(os.path.join(parent_path, dirn, "data", "proprio"))
        # shutil.rmtree(os.path.join(parent_path, dirn, "data", "history_action"))
        # os.rename(os.path.join(parent_path, dirn, "data", "obs"), os.path.join(parent_path, dirn, "data", "proprio"))
        rb = ReplayBuffer.create_from_path(os.path.join(parent_path, dirn), mode='a')
        # pure_prop = rb.data['proprio'][:]
        # history_action = rb.data['history_action'][:]
        # obs = np.concatenate([pure_prop, history_action], axis=-1)
        # rb.add_chunked_data({
        #     'obs': obs,
        # }, target_chunk_bytes=1024 * 1024 * 1024 * 64)

        print(rb)

/
 ├── data
 │   ├── actions (3438, 19) float32
 │   ├── clock (3438, 5, 2) float32
 │   ├── commands (3438, 5, 11) float32
 │   ├── critic_obs (3438, 321) float32
 │   ├── dones (3438,) bool
 │   ├── proprio (3438, 5, 63) float32
 │   ├── rewards (3438,) float32
 │   └── root_states (3438, 13) float32
 └── meta
     ├── episode_command_A (10, 10) float64
     ├── episode_ends (10,) int64
     └── episode_reward (10,) float64
/
 ├── data
 │   ├── actions (4682, 19) float32
 │   ├── clock (4682, 5, 2) float32
 │   ├── commands (4682, 5, 11) float32
 │   ├── critic_obs (4682, 321) float32
 │   ├── dones (4682,) bool
 │   ├── proprio (4682, 5, 63) float32
 │   ├── rewards (4682,) float32
 │   └── root_states (4682, 13) float32
 └── meta
     ├── episode_command_A (10, 10) float64
     ├── episode_ends (10,) int64
     └── episode_reward (10,) float64
/
 ├── data
 │   ├── actions (4822, 19) float32
 │   ├── clock (4822, 5, 2) float32
 │   ├── commands (4822, 5, 11) float32
 │   ├── critic_

(25000,)