In [2]:
import numpy as np
import torch

In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
import h5py

# Location of the source HDF5 dataset, relative to the notebook's working directory.
data_path = "../robomimic/datasets/tool_hang/ph/image_agent.hdf5"

# Open the file read-only. NOTE(review): this handle is not closed in any of
# the visible cells, so the file stays open for the whole kernel session.
f = h5py.File(data_path, "r")

# Top-level "data" entry of the file (iterated as a group of demos further below).
data = f["data"]

In [5]:
from robotics.model_src.dataset import RobosuiteImageActionDataset, RobosuiteImageActionDatasetMem, normalize_data

# Camera name handed to the dataset class.
camera_type = "agentview"

# Horizon settings handed to the dataset class.
# NOTE(review): with obs_horizon=1 the batch printed further below holds 2
# observation steps, so the dataset presumably returns obs_horizon + 1 frames —
# confirm against robotics.model_src.dataset.
pred_horizon = 8
obs_horizon = 1

# In-memory variant of the dataset (judging by the "Mem" suffix — TODO confirm).
ds = RobosuiteImageActionDatasetMem(data_path, camera_type, obs_horizon=obs_horizon, pred_horizon=pred_horizon)

100%|██████████| 200/200 [00:00<00:00, 53089.10it/s]


In [6]:
import torch

# Hold out a fixed fraction of the samples for validation.
val_ratio = 0.2
n_total = len(ds)
n_val = int(n_total * val_ratio)
n_train = n_total - n_val          # remainder goes to train so the sizes always sum to n_total

# Seeded generator -> the train/val partition is the same on every run.
generator = torch.Generator().manual_seed(33)
train_set, val_set = torch.utils.data.random_split(
    ds, [n_train, n_val], generator=generator)

train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=224, shuffle=True,
    num_workers=4, pin_memory=True, persistent_workers=True)

val_loader = torch.utils.data.DataLoader(
    val_set, batch_size=224, shuffle=False,
    num_workers=4, pin_memory=True, persistent_workers=True)

# Peek at one training batch and print the shape of every tensor in it.
# The labels name the keys that are actually read (the first one used to say
# 'image' while printing 'img_obs', and the last one was missing its colon).
batch = next(iter(train_loader))
print("batch['img_obs'].shape:", batch['img_obs'].shape)
print("batch['act_obs'].shape:", batch['act_obs'].shape)
print("batch['act_pred'].shape:", batch['act_pred'].shape)

batch['image'].shape: torch.Size([224, 2, 3, 224, 224])
batch['act_obs'].shape: torch.Size([224, 2, 7])
batch['act_pred'].shape torch.Size([224, 8, 7])


In [8]:
import os, h5py, numpy as np, tqdm, torch
from torch.utils.data import Dataset, DataLoader

# file paths
SRC_H5  = "../robomimic/datasets/tool_hang/ph/image_agent.hdf5"   # original heavy HDF5
FLAT_H5 = "../robomimic/datasets/tool_hang/ph/flat_f16.hdf5"                                       # where the "flat" file is written

CAMERA  = "agentview"                                             # change this if another camera is used
OH, PH  = 1, 8                                                    # observation / prediction horizons
BATCH   = 224                                                     # batch size tuned for your GPU


In [7]:
from robotics.model_src.dataset import create_trajectory_indices

# Names of the per-demo observation entries for the selected camera.
cam_raw  = f"{CAMERA}_image"
cam_norm = f"{CAMERA}_image_norm"   # not used in this cell

# Convert the per-demo source file into one "flat" file:
#   img          (N, 3, H, W) float16, pixel values scaled to [0, 1]
#   act          (N, A)       float32
#   episode_ends global index of the last frame of every demo (preceded by -1)
#   windows      index table produced by create_trajectory_indices
# Opening FLAT_H5 with "w" overwrites any existing file at that path.
with h5py.File(SRC_H5, "r") as s, h5py.File(FLAT_H5, "w") as d:
    demos = list(s["data"].values())
    N     = sum(len(dd["actions"]) for dd in demos)
    H, W  = demos[0]["obs"][cam_raw].shape[1:3]
    A     = demos[0]["actions"].shape[1]          # action width read from the data (was hard-coded 7)

    # h5py rejects a chunk that is larger than the dataset along any axis,
    # so clamp the chunk length to N for small datasets.
    img = d.create_dataset("img", (N, 3, H, W), "float16",
                           chunks=(min(1024, N), 3, H, W))   # no gzip compression
    act = d.create_dataset("act", (N, A), "float32",
                           chunks=(min(4096, N), A))

    ends = [-1]   # leading -1 sentinel, then the last global index of each demo
    off = 0       # write offset of the current demo in the flat arrays
    for dd in tqdm.tqdm(demos):
        n = len(dd["actions"])
        # move the channel axis from last to position 1, then scale uint8 -> [0, 1];
        # the cast to float16 happens on assignment into the dataset
        img[off:off+n] = np.moveaxis(dd["obs"][cam_raw][:], -1, 1).astype("f4") / 255
        act[off:off+n] = dd["actions"][:]
        off += n
        ends.append(off - 1)

    d["episode_ends"] = np.asarray(ends, np.int64)
    d["windows"]      = create_trajectory_indices(np.asarray(ends), OH, PH)



100%|██████████| 200/200 [12:18<00:00,  3.69s/it]
100%|██████████| 200/200 [00:00<00:00, 53762.79it/s]


In [9]:
class FlatRobosuiteDataset(Dataset):
    """Windowed samples read from the flat HDF5 file.

    The file is expected to contain the datasets ``img``, ``act`` and
    ``windows``. Each row of ``windows`` lists the frame indices of one
    sample: the first ``oh + 1`` entries are returned as observations and the
    remaining entries as the actions to predict.

    The index table is loaded eagerly; the ``img``/``act`` handles are opened
    lazily (or attached from outside, e.g. by a DataLoader ``worker_init_fn``)
    so that every process works with its own HDF5 handle.

    Args:
        path: path to the flat HDF5 file.
        oh: observation horizon.
        ph: prediction horizon (stored, not used for slicing).
    """

    def __init__(self, path: str, oh=1, ph=8):
        self.file_path = path
        with h5py.File(path, "r") as f:           # open briefly, only for the index table
            self.win = f["windows"][:]            # (N_win, span)
        self._file = None                         # handle owned by _lazy_init, if any
        self.img = None                           # opened in the worker / on first access
        self.act = None
        self.oh, self.ph = oh, ph

    def __len__(self):
        """Number of windows (samples)."""
        return len(self.win)

    def __getstate__(self):
        """Drop the HDF5 handles when pickling; they are reopened lazily.

        h5py objects cannot be pickled, so without this the dataset could not
        be sent to another process once it had been read from.
        """
        state = self.__dict__.copy()
        state["_file"] = None
        state["img"] = None
        state["act"] = None
        return state

    def _lazy_init(self):
        """Open the HDF5 file in this process if no handles are attached yet.

        Used when no worker_init_fn attached them (e.g. num_workers=0).
        """
        if self.img is None:
            # keep the File object so that close() can release it later
            self._file = h5py.File(self.file_path, "r")
            self.img = self._file["img"]
            self.act = self._file["act"]

    def close(self):
        """Close the underlying HDF5 file (if open) and detach the handles."""
        handle = getattr(self, "_file", None)
        if handle is None and self.img is not None:
            # handles attached from outside: reach the file through the dataset
            handle = getattr(self.img, "file", None)
        if handle is not None:
            handle.close()
        self._file = None
        self.img = None
        self.act = None

    def __getitem__(self, idx: int):
        """Return one sample as a dict with ``img_obs``, ``act_obs`` and ``act_pred``."""
        if self.img is None:                      # single-process use
            self._lazy_init()

        row = self.win[idx]
        # Read one contiguous block covering the window, then pick the frames
        # with indices relative to that block.
        # Assumes row[0] is the smallest and row[-1] the largest index of the row.
        lo, hi = row[0], row[-1] + 1
        rel = row - lo

        imgs = torch.from_numpy(self.img[lo:hi][rel]).float()  # f16 -> f32
        acts = torch.from_numpy(self.act[lo:hi][rel])

        return {
            "img_obs":  imgs[: self.oh + 1],
            "act_obs":  acts[: self.oh + 1],
            "act_pred": acts[self.oh + 1:],
        }

In [10]:
# --- hook that is handed to the DataLoader as worker_init_fn ---
def h5_worker_init(_):
    """Open the flat HDF5 file inside a DataLoader worker.

    Takes the worker's copy of the dataset from ``get_worker_info()``, opens
    ``dataset.file_path`` read-only (latest libver, SWMR, enlarged chunk cache)
    and attaches the ``img`` and ``act`` datasets to that copy.

    The argument (worker id) is ignored.
    """
    dataset = torch.utils.data.get_worker_info().dataset
    chunk_cache_bytes = 64 * 1024 * 1024
    handle = h5py.File(
        dataset.file_path,
        "r",
        libver="latest",
        swmr=True,
        rdcc_nbytes=chunk_cache_bytes,
        rdcc_nslots=1_000_003,
    )
    dataset.img = handle["img"]
    dataset.act = handle["act"]

In [11]:
ds = FlatRobosuiteDataset(FLAT_H5, OH, PH)
print(f"Окон всего: {len(ds):,}")

# Use half of the CPUs, but always at least one worker: os.cpu_count() may
# return None, and with num_workers=0 DataLoader rejects both
# prefetch_factor and persistent_workers=True.
n_workers = max(1, (os.cpu_count() or 2) // 2)

loader = DataLoader(
    ds, batch_size=BATCH, shuffle=True,
    num_workers=n_workers,
    pin_memory=True, prefetch_factor=4,
    persistent_workers=True, worker_init_fn=h5_worker_init
)

Окон всего: 95,762


In [10]:
import time

# Rough throughput check: time the first N_TIMED batches of the loader.
# The measurement includes worker start-up on the first iteration.
N_TIMED = 10

t0 = time.perf_counter()
n_seen = 0
for _batch in loader:
    n_seen += 1
    # stop right after the N_TIMED-th batch so that no extra batch is fetched
    # inside the timed region
    if n_seen == N_TIMED:
        break
elapsed = time.perf_counter() - t0

# divide by the number of batches actually consumed (the loader may be shorter)
print(f"Скорость ≈ {n_seen/elapsed:.1f} батчей/с "
      f"при batch={BATCH}")

NameError: name 'time' is not defined

In [None]:
# Pull a single batch from the loader and report the shape of each entry.
batch = next(iter(loader))
for key in ("img_obs", "act_obs", "act_pred"):
    print(f"{key}:", batch[key].shape)

In [None]:
# h5_worker_init attaches the HDF5 handles only to the worker copies of the
# dataset, so in this (main) process ds.img is still None until opened here.
ds._lazy_init()

# Time one contiguous read covering the first window.
row = ds.win[0]
lo, hi = row[0], row[-1] + 1
t0 = time.perf_counter()
_ = ds.img[lo:hi]         # single slice
print("Чтение блока:", time.perf_counter() - t0, "сек")

In [1]:
# Environment report: prints the NumPy version, Python build, platform and detected SIMD extensions.
import numpy as np; np.show_runtime()


[{'numpy_version': '2.2.4',
  'python': '3.12.9 | packaged by Anaconda, Inc. | (main, Feb  6 2025, '
            '18:56:27) [GCC 11.2.0]',
  'uname': uname_result(system='Linux', node='fedora', release='6.14.11-200.fc41.x86_64', version='#1 SMP PREEMPT_DYNAMIC Tue Jun 10 16:33:19 UTC 2025', machine='x86_64')},
 {'simd_extensions': {'baseline': ['SSE', 'SSE2', 'SSE3'],
                      'found': ['SSSE3',
                                'SSE41',
                                'POPCNT',
                                'SSE42',
                                'AVX',
                                'F16C',
                                'FMA3',
                                'AVX2'],
                      'not_found': ['AVX512F',
                                    'AVX512CD',
                                    'AVX512_KNL',
                                    'AVX512_KNM',
                                    'AVX512_SKX',
                                    'AVX512_CLX',
         