In [1]:
import h5py
import numpy as np

H_list = [5, 10, 20]   # 분석할 chunk 길이 후보들
all_stats = {H: [] for H in H_list}
dataset_path = "/home/robros/git/diffusion-qcfql/robomimic/dataset/transport/mh/low_dim_v15.hdf5"

with h5py.File(dataset_path, "r") as f:
    demos = sorted(list(f["data"].keys()))  # ["demo_0", "demo_1", ...]
    
    for ep in demos:
        rew = np.array(f[f"data/{ep}/rewards"])   # [T]
        done = np.array(f[f"data/{ep}/dones"])    # [T] or [T,1]

        if done.ndim > 1:
            done = done.squeeze(-1)

        T = len(rew)

        for H in H_list:
            # 슬라이딩 윈도우: stride=1
            for start in range(0, T - H + 1):
                end = start + H
                r_chunk = rew[start:end]
                d_chunk = done[start:end]

                # valid mask: 중간에 done 나오면 이후는 0으로
                valid = np.ones(H, dtype=np.float32)
                for t in range(1, H):
                    if d_chunk[t-1] > 0.5:
                        valid[t:] = 0.0
                        break

                traj_return = float((r_chunk * valid).sum())

                all_stats[H].append({
                    "traj_return": traj_return,
                    "has_done": bool(d_chunk.max() > 0.5),
                    "start": start,
                    "ep": ep,
                })


In [2]:
all_stats

{5: [{'traj_return': 0.0, 'has_done': False, 'start': 0, 'ep': 'demo_0'},
  {'traj_return': 0.0, 'has_done': False, 'start': 1, 'ep': 'demo_0'},
  {'traj_return': 0.0, 'has_done': False, 'start': 2, 'ep': 'demo_0'},
  {'traj_return': 0.0, 'has_done': False, 'start': 3, 'ep': 'demo_0'},
  {'traj_return': 0.0, 'has_done': False, 'start': 4, 'ep': 'demo_0'},
  {'traj_return': 0.0, 'has_done': False, 'start': 5, 'ep': 'demo_0'},
  {'traj_return': 0.0, 'has_done': False, 'start': 6, 'ep': 'demo_0'},
  {'traj_return': 0.0, 'has_done': False, 'start': 7, 'ep': 'demo_0'},
  {'traj_return': 0.0, 'has_done': False, 'start': 8, 'ep': 'demo_0'},
  {'traj_return': 0.0, 'has_done': False, 'start': 9, 'ep': 'demo_0'},
  {'traj_return': 0.0, 'has_done': False, 'start': 10, 'ep': 'demo_0'},
  {'traj_return': 0.0, 'has_done': False, 'start': 11, 'ep': 'demo_0'},
  {'traj_return': 0.0, 'has_done': False, 'start': 12, 'ep': 'demo_0'},
  {'traj_return': 0.0, 'has_done': False, 'start': 13, 'ep': 'demo_0'},

In [None]:
import numpy as np

for i in H_list:
    stats = all_stats[i]
    returns = np.array([s["traj_return"] for s in stats])

    print("H=20, 총 chunk 수:", len(returns))
    print("min, max, mean, std:", returns.min(), returns.max(), returns.mean(), returns.std())

    # 성공 기준 잡기 (예: return > 0.5)
    success_mask = returns > 0.5
    print("성공 chunk 비율:", success_mask.mean())

    # 성공 chunk가 에피소드 어디에 몰려 있는지
    success_positions = np.array([s["start"] for s in stats])[success_mask]
    print("성공 chunk 시작 step 평균/표준편차:", success_positions.mean(),
        success_positions.std())


H=20, 총 chunk 수: 190100
min, max, mean, std: 0.0 1.0 0.008784850078905839 0.0933149317526246
성공 chunk 비율: 0.008784850078905839
성공 chunk 시작 step 평균/표준편차: 630.4299401197604 202.5863300904515
