In [1]:
import h5py

h5_path = "/home/parkjeongsu/TinyVLA/Droid/droid_with_lang/droid_1dot7t_lang_succ_t0001_s-0-0/episode_2.hdf5"

with h5py.File(h5_path, 'r') as f:
    print("🔑 Top-level keys:", list(f.keys()))

    obs = f["observations"]
    print("🔍 [observations] 내부 key 목록:", list(obs.keys()))

    for k in obs.keys():
        item = obs[k]
        if isinstance(item, h5py.Dataset):
            print(f"✅ {k} shape:", item.shape)
        elif isinstance(item, h5py.Group):
            print(f"📁 {k} is a Group → 내부 key 목록:", list(item.keys()))

    print(f['action'].shape)  # ← 이게 바로 10차원이 되어야 함



🔑 Top-level keys: ['action', 'is_edited', 'language_raw', 'observations']
🔍 [observations] 내부 key 목록: ['images', 'qpos', 'qvel']
📁 images is a Group → 내부 key 목록: ['left', 'right', 'wrist']
✅ qpos shape: (142, 10)
✅ qvel shape: (142, 10)
(142, 10)


텐서플로우 데이터셋 읽는 코드입니다.

In [9]:
import tensorflow_datasets as tfds
import tensorflow as tf
import numpy as np

# 1) 빌더 & 데이터셋
builder = tfds.builder_from_directory("/home/parkjeongsu/TinyVLA/Droid/droid_100/1.0.0")
ds = builder.as_dataset(split="train")  # tf.data.Dataset (episode 단위)

# 2) 한 에피소드 뽑기 (여긴 tf.data 그대로)
episode = next(iter(ds))
steps_ds = episode["steps"]             # 여전히 tf.data.Dataset

# 3) 첫 스텝만 numpy로 확인
to_numpy = lambda x: x.numpy()
first_tf = next(iter(steps_ds))
first = tf.nest.map_structure(to_numpy, first_tf)

print("top keys:", list(first.keys()))
print("obs keys:", list(first["observation"].keys()))
print("action_dict keys:", list(first["action_dict"].keys()))

# 4) 전 스텝을 numpy dict로 수집
steps = [tf.nest.map_structure(to_numpy, s) for s in steps_ds]

# 5) 안전 추출 헬퍼
def get_nested(d, *path):
    cur = d
    for k in path:
        if isinstance(cur, dict) and k in cur:
            cur = cur[k]
        else:
            return None
    return cur

# 6) 배열화
actions = np.stack([s["action"] for s in steps])  # (T, 7) or (T, ?)
qpos    = np.stack([get_nested(s, "observation", "joint_position") for s in steps])

# qvel 우선순위: observation.joint_velocity -> action_dict.joint_velocity -> finite diff
qvel_list = [get_nested(s, "observation", "joint_velocity") for s in steps]
if qvel_list[0] is None:
    qvel_list = [get_nested(s, "action_dict", "joint_velocity") for s in steps]

if qvel_list[0] is None:
    qvel = np.diff(qpos, axis=0, prepend=qpos[:1])  # dt=1 가정
else:
    qvel = np.stack(qvel_list)

print("shapes -> actions:", actions.shape, "qpos:", qpos.shape, "qvel:", qvel.shape)

# 1) action 선택: joint velocity를 사용
a = np.stack([s["action_dict"]["joint_velocity"] for s in steps])   # (T,7)
q = np.stack([s["observation"]["joint_position"] for s in steps])   # (T,7)
q_next = np.roll(q, -1, axis=0)[:-1]
a_t = a[:-1]
q_t = q[:-1]

# 2) dt 추정(대략): 최소자승으로 q_next - q ≈ dt * a
dt_hat = np.linalg.lstsq(a_t.reshape(-1,7), (q_next - q_t).reshape(-1,7), rcond=None)[0]
# dt_hat은 7x7이 나올 수 있으니 평균 스케일만 쓰려면:
scale = np.mean(np.diag(dt_hat))  # 간단 추정
print("velocity→pos scale ≈", scale)



top keys: ['action', 'action_dict', 'discount', 'is_first', 'is_last', 'is_terminal', 'language_instruction', 'language_instruction_2', 'language_instruction_3', 'observation', 'reward']
obs keys: ['cartesian_position', 'exterior_image_1_left', 'exterior_image_2_left', 'gripper_position', 'joint_position', 'wrist_image_left']
action_dict keys: ['cartesian_position', 'cartesian_velocity', 'gripper_position', 'gripper_velocity', 'joint_position', 'joint_velocity']
shapes -> actions: (166, 7) qpos: (166, 7) qvel: (166, 7)
velocity→pos scale ≈ 0.058800539014447616


In [7]:
import tensorflow_datasets as tfds

builder = tfds.builder_from_directory("/home/parkjeongsu/TinyVLA/Droid/droid_100/1.0.0")  # 'droid_100' 루트(그 아래 1.0.0/ 존재)
ds = builder.as_dataset(split="train")
info = builder.info
print(info)
for i, ex in enumerate(ds.take(1)):
    print(ex.keys())

# 첫 번째 episode 하나만 확인
for episode in ds.take(1):
    print("=== Episode 확인 ===")
    for idx, step in enumerate(episode['steps']):
        qpos = step['observation']['joint_position'].numpy()  # (7,)
        gripper_pos = step['observation']['gripper_position'].numpy()  # (1,)

        if idx in [0, len(episode['steps'])-1]:  # 첫 스텝, 마지막 스텝만
            print(f"\nStep {idx}")
            print("qpos:", qpos)
            print("gripper_position:", gripper_pos)
            print("qpos 마지막 값:", qpos[-1])
            print("같은가?:", np.allclose(qpos[-1], gripper_pos[0], atol=1e-6))
    break


2025-08-12 07:19:47.575274: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-12 07:19:47.704289: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-08-12 07:19:48.977170: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-08-12 07:19:49.066877: I tensorflow/comp

tfds.core.DatasetInfo(
    name='r2d2_faceblur',
    full_name='r2d2_faceblur/1.0.0',
    description="""
    
    """,
    homepage='https://www.tensorflow.org/datasets/catalog/r2d2_faceblur',
    data_path='/home/parkjeongsu/TinyVLA/Droid/droid_100/1.0.0',
    file_format=tfrecord,
    download_size=Unknown size,
    dataset_size=2.04 GiB,
    features=FeaturesDict({
        'episode_metadata': FeaturesDict({
            'file_path': string,
            'recording_folderpath': string,
        }),
        'steps': Dataset({
            'action': Tensor(shape=(7,), dtype=float64),
            'action_dict': FeaturesDict({
                'cartesian_position': Tensor(shape=(6,), dtype=float64),
                'cartesian_velocity': Tensor(shape=(6,), dtype=float64),
                'gripper_position': Tensor(shape=(1,), dtype=float64),
                'gripper_velocity': Tensor(shape=(1,), dtype=float64),
                'joint_position': Tensor(shape=(7,), dtype=float64),
             

In [7]:
import tensorflow_datasets as tfds

builder = tfds.builder_from_directory("/home/parkjeongsu/TinyVLA/Droid/droid_100/1.0.0")  # 'droid_100' 루트(그 아래 1.0.0/ 존재)
ds = builder.as_dataset(split="train")
info = builder.info
print(info)
for i, ex in enumerate(ds.take(1)):
    print(ex.keys())

# 첫 번째 episode 하나만 확인
for episode in ds.take(2):
    print("=== Episode 확인 ===")
    for idx, step in enumerate(episode['steps']):
        c_pos = step['action_dict']['joint_position'].numpy()  # (7,)
        gripper_pos = step['action_dict']['joint_velocity'].numpy()  # (1,)

        if idx in [0, len(episode['steps'])-1]:  # 첫 스텝, 마지막 스텝만
            print(f"\nStep {idx}")
            print("joint_position:", c_pos)
            print("joint_velocity:", gripper_pos)
            print("joint_position 마지막 값:", c_pos[-1])
      
    break


tfds.core.DatasetInfo(
    name='r2d2_faceblur',
    full_name='r2d2_faceblur/1.0.0',
    description="""
    
    """,
    homepage='https://www.tensorflow.org/datasets/catalog/r2d2_faceblur',
    data_path='/home/parkjeongsu/TinyVLA/Droid/droid_100/1.0.0',
    file_format=tfrecord,
    download_size=Unknown size,
    dataset_size=2.04 GiB,
    features=FeaturesDict({
        'episode_metadata': FeaturesDict({
            'file_path': string,
            'recording_folderpath': string,
        }),
        'steps': Dataset({
            'action': Tensor(shape=(7,), dtype=float64),
            'action_dict': FeaturesDict({
                'cartesian_position': Tensor(shape=(6,), dtype=float64),
                'cartesian_velocity': Tensor(shape=(6,), dtype=float64),
                'gripper_position': Tensor(shape=(1,), dtype=float64),
                'gripper_velocity': Tensor(shape=(1,), dtype=float64),
                'joint_position': Tensor(shape=(7,), dtype=float64),
             

In [None]:
import tensorflow_datasets as tfds
import tensorflow as tf
import numpy as np

# ===== 출력 제한 설정 =====
MAX_STEPS = 10      # steps 몇 개만 자세히 볼지
MAX_VALS  = 20      # 배열/텐서 값 최대 몇 개까지 출력할지
MAX_STR   = 200     # 문자열은 최대 몇 글자까지 출력할지

def _decode_str(x):
    if isinstance(x, (bytes, np.bytes_)):
        return x.decode("utf-8", errors="ignore")
    if isinstance(x, (str, np.str_)):
        return str(x)
    return str(x)

def _value_preview(val, max_vals=MAX_VALS, max_str=MAX_STR):
    """shape/dtype 없이 '값'만 요약 문자열로 반환"""
    # tf.Tensor -> numpy로
    if isinstance(val, tf.Tensor):
        val = val.numpy()

    # numpy 배열
    if isinstance(val, np.ndarray):
        if val.dtype.type is np.bytes_:
            # 문자열 텐서인 경우
            flat = val.ravel()
            shown = [ _decode_str(b)[:max_str] for b in flat[:max_vals] ]
            return f"{shown}" + (f" ... (+{flat.size-max_vals})" if flat.size > max_vals else "")
        else:
            flat = val.ravel()
            shown = flat[:max_vals].tolist()
            return f"{shown}" + (f" ... (+{flat.size-max_vals})" if flat.size > max_vals else "")

    # 파이썬 기본형/문자열
    if isinstance(val, (int, float, bool)):
        return str(val)
    s = _decode_str(val)
    return s[:max_str] + ("..." if len(s) > max_str else "")

def _print_values(key, val, indent=0):
    pad = "  " * indent
    # dict면 재귀
    if isinstance(val, dict):
        print(f"{pad}{key}:")
        for k, v in val.items():
            _print_values(k, v, indent+1)
        return
    # 나머지: 값만 프린트
    try:
        pv = _value_preview(val)
    except Exception as e:
        pv = f"<error: {e}>"
    print(f"{pad}{key}: {pv}")

# ===== 실제 로드 & 출력 =====
builder = tfds.builder_from_directory("/home/parkjeongsu/TinyVLA/Droid/droid_100/1.0.0")
ds = builder.as_dataset(split="train")

# 에피소드 1개만
for episode in ds.take(5):
    print("\n=== episode top-level values ===")
    for k in episode.keys():
        _print_values(k, episode[k], indent=0)

    print(f"\n=== steps values (first {MAX_STEPS} steps) ===")
    for idx, step in enumerate(episode["steps"]):
        print(f"\n--- step {idx} ---")
        # step은 dict이므로 각 key의 '값'만 표기
        for k in step.keys():
            _print_values(k, step[k], indent=1)
        if idx + 1 >= MAX_STEPS:
            break
    break



=== episode top-level values ===
episode_metadata:
  file_path: /nfs/kun2/datasets/r2d2/r2d2-data-full/RAIL/success/2023-04-17/Mon_Apr_17_14:48:05_2023/trajectory.h5
  recording_folderpath: /nfs/kun2/datasets/r2d2/r2d2-data-full/RAIL/success/2023-04-17/Mon_Apr_17_14:48:05_2023/recordings/MP4
steps: <_VariantDataset element_spec={'action': TensorSpec(shape=(7,), dtype=tf.float64, name=None), 'action_dict': {'cartesian_position': TensorSpec(shape=(6,), dtype=tf.float64, name=None), 'cartesian_velo...

=== steps values (first 10 steps) ===

--- step 0 ---
  action: [0.38357484340667725, 0.07346952706575394, 0.5513594150543213, -2.8934216499328613, -0.1987120658159256, 0.12699034810066223, 0.0]
  action_dict:
    cartesian_position: [0.38357484340667725, 0.07346952706575394, 0.5513594150543213, -2.8934216499328613, -0.1987120658159256, 0.12699034810066223]
    cartesian_velocity: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
    gripper_position: [0.0]
    gripper_velocity: [0.0]
    joint_position: [0.

: 

In [10]:
import tensorflow_datasets as tfds
import numpy as np

builder = tfds.builder_from_directory("/home/parkjeongsu/TinyVLA/Droid/droid_100/1.0.0")
ds = builder.as_dataset(split="train")

def check_episode(episode):
    ok1 = True  # action[:6] == action_dict.cartesian_position
    ok2 = []    # ||obs.cartesian_position - action[:6]||
    ok3 = True  # action[-1] == action_dict.gripper_position[0]
    ok4 = True  # obs.joint_position[-1] == obs.gripper_position ? (기대: False)

    for step in episode["steps"]:
        a = step["action"].numpy()
        cd_pos = step["action_dict"]["cartesian_position"].numpy()
        grip_cmd = step["action_dict"]["gripper_position"].numpy()[0]
        obs_pose = step["observation"]["cartesian_position"].numpy()
        qpos = step["observation"]["joint_position"].numpy()
        grip_obs = step["observation"]["gripper_position"].numpy()[0]

        ok1 &= np.allclose(a[:6], cd_pos, atol=1e-8)
        ok3 &= np.allclose(a[-1], grip_cmd, atol=1e-8)
        ok4 &= np.allclose(qpos[-1], grip_obs, atol=1e-6)  # 대부분 False일 것
        ok2.append(np.linalg.norm(obs_pose - a[:6]))

    return ok1, np.mean(ok2), np.max(ok2), ok3, ok4

for i, ep in enumerate(ds.take(3)):
    ok1, m, M, ok3, ok4 = check_episode(ep)
    print(f"episode {i}: action==cart_pos? {ok1}, obs≈action mean_err={m:.4e} max_err={M:.4e}, "
          f"grip_cmd==action[-1]? {ok3}, qpos[-1]==grip_obs? {ok4}")


episode 0: action==cart_pos? True, obs≈action mean_err=3.1652e-01 max_err=6.2435e+00, grip_cmd==action[-1]? True, qpos[-1]==grip_obs? False
episode 1: action==cart_pos? True, obs≈action mean_err=4.7968e-01 max_err=6.2766e+00, grip_cmd==action[-1]? True, qpos[-1]==grip_obs? False
episode 2: action==cart_pos? True, obs≈action mean_err=9.4281e-01 max_err=6.2768e+00, grip_cmd==action[-1]? True, qpos[-1]==grip_obs? False


In [1]:
import h5py

h5_path = "/home/parkjeongsu/TinyVLA/Droid/trans_data/droid_1dot7t_lang_succ_t0001_s-0-0/episode_74.hdf5"

with h5py.File(h5_path, 'r') as f:
    print("🔑 Top-level keys:", list(f.keys()))

    obs = f["observations"]
    print("🔍 [observations] 내부 key 목록:", list(obs.keys()))

    for k in obs.keys():
        item = obs[k]
        if isinstance(item, h5py.Dataset):
            print(f"✅ {k} shape:", item.shape)
        elif isinstance(item, h5py.Group):
            print(f"📁 {k} is a Group → 내부 key 목록:", list(item.keys()))

    print(f['action'].shape)  # ← 이게 바로 10차원이 되어야 함



🔑 Top-level keys: ['action', 'language_raw', 'observations']
🔍 [observations] 내부 key 목록: ['images', 'joint_positions', 'qpos', 'qvel']
📁 images is a Group → 내부 key 목록: ['left', 'right', 'wrist']
✅ joint_positions shape: (100, 7)
✅ qpos shape: (100, 7)
✅ qvel shape: (100, 7)
(100, 10)


In [2]:
import h5py
import numpy as np

h5_path = "/home/parkjeongsu/TinyVLA/Droid/trans_data/droid_1dot7t_lang_succ_t0001_s-0-0/episode_74.hdf5"

def safe_get(h5, path):
    """H5에서 path가 있으면 numpy 배열로 반환, 없으면 None"""
    if path in h5:
        return h5[path][()]
    return None

with h5py.File(h5_path, 'r') as f:
    # 1) 상위/observations 구조 훑기
    print("🔑 Top-level keys:", list(f.keys()))
    obs = f["observations"]
    print("🔍 [observations] 내부 key 목록:", list(obs.keys()))

    # 각 항목 shape / 그룹 여부 출력
    for k in obs.keys():
        item = obs[k]
        if isinstance(item, h5py.Dataset):
            print(f"✅ {k} shape:", item.shape)
        elif isinstance(item, h5py.Group):
            print(f"📁 {k} is a Group → 내부 key 목록:", list(item.keys()))

    # action shape 확인
    print("🧭 action shape:", f['action'].shape)

    # 2) qpos와 joint_positions(또는 joint_position) 후보 로드
    qpos = safe_get(f, "/observations/qpos")
    joint_candidates = ["/observations/joint_positions", "/observations/joint_position"]
    joint = None
    joint_key = None
    for jk in joint_candidates:
        if jk in f:
            joint = f[jk][()]
            joint_key = jk
            break

    # 3) 일부 값 출력(앞 3스텝)
    np.set_printoptions(precision=6, suppress=True, linewidth=140)

    if qpos is not None:
        print("\n🟦 qpos (앞 3 스텝):")
        print(qpos[:3])
        print("qpos shape:", qpos.shape)
    else:
        print("\n⚠️ '/observations/qpos'가 없습니다.")

    if joint is not None:
        print(f"\n🟩 {joint_key} (앞 3 스텝):")
        print(joint[:3])
        print(f"{joint_key} shape:", joint.shape)
    else:
        print("\nℹ️ H5에 'joint_positions' 저장이 없습니다. (이번 변환 스크립트는 보통 qpos만 저장)")

    # 4) 둘 다 있으면 완전 동일/근사 동일 여부 체크
    if (qpos is not None) and (joint is not None):
        same_shape = (qpos.shape == joint.shape)
        print("\n📐 동일 shape?", same_shape)
        if same_shape:
            exactly_equal = np.array_equal(qpos, joint)
            allclose_equal = np.allclose(qpos, joint, atol=1e-6)
            max_abs_diff = np.max(np.abs(qpos - joint))
            print("✅ 완전 동일(np.array_equal)?", exactly_equal)
            print("✅ 근사 동일(np.allclose, atol=1e-6)?", allclose_equal)
            print("🔎 최대 절대 오차:", float(max_abs_diff))
        else:
            print("❌ shape이 달라 비교 불가.")


🔑 Top-level keys: ['action', 'language_raw', 'observations']
🔍 [observations] 내부 key 목록: ['images', 'joint_positions', 'qpos', 'qvel']
📁 images is a Group → 내부 key 목록: ['left', 'right', 'wrist']
✅ joint_positions shape: (100, 7)
✅ qpos shape: (100, 7)
✅ qvel shape: (100, 7)
🧭 action shape: (100, 10)

🟦 qpos (앞 3 스텝):
[[-0.149206 -0.669474 -0.404594 -2.789922 -0.141941  2.247962 -0.505818]
 [-0.149196 -0.669059 -0.404564 -2.789907 -0.14192   2.247981 -0.505812]
 [-0.149201 -0.669068 -0.40457  -2.789902 -0.141911  2.247984 -0.50582 ]]
qpos shape: (100, 7)

🟩 /observations/joint_positions (앞 3 스텝):
[[-0.149206 -0.669474 -0.404594 -2.789922 -0.141941  2.247962 -0.505818]
 [-0.149196 -0.669059 -0.404564 -2.789907 -0.14192   2.247981 -0.505812]
 [-0.149201 -0.669068 -0.40457  -2.789902 -0.141911  2.247984 -0.50582 ]]
/observations/joint_positions shape: (100, 7)

📐 동일 shape? True
✅ 완전 동일(np.array_equal)? True
✅ 근사 동일(np.allclose, atol=1e-6)? True
🔎 최대 절대 오차: 0.0


In [8]:
import h5py

h5_path = "/home/parkjeongsu/Downloads/set_8_pick_bottle_10/robopen05_20230528-222842_paths_roboset.h5"

with h5py.File(h5_path, 'r') as f:
    print("🔑 Top-level keys:", list(f.keys()))

    #bs1 = f["observations"]
    obs2 = f["Trial0"]
    #print("🔍 [observations] 내부 key 목록:", list(obs1.keys()))
    print("🔍 [observations] 내부 key 목록:", list(obs2.keys()))
    for k in obs2.keys():
        item = obs2[k]
        if isinstance(item, h5py.Dataset):
            print(f"✅ {k} shape:", item.shape)
        elif isinstance(item, h5py.Group):
            print(f"📁 {k} is a Group → 내부 key 목록:", list(item.keys()))

    print(f['action'].shape)  # ← 이게 바로 10차원이 되어야 함



🔑 Top-level keys: ['Trial0']
🔍 [observations] 내부 key 목록: ['data']
📁 data is a Group → 내부 key 목록: ['ctrl_arm', 'ctrl_ee', 'd_left', 'd_right', 'd_top', 'qp_arm', 'qp_ee', 'qv_arm', 'qv_ee', 'rgb_left', 'rgb_right', 'rgb_top', 'time']


KeyError: "Unable to synchronously open object (object 'action' doesn't exist)"