In [None]:
import os
import json
import shutil
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image
import imageio

# Source episode directory and destination dataset root.
src_folder = "episode_0000"
dst_folder = "episode_0000_lerobot.data"

# Create the destination directory tree (safe to re-run thanks to exist_ok).
os.makedirs(f"{dst_folder}/data/chunk-000", exist_ok=True)
os.makedirs(f"{dst_folder}/videos/chunk-000/observation.images.cam_head", exist_ok=True)
os.makedirs(f"{dst_folder}/meta", exist_ok=True)

# Copy the source images into the camera directory.
# Only regular files are copied: shutil.copy raises on a directory entry.
# NOTE(review): the frames are copied as loose image files into the videos/
# tree; nothing here encodes a video file even though imageio is imported.
# Confirm the downstream reader accepts this layout.
src_images = Path(f"{src_folder}/images")
dst_images = Path(f"{dst_folder}/videos/chunk-000/observation.images.cam_head")
for img_file in src_images.glob("*"):
    if img_file.is_file():
        shutil.copy(img_file, dst_images / img_file.name)

# Load the episode metadata. The file is decoded as UTF-8 explicitly so the
# result does not depend on the platform's default text encoding.
with open(f"{src_folder}/metadata.json", "r", encoding="utf-8") as f:
    metadata = json.load(f)


def _is_valid_frame(candidate):
    """Return True if `candidate` is a dict that has both 'states' and 'actions'."""
    return isinstance(candidate, dict) and "states" in candidate and "actions" in candidate


def _describe_structure(obj):
    """Return a short, bounded summary of a parsed JSON value for error messages."""
    if isinstance(obj, dict):
        return f"dict(keys={list(obj)[:10]})"
    if isinstance(obj, list):
        first = _describe_structure(obj[0]) if obj else "-"
        return f"list(len={len(obj)}, first={first})"
    return type(obj).__name__


# Load the trajectory. Accepted layouts:
#   * {"frames": [frame, ...]}
#   * [frame, ...]
#   * a single frame object
#   * JSONL (one frame object per line), used when the file is not one JSON document
# A frame is kept only if it is a dict with both 'states' and 'actions'.
trajectory = []
with open(f"{src_folder}/trajectory.json", "r", encoding="utf-8") as f:
    raw_text = f.read()

try:
    data = json.loads(raw_text)
except json.JSONDecodeError:
    # Not a single JSON document: fall back to one JSON record per line.
    data = None
    records = []
    for line in raw_text.split("\n"):
        line = line.strip()
        if not line:
            continue
        try:
            records.append(json.loads(line))
        except json.JSONDecodeError:
            # Best effort: skip lines that are not valid JSON.
            continue
    structure_hint = f"JSONL, {_describe_structure(records)}"
else:
    structure_hint = _describe_structure(data)
    if isinstance(data, dict) and "frames" in data:
        frames = data["frames"]
        structure_hint += f", frames={_describe_structure(frames)}"
        records = frames if isinstance(frames, list) else []
    elif isinstance(data, list):
        records = data
    elif _is_valid_frame(data):
        # A lone frame object (e.g. a one-record JSONL file) parses as plain JSON.
        records = [data]
    else:
        records = []

trajectory = [record for record in records if _is_valid_frame(record)]

if not trajectory:
    # Include what was actually parsed so a key/layout mismatch can be diagnosed
    # directly from the error instead of by opening the file.
    raise RuntimeError(
        "Không tìm thấy frame hợp lệ trong trajectory.json!"
        f" Mỗi frame cần có key 'states' và 'actions'; cấu trúc đọc được: {structure_hint}"
    )

# Collect the per-frame state and action payloads for the LeRobot table.
states = [traj['states'] for traj in trajectory]
actions = [traj['actions'] for traj in trajectory]
# `states` / `actions` entries may be nested dicts with several keys, so each
# one is flattened into a single flat list of leaf values.
def flatten_state_action(item):
    """Flatten a state/action payload into one flat list of leaf values.

    Dicts are walked in insertion order of their values; lists and tuples are
    walked in element order; nesting of any depth is flattened recursively.
    Any other value (number, string, None, ...) is treated as a leaf.

    Args:
        item: A dict, list/tuple, or scalar leaf.

    Returns:
        A new flat list of the leaf values. A scalar input yields a
        one-element list; an empty container yields an empty list.

    Note:
        Dict keys are not sorted, so two frames only produce aligned vectors
        when their dicts list keys in the same order.
    """
    if isinstance(item, dict):
        children = item.values()
    elif isinstance(item, (list, tuple)):
        children = item
    else:
        # Scalar leaf at the top level (e.g. a bare number).
        return [item]

    out = []
    for child in children:
        if isinstance(child, (dict, list, tuple)):
            # Recurse so nested lists/dicts do not leave ragged elements behind.
            out.extend(flatten_state_action(child))
        else:
            out.append(child)
    return out

# Flatten every frame's state/action payload into one flat vector per frame.
states_flat = [flatten_state_action(s) for s in states]
actions_flat = [flatten_state_action(a) for a in actions]

# Fail early, before anything is written to disk, if frames disagree on the
# flattened vector length: such data cannot be stacked into a 2-D array later.
for column_name, vectors in (("observation.state", states_flat), ("action", actions_flat)):
    vector_lengths = sorted({len(vec) for vec in vectors})
    if len(vector_lengths) > 1:
        raise ValueError(
            f"{column_name}: độ dài vector sau khi flatten không đồng nhất giữa các frame: {vector_lengths}"
        )

num_frames = len(states_flat)
# Fall back to 30 when 'fps' is missing, null, or 0; a zero divisor would
# otherwise turn every timestamp into inf/nan.
fps = metadata.get("fps") or 30
timestamps = np.arange(num_frames) / fps

# One row per frame. The episode and task indices are constant 0 here, and the
# task text from metadata (or "unknown") is repeated on every row.
df = pd.DataFrame({
    'observation.state': states_flat,
    'action': actions_flat,
    'timestamp': timestamps.tolist(),
    'episode_index': [0] * num_frames,
    'index': list(range(num_frames)),
    'task_index': [0] * num_frames,
    'annotation.human.annotation.task': [metadata.get("task", "unknown")] * num_frames,
})

parquet_path = f"{dst_folder}/data/chunk-000/episode_000000.parquet"
df.to_parquet(parquet_path)

# Write meta/modality.json: the camera key plus the state/action/language key
# lists taken from metadata (each defaults to an empty list when absent).
modality = {
    "video": ["observation.images.cam_head"],
    "state": list(metadata.get("state_keys", [])),
    "action": list(metadata.get("action_keys", [])),
    "language": list(metadata.get("language_keys", []))
}
with open(f"{dst_folder}/meta/modality.json", "w") as modality_file:
    modality_file.write(json.dumps(modality, indent=2))

# Write meta/info.json: fixed embodiment tag, a single episode, and any
# 'other_info' passed through from metadata.
info = {
    "embodiment_tag": "m2",
    "num_episodes": 1,
    "other_info": metadata.get("other_info", {})
}
with open(f"{dst_folder}/meta/info.json", "w") as info_file:
    info_file.write(json.dumps(info, indent=2))

# Write meta/episodes.jsonl: one line describing this episode.
# NOTE(review): the record is keyed by 'episode_id'; confirm the consumer does
# not expect a different key (e.g. an integer 'episode_index').
with open(f"{dst_folder}/meta/episodes.jsonl", "w") as episodes_file:
    print(json.dumps({"episode_id": "0000", "length": num_frames}), file=episodes_file)

# Write meta/tasks.jsonl only when metadata provides a 'tasks' list:
# one JSON document per task, one per line.
if "tasks" in metadata:
    with open(f"{dst_folder}/meta/tasks.jsonl", "w") as tasks_file:
        tasks_file.writelines(json.dumps(task) + "\n" for task in metadata["tasks"])

# Write meta/stats.json: per-dimension mean/std/min/max over all frames
# (reduction along axis 0) for the flattened state and action vectors.
state_array = np.array(states_flat)
action_array = np.array(actions_flat)
stats = {
    group: {
        reducer: getattr(values, reducer)(axis=0).tolist()
        for reducer in ("mean", "std", "min", "max")
    }
    for group, values in (("state", state_array), ("action", action_array))
}
with open(f"{dst_folder}/meta/stats.json", "w") as stats_file:
    stats_file.write(json.dumps(stats, indent=2))

print("✅ Đã chuyển đổi dữ liệu sang format LeRobot!")

RuntimeError: Không tìm thấy frame hợp lệ trong trajectory.json!

In [8]:
pip install imageio

Collecting imageio
  Using cached imageio-2.37.2-py3-none-any.whl.metadata (9.7 kB)
Using cached imageio-2.37.2-py3-none-any.whl (317 kB)
Using cached imageio-2.37.2-py3-none-any.whl (317 kB)
Installing collected packages: imageio
Installing collected packages: imageio
Successfully installed imageio-2.37.2
Successfully installed imageio-2.37.2
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
