## 0. View the LeRobot Dataset

Note that the conversion uses two different environments: the `lerobot` env for LeRobot dataset -> `.npy`, and the `rlds` env for `.npy` -> RLDS.

This two-step process is necessary because the two environments are hardly compatible with each other.

Set up the `lerobot` env by following the instructions in the [lerobot repository](https://github.com/huggingface/lerobot/tree/main).

In [4]:
from pprint import pprint

import torch
from huggingface_hub import HfApi

import lerobot
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata

# Example dataset for this walkthrough, restricted to episodes 3, 4 and 5.
repo_id = "lerobot/droid_1.0.1"
dataset = LeRobotDataset(repo_id, episodes=[3, 4, 5])

# Summarize what was selected, one line per fact.
print(
    f"Selected episodes: {dataset.episodes}",
    f"Number of episodes selected: {dataset.num_episodes}",
    f"Number of frames selected: {dataset.num_frames}",
    sep="\n",
)

# The dataset's `meta` attribute holds its LeRobotDatasetMetadata object.
print(dataset.meta)

Selected episodes: [3, 4, 5]
Number of episodes selected: 3
Number of frames selected: 25460854
LeRobotDatasetMetadata({
    Repository ID: 'lerobot/droid_1.0.1',
    Total episodes: '95617',
    Total frames: '27618651',
    Features: '['is_first', 'is_last', 'is_terminal', 'language_instruction', 'language_instruction_2', 'language_instruction_3', 'observation.state.gripper_position', 'observation.state.cartesian_position', 'observation.state.joint_position', 'observation.state', 'observation.images.wrist_left', 'observation.images.exterior_1_left', 'observation.images.exterior_2_left', 'action.gripper_position', 'action.gripper_velocity', 'action.cartesian_position', 'action.cartesian_velocity', 'action.joint_position', 'action.joint_velocity', 'action.original', 'action', 'discount', 'reward', 'task_category', 'building', 'collector_id', 'date', 'camera_extrinsics.wrist_left', 'camera_extrinsics.exterior_1_left', 'camera_extrinsics.exterior_2_left', 'is_episode_successful', 'timest

## 1. Convert all features with a single-file script

In [8]:
import os
import numpy as np
import tqdm
import torch
from lerobot.datasets.lerobot_dataset import LeRobotDataset


def convert_lerobot_to_npy(repo_id, output_dir=None, episodes=None):
    """
    Convert a LeRobot dataset into one ``.npy`` file per episode.

    Each file ``episode_{idx}.npy`` holds a 1-D object array with one dict per
    frame. The dict maps feature name to that frame's value; ``torch.Tensor``
    values are converted to NumPy arrays and all other values are stored
    unchanged. Because the array has object dtype it is pickled on save, so it
    must be read back with ``np.load(path, allow_pickle=True)``.

    Args:
        repo_id: Name of the LeRobot dataset, e.g. "lerobot/droid_1.0.1".
        output_dir: Directory to write to. Defaults to f"data/{repo_id}".
        episodes: List of episode indices to convert, e.g. [0, 1, 2].
            Defaults to None, which converts every episode.

    Returns:
        None. Results are written to ``output_dir``.
    """
    # Resolve and create the output directory.
    if output_dir is None:
        output_dir = f"data/{repo_id}"
    os.makedirs(output_dir, exist_ok=True)

    # Load the dataset, optionally restricted to the requested episodes.
    print(f"Loading dataset: {repo_id}")
    dataset = LeRobotDataset(repo_id, episodes=episodes)
    selected_episodes = dataset.episodes
    print(f"Selected episodes: {selected_episodes}")
    print(f"Number of episodes to convert: {dataset.num_episodes}")
    print(f"Total frames to convert: {dataset.num_frames}")

    # NOTE(review): LeRobotDataset appears to keep `episodes` exactly as it was
    # passed, so it is None when all episodes are requested -- confirm against
    # the installed lerobot version. Iterating None would raise TypeError, so
    # fall back to every episode index.
    if selected_episodes is None:
        selected_episodes = list(range(dataset.num_episodes))

    # Collect the names of all features.
    features = list(dataset.features.keys())
    print(f"Detected features: {features}")

    # Read the per-episode frame bounds once; they do not change inside the loop.
    episodes_meta = dataset.meta.episodes
    from_indices = episodes_meta["dataset_from_index"]
    to_indices = episodes_meta["dataset_to_index"]

    # Features already reported as inaccessible, so each is warned about once
    # instead of once per frame.
    warned_features = set()

    # Convert each selected episode.
    for episode_idx in tqdm.tqdm(selected_episodes, desc="Converting episodes"):
        # Start (inclusive) and end (exclusive) frame index of this episode.
        from_idx = int(from_indices[episode_idx])
        to_idx = int(to_indices[episode_idx])
        episode_length = to_idx - from_idx
        # tqdm.write keeps messages from breaking the active progress bars.
        tqdm.tqdm.write(f"\nProcessing episode {episode_idx} (frames: {episode_length})")

        # Build the list of per-frame dicts for this episode.
        episode_data = []
        frame_indices = range(from_idx, to_idx)
        for frame_idx in tqdm.tqdm(frame_indices, desc=f"Episode {episode_idx} frames", leave=False):
            frame = dataset[frame_idx]
            frame_data = {}

            # Extract every feature; a feature that cannot be read is skipped.
            for feature in features:
                try:
                    value = frame[feature]
                    # Tensors are stored as NumPy arrays.
                    if isinstance(value, torch.Tensor):
                        value = value.cpu().numpy()
                    frame_data[feature] = value
                except (KeyError, AttributeError) as e:
                    if feature not in warned_features:
                        warned_features.add(feature)
                        tqdm.tqdm.write(
                            f"Warning: Could not access feature {feature} in frame {frame_idx}: {e}"
                            " (further warnings for this feature are suppressed)"
                        )

            episode_data.append(frame_data)

        # Save as an explicit object array; pickling is required for dict elements.
        save_path = os.path.join(output_dir, f"episode_{episode_idx}.npy")
        np.save(save_path, np.array(episode_data, dtype=object), allow_pickle=True)
        tqdm.tqdm.write(f"Saved episode {episode_idx} to {save_path}")

    print(f"\nSuccessfully converted dataset to {output_dir}")


if __name__ == "__main__":
    # Example usage
    repo_id = "lerobot/droid_1.0.1"

    # Example 1: convert only selected episodes (here the first five, 0-4)
    convert_lerobot_to_npy(repo_id=repo_id, episodes=[0, 1, 2, 3, 4])

    # Example 2: convert every episode (leave `episodes` unset)
    # convert_lerobot_to_npy(repo_id)

    # Example 3: write to a custom output directory
    # convert_lerobot_to_npy(repo_id, output_dir="custom_output_dir", episodes=list(range(5)))

Loading dataset: lerobot/droid_1.0.1
Selected episodes: [0, 1, 2, 3, 4]
Number of episodes to convert: 5
Total frames to convert: 25460854
Detected features: ['is_first', 'is_last', 'is_terminal', 'language_instruction', 'language_instruction_2', 'language_instruction_3', 'observation.state.gripper_position', 'observation.state.cartesian_position', 'observation.state.joint_position', 'observation.state', 'observation.images.wrist_left', 'observation.images.exterior_1_left', 'observation.images.exterior_2_left', 'action.gripper_position', 'action.gripper_velocity', 'action.cartesian_position', 'action.cartesian_velocity', 'action.joint_position', 'action.joint_velocity', 'action.original', 'action', 'discount', 'reward', 'task_category', 'building', 'collector_id', 'date', 'camera_extrinsics.wrist_left', 'camera_extrinsics.exterior_1_left', 'camera_extrinsics.exterior_2_left', 'is_episode_successful', 'timestamp', 'frame_index', 'episode_index', 'index', 'task_index']


Converting episodes:   0%|          | 0/5 [00:00<?, ?it/s]


Processing episode 0 (frames: 167)


Converting episodes:  20%|██        | 1/5 [00:01<00:05,  1.29s/it]

Saved episode 0 to data/lerobot/droid_1.0.1/episode_0.npy

Processing episode 1 (frames: 266)


Converting episodes:  40%|████      | 2/5 [00:03<00:05,  1.89s/it]

Saved episode 1 to data/lerobot/droid_1.0.1/episode_1.npy

Processing episode 2 (frames: 411)


Converting episodes:  60%|██████    | 3/5 [00:06<00:04,  2.38s/it]

Saved episode 2 to data/lerobot/droid_1.0.1/episode_2.npy

Processing episode 3 (frames: 239)


Converting episodes:  80%|████████  | 4/5 [00:08<00:02,  2.24s/it]

Saved episode 3 to data/lerobot/droid_1.0.1/episode_3.npy

Processing episode 4 (frames: 370)


Converting episodes: 100%|██████████| 5/5 [00:11<00:00,  2.32s/it]

Saved episode 4 to data/lerobot/droid_1.0.1/episode_4.npy

Successfully converted dataset to data/lerobot/droid_1.0.1





### 1.1 Then run the `tfds build` commands (in the `rlds` env)

```bash
cd rlds_dataset_builder/le_robot_to_rlds
tfds build --overwrite
```

The results will be saved to `~/tensorflow_datasets/le_robot_to_rlds`. To visualize the results:

```bash
cd rlds_dataset_builder
python3 visualize_dataset.py le_robot_to_rlds
```




## 2. Use the class API to convert only the desired features

In [1]:
from lerobot_to_npy_converter import LeRobotToNPYConverter, CustomFeatureConverter

# Example 1: converter that uses the default feature extractor
print("=== Using default feature extractor ===")
converter = LeRobotToNPYConverter(
    repo_id="lerobot/droid_1.0.1",
    episodes=[0, 1, 2],  # the first three episodes
)

# Example 2: converter that uses a custom feature extractor
print("\n=== Using custom feature extractor ===")
custom_converter = CustomFeatureConverter(
    repo_id="lerobot/droid_1.0.1",
    output_dir="data/custom_droid_features",  # custom output directory
    episodes=[3, 4],  # episodes 3 and 4
)


  from .autonotebook import tqdm as notebook_tqdm


=== Using default feature extractor ===

=== Using custom feature extractor ===


In [2]:
# Left disabled: uncomment both lines to run the default-extractor `converter` built above.
# converter.load_dataset()
# converter.convert_and_save()


In [3]:
# Load the dataset for the custom converter (episodes 3 and 4 of lerobot/droid_1.0.1).
custom_converter.load_dataset()

Loading dataset: lerobot/droid_1.0.1
Selected episodes: [3, 4]
Number of episodes selected: 2
Total frames selected: 25460854
All available features in dataset:
  1. is_first
  2. is_last
  3. is_terminal
  4. language_instruction
  5. language_instruction_2
  6. language_instruction_3
  7. observation.state.gripper_position
  8. observation.state.cartesian_position
  9. observation.state.joint_position
  10. observation.state
  11. observation.images.wrist_left
  12. observation.images.exterior_1_left
  13. observation.images.exterior_2_left
  14. action.gripper_position
  15. action.gripper_velocity
  16. action.cartesian_position
  17. action.cartesian_velocity
  18. action.joint_position
  19. action.joint_velocity
  20. action.original
  21. action
  22. discount
  23. reward
  24. task_category
  25. building
  26. collector_id
  27. date
  28. camera_extrinsics.wrist_left
  29. camera_extrinsics.exterior_1_left
  30. camera_extrinsics.exterior_2_left
  31. is_episode_successful


LeRobotDataset({
    Repository ID: 'lerobot/droid_1.0.1',
    Number of selected episodes: '2',
    Number of selected samples: '25460854',
    Features: '['is_first', 'is_last', 'is_terminal', 'language_instruction', 'language_instruction_2', 'language_instruction_3', 'observation.state.gripper_position', 'observation.state.cartesian_position', 'observation.state.joint_position', 'observation.state', 'observation.images.wrist_left', 'observation.images.exterior_1_left', 'observation.images.exterior_2_left', 'action.gripper_position', 'action.gripper_velocity', 'action.cartesian_position', 'action.cartesian_velocity', 'action.joint_position', 'action.joint_velocity', 'action.original', 'action', 'discount', 'reward', 'task_category', 'building', 'collector_id', 'date', 'camera_extrinsics.wrist_left', 'camera_extrinsics.exterior_1_left', 'camera_extrinsics.exterior_2_left', 'is_episode_successful', 'timestamp', 'frame_index', 'episode_index', 'index', 'task_index']',
})',

In [4]:

# Run the conversion for the custom converter and write one .npy file per episode.
# NOTE(review): presumably requires load_dataset() to have been called first -- confirm.
custom_converter.convert_and_save()

Converting episodes:   0%|          | 0/2 [00:00<?, ?it/s]


Processing episode 3 (frames: 239)


Converting episodes:  50%|█████     | 1/2 [00:02<00:02,  2.30s/it]

Saved episode 3 to data/custom_droid_features/episode_3.npy

Processing episode 4 (frames: 370)


Converting episodes: 100%|██████████| 2/2 [00:05<00:00,  2.93s/it]

Saved episode 4 to data/custom_droid_features/episode_4.npy

Successfully converted dataset to data/custom_droid_features





### 2.1 Then run the `tfds build` commands (in the `rlds` env)

```bash
cd rlds_dataset_builder/le_robot_to_rlds
tfds build --overwrite
```

The results will be saved to `~/tensorflow_datasets/le_robot_to_rlds`. To visualize the results:

```bash
cd rlds_dataset_builder
python3 visualize_dataset.py le_robot_to_rlds
```