# Guide to loading datasets for inference


## 1. LeRobot Format

* This tutorial will show how to load data in LeRobot Format by using our dataloader. 
* We will use the `demos25` demo dataset and the `libero_10_no_noops_1.0.0_lerobot` dataset as examples; both are already converted to LeRobot Format.
* To understand how to convert your own dataset, please refer to [Gr00t's LeRobot.md](LeRobot_compatible_data_schema.md)

In [None]:
# download dataset from hub
!huggingface-cli download libero_10_no_noops_1.0.0_lerobot --repo-id libero_10_no_noops_1.0.0_lerobot --allow-patterns "*.mp4"

- Metadata

In [7]:
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata

from eo.data.lerobot_dataset import LeRobotDataset

# Inspect the metadata of the local `demos25` dataset; its repr lists the
# repository id, the episode/frame totals and the feature names.
meta = LeRobotDatasetMetadata(repo_id="demos25", root="../demo_data/demos25")
meta

LeRobotDatasetMetadata({
    Repository ID: 'demos25',
    Total episodes: '25',
    Total frames: '44492',
    Features: '['observation.images.head', 'observation.images.head_center_fisheye', 'observation.images.head_left_fisheye', 'observation.images.head_right_fisheye', 'observation.images.hand_left', 'observation.images.hand_right', 'observation.images.back_left_fisheye', 'observation.images.back_right_fisheye', 'observation.states.effector.position', 'observation.states.end.orientation', 'observation.states.end.position', 'observation.states.head.position', 'observation.states.joint.current_value', 'observation.states.joint.position', 'observation.states.robot.orientation', 'observation.states.robot.position', 'observation.states.waist.position', 'actions.effector.position', 'actions.end.orientation', 'actions.end.position', 'actions.head.position', 'actions.joint.position', 'actions.robot.velocity', 'actions.waist.position', 'timestamp', 'frame_index', 'episode_index', 'index', '

In [18]:
# Every feature whose name begins with "action" is requested as a chunk.
select_action_keys = list(filter(lambda name: name.startswith("action"), meta.features))

# For each action key, request offsets 0/fps, 1/fps, ..., 49/fps relative to the
# sampled frame (50 entries per key).
action_delta_timestamps = {}
for key in select_action_keys:
    action_delta_timestamps[key] = [step / meta.fps for step in range(50)]

dataset = LeRobotDataset(
    repo_id="demos25",
    root="../demo_data/demos25",
    tolerance_s=1e-3,
    delta_timestamps=action_delta_timestamps,
)
dataset[0].keys()

ValueError: One or several timestamps unexpectedly violate the tolerance inside episode range.
                This might be due to synchronization issues during data collection.
                
[{'diff': np.float32(-59.433334),
  'episode_index': 0,
  'timestamps': [np.float32(59.433334), np.float32(0.0)]},
 {'diff': np.float32(-66.96667),
  'episode_index': 1,
  'timestamps': [np.float32(66.96667), np.float32(0.0)]},
 {'diff': np.float32(-59.033333),
  'episode_index': 2,
  'timestamps': [np.float32(59.033333), np.float32(0.0)]},
 {'diff': np.float32(-58.533333),
  'episode_index': 3,
  'timestamps': [np.float32(58.533333), np.float32(0.0)]},
 {'diff': np.float32(-61.2),
  'episode_index': 4,
  'timestamps': [np.float32(61.2), np.float32(0.0)]},
 {'diff': np.float32(-66.0),
  'episode_index': 5,
  'timestamps': [np.float32(66.0), np.float32(0.0)]},
 {'diff': np.float32(-60.166668),
  'episode_index': 6,
  'timestamps': [np.float32(60.166668), np.float32(0.0)]},
 {'diff': np.float32(-63.366665),
  'episode_index': 7,
  'timestamps': [np.float32(63.366665), np.float32(0.0)]},
 {'diff': np.float32(-61.0),
  'episode_index': 8,
  'timestamps': [np.float32(61.0), np.float32(0.0)]},
 {'diff': np.float32(-57.1),
  'episode_index': 9,
  'timestamps': [np.float32(57.1), np.float32(0.0)]},
 {'diff': np.float32(-59.166668),
  'episode_index': 10,
  'timestamps': [np.float32(59.166668), np.float32(0.0)]},
 {'diff': np.float32(-59.033333),
  'episode_index': 11,
  'timestamps': [np.float32(59.033333), np.float32(0.0)]},
 {'diff': np.float32(-57.266666),
  'episode_index': 12,
  'timestamps': [np.float32(57.266666), np.float32(0.0)]},
 {'diff': np.float32(-52.933334),
  'episode_index': 13,
  'timestamps': [np.float32(52.933334), np.float32(0.0)]},
 {'diff': np.float32(-58.933334),
  'episode_index': 14,
  'timestamps': [np.float32(58.933334), np.float32(0.0)]},
 {'diff': np.float32(-57.8),
  'episode_index': 15,
  'timestamps': [np.float32(57.8), np.float32(0.0)]},
 {'diff': np.float32(-55.7),
  'episode_index': 16,
  'timestamps': [np.float32(55.7), np.float32(0.0)]},
 {'diff': np.float32(-61.933334),
  'episode_index': 17,
  'timestamps': [np.float32(61.933334), np.float32(0.0)]},
 {'diff': np.float32(-55.433334),
  'episode_index': 18,
  'timestamps': [np.float32(55.433334), np.float32(0.0)]},
 {'diff': np.float32(-57.0),
  'episode_index': 19,
  'timestamps': [np.float32(57.0), np.float32(0.0)]},
 {'diff': np.float32(-63.066666),
  'episode_index': 20,
  'timestamps': [np.float32(63.066666), np.float32(0.0)]},
 {'diff': np.float32(-61.833332),
  'episode_index': 21,
  'timestamps': [np.float32(61.833332), np.float32(0.0)]},
 {'diff': np.float32(-54.033333),
  'episode_index': 22,
  'timestamps': [np.float32(54.033333), np.float32(0.0)]},
 {'diff': np.float32(-54.833332),
  'episode_index': 23,
  'timestamps': [np.float32(54.833332), np.float32(0.0)]}]

## 2. Specific Robot Keys

In [None]:
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata

from eo.data.lerobot_dataset import LeRobotDataset

LIBERO_REPO_ID = "libero_10_no_noops_1.0.0_lerobot"
LIBERO_ROOT = "/nvme/eorobotics-oss/DATA/libero_10_no_noops_1.0.0_lerobot"

# Read the fps from this dataset's own metadata instead of relying on a `meta`
# variable left over from the `demos25` cell: that variable belongs to a different
# dataset and does not exist on a fresh kernel if only this cell is run.
libero_meta = LeRobotDatasetMetadata(repo_id=LIBERO_REPO_ID, root=LIBERO_ROOT)

dataset = LeRobotDataset(
    repo_id=LIBERO_REPO_ID,
    root=LIBERO_ROOT,
    episodes=[0],
    # Restrict the loaded features to one video stream, one state and one action key.
    select_video_keys=["observation.images.image"],
    select_state_keys=["observation.state"],
    select_action_keys=["action"],
    # Request 32 offsets (0/fps ... 31/fps) for the action key.
    delta_timestamps={k: [i / libero_meta.fps for i in range(0, 32)] for k in ["action"]},
)

dataset[0].keys()

## 3. Multi-robot Dataset

In [None]:
from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata

from eo.data.lerobot_dataset import MultiLeRobotDataset
from eo.data.schema import LerobotConfig

# In large scale training these configs come from yaml files; here they are built inline.
robot_repo_ids = (
    "libero_10_no_noops_1.0.0_lerobot",
    "libero_spatial_no_noops_1.0.0_lerobot",
)
robot_configs = [
    LerobotConfig(repo_id=repo_id, root="/nvme/eorobotics-oss/DATA", episodes=[0])
    for repo_id in robot_repo_ids
]

# NOTE: delta_timestamps are constructed automatically from the `metadata` and `chunk_size`
multi_dataset = MultiLeRobotDataset(data_configs=robot_configs, chunk_size=16)

## 4. Load Multimodal Datasets

In [1]:
from eo.data.multim_dataset import MultimodaDataset
from eo.data.schema import MMDatasetConfig

# One jsonl annotation file plus the base directory passed as `vision_base_path`
# (presumably where the referenced images are resolved - confirm).
refcoco_config = MMDatasetConfig(
    json_path="../demo_data/refcoco/refcoco.jsonl",
    vision_base_path="../demo_data/refcoco",
)
multim_dataset = MultimodaDataset(data_configs=[refcoco_config])

# Number of samples in the dataset.
len(multim_dataset)

  from .autonotebook import tqdm as notebook_tqdm


Loaded 9 samples from ../demo_data/refcoco/refcoco.jsonl


9

In [2]:
from eo.data.lerobot_dataset import MultiLeRobotDataset
from eo.data.multim_dataset import MultimodaDataset
from eo.data.schema import LerobotConfig, MMDatasetConfig

# Wrap the local `demos25` dataset in a multi-dataset container.
# NOTE(review): `root` here is "../demo_data", not the dataset folder itself -
# presumably the repo id is appended to it; confirm.
demos25_config = LerobotConfig(repo_id="demos25", root="../demo_data")
lerobot_dataset = MultiLeRobotDataset(data_configs=[demos25_config])

* load 1 lerobot datasets with 8 processes ...


Loading lerobot datasets:   0%|          | 0/1 [00:00<?, ?it/s]

[warn] read dataset demos25 failed, skipped!
One or several timestamps unexpectedly violate the tolerance inside episode range.
                This might be due to synchronization issues during data collection.
                
[{'diff': np.float32(-59.433334),
  'episode_index': 0,
  'timestamps': [np.float32(59.433334), np.float32(0.0)]},
 {'diff': np.float32(-66.96667),
  'episode_index': 1,
  'timestamps': [np.float32(66.96667), np.float32(0.0)]},
 {'diff': np.float32(-59.033333),
  'episode_index': 2,
  'timestamps': [np.float32(59.033333), np.float32(0.0)]},
 {'diff': np.float32(-58.533333),
  'episode_index': 3,
  'timestamps': [np.float32(58.533333), np.float32(0.0)]},
 {'diff': np.float32(-61.2),
  'episode_index': 4,
  'timestamps': [np.float32(61.2), np.float32(0.0)]},
 {'diff': np.float32(-66.0),
  'episode_index': 5,
  'timestamps': [np.float32(66.0), np.float32(0.0)]},
 {'diff': np.float32(-60.166668),
  'episode_index': 6,
  'timestamps': [np.float32(60.166668), np.floa

Loading lerobot datasets: 100%|██████████| 1/1 [00:25<00:00, 25.90s/it]

successfully load dataset 0/1:
[] 





In [5]:
# Interleaved samples are loaded together with the robot dataset passed as
# `meta_dataset` (presumably used to resolve the robot data they reference - confirm).
interleaved_config = MMDatasetConfig(json_path="../demo_data/interleaved_demo.jsonl")
multim_dataset = MultimodaDataset(
    data_configs=[interleaved_config],
    meta_dataset=lerobot_dataset,
)

multim_dataset[0]

Loaded 1132 samples from ../demo_data/interleaved_demo.jsonl


ValueError: invalid dataset: demos25. available dataset: []