From 06be6d34b31e8ad62c2a7e28da190bdfeb72c7e9 Mon Sep 17 00:00:00 2001
From: William Yue <williamyue37@gmail.com>
Date: Mon, 29 Dec 2025 16:11:32 -0800
Subject: [PATCH 1/6] added tutorials

---
 docs/source/index.rst                   |   1 +
 docs/source/tutorials.rst               |  17 ++++
 docs/source/tutorials/ci.rst            | 120 ++++++++++++++++++++++++
 docs/source/tutorials/datasets.rst      |  71 ++++++++++++++
 docs/source/tutorials/evaluation.rst    |  59 ++++++++++++
 docs/source/tutorials/inference.rst     |  44 +++++++++
 docs/source/tutorials/setup.rst         |  53 +++++++++++
 docs/source/tutorials/training.rst      |  59 ++++++++++++
 docs/source/tutorials/visualization.rst |  19 ++++
 9 files changed, 443 insertions(+)
 create mode 100644 docs/source/tutorials.rst
 create mode 100644 docs/source/tutorials/ci.rst
 create mode 100644 docs/source/tutorials/datasets.rst
 create mode 100644 docs/source/tutorials/evaluation.rst
 create mode 100644 docs/source/tutorials/inference.rst
 create mode 100644 docs/source/tutorials/setup.rst
 create mode 100644 docs/source/tutorials/training.rst
 create mode 100644 docs/source/tutorials/visualization.rst
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 08fbf1ae..773ac629 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -8,6 +8,7 @@ OpenTau documentation
    overview
    installation
    concepts
+   tutorials
 
 .. toctree::
    :maxdepth: 2
diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst
new file mode 100644
index 00000000..9af0ffae
--- /dev/null
+++ b/docs/source/tutorials.rst
@@ -0,0 +1,17 @@
+Tutorials
+=========
+
+This section provides step-by-step guides for common tasks in OpenTau, including training, inference, evaluation, and data management.
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Guides:
+
+   tutorials/setup
+   tutorials/training
+   tutorials/inference
+   tutorials/evaluation
+   tutorials/datasets
+   tutorials/visualization
+   tutorials/ci
+
diff --git a/docs/source/tutorials/ci.rst b/docs/source/tutorials/ci.rst
new file mode 100644
index 00000000..d6138184
--- /dev/null
+++ b/docs/source/tutorials/ci.rst
@@ -0,0 +1,120 @@
+CI/CD
+=====
+
+Running the GitLab CI server
+----------------------------
+
+To run the gitlab CI server, you need to have `docker <https://www.docker.com>`_, `gitlab-runner <https://docs.gitlab.com/runner/>`_, and `Nvidia Container Toolkit <https://docs.nvidia.com/ai-enterprise/deployment/vmware/latest/docker.html>`_ installed on your machine. The ``/autox`` NAS should also be mounted for sharing checkpoints.
+
+Once you have these installed, you can use the example config to run the gitlab CI server. The default location for the config is ``$HOME/.gitlab-runner/config.toml``.
+
+.. code-block:: toml
+
+    concurrent = 4
+    check_interval = 0
+    shutdown_timeout = 0
+
+    [session_server]
+      session_timeout = 1800
+
+    [[runners]]                             # Runner for (parallel) GPU jobs
+      name = "sj-k8s-gpu-005"
+      limit = 1                             # Only 1 GPU job at a time
+      url = "https://code.autox.ds"
+      id = 468
+      token = "<Token generated by GitLab runner registration>"
+      token_obtained_at = 2025-07-07T20:01:30Z
+      token_expires_at = 0001-01-01T00:00:00Z
+      executor = "docker"
+      [runners.cache]
+        MaxUploadedArchiveSize = 0
+        [runners.cache.s3]
+        [runners.cache.gcs]
+        [runners.cache.azure]
+      [runners.docker]
+        tls_verify = false
+        image = "gitlab-runner:snapshot04"  # This image needs to be built with docker/gitlab-ci-image/Dockerfile
+        ipc_mode="host"                     # Ignored when shm_size is set, but good to have
+        pull_policy = "if-not-present"      # Prevents pulling the image every time
+        privileged = false
+        disable_entrypoint_overwrite = false
+        oom_kill_disable = false
+        disable_cache = false
+        volumes = ["/cache", "/autox:/autox:rw"]  # mounts the /autox NAS to the containers
+        entrypoint = ["/bin/bash", "-c"]
+        shm_size = 34359738368              # 32 GiB. Deepspeed uses more than the default 64 MiB.
+        network_mtu = 0
+        gpus = "\"device=0,1\""             # Use GPU 0 and 1 for the multi-GPU runner
+
+    [[runners]]                             # Runner for single-GPU jobs
+      name = "sj-k8s-gpu-005"
+      limit = 1
+      url = "https://code.autox.ds"
+      id = 471
+      token = "<Token generated by GitLab runner registration>"
+      token_obtained_at = 2025-07-29T20:18:22Z
+      token_expires_at = 0001-01-01T00:00:00Z
+      executor = "docker"
+      [runners.cache]
+        MaxUploadedArchiveSize = 0
+        [runners.cache.s3]
+        [runners.cache.gcs]
+        [runners.cache.azure]
+      [runners.docker]
+        tls_verify = false
+        image = "gitlab-runner:snapshot04"  # This image needs to be built with docker/gitlab-ci-image/Dockerfile
+        pull_policy = "if-not-present"      # Prevents pulling the image every time
+        privileged = false
+        disable_entrypoint_overwrite = false
+        oom_kill_disable = false
+        gpus = "\"device=2\""               # Use GPU 2 for the single-GPU runner
+        disable_cache = false
+        volumes = ["/cache", "/autox:/autox:rw"]
+        shm_size = 0
+        network_mtu = 0
+
+    [[runners]]                             # Runner for CPU jobs
+      name = "sj-k8s-gpu-005"
+      limit = 2                             # Up to 2 CPU jobs at a time
+      url = "https://code.autox.ds"
+      id = 470
+      token = "<Another token generated by GitLab runner registration>"
+      token_obtained_at = 2025-07-29T18:27:09Z
+      token_expires_at = 0001-01-01T00:00:00Z
+      executor = "docker"
+      [runners.cache]
+        MaxUploadedArchiveSize = 0
+        [runners.cache.s3]
+        [runners.cache.gcs]
+        [runners.cache.azure]
+      [runners.docker]
+        tls_verify = false
+        image = "gitlab-runner:snapshot04"  # This image needs to be built with docker/gitlab-ci-image/Dockerfile
+        pull_policy = "if-not-present"      # Prevents pulling the image every time
+        privileged = false
+        disable_entrypoint_overwrite = false
+        oom_kill_disable = false
+        disable_cache = false
+        volumes = ["/cache"]
+        shm_size = 0
+        network_mtu = 0
+
+To verify the above config works, you can manually create a container using:
+
+.. code-block:: bash
+
+    # Test the multi-GPU runner setup
+    docker container run -it --rm --gpus '"device=0,1"' --shm-size 34359738368 --ipc=host -v /autox:/autox:rw gitlab-runner:snapshot04
+    # Test the single-GPU runner setup
+    docker container run -it --rm --gpus '"device=2"' --shm-size 34359738368 --ipc=host -v /autox:/autox:rw gitlab-runner:snapshot04
+    # Test the CPU-only runner setup
+    docker container run -it --rm gitlab-runner:snapshot04
+
+The gitlab runner can be started with:
+
+.. code-block:: bash
+
+    sudo gitlab-runner restart && gitlab-runner run
+
+You can also use ``gitlab-runner status`` to check the status of the runner.
+
diff --git a/docs/source/tutorials/datasets.rst b/docs/source/tutorials/datasets.rst
new file mode 100644
index 00000000..1a549697
--- /dev/null
+++ b/docs/source/tutorials/datasets.rst
@@ -0,0 +1,71 @@
+Datasets
+========
+
+Standard Data Format (for Development and Inference)
+----------------------------------------------------
+
+The "Standard Data Format" is the expected data format returned by ``torch.utils.data.Dataset``'s ``__getitem__`` and the expected input to ``torch.nn.Module``'s ``forward`` method. Any new datasets, VLMs, or VLAs that get added to this repository need to adhere to this format. Data being passed to the model during inference should also adhere to this format. The format is as follows:
+
+.. code-block:: python
+
+    {
+        "camera0": torch.Tensor,  # shape (C, H, W) with values from [0, 1] and with the H, W resized to the config's specifications.
+        "camera1": torch.Tensor,  # shape (C, H, W) with values from [0, 1] and with the H, W resized to the config's specifications.
+        # ...
+        "camera{num_cams-1}": torch.Tensor,  # shape (C, H, W) with values from [0, 1] and with the H, W resized to the config's specifications.
+
+        "local_camera0": torch.Tensor,  # shape (C, H, W) with values from [0, 1] and with the H, W resized to the config's specifications.
+        "local_camera1": torch.Tensor,  # shape (C, H, W) with values from [0, 1] and with the H, W resized to the config's specifications.
+        # ...
+        "local_camera{action_expert_num_cams-1}": torch.Tensor,  # shape (C, H, W) with values from [0, 1] and with the H, W resized to the config's specifications.
+
+        "state": torch.Tensor,    # shape (max_state_dim)
+        "actions": torch.Tensor,  # shape (action_chunk, max_action_dim)
+        "frozen_actions": torch.Tensor,  # shape (frozen_actions, max_action_dim)
+        "prompt": str,            # the task prompt, e.g. "Pick up the object and place it on the table."
+        "response": str,          # the response from the VLM for vision QA tasks. For LeRobotDataset, this will be an empty string.
+        "loss_type": str,         # the loss type to be applied to this sample (either "CE" for cross entropy or "MSE" for mean squared error)
+
+        "img_is_pad": torch.BoolTensor,  # shape (num_cams,) with values 0 or 1, where 1 indicates that the camera image is a padded image.
+        "local_img_is_pad": torch.BoolTensor,  # shape (action_expert_num_cams,) with values 0 or 1, where 1 indicates that the local camera image is a padded image.
+        "action_is_pad": torch.BoolTensor,  # shape (action_chunk,) with values 0 or 1, where 1 indicates that the action is a padded action.
+        "frozen_action_is_pad": torch.BoolTensor,  # shape (frozen_actions,) with values 0 or 1, where 1 indicates that the frozen action is a padded action.
+    }
+
+The config file will have to provide the following information in ``TrainPipelineConfig``:
+
+- ``H, W``: The height and width of the camera images. These should be the same for all cameras.
+- ``num_cams``: The number of cameras for the cloud VLM in the dataset.
+- ``action_expert_num_cams``: The number of cameras for the action expert in the dataset.
+- ``max_state_dim``: The maximum dimension of the state vector.
+- ``max_action_dim``: The maximum dimension of the action vector.
+- ``action_chunk``: The number of actions in the action vector. This is usually 1 for single action tasks, but can be more for multi-action tasks.
+
+Cameras should be labeled in order of importance (e.g. ``camera0`` is the most important camera, ``camera1`` is the second most important camera, etc.). If ``num_cams`` is less than the number of cameras in the dataset, only the most important cameras are selected.
+
+Both the prompt and response strings should contain exactly one newline character at the end of the string unless they are empty strings.
+
+
+Computing max token length for dataset mixture
+----------------------------------------------
+
+Each training config (e.g., `dev-config <../../examples/dev_config.json>`_) should contain a dataset mixture definition. To evaluate the maximum token length for the dataset mixture, you can run the following command:
+
+.. code-block:: bash
+
+    python lerobot/scripts/compute_max_token_length.py \
+        --target_cfg=<path/to/your/training/config.json> \
+        --output_path=outputs/stats/token_count.json \
+        --num_workers=10
+
+This will output a token count for each language key in the dataset mixture, and save it to ``outputs/stats/token_count.json``.
+
+AgiBot dataset
+--------------
+
+A clone of the ``agibot-world/AgiBotWorld-Alpha`` dataset is provided at ``/autox/teams/project-bot/AgiBotWorld-Alpha``. You can use the script ``lerobot/scripts/agibot_to_lerobot.py`` to convert it to lerobot format, saved in a given directory. For example:
+
+.. code-block:: bash
+
+    $ python3 lerobot/scripts/agibot_to_lerobot.py --src_path /autox/teams/project-bot/AgiBotWorld-Alpha --tgt_path <path> --task_id 327
+
diff --git a/docs/source/tutorials/evaluation.rst b/docs/source/tutorials/evaluation.rst
new file mode 100644
index 00000000..6ffdd897
--- /dev/null
+++ b/docs/source/tutorials/evaluation.rst
@@ -0,0 +1,59 @@
+Evaluation
+==========
+
+Evaluating a policy in Simulation
+---------------------------------
+
+To evaluate a policy in simulation, you can launch the ``lerobot/scripts/eval.py`` script with ``accelerate launch``.
+Each accelerate process will only work on its fraction of the tasks, improving throughput.
+For example, to evaluate a policy on the LIBERO-10 task suite, run:
+
+.. code-block:: bash
+
+    $ accelerate launch --config_file <ACCELERATE_CONFIG_PATH> lerobot/scripts/eval.py --config_path=outputs/train/tau0/checkpoints/000040/train_config.json
+
+.. note::
+   You can't pass a DeepSpeed accelerate config file to ``eval.py``, because DeepSpeed expects an optimizer and a dataloader during ``accelerator.prepare()``, which we do not provide during eval. It is recommended to pass in a DDP config instead.
+
+.. note::
+   Make sure that the ``EnvConfig`` and ``EvalConfig`` are set to the correct values for the simulation environment in your train config file.
+
+Evaluating policy in a libero environment
+-----------------------------------------
+
+To evaluate the policy on the LIBERO benchmark, add the following section to the training config:
+
+.. code-block:: json
+
+    {
+        ...,
+        "env": {
+            "type": "libero",
+            "task": "libero_spatial",
+            "task_ids": [0, 2]
+        },
+        "eval": {
+            "n_episodes": 8,
+            "batch_size": 8
+        },
+        "eval_freq": 25,
+        ...
+    }
+
+This will run the 0th task and the 2nd task in ``libero_spatial``. Each task will run 8 simulations in parallel.
+
+When launched with accelerate, each GPU process will only work on its fraction of the tasks, improving throughput.
+
+Using Simulations
+-----------------
+
+Metaworld
+^^^^^^^^^
+
+When using Metaworld on Ubuntu machines with headless rendering, make sure to export these environment variables:
+
+.. code-block:: bash
+
+    export MUJOCO_GL=egl
+    export PYOPENGL_PLATFORM=egl
+
diff --git a/docs/source/tutorials/inference.rst b/docs/source/tutorials/inference.rst
new file mode 100644
index 00000000..b5acd1e6
--- /dev/null
+++ b/docs/source/tutorials/inference.rst
@@ -0,0 +1,44 @@
+Inference
+=========
+
+Running inference with a trained model
+--------------------------------------
+
+To run inference on a trained model, you will need the saved checkpoint folder from training that contains at least ``train_config.json`` and ``model.safetensors`` files. If you ran the checkpointing and resuming tutorial, you should be able to find the checkpoint config file at ``outputs/train/tau0/checkpoints/000040/train_config.json``. Make sure you ran the ``zero_to_fp32.py`` and ``bin_to_safetensors.py`` scripts (or the ``convert_checkpoint.sh`` script) to convert the sharded model checkpoint files into a single ``model.safetensors`` file.
+
+To run inference with the entire model on the same device, run:
+
+.. code-block:: bash
+
+    $ python lerobot/scripts/unified_model_inference.py --config_path=outputs/train/tau0/checkpoints/000040/train_config.json
+
+Running inference on the TAU0 model takes less than 8 GB of GPU memory.
+
+For an example of how to run inference with the VLM in the cloud and the action expert on the robot, run:
+
+.. code-block:: bash
+
+    $ python lerobot/scripts/cloud_robot_inference.py --config_path=outputs/train/tau0/checkpoints/000040/train_config.json
+
+
+Zero Shot inferencing smolvla
+-----------------------------
+
+To run inference with the entire model on the same device, run:
+
+.. code-block:: bash
+
+    $ python lerobot/scripts/unified_model_inference.py --config_path=examples/train_config_smolvla.json
+
+Download the SmolVLA weights using the Hugging Face CLI:
+
+.. code-block:: bash
+
+    $ huggingface-cli download lerobot/smolvla_base
+
+Download the SmolVLM2-500M-Video-Instruct weights using the Hugging Face CLI:
+
+.. code-block:: bash
+
+    $ huggingface-cli download HuggingFaceTB/SmolVLM2-500M-Video-Instruct
+
diff --git a/docs/source/tutorials/setup.rst b/docs/source/tutorials/setup.rst
new file mode 100644
index 00000000..aa5bb59b
--- /dev/null
+++ b/docs/source/tutorials/setup.rst
@@ -0,0 +1,53 @@
+Setup and Configuration
+=======================
+
+AutoX Installation
+------------------
+
+Download the source code:
+
+.. code-block:: bash
+
+    $ git clone git@code.autox.ds:xisp/agi/lerobot.git
+    $ cd lerobot
+
+We use `uv <https://docs.astral.sh/uv/>`_ to manage Python dependencies as it is easier and faster to use than Conda. If you would still like to use Conda, see the Conda environment setup below in the LeRobot installation instructions. See the `uv installation instructions <https://docs.astral.sh/uv/getting-started/installation/>`_ to set it up. Once uv is installed, run the following to set up the environment:
+
+If you haven't already, please install cmake 3.x using ``apt``, ``brew``, or another package manager.
+
+.. code-block:: bash
+
+    $ uv sync --extra tau0 --extra test --extra video_benchmark --extra accelerate --extra dev --extra feetech --extra openai --extra onnx --extra smolvla --extra libero
+    $ source .venv/bin/activate
+
+Note that PI0.5 and Tau0/PI0 are not compatible with each other due to different ``transformers`` package versions. If you need to run PI0.5, run the following command which will override the ``transformers`` package version:
+
+.. code-block:: bash
+
+    $ uv pip install -r requirements-pi05.txt
+
+To use `Weights and Biases <https://docs.wandb.ai/quickstart>`_ for experiment tracking, log in with:
+
+.. code-block:: bash
+
+    $ wandb login
+
+Setting Pre-commit
+------------------
+
+Please, always set pre-commit, so it will check code styling and other necessary environment checks before committing to git.
+
+Check if ``.pre-commit-config.yaml`` file is present under lerobot directory.
+To set pre-commit use the following command:
+
+.. code-block:: bash
+
+    pre-commit install
+
+After successfully executing the above command, the pre-commit checks will automatically be done whenever git commit is called. If any error is found by pre-commit, the commit to git will be made only when the error is fixed.
+
+Setting .ENV file
+-----------------
+
+To run high level planner with gpt4o, OpenAI api key is needed to be set. Create an ``.env`` file under lerobot directory and set the variable ``OPENAI_API_KEY`` to your openai api key. The high level planner inference script will automatically load the api key and pass to openai client.
+
diff --git a/docs/source/tutorials/training.rst b/docs/source/tutorials/training.rst
new file mode 100644
index 00000000..3ea44a51
--- /dev/null
+++ b/docs/source/tutorials/training.rst
@@ -0,0 +1,59 @@
+Training and Checkpointing
+==========================
+
+Distributed Training Configuration
+----------------------------------
+
+To configure distributed training, run:
+
+.. code-block:: bash
+
+    $ accelerate config
+
+We are currently using DeepSpeed for model parallelism distributed training. For an accelerate config example, see `this config file <../../examples/accelerate_ci_config.yaml>`_ used for our CI pipelines.
+
+To train across multiple GPUs, run the following command:
+
+.. code-block:: bash
+
+    $ accelerate launch lerobot/scripts/train.py --config_path=examples/dev_config.json
+
+This uses the default accelerate config file set by running ``accelerate config``.
+
+To use a specific accelerate config, run:
+
+.. code-block:: bash
+
+    $ accelerate launch --config_file=examples/accelerate_ci_config.yaml lerobot/scripts/train.py --config_path=examples/dev_config.json
+
+
+Checkpointing and Resuming Training
+-----------------------------------
+
+Start training and saving checkpoints:
+
+.. code-block:: bash
+
+    $ accelerate launch lerobot/scripts/train.py --config_path=examples/dev_config.json --output_dir=outputs/train/tau0 --steps 40 --log_freq 5 --save_freq 10
+
+A checkpoint should be saved at step 40. The checkpoint should be saved in the directory ``outputs/train/tau0/checkpoints/000040/``.
+
+The ``model.safetensors`` file is not automatically generated by DeepSpeed's checkpointing during training. To consolidate the sharded model checkpoint files generated by DeepSpeed into a single ``model.safetensors`` file, run:
+
+.. code-block:: bash
+
+    $ ./convert_checkpoint.sh outputs/train/tau0/checkpoints/000040/
+
+This generates a ``model.safetensors`` file that can be used for inference or resuming training.
+
+Training can be resumed by running:
+
+.. code-block:: bash
+
+    $ accelerate launch lerobot/scripts/train.py --config_path=outputs/train/tau0/checkpoints/000040/train_config.json --resume=true --steps=100
+
+It should be noted that the training step count continues from the checkpoint step count, but the dataloader is reset.
+
+Optional TODO: Restore the state of the dataloader:
+https://huggingface.co/docs/accelerate/v1.6.0/en/usage_guides/checkpoint#restoring-the-state-of-the-dataloader
+
diff --git a/docs/source/tutorials/visualization.rst b/docs/source/tutorials/visualization.rst
new file mode 100644
index 00000000..74d4f575
--- /dev/null
+++ b/docs/source/tutorials/visualization.rst
@@ -0,0 +1,19 @@
+Visualization
+=============
+
+Using so100_visualization script
+--------------------------------
+
+The ``so100_visualization`` script imitates action from lerobot format dataset in simulator. To run ``bi-so100-block-manipulator`` dataset, pull the data from git lfs and move it under ``lerobot/lerobot`` directory. Install the simulator using:
+
+.. code-block:: bash
+
+    $ uv sync --extra tau0 --extra pusht --extra test --extra video_benchmark --extra accelerate --extra dev --extra feetech --extra openai --extra onnx --extra smolvla --extra so100
+    $ source .venv/bin/activate
+
+Then simply run the below script:
+
+.. code-block:: bash
+
+    python lerobot/scripts/so100_visualization.py --config_path=configs/so100/so100_viz_config.json
+

From 5f6e4fdfbdbcf4d54853b188a1b74793dea7ce31 Mon Sep 17 00:00:00 2001
From: William Yue <williamyue37@gmail.com>
Date: Tue, 30 Dec 2025 11:32:27 -0800
Subject: [PATCH 2/6] finished training and checkpoints doc

---
 configs/examples/dev_config.json        |  41 +++-----
 docs/source/tutorials.rst               |   5 +-
 docs/source/tutorials/ci.rst            | 120 ------------------------
 docs/source/tutorials/datasets.rst      |   3 +
 docs/source/tutorials/evaluation.rst    |   3 +
 docs/source/tutorials/inference.rst     |   3 +
 docs/source/tutorials/setup.rst         |  53 -----------
 docs/source/tutorials/training.rst      |  32 +++----
 docs/source/tutorials/visualization.rst |  19 ----
 9 files changed, 38 insertions(+), 241 deletions(-)
 delete mode 100644 docs/source/tutorials/ci.rst
 delete mode 100644 docs/source/tutorials/setup.rst
 delete mode 100644 docs/source/tutorials/visualization.rst

diff --git a/configs/examples/dev_config.json b/configs/examples/dev_config.json
index 4d30fdb3..297cc9e2 100644
--- a/configs/examples/dev_config.json
+++ b/configs/examples/dev_config.json
@@ -2,8 +2,9 @@
     "dataset_mixture": {
         "datasets": [
             {
-                "repo_id": "physical-intelligence/libero"
+                "repo_id": "lerobot/droid_100"
             }
+
         ],
         "weights": [
             1.0
@@ -13,36 +14,26 @@
         "vector_resample_strategy": "nearest"
     },
     "policy": {
-        "type": "tau0",
-        "pretrained_path": "lerobot/pi0",
-        "init_strategy": "full_he_init",
+        "type": "pi05",
+        "pretrained_path": "william-yue/pi05_base",
         "n_obs_steps": 1,
         "normalization_mapping": {
             "VISUAL": "IDENTITY",
-            "STATE": "MEAN_STD",
+            "STATE": "MIN_MAX",
             "ACTION": "MEAN_STD"
         },
         "chunk_size": 50,
         "n_action_steps": 50,
         "max_state_dim": 32,
         "max_action_dim": 32,
-        "action_expert_num_cams": 1,
-        "cloud_vlm_latency_mean": 0.16,
-        "cloud_vlm_latency_std": 0.05,
-        "cloud_vlm_latency_lower": 0.10,
-        "cloud_vlm_latency_upper": 0.25,
-        "action_decoder_latency_mean": 0.032,
-        "action_decoder_latency_std": 0.010,
-        "action_decoder_latency_lower": 0.020,
-        "action_decoder_latency_upper": 0.050,
-        "tokenizer_max_length": 52,
-        "response_max_tokens": 52,
         "proj_width": 1024,
         "num_steps": 10,
+        "init_strategy": "expert_only_he_init",
         "attention_implementation": "eager",
         "freeze_vision_encoder": true,
         "train_expert_only": true,
-        "train_state_proj": true,
+        "tokenizer_max_length": 256,
+        "discrete_action_max_length": 75,
         "optimizer_lr": 2.5e-05,
         "optimizer_betas": [
             0.9,
@@ -57,17 +48,16 @@
     "resume": false,
     "seed": 1000,
     "resolution": [224, 224],
-    "num_cams": 2,
-    "action_expert_num_cams": 1,
+    "num_cams": 1,
+    "action_expert_num_cams": 0,
     "max_state_dim": 32,
     "max_action_dim": 32,
     "action_chunk": 50,
-    "frozen_actions": 25,
-    "loss_weighting": {"MSE": 1, "CE": 1},
+    "loss_weighting": {"MSE": 10, "CE": 1},
     "num_workers": 4,
-    "batch_size": 2,
+    "batch_size": 1,
     "gradient_accumulation_steps": 1,
-    "dataloader_batch_size": 2,
+    "dataloader_batch_size": 1,
     "prefetch_factor": 8,
     "steps": 100,
     "log_freq": 1,
@@ -95,11 +85,10 @@
     },
     "wandb": {
         "enable": true,
-        "entity": "wyautox-autox",
-        "project": "tau0",
+        "project": "pi05",
         "run_id": null,
         "name": null,
-        "notes": "Dev config for pi0",
+        "notes": "Dev config for pi05",
         "tags": [],
         "group": null,
         "job_type": null,
diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst
index 9af0ffae..7d77e963 100644
--- a/docs/source/tutorials.rst
+++ b/docs/source/tutorials.rst
@@ -1,17 +1,14 @@
 Tutorials
 =========
 
-This section provides step-by-step guides for common tasks in OpenTau, including training, inference, evaluation, and data management.
+This section provides step-by-step guides for common tasks in OpenTau, including training, inference, evaluation, and adding datasets.
 
 .. toctree::
    :maxdepth: 1
    :caption: Guides:
 
-   tutorials/setup
    tutorials/training
    tutorials/inference
    tutorials/evaluation
    tutorials/datasets
-   tutorials/visualization
-   tutorials/ci
 
diff --git a/docs/source/tutorials/ci.rst b/docs/source/tutorials/ci.rst
deleted file mode 100644
index d6138184..00000000
--- a/docs/source/tutorials/ci.rst
+++ /dev/null
@@ -1,120 +0,0 @@
-CI/CD
-=====
-
-Running the GitLab CI server
-----------------------------
-
-To run the gitlab CI server, you need to have `docker <https://www.docker.com>`_, `gitlab-runner <https://docs.gitlab.com/runner/>`_, and `Nvidia Container Toolkit <https://docs.nvidia.com/ai-enterprise/deployment/vmware/latest/docker.html>`_ installed on your machine. The ``/autox`` NAS should also be mounted for sharing checkpoints.
-
-Once you have these installed, you can use the example config to run the gitlab CI server. The default location for the config is ``$HOME/.gitlab-runner/config.toml``.
-
-.. code-block:: toml
-
-    concurrent = 4
-    check_interval = 0
-    shutdown_timeout = 0
-
-    [session_server]
-      session_timeout = 1800
-
-    [[runners]]                             # Runner for (parallel) GPU jobs
-      name = "sj-k8s-gpu-005"
-      limit = 1                             # Only 1 GPU job at a time
-      url = "https://code.autox.ds"
-      id = 468
-      token = "<Token generated by GitLab runner registration>"
-      token_obtained_at = 2025-07-07T20:01:30Z
-      token_expires_at = 0001-01-01T00:00:00Z
-      executor = "docker"
-      [runners.cache]
-        MaxUploadedArchiveSize = 0
-        [runners.cache.s3]
-        [runners.cache.gcs]
-        [runners.cache.azure]
-      [runners.docker]
-        tls_verify = false
-        image = "gitlab-runner:snapshot04"  # This image needs to be built with docker/gitlab-ci-image/Dockerfile
-        ipc_mode="host"                     # Ignored when shm_size is set, but good to have
-        pull_policy = "if-not-present"      # Prevents pulling the image every time
-        privileged = false
-        disable_entrypoint_overwrite = false
-        oom_kill_disable = false
-        disable_cache = false
-        volumes = ["/cache", "/autox:/autox:rw"]  # mounts the /autox NAS to the containers
-        entrypoint = ["/bin/bash", "-c"]
-        shm_size = 34359738368              # 32 GiB. Deepspeed uses more than the default 64 MiB.
-        network_mtu = 0
-        gpus = "\"device=0,1\""             # Use GPU 0 and 1 for the multi-GPU runner
-
-    [[runners]]                             # Runner for single-GPU jobs
-      name = "sj-k8s-gpu-005"
-      limit = 1
-      url = "https://code.autox.ds"
-      id = 471
-      token = "<Token generated by GitLab runner registration>"
-      token_obtained_at = 2025-07-29T20:18:22Z
-      token_expires_at = 0001-01-01T00:00:00Z
-      executor = "docker"
-      [runners.cache]
-        MaxUploadedArchiveSize = 0
-        [runners.cache.s3]
-        [runners.cache.gcs]
-        [runners.cache.azure]
-      [runners.docker]
-        tls_verify = false
-        image = "gitlab-runner:snapshot04"  # This image needs to be built with docker/gitlab-ci-image/Dockerfile
-        pull_policy = "if-not-present"      # Prevents pulling the image every time
-        privileged = false
-        disable_entrypoint_overwrite = false
-        oom_kill_disable = false
-        gpus = "\"device=2\""               # Use GPU 2 for the single-GPU runner
-        disable_cache = false
-        volumes = ["/cache", "/autox:/autox:rw"]
-        shm_size = 0
-        network_mtu = 0
-
-    [[runners]]                             # Runner for CPU jobs
-      name = "sj-k8s-gpu-005"
-      limit = 2                             # Up to 2 CPU jobs at a time
-      url = "https://code.autox.ds"
-      id = 470
-      token = "<Another token generated by GitLab runner registration>"
-      token_obtained_at = 2025-07-29T18:27:09Z
-      token_expires_at = 0001-01-01T00:00:00Z
-      executor = "docker"
-      [runners.cache]
-        MaxUploadedArchiveSize = 0
-        [runners.cache.s3]
-        [runners.cache.gcs]
-        [runners.cache.azure]
-      [runners.docker]
-        tls_verify = false
-        image = "gitlab-runner:snapshot04"  # This image needs to be built with docker/gitlab-ci-image/Dockerfile
-        pull_policy = "if-not-present"      # Prevents pulling the image every time
-        privileged = false
-        disable_entrypoint_overwrite = false
-        oom_kill_disable = false
-        disable_cache = false
-        volumes = ["/cache"]
-        shm_size = 0
-        network_mtu = 0
-
-To verify the above config works, you can manually create a container using:
-
-.. code-block:: bash
-
-    # Test the multi-GPU runner setup
-    docker container run -it --rm --gpus '"device=0,1"' --shm-size 34359738368 --ipc=host -v /autox:/autox:rw gitlab-runner:snapshot04
-    # Test the single-GPU runner setup
-    docker container run -it --rm --gpus '"device=2"' --shm-size 34359738368 --ipc=host -v /autox:/autox:rw gitlab-runner:snapshot04
-    # Test the CPU-only runner setup
-    docker container run -it --rm gitlab-runner:snapshot04
-
-The gitlab runner can be started with:
-
-.. code-block:: bash
-
-    sudo gitlab-runner restart && gitlab-runner run
-
-You can also use ``gitlab-runner status`` to check the status of the runner.
-
diff --git a/docs/source/tutorials/datasets.rst b/docs/source/tutorials/datasets.rst
index 1a549697..24426546 100644
--- a/docs/source/tutorials/datasets.rst
+++ b/docs/source/tutorials/datasets.rst
@@ -1,6 +1,9 @@
 Datasets
 ========
 
+.. note::
+   Make sure you have followed the :doc:`/installation` guide before proceeding.
+
 Standard Data Format (for Development and Inference)
 ----------------------------------------------------
 
diff --git a/docs/source/tutorials/evaluation.rst b/docs/source/tutorials/evaluation.rst
index 6ffdd897..cbb3d689 100644
--- a/docs/source/tutorials/evaluation.rst
+++ b/docs/source/tutorials/evaluation.rst
@@ -1,6 +1,9 @@
 Evaluation
 ==========
 
+.. note::
+   Make sure you have followed the :doc:`/installation` guide before proceeding.
+
 Evaluating a policy in Simulation
 ---------------------------------
 
diff --git a/docs/source/tutorials/inference.rst b/docs/source/tutorials/inference.rst
index b5acd1e6..1093db3d 100644
--- a/docs/source/tutorials/inference.rst
+++ b/docs/source/tutorials/inference.rst
@@ -1,6 +1,9 @@
 Inference
 =========
 
+.. note::
+   Make sure you have followed the :doc:`/installation` guide before proceeding.
+
 Running inference with a trained model
 --------------------------------------
 
diff --git a/docs/source/tutorials/setup.rst b/docs/source/tutorials/setup.rst
deleted file mode 100644
index aa5bb59b..00000000
--- a/docs/source/tutorials/setup.rst
+++ /dev/null
@@ -1,53 +0,0 @@
-Setup and Configuration
-=======================
-
-AutoX Installation
-------------------
-
-Download the source code:
-
-.. code-block:: bash
-
-    $ git clone git@code.autox.ds:xisp/agi/lerobot.git
-    $ cd lerobot
-
-We use `uv <https://docs.astral.sh/uv/>`_ to manage Python dependencies as it is easier and faster to use than Conda. If you would still like to use Conda, see the Conda environment setup below in the LeRobot installation instructions. See the `uv installation instructions <https://docs.astral.sh/uv/getting-started/installation/>`_ to set it up. Once uv is installed, run the following to set up the environment:
-
-If you haven't already, please install cmake 3.x using ``apt``, ``brew``, or another package manager.
-
-.. code-block:: bash
-
-    $ uv sync --extra tau0 --extra test --extra video_benchmark --extra accelerate --extra dev --extra feetech --extra openai --extra onnx --extra smolvla --extra libero
-    $ source .venv/bin/activate
-
-Note that PI0.5 and Tau0/PI0 are not compatible with each other due to different ``transformers`` package versions. If you need to run PI0.5, run the following command which will override the ``transformers`` package version:
-
-.. code-block:: bash
-
-    $ uv pip install -r requirements-pi05.txt
-
-To use `Weights and Biases <https://docs.wandb.ai/quickstart>`_ for experiment tracking, log in with:
-
-.. code-block:: bash
-
-    $ wandb login
-
-Setting Pre-commit
-------------------
-
-Please, always set pre-commit, so it will check code styling and other necessary environment checks before committing to git.
-
-Check if ``.pre-commit-config.yaml`` file is present under lerobot directory.
-To set pre-commit use the following command:
-
-.. code-block:: bash
-
-    pre-commit install
-
-After successfully executing the above command, the pre-commit checks will automatically be done whenever git commit is called. If any error is found by pre-commit, the commit to git will be made only when the error is fixed.
-
-Setting .ENV file
------------------
-
-To run high level planner with gpt4o, OpenAI api key is needed to be set. Create an ``.env`` file under lerobot directory and set the variable ``OPENAI_API_KEY`` to your openai api key. The high level planner inference script will automatically load the api key and pass to openai client.
-
diff --git a/docs/source/tutorials/training.rst b/docs/source/tutorials/training.rst
index 3ea44a51..aaadd5c2 100644
--- a/docs/source/tutorials/training.rst
+++ b/docs/source/tutorials/training.rst
@@ -1,30 +1,27 @@
 Training and Checkpointing
 ==========================
 
+.. note::
+   Make sure you have followed the :doc:`/installation` guide before proceeding.
+
 Distributed Training Configuration
 ----------------------------------
 
-To configure distributed training, run:
-
-.. code-block:: bash
-
-    $ accelerate config
-
-We are currently using DeepSpeed for model parallelism distributed training. For an accelerate config example, see `this config file <../../examples/accelerate_ci_config.yaml>`_ used for our CI pipelines.
+For an accelerate config example, see `this config file <../../examples/accelerate_ci_config.yaml>`_ used for our CI pipelines.
 
-To train across multiple GPUs, run the following command:
+To train a model, run the following command:
 
 .. code-block:: bash
 
-    $ accelerate launch lerobot/scripts/train.py --config_path=examples/dev_config.json
+    $ accelerate launch lerobot/scripts/train.py --config_path=examples/pi05_config.json
 
-This uses the default accelerate config file set by running ``accelerate config``.
+This uses the default accelerate config file at `~/.cache/huggingface/accelerate/default_config.yaml` which is set by running ``accelerate config``.
 
-To use a specific accelerate config, run:
+Optionally, to use a specific accelerate config file (instead of the default), run:
 
 .. code-block:: bash
 
-    $ accelerate launch --config_file=examples/accelerate_ci_config.yaml lerobot/scripts/train.py --config_path=examples/dev_config.json
+    $ accelerate launch --config_file=examples/accelerate_ci_config.yaml lerobot/scripts/train.py --config_path=examples/pi05_config.json
 
 
 Checkpointing and Resuming Training
@@ -34,15 +31,15 @@ Start training and saving checkpoints:
 
 .. code-block:: bash
 
-    $ accelerate launch lerobot/scripts/train.py --config_path=examples/dev_config.json --output_dir=outputs/train/tau0 --steps 40 --log_freq 5 --save_freq 10
+    $ accelerate launch lerobot/scripts/train.py --config_path=examples/pi05_config.json --output_dir=outputs/train/pi05 --steps 40 --log_freq 5 --save_freq 20
 
-A checkpoint should be saved at step 40. The checkpoint should be saved in the directory ``outputs/train/tau0/checkpoints/000040/``.
+A checkpoint should be saved at step 40. The checkpoint should be saved in the directory ``outputs/train/pi05/checkpoints/000040/``.
 
 The ``model.safetensors`` file is not automatically generated by DeepSpeed's checkpointing during training. To consolidate the sharded model checkpoint files generated by DeepSpeed into a single ``model.safetensors`` file, run:
 
 .. code-block:: bash
 
-    $ ./convert_checkpoint.sh outputs/train/tau0/checkpoints/000040/
+    $ ./convert_checkpoint.sh outputs/train/pi05/checkpoints/000040/
 
 This generates a ``model.safetensors`` file that can be used for inference or resuming training.
 
@@ -50,10 +47,7 @@ Training can be resumed by running:
 
 .. code-block:: bash
 
-    $ accelerate launch lerobot/scripts/train.py --config_path=outputs/train/tau0/checkpoints/000040/train_config.json --resume=true --steps=100
+    $ accelerate launch lerobot/scripts/train.py --config_path=outputs/train/pi05/checkpoints/000040/train_config.json --resume=true --steps=100
 
 It should be noted that the training step count continues from the checkpoint step count, but the dataloader is reset.
 
-Optional TODO: Restore the state of the dataloader:
-https://huggingface.co/docs/accelerate/v1.6.0/en/usage_guides/checkpoint#restoring-the-state-of-the-dataloader
-
diff --git a/docs/source/tutorials/visualization.rst b/docs/source/tutorials/visualization.rst
deleted file mode 100644
index 74d4f575..00000000
--- a/docs/source/tutorials/visualization.rst
+++ /dev/null
@@ -1,19 +0,0 @@
-Visualization
-=============
-
-Using so100_visualization script
---------------------------------
-
-The ``so100_visualization`` script imitates action from lerobot format dataset in simulator. To run ``bi-so100-block-manipulator`` dataset, pull the data from git lfs and move it under ``lerobot/lerobot`` directory. Install the simulator using:
-
-.. code-block:: bash
-
-    $ uv sync --extra tau0 --extra pusht --extra test --extra video_benchmark --extra accelerate --extra dev --extra feetech --extra openai --extra onnx --extra smolvla --extra so100
-    $ source .venv/bin/activate
-
-Then simply run the below script:
-
-.. code-block:: bash
-
-    python lerobot/scripts/so100_visualization.py --config_path=configs/so100/so100_viz_config.json
-

From 11d0aa74f851be96c2f291c7e82d2cb4c3d9dd6e Mon Sep 17 00:00:00 2001
From: William Yue <williamyue37@gmail.com>
Date: Tue, 30 Dec 2025 11:55:40 -0800
Subject: [PATCH 3/6] added inference

---
 docs/source/tutorials/inference.rst           | 38 ++-----------------
 docs/source/tutorials/training.rst            | 13 ++++---
 ...nified_model_inference.py => inference.py} |  0
 3 files changed, 11 insertions(+), 40 deletions(-)
 rename lerobot/scripts/{unified_model_inference.py => inference.py} (100%)

diff --git a/docs/source/tutorials/inference.rst b/docs/source/tutorials/inference.rst
index 1093db3d..bc2d5454 100644
--- a/docs/source/tutorials/inference.rst
+++ b/docs/source/tutorials/inference.rst
@@ -7,41 +7,11 @@ Inference
 Running inference with a trained model
 --------------------------------------
 
-To run inference on a trained model, you will need the saved checkpoint folder from training that contains at least ``train_config.json`` and ``model.safetensors`` files. If you ran the checkpointing and resuming tutorial, you should be able to find the checkpoint config file at ``outputs/train/tau0/checkpoints/000040/train_config.json``. Make sure you ran the ``zero_to_fp32.py`` and ``bin_to_safetensors.py`` scripts (or the ``convert_checkpoint.sh`` script) to convert the sharded model checkpoint files into a single ``model.safetensors`` file.
+To run inference on a trained model, you will need the saved checkpoint folder from training that contains at least these two files: ``train_config.json`` and ``model.safetensors``.
+If you ran the :doc:`checkpointing and resuming tutorial </tutorials/training>`, you should be able to find the checkpoint config file at ``outputs/train/pi05/checkpoints/000040/train_config.json``.
 
-To run inference with the entire model on the same device, run:
+To run inference, run the following command:
 
 .. code-block:: bash
 
-    $ python lerobot/scripts/unified_model_inference.py --config_path=outputs/train/tau0/checkpoints/000040/train_config.json
-
-Running inference on the TAU0 model takes less than 8 GB of GPU memory.
-
-For an example of how to run inference with the VLM in the cloud and the action expert on the robot, run:
-
-.. code-block:: bash
-
-    $ python lerobot/scripts/cloud_robot_inference.py --config_path=outputs/train/tau0/checkpoints/000040/train_config.json
-
-
-Zero Shot inferencing smolvla
------------------------------
-
-To run inference with the entire model on the same device, run:
-
-.. code-block:: bash
-
-    $ python lerobot/scripts/unified_model_inference.py --config_path=examples/train_config_smolvla.json
-
-Download the smolvla weights using huggingface cli:
-
-.. code-block:: bash
-
-    $ huggingface-cli download lerobot/smolvla_base
-
-Download the SmolVLM2-500M-Video-Instruct using hugging face cli:
-
-.. code-block:: bash
-
-    $ huggingface-cli download HuggingFaceTB/SmolVLM2-500M-Video-Instruct
-
+    python lerobot/scripts/inference.py --config_path=outputs/train/pi05/checkpoints/000040/train_config.json
diff --git a/docs/source/tutorials/training.rst b/docs/source/tutorials/training.rst
index aaadd5c2..55939fc9 100644
--- a/docs/source/tutorials/training.rst
+++ b/docs/source/tutorials/training.rst
@@ -13,7 +13,7 @@ To train a model, run the following command:
 
 .. code-block:: bash
 
-    $ accelerate launch lerobot/scripts/train.py --config_path=examples/pi05_config.json
+    accelerate launch lerobot/scripts/train.py --config_path=examples/pi05_config.json
 
 This uses the default accelerate config file at `~/.cache/huggingface/accelerate/default_config.yaml` which is set by running ``accelerate config``.
 
@@ -21,7 +21,7 @@ Optionally, to use a specific accelerate config file (instead of the default), r
 
 .. code-block:: bash
 
-    $ accelerate launch --config_file=examples/accelerate_ci_config.yaml lerobot/scripts/train.py --config_path=examples/pi05_config.json
+    accelerate launch --config_file=examples/accelerate_ci_config.yaml lerobot/scripts/train.py --config_path=examples/pi05_config.json
 
 
 Checkpointing and Resuming Training
@@ -31,7 +31,7 @@ Start training and saving checkpoints:
 
 .. code-block:: bash
 
-    $ accelerate launch lerobot/scripts/train.py --config_path=examples/pi05_config.json --output_dir=outputs/train/pi05 --steps 40 --log_freq 5 --save_freq 20
+    accelerate launch lerobot/scripts/train.py --config_path=examples/pi05_config.json --output_dir=outputs/train/pi05 --steps 40 --log_freq 5 --save_freq 20
 
 A checkpoint should be saved at step 40. The checkpoint should be saved in the directory ``outputs/train/pi05/checkpoints/000040/``.
 
@@ -39,7 +39,7 @@ The ``model.safetensors`` file is not automatically generated by DeepSpeed's che
 
 .. code-block:: bash
 
-    $ ./convert_checkpoint.sh outputs/train/pi05/checkpoints/000040/
+    ./convert_checkpoint.sh outputs/train/pi05/checkpoints/000040/
 
 This generates a ``model.safetensors`` file that can be used for inference or resuming training.
 
@@ -47,7 +47,8 @@ Training can be resumed by running:
 
 .. code-block:: bash
 
-    $ accelerate launch lerobot/scripts/train.py --config_path=outputs/train/pi05/checkpoints/000040/train_config.json --resume=true --steps=100
+    accelerate launch lerobot/scripts/train.py --config_path=outputs/train/pi05/checkpoints/000040/train_config.json --resume=true --steps=100
 
-It should be noted that the training step count continues from the checkpoint step count, but the dataloader is reset.
+.. note::
+   When resuming training from a checkpoint, the training step count will continue from the checkpoint's step, but the dataloader will be reset.
 
diff --git a/lerobot/scripts/unified_model_inference.py b/lerobot/scripts/inference.py
similarity index 100%
rename from lerobot/scripts/unified_model_inference.py
rename to lerobot/scripts/inference.py

From e570c45b8cda0f7390799ec233d7298f8a51d94b Mon Sep 17 00:00:00 2001
From: William Yue <williamyue37@gmail.com>
Date: Tue, 30 Dec 2025 12:03:19 -0800
Subject: [PATCH 4/6] finished evaluation

---
 docs/source/tutorials/evaluation.rst | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/docs/source/tutorials/evaluation.rst b/docs/source/tutorials/evaluation.rst
index cbb3d689..cf693de0 100644
--- a/docs/source/tutorials/evaluation.rst
+++ b/docs/source/tutorials/evaluation.rst
@@ -7,13 +7,13 @@ Evaluation
 Evaluating a policy in Simulation
 ---------------------------------
 
-To evaluate a policy in simulation, you can launch the ``lerobot/scripts/eval.py`` script with ``accelerate launch``.
+OpenTau supports evaluation in asynchronous vectorized simulation environments. To evaluate a policy in simulation, you can launch the ``lerobot/scripts/eval.py`` script with ``accelerate launch``.
 Each accelerate process will only work on its fraction of the tasks, improving throughput.
 For example, to evaluate a policy on the LIBERO 10, run:
 
 .. code-block:: bash
 
-    $ accelerate launch --config_file <ACCELERATE_CONFIG_PATH> lerobot/scripts/eval.py --config_path=outputs/train/tau0/checkpoints/000040/train_config.json
+    accelerate launch --config_file <ACCELERATE_CONFIG_PATH> lerobot/scripts/eval.py --config_path=outputs/train/pi05/checkpoints/000040/train_config.json
 
 .. note::
    You can't pass in an DeepSpeed accelerate config file to ``eval.py`` as DeepSpeed expects optimizer and dataloader during ``accelerator.prepare()``, which we do not provide during eval. It is recommended to pass in a DDP config.
@@ -21,10 +21,10 @@ For example, to evaluate a policy on the LIBERO 10, run:
 .. note::
    Make sure that the ``EnvConfig`` and ``EvalConfig`` are set to the correct values for the simulation environment in your train config file.
 
-Evaluating policy in a libero environment
+Evaluating policy in a LIBERO environment
 -----------------------------------------
 
-To evaluate the policy on the LIBERO benchmark, add the following section to the training config:
+OpenTau currently supports the `LIBERO benchmark <https://libero-project.github.io/main.html>`_. To evaluate the policy on the LIBERO benchmark, add the following section to the training config:
 
 .. code-block:: json
 
@@ -47,16 +47,3 @@ This will run the 0th task and 2nd task in ``libero_spatial``. Each task will ru
 
 When launched with accelerate, each GPU process will only work on its fraction of the tasks, improving throughput.
 
-Using Simulations
------------------
-
-Metaworld
-^^^^^^^^^
-
-When using Metaworld on Ubuntu machines with headless rendering, make sure to export these environment variables:
-
-.. code-block:: bash
-
-    export MUJOCO_GL=egl
-    export PYOPENGL_PLATFORM=egl
-

From bafe553d9a75edc996325500bfb077ed93f5207f Mon Sep 17 00:00:00 2001
From: William Yue <williamyue37@gmail.com>
Date: Tue, 30 Dec 2025 12:48:17 -0800
Subject: [PATCH 5/6] save

---
 docs/source/tutorials/datasets.rst   | 51 ++--------------------------
 docs/source/tutorials/evaluation.rst |  2 +-
 2 files changed, 3 insertions(+), 50 deletions(-)

diff --git a/docs/source/tutorials/datasets.rst b/docs/source/tutorials/datasets.rst
index 24426546..2976617d 100644
--- a/docs/source/tutorials/datasets.rst
+++ b/docs/source/tutorials/datasets.rst
@@ -4,49 +4,10 @@ Datasets
 .. note::
    Make sure you have followed the :doc:`/installation` guide before proceeding.
 
-Standard Data Format (for Development and Inference)
-----------------------------------------------------
+Adding a new dataset
+--------------------
 
-The "Standard Data Format" is the expected data format returned by ``torch.utils.data.Dataset``'s ``__getitem__`` and the expected input to ``torch.nn.Module``'s ``forward`` method. Any new datasets, VLMs, or VLAs that get added to this repository need to adhere to this format. Data being passed to the model during inference should also adhere to this format. The format is as follows:
 
-.. code-block:: python
-
-    {
-        "camera0": torch.Tensor,  # shape (C, H, W) with values from [0, 1] and with the H, W resized to the config's specifications.
-        "camera1": torch.Tensor,  # shape (C, H, W) with values from [0, 1] and with the H, W resized to the config's specifications.
-        # ...
-        "camera{num_cams-1}": torch.Tensor,  # shape (C, H, W) with values from [0, 1] and with the H, W resized to the config's specifications.
-
-        "local_camera0": torch.Tensor,  # shape (C, H, W) with values from [0, 1] and with the H, W resized to the config's specifications.
-        "local_camera1": torch.Tensor,  # shape (C, H, W) with values from [0, 1] and with the H, W resized to the config's specifications.
-        # ...
-        "local_camera{action_expert_num_cams-1}": torch.Tensor,  # shape (C, H, W) with values from [0, 1] and with the H, W resized to the config's specifications.
-
-        "state": torch.Tensor,    # shape (max_state_dim)
-        "actions": torch.Tensor,  # shape (action_chunk, max_action_dim)
-        "frozen_actions": torch.Tensor,  # shape (frozen_actions, max_action_dim)
-        "prompt": str,            # the task prompt, e.g. "Pick up the object and place it on the table."
-        "response": str,          # the response from the VLM for vision QA tasks. For LeRobotDataset, this will be an empty string.
-        "loss_type": str,         # the loss type to be applied to this sample (either "CE" for cross entropy or "MSE" for mean squared error)
-
-        "img_is_pad": torch.BoolTensor,  # shape (num_cams,) with values 0 or 1, where 1 indicates that the camera image is a padded image.
-        "local_img_is_pad": torch.BoolTensor,  # shape (action_expert_num_cam,) with values 0 or 1, where 1 indicates that the local camera image is a padded image.
-        "action_is_pad": torch.BoolTensor,  # shape (action_chunk,) with values 0 or 1, where 1 indicates that the action is a padded action.
-        "frozen_action_is_pad": torch.BoolTensor,  # shape (frozen_actions,) with values 0 or 1, where 1 indicates that the frozen action is a padded action.
-    }
-
-The config file will have to provide the following information in ``TrainPipelineConfig``:
-
-- ``H, W``: The height and width of the camera images. These should be the same for all cameras.
-- ``num_cams``: The number of cameras for the cloud VLM in the dataset.
-- ``action_expert_num_cams``: The number of cameras for the action expert in the dataset.
-- ``max_state_dim``: The maximum dimension of the state vector.
-- ``max_action_dim``: The maximum dimension of the action vector.
-- ``action_chunk``: The number of actions in the action vector. This is usually 1 for single action tasks, but can be more for multi-action tasks.
-
-Cameras should be labeled in order of importance (e.g. camera0 is the most important camera, camera1 is the second most important camera, etc.). The model dataset will select the most important cameras to use if num_cams is less than the number of cameras in the dataset.
-
-Both the prompt and response strings should contain exactly one newline character at the end of the string unless they are empty strings.
 
 
 Computing max token length for dataset mixture
@@ -63,12 +24,4 @@ Each training config (e.g., `dev-config <../../examples/dev_config.json>`_) shou
 
 This will output a token count for each language key in the dataset mixture, and save it to ``outputs/stats/token_count.json``.
 
-AgiBot dataset
---------------
-
-A clone of the ``agibot-world/AgiBotWorld-Alpha`` dataset is provided at ``/autox/teams/project-bot/AgiBotWorld-Alpha``. You can use the script ``lerobot/scripts/agibot_to_lerobot.py`` to convert it to lerobot format, saved in a given directory. For example:
-
-.. code-block:: bash
-
-    $ python3 lerobot/scripts/agibot_to_lerobot.py --src_path /autox/teams/project-bot/AgiBotWorld-Alpha --tgt_path <path> --task_id 327
 
diff --git a/docs/source/tutorials/evaluation.rst b/docs/source/tutorials/evaluation.rst
index cf693de0..ddb03eec 100644
--- a/docs/source/tutorials/evaluation.rst
+++ b/docs/source/tutorials/evaluation.rst
@@ -26,7 +26,7 @@ Evaluating policy in a LIBERO environment
 
 OpenTau currently supports the `LIBERO benchmark <https://libero-project.github.io/main.html>`_. To evaluate the policy on the LIBERO benchmark, add the following section to the training config:
 
-.. code-block:: json
+.. code-block:: javascript
 
     {
         ...,

From 5364f5dc7e5016b6e64a2536c6491b4727132f65 Mon Sep 17 00:00:00 2001
From: William Yue <williamyue37@gmail.com>
Date: Tue, 30 Dec 2025 15:35:50 -0800
Subject: [PATCH 6/6] finish datasets page

---
 docs/source/tutorials/datasets.rst | 56 ++++++++++++++++++++++++++++--
 1 file changed, 53 insertions(+), 3 deletions(-)

diff --git a/docs/source/tutorials/datasets.rst b/docs/source/tutorials/datasets.rst
index 2976617d..8f1abb36 100644
--- a/docs/source/tutorials/datasets.rst
+++ b/docs/source/tutorials/datasets.rst
@@ -4,16 +4,66 @@ Datasets
 .. note::
    Make sure you have followed the :doc:`/installation` guide before proceeding.
 
-Adding a new dataset
---------------------
+Building a dataset mixture
+--------------------------
 
+You can define a dataset mixture in your configuration file using the ``dataset_mixture`` key. Here is an example:
 
+.. code-block:: javascript
 
+    {
+        "dataset_mixture": {
+            "datasets": [
+                {
+                    "repo_id": "physical-intelligence/libero"
+                },
+                {
+                    "repo_id": "lerobot/droid_100"
+                }
+            ],
+            "weights": [
+                0.3,
+                0.7
+            ],
+            "action_freq": 30.0
+        },
+        ...
+    }
+
+For each new dataset, you must add an entry to ``opentau/datasets/standard_data_format_mapping.py`` to map the dataset features to the Standard Data Format (see the :ref:`Standard Data Format section <concepts/standard-data-format>` in the Concepts documentation).
+Alternatively, you can provide a custom mapping in the dataset config using the ``data_features_name_mapping`` and ``loss_type_mapping`` keys.
+For example:
+
+.. code-block:: javascript
+
+    {
+        "dataset_mixture": {
+            "datasets": [
+                {
+                    "repo_id": "physical-intelligence/libero",
+                    "data_features_name_mapping": {
+                        "camera0": "observation.images.exterior_image_1_left",
+                        "camera1": "observation.images.exterior_image_2_left"
+                    },
+                    "loss_type_mapping": "MSE"
+                },
+                {
+                    "repo_id": "lerobot/droid_100"
+                }
+            ],
+            "weights": [
+                0.3,
+                0.7
+            ],
+            "action_freq": 30.0
+        },
+        ...
+    }
 
 Computing max token length for dataset mixture
 ----------------------------------------------
 
-Each training config (e.g., `dev-config <../../examples/dev_config.json>`_) should contain a dataset mixture definition. To evaluate the maximum token length for the dataset mixture, you can run the following command:
+Each training config should contain a dataset mixture definition. To evaluate the maximum token length for the dataset mixture, you can run the following command:
 
 .. code-block:: bash