diff --git a/configs/examples/dev_config.json b/configs/examples/dev_config.json index 4d30fdb3..297cc9e2 100644 --- a/configs/examples/dev_config.json +++ b/configs/examples/dev_config.json @@ -2,8 +2,9 @@ "dataset_mixture": { "datasets": [ { - "repo_id": "physical-intelligence/libero" + "repo_id": "lerobot/droid_100" } + ], "weights": [ 1.0 @@ -13,36 +14,26 @@ "vector_resample_strategy": "nearest" }, "policy": { - "type": "tau0", - "pretrained_path": "lerobot/pi0", - "init_strategy": "full_he_init", + "type": "pi05", + "pretrained_path": "william-yue/pi05_base", "n_obs_steps": 1, "normalization_mapping": { "VISUAL": "IDENTITY", - "STATE": "MEAN_STD", + "STATE": "MIN_MAX", "ACTION": "MEAN_STD" }, "chunk_size": 50, "n_action_steps": 50, "max_state_dim": 32, "max_action_dim": 32, - "action_expert_num_cams": 1, - "cloud_vlm_latency_mean": 0.16, - "cloud_vlm_latency_std": 0.05, - "cloud_vlm_latency_lower": 0.10, - "cloud_vlm_latency_upper": 0.25, - "action_decoder_latency_mean": 0.032, - "action_decoder_latency_std": 0.010, - "action_decoder_latency_lower": 0.020, - "action_decoder_latency_upper": 0.050, - "tokenizer_max_length": 52, - "response_max_tokens": 52, "proj_width": 1024, "num_steps": 10, + "init_strategy": "expert_only_he_init", "attention_implementation": "eager", "freeze_vision_encoder": true, "train_expert_only": true, - "train_state_proj": true, + "tokenizer_max_length": 256, + "discrete_action_max_length": 75, "optimizer_lr": 2.5e-05, "optimizer_betas": [ 0.9, @@ -57,17 +48,16 @@ "resume": false, "seed": 1000, "resolution": [224, 224], - "num_cams": 2, - "action_expert_num_cams": 1, + "num_cams": 1, + "action_expert_num_cams": 0, "max_state_dim": 32, "max_action_dim": 32, "action_chunk": 50, - "frozen_actions": 25, - "loss_weighting": {"MSE": 1, "CE": 1}, + "loss_weighting": {"MSE": 10, "CE": 1}, "num_workers": 4, - "batch_size": 2, + "batch_size": 1, "gradient_accumulation_steps": 1, - "dataloader_batch_size": 2, + "dataloader_batch_size": 1, 
"prefetch_factor": 8, "steps": 100, "log_freq": 1, @@ -95,11 +85,10 @@ }, "wandb": { "enable": true, - "entity": "wyautox-autox", - "project": "tau0", + "project": "pi05", "run_id": null, "name": null, - "notes": "Dev config for pi0", + "notes": "Dev config for pi05", "tags": [], "group": null, "job_type": null, diff --git a/docs/source/index.rst b/docs/source/index.rst index 08fbf1ae..773ac629 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,6 +8,7 @@ OpenTau documentation overview installation concepts + tutorials .. toctree:: :maxdepth: 2 diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst new file mode 100644 index 00000000..7d77e963 --- /dev/null +++ b/docs/source/tutorials.rst @@ -0,0 +1,14 @@ +Tutorials +========= + +This section provides step-by-step guides for common tasks in OpenTau, including training, inference, evaluation, and adding datasets. + +.. toctree:: + :maxdepth: 1 + :caption: Guides: + + tutorials/training + tutorials/inference + tutorials/evaluation + tutorials/datasets + diff --git a/docs/source/tutorials/datasets.rst b/docs/source/tutorials/datasets.rst new file mode 100644 index 00000000..8f1abb36 --- /dev/null +++ b/docs/source/tutorials/datasets.rst @@ -0,0 +1,77 @@ +Datasets +======== + +.. note:: + Make sure you have followed the :doc:`/installation` guide before proceeding. + +Building a dataset mixture +-------------------------- + +You can define a dataset mixture in your configuration file using the ``dataset_mixture`` key. Here is an example: + +.. code-block:: json + + { + "dataset_mixture": { + "datasets": [ + { + "repo_id": "physical-intelligence/libero" + }, + { + "repo_id": "lerobot/droid_100" + } + ], + "weights": [ + 0.3, + 0.7 + ], + "action_freq": 30.0 + }, + ... 
+ } + +For each new dataset, you must add an entry to ``opentau/datasets/standard_data_format_mapping.py`` to map the dataset features to the Standard Data Format (see the Standard Data Format section of the :doc:`Concepts documentation </concepts>`). +Alternatively, you can provide a custom mapping in the dataset config using the ``data_features_name_mapping`` and ``loss_type_mapping`` keys. +For example: + +.. code-block:: json + + { + "dataset_mixture": { + "datasets": [ + { + "repo_id": "physical-intelligence/libero", + "data_features_name_mapping": { + "camera0": "observation.images.exterior_image_1_left", + "camera1": "observation.images.exterior_image_2_left" + }, + "loss_type_mapping": "MSE" + }, + { + "repo_id": "lerobot/droid_100" + } + ], + "weights": [ + 0.3, + 0.7 + ], + "action_freq": 30.0 + }, + ... + } + +Computing max token length for dataset mixture +---------------------------------------------- + +Each training config should contain a dataset mixture definition. To evaluate the maximum token length for the dataset mixture, you can run the following command: + +.. code-block:: bash + + python lerobot/scripts/compute_max_token_length.py \ + --target_cfg=<path/to/train_config.json> \ + --output_path=outputs/stats/token_count.json \ + --num_workers=10 + +This will output a token count for each language key in the dataset mixture, and save it to ``outputs/stats/token_count.json``. + + diff --git a/docs/source/tutorials/evaluation.rst b/docs/source/tutorials/evaluation.rst new file mode 100644 index 00000000..ddb03eec --- /dev/null +++ b/docs/source/tutorials/evaluation.rst @@ -0,0 +1,49 @@ +Evaluation +========== + +.. note:: + Make sure you have followed the :doc:`/installation` guide before proceeding. + +Evaluating a policy in Simulation +--------------------------------- + +OpenTau supports evaluation in asynchronous vectorized simulation environments. To evaluate a policy in simulation, you can launch the ``lerobot/scripts/eval.py`` script with ``accelerate launch``. 
+Each accelerate process will only work on its fraction of the tasks, improving throughput. +For example, to evaluate a policy on the LIBERO 10, run: + +.. code-block:: bash + + accelerate launch --config_file <path/to/accelerate_config.yaml> lerobot/scripts/eval.py --config_path=outputs/train/pi05/checkpoints/000040/train_config.json + +.. note:: + You can't pass in a DeepSpeed accelerate config file to ``eval.py`` as DeepSpeed expects an optimizer and a dataloader during ``accelerator.prepare()``, which we do not provide during eval. It is recommended to pass in a DDP config. + +.. note:: + Make sure that the ``EnvConfig`` and ``EvalConfig`` are set to the correct values for the simulation environment in your train config file. + +Evaluating policy in a LIBERO environment +----------------------------------------- + +OpenTau currently supports the `LIBERO benchmark <https://github.com/Lifelong-Robot-Learning/LIBERO>`_. To evaluate the policy on the LIBERO benchmark, add the following section to the training config: + +.. code-block:: javascript + + { + ..., + "env": { + "type": "libero", + "task": "libero_spatial", + "task_ids": [0, 2] + }, + "eval": { + "n_episodes": 8, + "batch_size": 8 + }, + "eval_freq": 25, + ... + } + +This will run the 0th task and 2nd task in ``libero_spatial``. Each task will run 8 simulations in parallel. + +When launched with accelerate, each GPU process will only work on its fraction of the tasks, improving throughput. + diff --git a/docs/source/tutorials/inference.rst b/docs/source/tutorials/inference.rst new file mode 100644 index 00000000..bc2d5454 --- /dev/null +++ b/docs/source/tutorials/inference.rst @@ -0,0 +1,17 @@ +Inference +========= + +.. note:: + Make sure you have followed the :doc:`/installation` guide before proceeding. + +Running inference with a trained model +-------------------------------------- + +To run inference on a trained model, you will need the saved checkpoint folder from training that contains at least these two files: ``train_config.json`` and ``model.safetensors``. 
+If you ran the :doc:`checkpointing and resuming tutorial </tutorials/training>`, you should be able to find the checkpoint config file at ``outputs/train/pi05/checkpoints/000040/train_config.json``. + +To run inference, run the following command: + +.. code-block:: bash + + python lerobot/scripts/inference.py --config_path=outputs/train/pi05/checkpoints/000040/train_config.json diff --git a/docs/source/tutorials/training.rst b/docs/source/tutorials/training.rst new file mode 100644 index 00000000..55939fc9 --- /dev/null +++ b/docs/source/tutorials/training.rst @@ -0,0 +1,54 @@ +Training and Checkpointing +========================== + +.. note:: + Make sure you have followed the :doc:`/installation` guide before proceeding. + +Distributed Training Configuration +---------------------------------- + +For an accelerate config example, see `this config file <../../examples/accelerate_ci_config.yaml>`_ used for our CI pipelines. + +To train a model, run the following command: + +.. code-block:: bash + + accelerate launch lerobot/scripts/train.py --config_path=examples/pi05_config.json + +This uses the default accelerate config file at ``~/.cache/huggingface/accelerate/default_config.yaml``, which is set by running ``accelerate config``. + +Optionally, to use a specific accelerate config file (instead of the default), run: + +.. code-block:: bash + + accelerate launch --config_file=examples/accelerate_ci_config.yaml lerobot/scripts/train.py --config_path=examples/pi05_config.json + + +Checkpointing and Resuming Training +----------------------------------- + +Start training and saving checkpoints: + +.. code-block:: bash + + accelerate launch lerobot/scripts/train.py --config_path=examples/pi05_config.json --output_dir=outputs/train/pi05 --steps 40 --log_freq 5 --save_freq 20 + +A checkpoint should be saved at step 40. The checkpoint should be saved in the directory ``outputs/train/pi05/checkpoints/000040/``. 
+ +The ``model.safetensors`` file is not automatically generated by DeepSpeed's checkpointing during training. To consolidate the sharded model checkpoint files generated by DeepSpeed into a single ``model.safetensors`` file, run: + +.. code-block:: bash + + ./convert_checkpoint.sh outputs/train/pi05/checkpoints/000040/ + +This generates a ``model.safetensors`` file that can be used for inference or resuming training. + +Training can be resumed by running: + +.. code-block:: bash + + accelerate launch lerobot/scripts/train.py --config_path=outputs/train/pi05/checkpoints/000040/train_config.json --resume=true --steps=100 + +.. note:: + When resuming training from a checkpoint, the training step count will continue from the checkpoint's step, but the dataloader will be reset. + diff --git a/lerobot/scripts/unified_model_inference.py b/lerobot/scripts/inference.py similarity index 100% rename from lerobot/scripts/unified_model_inference.py rename to lerobot/scripts/inference.py