diff --git a/.github/workflows/build-publish.yml b/.github/workflows/build-publish.yml
index defe0426..cd1bb4c7 100644
--- a/.github/workflows/build-publish.yml
+++ b/.github/workflows/build-publish.yml
@@ -7,10 +7,6 @@
 name: build-publish

 on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
   release:
     types: [published]

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index bb591f86..087b6968 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -2,7 +2,10 @@
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 name: build

-on: [pull_request, push]
+on:
+  pull_request:
+  push:
+    branches: [main]

 permissions:
   contents: read
diff --git a/.github/workflows/docs-test.yml b/.github/workflows/docs-test.yml
new file mode 100644
index 00000000..f70d5905
--- /dev/null
+++ b/.github/workflows/docs-test.yml
@@ -0,0 +1,37 @@
+name: Test Documentation
+on:
+  pull_request:
+  push:
+    branches: [main]
+permissions:
+  contents: read
+jobs:
+  docs:
+    name: Test documentation
+    runs-on: ubuntu-latest
+    env:
+      SPHINX_GITHUB_CHANGELOG_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+
+      - name: Install dependencies
+        run: pip install -r docs/requirements.txt
+
+      - name: Install Minari
+        run: pip install .[testing]
+
+      - name: Build Datasets Docs
+        run: python docs/_scripts/gen_dataset_md.py
+
+      - name: Build
+        run: sphinx-build -b dirhtml -v docs _build
+
+      - name: Run markdown documentation tests
+        run: pytest docs --markdown-docs -m markdown-docs
+
+      - name: Run tutorial documentation tests
+        run: pytest --nbmake docs/tutorials/**/*.ipynb --nbmake-timeout=600
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 74a74ed6..1f9bee78 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -15,8 +15,10 @@ repos:
     hooks:
       - id: flake8
        args:
-          - '--per-file-ignores=*/__init__.py:F401'
-          - --extend-ignore=E203
+          - '--per-file-ignores=*/__init__.py:F401
+            docs/tutorials/using_datasets/behavioral_cloning.py:E999
+            docs/_scripts/gen_dataset_md.py:E221,E222,E231'
+          - --extend-ignore=E203,W604
           - --max-complexity=205
           - --max-line-length=300
           - --show-source
diff --git a/README.md b/README.md
index 13e01b40..33344cc4 100644
--- a/README.md
+++ b/README.md
@@ -14,13 +14,13 @@ Note: Minari was previously developed under the name Kabuki.
 ## Installation

 To install Minari from [PyPI](https://pypi.org/project/minari/):
-```
+```bash
 pip install minari
 ```

 Note that currently Minari is under a beta release. If you'd like to start testing or contribute to Minari please install this project from source with:
-```bash
+```
 git clone https://github.com/Farama-Foundation/Minari.git
 cd Minari
 pip install -e .
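After an editable install from source, a quick sanity check along these lines confirms that the package imports and can reach the remote dataset index (a hypothetical snippet, not part of the patch; it assumes the package exposes `minari.__version__` and that network access is available):

```python
import minari

# Assumed attribute: most Minari releases expose the package version here.
print(minari.__version__)

# Confirms connectivity to the remote registry backing `minari list remote`.
print(len(minari.list_remote_datasets()), "remote datasets available")
```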
@@ -34,26 +34,20 @@ For an introduction to Minari, see [Basic Usage](https://minari.farama.org/main/
 To check available remote datasets:

-```python
-import minari
-
-minari.list_remote_datasets()
+```bash
+minari list remote
 ```

 To check available local datasets:

-```python
-import minari
-
-minari.list_local_datasets()
+```bash
+minari list local
 ```

 To download a dataset:

-```python
-import minari
-
-minari.download_dataset("door-cloned-v1")
+```bash
+minari download door-human-v1
 ```

 To load a dataset:
@@ -61,7 +55,7 @@ To load a dataset:
 ```python
 import minari

-dataset = minari.load_dataset("door-cloned-v1")
+dataset = minari.load_dataset("door-human-v1")
 ```

 ## Project Maintainers
diff --git a/docs/README.md b/docs/README.md
index e795b12a..0218bafa 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -8,7 +8,7 @@ For more information about how to contribute to the documentation go to our [CON
 Install the required packages and Minari:

-```bash
+```
 git clone https://github.com/Farama-Foundation/Minari.git
 cd Minari
 pip install -e .
@@ -17,14 +17,14 @@ pip install -r docs/requirements.txt

 To build the documentation once:

-```bash
+```
 cd docs
 make dirhtml
 ```

 To rebuild the documentation automatically every time a change is made:

-```bash
+```
 cd docs
 sphinx-autobuild -b dirhtml . _build
 ```
diff --git a/docs/content/basic_usage.md b/docs/content/basic_usage.md
index 88dcf9e8..847e703f 100644
--- a/docs/content/basic_usage.md
+++ b/docs/content/basic_usage.md
@@ -9,11 +9,15 @@ Minari is a standard dataset hosting interface for Offline Reinforcement Learnin
 ## Installation

-To install the most recent version of the Minari library run this command: `pip install minari`
+To install the most recent version of the Minari library run this command:
+
+```bash
+pip install minari
+```

 The beta release is currently under development. If you'd like to start testing or contribute to Minari then please install this project from source with:

-```bash
+```
 git clone https://github.com/Farama-Foundation/Minari.git
 cd Minari
 pip install -e .
@@ -35,22 +39,22 @@ The wrapper is very simple to initialize:
 from minari import DataCollectorV0
 import gymnasium as gym

-env = gym.make('LunarLander-v2')
+env = gym.make('CartPole-v1')
 env = DataCollectorV0(env, record_infos=True, max_buffer_steps=100000)
 ```

 ```{eval-rst}
-In this example, the :class:`minari.DataCollectorV0` wraps the `'LunarLander-v2'` environment from Gymnasium. The arguments passed are ``record_infos`` (when set to ``True`` the wrapper will also collect the returned ``info`` dictionaries to create the dataset), and the ``max_buffer_steps`` argument, which specifies a caching scheduler by giving the number of data steps to store in-memory before moving them to a temporary file on disk. There are more arguments that can be passed to this wrapper, a detailed description of them can be read in the :class:`minari.DataCollectorV0` documentation.
+In this example, the :class:`minari.DataCollectorV0` wraps the `'CartPole-v1'` environment from Gymnasium. The arguments passed are ``record_infos`` (when set to ``True`` the wrapper will also collect the returned ``info`` dictionaries to create the dataset), and the ``max_buffer_steps`` argument, which specifies a caching scheduler by giving the number of data steps to store in-memory before moving them to a temporary file on disk. There are more arguments that can be passed to this wrapper, a detailed description of them can be read in the :class:`minari.DataCollectorV0` documentation.
 ```

 ### Save Dataset

 ```{eval-rst}
-To create a Minari dataset first we need to step the environment with a given policy to allow the :class:`minari.DataCollectorV0` to record the data that will comprise the dataset. This is as simple as just looping through the Gymansium MDP API. For our example we will loop through ``100`` episodes of the ``'LunarLander-v2'`` environment with a random policy.
+To create a Minari dataset first we need to step the environment with a given policy to allow the :class:`minari.DataCollectorV0` to record the data that will comprise the dataset. This is as simple as just looping through the Gymnasium MDP API. For our example we will loop through ``100`` episodes of the ``'CartPole-v1'`` environment with a random policy.

 Finally, we need to create the Minari dataset and give it a name id. This is done by calling the :func:`minari.create_dataset_from_collector_env` Minari function which will move the temporary data recorded in the :class:`minari.DataCollectorV0` environment to a permanent location in the `local Minari root path `_ with the Minari dataset standard structure.

-Extending the code example for the ``'LunarLander-v2'`` environment we can create the Minari dataset as follows:
+Extending the code example for the ``'CartPole-v1'`` environment we can create the Minari dataset as follows:
 ```

 ```python
@@ -58,7 +62,7 @@ import minari
 import gymnasium as gym
 from minari import DataCollectorV0

-env = gym.make('LunarLander-v2')
+env = gym.make('CartPole-v1')
 env = DataCollectorV0(env, record_infos=True, max_buffer_steps=100000)

 total_episodes = 100
@@ -73,7 +77,7 @@ for _ in range(total_episodes):
         if terminated or truncated:
             break

-dataset = minari.create_dataset_from_collector_env(dataset_id="LunarLander-v2-test-v0",
+dataset = minari.create_dataset_from_collector_env(dataset_id="CartPole-v1-test-v0",
                                                    collector_env=env,
                                                    algorithm_name="Random-Policy",
                                                    code_permalink="https://github.com/Farama-Foundation/Minari",
@@ -93,7 +97,7 @@ Once the dataset has been created we can check if the Minari dataset id appears
 >>> import minari
 >>> local_datasets = minari.list_local_datasets()
 >>> local_datasets.keys()
-dict_keys(['LunarLander-v2-test-v0'])
+dict_keys(['CartPole-v1-test-v0'])
 ```

 ```{eval-rst}
@@ -110,7 +114,7 @@ When collecting data with the :class:`minari.DataCollectorV0` wrapper, the recor
 To checkpoint a dataset we can call the :func:`minari.MinariDataset.update_dataset_from_collector_env` method. Every time the function :func:`minari.create_dataset_from_collector_env` or the method :func:`minari.MinariDataset.update_dataset_from_collector_env` are called, the buffers from the :class:`minari.DataCollectorV0` environment are cleared.
-Continuing the ``'LunarLander-v2'`` example we can checkpoint the newly created Minari dataset every 10 episodes as follows:
+Continuing the ``'CartPole-v1'`` example we can checkpoint the newly created Minari dataset every 10 episodes as follows:
 ```

 ```python
@@ -118,12 +122,14 @@ import minari
 import gymnasium as gym
 from minari import DataCollectorV0

-env = gym.make('LunarLander-v2')
+env = gym.make('CartPole-v1')
 env = DataCollectorV0(env, record_infos=True, max_buffer_steps=100000)

 total_episodes = 100
-dataset_name = "LunarLander-v2-test-v0"
+dataset_name = "CartPole-v1-test-v0"
 dataset = None
+if dataset_name in minari.list_local_datasets():
+    dataset = minari.load_dataset(dataset_name)

 for episode_id in range(total_episodes):
     env.reset(seed=123)
@@ -146,8 +152,7 @@ for episode_id in range(total_episodes):
                                                                author="Farama",
                                                                author_email="contact@farama.org")
         else:
-            assert dataset is not None
-            dataset.update_dataset_from_collector_env(env)
+            env.add_to_dataset(dataset)
 ```

 ## Using Minari Datasets
@@ -158,9 +163,9 @@ Minari will only be able to load datasets that are stored in your `local root di
 ```python
 >>> import minari
->>> dataset = minari.load_dataset('LunarLander-v2-test-v0')
+>>> dataset = minari.load_dataset('CartPole-v1-test-v0')
 >>> dataset.name
-'LunarLander-v2-test-v0'
+'CartPole-v1-test-v0'
 ```

 ### Download Remote Datasets
@@ -173,7 +178,7 @@ Minari also has a remote storage in a Google Cloud Platform (GCP) bucket which p
 >>> import minari
 >>> remote_datasets = minari.list_remote_datasets()
 >>> remote_datasets.keys()
-dict_keys(['door-expert-v0', 'door-human-v0', 'door-cloned-v0'])
+dict_keys(['door-expert-v1', 'door-human-v1', 'door-cloned-v1'])
 ```

 ```{eval-rst}
@@ -184,7 +189,7 @@ To download any of the remote datasets into the local `Minari root path
 >>> import minari
->>> minari.download_dataset(dataset_id="door-cloned-v0")
+>>> minari.download_dataset(dataset_id="door-cloned-v1")
 >>> local_datasets = minari.list_local_datasets()
 >>> local_datasets.keys()
 dict_keys(['door-cloned-v0'])
@@ -199,7 +204,7 @@ Minari can retrieve a certain amount of episode shards from the dataset files as
 ```python
 import minari

-dataset = minari.load_dataset("door-cloned-v0")
+dataset = minari.load_dataset("door-human-v1", download=True)
 dataset.set_seed(seed=123)

 for i in range(5):
@@ -232,7 +237,7 @@ To create your own buffers and dataloaders, you may need the ability to iterate
 ```python
 import minari

-dataset = minari.load_dataset("door-cloned-v0")
+dataset = minari.load_dataset("door-human-v1", download=True)
 episodes_generator = dataset.iterate_episodes(episode_indices=[1, 2, 0])

 for episode in episodes_generator:
@@ -256,7 +261,7 @@ In addition, the :class:`minari.MinariDataset` dataset itself is iterable. Howev
 ```python
 import minari

-dataset = minari.load_dataset("door-cloned-v0")
+dataset = minari.load_dataset("door-human-v1", download=True)

 for episode in dataset:
     print(f"EPISODE ID {episode.id}")
@@ -272,12 +277,12 @@ The episodes in the dataset can be filtered before sampling. This is done with a
 ```python
 import minari

-dataset = minari.load_dataset("door-human-v0")
+dataset = minari.load_dataset("door-human-v1", download=True)

 print(f'TOTAL EPISODES ORIGINAL DATASET: {dataset.total_episodes}')

 # get episodes with mean reward greater than 2
-filter_dataset = dataset.filter_episodes(lambda episode: episode["rewards"].attrs.get("mean") > 2)
+filter_dataset = dataset.filter_episodes(lambda episode: episode.rewards.mean() > 2)

 print(f'TOTAL EPISODES FILTER DATASET: {filter_dataset.total_episodes}')
 ```
@@ -298,7 +303,7 @@ Minari provides another utility function to divide a dataset into multiple datas
 ```python
 import minari

-dataset = minari.load_dataset("door-human-v0")
+dataset = minari.load_dataset("door-human-v1", download=True)

 split_datasets = minari.split_dataset(dataset, sizes=[20, 5], seed=123)

@@ -320,7 +325,7 @@ From a :class:`minari.MinariDataset` object we can also recover the Gymnasium en
 ```python
 import minari

-dataset = minari.load_dataset('LunarLander-v2-test-v0')
+dataset = minari.load_dataset('CartPole-v1-test-v0')
 env = dataset.recover_environment()

 env.reset()
diff --git a/docs/content/dataset_standards.md b/docs/content/dataset_standards.md
index 9b1a733d..b702ff70 100644
--- a/docs/content/dataset_standards.md
+++ b/docs/content/dataset_standards.md
@@ -403,22 +403,7 @@ Note how the `Tuple` space elements are assigned corresponding keys of the forma
 The required `datasets` found in the episode groups correspond to the data involved in every Gymnasium step call: `obs, rew, terminated, truncated, info = env.step(action)`: `observations`, `actions`, `rewards`, `terminations`, and `truncations`. These datasets are `np.ndarray` or nested groups of `np.ndarray` and other groups, depending on the observation and action spaces, and the space of all datasets under each required top-level episode key is equal to:

 - `actions`: `shape=(num_steps, action_space_component_shape)`. If the action or observation space is `Dict` or a `Tuple`, then the corresponding entry will be a group instead of a dataset. Within this group, there will be nested groups and datasets, as specified by the action and observation spaces. `Dict` and `Tuple` spaces are represented as groups, and `Box` and `Discrete` spaces are represented as datasets. All datasets at any level under the top-level key `actions` will have the same `num_steps`, but will vary in `action_space_component_shape` on for each particular action space component. For example, a `Dict` space may contain two `Box` spaces with different shapes.
-- `observations`: `shape=(num_steps + 1, observation_space_component_shape)`. Observations nest in the same way as actions if the top level space is a `Tuple` or `Dict` space. The value of `num_steps + 1` is the same for datasets at any level under `observations`. These datasets have an additional element because the initial observation of the environment when calling `obs, info = env.reset()` is also saved. `observation_space_component_shape` will vary between datasets, depending on the shapes of the simple spaces specified in the observation space. You can get a transition of the form `(o_t, a_t, o_t+1)` from the datasets in the episode group, where `o_t` is the current observation, `o_t+1` is the next observation after taking action `a`, and `t` is the discrete transition index
-; as follows:
-
-  ```python
-  next_observations = observations[1:]
-  observations = observations[:-1]
-
-  # get transition at timestep t
-  observation = observations[t] # o_t
-  action = actions[t] # a_t
-  next_observation = next_observations[t] # o_t+1
-  reward = rewards[t] # r_t
-  terminated = terminations[t]
-  truncated = truncations[t]
-  ```
-
+- `observations`: `shape=(num_steps + 1, observation_space_component_shape)`. Observations nest in the same way as actions if the top level space is a `Tuple` or `Dict` space. The value of `num_steps + 1` is the same for datasets at any level under `observations`. These datasets have an additional element because the initial observation of the environment when calling `obs, info = env.reset()` is also saved. `observation_space_component_shape` will vary between datasets, depending on the shapes of the simple spaces specified in the observation space.
 - `rewards`: `shape=(num_steps, 1)`, stores the returned reward in each step.
 - `terminations`: `shape=(num_steps, 1)`, the `dtype` is `np.bool` and the last element value will be `True` if the episode finished due to a `terminated` step return.
 - `truncations`: `shape=(num_steps, 1)`, the `dtype` is `np.bool` and the last element value will be `True` if the episode finished due to a `truncated` step return.
@@ -434,6 +419,7 @@ For example, the `Adroit Hand` environments in the `Gymnasium-Robotics` project
 The following code snippet creates a custom `StepDataCallbak` and adds a new key, `state`, to the returned `StepData` dictionary. `state` is a nested dictionary with `np.ndarray` values and the keys are relevant MuJoCo data that represent the state of the simulation: `qpos`, `qvel`, and some other body positions.

 ```python
+from minari import StepDataCallback
 class AdroitStepDataCallback(StepDataCallback):
     def __call__(self, env, **kwargs):
         step_data = super().__call__(env, **kwargs)
@@ -551,7 +537,8 @@ A Minari dataset is encapsulated in the `MinariDataset` class which allows for i
 Episodes can be accessed from a Minari dataset through iteration, random sampling, or even filtering episodes from a dataset through an arbitrary condition via the `filter_episodes` method. Take the following example where we load the `door-human-v0` dataset and randomly sample 10 episodes:

 ```python
-dataset = minari.load_dataset("door-human-v0")
+import minari
+dataset = minari.load_dataset("door-human-v1", download=True)
 sampled_episodes = dataset.sample_episodes(10)
 ```
diff --git a/docs/tutorials/dataset_creation/point_maze_dataset.py b/docs/tutorials/dataset_creation/point_maze_dataset.py
index 7b53142d..1388c3d4 100644
--- a/docs/tutorials/dataset_creation/point_maze_dataset.py
+++ b/docs/tutorials/dataset_creation/point_maze_dataset.py
@@ -1,4 +1,3 @@
-# fmt: off
 """
 PointMaze D4RL dataset
 =========================================
@@ -14,7 +13,7 @@
 # 2. Then we also need to generate the actions so that the agent can follow the waypoints of the trajectory. For this purpose D4RL implements a PD controller.
 # 3. Finally, to create the Minari dataset, we will wrap the environment with a :class:`minari.DataCollectorV0` and step through it by generating actions with the path planner and waypoint controller.
 #
-# For this tutorial we will be using the ``pointmaze-medium-v3`` environment to collect 1,000,000 transitions. However, any map implementation in the PointMaze environment group can be used.
+# For this tutorial we will be using the ``pointmaze-medium-v3`` environment to collect transition data. However, any map implementation in the PointMaze environment group can be used.
 # Another important factor to take into account is that the environment is continuing, which means that it won't be ``terminated`` when reaching a goal. Instead a new goal target will be randomly selected and the agent
 # will start from the location it's currently at (no ``env.reset()`` required).
 #
@@ -73,6 +72,7 @@ class QIteration:

     Inspired by https://github.com/Farama-Foundation/D4RL/blob/master/d4rl/pointmaze/q_iteration.py
     """
+
     def __init__(self, maze):
         self.maze = maze
         self.num_states = maze.map_length * maze.map_width
@@ -87,7 +87,9 @@ def generate_path(self, current_cell, goal_cell):
         waypoints = {}
         while True:
             action_id = np.argmax(q_values[current_state])
-            next_state, _ = self.get_next_state(current_state, EXPLORATION_ACTIONS[action_id])
+            next_state, _ = self.get_next_state(
+                current_state, EXPLORATION_ACTIONS[action_id]
+            )
             current_cell = self.state_to_cell(current_state)
             waypoints[current_cell] = self.state_to_cell(next_state)
             if waypoints[current_cell] == goal_cell:
@@ -104,7 +106,7 @@ def reward_function(self, desired_cell, current_cell):
         return 0.0

     def state_to_cell(self, state):
-        i = int(state/self.maze.map_width)
+        i = int(state / self.maze.map_width)
         j = state % self.maze.map_width
         return (i, j)

@@ -115,7 +117,7 @@ def get_q_values(self, num_itrs=50, discount=0.99):
         q_fn = np.zeros((self.num_states, self.num_actions))
         for _ in range(num_itrs):
             v_fn = np.max(q_fn, axis=1)
-            q_fn = self.rew_matrix + discount*self.transition_matrix.dot(v_fn)
+            q_fn = self.rew_matrix + discount * self.transition_matrix.dot(v_fn)
         return q_fn

     def compute_reward_matrix(self, goal_cell):
@@ -123,7 +125,9 @@
             for action in range(self.num_actions):
                 next_state, _ = self.get_next_state(state, EXPLORATION_ACTIONS[action])
                 next_cell = self.state_to_cell(next_state)
-                self.rew_matrix[state, action] = self.reward_function(goal_cell, next_cell)
+                self.rew_matrix[state, action] = self.reward_function(
+                    goal_cell, next_cell
+                )

     def compute_transition_matrix(self):
         """Constructs this environment's transition matrix.
@@ -132,7 +136,9 @@
         corresponds to the probability of transitioning into state ns after taking action a from state s.
""" - self.transition_matrix = np.zeros((self.num_states, self.num_actions, self.num_states)) + self.transition_matrix = np.zeros( + (self.num_states, self.num_actions, self.num_states) + ) for state in range(self.num_states): for action_idx, action in EXPLORATION_ACTIONS.items(): next_state, valid = self.get_next_state(state, action) @@ -234,6 +240,7 @@ class WaypointController: Inspired by https://github.com/Farama-Foundation/D4RL/blob/master/d4rl/pointmaze/waypoint_controller.py """ + def __init__(self, maze, gains={"p": 10.0, "d": -1.0}, waypoint_threshold=0.1): self.global_target_xy = np.empty(2) self.maze = maze @@ -246,35 +253,63 @@ def __init__(self, maze, gains={"p": 10.0, "d": -1.0}, waypoint_threshold=0.1): def compute_action(self, obs): # Check if we need to generate new waypoint path due to change in global target - if np.linalg.norm(self.global_target_xy - obs['desired_goal']) > 1e-3 or self.waypoint_targets is None: + if ( + np.linalg.norm(self.global_target_xy - obs["desired_goal"]) > 1e-3 + or self.waypoint_targets is None + ): # Convert xy to cell id - achieved_goal_cell = tuple(self.maze.cell_xy_to_rowcol(obs['achieved_goal'])) - self.global_target_id = tuple(self.maze.cell_xy_to_rowcol(obs['desired_goal'])) - self.global_target_xy = obs['desired_goal'] - - self.waypoint_targets = self.maze_solver.generate_path(achieved_goal_cell, self.global_target_id) + achieved_goal_cell = tuple( + self.maze.cell_xy_to_rowcol(obs["achieved_goal"]) + ) + self.global_target_id = tuple( + self.maze.cell_xy_to_rowcol(obs["desired_goal"]) + ) + self.global_target_xy = obs["desired_goal"] + + self.waypoint_targets = self.maze_solver.generate_path( + achieved_goal_cell, self.global_target_id + ) # Check if the waypoint dictionary is empty # If empty then the ball is already in the target cell location if self.waypoint_targets: - self.current_control_target_id = self.waypoint_targets[achieved_goal_cell] - self.current_control_target_xy = self.maze.cell_rowcol_to_xy(np.array(self.current_control_target_id)) + self.current_control_target_id = self.waypoint_targets[ + achieved_goal_cell + ] + self.current_control_target_xy = self.maze.cell_rowcol_to_xy( + np.array(self.current_control_target_id) + ) else: - self.waypoint_targets[self.current_control_target_id] = self.current_control_target_id + self.waypoint_targets[ + self.current_control_target_id + ] = self.current_control_target_id self.current_control_target_id = self.global_target_id self.current_control_target_xy = self.global_target_xy # Check if we need to go to the next waypoint - dist = np.linalg.norm(self.current_control_target_xy - obs['achieved_goal']) - if dist <= self.waypoint_threshold and self.current_control_target_id != self.global_target_id: - self.current_control_target_id = self.waypoint_targets[self.current_control_target_id] + dist = np.linalg.norm(self.current_control_target_xy - obs["achieved_goal"]) + if ( + dist <= self.waypoint_threshold + and self.current_control_target_id != self.global_target_id + ): + self.current_control_target_id = self.waypoint_targets[ + self.current_control_target_id + ] # If target is global goal go directly to goal position if self.current_control_target_id == self.global_target_id: self.current_control_target_xy = self.global_target_xy else: - self.current_control_target_xy = self.maze.cell_rowcol_to_xy(np.array(self.current_control_target_id)) - np.random.uniform(size=(2,))*0.2 - - action = self.gains['p'] * (self.current_control_target_xy - obs['achieved_goal']) + self.gains['d'] * 
obs['observation'][2:] + self.current_control_target_xy = ( + self.maze.cell_rowcol_to_xy( + np.array(self.current_control_target_id) + ) + - np.random.uniform(size=(2,)) * 0.2 + ) + + action = ( + self.gains["p"] * (self.current_control_target_xy - obs["achieved_goal"]) + + self.gains["d"] * obs["observation"][2:] + ) action = np.clip(action, -1, 1) return action @@ -294,6 +329,7 @@ def compute_action(self, obs): # case we will be generating new hdf5 datasets ``qpos``, ``qvel``, and ``goal`` in the ``infos`` subgroup of each episode group. # + class PointMazeStepDataCallback(StepDataCallback): """Add environment state information to 'infos'. @@ -301,78 +337,73 @@ class PointMazeStepDataCallback(StepDataCallback): never terminated or truncated. This callback overrides the truncation value to True when the step returns a True 'succes' key in 'infos'. This way we can divide the Minari dataset into different trajectories. """ - def __call__(self, env, obs, info, action=None, rew=None, terminated=None, truncated=None): - qpos = obs['observation'][:2] - qvel = obs['observation'][2:] - goal = obs['desired_goal'] + + def __call__( + self, env, obs, info, action=None, rew=None, terminated=None, truncated=None + ): + qpos = obs["observation"][:2] + qvel = obs["observation"][2:] + goal = obs["desired_goal"] step_data = super().__call__(env, obs, info, action, rew, terminated, truncated) - if step_data['infos']['success']: - step_data['truncations'] = True - step_data['infos']['qpos'] = qpos - step_data['infos']['qvel'] = qvel - step_data['infos']['goal'] = goal + if step_data["infos"]["success"]: + step_data["truncations"] = True + step_data["infos"]["qpos"] = qpos + step_data["infos"]["qvel"] = qvel + step_data["infos"]["goal"] = goal return step_data + # %% # Collect Data and Create Minari Dataset # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Now we will finally perform our data collection and create the Minari dataset. This is as simple as wrapping the environment with # the :class:`minari.DataCollectorV0` wrapper and add the custom callback methods. Once we've done this we can step the environment with the ``WayPointController`` -# as our policy. Don't forget to initialize the environment with a ``max_episode_steps`` of ``1,000,000`` since that's the total amount of steps we want to +# as our policy. For the tutorial, we collect 10,000 transitions. Thus, we initialize the environment with ``max_episode_steps=10,000`` since that's the total amount of steps we want to # collect for our dataset and we don't want the environment to get ``truncated`` during the data collection due to a time limit. # -# To create the Minari dataset we will first create the dataset by calling the function :func:`minari.create_dataset_from_collector_env`, and then checkpoint the dataset -# every ``200,000`` steps taken by the environment. -# dataset_name = "pointmaze-umaze-v0" - -# Check if dataset already exist and load to add more data -if dataset_name in minari.list_local_datasets(): - dataset = minari.load_dataset(dataset_name) -else: - dataset = None +total_steps = 10_000 # continuing task => the episode doesn't terminate or truncate when reaching a goal # it will generate a new target. 
For this reason we set the maximum episode steps to # the desired size of our Minari dataset (evade truncation due to time limit) -env = gym.make('PointMaze_Medium-v3', continuing_task=True, max_episode_steps=1e6) +env = gym.make("PointMaze_Medium-v3", continuing_task=True, max_episode_steps=total_steps) # Data collector wrapper to save temporary data while stepping. Characteristics: # * Custom StepDataCallback to add extra state information to 'infos' and divide dataset in different episodes by overridng # truncation value to True when target is reached # * Record the 'info' value of every step -collector_env = DataCollectorV0(env, step_data_callback=PointMazeStepDataCallback, record_infos=True) +collector_env = DataCollectorV0( + env, step_data_callback=PointMazeStepDataCallback, record_infos=True +) obs, _ = collector_env.reset(seed=123) waypoint_controller = WaypointController(maze=env.maze) -for n_step in range(int(1e6)): +for n_step in range(int(total_steps)): action = waypoint_controller.compute_action(obs) # Add some noise to each step action - action += np.random.randn(*action.shape)*0.5 - action = np.clip(action, env.action_space.low, env.action_space.high, dtype=np.float32) + action += np.random.randn(*action.shape) * 0.5 + action = np.clip( + action, env.action_space.low, env.action_space.high, dtype=np.float32 + ) obs, rew, terminated, truncated, info = collector_env.step(action) - if (n_step + 1) % 200000 == 0: - print('STEPS RECORDED:') - print(n_step) - if dataset is None: - dataset = minari.create_dataset_from_collector_env(collector_env=collector_env, - dataset_name=dataset_name, - algorithm_name="QIteration", - code_permalink="https://github.com/Farama-Foundation/Minari/blob/main/docs/tutorials/dataset_creation/point_maze_dataset.py", - author="Rodrigo Perez-Vicente", - author_email="rperezvicente@farama.org") - else: - # Update local Minari dataset every 200000 steps. - # This works as a checkpoint to not lose the already collected data - dataset.update_dataset_from_collector_env(collector_env) + +dataset = minari.create_dataset_from_collector_env( + collector_env=collector_env, + dataset_id=dataset_name, + algorithm_name="QIteration", + code_permalink="https://github.com/Farama-Foundation/Minari/blob/main/docs/tutorials/dataset_creation/point_maze_dataset.py", + author="Rodrigo Perez-Vicente", + author_email="rperezvicente@farama.org", +) # %% diff --git a/docs/tutorials/using_datasets/behavioral_cloning.py b/docs/tutorials/using_datasets/behavioral_cloning.py index b6d2fee8..c3d79789 100644 --- a/docs/tutorials/using_datasets/behavioral_cloning.py +++ b/docs/tutorials/using_datasets/behavioral_cloning.py @@ -1,4 +1,3 @@ -# fmt: off """ Behavioral cloning with PyTorch ========================================= @@ -12,10 +11,10 @@ # Policy training # ~~~~~~~~~~~~~~~~~~~ # To train the expert policy, we use `SB3 `_'s `rl-zoo3 `_ library. -# After installing the library with ``pip install rl_zoo3``, -# we train a PPO agent on the environment with the following command: -# -# ``python -m rl_zoo3.train --algo ppo --env CartPole-v1`` +# After installing the library, we train a PPO agent on the environment: + +!pip install rl_zoo3 +!python -m rl_zoo3.train --algo ppo --env CartPole-v1`` # %% # This will generate a new folder named `log` with the expert policy. 
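Since the point_maze_dataset.py tutorial above now creates its dataset once at the end of collection instead of checkpointing every 200,000 steps, a quick way to sanity-check the result is to load it back and sample a few episodes. A minimal sketch, assuming the `pointmaze-umaze-v0` id used above and a completed collection run:

```python
import minari

# Load the dataset written by the collection loop above.
dataset = minari.load_dataset("pointmaze-umaze-v0")
print(f"Total episodes: {dataset.total_episodes}")

# Episodes end whenever the custom StepDataCallback flags a success as a truncation,
# so their lengths vary even though the environment itself never terminates.
for episode in dataset.sample_episodes(3):
    print(episode.id, len(episode.rewards))
```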
@@ -41,7 +40,6 @@
 import minari
 from minari import DataCollectorV0

-
 torch.manual_seed(42)

 # %%
@@ -64,13 +62,14 @@
         if terminated or truncated:
             break

-dataset = minari.create_dataset_from_collector_env(dataset_id="CartPole-v1-expert",
-                                                   collector_env=env,
-                                                   algorithm_name="ExpertPolicy",
-                                                   code_permalink="https://minari.farama.org/tutorials/behavioral_cloning",
-                                                   author="Farama",
-                                                   author_email="contact@farama.org"
-                                                   )
+dataset = minari.create_dataset_from_collector_env(
+    dataset_id="CartPole-v1-expert",
+    collector_env=env,
+    algorithm_name="ExpertPolicy",
+    code_permalink="https://minari.farama.org/tutorials/behavioral_cloning",
+    author="Farama",
+    author_email="contact@farama.org"
+)

 # %%
 # Once executing the script, the dataset will be saved on your disk. You can display the list of datasets with ``minari list local`` command.
diff --git a/minari/storage/hosting.py b/minari/storage/hosting.py
index 375c66f7..ef03e1d0 100644
--- a/minari/storage/hosting.py
+++ b/minari/storage/hosting.py
@@ -68,7 +68,7 @@ def _upload_local_directory_to_gcs(local_path, bucket, gcs_path):

     if len(combined_datasets) > 0:
         print(
-            f"Dataset {dataset_id} is formed by a combination of the following datasets:"
+            f"Dataset {dataset_id} is formed by a combination of the following datasets: "
         )
         for name in combined_datasets:
             print(f"\t{name}")
@@ -202,10 +202,10 @@ def download_dataset(dataset_id: str, force_download: bool = False):
     # If the dataset is a combination of other datasets download the subdatasets recursively
     if len(combined_datasets) > 0:
         print(
-            f"\nDataset {dataset_id} is formed by a combination of the following datasets:"
+            f"\nDataset {dataset_id} is formed by a combination of the following datasets: "
        )
         for name in combined_datasets:
-            print(f" * {name}")
+            print(f"  * {name}")
         print("\nDownloading extra datasets ...")
         for dataset in combined_datasets:
             download_dataset(dataset_id=dataset)
diff --git a/pyproject.toml b/pyproject.toml
index 43e7a89e..2e2f2430 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,23 +24,26 @@ classifiers = [
     'Topic :: Scientific/Engineering :: Artificial Intelligence',
 ]
 dependencies = [
-    "numpy >=1.21.0",
+    "numpy>=1.21.0",
     "h5py>=3.8.0",
     "tqdm>=4.65.0",
     "typing_extensions>=4.4.0",
-    "google-cloud-storage==2.5.0",
-    "typer[all]==0.9.0",
+    "google-cloud-storage>=2.5.0",
+    "typer[all]>=0.9.0",
     "gymnasium>=0.28.1",
-    "portion==2.4.0",
-    "packaging==23.1",
+    "portion>=2.4.0",
+    "packaging>=23.1",
 ]
 dynamic = ["version"]

 [project.optional-dependencies]
 testing = [
-    "pytest ==7.1.3",
-    "gymnasium-robotics>=1.2.1",
-    "imageio >=2.14.1"
+    "pytest>=7.1.3",
+    "pytest-markdown-docs>=0.4.3",
+    "nbmake",
+    "gymnasium-robotics>=1.2.3",
+    "imageio>=2.14.1",
+    "minigrid>=2.2.0",
 ]

 [project.urls]
@@ -63,6 +66,7 @@ include = ["minari", "minari.*"]

 [tool.black]
 safe = true
+force-exclude = ["docs/tutorials/using_datasets/behavioral_cloning.py"]

 [tool.isort]
 atomic = true
@@ -105,3 +109,4 @@ reportUnboundVariable = "warning"
 [tool.pytest.ini_options]
 filterwarnings = ['ignore:.*The environment .* is out of date.*']
 # filterwarnings = ['ignore:.*step API.*:DeprecationWarning']
+addopts = ["--ignore=docs/tutorials"]
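Once the `CartPole-v1-expert` dataset above has been created, the recorded episodes can be flattened into (observation, action) pairs for behavioral cloning. The following is an illustrative sketch only, not the tutorial's exact training code; it assumes a `Box` observation space and `Discrete` actions, so `observations` carries one extra entry per episode (the reset observation) that is dropped to align with `actions`:

```python
import minari
import torch

dataset = minari.load_dataset("CartPole-v1-expert")

obs_list, act_list = [], []
for episode in dataset:
    # Drop the final observation so each observation lines up with the action taken from it.
    obs_list.append(torch.as_tensor(episode.observations[:-1], dtype=torch.float32))
    act_list.append(torch.as_tensor(episode.actions, dtype=torch.int64))

observations = torch.cat(obs_list)
actions = torch.cat(act_list)
print(observations.shape, actions.shape)
```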