diff --git a/.github/workflows/build-publish.yml b/.github/workflows/build-publish.yml
index defe0426..cd1bb4c7 100644
--- a/.github/workflows/build-publish.yml
+++ b/.github/workflows/build-publish.yml
@@ -7,10 +7,6 @@
 name: build-publish

 on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
   release:
     types: [published]

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index bb591f86..087b6968 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -2,7 +2,10 @@
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 name: build

-on: [pull_request, push]
+on:
+  pull_request:
+  push:
+    branches: [main]

 permissions:
   contents: read
diff --git a/.github/workflows/docs-test.yml b/.github/workflows/docs-test.yml
new file mode 100644
index 00000000..f70d5905
--- /dev/null
+++ b/.github/workflows/docs-test.yml
@@ -0,0 +1,37 @@
+name: Test Documentation
+on:
+  pull_request:
+  push:
+    branches: [main]
+permissions:
+  contents: read
+jobs:
+  docs:
+    name: Test documentation
+    runs-on: ubuntu-latest
+    env:
+      SPHINX_GITHUB_CHANGELOG_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+
+      - name: Install dependencies
+        run: pip install -r docs/requirements.txt
+
+      - name: Install Minari
+        run: pip install .[testing]
+
+      - name: Build Datasets Docs
+        run: python docs/_scripts/gen_dataset_md.py
+
+      - name: Build
+        run: sphinx-build -b dirhtml -v docs _build
+
+      - name: Run markdown documentation tests
+        run: pytest docs --markdown-docs -m markdown-docs
+
+      - name: Run tutorial documentation tests
+        run: pytest --nbmake docs/tutorials/**/*.ipynb --nbmake-timeout=600
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 74a74ed6..1f9bee78 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -15,8 +15,10 @@ repos:
     hooks:
       - id: flake8
        args:
-          - '--per-file-ignores=*/__init__.py:F401'
-          - --extend-ignore=E203
+          - '--per-file-ignores=*/__init__.py:F401
+            docs/tutorials/using_datasets/behavioral_cloning.py:E999
+            docs/_scripts/gen_dataset_md.py:E221,E222,E231'
+          - --extend-ignore=E203,W604
           - --max-complexity=205
           - --max-line-length=300
           - --show-source
diff --git a/README.md b/README.md
index 13e01b40..33344cc4 100644
--- a/README.md
+++ b/README.md
@@ -14,13 +14,13 @@ Note: Minari was previously developed under the name Kabuki.
 ## Installation

 To install Minari from [PyPI](https://pypi.org/project/minari/):
-```
+```bash
 pip install minari
 ```

 Note that currently Minari is under a beta release. If you'd like to start testing or contribute to Minari please install this project from source with:
-```bash
+```
 git clone https://github.com/Farama-Foundation/Minari.git
 cd Minari
 pip install -e .
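After an editable install from source, a quick sanity check along these lines confirms that the package imports and can reach the remote dataset index (a hypothetical snippet, not part of the patch; it assumes the package exposes `minari.__version__` and that network access is available):

```python
import minari

# Assumed attribute: most Minari releases expose the package version here.
print(minari.__version__)

# Confirms connectivity to the remote registry backing `minari list remote`.
print(len(minari.list_remote_datasets()), "remote datasets available")
```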
@@ -34,26 +34,20 @@ For an introduction to Minari, see [Basic Usage](https://minari.farama.org/main/
 To check available remote datasets:

-```python
-import minari
-
-minari.list_remote_datasets()
+```bash
+minari list remote
 ```

 To check available local datasets:

-```python
-import minari
-
-minari.list_local_datasets()
+```bash
+minari list local
 ```

 To download a dataset:

-```python
-import minari
-
-minari.download_dataset("door-cloned-v1")
+```bash
+minari download door-human-v1
 ```

 To load a dataset:
@@ -61,7 +55,7 @@ To load a dataset:
 ```python
 import minari

-dataset = minari.load_dataset("door-cloned-v1")
+dataset = minari.load_dataset("door-human-v1")
 ```

 ## Project Maintainers
diff --git a/docs/README.md b/docs/README.md
index e795b12a..0218bafa 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -8,7 +8,7 @@ For more information about how to contribute to the documentation go to our [CON
 Install the required packages and Minari:

-```bash
+```
 git clone https://github.com/Farama-Foundation/Minari.git
 cd Minari
 pip install -e .
@@ -17,14 +17,14 @@ pip install -r docs/requirements.txt

 To build the documentation once:

-```bash
+```
 cd docs
 make dirhtml
 ```

 To rebuild the documentation automatically every time a change is made:

-```bash
+```
 cd docs
 sphinx-autobuild -b dirhtml . _build
 ```
diff --git a/docs/content/basic_usage.md b/docs/content/basic_usage.md
index 88dcf9e8..847e703f 100644
--- a/docs/content/basic_usage.md
+++ b/docs/content/basic_usage.md
@@ -9,11 +9,15 @@ Minari is a standard dataset hosting interface for Offline Reinforcement Learnin
 ## Installation

-To install the most recent version of the Minari library run this command: `pip install minari`
+To install the most recent version of the Minari library run this command:
+
+```bash
+pip install minari
+```

 The beta release is currently under development. If you'd like to start testing or contribute to Minari then please install this project from source with:

-```bash
+```
 git clone https://github.com/Farama-Foundation/Minari.git
 cd Minari
 pip install -e .
@@ -35,22 +39,22 @@ The wrapper is very simple to initialize:
 from minari import DataCollectorV0
 import gymnasium as gym

-env = gym.make('LunarLander-v2')
+env = gym.make('CartPole-v1')
 env = DataCollectorV0(env, record_infos=True, max_buffer_steps=100000)
 ```

 ```{eval-rst}
-In this example, the :class:`minari.DataCollectorV0` wraps the `'LunarLander-v2'` environment from Gymnasium. The arguments passed are ``record_infos`` (when set to ``True`` the wrapper will also collect the returned ``info`` dictionaries to create the dataset), and the ``max_buffer_steps`` argument, which specifies a caching scheduler by giving the number of data steps to store in-memory before moving them to a temporary file on disk. There are more arguments that can be passed to this wrapper, a detailed description of them can be read in the :class:`minari.DataCollectorV0` documentation.
+In this example, the :class:`minari.DataCollectorV0` wraps the `'CartPole-v1'` environment from Gymnasium. The arguments passed are ``record_infos`` (when set to ``True`` the wrapper will also collect the returned ``info`` dictionaries to create the dataset), and the ``max_buffer_steps`` argument, which specifies a caching scheduler by giving the number of data steps to store in-memory before moving them to a temporary file on disk. There are more arguments that can be passed to this wrapper, a detailed description of them can be read in the :class:`minari.DataCollectorV0` documentation.
 ```

 ### Save Dataset

 ```{eval-rst}
-To create a Minari dataset first we need to step the environment with a given policy to allow the :class:`minari.DataCollectorV0` to record the data that will comprise the dataset. This is as simple as just looping through the Gymansium MDP API. For our example we will loop through ``100`` episodes of the ``'LunarLander-v2'`` environment with a random policy.
+To create a Minari dataset first we need to step the environment with a given policy to allow the :class:`minari.DataCollectorV0` to record the data that will comprise the dataset. This is as simple as just looping through the Gymnasium MDP API. For our example we will loop through ``100`` episodes of the ``'CartPole-v1'`` environment with a random policy.

 Finally, we need to create the Minari dataset and give it a name id. This is done by calling the :func:`minari.create_dataset_from_collector_env` Minari function which will move the temporary data recorded in the :class:`minari.DataCollectorV0` environment to a permanent location in the `local Minari root path `_ with the Minari dataset standard structure.

-Extending the code example for the ``'LunarLander-v2'`` environment we can create the Minari dataset as follows:
+Extending the code example for the ``'CartPole-v1'`` environment we can create the Minari dataset as follows:
 ```

 ```python
@@ -58,7 +62,7 @@ import minari
 import gymnasium as gym
 from minari import DataCollectorV0

-env = gym.make('LunarLander-v2')
+env = gym.make('CartPole-v1')
 env = DataCollectorV0(env, record_infos=True, max_buffer_steps=100000)

 total_episodes = 100
@@ -73,7 +77,7 @@ for _ in range(total_episodes):
         if terminated or truncated:
             break

-dataset = minari.create_dataset_from_collector_env(dataset_id="LunarLander-v2-test-v0",
+dataset = minari.create_dataset_from_collector_env(dataset_id="CartPole-v1-test-v0",
                                                    collector_env=env,
                                                    algorithm_name="Random-Policy",
                                                    code_permalink="https://github.com/Farama-Foundation/Minari",
@@ -93,7 +97,7 @@ Once the dataset has been created we can check if the Minari dataset id appears
 >>> import minari
 >>> local_datasets = minari.list_local_datasets()
 >>> local_datasets.keys()
-dict_keys(['LunarLander-v2-test-v0'])
+dict_keys(['CartPole-v1-test-v0'])
 ```

 ```{eval-rst}
@@ -110,7 +114,7 @@ When collecting data with the :class:`minari.DataCollectorV0` wrapper, the recor
 To checkpoint a dataset we can call the :func:`minari.MinariDataset.update_dataset_from_collector_env` method. Every time the function :func:`minari.create_dataset_from_collector_env` or the method :func:`minari.MinariDataset.update_dataset_from_collector_env` are called, the buffers from the :class:`minari.DataCollectorV0` environment are cleared.
-Continuing the ``'LunarLander-v2'`` example we can checkpoint the newly created Minari dataset every 10 episodes as follows:
+Continuing the ``'CartPole-v1'`` example we can checkpoint the newly created Minari dataset every 10 episodes as follows:
 ```

 ```python
@@ -118,12 +122,14 @@ import minari
 import gymnasium as gym
 from minari import DataCollectorV0

-env = gym.make('LunarLander-v2')
+env = gym.make('CartPole-v1')
 env = DataCollectorV0(env, record_infos=True, max_buffer_steps=100000)

 total_episodes = 100
-dataset_name = "LunarLander-v2-test-v0"
+dataset_name = "CartPole-v1-test-v0"
 dataset = None
+if dataset_name in minari.list_local_datasets():
+    dataset = minari.load_dataset(dataset_name)

 for episode_id in range(total_episodes):
     env.reset(seed=123)
@@ -146,8 +152,7 @@ for episode_id in range(total_episodes):
                                                                author="Farama",
                                                                author_email="contact@farama.org")
         else:
-            assert dataset is not None
-            dataset.update_dataset_from_collector_env(env)
+            env.add_to_dataset(dataset)
 ```

 ## Using Minari Datasets
@@ -158,9 +163,9 @@ Minari will only be able to load datasets that are stored in your `local root di
 ```python
 >>> import minari
->>> dataset = minari.load_dataset('LunarLander-v2-test-v0')
+>>> dataset = minari.load_dataset('CartPole-v1-test-v0')
 >>> dataset.name
-'LunarLander-v2-test-v0'
+'CartPole-v1-test-v0'
 ```

 ### Download Remote Datasets
@@ -173,7 +178,7 @@ Minari also has a remote storage in a Google Cloud Platform (GCP) bucket which p
 >>> import minari
 >>> remote_datasets = minari.list_remote_datasets()
 >>> remote_datasets.keys()
-dict_keys(['door-expert-v0', 'door-human-v0', 'door-cloned-v0'])
+dict_keys(['door-expert-v1', 'door-human-v1', 'door-cloned-v1'])
 ```

 ```{eval-rst}
@@ -184,7 +189,7 @@ To download any of the remote datasets into the local `Minari root path
 >>> import minari
->>> minari.download_dataset(dataset_id="door-cloned-v0")
+>>> minari.download_dataset(dataset_id="door-cloned-v1")
 >>> local_datasets = minari.list_local_datasets()
 >>> local_datasets.keys()
 dict_keys(['door-cloned-v0'])
@@ -199,7 +204,7 @@ Minari can retrieve a certain amount of episode shards from the dataset files as
 ```python
 import minari

-dataset = minari.load_dataset("door-cloned-v0")
+dataset = minari.load_dataset("door-human-v1", download=True)
 dataset.set_seed(seed=123)

 for i in range(5):
@@ -232,7 +237,7 @@ To create your own buffers and dataloaders, you may need the ability to iterate
 ```python
 import minari

-dataset = minari.load_dataset("door-cloned-v0")
+dataset = minari.load_dataset("door-human-v1", download=True)
 episodes_generator = dataset.iterate_episodes(episode_indices=[1, 2, 0])

 for episode in episodes_generator:
@@ -256,7 +261,7 @@ In addition, the :class:`minari.MinariDataset` dataset itself is iterable. Howev
 ```python
 import minari

-dataset = minari.load_dataset("door-cloned-v0")
+dataset = minari.load_dataset("door-human-v1", download=True)

 for episode in dataset:
     print(f"EPISODE ID {episode.id}")
@@ -272,12 +277,12 @@ The episodes in the dataset can be filtered before sampling. This is done with a
 ```python
 import minari

-dataset = minari.load_dataset("door-human-v0")
+dataset = minari.load_dataset("door-human-v1", download=True)

 print(f'TOTAL EPISODES ORIGINAL DATASET: {dataset.total_episodes}')

 # get episodes with mean reward greater than 2
-filter_dataset = dataset.filter_episodes(lambda episode: episode["rewards"].attrs.get("mean") > 2)
+filter_dataset = dataset.filter_episodes(lambda episode: episode.rewards.mean() > 2)

 print(f'TOTAL EPISODES FILTER DATASET: {filter_dataset.total_episodes}')
 ```
@@ -298,7 +303,7 @@ Minari provides another utility function to divide a dataset into multiple datas
 ```python
 import minari

-dataset = minari.load_dataset("door-human-v0")
+dataset = minari.load_dataset("door-human-v1", download=True)

 split_datasets = minari.split_dataset(dataset, sizes=[20, 5], seed=123)

@@ -320,7 +325,7 @@ From a :class:`minari.MinariDataset` object we can also recover the Gymnasium en
 ```python
 import minari

-dataset = minari.load_dataset('LunarLander-v2-test-v0')
+dataset = minari.load_dataset('CartPole-v1-test-v0')
 env = dataset.recover_environment()

 env.reset()
diff --git a/docs/content/dataset_standards.md b/docs/content/dataset_standards.md
index 9b1a733d..b702ff70 100644
--- a/docs/content/dataset_standards.md
+++ b/docs/content/dataset_standards.md
@@ -403,22 +403,7 @@ Note how the `Tuple` space elements are assigned corresponding keys of the forma
 The required `datasets` found in the episode groups correspond to the data involved in every Gymnasium step call: `obs, rew, terminated, truncated, info = env.step(action)`: `observations`, `actions`, `rewards`, `terminations`, and `truncations`. These datasets are `np.ndarray` or nested groups of `np.ndarray` and other groups, depending on the observation and action spaces, and the space of all datasets under each required top-level episode key is equal to:

 - `actions`: `shape=(num_steps, action_space_component_shape)`. If the action or observation space is `Dict` or a `Tuple`, then the corresponding entry will be a group instead of a dataset. Within this group, there will be nested groups and datasets, as specified by the action and observation spaces. `Dict` and `Tuple` spaces are represented as groups, and `Box` and `Discrete` spaces are represented as datasets. All datasets at any level under the top-level key `actions` will have the same `num_steps`, but will vary in `action_space_component_shape` on for each particular action space component. For example, a `Dict` space may contain two `Box` spaces with different shapes.
-- `observations`: `shape=(num_steps + 1, observation_space_component_shape)`. Observations nest in the same way as actions if the top level space is a `Tuple` or `Dict` space. The value of `num_steps + 1` is the same for datasets at any level under `observations`. These datasets have an additional element because the initial observation of the environment when calling `obs, info = env.reset()` is also saved. `observation_space_component_shape` will vary between datasets, depending on the shapes of the simple spaces specified in the observation space. You can get a transition of the form `(o_t, a_t, o_t+1)` from the datasets in the episode group, where `o_t` is the current observation, `o_t+1` is the next observation after taking action `a`, and `t` is the discrete transition index
-; as follows:
-
-  ```python
-  next_observations = observations[1:]
-  observations = observations[:-1]
-
-  # get transition at timestep t
-  observation = observations[t] # o_t
-  action = actions[t] # a_t
-  next_observation = next_observations[t] # o_t+1
-  reward = rewards[t] # r_t
-  terminated = terminations[t]
-  truncated = truncations[t]
-  ```
-
+- `observations`: `shape=(num_steps + 1, observation_space_component_shape)`. Observations nest in the same way as actions if the top level space is a `Tuple` or `Dict` space. The value of `num_steps + 1` is the same for datasets at any level under `observations`. These datasets have an additional element because the initial observation of the environment when calling `obs, info = env.reset()` is also saved. `observation_space_component_shape` will vary between datasets, depending on the shapes of the simple spaces specified in the observation space.
 - `rewards`: `shape=(num_steps, 1)`, stores the returned reward in each step.
 - `terminations`: `shape=(num_steps, 1)`, the `dtype` is `np.bool` and the last element value will be `True` if the episode finished due to a `terminated` step return.
 - `truncations`: `shape=(num_steps, 1)`, the `dtype` is `np.bool` and the last element value will be `True` if the episode finished due to a `truncated` step return.
@@ -434,6 +419,7 @@ For example, the `Adroit Hand` environments in the `Gymnasium-Robotics` project
 The following code snippet creates a custom `StepDataCallbak` and adds a new key, `state`, to the returned `StepData` dictionary. `state` is a nested dictionary with `np.ndarray` values and the keys are relevant MuJoCo data that represent the state of the simulation: `qpos`, `qvel`, and some other body positions.

 ```python
+from minari import StepDataCallback
 class AdroitStepDataCallback(StepDataCallback):
     def __call__(self, env, **kwargs):
         step_data = super().__call__(env, **kwargs)
@@ -551,7 +537,8 @@ A Minari dataset is encapsulated in the `MinariDataset` class which allows for i
 Episodes can be accessed from a Minari dataset through iteration, random sampling, or even filtering episodes from a dataset through an arbitrary condition via the `filter_episodes` method. Take the following example where we load the `door-human-v0` dataset and randomly sample 10 episodes:

 ```python
-dataset = minari.load_dataset("door-human-v0")
+import minari
+dataset = minari.load_dataset("door-human-v1", download=True)
 sampled_episodes = dataset.sample_episodes(10)
 ```
diff --git a/docs/tutorials/dataset_creation/point_maze_dataset.py b/docs/tutorials/dataset_creation/point_maze_dataset.py
index 7b53142d..1388c3d4 100644
--- a/docs/tutorials/dataset_creation/point_maze_dataset.py
+++ b/docs/tutorials/dataset_creation/point_maze_dataset.py
@@ -1,4 +1,3 @@
-# fmt: off
 """
 PointMaze D4RL dataset
 =========================================
@@ -14,7 +13,7 @@
 # 2. Then we also need to generate the actions so that the agent can follow the waypoints of the trajectory. For this purpose D4RL implements a PD controller.
 # 3. Finally, to create the Minari dataset, we will wrap the environment with a :class:`minari.DataCollectorV0` and step through it by generating actions with the path planner and waypoint controller.
 #
-# For this tutorial we will be using the ``pointmaze-medium-v3`` environment to collect 1,000,000 transitions. However, any map implementation in the PointMaze environment group can be used.
+# For this tutorial we will be using the ``pointmaze-medium-v3`` environment to collect transition data. However, any map implementation in the PointMaze environment group can be used.
 # Another important factor to take into account is that the environment is continuing, which means that it won't be ``terminated`` when reaching a goal. Instead a new goal target will be randomly selected and the agent
 # will start from the location it's currently at (no ``env.reset()`` required).
 #
@@ -73,6 +72,7 @@ class QIteration:

     Inspired by https://github.com/Farama-Foundation/D4RL/blob/master/d4rl/pointmaze/q_iteration.py
     """
+
     def __init__(self, maze):
         self.maze = maze
         self.num_states = maze.map_length * maze.map_width
@@ -87,7 +87,9 @@ def generate_path(self, current_cell, goal_cell):
         waypoints = {}
         while True:
             action_id = np.argmax(q_values[current_state])
-            next_state, _ = self.get_next_state(current_state, EXPLORATION_ACTIONS[action_id])
+            next_state, _ = self.get_next_state(
+                current_state, EXPLORATION_ACTIONS[action_id]
+            )
             current_cell = self.state_to_cell(current_state)
             waypoints[current_cell] = self.state_to_cell(next_state)
             if waypoints[current_cell] == goal_cell:
@@ -104,7 +106,7 @@ def reward_function(self, desired_cell, current_cell):
         return 0.0

     def state_to_cell(self, state):
-        i = int(state/self.maze.map_width)
+        i = int(state / self.maze.map_width)
         j = state % self.maze.map_width
         return (i, j)

@@ -115,7 +117,7 @@ def get_q_values(self, num_itrs=50, discount=0.99):
         q_fn = np.zeros((self.num_states, self.num_actions))
         for _ in range(num_itrs):
             v_fn = np.max(q_fn, axis=1)
-            q_fn = self.rew_matrix + discount*self.transition_matrix.dot(v_fn)
+            q_fn = self.rew_matrix + discount * self.transition_matrix.dot(v_fn)
         return q_fn

     def compute_reward_matrix(self, goal_cell):
@@ -123,7 +125,9 @@
             for action in range(self.num_actions):
                 next_state, _ = self.get_next_state(state, EXPLORATION_ACTIONS[action])
                 next_cell = self.state_to_cell(next_state)
-                self.rew_matrix[state, action] = self.reward_function(goal_cell, next_cell)
+                self.rew_matrix[state, action] = self.reward_function(
+                    goal_cell, next_cell
+                )

     def compute_transition_matrix(self):
         """Constructs this environment's transition matrix.
@@ -132,7 +136,9 @@
         corresponds to the probability of transitioning into state ns after taking action a from state s.
""" - self.transition_matrix = np.zeros((self.num_states, self.num_actions, self.num_states)) + self.transition_matrix = np.zeros( + (self.num_states, self.num_actions, self.num_states) + ) for state in range(self.num_states): for action_idx, action in EXPLORATION_ACTIONS.items(): next_state, valid = self.get_next_state(state, action) @@ -234,6 +240,7 @@ class WaypointController: Inspired by https://github.com/Farama-Foundation/D4RL/blob/master/d4rl/pointmaze/waypoint_controller.py """ + def __init__(self, maze, gains={"p": 10.0, "d": -1.0}, waypoint_threshold=0.1): self.global_target_xy = np.empty(2) self.maze = maze @@ -246,35 +253,63 @@ def __init__(self, maze, gains={"p": 10.0, "d": -1.0}, waypoint_threshold=0.1): def compute_action(self, obs): # Check if we need to generate new waypoint path due to change in global target - if np.linalg.norm(self.global_target_xy - obs['desired_goal']) > 1e-3 or self.waypoint_targets is None: + if ( + np.linalg.norm(self.global_target_xy - obs["desired_goal"]) > 1e-3 + or self.waypoint_targets is None + ): # Convert xy to cell id - achieved_goal_cell = tuple(self.maze.cell_xy_to_rowcol(obs['achieved_goal'])) - self.global_target_id = tuple(self.maze.cell_xy_to_rowcol(obs['desired_goal'])) - self.global_target_xy = obs['desired_goal'] - - self.waypoint_targets = self.maze_solver.generate_path(achieved_goal_cell, self.global_target_id) + achieved_goal_cell = tuple( + self.maze.cell_xy_to_rowcol(obs["achieved_goal"]) + ) + self.global_target_id = tuple( + self.maze.cell_xy_to_rowcol(obs["desired_goal"]) + ) + self.global_target_xy = obs["desired_goal"] + + self.waypoint_targets = self.maze_solver.generate_path( + achieved_goal_cell, self.global_target_id + ) # Check if the waypoint dictionary is empty # If empty then the ball is already in the target cell location if self.waypoint_targets: - self.current_control_target_id = self.waypoint_targets[achieved_goal_cell] - self.current_control_target_xy = self.maze.cell_rowcol_to_xy(np.array(self.current_control_target_id)) + self.current_control_target_id = self.waypoint_targets[ + achieved_goal_cell + ] + self.current_control_target_xy = self.maze.cell_rowcol_to_xy( + np.array(self.current_control_target_id) + ) else: - self.waypoint_targets[self.current_control_target_id] = self.current_control_target_id + self.waypoint_targets[ + self.current_control_target_id + ] = self.current_control_target_id self.current_control_target_id = self.global_target_id self.current_control_target_xy = self.global_target_xy # Check if we need to go to the next waypoint - dist = np.linalg.norm(self.current_control_target_xy - obs['achieved_goal']) - if dist <= self.waypoint_threshold and self.current_control_target_id != self.global_target_id: - self.current_control_target_id = self.waypoint_targets[self.current_control_target_id] + dist = np.linalg.norm(self.current_control_target_xy - obs["achieved_goal"]) + if ( + dist <= self.waypoint_threshold + and self.current_control_target_id != self.global_target_id + ): + self.current_control_target_id = self.waypoint_targets[ + self.current_control_target_id + ] # If target is global goal go directly to goal position if self.current_control_target_id == self.global_target_id: self.current_control_target_xy = self.global_target_xy else: - self.current_control_target_xy = self.maze.cell_rowcol_to_xy(np.array(self.current_control_target_id)) - np.random.uniform(size=(2,))*0.2 - - action = self.gains['p'] * (self.current_control_target_xy - obs['achieved_goal']) + self.gains['d'] * 
obs['observation'][2:] + self.current_control_target_xy = ( + self.maze.cell_rowcol_to_xy( + np.array(self.current_control_target_id) + ) + - np.random.uniform(size=(2,)) * 0.2 + ) + + action = ( + self.gains["p"] * (self.current_control_target_xy - obs["achieved_goal"]) + + self.gains["d"] * obs["observation"][2:] + ) action = np.clip(action, -1, 1) return action @@ -294,6 +329,7 @@ def compute_action(self, obs): # case we will be generating new hdf5 datasets ``qpos``, ``qvel``, and ``goal`` in the ``infos`` subgroup of each episode group. # + class PointMazeStepDataCallback(StepDataCallback): """Add environment state information to 'infos'. @@ -301,78 +337,73 @@ class PointMazeStepDataCallback(StepDataCallback): never terminated or truncated. This callback overrides the truncation value to True when the step returns a True 'succes' key in 'infos'. This way we can divide the Minari dataset into different trajectories. """ - def __call__(self, env, obs, info, action=None, rew=None, terminated=None, truncated=None): - qpos = obs['observation'][:2] - qvel = obs['observation'][2:] - goal = obs['desired_goal'] + + def __call__( + self, env, obs, info, action=None, rew=None, terminated=None, truncated=None + ): + qpos = obs["observation"][:2] + qvel = obs["observation"][2:] + goal = obs["desired_goal"] step_data = super().__call__(env, obs, info, action, rew, terminated, truncated) - if step_data['infos']['success']: - step_data['truncations'] = True - step_data['infos']['qpos'] = qpos - step_data['infos']['qvel'] = qvel - step_data['infos']['goal'] = goal + if step_data["infos"]["success"]: + step_data["truncations"] = True + step_data["infos"]["qpos"] = qpos + step_data["infos"]["qvel"] = qvel + step_data["infos"]["goal"] = goal return step_data + # %% # Collect Data and Create Minari Dataset # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Now we will finally perform our data collection and create the Minari dataset. This is as simple as wrapping the environment with # the :class:`minari.DataCollectorV0` wrapper and add the custom callback methods. Once we've done this we can step the environment with the ``WayPointController`` -# as our policy. Don't forget to initialize the environment with a ``max_episode_steps`` of ``1,000,000`` since that's the total amount of steps we want to +# as our policy. For the tutorial, we collect 10,000 transitions. Thus, we initialize the environment with ``max_episode_steps=10,000`` since that's the total amount of steps we want to # collect for our dataset and we don't want the environment to get ``truncated`` during the data collection due to a time limit. # -# To create the Minari dataset we will first create the dataset by calling the function :func:`minari.create_dataset_from_collector_env`, and then checkpoint the dataset -# every ``200,000`` steps taken by the environment. -# dataset_name = "pointmaze-umaze-v0" - -# Check if dataset already exist and load to add more data -if dataset_name in minari.list_local_datasets(): - dataset = minari.load_dataset(dataset_name) -else: - dataset = None +total_steps = 10_000 # continuing task => the episode doesn't terminate or truncate when reaching a goal # it will generate a new target. 
For this reason we set the maximum episode steps to # the desired size of our Minari dataset (evade truncation due to time limit) -env = gym.make('PointMaze_Medium-v3', continuing_task=True, max_episode_steps=1e6) +env = gym.make("PointMaze_Medium-v3", continuing_task=True, max_episode_steps=total_steps) # Data collector wrapper to save temporary data while stepping. Characteristics: # * Custom StepDataCallback to add extra state information to 'infos' and divide dataset in different episodes by overridng # truncation value to True when target is reached # * Record the 'info' value of every step -collector_env = DataCollectorV0(env, step_data_callback=PointMazeStepDataCallback, record_infos=True) +collector_env = DataCollectorV0( + env, step_data_callback=PointMazeStepDataCallback, record_infos=True +) obs, _ = collector_env.reset(seed=123) waypoint_controller = WaypointController(maze=env.maze) -for n_step in range(int(1e6)): +for n_step in range(int(total_steps)): action = waypoint_controller.compute_action(obs) # Add some noise to each step action - action += np.random.randn(*action.shape)*0.5 - action = np.clip(action, env.action_space.low, env.action_space.high, dtype=np.float32) + action += np.random.randn(*action.shape) * 0.5 + action = np.clip( + action, env.action_space.low, env.action_space.high, dtype=np.float32 + ) obs, rew, terminated, truncated, info = collector_env.step(action) - if (n_step + 1) % 200000 == 0: - print('STEPS RECORDED:') - print(n_step) - if dataset is None: - dataset = minari.create_dataset_from_collector_env(collector_env=collector_env, - dataset_name=dataset_name, - algorithm_name="QIteration", - code_permalink="https://github.com/Farama-Foundation/Minari/blob/main/docs/tutorials/dataset_creation/point_maze_dataset.py", - author="Rodrigo Perez-Vicente", - author_email="rperezvicente@farama.org") - else: - # Update local Minari dataset every 200000 steps. - # This works as a checkpoint to not lose the already collected data - dataset.update_dataset_from_collector_env(collector_env) + +dataset = minari.create_dataset_from_collector_env( + collector_env=collector_env, + dataset_id=dataset_name, + algorithm_name="QIteration", + code_permalink="https://github.com/Farama-Foundation/Minari/blob/main/docs/tutorials/dataset_creation/point_maze_dataset.py", + author="Rodrigo Perez-Vicente", + author_email="rperezvicente@farama.org", +) # %% diff --git a/docs/tutorials/using_datasets/behavioral_cloning.py b/docs/tutorials/using_datasets/behavioral_cloning.py index b6d2fee8..c3d79789 100644 --- a/docs/tutorials/using_datasets/behavioral_cloning.py +++ b/docs/tutorials/using_datasets/behavioral_cloning.py @@ -1,4 +1,3 @@ -# fmt: off """ Behavioral cloning with PyTorch ========================================= @@ -12,10 +11,10 @@ # Policy training # ~~~~~~~~~~~~~~~~~~~ # To train the expert policy, we use `SB3 `_'s `rl-zoo3 `_ library. -# After installing the library with ``pip install rl_zoo3``, -# we train a PPO agent on the environment with the following command: -# -# ``python -m rl_zoo3.train --algo ppo --env CartPole-v1`` +# After installing the library, we train a PPO agent on the environment: + +!pip install rl_zoo3 +!python -m rl_zoo3.train --algo ppo --env CartPole-v1`` # %% # This will generate a new folder named `log` with the expert policy. 
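Since the point_maze_dataset.py tutorial above now creates its dataset once at the end of collection instead of checkpointing every 200,000 steps, a quick way to sanity-check the result is to load it back and sample a few episodes. A minimal sketch, assuming the `pointmaze-umaze-v0` id used above and a completed collection run:

```python
import minari

# Load the dataset written by the collection loop above.
dataset = minari.load_dataset("pointmaze-umaze-v0")
print(f"Total episodes: {dataset.total_episodes}")

# Episodes end whenever the custom StepDataCallback flags a success as a truncation,
# so their lengths vary even though the environment itself never terminates.
for episode in dataset.sample_episodes(3):
    print(episode.id, len(episode.rewards))
```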
@@ -41,7 +40,6 @@
 import minari
 from minari import DataCollectorV0

-
 torch.manual_seed(42)

 # %%
@@ -64,13 +62,14 @@
         if terminated or truncated:
             break

-dataset = minari.create_dataset_from_collector_env(dataset_id="CartPole-v1-expert",
-                                                   collector_env=env,
-                                                   algorithm_name="ExpertPolicy",
-                                                   code_permalink="https://minari.farama.org/tutorials/behavioral_cloning",
-                                                   author="Farama",
-                                                   author_email="contact@farama.org"
-                                                   )
+dataset = minari.create_dataset_from_collector_env(
+    dataset_id="CartPole-v1-expert",
+    collector_env=env,
+    algorithm_name="ExpertPolicy",
+    code_permalink="https://minari.farama.org/tutorials/behavioral_cloning",
+    author="Farama",
+    author_email="contact@farama.org"
+)

 # %%
 # Once executing the script, the dataset will be saved on your disk. You can display the list of datasets with ``minari list local`` command.
diff --git a/minari/storage/hosting.py b/minari/storage/hosting.py
index 375c66f7..ef03e1d0 100644
--- a/minari/storage/hosting.py
+++ b/minari/storage/hosting.py
@@ -68,7 +68,7 @@ def _upload_local_directory_to_gcs(local_path, bucket, gcs_path):

     if len(combined_datasets) > 0:
         print(
-            f"Dataset {dataset_id} is formed by a combination of the following datasets:"
+            f"Dataset {dataset_id} is formed by a combination of the following datasets: "
         )
         for name in combined_datasets:
             print(f"\t{name}")
@@ -202,10 +202,10 @@ def download_dataset(dataset_id: str, force_download: bool = False):
     # If the dataset is a combination of other datasets download the subdatasets recursively
     if len(combined_datasets) > 0:
         print(
-            f"\nDataset {dataset_id} is formed by a combination of the following datasets:"
+            f"\nDataset {dataset_id} is formed by a combination of the following datasets: "
        )
         for name in combined_datasets:
-            print(f" * {name}")
+            print(f"  * {name}")
         print("\nDownloading extra datasets ...")
         for dataset in combined_datasets:
             download_dataset(dataset_id=dataset)
diff --git a/pyproject.toml b/pyproject.toml
index 43e7a89e..2e2f2430 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,23 +24,26 @@ classifiers = [
     'Topic :: Scientific/Engineering :: Artificial Intelligence',
 ]
 dependencies = [
-    "numpy >=1.21.0",
+    "numpy>=1.21.0",
     "h5py>=3.8.0",
     "tqdm>=4.65.0",
     "typing_extensions>=4.4.0",
-    "google-cloud-storage==2.5.0",
-    "typer[all]==0.9.0",
+    "google-cloud-storage>=2.5.0",
+    "typer[all]>=0.9.0",
     "gymnasium>=0.28.1",
-    "portion==2.4.0",
-    "packaging==23.1",
+    "portion>=2.4.0",
+    "packaging>=23.1",
 ]
 dynamic = ["version"]

 [project.optional-dependencies]
 testing = [
-    "pytest ==7.1.3",
-    "gymnasium-robotics>=1.2.1",
-    "imageio >=2.14.1"
+    "pytest>=7.1.3",
+    "pytest-markdown-docs>=0.4.3",
+    "nbmake",
+    "gymnasium-robotics>=1.2.3",
+    "imageio>=2.14.1",
+    "minigrid>=2.2.0",
 ]

 [project.urls]
@@ -63,6 +66,7 @@ include = ["minari", "minari.*"]

 [tool.black]
 safe = true
+force-exclude = ["docs/tutorials/using_datasets/behavioral_cloning.py"]

 [tool.isort]
 atomic = true
@@ -105,3 +109,4 @@ reportUnboundVariable = "warning"
 [tool.pytest.ini_options]
 filterwarnings = ['ignore:.*The environment .* is out of date.*']
 # filterwarnings = ['ignore:.*step API.*:DeprecationWarning']
+addopts = ["--ignore=docs/tutorials"]
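Once the `CartPole-v1-expert` dataset above has been created, the recorded episodes can be flattened into (observation, action) pairs for behavioral cloning. The following is an illustrative sketch only, not the tutorial's exact training code; it assumes a `Box` observation space and `Discrete` actions, so `observations` carries one extra entry per episode (the reset observation) that is dropped to align with `actions`:

```python
import minari
import torch

dataset = minari.load_dataset("CartPole-v1-expert")

obs_list, act_list = [], []
for episode in dataset:
    # Drop the final observation so each observation lines up with the action taken from it.
    obs_list.append(torch.as_tensor(episode.observations[:-1], dtype=torch.float32))
    act_list.append(torch.as_tensor(episode.actions, dtype=torch.int64))

observations = torch.cat(obs_list)
actions = torch.cat(act_list)
print(observations.shape, actions.shape)
```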