Update changelog and cleanup (#1434)
araffin committed Apr 8, 2023
1 parent 12250eb commit 84f5511
Showing 6 changed files with 23 additions and 13 deletions.
22 changes: 19 additions & 3 deletions docs/misc/changelog.rst
@@ -4,9 +4,11 @@ Changelog
==========


-Release 1.8.0a14 (WIP)
+Release 1.8.0 (2023-04-07)
--------------------------

+**Multi-env HerReplayBuffer, Open RL Benchmark, Improved env checker**

.. warning::

Stable-Baselines3 (SB3) v1.8.0 will be the last one to use Gym as a backend.
@@ -31,23 +33,37 @@ New Features:
- Added support for dict/tuple observation spaces for ``VecCheckNan``; the check is now active in ``env_checker()`` (@DavyMorgan)
- Added multiprocessing support for ``HerReplayBuffer``
- ``HerReplayBuffer`` now supports all datatypes supported by ``ReplayBuffer``
-- Provide more helpful failure messages when validating the ``observation_space`` of custom gym environments using ``check_env``` (@FieteO)
+- Provide more helpful failure messages when validating the ``observation_space`` of custom gym environments using ``check_env`` (@FieteO)
- Added ``stats_window_size`` argument to control smoothing in rollout logging (@jonasreiher)
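
The multi-env ``HerReplayBuffer`` support above changes the typical HER setup. A minimal sketch of the v1.8 usage (not taken from this commit; it assumes panda-gym is installed and registers the goal-conditioned ``PandaReach-v2`` environment):

```python
import panda_gym  # noqa: F401  # registers PandaReach-v2 (assumed installed)
from stable_baselines3 import SAC, HerReplayBuffer
from stable_baselines3.common.env_util import make_vec_env

# HerReplayBuffer now works with vectorized envs (n_envs > 1);
# the old `online_sampling`/`max_episode_length` arguments are gone in v1.8.0.
env = make_vec_env("PandaReach-v2", n_envs=4)

model = SAC(
    "MultiInputPolicy",
    env,
    replay_buffer_class=HerReplayBuffer,
    replay_buffer_kwargs=dict(n_sampled_goal=4, goal_selection_strategy="future"),
    learning_starts=1000,
)
model.learn(total_timesteps=10_000)
```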


`SB3-Contrib`_
^^^^^^^^^^^^^^
- Added warning about potential crashes caused by ``check_env`` in the ``MaskablePPO`` docs (@AlexPasqua)
- Fixed ``sb3_contrib/qrdqn/*.py`` type hints
- Removed shared layers in ``mlp_extractor`` (@AlexPasqua)
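
For context on the shared-layers removal above, a hedged sketch of the v1.8 ``net_arch`` format (an illustration, not code from this commit): policy and value network sizes are now specified separately.

```python
from stable_baselines3 import PPO

# v1.8 style: no shared layers in the MLP extractor.
# The old format, e.g. net_arch=[64, dict(pi=[64], vf=[64])], is no longer supported.
model = PPO(
    "MlpPolicy",
    "CartPole-v1",
    policy_kwargs=dict(net_arch=dict(pi=[64, 64], vf=[64, 64])),
)
```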

`RL Zoo`_
^^^^^^^^^
- `Open RL Benchmark <https://github.com/openrlbenchmark/openrlbenchmark/issues/7>`_
- Upgraded to new `HerReplayBuffer` implementation that supports multiple envs
- Removed `TimeFeatureWrapper` for Panda and Fetch envs, as the new replay buffer should handle timeouts.
- Tuned hyperparameters for RecurrentPPO on Swimmer
- Documentation is now built using Sphinx and hosted on Read the Docs
- Removed `use_auth_token` for push to hub util
- Reverted from v3 to v2 for HumanoidStandup, Reacher, InvertedPendulum and InvertedDoublePendulum since they were not part of the mujoco refactoring (see https://github.com/openai/gym/pull/1304)
- Fixed `gym-minigrid` policy (from `MlpPolicy` to `MultiInputPolicy`)
- Replaced deprecated `optuna.suggest_loguniform(...)` by `optuna.suggest_float(..., log=True)`
- Switched to `ruff` and `pyproject.toml`
- Removed `online_sampling` and `max_episode_length` argument when using `HerReplayBuffer`
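
The Optuna migration mentioned in the list above amounts to a one-line change per sampled hyperparameter; a minimal sketch (hypothetical search space, not taken from the zoo):

```python
import optuna

def objective(trial: optuna.Trial) -> float:
    # Before (deprecated): trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    # Placeholder objective; the zoo would train and evaluate an agent here.
    return learning_rate

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)
```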

Bug Fixes:
^^^^^^^^^^
- Fixed Atari wrapper that missed the reset condition (@luizapozzobon)
- Added the argument ``dtype`` (defaults to ``float32``) to the action noise for consistency with gym actions (@sidney-tio)
- Fixed PPO train/n_updates metric not accounting for early stopping (@adamfrly)
- Fixed loading of normalized image-based environments
-- Fixed `DictRolloutBuffer.add` with multidimensional action space (@younik)
+- Fixed ``DictRolloutBuffer.add`` with multidimensional action space (@younik)
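
A short sketch of the noise ``dtype`` fix above (assuming the v1.8 signature of ``NormalActionNoise``):

```python
import numpy as np
from stable_baselines3.common.noise import NormalActionNoise

# dtype defaults to float32, matching gym's float32 Box action spaces
action_noise = NormalActionNoise(mean=np.zeros(2), sigma=0.1 * np.ones(2), dtype=np.float32)
assert action_noise().dtype == np.float32
```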

Deprecations:
^^^^^^^^^^^^^
4 changes: 2 additions & 2 deletions setup.py
@@ -90,7 +90,7 @@

extra_packages = extra_no_roms + [ # noqa: RUF005
# For atari roms,
"autorom[accept-rom-license]~=0.5.5",
"autorom[accept-rom-license]~=0.6.0",
]


@@ -138,7 +138,7 @@
# For spelling
"sphinxcontrib.spelling",
# Type hints support
"sphinx-autodoc-typehints==1.21.1", # TODO: remove version constraint, see #1290
"sphinx-autodoc-typehints",
# Copy button for code snippets
"sphinx_copybutton",
],
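
As a side note on the pin above: ``~=0.6.0`` is a compatible-release specifier, equivalent to ``>=0.6.0,<0.7.0``. A quick check with the ``packaging`` library (used here only for illustration; it is not a dependency touched by this commit):

```python
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=0.6.0")
assert "0.6.5" in spec      # patch releases are accepted
assert "0.7.0" not in spec  # the next minor release is excluded
```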
1 change: 0 additions & 1 deletion stable_baselines3/common/distributions.py
@@ -617,7 +617,6 @@ class TanhBijector:
"""
Bijective transformation of a probability distribution
using a squashing function (tanh)
-TODO: use Pyro instead (https://pyro.ai/)
:param epsilon: small value to avoid NaN due to numerical imprecision.
"""
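
For readers unfamiliar with the class, a minimal sketch of what ``TanhBijector`` does (a paraphrase for illustration, not the code touched by this diff): it squashes an unbounded Gaussian sample into (-1, 1) and corrects the log-probability for the change of variables.

```python
import torch as th

epsilon = 1e-6  # small constant to avoid NaN from numerical imprecision

def forward(x: th.Tensor) -> th.Tensor:
    # Squash an unbounded Gaussian sample into (-1, 1)
    return th.tanh(x)

def inverse(y: th.Tensor) -> th.Tensor:
    # Clip before atanh so values at exactly +/-1 do not produce inf
    eps = th.finfo(y.dtype).eps
    return th.atanh(y.clamp(min=-1.0 + eps, max=1.0 - eps))

def log_prob_correction(x: th.Tensor) -> th.Tensor:
    # Change-of-variables term: log|d tanh(x)/dx| = log(1 - tanh(x)^2)
    return th.log(1.0 - th.tanh(x) ** 2 + epsilon)
```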
5 changes: 0 additions & 5 deletions stable_baselines3/common/policies.py
@@ -337,11 +337,6 @@ def predict(
:return: the model's action and the next hidden state
(used in recurrent policies)
"""
-# TODO (GH/1): add support for RNN policies
-# if state is None:
-#     state = self.initial_state
-# if episode_start is None:
-#     episode_start = [False for _ in range(self.n_envs)]
# Switch to eval mode (this affects batch norm / dropout)
self.set_training_mode(False)

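
The deleted TODO above referred to recurrent policies, which now live in SB3-Contrib rather than in this ``predict`` method. A hedged sketch of how the ``state``/``episode_start`` arguments are threaded through a rollout there (assumes ``sb3-contrib`` is installed):

```python
import numpy as np
from sb3_contrib import RecurrentPPO

model = RecurrentPPO("MlpLstmPolicy", "CartPole-v1").learn(5_000)
env = model.get_env()

obs = env.reset()
lstm_states = None  # None falls back to the policy's initial state
episode_starts = np.ones((env.num_envs,), dtype=bool)
for _ in range(100):
    action, lstm_states = model.predict(
        obs, state=lstm_states, episode_start=episode_starts, deterministic=True
    )
    obs, rewards, dones, infos = env.step(action)
    episode_starts = dones  # reset the hidden state where an episode ended
```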
2 changes: 1 addition & 1 deletion stable_baselines3/version.txt
@@ -1 +1 @@
-1.8.0a14
+1.8.0
2 changes: 1 addition & 1 deletion tests/test_her.py
@@ -260,7 +260,7 @@ def env_fn():
del model.replay_buffer

with pytest.raises(AttributeError):
-model.replay_buffer
+model.replay_buffer # noqa: B018

# Check that there is no warning
assert len(recwarn) == 0
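
The ``# noqa: B018`` added above silences ruff's flake8-bugbear rule for useless expressions: the bare attribute access is intentional, since it must raise once the buffer has been deleted. A self-contained sketch of the pattern (illustrative names, not the real test):

```python
import pytest

class Model:
    def __init__(self) -> None:
        self.replay_buffer = object()

def test_attribute_removed() -> None:
    model = Model()
    del model.replay_buffer
    with pytest.raises(AttributeError):
        # Intentional bare attribute access: must raise after deletion
        model.replay_buffer  # noqa: B018
```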
