diff --git a/docs/conda_env.yml b/docs/conda_env.yml index a01d37bce..98a550820 100644 --- a/docs/conda_env.yml +++ b/docs/conda_env.yml @@ -6,9 +6,9 @@ dependencies: - cpuonly=1.0=0 - pip=21.1 - python=3.7 - - pytorch=1.8.1=py3.7_cpu_0 + - pytorch=1.11=py3.7_cpu_0 - pip: - - gym>=0.17.2 + - gym==0.21 - cloudpickle - opencv-python-headless - pandas @@ -16,5 +16,5 @@ dependencies: - matplotlib - sphinx_autodoc_typehints - sphinx>=4.2 - # See https://github.com/readthedocs/sphinx_rtd_theme/issues/1115 - sphinx_rtd_theme>=1.0 + - sphinx_copybutton diff --git a/docs/conf.py b/docs/conf.py index 18898d5c8..b44be6f66 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -24,6 +24,14 @@ except ImportError: enable_spell_check = False +# Try to enable copy button +try: + import sphinx_copybutton # noqa: F401 + + enable_copy_button = True +except ImportError: + enable_copy_button = False + # source code directory, relative to this file, for sphinx-autobuild sys.path.insert(0, os.path.abspath("..")) @@ -51,7 +59,7 @@ def __getattr__(cls, name): # -- Project information ----------------------------------------------------- project = "Stable Baselines3" -copyright = "2020, Stable Baselines3" +copyright = "2022, Stable Baselines3" author = "Stable Baselines3 Contributors" # The short X.Y version @@ -83,6 +91,9 @@ def __getattr__(cls, name): if enable_spell_check: extensions.append("sphinxcontrib.spelling") +if enable_copy_button: + extensions.append("sphinx_copybutton") + # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] diff --git a/docs/guide/examples.rst b/docs/guide/examples.rst index a5b56b249..0d7e7c0a2 100644 --- a/docs/guide/examples.rst +++ b/docs/guide/examples.rst @@ -729,6 +729,16 @@ to keep track of the agent progress. 
model.learn(10_000) +SB3 with EnvPool or Isaac Gym +----------------------------- + +Just like Procgen (see above), `EnvPool <https://github.com/sail-sg/envpool>`_ and `Isaac Gym <https://github.com/NVIDIA-Omniverse/IsaacGymEnvs>`_ accelerate the environment by +already providing a vectorized implementation. + +To use SB3 with those tools, you must wrap the env with the tool-specific ``VecEnvWrapper`` that will pre-process the data for SB3; +you can find links to those wrappers in `issue #772 <https://github.com/DLR-RM/stable-baselines3/issues/772>`_. + + Record a Video -------------- diff --git a/docs/guide/install.rst b/docs/guide/install.rst index 3b2692787..a9bb76156 100644 --- a/docs/guide/install.rst +++ b/docs/guide/install.rst @@ -54,6 +54,17 @@ Bleeding-edge version pip install git+https://github.com/DLR-RM/stable-baselines3 +.. note:: + + If you want to use the latest gym version (0.24+), you have to use + + .. code-block:: bash + + pip install git+https://github.com/carlosluis/stable-baselines3@fix_tests + + See `PR #780 <https://github.com/DLR-RM/stable-baselines3/pull/780>`_ for more information. + + Development version ------------------- diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index 5e893d9a5..62f2ddb3a 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -4,9 +4,11 @@ Changelog ========== -Release 1.5.1a9 (WIP) +Release 1.6.0 (2022-07-11) --------------------------- +**Recurrent PPO (PPO LSTM), better defaults for learning from pixels with SAC/TD3** + Breaking Changes: ^^^^^^^^^^^^^^^^^ - Changed the way policy "aliases" are handled ("MlpPolicy", "CnnPolicy", ...), removing the former @@ -34,6 +36,7 @@ Bug Fixes: - Fixed issues due to newer version of protobuf (tensorboard) and sphinx - Fix exception causes all over the codebase (@cool-RR) - Prohibit simultaneous use of optimize_memory_usage and handle_timeout_termination due to a bug (@MWeltevrede) +- Fixed a bug in ``kl_divergence`` check that would fail when using numpy arrays with MultiCategorical distribution Deprecations: ^^^^^^^^^^^^^ @@ -51,6 +54,8 @@ Documentation: - Added remark about breaking Markov assumption and timeout handling - Added doc 
about MLFlow integration via custom logger (@git-thor) - Updated Huggingface integration doc +- Added copy button for code snippets +- Added doc about EnvPool and Isaac Gym support Release 1.5.0 (2022-03-25) diff --git a/setup.py b/setup.py index 05745e9ee..281631600 100644 --- a/setup.py +++ b/setup.py @@ -111,6 +111,8 @@ "sphinxcontrib.spelling", # Type hints support "sphinx-autodoc-typehints", + # Copy button for code snippets + "sphinx_copybutton", ], "extra": [ # For render diff --git a/stable_baselines3/common/buffers.py b/stable_baselines3/common/buffers.py index d36c5f5ca..5ed9b4c96 100644 --- a/stable_baselines3/common/buffers.py +++ b/stable_baselines3/common/buffers.py @@ -193,7 +193,8 @@ def __init__( # see https://github.com/DLR-RM/stable-baselines3/issues/934 if optimize_memory_usage and handle_timeout_termination: raise ValueError( - "ReplayBuffer does not support optimize_memory_usage = True and handle_timeout_termination = True simultaneously." + "ReplayBuffer does not support optimize_memory_usage = True " + "and handle_timeout_termination = True simultaneously." ) self.optimize_memory_usage = optimize_memory_usage diff --git a/stable_baselines3/common/distributions.py b/stable_baselines3/common/distributions.py index 3d1ff5aa0..7096d01b8 100644 --- a/stable_baselines3/common/distributions.py +++ b/stable_baselines3/common/distributions.py @@ -4,6 +4,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union import gym +import numpy as np import torch as th from gym import spaces from torch import nn @@ -688,7 +689,7 @@ def kl_divergence(dist_true: Distribution, dist_pred: Distribution) -> th.Tensor # MultiCategoricalDistribution is not a PyTorch Distribution subclass # so we need to implement it ourselves! 
if isinstance(dist_pred, MultiCategoricalDistribution): - assert dist_pred.action_dims == dist_true.action_dims, "Error: distributions must have the same input space" + assert np.allclose(dist_pred.action_dims, dist_true.action_dims), "Error: distributions must have the same input space" return th.stack( [th.distributions.kl_divergence(p, q) for p, q in zip(dist_true.distribution, dist_pred.distribution)], dim=1, diff --git a/stable_baselines3/version.txt b/stable_baselines3/version.txt index 125ec275d..dc1e644a1 100644 --- a/stable_baselines3/version.txt +++ b/stable_baselines3/version.txt @@ -1 +1 @@ -1.5.1a9 +1.6.0 diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 3652b1850..07920dbef 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -163,7 +163,9 @@ def test_categorical(dist, CAT_ACTIONS): BernoulliDistribution(N_ACTIONS).proba_distribution(th.rand(N_ACTIONS)), CategoricalDistribution(N_ACTIONS).proba_distribution(th.rand(N_ACTIONS)), DiagGaussianDistribution(N_ACTIONS).proba_distribution(th.rand(N_ACTIONS), th.rand(N_ACTIONS)), - MultiCategoricalDistribution([N_ACTIONS, N_ACTIONS]).proba_distribution(th.rand(1, sum([N_ACTIONS, N_ACTIONS]))), + MultiCategoricalDistribution(np.array([N_ACTIONS, N_ACTIONS])).proba_distribution( + th.rand(1, sum([N_ACTIONS, N_ACTIONS])) + ), SquashedDiagGaussianDistribution(N_ACTIONS).proba_distribution(th.rand(N_ACTIONS), th.rand(N_ACTIONS)), StateDependentNoiseDistribution(N_ACTIONS).proba_distribution( th.rand(N_ACTIONS), th.rand([N_ACTIONS, N_ACTIONS]), th.rand([N_ACTIONS, N_ACTIONS])