Fix reward of SimpleMultiObsEnv to always be float (#1676)

* Fix reward of SimpleMultiObsEnv to always be float

  Previously the reward was sometimes returned as an int.

* changelog
* Update changelog.rst
* Update version.txt
* Fix type annotation
* Fix import

---------

Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com>
Co-authored-by: Antonin Raffin <antonin.raffin@ensta.org>
DLR-RM · Sep 16, 2023 · 1cd6ae4 · 1cd6ae4
1 parent 9971276
commit 1cd6ae4
Show file tree

Hide file tree

Showing 4 changed files with 6 additions and 5 deletions.
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -3,7 +3,7 @@
 Changelog
 ==========
 
-Release 2.2.0a2 (WIP)
+Release 2.2.0a3 (WIP)
 --------------------------
 
 Breaking Changes:
@@ -32,6 +32,7 @@ Bug Fixes:
 - Calls ``callback.update_locals()`` before ``callback.on_rollout_end()`` in OnPolicyAlgorithm (@PatrickHelm)
 - Fixed replay buffer device after loading in OffPolicyAlgorithm (@PatrickHelm)
 - Fixed ``render_mode`` which was not properly loaded when using ``VecNormalize.load()``
+- Fixed success reward dtype in ``SimpleMultiObsEnv`` (@NixGD)
 
 
 Deprecations:

diff --git a/stable_baselines3/common/envs/multi_input_envs.py b/stable_baselines3/common/envs/multi_input_envs.py
@@ -150,7 +150,7 @@ def step(self, action: Union[int, np.ndarray]) -> GymStepReturn:
             self.state -= self.num_col
 
         got_to_end = self.state == self.max_state
-        reward = 1 if got_to_end else reward
+        reward = 1.0 if got_to_end else reward
         truncated = self.count > self.max_count
         terminated = got_to_end
 

diff --git a/stable_baselines3/common/logger.py b/stable_baselines3/common/logger.py
@@ -8,10 +8,10 @@
 from io import TextIOBase
 from typing import Any, Dict, List, Mapping, Optional, Sequence, TextIO, Tuple, Union
 
+import matplotlib.figure
 import numpy as np
 import pandas
 import torch as th
-from matplotlib import pyplot as plt
 
 try:
     from torch.utils.tensorboard import SummaryWriter
@@ -52,7 +52,7 @@ class Figure:
     :param close: if true, close the figure after logging it
     """
 
-    def __init__(self, figure: plt.figure, close: bool):
+    def __init__(self, figure: matplotlib.figure.Figure, close: bool):
         self.figure = figure
         self.close = close
 

diff --git a/stable_baselines3/version.txt b/stable_baselines3/version.txt
@@ -1 +1 @@
-2.2.0a2
+2.2.0a3