Skip to content

Commit

Permalink
Fix - sample type inconsistency in (Multi)Categorical Probability Distribution (#588)
Browse files Browse the repository at this point in the history

* Fix - sample type inconsistency in CategoricalProbabilityDistribution

* Adding info on fix to changelog.

* Fix - sample type inconsistency (change sample type of CategoricalProbabilityDistribution, MultiCategoricalProbabilityDistribution to tf.int64)

* Change dtype of actions to int64 of ACER

* Update changelog.rst
  • Loading branch information
seheevic authored and araffin committed Dec 2, 2019
1 parent 04c35e1 commit 6039b89
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 8 deletions.
3 changes: 2 additions & 1 deletion docs/misc/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Breaking Changes:
- `allow_early_resets` of the `Monitor` wrapper now default to `True`
- `make_atari_env` now returns a `DummyVecEnv` by default (instead of a `SubprocVecEnv`)
this usually improves performance.
- Fix inconsistency of sample type, so that mode/sample function returns tensor of tf.int64 in CategoricalProbabilityDistribution/MultiCategoricalProbabilityDistribution (@seheevic)

New Features:
^^^^^^^^^^^^^
Expand Down Expand Up @@ -546,4 +547,4 @@ Thanks to @bjmuld @iambenzo @iandanforth @r7vme @brendenpetersen @huvar @abhiskk
@EliasHasle @mrakgr @Bleyddyn @antoine-galataud @junhyeokahn @AdamGleave @keshaviyengar @tperol
@XMaster96 @kantneel @Pastafarianist @GerardMaggiolino @PatrickWalter214 @yutingsz @sc420 @Aaahh @billtubbs
@Miffyli @dwiel @miguelrass @qxcv @jaberkow @eavelardev @ruifeng96150 @pedrohbtp @srivatsankrishnan @evilsocket
@MarvineGothic @jdossgollin @SyllogismRXS @rusu24edward @jbulow @Antymon
@MarvineGothic @jdossgollin @SyllogismRXS @rusu24edward @jbulow @Antymon @seheevic
2 changes: 1 addition & 1 deletion stable_baselines/a2c/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ def get_by_index(input_tensor, idx):
"""
assert len(input_tensor.get_shape()) == 2
assert len(idx.get_shape()) == 1
idx_flattened = tf.range(0, input_tensor.shape[0]) * input_tensor.shape[1] + idx
idx_flattened = tf.range(0, input_tensor.shape[0], dtype=idx.dtype) * input_tensor.shape[1] + idx
offset_tensor = tf.gather(tf.reshape(input_tensor, [-1]), # flatten input
idx_flattened) # use flattened indices
return offset_tensor
Expand Down
4 changes: 2 additions & 2 deletions stable_baselines/acer/acer_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,7 @@ def run(self):
"""
        Run a learning step of the model
:return: ([float], [float], [float], [float], [float], [bool], [float])
:return: ([float], [float], [int64], [float], [float], [bool], [float])
encoded observation, observations, actions, rewards, mus, dones, masks
"""
enc_obs = [self.obs]
Expand Down Expand Up @@ -666,7 +666,7 @@ def run(self):

enc_obs = np.asarray(enc_obs, dtype=self.obs_dtype).swapaxes(1, 0)
mb_obs = np.asarray(mb_obs, dtype=self.obs_dtype).swapaxes(1, 0)
mb_actions = np.asarray(mb_actions, dtype=np.int32).swapaxes(1, 0)
mb_actions = np.asarray(mb_actions, dtype=np.int64).swapaxes(1, 0)
mb_rewards = np.asarray(mb_rewards, dtype=np.float32).swapaxes(1, 0)
mb_mus = np.asarray(mb_mus, dtype=np.float32).swapaxes(1, 0)
mb_dones = np.asarray(mb_dones, dtype=np.bool).swapaxes(1, 0)
Expand Down
8 changes: 4 additions & 4 deletions stable_baselines/common/distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def sample_shape(self):
return []

def sample_dtype(self):
return tf.int32
return tf.int64


class MultiCategoricalProbabilityDistributionType(ProbabilityDistributionType):
Expand Down Expand Up @@ -211,7 +211,7 @@ def sample_shape(self):
return [len(self.n_vec)]

def sample_dtype(self):
return tf.int32
return tf.int64


class DiagGaussianProbabilityDistributionType(ProbabilityDistributionType):
Expand Down Expand Up @@ -353,7 +353,7 @@ def flatparam(self):
return self.flat

def mode(self):
return tf.cast(tf.stack([p.mode() for p in self.categoricals], axis=-1), tf.int32)
return tf.stack([p.mode() for p in self.categoricals], axis=-1)

def neglogp(self, x):
return tf.add_n([p.neglogp(px) for p, px in zip(self.categoricals, tf.unstack(x, axis=-1))])
Expand All @@ -365,7 +365,7 @@ def entropy(self):
return tf.add_n([p.entropy() for p in self.categoricals])

def sample(self):
return tf.cast(tf.stack([p.sample() for p in self.categoricals], axis=-1), tf.int32)
return tf.stack([p.sample() for p in self.categoricals], axis=-1)

@classmethod
def fromflat(cls, flat):
Expand Down

0 comments on commit 6039b89

Please sign in to comment.