hotfix broken import stable_baselines (#439)

* test_no_mpi: Use rreload to catch ImportError
  (`reload` and `import` are not sufficient.)
* Move ddpg.noise to common.noise
* rreload fix
* Replace ddpg.noise references with common.noise
* Update changelog
* Revert "Update changelog"
  (This reverts commit 2316ea224e6a7ddb5b183259ef432f4190d00290.)
* Revert "Replace ddpg.noise references with common.noise"
  (This reverts commit 2a6987fb2aa0d5b6a461751f80128a4fd542fe81.)
* Simpler method to re-import stable baselines
* Linting
* Add sb.ddpg.noise.py stub
* Try disable pylint unused import
* fix typo
* Another typo
* trigger ci (Travis CI was down)
* Restore old sys.modules to try and fix isinstance error
* Linting
* update changelog
Stable-Baselines-Team · Aug 13, 2019 · 7048a63 · 7048a63
1 parent 63f208e
commit 7048a63
Show file tree

Hide file tree

Showing 6 changed files with 133 additions and 112 deletions.
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -32,6 +32,11 @@ Deprecations:
 
 Others:
 ^^^^^^^
+- Implementations of noise classes (``AdaptiveParamNoiseSpec``, ``NormalActionNoise``,
+  ``OrnsteinUhlenbeckActionNoise``) were moved from ``stable_baselines.ddpg.noise``
+  to ``stable_baselines.common.noise``. The API remains backward-compatible;
+  for example, ``from stable_baselines.ddpg.noise import NormalActionNoise`` still
+  works. (@shwang)
 
 Documentation:
 ^^^^^^^^^^^^^^

diff --git a/stable_baselines/common/noise.py b/stable_baselines/common/noise.py
@@ -0,0 +1,109 @@
+import numpy as np
+
+
+class AdaptiveParamNoiseSpec(object):
+    """
+    Implements adaptive parameter noise
+
+    :param initial_stddev: (float) the initial value for the standard deviation of the noise
+    :param desired_action_stddev: (float) the desired value for the standard deviation of the noise
+    :param adoption_coefficient: (float) the update coefficient for the standard deviation of the noise
+    """
+    def __init__(self, initial_stddev=0.1, desired_action_stddev=0.1, adoption_coefficient=1.01):
+        self.initial_stddev = initial_stddev
+        self.desired_action_stddev = desired_action_stddev
+        self.adoption_coefficient = adoption_coefficient
+
+        self.current_stddev = initial_stddev
+
+    def adapt(self, distance):
+        """
+        update the standard deviation for the parameter noise
+
+        :param distance: (float) the noise distance applied to the parameters
+        """
+        if distance > self.desired_action_stddev:
+            # Decrease stddev.
+            self.current_stddev /= self.adoption_coefficient
+        else:
+            # Increase stddev.
+            self.current_stddev *= self.adoption_coefficient
+
+    def get_stats(self):
+        """
+        return the standard deviation for the parameter noise
+
+        :return: (dict) the stats of the noise
+        """
+        return {'param_noise_stddev': self.current_stddev}
+
+    def __repr__(self):
+        fmt = 'AdaptiveParamNoiseSpec(initial_stddev={}, desired_action_stddev={}, adoption_coefficient={})'
+        return fmt.format(self.initial_stddev, self.desired_action_stddev, self.adoption_coefficient)
+
+
+class ActionNoise(object):
+    """
+    The action noise base class
+    """
+    def reset(self):
+        """
+        call end of episode reset for the noise
+        """
+        pass
+
+
+class NormalActionNoise(ActionNoise):
+    """
+    A gaussian action noise
+
+    :param mean: (float) the mean value of the noise
+    :param sigma: (float) the scale of the noise (std here)
+    """
+    def __init__(self, mean, sigma):
+        self._mu = mean
+        self._sigma = sigma
+
+    def __call__(self):
+        return np.random.normal(self._mu, self._sigma)
+
+    def __repr__(self):
+        return 'NormalActionNoise(mu={}, sigma={})'.format(self._mu, self._sigma)
+
+
+class OrnsteinUhlenbeckActionNoise(ActionNoise):
+    """
+    An Ornstein-Uhlenbeck action noise, designed to approximate Brownian motion with friction.
+
+    Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab
+
+    :param mean: (float) the mean of the noise
+    :param sigma: (float) the scale of the noise
+    :param theta: (float) the rate of mean reversion
+    :param dt: (float) the timestep for the noise
+    :param initial_noise: ([float]) the initial value for the noise output, (if None: 0)
+    """
+
+    def __init__(self, mean, sigma, theta=.15, dt=1e-2, initial_noise=None):
+        self._theta = theta
+        self._mu = mean
+        self._sigma = sigma
+        self._dt = dt
+        self.initial_noise = initial_noise
+        self.noise_prev = None
+        self.reset()
+
+    def __call__(self):
+        noise = self.noise_prev + self._theta * (self._mu - self.noise_prev) * self._dt + \
+                self._sigma * np.sqrt(self._dt) * np.random.normal(size=self._mu.shape)
+        self.noise_prev = noise
+        return noise
+
+    def reset(self):
+        """
+        reset the Ornstein Uhlenbeck noise, to the initial position
+        """
+        self.noise_prev = self.initial_noise if self.initial_noise is not None else np.zeros_like(self._mu)
+
+    def __repr__(self):
+        return 'OrnsteinUhlenbeckActionNoise(mu={}, sigma={})'.format(self._mu, self._sigma)
diff --git a/stable_baselines/ddpg/__init__.py b/stable_baselines/ddpg/__init__.py
@@ -1,3 +1,3 @@
+from stable_baselines.common.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise
 from stable_baselines.ddpg.ddpg import DDPG
 from stable_baselines.ddpg.policies import MlpPolicy, CnnPolicy, LnMlpPolicy, LnCnnPolicy
-from stable_baselines.ddpg.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise
diff --git a/stable_baselines/ddpg/noise.py b/stable_baselines/ddpg/noise.py
@@ -1,109 +1 @@
-import numpy as np
-
-
-class AdaptiveParamNoiseSpec(object):
-    """
-    Implements adaptive parameter noise
-
-    :param initial_stddev: (float) the initial value for the standard deviation of the noise
-    :param desired_action_stddev: (float) the desired value for the standard deviation of the noise
-    :param adoption_coefficient: (float) the update coefficient for the standard deviation of the noise
-    """
-    def __init__(self, initial_stddev=0.1, desired_action_stddev=0.1, adoption_coefficient=1.01):
-        self.initial_stddev = initial_stddev
-        self.desired_action_stddev = desired_action_stddev
-        self.adoption_coefficient = adoption_coefficient
-
-        self.current_stddev = initial_stddev
-
-    def adapt(self, distance):
-        """
-        update the standard deviation for the parameter noise
-
-        :param distance: (float) the noise distance applied to the parameters
-        """
-        if distance > self.desired_action_stddev:
-            # Decrease stddev.
-            self.current_stddev /= self.adoption_coefficient
-        else:
-            # Increase stddev.
-            self.current_stddev *= self.adoption_coefficient
-
-    def get_stats(self):
-        """
-        return the standard deviation for the parameter noise
-
-        :return: (dict) the stats of the noise
-        """
-        return {'param_noise_stddev': self.current_stddev}
-
-    def __repr__(self):
-        fmt = 'AdaptiveParamNoiseSpec(initial_stddev={}, desired_action_stddev={}, adoption_coefficient={})'
-        return fmt.format(self.initial_stddev, self.desired_action_stddev, self.adoption_coefficient)
-
-
-class ActionNoise(object):
-    """
-    The action noise base class
-    """
-    def reset(self):
-        """
-        call end of episode reset for the noise
-        """
-        pass
-
-
-class NormalActionNoise(ActionNoise):
-    """
-    A gaussian action noise
-
-    :param mean: (float) the mean value of the noise
-    :param sigma: (float) the scale of the noise (std here)
-    """
-    def __init__(self, mean, sigma):
-        self._mu = mean
-        self._sigma = sigma
-
-    def __call__(self):
-        return np.random.normal(self._mu, self._sigma)
-
-    def __repr__(self):
-        return 'NormalActionNoise(mu={}, sigma={})'.format(self._mu, self._sigma)
-
-
-class OrnsteinUhlenbeckActionNoise(ActionNoise):
-    """
-    A Ornstein Uhlenbeck action noise, this is designed to aproximate brownian motion with friction.
-
-    Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab
-
-    :param mean: (float) the mean of the noise
-    :param sigma: (float) the scale of the noise
-    :param theta: (float) the rate of mean reversion
-    :param dt: (float) the timestep for the noise
-    :param initial_noise: ([float]) the initial value for the noise output, (if None: 0)
-    """
-
-    def __init__(self, mean, sigma, theta=.15, dt=1e-2, initial_noise=None):
-        self._theta = theta
-        self._mu = mean
-        self._sigma = sigma
-        self._dt = dt
-        self.initial_noise = initial_noise
-        self.noise_prev = None
-        self.reset()
-
-    def __call__(self):
-        noise = self.noise_prev + self._theta * (self._mu - self.noise_prev) * self._dt + \
-                self._sigma * np.sqrt(self._dt) * np.random.normal(size=self._mu.shape)
-        self.noise_prev = noise
-        return noise
-
-    def reset(self):
-        """
-        reset the Ornstein Uhlenbeck noise, to the initial position
-        """
-        self.noise_prev = self.initial_noise if self.initial_noise is not None else np.zeros_like(self._mu)
-
-    def __repr__(self):
-        return 'OrnsteinUhlenbeckActionNoise(mu={}, sigma={})'.format(self._mu, self._sigma)
+from stable_baselines.common.noise import NormalActionNoise, AdaptiveParamNoiseSpec, OrnsteinUhlenbeckActionNoise  # pylint: disable=unused-import
diff --git a/stable_baselines/td3/__init__.py b/stable_baselines/td3/__init__.py
@@ -1,3 +1,3 @@
+from stable_baselines.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
 from stable_baselines.td3.td3 import TD3
 from stable_baselines.td3.policies import MlpPolicy, CnnPolicy, LnMlpPolicy, LnCnnPolicy
-from stable_baselines.ddpg.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
diff --git a/tests/test_no_mpi.py b/tests/test_no_mpi.py
@@ -1,6 +1,21 @@
+import sys
+
 from .test_common import _maybe_disable_mpi
 
+
 def test_no_mpi_no_crash():
     with _maybe_disable_mpi(True):
+        # Temporarily delete previously imported stable baselines
+        old_modules = {}
+        sb_modules = [name for name in sys.modules.keys()
+                      if name.startswith('stable_baselines')]
+        for name in sb_modules:
+            old_modules[name] = sys.modules.pop(name)
+
+        # Re-import (with mpi disabled)
         import stable_baselines
-        del stable_baselines  # keep Codacy happy
+        del stable_baselines  # appease Codacy
+
+        # Restore old version of stable baselines (with MPI imported)
+        for name, mod in old_modules.items():
+            sys.modules[name] = mod