Skip to content

Commit

Permalink
hotfix broken import stable_baselines (#439)
Browse files Browse the repository at this point in the history
* test_no_mpi: Use rreload to catch ImportError

`reload` and `import` are not sufficient.

* Move ddpg.noise to common.noise

* rreload fix

* Replace ddpg.noise references with common.noise

* Update changelog

* Revert "Update changelog"

This reverts commit 2316ea224e6a7ddb5b183259ef432f4190d00290.

* Revert "Replace ddpg.noise references with common.noise"

This reverts commit 2a6987fb2aa0d5b6a461751f80128a4fd542fe81.

* Simpler method to re-import stable baselines

* Linting

* Add sb.ddpg.noise.py stub

* Try disable pylint unused import

* fix typo

* Another typo

* trigger ci

(Travis CI was down)

* Restore old sys.modules to try and fix isinstance error

* Linting

* update changelog
  • Loading branch information
shwang authored and araffin committed Aug 13, 2019
1 parent 63f208e commit 7048a63
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 112 deletions.
5 changes: 5 additions & 0 deletions docs/misc/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ Deprecations:

Others:
^^^^^^^
- Implementations of noise classes (`AdaptiveParamNoiseSpec`, `NormalActionNoise`,
  `OrnsteinUhlenbeckActionNoise`) were moved from `stable_baselines.ddpg.noise`
  to `stable_baselines.common.noise`. The API remains backward-compatible;
  for example, `from stable_baselines.ddpg.noise import NormalActionNoise` still
  works. (@shwang)

Documentation:
^^^^^^^^^^^^^^
Expand Down
109 changes: 109 additions & 0 deletions stable_baselines/common/noise.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import numpy as np


class AdaptiveParamNoiseSpec(object):
    """
    Specification of an adaptive parameter noise.

    Keeps a running standard deviation that ``adapt`` scales up or down by a
    constant factor on every call.

    :param initial_stddev: (float) the initial value for the standard deviation of the noise
    :param desired_action_stddev: (float) the desired value for the standard deviation of the noise
    :param adoption_coefficient: (float) the update coefficient for the standard deviation of the noise
    """

    def __init__(self, initial_stddev=0.1, desired_action_stddev=0.1, adoption_coefficient=1.01):
        # The running value starts out at the configured initial value.
        self.current_stddev = initial_stddev

        self.initial_stddev = initial_stddev
        self.desired_action_stddev = desired_action_stddev
        self.adoption_coefficient = adoption_coefficient

    def adapt(self, distance):
        """
        Rescale the current standard deviation according to the measured distance.

        :param distance: (float) the noise distance applied to the parameters
        """
        overshoot = distance > self.desired_action_stddev
        if not overshoot:
            # At or below the target: grow the noise.
            self.current_stddev *= self.adoption_coefficient
            return
        # Above the target: shrink the noise.
        self.current_stddev /= self.adoption_coefficient

    def get_stats(self):
        """
        Report the current standard deviation of the parameter noise.

        :return: (dict) the stats of the noise
        """
        return dict(param_noise_stddev=self.current_stddev)

    def __repr__(self):
        template = 'AdaptiveParamNoiseSpec(initial_stddev={}, desired_action_stddev={}, adoption_coefficient={})'
        values = (self.initial_stddev, self.desired_action_stddev, self.adoption_coefficient)
        return template.format(*values)


class ActionNoise(object):
    """
    Base class shared by the action noise implementations.
    """

    def reset(self):
        """
        End-of-episode reset hook for the noise; does nothing in the base class.
        """
        return None


class NormalActionNoise(ActionNoise):
    """
    Action noise drawn from a gaussian distribution.

    :param mean: (float) the mean value of the noise
    :param sigma: (float) the scale of the noise (std here)
    """

    def __init__(self, mean, sigma):
        self._mu, self._sigma = mean, sigma

    def __call__(self):
        # A fresh sample is drawn from numpy's global random state on every call.
        return np.random.normal(loc=self._mu, scale=self._sigma)

    def __repr__(self):
        template = 'NormalActionNoise(mu={}, sigma={})'
        return template.format(self._mu, self._sigma)


class OrnsteinUhlenbeckActionNoise(ActionNoise):
    """
    An Ornstein Uhlenbeck action noise, this is designed to approximate brownian motion with friction.

    Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab

    :param mean: (float or np.ndarray) the mean of the noise
    :param sigma: (float) the scale of the noise
    :param theta: (float) the rate of mean reversion
    :param dt: (float) the timestep for the noise
    :param initial_noise: ([float]) the initial value for the noise output, (if None: 0)
    """

    def __init__(self, mean, sigma, theta=.15, dt=1e-2, initial_noise=None):
        self._theta = theta
        self._mu = mean
        self._sigma = sigma
        self._dt = dt
        self.initial_noise = initial_noise
        self.noise_prev = None
        # Puts noise_prev at its starting value (initial_noise, or zeros shaped like mean).
        self.reset()

    def __call__(self):
        """
        Advance the process by one step and return the new noise value.

        :return: the new noise value, also stored as ``noise_prev`` for the next call
        """
        # np.shape() also accepts plain scalars and sequences, whereas the ``.shape``
        # attribute only exists on arrays; for an ndarray mean the two are identical.
        gaussian = np.random.normal(size=np.shape(self._mu))
        noise = self.noise_prev + self._theta * (self._mu - self.noise_prev) * self._dt + \
            self._sigma * np.sqrt(self._dt) * gaussian
        self.noise_prev = noise
        return noise

    def reset(self):
        """
        reset the Ornstein Uhlenbeck noise, to the initial position
        """
        if self.initial_noise is not None:
            self.noise_prev = self.initial_noise
        else:
            self.noise_prev = np.zeros_like(self._mu)

    def __repr__(self):
        return 'OrnsteinUhlenbeckActionNoise(mu={}, sigma={})'.format(self._mu, self._sigma)
2 changes: 1 addition & 1 deletion stable_baselines/ddpg/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from stable_baselines.common.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise
from stable_baselines.ddpg.ddpg import DDPG
from stable_baselines.ddpg.policies import MlpPolicy, CnnPolicy, LnMlpPolicy, LnCnnPolicy
from stable_baselines.ddpg.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise
110 changes: 1 addition & 109 deletions stable_baselines/ddpg/noise.py
Original file line number Diff line number Diff line change
@@ -1,109 +1 @@
import numpy as np


class AdaptiveParamNoiseSpec(object):
    """
    Specification of an adaptive parameter noise.

    Keeps a running standard deviation that ``adapt`` scales up or down by a
    constant factor on every call.

    :param initial_stddev: (float) the initial value for the standard deviation of the noise
    :param desired_action_stddev: (float) the desired value for the standard deviation of the noise
    :param adoption_coefficient: (float) the update coefficient for the standard deviation of the noise
    """

    def __init__(self, initial_stddev=0.1, desired_action_stddev=0.1, adoption_coefficient=1.01):
        # The running value starts out at the configured initial value.
        self.current_stddev = initial_stddev

        self.initial_stddev = initial_stddev
        self.desired_action_stddev = desired_action_stddev
        self.adoption_coefficient = adoption_coefficient

    def adapt(self, distance):
        """
        Rescale the current standard deviation according to the measured distance.

        :param distance: (float) the noise distance applied to the parameters
        """
        overshoot = distance > self.desired_action_stddev
        if not overshoot:
            # At or below the target: grow the noise.
            self.current_stddev *= self.adoption_coefficient
            return
        # Above the target: shrink the noise.
        self.current_stddev /= self.adoption_coefficient

    def get_stats(self):
        """
        Report the current standard deviation of the parameter noise.

        :return: (dict) the stats of the noise
        """
        return dict(param_noise_stddev=self.current_stddev)

    def __repr__(self):
        template = 'AdaptiveParamNoiseSpec(initial_stddev={}, desired_action_stddev={}, adoption_coefficient={})'
        values = (self.initial_stddev, self.desired_action_stddev, self.adoption_coefficient)
        return template.format(*values)


class ActionNoise(object):
    """
    Base class shared by the action noise implementations.
    """

    def reset(self):
        """
        End-of-episode reset hook for the noise; does nothing in the base class.
        """
        return None


class NormalActionNoise(ActionNoise):
    """
    Action noise drawn from a gaussian distribution.

    :param mean: (float) the mean value of the noise
    :param sigma: (float) the scale of the noise (std here)
    """

    def __init__(self, mean, sigma):
        self._mu, self._sigma = mean, sigma

    def __call__(self):
        # A fresh sample is drawn from numpy's global random state on every call.
        return np.random.normal(loc=self._mu, scale=self._sigma)

    def __repr__(self):
        template = 'NormalActionNoise(mu={}, sigma={})'
        return template.format(self._mu, self._sigma)


class OrnsteinUhlenbeckActionNoise(ActionNoise):
    """
    An Ornstein Uhlenbeck action noise, this is designed to approximate brownian motion with friction.

    Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab

    :param mean: (float or np.ndarray) the mean of the noise
    :param sigma: (float) the scale of the noise
    :param theta: (float) the rate of mean reversion
    :param dt: (float) the timestep for the noise
    :param initial_noise: ([float]) the initial value for the noise output, (if None: 0)
    """

    def __init__(self, mean, sigma, theta=.15, dt=1e-2, initial_noise=None):
        self._theta = theta
        self._mu = mean
        self._sigma = sigma
        self._dt = dt
        self.initial_noise = initial_noise
        self.noise_prev = None
        # Puts noise_prev at its starting value (initial_noise, or zeros shaped like mean).
        self.reset()

    def __call__(self):
        """
        Advance the process by one step and return the new noise value.

        :return: the new noise value, also stored as ``noise_prev`` for the next call
        """
        # np.shape() also accepts plain scalars and sequences, whereas the ``.shape``
        # attribute only exists on arrays; for an ndarray mean the two are identical.
        gaussian = np.random.normal(size=np.shape(self._mu))
        noise = self.noise_prev + self._theta * (self._mu - self.noise_prev) * self._dt + \
            self._sigma * np.sqrt(self._dt) * gaussian
        self.noise_prev = noise
        return noise

    def reset(self):
        """
        reset the Ornstein Uhlenbeck noise, to the initial position
        """
        if self.initial_noise is not None:
            self.noise_prev = self.initial_noise
        else:
            self.noise_prev = np.zeros_like(self._mu)

    def __repr__(self):
        return 'OrnsteinUhlenbeckActionNoise(mu={}, sigma={})'.format(self._mu, self._sigma)
from stable_baselines.common.noise import NormalActionNoise, AdaptiveParamNoiseSpec, OrnsteinUhlenbeckActionNoise # pylint: disable=unused-import
2 changes: 1 addition & 1 deletion stable_baselines/td3/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from stable_baselines.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
from stable_baselines.td3.td3 import TD3
from stable_baselines.td3.policies import MlpPolicy, CnnPolicy, LnMlpPolicy, LnCnnPolicy
from stable_baselines.ddpg.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
17 changes: 16 additions & 1 deletion tests/test_no_mpi.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,21 @@
import sys

from .test_common import _maybe_disable_mpi


def test_no_mpi_no_crash():
    """
    Check that a fresh ``import stable_baselines`` does not crash inside
    ``_maybe_disable_mpi(True)``.

    Already-imported ``stable_baselines`` modules are removed from ``sys.modules``
    so the import really runs again, and are put back afterwards so the rest of
    the test session keeps using the original module objects.
    """
    prefix = 'stable_baselines'

    with _maybe_disable_mpi(True):
        # Temporarily delete previously imported stable baselines
        old_modules = {name: sys.modules.pop(name)
                       for name in list(sys.modules)
                       if name.startswith(prefix)}
        try:
            # Re-import (with mpi disabled)
            import stable_baselines
            del stable_baselines  # appease Codacy
        finally:
            # Drop whatever the re-import registered, then restore the old version
            # of stable baselines (with MPI imported). Doing this in ``finally``
            # keeps sys.modules intact even when the import itself fails.
            for name in [name for name in sys.modules if name.startswith(prefix)]:
                del sys.modules[name]
            sys.modules.update(old_modules)

0 comments on commit 7048a63

Please sign in to comment.