-
Notifications
You must be signed in to change notification settings - Fork 60
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
hotfix broken import stable_baselines (#439)
* test_no_mpi: Use rreload to catch ImportError; `reload` and `import` are not sufficient.
* Move ddpg.noise to common.noise
* rreload fix
* Replace ddpg.noise references with common.noise
* Update changelog
* Revert "Update changelog" (this reverts commit 2316ea224e6a7ddb5b183259ef432f4190d00290)
* Revert "Replace ddpg.noise references with common.noise" (this reverts commit 2a6987fb2aa0d5b6a461751f80128a4fd542fe81)
* Simpler method to re-import stable baselines
* Linting
* Add sb.ddpg.noise.py stub
* Try to disable pylint unused import
* Fix typo
* Another typo
* Trigger CI (Travis CI was down)
* Restore old sys.modules to try and fix isinstance error
* Linting
* Update changelog
- Loading branch information
Showing
6 changed files
with
133 additions
and
112 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
import numpy as np | ||
|
||
|
||
class AdaptiveParamNoiseSpec(object):
    """
    Implements adaptive parameter noise

    :param initial_stddev: (float) the initial value for the standard deviation of the noise
    :param desired_action_stddev: (float) the desired value for the standard deviation of the noise
    :param adoption_coefficient: (float) the update coefficient for the standard deviation of the noise
    """
    def __init__(self, initial_stddev=0.1, desired_action_stddev=0.1, adoption_coefficient=1.01):
        self.initial_stddev = initial_stddev
        self.desired_action_stddev = desired_action_stddev
        self.adoption_coefficient = adoption_coefficient

        # Running value: starts at the initial one and is rescaled by every call to adapt()
        self.current_stddev = initial_stddev

    def adapt(self, distance):
        """
        Update the standard deviation for the parameter noise

        The current standard deviation is divided by the adoption coefficient when ``distance``
        is strictly greater than the desired standard deviation, and multiplied by it otherwise.

        :param distance: (float) the noise distance applied to the parameters
        """
        if distance > self.desired_action_stddev:
            # Decrease stddev.
            self.current_stddev /= self.adoption_coefficient
        else:
            # Increase stddev.
            self.current_stddev *= self.adoption_coefficient

    def get_stats(self):
        """
        Return the standard deviation for the parameter noise

        :return: (dict) the stats of the noise, under the key 'param_noise_stddev'
        """
        return {'param_noise_stddev': self.current_stddev}

    def __repr__(self):
        fmt = 'AdaptiveParamNoiseSpec(initial_stddev={}, desired_action_stddev={}, adoption_coefficient={})'
        return fmt.format(self.initial_stddev, self.desired_action_stddev, self.adoption_coefficient)
|
||
|
||
class ActionNoise(object):
    """
    The action noise base class
    """
    def reset(self):
        """
        Call end of episode reset for the noise

        The base implementation does nothing; subclasses that keep state override it.
        """
        pass
|
||
|
||
class NormalActionNoise(ActionNoise):
    """
    A gaussian action noise

    :param mean: (float) the mean value of the noise
    :param sigma: (float) the scale of the noise (std here)
    """
    def __init__(self, mean, sigma):
        self._mu = mean
        self._sigma = sigma

    def __call__(self):
        # Each call draws a fresh sample from numpy's global random state
        return np.random.normal(self._mu, self._sigma)

    def __repr__(self):
        return 'NormalActionNoise(mu={}, sigma={})'.format(self._mu, self._sigma)
|
||
|
||
class OrnsteinUhlenbeckActionNoise(ActionNoise):
    """
    An Ornstein Uhlenbeck action noise, this is designed to approximate brownian motion with friction.

    Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab

    :param mean: (float or np.ndarray) the mean of the noise
    :param sigma: (float or np.ndarray) the scale of the noise
    :param theta: (float) the rate of mean reversion
    :param dt: (float) the timestep for the noise
    :param initial_noise: ([float]) the initial value for the noise output, (if None: 0)
    """

    def __init__(self, mean, sigma, theta=.15, dt=1e-2, initial_noise=None):
        self._theta = theta
        self._mu = mean
        self._sigma = sigma
        self._dt = dt
        self.initial_noise = initial_noise
        self.noise_prev = None
        self.reset()

    def __call__(self):
        # np.shape() accepts plain Python scalars as well as arrays, whereas the `.shape`
        # attribute only exists on arrays; this lets a float `mean` work as documented.
        noise = self.noise_prev + self._theta * (self._mu - self.noise_prev) * self._dt + \
            self._sigma * np.sqrt(self._dt) * np.random.normal(size=np.shape(self._mu))
        # Remember the sample: the next call starts from it
        self.noise_prev = noise
        return noise

    def reset(self):
        """
        Reset the Ornstein Uhlenbeck noise, to the initial position

        The previous noise becomes ``initial_noise`` when one was given, otherwise zeros shaped like the mean.
        """
        self.noise_prev = self.initial_noise if self.initial_noise is not None else np.zeros_like(self._mu)

    def __repr__(self):
        return 'OrnsteinUhlenbeckActionNoise(mu={}, sigma={})'.format(self._mu, self._sigma)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
from stable_baselines.common.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise | ||
from stable_baselines.ddpg.ddpg import DDPG | ||
from stable_baselines.ddpg.policies import MlpPolicy, CnnPolicy, LnMlpPolicy, LnCnnPolicy | ||
from stable_baselines.ddpg.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,109 +1 @@ | ||
import numpy as np | ||
|
||
|
||
class AdaptiveParamNoiseSpec(object):
    """
    Implements adaptive parameter noise

    :param initial_stddev: (float) the initial value for the standard deviation of the noise
    :param desired_action_stddev: (float) the desired value for the standard deviation of the noise
    :param adoption_coefficient: (float) the update coefficient for the standard deviation of the noise
    """
    def __init__(self, initial_stddev=0.1, desired_action_stddev=0.1, adoption_coefficient=1.01):
        self.initial_stddev = initial_stddev
        self.desired_action_stddev = desired_action_stddev
        self.adoption_coefficient = adoption_coefficient

        # Running value: starts at the initial one and is rescaled by every call to adapt()
        self.current_stddev = initial_stddev

    def adapt(self, distance):
        """
        Update the standard deviation for the parameter noise

        The current standard deviation is divided by the adoption coefficient when ``distance``
        is strictly greater than the desired standard deviation, and multiplied by it otherwise.

        :param distance: (float) the noise distance applied to the parameters
        """
        if distance > self.desired_action_stddev:
            # Decrease stddev.
            self.current_stddev /= self.adoption_coefficient
        else:
            # Increase stddev.
            self.current_stddev *= self.adoption_coefficient

    def get_stats(self):
        """
        Return the standard deviation for the parameter noise

        :return: (dict) the stats of the noise, under the key 'param_noise_stddev'
        """
        return {'param_noise_stddev': self.current_stddev}

    def __repr__(self):
        fmt = 'AdaptiveParamNoiseSpec(initial_stddev={}, desired_action_stddev={}, adoption_coefficient={})'
        return fmt.format(self.initial_stddev, self.desired_action_stddev, self.adoption_coefficient)
|
||
|
||
class ActionNoise(object):
    """
    The action noise base class
    """
    def reset(self):
        """
        Call end of episode reset for the noise

        The base implementation does nothing; subclasses that keep state override it.
        """
        pass
|
||
|
||
class NormalActionNoise(ActionNoise):
    """
    A gaussian action noise

    :param mean: (float) the mean value of the noise
    :param sigma: (float) the scale of the noise (std here)
    """
    def __init__(self, mean, sigma):
        self._mu = mean
        self._sigma = sigma

    def __call__(self):
        # Each call draws a fresh sample from numpy's global random state
        return np.random.normal(self._mu, self._sigma)

    def __repr__(self):
        return 'NormalActionNoise(mu={}, sigma={})'.format(self._mu, self._sigma)
|
||
|
||
class OrnsteinUhlenbeckActionNoise(ActionNoise):
    """
    An Ornstein Uhlenbeck action noise, this is designed to approximate brownian motion with friction.

    Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab

    :param mean: (float or np.ndarray) the mean of the noise
    :param sigma: (float or np.ndarray) the scale of the noise
    :param theta: (float) the rate of mean reversion
    :param dt: (float) the timestep for the noise
    :param initial_noise: ([float]) the initial value for the noise output, (if None: 0)
    """

    def __init__(self, mean, sigma, theta=.15, dt=1e-2, initial_noise=None):
        self._theta = theta
        self._mu = mean
        self._sigma = sigma
        self._dt = dt
        self.initial_noise = initial_noise
        self.noise_prev = None
        self.reset()

    def __call__(self):
        # np.shape() accepts plain Python scalars as well as arrays, whereas the `.shape`
        # attribute only exists on arrays; this lets a float `mean` work as documented.
        noise = self.noise_prev + self._theta * (self._mu - self.noise_prev) * self._dt + \
            self._sigma * np.sqrt(self._dt) * np.random.normal(size=np.shape(self._mu))
        # Remember the sample: the next call starts from it
        self.noise_prev = noise
        return noise

    def reset(self):
        """
        Reset the Ornstein Uhlenbeck noise, to the initial position

        The previous noise becomes ``initial_noise`` when one was given, otherwise zeros shaped like the mean.
        """
        self.noise_prev = self.initial_noise if self.initial_noise is not None else np.zeros_like(self._mu)

    def __repr__(self):
        return 'OrnsteinUhlenbeckActionNoise(mu={}, sigma={})'.format(self._mu, self._sigma)
from stable_baselines.common.noise import NormalActionNoise, AdaptiveParamNoiseSpec, OrnsteinUhlenbeckActionNoise # pylint: disable=unused-import |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
from stable_baselines.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise | ||
from stable_baselines.td3.td3 import TD3 | ||
from stable_baselines.td3.policies import MlpPolicy, CnnPolicy, LnMlpPolicy, LnCnnPolicy | ||
from stable_baselines.ddpg.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,21 @@ | ||
import sys | ||
|
||
from .test_common import _maybe_disable_mpi | ||
|
||
|
||
def test_no_mpi_no_crash():
    """
    Check that stable baselines can be imported from scratch inside ``_maybe_disable_mpi(True)``

    NOTE(review): ``_maybe_disable_mpi`` comes from ``test_common`` and is not visible here; it is
    presumably a context manager that makes MPI unavailable for the duration of the block.
    """
    with _maybe_disable_mpi(True):
        # Temporarily delete previously imported stable baselines so that the import
        # below really re-executes the package instead of hitting the module cache
        old_modules = {}
        sb_modules = [name for name in sys.modules
                      if name.startswith('stable_baselines')]
        for name in sb_modules:
            old_modules[name] = sys.modules.pop(name)

        try:
            # Re-import (with mpi disabled)
            import stable_baselines
            del stable_baselines  # appease Codacy
        finally:
            # Restore old version of stable baselines (with MPI imported), even when the
            # import above fails, so that later tests keep seeing the original module objects
            for name, mod in old_modules.items():
                sys.modules[name] = mod