Commit 2ced9b3

Merge branch 'release/0.8.6'

0xangelo committed Jul 4, 2020
2 parents ec2d4b8 + dd0b9c9

Showing 224 changed files with 2,990 additions and 618 deletions.
17 changes: 13 additions & 4 deletions examples/MAPO/swingup.py
@@ -8,14 +8,23 @@ def get_config():
         "env_config": {"max_episode_steps": 500, "time_aware": False},
         # === MAPOTorchPolicy ===
         "module": {
-            "type": "ModelBasedSAC",
+            "type": "MBSAC",
             "model": {
                 "ensemble_size": 1,
                 "residual": True,
+                "input_dependent_scale": True,
+                "network": {"units": (128, 128), "activation": "Swish"},
             },
+            "actor": {
+                "encoder": {"units": (128, 128), "activation": "Swish"},
+                "input_dependent_scale": True,
+                "initial_entropy_coeff": 0.05,
+            },
+            "critic": {
+                "encoder": {"units": (128, 128), "activation": "Swish"},
+                "double_q": True,
+            },
-            "actor": {"encoder": {"units": (128, 128), "activation": "Swish"}},
-            "critic": {"encoder": {"units": (128, 128), "activation": "Swish"}},
-            "entropy": {"initial_alpha": 0.05},
+            "initializer": {"name": "xavier_uniform"},
         },
         "losses": {
             # Gradient estimator for optimizing expectations. Possible types include
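For readers updating their own MAPO configs, the substance of this hunk is a rename plus a relocation: the module type "ModelBasedSAC" becomes "MBSAC", the actor/critic one-liners grow into full sub-dicts, and the entropy coefficient moves from a standalone "entropy" section into the actor. A minimal before/after sketch of just the relocated keys, with everything else elided:

# Before (raylab 0.8.x): entropy coefficient configured in its own section.
old_module = {
    "type": "ModelBasedSAC",
    "actor": {"encoder": {"units": (128, 128), "activation": "Swish"}},
    "entropy": {"initial_alpha": 0.05},
}

# After (raylab 0.9.0): renamed type; coefficient folded into the actor.
new_module = {
    "type": "MBSAC",
    "actor": {
        "encoder": {"units": (128, 128), "activation": "Swish"},
        "initial_entropy_coeff": 0.05,
    },
}
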
15 changes: 11 additions & 4 deletions examples/MBPO/swingup.py
@@ -8,18 +8,24 @@ def get_config():
         "env_config": {"max_episode_steps": 500, "time_aware": False},
         # === MBPOTorchPolicy ===
         "module": {
-            "type": "ModelBasedSAC",
+            "type": "MBSAC",
             "model": {
-                "encoder": {"units": (128, 128), "activation": "Swish"},
                 "ensemble_size": 7,
                 "parallelize": True,
                 "residual": True,
+                "input_dependent_scale": True,
+                "network": {"units": (128, 128), "activation": "Swish"},
             },
             "actor": {
                 "encoder": {"units": (128, 128), "activation": "Swish"},
+                "input_dependent_scale": True,
+                "initial_entropy_coeff": 0.05,
             },
+            "critic": {
+                "encoder": {"units": (128, 128), "activation": "Swish"},
+                "double_q": True,
+            },
-            "critic": {"encoder": {"units": (128, 128), "activation": "Swish"}},
-            "entropy": {"initial_alpha": 0.05},
+            "initializer": {"name": "xavier_uniform"},
         },
         "torch_optimizer": {
             "models": {"type": "Adam", "lr": 3e-4, "weight_decay": 0.0001},
@@ -63,6 +69,7 @@ def get_config():
         "evaluation_num_episodes": 10,
         "timesteps_per_iteration": 1000,
         "num_cpus_for_driver": 4,
+        "compile_policy": True,
         # === RolloutWorker ===
         "rollout_fragment_length": 25,
         "batch_mode": "truncate_episodes",
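The second hunk also switches on "compile_policy". As a usage sketch only: raylab's workflow of this era registers its agents with Ray Tune before launching, so a config like the one above would presumably be run along these lines. The registration helper, the trainer name "MBPO", the import path, and the stop criterion are assumptions for illustration, not part of this diff:

import ray
from ray import tune
import raylab  # assumption: exposes register_all_agents()

from swingup import get_config  # hypothetical: run from examples/MBPO/

ray.init()
raylab.register_all_agents()  # assumed helper that makes "MBPO" resolvable
# An "env" key would still need to be supplied if the full example file
# does not already set one.
tune.run("MBPO", config=get_config(), stop={"timesteps_total": 100_000})
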
6 changes: 3 additions & 3 deletions examples/SOP/cheetah_defaults.py
@@ -25,23 +25,23 @@ def get_config():
         # for the policy and action-value function. No layers means the component is
         # linear in states and/or actions.
         "module": {
-            "type": "DDPGModule",
+            "type": "DDPG",
+            "initializer": {"name": "orthogonal"},
             "actor": {
                 "parameter_noise": True,
                 "smooth_target_policy": True,
                 "target_gaussian_sigma": 0.3,
                 "beta": 1.2,
                 "encoder": {
                     "units": (256, 256),
                     "activation": "ELU",
-                    "initializer_options": {"name": "Orthogonal"},
+                    "layer_norm": True,
                 },
             },
             "critic": {
                 "encoder": {
                     "units": (256, 256),
                     "activation": "ELU",
-                    "initializer_options": {"name": "Orthogonal"},
                     "delay_action": True,
                 },
             },
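Across this and the remaining SOP example configs, the same two changes repeat: "DDPGModule" is renamed to "DDPG", and weight initialization is hoisted out of each encoder's "initializer_options" into a single module-level "initializer". A minimal sketch of that consolidation, with unrelated keys elided:

# Before (0.8.x): every encoder carried its own initializer_options.
old_module = {
    "type": "DDPGModule",
    "actor": {"encoder": {"initializer_options": {"name": "Orthogonal"}}},
    "critic": {"encoder": {"initializer_options": {"name": "Orthogonal"}}},
}

# After (0.9.0): one initializer spec covers the whole module.
new_module = {
    "type": "DDPG",
    "initializer": {"name": "orthogonal"},
}
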
6 changes: 3 additions & 3 deletions examples/SOP/hopper_defaults.py
@@ -27,15 +27,16 @@ def get_config():
         # for the policy and action-value function. No layers means the component is
         # linear in states and/or actions.
         "module": {
-            "type": "DDPGModule",
+            "type": "DDPG",
+            "initializer": {"name": "orthogonal"},
             "actor": {
                 "parameter_noise": True,
                 "smooth_target_policy": True,
                 "target_gaussian_sigma": 0.3,
                 "beta": 1.2,
                 "encoder": {
                     "units": (256, 256),
                     "activation": "ReLU",
-                    "initializer_options": {"name": "xavier_uniform"},
+                    "layer_norm": False,
                 },
             },
@@ -44,7 +45,6 @@ def get_config():
                 "encoder": {
                     "units": (256, 256),
                     "activation": "ReLU",
-                    "initializer_options": {"name": "xavier_uniform"},
                     "delay_action": True,
                 },
             },
6 changes: 3 additions & 3 deletions examples/SOP/ib_defaults.py
@@ -30,15 +30,16 @@ def get_config():
         # for the policy and action-value function. No layers means the component is
         # linear in states and/or actions.
         "module": {
-            "type": "DDPGModule",
+            "type": "DDPG",
+            "initializer": {"name": "xavier_uniform"},
             "actor": {
                 "parameter_noise": True,
                 "smooth_target_policy": True,
                 "target_gaussian_sigma": 0.3,
                 "beta": 1.2,
                 "encoder": {
                     "units": (256, 256),
                     "activation": "ReLU",
-                    "initializer_options": {"name": "xavier_uniform"},
+                    "layer_norm": False,
                 },
             },
@@ -47,7 +48,6 @@ def get_config():
                 "encoder": {
                     "units": (256, 256),
                     "activation": "ReLU",
-                    "initializer_options": {"name": "xavier_uniform"},
                     "delay_action": True,
                 },
             },
3 changes: 2 additions & 1 deletion examples/SOP/swingup_defaults.py
@@ -19,8 +19,9 @@ def get_config():
         "polyak": 0.995,
         # === Network ===
         "module": {
-            "type": "DDPGModule",
+            "type": "DDPG",
             "actor": {
+                "parameter_noise": True,
                 "smooth_target_policy": True,
                 "target_gaussian_sigma": 0.3,
                 "beta": 1.2,
6 changes: 3 additions & 3 deletions examples/SOP/walker_defaults.py
@@ -42,15 +42,16 @@ def get_config():
         # for the policy and action-value function. No layers means the component is
         # linear in states and/or actions.
         "module": {
-            "type": "DDPGModule",
+            "type": "DDPG",
+            "initializer": {"name": "xavier_uniform"},
             "actor": {
                 "parameter_noise": True,
                 "smooth_target_policy": True,
                 "target_gaussian_sigma": 0.2,
                 "beta": 1.2,
                 "encoder": {
                     "units": (256, 256),
                     "activation": "ReLU",
-                    "initializer_options": {"name": "xavier_uniform"},
+                    "layer_norm": False,
                 },
             },
@@ -59,7 +60,6 @@ def get_config():
                 "encoder": {
                     "units": (256, 256),
                     "activation": "ReLU",
-                    "initializer_options": {"name": "xavier_uniform"},
                     "delay_action": True,
                 },
             },
34 changes: 17 additions & 17 deletions poetry.lock

Generated lockfile; diff not rendered.

10 changes: 5 additions & 5 deletions pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "raylab"
-version = "0.8.5"
+version = "0.9.0"
 description = "Reinforcement learning algorithms in RLlib and PyTorch."
 authors = ["Ângelo Gregório Lovatto <angelolovatto@gmail.com>"]
 license = "MIT"
@@ -27,17 +27,17 @@ opencv-python = "^4.2.0"
 [tool.poetry.dev-dependencies]
 flake8 = "^3.8.3"
 pylint = "^2.5.3"
-watchdog = "^0.10.2"
+watchdog = "^0.10.3"
 black = "^19.10b0"
-tox = "^3.15.2"
+tox = "^3.16.1"
 sphinx = "^3.1.1"
 pytest = "^5.4.3"
 gym-cartpole-swingup = "^0.1.0"
-pre-commit = "^2.5.1"
+pre-commit = "^2.6.0"
 reorder-python-imports = "^2.3.1"
 mypy = "^0.782"
 coverage = "^5.1"
-ipython = "^7.15.0"
+ipython = "^7.16.1"
 poetry-version = "^0.1.5"
 pytest-mock = "^3.1.1"
 pytest-sugar = "^0.9.3"
2 changes: 2 additions & 0 deletions raylab/agents/acktr/policy.py
@@ -12,6 +12,7 @@

 import raylab.utils.dictionaries as dutil
 from raylab.policy import TorchPolicy
+from raylab.policy.action_dist import WrapStochasticPolicy
 from raylab.pytorch.nn.distributions import Normal
 from raylab.pytorch.optim import build_optimizer
 from raylab.pytorch.optim.hessian_free import line_search
@@ -51,6 +52,7 @@ class ACKTRTorchPolicy(TorchPolicy):
     """Policy class for Actor-Critic with Kronecker factored Trust Region."""

     # pylint:disable=abstract-method
+    dist_class = WrapStochasticPolicy

     @staticmethod
     @override(TorchPolicy)
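The one-line behavioral change here is that the policy now declares its action-distribution wrapper at class-definition time. A minimal sketch of the pattern (hypothetical subclass name; a real policy also implements the remaining abstract methods):

from raylab.policy import TorchPolicy
from raylab.policy.action_dist import WrapStochasticPolicy


class MyStochasticPolicy(TorchPolicy):  # pylint:disable=abstract-method
    # Actions are sampled/scored through `dist_class`; WrapStochasticPolicy
    # adapts the module's stochastic actor to that interface.
    dist_class = WrapStochasticPolicy
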
9 changes: 6 additions & 3 deletions raylab/agents/mage/policy.py
@@ -1,10 +1,11 @@
 """Policy for MAGE using PyTorch."""
 from raylab.agents.sop import SOPTorchPolicy
-from raylab.losses import MAGE
-from raylab.losses import ModelEnsembleMLE
-from raylab.losses.mage import MAGEModules
 from raylab.policy import EnvFnMixin
 from raylab.policy import ModelTrainingMixin
+from raylab.policy.action_dist import WrapDeterministicPolicy
+from raylab.policy.losses import MAGE
+from raylab.policy.losses import ModelEnsembleMLE
+from raylab.policy.losses.mage import MAGEModules
 from raylab.pytorch.optim import build_optimizer


@@ -18,6 +19,7 @@ class MAGETorchPolicy(ModelTrainingMixin, EnvFnMixin, SOPTorchPolicy):
     """

     # pylint: disable=abstract-method
+    dist_class = WrapDeterministicPolicy

     def __init__(self, observation_space, action_space, config):
         super().__init__(observation_space, action_space, config)
@@ -37,6 +39,7 @@ def __init__(self, observation_space, action_space, config):

     @staticmethod
     def get_default_config():
+        # pylint:disable=cyclic-import
         from raylab.agents.mage import DEFAULT_CONFIG

         return DEFAULT_CONFIG
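For downstream code, the import move is mechanical: loss classes now live under raylab.policy.losses instead of raylab.losses. A sketch of the migration exactly as it appears in this file:

# Before (0.8.x):
#   from raylab.losses import MAGE
#   from raylab.losses import ModelEnsembleMLE
#   from raylab.losses.mage import MAGEModules
# After (0.9.0):
from raylab.policy.losses import MAGE
from raylab.policy.losses import ModelEnsembleMLE
from raylab.policy.losses.mage import MAGEModules
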