From a1b9c039925be70ba2565907c26ba7ad10c53dd4 Mon Sep 17 00:00:00 2001 From: "Steven H. Wang" Date: Fri, 17 Jan 2020 17:32:52 -0800 Subject: [PATCH 1/9] make_vec_env: Resolve EnvSpec in main process --- src/imitation/util/util.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/imitation/util/util.py b/src/imitation/util/util.py index fa19bafdd..2207504c9 100644 --- a/src/imitation/util/util.py +++ b/src/imitation/util/util.py @@ -49,8 +49,20 @@ def make_vec_env(env_name: str, max_episode_steps: If specified, wraps VecEnv in TimeLimit wrapper with this episode length before returning. """ + # Resolve the spec outside of the subprocess first, so that it is available to + # subprocesses via automatic pickling. + spec = gym.spec(env_name) + def make_env(i, this_seed): - env = gym.make(env_name) + # Previously, we directly called `gym.make(env_name)`. + # + # That direct approach was problematic (especially in combination with Ray) + # because the forkserver from which subprocesses are forked might have been + # spawned before `env_name` was registered in the main process, + # causing `env_name` to never exist in the Gym registry of the forked + # subprocess that is running `make_env(env_name)`. + env = spec.make() + # Seed each environment with a different, non-sequential seed for diversity # (even if caller is passing us sequentially-assigned base seeds). int() is # necessary to work around gym bug where it chokes on numpy int64s. From 0576bc565ba8bf3daa4be44fec11fe4650fa3df4 Mon Sep 17 00:00:00 2001 From: "Steven H. Wang" Date: Fri, 17 Jan 2020 17:56:00 -0800 Subject: [PATCH 2/9] test_scripts: Test Ray+custom_ant+SubprocVecEnv --- tests/test_scripts.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/test_scripts.py b/tests/test_scripts.py index d86347bab..e3b625470 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -144,9 +144,9 @@ def test_transfer_learning(tmpdir): dict( sacred_ex_name="expert_demos", base_named_configs=["cartpole", "fast"], + n_seeds=2, search_space={ "config_updates": { - "seed": tune.grid_search([0, 1]), "init_rl_kwargs": { "learning_rate": tune.grid_search([3e-4, 1e-4]), }, @@ -169,9 +169,18 @@ def test_transfer_learning(tmpdir): }, }}, ), + # Test that custom environments are passed to SubprocVecEnv in Ray workers. + dict( + sacred_ex_name="train_adversarial", + base_named_configs=["custom_ant", "fast"], + base_config_updates=dict( + init_trainer_kwargs=dict( + parallel=True, + num_vec=2, + )), + ), ] - PARALLEL_CONFIG_LOW_RESOURCE = { # CI server only has 2 cores. "init_kwargs": {"num_cpus": 2}, From f527f0f4dbd16e5b76bad8cc8bc046e86100583b Mon Sep 17 00:00:00 2001 From: "Steven H. Wang" Date: Fri, 17 Jan 2020 20:39:16 -0800 Subject: [PATCH 3/9] make_vec_env: Apply spec.max_episode_steps --- src/imitation/util/util.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/imitation/util/util.py b/src/imitation/util/util.py index 2207504c9..852d236a7 100644 --- a/src/imitation/util/util.py +++ b/src/imitation/util/util.py @@ -70,6 +70,8 @@ def make_env(i, this_seed): if max_episode_steps is not None: env = TimeLimit(env, max_episode_steps) + elif (spec.max_episode_steps is not None) and not spec.tags.get('vnc'): + env = TimeLimit(env, max_episode_steps=spec.max_episode_steps) # Use Monitor to record statistics needed for Baselines algorithms logging # Optionally, save to disk From a442b00d6f7dc9a52200e50bd51fb1fa3ad3596e Mon Sep 17 00:00:00 2001 From: "Steven H. Wang" Date: Tue, 21 Jan 2020 14:46:10 -0800 Subject: [PATCH 4/9] test_scripts: Auto-generate ant rollouts --- tests/test_scripts.py | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/tests/test_scripts.py b/tests/test_scripts.py index e3b625470..339fb7af3 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -169,16 +169,6 @@ def test_transfer_learning(tmpdir): }, }}, ), - # Test that custom environments are passed to SubprocVecEnv in Ray workers. - dict( - sacred_ex_name="train_adversarial", - base_named_configs=["custom_ant", "fast"], - base_config_updates=dict( - init_trainer_kwargs=dict( - parallel=True, - num_vec=2, - )), - ), ] PARALLEL_CONFIG_LOW_RESOURCE = { @@ -203,6 +193,39 @@ def test_parallel(config_updates): assert run.status == 'COMPLETED' +def _generate_custom_data(tmpdir: str, env_named_config: str) -> str: + expert_demos_ex.run( + named_configs=[env_named_config, "fast"], + config_updates=dict( + rollout_save_interval=0, + log_dir=tmpdir, + )) + rollout_path = osp.abspath(f"{tmpdir}/rollouts/final.pkl") + return rollout_path + + +def test_parallel_train_adversarial_custom_env(tmpdir): + env_named_config = "custom_ant" + rollout_path = _generate_custom_data(tmpdir, env_named_config) + + config_updates = dict( + sacred_ex_name="train_adversarial", + n_seeds=1, + base_named_configs=[env_named_config, "fast"], + base_config_updates=dict( + init_trainer_kwargs=dict( + parallel=True, + num_vec=2, + ), + rollout_path=rollout_path, + ), + ) + config_updates.update(PARALLEL_CONFIG_LOW_RESOURCE) + run = parallel_ex.run(named_configs=["debug_log_root"], + config_updates=config_updates) + assert run.status == 'COMPLETED' + + @pytest.mark.parametrize("run_names", ([], list("adab"))) def test_analyze_imitation(tmpdir: str, run_names: List[str]): sacred_logs_dir = tmpdir From 560c06823ac9fd0a3a47d1f98f051ea1c8f7c9ca Mon Sep 17 00:00:00 2001 From: "Steven H. Wang" Date: Tue, 21 Jan 2020 15:13:44 -0800 Subject: [PATCH 5/9] make_vec_env: Don't attempt to explain bug --- src/imitation/util/util.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/imitation/util/util.py b/src/imitation/util/util.py index 852d236a7..42f5d1bdc 100644 --- a/src/imitation/util/util.py +++ b/src/imitation/util/util.py @@ -50,17 +50,18 @@ def make_vec_env(env_name: str, this episode length before returning. """ # Resolve the spec outside of the subprocess first, so that it is available to - # subprocesses via automatic pickling. + # subprocesses running `make_env` via automatic pickling. spec = gym.spec(env_name) def make_env(i, this_seed): - # Previously, we directly called `gym.make(env_name)`. - # - # That direct approach was problematic (especially in combination with Ray) - # because the forkserver from which subprocesses are forked might have been - # spawned before `env_name` was registered in the main process, - # causing `env_name` to never exist in the Gym registry of the forked - # subprocess that is running `make_env(env_name)`. + # Previously, we directly called `gym.make(env_name)`, but running + # `imitation.scripts.train_adversarial` within `imitation.scripts.parallel` + # created a weird interaction between Gym and Ray -- `gym.make` would fail + # inside this function for any of our custom environment unless those + # environments were also `gym.register()`ed inside `make_env`. Even + # registering the custom environment in the scope of `make_vec_env` didn't + # work. For more discussion and hypotheses on this issue see PR #160: + # https://github.com/HumanCompatibleAI/imitation/pull/160. env = spec.make() # Seed each environment with a different, non-sequential seed for diversity From 3f8d29f93a447a3f2288e9de7f7852eabdce7375 Mon Sep 17 00:00:00 2001 From: "Steven H. Wang" Date: Tue, 21 Jan 2020 15:14:23 -0800 Subject: [PATCH 6/9] test_scripts: Rename helper function --- tests/test_scripts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 339fb7af3..3c835f0a7 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -193,7 +193,7 @@ def test_parallel(config_updates): assert run.status == 'COMPLETED' -def _generate_custom_data(tmpdir: str, env_named_config: str) -> str: +def _generate_test_rollouts(tmpdir: str, env_named_config: str) -> str: expert_demos_ex.run( named_configs=[env_named_config, "fast"], config_updates=dict( @@ -206,7 +206,7 @@ def _generate_custom_data(tmpdir: str, env_named_config: str) -> str: def test_parallel_train_adversarial_custom_env(tmpdir): env_named_config = "custom_ant" - rollout_path = _generate_custom_data(tmpdir, env_named_config) + rollout_path = _generate_test_rollouts(tmpdir, env_named_config) config_updates = dict( sacred_ex_name="train_adversarial", From e7fc11db74abbc7bfafda14590a642d6b3d8840a Mon Sep 17 00:00:00 2001 From: "Steven H. Wang" Date: Tue, 21 Jan 2020 18:43:52 -0800 Subject: [PATCH 7/9] Update src/imitation/util/util.py Co-Authored-By: Adam Gleave --- src/imitation/util/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imitation/util/util.py b/src/imitation/util/util.py index 42f5d1bdc..e47637337 100644 --- a/src/imitation/util/util.py +++ b/src/imitation/util/util.py @@ -71,7 +71,7 @@ def make_env(i, this_seed): if max_episode_steps is not None: env = TimeLimit(env, max_episode_steps) - elif (spec.max_episode_steps is not None) and not spec.tags.get('vnc'): + elif spec.max_episode_steps is not None: env = TimeLimit(env, max_episode_steps=spec.max_episode_steps) # Use Monitor to record statistics needed for Baselines algorithms logging From 365c1772e790e1956cf6d08c765ccda6a2a3d1cd Mon Sep 17 00:00:00 2001 From: "Steven H. Wang" Date: Tue, 21 Jan 2020 18:52:55 -0800 Subject: [PATCH 8/9] make_vec_env: Expand docstring --- src/imitation/util/util.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/imitation/util/util.py b/src/imitation/util/util.py index e47637337..54230c7cc 100644 --- a/src/imitation/util/util.py +++ b/src/imitation/util/util.py @@ -46,8 +46,12 @@ def make_vec_env(env_name: str, seed: The environment seed. parallel: If True, uses SubprocVecEnv; otherwise, DummyVecEnv. log_dir: If specified, saves Monitor output to this directory. - max_episode_steps: If specified, wraps VecEnv in TimeLimit wrapper with - this episode length before returning. + max_episode_steps: If specified, wraps each env in a TimeLimit wrapper + with this episode length. If not specified and `max_episode_steps` + exists for this VecEnv in the Gym registry, uses the registry value + for every TimeLimit wrapper (this is the default behavior when calling + `gym.make`). Otherwise the environments are passed into the VecEnv + unwrapped. """ # Resolve the spec outside of the subprocess first, so that it is available to # subprocesses running `make_env` via automatic pickling. From 902fe96a6e370aa52c720d786dea3fbd43ca7989 Mon Sep 17 00:00:00 2001 From: "Steven H. Wang" Date: Tue, 21 Jan 2020 18:58:40 -0800 Subject: [PATCH 9/9] make_vec_env: Fix docstring --- src/imitation/util/util.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/imitation/util/util.py b/src/imitation/util/util.py index 51c0216cd..00926a20e 100644 --- a/src/imitation/util/util.py +++ b/src/imitation/util/util.py @@ -47,10 +47,10 @@ def make_vec_env(env_name: str, log_dir: If specified, saves Monitor output to this directory. max_episode_steps: If specified, wraps each env in a TimeLimit wrapper with this episode length. If not specified and `max_episode_steps` - exists for this VecEnv in the Gym registry, uses the registry value - for every TimeLimit wrapper (this is the default behavior when calling - `gym.make`). Otherwise the environments are passed into the VecEnv - unwrapped. + exists for this `env_name` in the Gym registry, uses the registry + `max_episode_steps` for every TimeLimit wrapper (this automatic + wrapper is the default behavior when calling `gym.make`). Otherwise + the environments are passed into the VecEnv unwrapped. """ # Resolve the spec outside of the subprocess first, so that it is available to # subprocesses running `make_env` via automatic pickling.