From a1b9c039925be70ba2565907c26ba7ad10c53dd4 Mon Sep 17 00:00:00 2001
From: "Steven H. Wang" <wang.steven.h@gmail.com>
Date: Fri, 17 Jan 2020 17:32:52 -0800
Subject: [PATCH 1/9] make_vec_env: Resolve EnvSpec in main process

---
 src/imitation/util/util.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/imitation/util/util.py b/src/imitation/util/util.py
index fa19bafdd..2207504c9 100644
--- a/src/imitation/util/util.py
+++ b/src/imitation/util/util.py
@@ -49,8 +49,20 @@ def make_vec_env(env_name: str,
       max_episode_steps: If specified, wraps VecEnv in TimeLimit wrapper with
           this episode length before returning.
   """
+  # Resolve the spec outside of the subprocess first, so that it is available to
+  # subprocesses via automatic pickling.
+  spec = gym.spec(env_name)
+
   def make_env(i, this_seed):
-    env = gym.make(env_name)
+    # Previously, we directly called `gym.make(env_name)`.
+    #
+    # That direct approach was problematic (especially in combination with Ray)
+    # because the forkserver from which subprocesses are forked might have been
+    # spawned before `env_name` was registered in the main process,
+    # causing `env_name` to never exist in the Gym registry of the forked
+    # subprocess that is running `make_env(env_name)`.
+    env = spec.make()
+
     # Seed each environment with a different, non-sequential seed for diversity
     # (even if caller is passing us sequentially-assigned base seeds). int() is
     # necessary to work around gym bug where it chokes on numpy int64s.

From 0576bc565ba8bf3daa4be44fec11fe4650fa3df4 Mon Sep 17 00:00:00 2001
From: "Steven H. Wang" <wang.steven.h@gmail.com>
Date: Fri, 17 Jan 2020 17:56:00 -0800
Subject: [PATCH 2/9] test_scripts: Test Ray+custom_ant+SubprocVecEnv

---
 tests/test_scripts.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/tests/test_scripts.py b/tests/test_scripts.py
index d86347bab..e3b625470 100644
--- a/tests/test_scripts.py
+++ b/tests/test_scripts.py
@@ -144,9 +144,9 @@ def test_transfer_learning(tmpdir):
   dict(
     sacred_ex_name="expert_demos",
     base_named_configs=["cartpole", "fast"],
+    n_seeds=2,
     search_space={
       "config_updates": {
-        "seed": tune.grid_search([0, 1]),
         "init_rl_kwargs": {
           "learning_rate": tune.grid_search([3e-4, 1e-4]),
         },
@@ -169,9 +169,18 @@ def test_transfer_learning(tmpdir):
         },
       }},
   ),
+  # Test that custom environments are passed to SubprocVecEnv in Ray workers.
+  dict(
+    sacred_ex_name="train_adversarial",
+    base_named_configs=["custom_ant", "fast"],
+    base_config_updates=dict(
+      init_trainer_kwargs=dict(
+        parallel=True,
+        num_vec=2,
+      )),
+  ),
 ]
 
-
 PARALLEL_CONFIG_LOW_RESOURCE = {
     # CI server only has 2 cores.
     "init_kwargs": {"num_cpus": 2},

From f527f0f4dbd16e5b76bad8cc8bc046e86100583b Mon Sep 17 00:00:00 2001
From: "Steven H. Wang" <wang.steven.h@gmail.com>
Date: Fri, 17 Jan 2020 20:39:16 -0800
Subject: [PATCH 3/9] make_vec_env: Apply spec.max_episode_steps

---
 src/imitation/util/util.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/imitation/util/util.py b/src/imitation/util/util.py
index 2207504c9..852d236a7 100644
--- a/src/imitation/util/util.py
+++ b/src/imitation/util/util.py
@@ -70,6 +70,8 @@ def make_env(i, this_seed):
 
     if max_episode_steps is not None:
       env = TimeLimit(env, max_episode_steps)
+    elif (spec.max_episode_steps is not None) and not spec.tags.get('vnc'):
+      env = TimeLimit(env, max_episode_steps=spec.max_episode_steps)
 
     # Use Monitor to record statistics needed for Baselines algorithms logging
     # Optionally, save to disk

From a442b00d6f7dc9a52200e50bd51fb1fa3ad3596e Mon Sep 17 00:00:00 2001
From: "Steven H. Wang" <wang.steven.h@gmail.com>
Date: Tue, 21 Jan 2020 14:46:10 -0800
Subject: [PATCH 4/9] test_scripts: Auto-generate ant rollouts

---
 tests/test_scripts.py | 43 +++++++++++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/tests/test_scripts.py b/tests/test_scripts.py
index e3b625470..339fb7af3 100644
--- a/tests/test_scripts.py
+++ b/tests/test_scripts.py
@@ -169,16 +169,6 @@ def test_transfer_learning(tmpdir):
         },
       }},
   ),
-  # Test that custom environments are passed to SubprocVecEnv in Ray workers.
-  dict(
-    sacred_ex_name="train_adversarial",
-    base_named_configs=["custom_ant", "fast"],
-    base_config_updates=dict(
-      init_trainer_kwargs=dict(
-        parallel=True,
-        num_vec=2,
-      )),
-  ),
 ]
 
 PARALLEL_CONFIG_LOW_RESOURCE = {
@@ -203,6 +193,39 @@ def test_parallel(config_updates):
   assert run.status == 'COMPLETED'
 
 
+def _generate_custom_data(tmpdir: str, env_named_config: str) -> str:
+  expert_demos_ex.run(
+    named_configs=[env_named_config, "fast"],
+    config_updates=dict(
+      rollout_save_interval=0,
+      log_dir=tmpdir,
+    ))
+  rollout_path = osp.abspath(f"{tmpdir}/rollouts/final.pkl")
+  return rollout_path
+
+
+def test_parallel_train_adversarial_custom_env(tmpdir):
+  env_named_config = "custom_ant"
+  rollout_path = _generate_custom_data(tmpdir, env_named_config)
+
+  config_updates = dict(
+    sacred_ex_name="train_adversarial",
+    n_seeds=1,
+    base_named_configs=[env_named_config, "fast"],
+    base_config_updates=dict(
+      init_trainer_kwargs=dict(
+        parallel=True,
+        num_vec=2,
+      ),
+      rollout_path=rollout_path,
+    ),
+  )
+  config_updates.update(PARALLEL_CONFIG_LOW_RESOURCE)
+  run = parallel_ex.run(named_configs=["debug_log_root"],
+                        config_updates=config_updates)
+  assert run.status == 'COMPLETED'
+
+
 @pytest.mark.parametrize("run_names", ([], list("adab")))
 def test_analyze_imitation(tmpdir: str, run_names: List[str]):
   sacred_logs_dir = tmpdir

From 560c06823ac9fd0a3a47d1f98f051ea1c8f7c9ca Mon Sep 17 00:00:00 2001
From: "Steven H. Wang" <wang.steven.h@gmail.com>
Date: Tue, 21 Jan 2020 15:13:44 -0800
Subject: [PATCH 5/9] make_vec_env: Don't attempt to explain bug

---
 src/imitation/util/util.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/imitation/util/util.py b/src/imitation/util/util.py
index 852d236a7..42f5d1bdc 100644
--- a/src/imitation/util/util.py
+++ b/src/imitation/util/util.py
@@ -50,17 +50,18 @@ def make_vec_env(env_name: str,
           this episode length before returning.
   """
   # Resolve the spec outside of the subprocess first, so that it is available to
-  # subprocesses via automatic pickling.
+  # subprocesses running `make_env` via automatic pickling.
   spec = gym.spec(env_name)
 
   def make_env(i, this_seed):
-    # Previously, we directly called `gym.make(env_name)`.
-    #
-    # That direct approach was problematic (especially in combination with Ray)
-    # because the forkserver from which subprocesses are forked might have been
-    # spawned before `env_name` was registered in the main process,
-    # causing `env_name` to never exist in the Gym registry of the forked
-    # subprocess that is running `make_env(env_name)`.
+    # Previously, we directly called `gym.make(env_name)`, but running
+    # `imitation.scripts.train_adversarial` within `imitation.scripts.parallel`
+    # created a weird interaction between Gym and Ray -- `gym.make` would fail
+    # inside this function for any of our custom environment unless those
+    # environments were also `gym.register()`ed inside `make_env`. Even
+    # registering the custom environment in the scope of `make_vec_env` didn't
+    # work. For more discussion and hypotheses on this issue see PR #160:
+    # https://github.com/HumanCompatibleAI/imitation/pull/160.
     env = spec.make()
 
     # Seed each environment with a different, non-sequential seed for diversity

From 3f8d29f93a447a3f2288e9de7f7852eabdce7375 Mon Sep 17 00:00:00 2001
From: "Steven H. Wang" <wang.steven.h@gmail.com>
Date: Tue, 21 Jan 2020 15:14:23 -0800
Subject: [PATCH 6/9] test_scripts: Rename helper function

---
 tests/test_scripts.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_scripts.py b/tests/test_scripts.py
index 339fb7af3..3c835f0a7 100644
--- a/tests/test_scripts.py
+++ b/tests/test_scripts.py
@@ -193,7 +193,7 @@ def test_parallel(config_updates):
   assert run.status == 'COMPLETED'
 
 
-def _generate_custom_data(tmpdir: str, env_named_config: str) -> str:
+def _generate_test_rollouts(tmpdir: str, env_named_config: str) -> str:
   expert_demos_ex.run(
     named_configs=[env_named_config, "fast"],
     config_updates=dict(
@@ -206,7 +206,7 @@ def _generate_custom_data(tmpdir: str, env_named_config: str) -> str:
 
 def test_parallel_train_adversarial_custom_env(tmpdir):
   env_named_config = "custom_ant"
-  rollout_path = _generate_custom_data(tmpdir, env_named_config)
+  rollout_path = _generate_test_rollouts(tmpdir, env_named_config)
 
   config_updates = dict(
     sacred_ex_name="train_adversarial",

From e7fc11db74abbc7bfafda14590a642d6b3d8840a Mon Sep 17 00:00:00 2001
From: "Steven H. Wang" <wang.steven.h@gmail.com>
Date: Tue, 21 Jan 2020 18:43:52 -0800
Subject: [PATCH 7/9] Update src/imitation/util/util.py

Co-Authored-By: Adam Gleave <adam@gleave.me>
---
 src/imitation/util/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/imitation/util/util.py b/src/imitation/util/util.py
index 42f5d1bdc..e47637337 100644
--- a/src/imitation/util/util.py
+++ b/src/imitation/util/util.py
@@ -71,7 +71,7 @@ def make_env(i, this_seed):
 
     if max_episode_steps is not None:
       env = TimeLimit(env, max_episode_steps)
-    elif (spec.max_episode_steps is not None) and not spec.tags.get('vnc'):
+    elif spec.max_episode_steps is not None:
       env = TimeLimit(env, max_episode_steps=spec.max_episode_steps)
 
     # Use Monitor to record statistics needed for Baselines algorithms logging

From 365c1772e790e1956cf6d08c765ccda6a2a3d1cd Mon Sep 17 00:00:00 2001
From: "Steven H. Wang" <wang.steven.h@gmail.com>
Date: Tue, 21 Jan 2020 18:52:55 -0800
Subject: [PATCH 8/9] make_vec_env: Expand docstring

---
 src/imitation/util/util.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/imitation/util/util.py b/src/imitation/util/util.py
index e47637337..54230c7cc 100644
--- a/src/imitation/util/util.py
+++ b/src/imitation/util/util.py
@@ -46,8 +46,12 @@ def make_vec_env(env_name: str,
       seed: The environment seed.
       parallel: If True, uses SubprocVecEnv; otherwise, DummyVecEnv.
       log_dir: If specified, saves Monitor output to this directory.
-      max_episode_steps: If specified, wraps VecEnv in TimeLimit wrapper with
-          this episode length before returning.
+      max_episode_steps: If specified, wraps each env in a TimeLimit wrapper
+          with this episode length. If not specified and `max_episode_steps`
+          exists for this VecEnv in the Gym registry, uses the registry value
+          for every TimeLimit wrapper (this is the default behavior when calling
+          `gym.make`). Otherwise the environments are passed into the VecEnv
+          unwrapped.
   """
   # Resolve the spec outside of the subprocess first, so that it is available to
   # subprocesses running `make_env` via automatic pickling.

From 902fe96a6e370aa52c720d786dea3fbd43ca7989 Mon Sep 17 00:00:00 2001
From: "Steven H. Wang" <wang.steven.h@gmail.com>
Date: Tue, 21 Jan 2020 18:58:40 -0800
Subject: [PATCH 9/9] make_vec_env: Fix docstring

---
 src/imitation/util/util.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/imitation/util/util.py b/src/imitation/util/util.py
index 51c0216cd..00926a20e 100644
--- a/src/imitation/util/util.py
+++ b/src/imitation/util/util.py
@@ -47,10 +47,10 @@ def make_vec_env(env_name: str,
       log_dir: If specified, saves Monitor output to this directory.
       max_episode_steps: If specified, wraps each env in a TimeLimit wrapper
           with this episode length. If not specified and `max_episode_steps`
-          exists for this VecEnv in the Gym registry, uses the registry value
-          for every TimeLimit wrapper (this is the default behavior when calling
-          `gym.make`). Otherwise the environments are passed into the VecEnv
-          unwrapped.
+          exists for this `env_name` in the Gym registry, uses the registry
+          `max_episode_steps` for every TimeLimit wrapper (this automatic
+          wrapper is the default behavior when calling `gym.make`). Otherwise
+          the environments are passed into the VecEnv unwrapped.
   """
   # Resolve the spec outside of the subprocess first, so that it is available to
   # subprocesses running `make_env` via automatic pickling.