
Bypass out-of-sync Gym registry in SubprocVecEnv by resolving EnvSpec #160

Merged (10 commits) on Jan 22, 2020
16 changes: 15 additions & 1 deletion src/imitation/util/util.py
@@ -49,15 +49,29 @@ def make_vec_env(env_name: str,
        max_episode_steps: If specified, wraps VecEnv in TimeLimit wrapper with
            this episode length before returning.
Member:

Suggested change:
-            this episode length before returning.
+            this episode length before returning. Otherwise, defaults to `max_episode_steps` for `env_name` in the Gym registry.

@shwang (Member Author) commented on Jan 22, 2020:

I expanded this comment a bit more in 902fe96. Worth noting that applying the registry's `max_episode_steps` is the default behavior of `gym.make`.
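[Editor's note: a minimal sketch of the `gym.make` default behavior mentioned above, not part of the PR. It assumes a Gym version contemporary with this PR (~0.15), where `gym.make` wraps the env in `TimeLimit` and the limit is exposed via the private `_max_episode_steps` attribute.]

```python
import gym

env = gym.make("CartPole-v1")
print(type(env).__name__)      # 'TimeLimit': wrapper added by gym.make
print(env._max_episode_steps)  # 500, taken from the Gym registry entry
```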

"""
# Resolve the spec outside of the subprocess first, so that it is available to
# subprocesses via automatic pickling.
spec = gym.spec(env_name)

def make_env(i, this_seed):
env = gym.make(env_name)
        # Previously, we directly called `gym.make(env_name)`.
        #
        # That direct approach was problematic (especially in combination with Ray)
        # because the forkserver from which subprocesses are forked might have been
Member:

Are we sure this is what's going on? It's fine as a hypothesis, but I don't want to immortalize in a comment something we're not sure about.

Member:

I'm 80% confident the forkserver has very minimal state, since:

  1. The Python multiprocessing docs say "no unnecessary resources are inherited".
  2. The code seems to use spawn to start the forkserver, which starts a new Python process.
  3. This blog states: "Note that children retain a copy of the forkserver state. This state is intended to be relatively simple, but it is possible to adjust this through the multiprocess API through the set_forkserver_preload() method."

That said, there does seem to be some logic to preload modules (by default, I think, the `__main__` module, i.e. the entry point to the script). So IIUC, the forkserver is intended to execute the code of the parent process (i.e. all imports), but should not execute an `if __name__ == '__main__'`-guarded block (the Python docs explicitly state the need for this guard).

It seems plausible that starting the forkserver in a Ray worker messes with the autodetection of what to import.
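[Editor's note: a small standalone experiment, not from this repo, illustrating the `set_forkserver_preload()` mechanism discussed above: children forked from the forkserver inherit whatever the forkserver imported. The expected output is an assumption based on CPython's documented forkserver semantics (Unix only).]

```python
import multiprocessing as mp
import sys

def check(_):
    # Runs in a child forked from the forkserver: was "json" already
    # imported when this child started?
    return "json" in sys.modules

if __name__ == "__main__":
    # Ask the forkserver process to import "json" once; every child forked
    # from it then inherits the module pre-imported.
    mp.set_forkserver_preload(["json"])
    ctx = mp.get_context("forkserver")
    with ctx.Pool(1) as pool:
        print(pool.map(check, [0]))  # expected: [True]
```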

Member:

If we can't get to the bottom of this, it may be better to leave the comment vague and cite this PR.

@shwang (Member Author) commented on Jan 21, 2020:

Agreed that we should be vague about the cause in the comments and note the PR; I've made these changes.

        # spawned before `env_name` was registered in the main process,
        # causing `env_name` to never exist in the Gym registry of the forked
        # subprocess that is running `make_env(env_name)`.
        env = spec.make()

        # Seed each environment with a different, non-sequential seed for diversity
        # (even if the caller passes us sequentially-assigned base seeds). int() is
        # necessary to work around a gym bug where it chokes on numpy int64s.
        env.seed(int(this_seed))

        if max_episode_steps is not None:
            env = TimeLimit(env, max_episode_steps)
        elif (spec.max_episode_steps is not None) and not spec.tags.get('vnc'):
            env = TimeLimit(env, max_episode_steps=spec.max_episode_steps)

        # Use Monitor to record statistics needed for Baselines algorithms logging
        # Optionally, save to disk
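[Editor's note: the core pattern of this PR, as my own minimal standalone sketch rather than the project's code. It assumes a Gym version contemporary with this PR, where `EnvSpec` is picklable and exposes `.make()`: resolve the spec in the parent, ship it to workers, and no registry lookup happens in the child.]

```python
import gym
from multiprocessing import get_context

def _worker(spec):
    # `spec` arrives via pickling; no Gym registry lookup happens in the child,
    # so this works even if the child's registry never saw the registration.
    env = spec.make()
    return type(env).__name__

if __name__ == "__main__":
    spec = gym.spec("CartPole-v1")  # registry lookup happens here, in the parent
    ctx = get_context("forkserver")
    with ctx.Pool(1) as pool:
        print(pool.map(_worker, [spec]))  # e.g. ['CartPoleEnv']
```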
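[Editor's note: on the seeding comment above, one possible way (my assumption, not the project's code) to derive diverse, non-sequential per-worker seeds from a single base seed, including the `int()` cast the comment calls for:]

```python
import numpy as np

base = np.random.SeedSequence(42)
# spawn() yields statistically independent child sequences, so worker seeds
# are decorrelated even though they share one base seed.
worker_seeds = [int(child.generate_state(1)[0]) for child in base.spawn(4)]
# int() matters: some gym versions choke on numpy integer types in env.seed().
print(worker_seeds)
```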
13 changes: 11 additions & 2 deletions tests/test_scripts.py
@@ -144,9 +144,9 @@ def test_transfer_learning(tmpdir):
    dict(
        sacred_ex_name="expert_demos",
        base_named_configs=["cartpole", "fast"],
        search_space={
            "config_updates": {
                "seed": tune.grid_search([0, 1]),
                "init_rl_kwargs": {
                    "learning_rate": tune.grid_search([3e-4, 1e-4]),
                },
@@ -169,9 +169,18 @@
            },
        }},
    ),
    # Test that custom environments are passed to SubprocVecEnv in Ray workers.
    dict(
        sacred_ex_name="train_adversarial",
        base_named_configs=["custom_ant", "fast"],
        base_config_updates=dict(
            init_trainer_kwargs=dict(
                parallel=True,
                num_vec=2,
            )),
    ),
]


PARALLEL_CONFIG_LOW_RESOURCE = {
    # CI server only has 2 cores.
    "init_kwargs": {"num_cpus": 2},
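[Editor's note: for context on the `custom_ant` named config in the new test above, custom envs are typically registered in the parent process at import time, along the lines of this hypothetical sketch (the id and entry point are made up). The PR's spec resolution is what makes such an env reachable from Ray-spawned SubprocVecEnv workers even when their forked registry missed this call.]

```python
from gym.envs.registration import register

register(
    id="CustomAnt-v0",                           # hypothetical id
    entry_point="my_package.envs:CustomAntEnv",  # hypothetical entry point
    max_episode_steps=1000,
)
```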