diff --git a/src/imitation/util/util.py b/src/imitation/util/util.py index ecd8ae2cc..00926a20e 100644 --- a/src/imitation/util/util.py +++ b/src/imitation/util/util.py @@ -45,11 +45,28 @@ def make_vec_env(env_name: str, seed: The environment seed. parallel: If True, uses SubprocVecEnv; otherwise, DummyVecEnv. log_dir: If specified, saves Monitor output to this directory. - max_episode_steps: If specified, wraps VecEnv in TimeLimit wrapper with - this episode length before returning. + max_episode_steps: If specified, wraps each env in a TimeLimit wrapper + with this episode length. If not specified and `max_episode_steps` + exists for this `env_name` in the Gym registry, uses the registry + `max_episode_steps` for every TimeLimit wrapper (this automatic + wrapper is the default behavior when calling `gym.make`). Otherwise + the environments are passed into the VecEnv unwrapped. """ + # Resolve the spec outside of the subprocess first, so that it is available to + # subprocesses running `make_env` via automatic pickling. + spec = gym.spec(env_name) + def make_env(i, this_seed): - env = gym.make(env_name) + # Previously, we directly called `gym.make(env_name)`, but running + # `imitation.scripts.train_adversarial` within `imitation.scripts.parallel` + # created a weird interaction between Gym and Ray -- `gym.make` would fail + # inside this function for any of our custom environments unless those + # environments were also `gym.register()`ed inside `make_env`. Even + # registering the custom environment in the scope of `make_vec_env` didn't + # work. For more discussion and hypotheses on this issue see PR #160: + # https://github.com/HumanCompatibleAI/imitation/pull/160. + env = spec.make() + # Seed each environment with a different, non-sequential seed for diversity # (even if caller is passing us sequentially-assigned base seeds). int() is # necessary to work around gym bug where it chokes on numpy int64s. 
@@ -57,6 +74,8 @@ def make_env(i, this_seed): if max_episode_steps is not None: env = TimeLimit(env, max_episode_steps) + elif spec.max_episode_steps is not None: + env = TimeLimit(env, max_episode_steps=spec.max_episode_steps) # Use Monitor to record statistics needed for Baselines algorithms logging # Optionally, save to disk diff --git a/tests/test_scripts.py b/tests/test_scripts.py index d86347bab..3c835f0a7 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -144,9 +144,9 @@ def test_transfer_learning(tmpdir): dict( sacred_ex_name="expert_demos", base_named_configs=["cartpole", "fast"], + n_seeds=2, search_space={ "config_updates": { - "seed": tune.grid_search([0, 1]), "init_rl_kwargs": { "learning_rate": tune.grid_search([3e-4, 1e-4]), }, @@ -171,7 +171,6 @@ def test_transfer_learning(tmpdir): ), ] - PARALLEL_CONFIG_LOW_RESOURCE = { # CI server only has 2 cores. "init_kwargs": {"num_cpus": 2}, @@ -194,6 +193,39 @@ def test_parallel(config_updates): assert run.status == 'COMPLETED' +def _generate_test_rollouts(tmpdir: str, env_named_config: str) -> str: + expert_demos_ex.run( + named_configs=[env_named_config, "fast"], + config_updates=dict( + rollout_save_interval=0, + log_dir=tmpdir, + )) + rollout_path = osp.abspath(f"{tmpdir}/rollouts/final.pkl") + return rollout_path + + +def test_parallel_train_adversarial_custom_env(tmpdir): + env_named_config = "custom_ant" + rollout_path = _generate_test_rollouts(tmpdir, env_named_config) + + config_updates = dict( + sacred_ex_name="train_adversarial", + n_seeds=1, + base_named_configs=[env_named_config, "fast"], + base_config_updates=dict( + init_trainer_kwargs=dict( + parallel=True, + num_vec=2, + ), + rollout_path=rollout_path, + ), + ) + config_updates.update(PARALLEL_CONFIG_LOW_RESOURCE) + run = parallel_ex.run(named_configs=["debug_log_root"], + config_updates=config_updates) + assert run.status == 'COMPLETED' + + @pytest.mark.parametrize("run_names", ([], list("adab"))) def 
test_analyze_imitation(tmpdir: str, run_names: List[str]): sacred_logs_dir = tmpdir