From 03c180cb5758aac5c2d7b5aceaecc8f8f77cbfa7 Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Fri, 8 Dec 2023 14:48:11 +0100 Subject: [PATCH] Some improvements in vectorized core - fixed episode collection - still some bugs in parameters collection - tests pass now; however, it's still not possible to correctly provide n_steps_per_fit to the environment --- mushroom_rl/core/_impl/vectorized_core_logic.py | 14 ++++++++------ tests/core/test_vectorized_envs.py | 11 +++++++++-- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/mushroom_rl/core/_impl/vectorized_core_logic.py b/mushroom_rl/core/_impl/vectorized_core_logic.py index 544da4af..f07ede5c 100644 --- a/mushroom_rl/core/_impl/vectorized_core_logic.py +++ b/mushroom_rl/core/_impl/vectorized_core_logic.py @@ -11,28 +11,30 @@ def __init__(self, backend, n_envs): super().__init__() def get_mask(self, last): - mask = self._array_backend.ones(self._n_envs, dtype=bool) - terminated_episodes = (last & self._running_envs).sum() - running_episodes = (~last & self._running_envs).sum() + terminated_episodes = (last & self._running_envs).sum().item() + running_episodes = (~last & self._running_envs).sum().item() first_batch = running_episodes == 0 and terminated_episodes == 0 if first_batch: + mask = self._array_backend.ones(self._n_envs, dtype=bool) terminated_episodes = self._n_envs + else: + mask = self._running_envs max_runs = terminated_episodes if self._n_episodes is not None: - missing_episodes_move = self._n_episodes - self._total_episodes_counter - running_episodes - + missing_episodes_move = max(self._n_episodes - self._total_episodes_counter - running_episodes, 0) max_runs = min(missing_episodes_move, max_runs) if self._n_episodes_per_fit is not None: - missing_episodes_fit = self._n_episodes_per_fit - self._current_episodes_counter - running_episodes + missing_episodes_fit = max(self._n_episodes_per_fit - self._current_episodes_counter - running_episodes, 0) max_runs = min(missing_episodes_fit, 
max_runs) new_mask = self._array_backend.ones(terminated_episodes, dtype=bool) new_mask[max_runs:] = False + if first_batch: mask = new_mask else: diff --git a/tests/core/test_vectorized_envs.py b/tests/core/test_vectorized_envs.py index fe17167f..8d1ec8f6 100644 --- a/tests/core/test_vectorized_envs.py +++ b/tests/core/test_vectorized_envs.py @@ -29,7 +29,7 @@ def __init__(self, mdp_info, backend): def fit(self, dataset): - assert len(dataset.episodes_length) == 20 + assert len(dataset.episodes_length) == 20 or len(dataset) == 150 class DummyVecEnv(VectorizedEnvironment): @@ -86,23 +86,30 @@ def run_exp(env_backend, agent_backend): core = VectorCore(agent, env) + print('- evaluate n_steps=2000') dataset = core.evaluate(n_steps=2000) assert len(dataset) == 2000 + print('- evaluate n_episodes=20') dataset = core.evaluate(n_episodes=20) assert len(dataset.episodes_length) == 20 + print('- learn n_steps=10000 n_episodes_per_fit=20') core.learn(n_steps=10000, n_episodes_per_fit=20) + # print('- learn n_episode=100 n_episodes_per_fit=150') + # core.learn(n_episodes=100, n_steps_per_fit=150) + def test_vectorized_env_(): + print('# CPU test') run_exp(env_backend='torch', agent_backend='torch') run_exp(env_backend='torch', agent_backend='numpy') run_exp(env_backend='numpy', agent_backend='torch') run_exp(env_backend='numpy', agent_backend='numpy') if torch.cuda.is_available(): - print('Testing also cuda') + print('# Testing also cuda') TorchUtils.set_default_device('cuda') run_exp(env_backend='torch', agent_backend='torch') run_exp(env_backend='torch', agent_backend='numpy')