Skip to content

Commit

Permalink
Some improvements in vectorized core
Browse files Browse the repository at this point in the history
- fixed episode collection
- there are still some bugs in parameter collection
- tests pass now; however, it's still not possible to correctly provide
n_steps_per_fit to the environment
  • Loading branch information
boris-il-forte committed Dec 8, 2023
1 parent 1254c48 commit 03c180c
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 8 deletions.
14 changes: 8 additions & 6 deletions mushroom_rl/core/_impl/vectorized_core_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,30 @@ def __init__(self, backend, n_envs):
super().__init__()

def get_mask(self, last):
mask = self._array_backend.ones(self._n_envs, dtype=bool)
terminated_episodes = (last & self._running_envs).sum()
running_episodes = (~last & self._running_envs).sum()
terminated_episodes = (last & self._running_envs).sum().item()
running_episodes = (~last & self._running_envs).sum().item()

first_batch = running_episodes == 0 and terminated_episodes == 0

if first_batch:
mask = self._array_backend.ones(self._n_envs, dtype=bool)
terminated_episodes = self._n_envs
else:
mask = self._running_envs

max_runs = terminated_episodes

if self._n_episodes is not None:
missing_episodes_move = self._n_episodes - self._total_episodes_counter - running_episodes

missing_episodes_move = max(self._n_episodes - self._total_episodes_counter - running_episodes, 0)
max_runs = min(missing_episodes_move, max_runs)

if self._n_episodes_per_fit is not None:
missing_episodes_fit = self._n_episodes_per_fit - self._current_episodes_counter - running_episodes
missing_episodes_fit = max(self._n_episodes_per_fit - self._current_episodes_counter - running_episodes, 0)
max_runs = min(missing_episodes_fit, max_runs)

new_mask = self._array_backend.ones(terminated_episodes, dtype=bool)
new_mask[max_runs:] = False

if first_batch:
mask = new_mask
else:
Expand Down
11 changes: 9 additions & 2 deletions tests/core/test_vectorized_envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self, mdp_info, backend):

def fit(self, dataset):

assert len(dataset.episodes_length) == 20
assert len(dataset.episodes_length) == 20 or len(dataset) == 150


class DummyVecEnv(VectorizedEnvironment):
Expand Down Expand Up @@ -86,23 +86,30 @@ def run_exp(env_backend, agent_backend):

core = VectorCore(agent, env)

print('- evaluate n_steps=2000')
dataset = core.evaluate(n_steps=2000)
assert len(dataset) == 2000

print('- evaluate n_episodes=20')
dataset = core.evaluate(n_episodes=20)
assert len(dataset.episodes_length) == 20

print('- learn n_steps=10000 n_episodes_per_fit=20')
core.learn(n_steps=10000, n_episodes_per_fit=20)

# print('- learn n_episodes=100 n_steps_per_fit=150')
# core.learn(n_episodes=100, n_steps_per_fit=150)


def test_vectorized_env_():
print('# CPU test')
run_exp(env_backend='torch', agent_backend='torch')
run_exp(env_backend='torch', agent_backend='numpy')
run_exp(env_backend='numpy', agent_backend='torch')
run_exp(env_backend='numpy', agent_backend='numpy')

if torch.cuda.is_available():
print('Testing also cuda')
print('# Testing also cuda')
TorchUtils.set_default_device('cuda')
run_exp(env_backend='torch', agent_backend='torch')
run_exp(env_backend='torch', agent_backend='numpy')
Expand Down

0 comments on commit 03c180c

Please sign in to comment.