Added type-agnostic operations in core
- all operations are done with appropriate arrays
- removed np calls from everywhere
boris-il-forte committed Nov 24, 2023
1 parent 5b77eaf commit 036381f
Showing 3 changed files with 57 additions and 23 deletions.
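The pattern behind all three files: core code asks a per-backend converter object for new arrays instead of calling np.* directly, so the same control flow runs unchanged on NumPy arrays and torch tensors. A minimal standalone sketch of the idea — NumpyOps, TorchOps and get_ops are hypothetical stand-ins for the converter classes and DataConversion.get_converter shown below:

    import numpy as np
    import torch

    class NumpyOps:
        @staticmethod
        def zeros(*dims, dtype=None):
            # dims are gathered positionally, so callers write zeros(4) or zeros(2, 3)
            return np.zeros(dims, dtype=dtype)

    class TorchOps:
        @staticmethod
        def zeros(*dims, dtype=None):
            return torch.zeros(*dims, dtype=dtype)

    def get_ops(backend):
        # hypothetical lookup; mushroom_rl resolves this via DataConversion.get_converter
        return {'numpy': NumpyOps, 'torch': TorchOps}[backend]

    last = get_ops('numpy').zeros(4, dtype=bool)  # array([False, False, False, False])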
33 changes: 33 additions & 0 deletions mushroom_rl/core/_impl/type_conversions.py
@@ -1,4 +1,5 @@
 import numpy
+import numpy as np
 import torch
 
 from mushroom_rl.utils.torch import TorchUtils
@@ -43,6 +44,14 @@ def to_torch(array):
     def to_backend_array(cls, array):
         raise NotImplementedError
 
+    @staticmethod
+    def zeros(*dims, dtype=None):
+        raise NotImplementedError
+
+    @staticmethod
+    def ones(*dims, dtype=None):
+        raise NotImplementedError
+
 
 class NumpyConversion(DataConversion):
     @staticmethod
@@ -57,6 +66,14 @@ def to_torch(array):
     def to_backend_array(cls, array):
         return cls.to_numpy(array)
 
+    @staticmethod
+    def zeros(*dims, dtype=None):
+        return np.zeros(dims, dtype=dtype)
+
+    @staticmethod
+    def ones(*dims, dtype=None):
+        return np.ones(dims, dtype=dtype)
+
 
 class TorchConversion(DataConversion):
     @staticmethod
@@ -71,6 +88,14 @@ def to_torch(array):
     def to_backend_array(cls, array):
         return cls.to_torch(array)
 
+    @staticmethod
+    def zeros(*dims, dtype=None):
+        return torch.zeros(*dims, dtype=dtype, device=TorchUtils.get_device())
+
+    @staticmethod
+    def ones(*dims, dtype=None):
+        return torch.ones(*dims, dtype=dtype, device=TorchUtils.get_device())
+
 
 class ListConversion(DataConversion):
     @staticmethod
@@ -85,6 +110,14 @@ def to_torch(array):
     def to_backend_array(cls, array):
         return cls.to_numpy(array)
 
+    @staticmethod
+    def zeros(*dims, dtype=None):
+        return np.zeros(dims, dtype=dtype)
+
+    @staticmethod
+    def ones(*dims, dtype=None):
+        return np.ones(dims, dtype=dtype)

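The converters are plain static namespaces, so the new helpers can be exercised directly. A short usage sketch, assuming TorchUtils.get_device() falls back to CPU when no accelerator is configured:

    from mushroom_rl.core._impl.type_conversions import NumpyConversion, TorchConversion

    m = NumpyConversion.ones(2, 3, dtype=bool)  # np.ndarray of shape (2, 3), all True
    t = TorchConversion.zeros(5)                # torch.Tensor on TorchUtils' current device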
20 changes: 10 additions & 10 deletions mushroom_rl/core/_impl/vectorized_core_logic.py
@@ -1,19 +1,19 @@
-import numpy as np
-
+from .type_conversions import DataConversion
 from .core_logic import CoreLogic
 
 
 class VectorizedCoreLogic(CoreLogic):
-    def __init__(self, n_envs):
+    def __init__(self, backend, n_envs):
+        self._converter = DataConversion.get_converter(backend)
         self._n_envs = n_envs
-        self._running_envs = np.zeros(n_envs, dtype=bool)
+        self._running_envs = self._converter.zeros(n_envs, dtype=bool)
 
         super().__init__()
 
     def get_mask(self, last):
-        mask = np.ones(self._n_envs, dtype=bool)
-        terminated_episodes = np.logical_and(last, self._running_envs).sum()
-        running_episodes = np.logical_and(np.logical_not(last), self._running_envs).sum()
+        mask = self._converter.ones(self._n_envs, dtype=bool)
+        terminated_episodes = (last & self._running_envs).sum()
+        running_episodes = (~last & self._running_envs).sum()
 
         if running_episodes == 0 and terminated_episodes == 0:
             terminated_episodes = self._n_envs
@@ -29,7 +29,7 @@ def get_mask(self, last):
         missing_episodes_fit = self._n_episodes_per_fit - self._current_episodes_counter - running_episodes
         max_runs = min(missing_episodes_fit, max_runs)
 
-        new_mask = np.ones(terminated_episodes, dtype=bool)
+        new_mask = self._converter.ones(terminated_episodes, dtype=bool)
         new_mask[max_runs:] = False
         mask[last] = new_mask
 
@@ -59,8 +59,8 @@ def after_step(self, last):
     def after_fit(self):
         super().after_fit()
         if self._n_episodes_per_fit is not None:
-            self._running_envs = np.zeros(self._n_envs, dtype=bool)
+            self._running_envs = self._converter.zeros(self._n_envs, dtype=bool)
 
     def _reset_counters(self):
         super()._reset_counters()
-        self._running_envs = np.zeros(self._n_envs, dtype=bool)
+        self._running_envs = self._converter.zeros(self._n_envs, dtype=bool)
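The move from np.logical_and/np.logical_not to the & and ~ operators is what makes get_mask backend-agnostic: NumPy bool arrays and torch bool tensors both overload them, whereas the np.* calls would force a round-trip through NumPy. A standalone illustration:

    import numpy as np
    import torch

    last = np.array([True, False, True])
    running = np.array([True, True, False])
    print((last & running).sum(), (~last & running).sum())  # 1 1

    last_t = torch.tensor([True, False, True])
    running_t = torch.tensor([True, True, False])
    print((last_t & running_t).sum(), (~last_t & running_t).sum())  # tensor(1) tensor(1)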
27 changes: 14 additions & 13 deletions mushroom_rl/core/vectorized_core.py
@@ -1,5 +1,3 @@
-import numpy as np
-
 from mushroom_rl.core.dataset import Dataset
 from mushroom_rl.utils.record import VideoRecorder
 
@@ -36,7 +34,7 @@ def __init__(self, agent, env, callbacks_fit=None, callback_step=None, record_di
         self._current_theta = None
         self._episode_steps = None
 
-        self._core_logic = VectorizedCoreLogic(self.env.number)
+        self._core_logic = VectorizedCoreLogic(self.env.info.backend, self.env.number)
 
         if record_dictionary is None:
             record_dictionary = dict()
@@ -78,7 +76,7 @@ def evaluate(self, initial_states=None, n_steps=None, n_episodes=None, render=Fa
         episode. The environment is reset at the beginning of the learning process.
 
         Args:
-            initial_states (np.ndarray, None): the starting states of each episode;
+            initial_states (array, None): the starting states of each episode;
             n_steps (int, None): number of steps to move the agent;
             n_episodes (int, None): number of episodes to move the agent;
             render (bool, False): whether to render the environment or not;
@@ -103,18 +101,21 @@ def _run(self, datasets, n_steps, n_episodes, render, quiet, record, initial_states=None):
         self._core_logic.initialize_run(n_steps, n_episodes, initial_states, quiet)
 
-        last = np.ones(self.env.number, dtype=bool)
+        converter = datasets[0].converter
+
+        last = converter.ones(self.env.number, dtype=bool)
         mask = None
 
         while self._core_logic.move_required():
-            if np.any(last):
+            if last.any():
                 mask = self._core_logic.get_mask(last)
-                self._reset(initial_states, last, mask)
+                self._reset(converter, initial_states, last, mask)
 
             samples, step_infos = self._step(render, record, mask)
 
             self.callback_step(samples)
-            self._core_logic.after_step(np.logical_and(samples[5], mask))
+            self._core_logic.after_step(samples[5] & mask)
 
             self._add_to_dataset(mask, datasets, samples, step_infos)
 
@@ -172,7 +173,7 @@ def _step(self, render, record, mask):
             if mask[i]:
                 self._record[i](frames[i])
 
-        last = np.logical_or(absorbing, self._episode_steps >= self.env.info.horizon)
+        last = absorbing | (self._episode_steps >= self.env.info.horizon)
 
         state = self._state
         policy_state = self._policy_state
@@ -182,12 +183,12 @@
 
         return (state, action, rewards, next_state, absorbing, last, policy_state, policy_next_state), step_info
 
-    def _reset(self, initial_states, last, mask):
+    def _reset(self, converter, initial_states, last, mask):
         """
         Reset the states of the agent.
 
         """
-        reset_mask = np.logical_and(last, mask)
+        reset_mask = last & mask
         initial_state = self._core_logic.get_initial_state(initial_states)
 
         state, episode_info = self._preprocess(self.env.reset_all(reset_mask, initial_state))
@@ -196,7 +197,7 @@ def _reset(self, initial_states, last, mask):
         self.agent.next_action = None
 
         if self._episode_steps is None:
-            self._episode_steps = np.zeros(self.env.number, dtype=int)
+            self._episode_steps = converter.zeros(self.env.number, dtype=int)
         else:
             self._episode_steps[last] = 0
 
@@ -217,7 +218,7 @@ def _preprocess(self, states):
         Method to apply state preprocessors.
 
         Args:
-            states (Iterable of np.ndarray): the states to be preprocessed.
+            states (array): the states to be preprocessed.
 
         Returns:
             The preprocessed states.
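One subtlety in the rewritten _step: | binds tighter than >= in Python, so the parentheses around the horizon comparison are load-bearing; without them the line would evaluate as (absorbing | self._episode_steps) >= horizon. A quick check:

    import numpy as np

    absorbing = np.array([False, True])
    steps = np.array([5, 0])
    horizon = 3

    print(absorbing | (steps >= horizon))  # [ True  True]  <- intended semantics
    print(absorbing | steps >= horizon)    # [ True False]  == (absorbing | steps) >= horizon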
