From f4c4c81d71d25b96eb1d6c8ec352ac9d05609e44 Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Fri, 8 Dec 2023 17:07:54 +0100 Subject: [PATCH] Added fixed files missing from previous commit --- mushroom_rl/core/vectorized_core.py | 8 ++++---- mushroom_rl/distributions/gaussian.py | 4 +++- tests/core/test_vectorized_envs.py | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/mushroom_rl/core/vectorized_core.py b/mushroom_rl/core/vectorized_core.py index 8a7345ee5..687904cc5 100644 --- a/mushroom_rl/core/vectorized_core.py +++ b/mushroom_rl/core/vectorized_core.py @@ -108,7 +108,7 @@ def _run(self, dataset, n_steps, n_episodes, render, quiet, record, initial_stat mask = self._core_logic.get_mask(last) reset_mask = self._reset(initial_states, last, mask) - if self.agent.info.is_episodic: + if self.agent.info.is_episodic and reset_mask.any(): dataset.append_theta_vectorized(self._current_theta, reset_mask) samples, step_infos = self._step(render, record, mask) @@ -118,18 +118,18 @@ def _run(self, dataset, n_steps, n_episodes, render, quiet, record, initial_stat dataset.append_vectorized(samples, step_infos, mask) + last = samples[5] + if self._core_logic.fit_required(): fit_dataset = dataset.flatten() self.agent.fit(fit_dataset) - self._core_logic.after_fit() + last = self._core_logic.after_fit_vectorized(last) for c in self.callbacks_fit: c(dataset) dataset.clear() - last = samples[5] - self.agent.stop() self.env.stop() diff --git a/mushroom_rl/distributions/gaussian.py b/mushroom_rl/distributions/gaussian.py index 82467dc1e..b8a210713 100644 --- a/mushroom_rl/distributions/gaussian.py +++ b/mushroom_rl/distributions/gaussian.py @@ -3,6 +3,7 @@ from scipy.stats import multivariate_normal from scipy.optimize import minimize + class GaussianDistribution(Distribution): """ Gaussian distribution with fixed covariance matrix. The parameters @@ -118,6 +119,7 @@ def _lagrangian_eta(lag_array, weights, theta, mu, sigma, n_dims, eps): return sum1 + sum2 + class GaussianDiagonalDistribution(Distribution): """ Gaussian distribution with diagonal covariance matrix. The parameters @@ -186,7 +188,7 @@ def con_wmle(self, theta, weights, eps, kappa): args=(weights, theta, mu, sigma, n_dims, eps, kappa), method='SLSQP') - eta_opt, omg_opt = res.x[0], res.x[1] + eta_opt, omg_opt = res.x[0], res.x[1] self._mu, self._std = GaussianDiagonalDistribution._compute_mu_sigma_from_lagrangian(weights, theta, mu, sigma, eta_opt, omg_opt) diff --git a/tests/core/test_vectorized_envs.py b/tests/core/test_vectorized_envs.py index 8d1ec8f68..87c07e817 100644 --- a/tests/core/test_vectorized_envs.py +++ b/tests/core/test_vectorized_envs.py @@ -28,7 +28,7 @@ def __init__(self, mdp_info, backend): super().__init__(mdp_info, policy, backend=backend) def fit(self, dataset): - + print(f'\t* samples={len(dataset)}, episodes={len(dataset.episodes_length)}') assert len(dataset.episodes_length) == 20 or len(dataset) == 150 @@ -97,7 +97,7 @@ def run_exp(env_backend, agent_backend): print('- learn n_steps=10000 n_episodes_per_fit=20') core.learn(n_steps=10000, n_episodes_per_fit=20) - # print('- learn n_episode=100 n_episodes_per_fit=150') + # print('- learn n_episode=100 n_episodes_per_fit=150') # FIXME add proper support for this configuration # core.learn(n_episodes=100, n_steps_per_fit=150)