diff --git a/algorithm/nn_models/layers.py b/algorithm/nn_models/layers.py index a12e759..2d78ac5 100644 --- a/algorithm/nn_models/layers.py +++ b/algorithm/nn_models/layers.py @@ -24,6 +24,7 @@ def __init__(self, input_size, dense_n=64, dense_depth=0, output_size=None): """ super().__init__() + self.input_size = input_size self.output_size = input_size dense = [] for i in range(dense_depth): @@ -40,6 +41,8 @@ def __init__(self, input_size, dense_n=64, dense_depth=0, output_size=None): self.dense = nn.Sequential(*dense) def forward(self, x): + assert x.shape[-1] == self.input_size + return self.dense(x) diff --git a/algorithm/sac_main.py b/algorithm/sac_main.py index f859f6c..c41bf16 100644 --- a/algorithm/sac_main.py +++ b/algorithm/sac_main.py @@ -3,6 +3,7 @@ import shutil import sys import time +import traceback from pathlib import Path import numpy as np @@ -235,6 +236,7 @@ def _run(self): except Exception as e: self._logger.error(e) + self._logger.error(traceback.format_exc()) self._logger.error('Exiting...') break diff --git a/ds/learner_trainer.py b/ds/learner_trainer.py index 29e70e8..3ed3ffa 100644 --- a/ds/learner_trainer.py +++ b/ds/learner_trainer.py @@ -4,6 +4,7 @@ import multiprocessing as mp import os import threading +import traceback from multiprocessing.connection import Connection from pathlib import Path from typing import List @@ -290,13 +291,17 @@ def run_train(self): with timer_train: with self.sac_lock: - step = self.sac.train(n_obses_list=n_obses_list, - n_actions=n_actions, - n_rewards=n_rewards, - next_obs_list=next_obs_list, - n_dones=n_dones, - n_mu_probs=n_mu_probs, - rnn_state=rnn_state) + try: + step = self.sac.train(n_obses_list=n_obses_list, + n_actions=n_actions, + n_rewards=n_rewards, + next_obs_list=next_obs_list, + n_dones=n_dones, + n_mu_probs=n_mu_probs, + rnn_state=rnn_state) + except Exception as e: + self._logger.error(e) + self._logger.error(traceback.format_exc()) if step % self.base_config['update_sac_bak_per_step'] == 0: self._update_sac_bak() diff --git a/ds/sac_ds_base.py b/ds/sac_ds_base.py index d3f9770..6aff1d3 100644 --- a/ds/sac_ds_base.py +++ b/ds/sac_ds_base.py @@ -127,7 +127,7 @@ def _random_action(self, action): if self.c_action_size: c_action = np.tanh(np.arctanh(c_action) + np.random.randn(batch, self.c_action_size) * self.noise) - return np.concatenate([d_action, c_action], axis=-1) + return np.concatenate([d_action, c_action], axis=-1).astype(np.float32) def choose_action(self, obs_list): action = super().choose_action(obs_list) diff --git a/envs/realcar/config.yaml b/envs/realcar/config.yaml index b82ae96..8fa6ac3 100644 --- a/envs/realcar/config.yaml +++ b/envs/realcar/config.yaml @@ -10,7 +10,6 @@ default: n_agents: 10 reset_on_iteration: false - max_iter: 100 reset_config: force_reset: true @@ -20,6 +19,9 @@ default: write_summary_per_step: 1000 n_step: 3 + burn_in_step: 10 + use_rnn: true + use_rnd: true rnd_n_sample: 50 diff --git a/envs/realcar/config_ds.yaml b/envs/realcar/config_ds.yaml index 4cd8506..601d4e6 100644 --- a/envs/realcar/config_ds.yaml +++ b/envs/realcar/config_ds.yaml @@ -21,5 +21,8 @@ default: save_model_per_step: 10000 # Save model every N steps n_step: 3 + burn_in_step: 10 + use_rnn: true + use_rnd: true rnd_n_sample: 50 diff --git a/envs/realcar/nn.py b/envs/realcar/nn.py index 4efdac9..fa3bcab 100644 --- a/envs/realcar/nn.py +++ b/envs/realcar/nn.py @@ -12,27 +12,23 @@ def _build_model(self): assert self.obs_shapes[2] == (84, 84, 3) assert self.obs_shapes[3] == (8,) - print('build_model') - - self.conv = m.ConvLayers(84, 84, 9, 'nature', + self.conv = m.ConvLayers(84, 84, 3 * 3, 'simple', out_dense_n=64, out_dense_depth=2) - self.rnn = m.GRU(self.conv.output_size + self.c_action_size, 64, 1) - - self.dense = m.LinearLayers(64 + 8 - EXTRA_SIZE, + self.dense = m.LinearLayers(self.conv.output_size + 8 - EXTRA_SIZE, dense_n=64, dense_depth=1) + self.rnn = m.GRU(64 + self.c_action_size, 64, 1) + def forward(self, obs_list, pre_action, rnn_state=None): *vis, vec = obs_list vec = vec[..., :-EXTRA_SIZE] - print(self.conv) - vis = self.conv(torch.cat(vis, dim=-1)) - output, hn = self.rnn(torch.cat([vis, pre_action], dim=-1), rnn_state) + state = self.dense(torch.cat([vis, vec], dim=-1)) - state = self.dense(torch.cat([output, vec], dim=-1)) + state, hn = self.rnn(torch.cat([state, pre_action], dim=-1), rnn_state) return state, hn diff --git a/envs/realcar/nn_blur_3.py b/envs/realcar/nn_blur_3.py index 444dd1b..b4687e3 100644 --- a/envs/realcar/nn_blur_3.py +++ b/envs/realcar/nn_blur_3.py @@ -3,7 +3,6 @@ import algorithm.nn_models as m from algorithm.nn_models.layers import Transform - from envs.realcar.nn import * diff --git a/envs/realcar/nn_blur_5.py b/envs/realcar/nn_blur_5.py index 43c8460..2846687 100644 --- a/envs/realcar/nn_blur_5.py +++ b/envs/realcar/nn_blur_5.py @@ -3,7 +3,6 @@ import algorithm.nn_models as m from algorithm.nn_models.layers import Transform - from envs.realcar.nn import * diff --git a/envs/realcar/nn_blur_7.py b/envs/realcar/nn_blur_7.py index b2acf0c..a6207d2 100644 --- a/envs/realcar/nn_blur_7.py +++ b/envs/realcar/nn_blur_7.py @@ -3,7 +3,6 @@ import algorithm.nn_models as m from algorithm.nn_models.layers import Transform - from envs.realcar.nn import * diff --git a/envs/realcar/nn_blur_9.py b/envs/realcar/nn_blur_9.py index 8b705cc..87013a5 100644 --- a/envs/realcar/nn_blur_9.py +++ b/envs/realcar/nn_blur_9.py @@ -3,7 +3,6 @@ import algorithm.nn_models as m from algorithm.nn_models.layers import Transform - from envs.realcar.nn import * diff --git a/envs/usv/config.yaml b/envs/usv/config.yaml index 4127b70..63d2933 100644 --- a/envs/usv/config.yaml +++ b/envs/usv/config.yaml @@ -26,100 +26,11 @@ visual: name: "vis_{time}" nn: nn_visual -visual_nature: - base_config: - name: "vis_nature_{time}" - nn: nn_visual_nature - - -ray_rnd50: - base_config: - name: "ray_rnd50_{time}" - nn: nn_ray - - sac_config: - use_rnd: true - -ray_rnd10: - base_config: - name: "ray_rnd10_{time}" - nn: nn_ray - - sac_config: - use_rnd: true - rnd_n_sample: 10 - -ray_rnd20: - base_config: - name: "ray_rnd20_{time}" - nn: nn_ray - - sac_config: - use_rnd: true - rnd_n_sample: 20 - -ray_rnd30: - base_config: - name: "ray_rnd30_{time}" - nn: nn_ray - - sac_config: - use_rnd: true - rnd_n_sample: 30 - -ray_rnd40: - base_config: - name: "ray_rnd40_{time}" - nn: nn_ray - - sac_config: - use_rnd: true - rnd_n_sample: 40 - -ray_nornd: - base_config: - name: "ray_nornd_{time}" - nn: nn_ray - - sac_config: - use_rnd: false - -ray_rnn: - base_config: - name: "ray_rnn_{time}" - nn: nn_ray_rnn - - sac_config: - use_rnn: true - burn_in_step: 30 - -ray_rnn_nornd: - base_config: - name: "ray_rnn_nornd_{time}" - nn: nn_ray_rnn - - sac_config: - use_rnn: true - burn_in_step: 30 - use_rnd: false - -ray_rnn_pre: - base_config: - name: "ray_rnn_pre_{time}" - nn: nn_ray_rnn - sac_config: + burn_in_step: 10 use_rnn: true - burn_in_step: 30 - use_prediction: true -ray_rnn_pre_noextra: +visual_nature: base_config: - name: "ray_rnn_pre_noextra_{time}" - nn: nn_ray_rnn - - sac_config: - use_rnn: true - burn_in_step: 30 - use_prediction: true - use_extra_data: false + name: "vis_nature_{time}" + nn: nn_visual_nature diff --git a/envs/usv/config_ds.yaml b/envs/usv/config_ds.yaml index c214375..9f57f03 100644 --- a/envs/usv/config_ds.yaml +++ b/envs/usv/config_ds.yaml @@ -13,9 +13,6 @@ default: evolver_enabled: false - replay_config: - batch_size: 1024 - reset_config: force_reset: true @@ -24,9 +21,17 @@ default: save_model_per_step: 10000 # Save model every N steps n_step: 3 + burn_in_step: 10 + use_rnn: true + use_rnd: true rnd_n_sample: 50 +visual: + base_config: + name: "vis_{time}" + nn: nn_visual + visual_nature: base_config: name: "vis_nature_{time}" diff --git a/envs/usv/nn_visual.py b/envs/usv/nn_visual.py index 1a22d97..10ef723 100644 --- a/envs/usv/nn_visual.py +++ b/envs/usv/nn_visual.py @@ -2,31 +2,35 @@ import algorithm.nn_models as m -EXTRA_SIZE = 4 +EXTRA_SIZE = 6 -class ModelRep(m.ModelBaseSimpleRep): +class ModelRep(m.ModelBaseRNNRep): def _build_model(self): assert self.obs_shapes[0] == (84, 84, 3) - assert self.obs_shapes[1] == (18,) - assert self.obs_shapes[2] == (18,) + assert self.obs_shapes[1] == (84, 84, 3) + assert self.obs_shapes[2] == (84, 84, 3) assert self.obs_shapes[3] == (11,) - self.conv = m.ConvLayers(84, 84, 3, 'simple', + self.conv = m.ConvLayers(84, 84, 3 * 3, 'simple', out_dense_n=64, out_dense_depth=2) self.dense = m.LinearLayers(self.conv.output_size + 11 - EXTRA_SIZE, dense_n=64, dense_depth=1) - def forward(self, obs_list): - vis, ray_1, ray_2, vec = obs_list + self.rnn = m.GRU(64 + self.c_action_size, 64, 1) + + def forward(self, obs_list, pre_action, rnn_state=None): + *vis, vec = obs_list vec = vec[..., :-EXTRA_SIZE] - vis = self.conv(vis) + vis = self.conv(torch.cat(vis, dim=-1)) state = self.dense(torch.cat([vis, vec], dim=-1)) - return state + state, hn = self.rnn(torch.cat([state, pre_action], dim=-1), rnn_state) + + return state, hn class ModelQ(m.ModelQ): diff --git a/envs/usv/nn_visual_nature.py b/envs/usv/nn_visual_nature.py index 0802d91..990c7bd 100644 --- a/envs/usv/nn_visual_nature.py +++ b/envs/usv/nn_visual_nature.py @@ -2,31 +2,35 @@ import algorithm.nn_models as m -EXTRA_SIZE = 4 +EXTRA_SIZE = 6 -class ModelRep(m.ModelBaseSimpleRep): +class ModelRep(m.ModelBaseRNNRep): def _build_model(self): assert self.obs_shapes[0] == (84, 84, 3) - assert self.obs_shapes[1] == (18,) - assert self.obs_shapes[2] == (18,) + assert self.obs_shapes[1] == (84, 84, 3) + assert self.obs_shapes[2] == (84, 84, 3) assert self.obs_shapes[3] == (11,) - self.conv = m.ConvLayers(84, 84, 3, 'nature', + self.conv = m.ConvLayers(84, 84, 3 * 3, 'nature', out_dense_n=64, out_dense_depth=2) - self.dense = m.LinearLayers(self.conv.output_size + 11 - EXTRA_SIZE, + self.dense = m.LinearLayers(self.conv.output_size + 8 - EXTRA_SIZE, dense_n=64, dense_depth=1) - def forward(self, obs_list): - vis, ray_1, ray_2, vec = obs_list + self.rnn = m.GRU(64 + self.c_action_size, 64, 1) + + def forward(self, obs_list, pre_action, rnn_state=None): + *vis, vec = obs_list vec = vec[..., :-EXTRA_SIZE] - vis = self.conv(vis) + vis = self.conv(torch.cat(vis, dim=-1)) state = self.dense(torch.cat([vis, vec], dim=-1)) - return state + state, hn = self.rnn(torch.cat([state, pre_action], dim=-1), rnn_state) + + return state, hn class ModelQ(m.ModelQ):