add traceback
fix ds random_action np.float64
BlueFisher committed Sep 4, 2021
1 parent add2a6e commit f43836c
Showing 15 changed files with 69 additions and 138 deletions.
3 changes: 3 additions & 0 deletions algorithm/nn_models/layers.py
@@ -24,6 +24,7 @@ def __init__(self, input_size, dense_n=64, dense_depth=0, output_size=None):
"""
super().__init__()

self.input_size = input_size
self.output_size = input_size
dense = []
for i in range(dense_depth):
@@ -40,6 +41,8 @@ def __init__(self, input_size, dense_n=64, dense_depth=0, output_size=None):
self.dense = nn.Sequential(*dense)

def forward(self, x):
assert x.shape[-1] == self.input_size

return self.dense(x)
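
The stored `input_size` together with the new assert in `forward` turns a silently mis-sized input into an immediate, readable failure. A minimal, hypothetical sketch of the same guard pattern (`GuardedDense` is not part of the repository):

```python
import torch
import torch.nn as nn

class GuardedDense(nn.Module):
    def __init__(self, input_size: int, dense_n: int = 64):
        super().__init__()
        self.input_size = input_size                # remember the expected feature width
        self.dense = nn.Linear(input_size, dense_n)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Fail fast if the caller concatenated observations into the wrong width.
        assert x.shape[-1] == self.input_size, \
            f'expected last dim {self.input_size}, got {x.shape[-1]}'
        return self.dense(x)

guard = GuardedDense(input_size=72)
guard(torch.zeros(4, 72))                           # fine
# guard(torch.zeros(4, 64))                         # AssertionError with both sizes in the message
```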


2 changes: 2 additions & 0 deletions algorithm/sac_main.py
@@ -3,6 +3,7 @@
import shutil
import sys
import time
import traceback
from pathlib import Path

import numpy as np
@@ -235,6 +236,7 @@ def _run(self):

except Exception as e:
self._logger.error(e)
self._logger.error(traceback.format_exc())
self._logger.error('Exiting...')
break
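
`self._logger.error(e)` records only the exception message; `traceback.format_exc()` adds the full stack trace of the exception currently being handled. A small, self-contained sketch of the pattern (the logger name is illustrative):

```python
import logging
import traceback

logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger('sac_main')

try:
    1 / 0
except Exception as e:
    logger.error(e)                            # message only: "division by zero"
    logger.error(traceback.format_exc())       # full stack trace of the active exception
```

For reference, `logger.exception(e)` inside an `except` block logs the message and the traceback in one call.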

19 changes: 12 additions & 7 deletions ds/learner_trainer.py
@@ -4,6 +4,7 @@
import multiprocessing as mp
import os
import threading
import traceback
from multiprocessing.connection import Connection
from pathlib import Path
from typing import List
@@ -290,13 +291,17 @@ def run_train(self):

with timer_train:
with self.sac_lock:
step = self.sac.train(n_obses_list=n_obses_list,
n_actions=n_actions,
n_rewards=n_rewards,
next_obs_list=next_obs_list,
n_dones=n_dones,
n_mu_probs=n_mu_probs,
rnn_state=rnn_state)
try:
step = self.sac.train(n_obses_list=n_obses_list,
n_actions=n_actions,
n_rewards=n_rewards,
next_obs_list=next_obs_list,
n_dones=n_dones,
n_mu_probs=n_mu_probs,
rnn_state=rnn_state)
except Exception as e:
self._logger.error(e)
self._logger.error(traceback.format_exc())

if step % self.base_config['update_sac_bak_per_step'] == 0:
self._update_sac_bak()
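
The try/except keeps the training loop alive when a single `train` call fails and preserves the stack trace. A hedged, self-contained sketch of the same guard with a fallback `step` so the periodic sync is skipped on failure (names and the threshold are illustrative, not the repository's code):

```python
import logging
import traceback

logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger('learner_trainer')
UPDATE_SAC_BAK_PER_STEP = 100                  # illustrative value

def train_step() -> int:
    raise RuntimeError('simulated training failure')

step = None
try:
    step = train_step()
except Exception as e:
    logger.error(e)
    logger.error(traceback.format_exc())       # keep the stack trace, keep the loop alive

if step is not None and step % UPDATE_SAC_BAK_PER_STEP == 0:
    pass                                        # sync the backup policy here
```
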
2 changes: 1 addition & 1 deletion ds/sac_ds_base.py
@@ -127,7 +127,7 @@ def _random_action(self, action):
if self.c_action_size:
c_action = np.tanh(np.arctanh(c_action) + np.random.randn(batch, self.c_action_size) * self.noise)

return np.concatenate([d_action, c_action], axis=-1)
return np.concatenate([d_action, c_action], axis=-1).astype(np.float32)

def choose_action(self, obs_list):
action = super().choose_action(obs_list)
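
The trailing `.astype(np.float32)` is the "fix ds random_action np.float64" part of the commit: `np.random.randn` returns float64, so adding the noise promotes `c_action`, and the concatenation would otherwise come back as float64. A quick demonstration of that promotion (shapes are illustrative):

```python
import numpy as np

d_action = np.zeros((2, 3), dtype=np.float32)
c_action = np.tanh(np.random.randn(2, 4))                # randn returns float64

joined = np.concatenate([d_action, c_action], axis=-1)
print(joined.dtype)                                       # float64: promoted by the noise term

print(joined.astype(np.float32).dtype)                    # float32: what the rest of the pipeline expects
```
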
4 changes: 3 additions & 1 deletion envs/realcar/config.yaml
@@ -10,7 +10,6 @@ default:

n_agents: 10
reset_on_iteration: false
max_iter: 100

reset_config:
force_reset: true
@@ -20,6 +19,9 @@ default:
write_summary_per_step: 1000

n_step: 3
burn_in_step: 10
use_rnn: true

use_rnd: true
rnd_n_sample: 50
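
`use_rnn: true` plus `burn_in_step: 10` is the usual recurrent-replay setup: the first steps of each sampled sequence only warm up the hidden state, and the remaining steps feed the loss. A hedged sketch of that slicing (plain `torch.nn.GRU`, illustrative sizes, not the repository's training code):

```python
import torch
import torch.nn as nn

burn_in_step, n_step = 10, 3
rnn = nn.GRU(input_size=8, hidden_size=64, batch_first=True)

seq = torch.randn(32, burn_in_step + n_step, 8)          # sampled sequence per agent

with torch.no_grad():                                     # warm up the hidden state only
    _, h = rnn(seq[:, :burn_in_step])

out, _ = rnn(seq[:, burn_in_step:], h)                    # these steps contribute to the loss
print(out.shape)                                          # torch.Size([32, 3, 64])
```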

3 changes: 3 additions & 0 deletions envs/realcar/config_ds.yaml
@@ -21,5 +21,8 @@ default:
save_model_per_step: 10000 # Save model every N steps

n_step: 3
burn_in_step: 10
use_rnn: true

use_rnd: true
rnd_n_sample: 50
16 changes: 6 additions & 10 deletions envs/realcar/nn.py
@@ -12,27 +12,23 @@ def _build_model(self):
assert self.obs_shapes[2] == (84, 84, 3)
assert self.obs_shapes[3] == (8,)

print('build_model')

self.conv = m.ConvLayers(84, 84, 9, 'nature',
self.conv = m.ConvLayers(84, 84, 3 * 3, 'simple',
out_dense_n=64, out_dense_depth=2)

self.rnn = m.GRU(self.conv.output_size + self.c_action_size, 64, 1)

self.dense = m.LinearLayers(64 + 8 - EXTRA_SIZE,
self.dense = m.LinearLayers(self.conv.output_size + 8 - EXTRA_SIZE,
dense_n=64, dense_depth=1)

self.rnn = m.GRU(64 + self.c_action_size, 64, 1)

def forward(self, obs_list, pre_action, rnn_state=None):
*vis, vec = obs_list
vec = vec[..., :-EXTRA_SIZE]

print(self.conv)

vis = self.conv(torch.cat(vis, dim=-1))

output, hn = self.rnn(torch.cat([vis, pre_action], dim=-1), rnn_state)
state = self.dense(torch.cat([vis, vec], dim=-1))

state = self.dense(torch.cat([output, vec], dim=-1))
state, hn = self.rnn(torch.cat([state, pre_action], dim=-1), rnn_state)

return state, hn
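
The rewritten block concatenates the three 84×84×3 camera observations along the channel axis (hence `3 * 3` conv input channels), fuses the visual features with the vector observation before the GRU, and only then recurs over the fused state plus the previous action. A hedged sketch of that ordering with plain torch stand-ins for `m.ConvLayers`, `m.LinearLayers` and `m.GRU` (all sizes besides the image shapes are assumed):

```python
import torch
import torch.nn as nn

EXTRA_SIZE = 2            # assumed; the real value is imported in envs/realcar/nn.py
c_action_size = 4         # assumed continuous-action width

conv = nn.Sequential(nn.Flatten(1), nn.LazyLinear(64))   # stand-in for m.ConvLayers(84, 84, 3 * 3, 'simple', ...)
dense = nn.Linear(64 + 8 - EXTRA_SIZE, 64)               # stand-in for m.LinearLayers(...)
gru = nn.GRU(64 + c_action_size, 64, batch_first=True)   # stand-in for m.GRU(64 + c_action_size, 64, 1)

vis = [torch.randn(5, 84, 84, 3) for _ in range(3)]      # three camera observations
vec = torch.randn(5, 8)[..., :-EXTRA_SIZE]
pre_action = torch.randn(5, c_action_size)

x = conv(torch.cat(vis, dim=-1))                          # (5, 84, 84, 9) -> (5, 64)
state = dense(torch.cat([x, vec], dim=-1))                # fuse vision and vector first
state, hn = gru(torch.cat([state, pre_action], dim=-1).unsqueeze(1))   # then recur
print(state.shape, hn.shape)                              # (5, 1, 64) (1, 5, 64)
```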

1 change: 0 additions & 1 deletion envs/realcar/nn_blur_3.py
@@ -3,7 +3,6 @@

import algorithm.nn_models as m
from algorithm.nn_models.layers import Transform

from envs.realcar.nn import *


1 change: 0 additions & 1 deletion envs/realcar/nn_blur_5.py
@@ -3,7 +3,6 @@

import algorithm.nn_models as m
from algorithm.nn_models.layers import Transform

from envs.realcar.nn import *


1 change: 0 additions & 1 deletion envs/realcar/nn_blur_7.py
@@ -3,7 +3,6 @@

import algorithm.nn_models as m
from algorithm.nn_models.layers import Transform

from envs.realcar.nn import *


1 change: 0 additions & 1 deletion envs/realcar/nn_blur_9.py
@@ -3,7 +3,6 @@

import algorithm.nn_models as m
from algorithm.nn_models.layers import Transform

from envs.realcar.nn import *


97 changes: 4 additions & 93 deletions envs/usv/config.yaml
@@ -26,100 +26,11 @@ visual:
name: "vis_{time}"
nn: nn_visual

visual_nature:
base_config:
name: "vis_nature_{time}"
nn: nn_visual_nature


ray_rnd50:
base_config:
name: "ray_rnd50_{time}"
nn: nn_ray

sac_config:
use_rnd: true

ray_rnd10:
base_config:
name: "ray_rnd10_{time}"
nn: nn_ray

sac_config:
use_rnd: true
rnd_n_sample: 10

ray_rnd20:
base_config:
name: "ray_rnd20_{time}"
nn: nn_ray

sac_config:
use_rnd: true
rnd_n_sample: 20

ray_rnd30:
base_config:
name: "ray_rnd30_{time}"
nn: nn_ray

sac_config:
use_rnd: true
rnd_n_sample: 30

ray_rnd40:
base_config:
name: "ray_rnd40_{time}"
nn: nn_ray

sac_config:
use_rnd: true
rnd_n_sample: 40

ray_nornd:
base_config:
name: "ray_nornd_{time}"
nn: nn_ray

sac_config:
use_rnd: false

ray_rnn:
base_config:
name: "ray_rnn_{time}"
nn: nn_ray_rnn

sac_config:
use_rnn: true
burn_in_step: 30

ray_rnn_nornd:
base_config:
name: "ray_rnn_nornd_{time}"
nn: nn_ray_rnn

sac_config:
use_rnn: true
burn_in_step: 30
use_rnd: false

ray_rnn_pre:
base_config:
name: "ray_rnn_pre_{time}"
nn: nn_ray_rnn

sac_config:
burn_in_step: 10
use_rnn: true
burn_in_step: 30
use_prediction: true

ray_rnn_pre_noextra:
visual_nature:
base_config:
name: "ray_rnn_pre_noextra_{time}"
nn: nn_ray_rnn

sac_config:
use_rnn: true
burn_in_step: 30
use_prediction: true
use_extra_data: false
name: "vis_nature_{time}"
nn: nn_visual_nature
11 changes: 8 additions & 3 deletions envs/usv/config_ds.yaml
@@ -13,9 +13,6 @@ default:

evolver_enabled: false

replay_config:
batch_size: 1024

reset_config:
force_reset: true

@@ -24,9 +21,17 @@ default:
save_model_per_step: 10000 # Save model every N steps

n_step: 3
burn_in_step: 10
use_rnn: true

use_rnd: true
rnd_n_sample: 50

visual:
base_config:
name: "vis_{time}"
nn: nn_visual

visual_nature:
base_config:
name: "vis_nature_{time}"
22 changes: 13 additions & 9 deletions envs/usv/nn_visual.py
@@ -2,31 +2,35 @@

import algorithm.nn_models as m

EXTRA_SIZE = 4
EXTRA_SIZE = 6


class ModelRep(m.ModelBaseSimpleRep):
class ModelRep(m.ModelBaseRNNRep):
def _build_model(self):
assert self.obs_shapes[0] == (84, 84, 3)
assert self.obs_shapes[1] == (18,)
assert self.obs_shapes[2] == (18,)
assert self.obs_shapes[1] == (84, 84, 3)
assert self.obs_shapes[2] == (84, 84, 3)
assert self.obs_shapes[3] == (11,)

self.conv = m.ConvLayers(84, 84, 3, 'simple',
self.conv = m.ConvLayers(84, 84, 3 * 3, 'simple',
out_dense_n=64, out_dense_depth=2)

self.dense = m.LinearLayers(self.conv.output_size + 11 - EXTRA_SIZE,
dense_n=64, dense_depth=1)

def forward(self, obs_list):
vis, ray_1, ray_2, vec = obs_list
self.rnn = m.GRU(64 + self.c_action_size, 64, 1)

def forward(self, obs_list, pre_action, rnn_state=None):
*vis, vec = obs_list
vec = vec[..., :-EXTRA_SIZE]

vis = self.conv(vis)
vis = self.conv(torch.cat(vis, dim=-1))

state = self.dense(torch.cat([vis, vec], dim=-1))

return state
state, hn = self.rnn(torch.cat([state, pre_action], dim=-1), rnn_state)

return state, hn


class ModelQ(m.ModelQ):
24 changes: 14 additions & 10 deletions envs/usv/nn_visual_nature.py
@@ -2,31 +2,35 @@

import algorithm.nn_models as m

EXTRA_SIZE = 4
EXTRA_SIZE = 6


class ModelRep(m.ModelBaseSimpleRep):
class ModelRep(m.ModelBaseRNNRep):
def _build_model(self):
assert self.obs_shapes[0] == (84, 84, 3)
assert self.obs_shapes[1] == (18,)
assert self.obs_shapes[2] == (18,)
assert self.obs_shapes[1] == (84, 84, 3)
assert self.obs_shapes[2] == (84, 84, 3)
assert self.obs_shapes[3] == (11,)

self.conv = m.ConvLayers(84, 84, 3, 'nature',
self.conv = m.ConvLayers(84, 84, 3 * 3, 'nature',
out_dense_n=64, out_dense_depth=2)

self.dense = m.LinearLayers(self.conv.output_size + 11 - EXTRA_SIZE,
self.dense = m.LinearLayers(self.conv.output_size + 8 - EXTRA_SIZE,
dense_n=64, dense_depth=1)

def forward(self, obs_list):
vis, ray_1, ray_2, vec = obs_list
self.rnn = m.GRU(64 + self.c_action_size, 64, 1)

def forward(self, obs_list, pre_action, rnn_state=None):
*vis, vec = obs_list
vec = vec[..., :-EXTRA_SIZE]

vis = self.conv(vis)
vis = self.conv(torch.cat(vis, dim=-1))

state = self.dense(torch.cat([vis, vec], dim=-1))

return state
state, hn = self.rnn(torch.cat([state, pre_action], dim=-1), rnn_state)

return state, hn


class ModelQ(m.ModelQ):
