-
Notifications
You must be signed in to change notification settings - Fork 12
/
sac_hopper.yaml
54 lines (47 loc) · 1.44 KB
/
sac_hopper.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Experiment metadata: the script to run and how this experiment is labelled.
meta_data:
  script_path: run_scripts/sac_alpha_exp_script.py
  exp_name: test_sac_hopper
  description: Train an agent using Soft-Actor-Critic
  # NOTE(review): presumably the number of parallel worker processes used by
  # the launcher — confirm against the run script.
  num_workers: 2
  using_gpus: true
# -----------------------------------------------------------------------------
# NOTE(review): each key here holds a list of values; presumably the launcher
# runs one experiment per listed value — confirm against the run script.
variables:
  seed: [0]
# -----------------------------------------------------------------------------
# Settings shared by every run of this experiment.
# NOTE(review): the grouping of keys under rl_alg_params / sac_params /
# env_specs follows the section order of the file — confirm against the schema
# read by run_scripts/sac_alpha_exp_script.py.
constants:
  net_size: 256
  num_hidden_layers: 2

  rl_alg_params:
    num_epochs: 102
    num_steps_per_epoch: 10000
    num_steps_between_train_calls: 1000
    num_train_steps_per_train_call: 1000
    num_steps_per_eval: 10000
    max_path_length: 1000
    min_steps_before_training: 0
    eval_deterministic: true
    batch_size: 512
    replay_buffer_size: 1000000
    no_terminal: false
    wrap_absorbing: false
    save_best: true
    freq_saving: 1
    save_replay_buffer: false
    # save_environment: false

  sac_params:
    alpha: 0.2
    reward_scale: 1.0
    discount: 0.99
    soft_target_tau: 0.005
    policy_lr: 0.0003
    qf_lr: 0.0003
    vf_lr: 0.0003
    policy_mean_reg_weight: 0.001
    policy_std_reg_weight: 0.001

  env_specs:
    env_name: 'hopper'
    env_kwargs: {}
    # This parameter defines how many vectorized environments are created.
    env_num: 4
    # These two seeds are no longer needed: in our implementation the script
    # makes all seeds the same (see the script file). You can change the
    # script if you want these values to take effect.
    eval_env_seed: 0
    training_env_seed: 0