# exp_specs/sac/sac_hopper.yaml

# Launch metadata for this experiment: the script to run, the experiment name
# and description, and the worker/GPU settings.
# NOTE(review): how num_workers and using_gpus are interpreted is decided by
# the launcher, which is not part of this file — confirm there.
meta_data:
  script_path: run_scripts/sac_alpha_exp_script.py
  exp_name: test_sac_hopper
  description: Train an agent using Soft-Actor-Critic
  num_workers: 2
  using_gpus: true
# -----------------------------------------------------------------------------
# List-valued settings; a single seed (0) is listed here.
# NOTE(review): presumably the launcher starts one run per listed value —
# confirm against the launcher before adding more entries.
variables:
  seed: [0]

# -----------------------------------------------------------------------------
# Scalar settings and nested parameter groups for the experiment.
# NOTE(review): presumably held fixed across runs, in contrast to `variables` —
# confirm against the launcher.
constants:
  # NOTE(review): presumably the hidden-layer width and the number of hidden
  # layers of the networks built by the run script — confirm in the script.
  net_size: 256
  num_hidden_layers: 2

  # Training-loop settings: epoch/step counts, evaluation, replay buffer and
  # checkpointing options.
  # NOTE(review): the exact meaning of each key is defined by the run script,
  # which is not visible here — confirm there before changing values.
  rl_alg_params:
    num_epochs: 102
    num_steps_per_epoch: 10000
    # These two values are equal (1000 each), and num_steps_per_epoch is ten
    # times num_steps_between_train_calls.
    num_steps_between_train_calls: 1000
    num_train_steps_per_train_call: 1000
    num_steps_per_eval: 10000
    max_path_length: 1000
    # Set to 0.
    # NOTE(review): presumably means training starts without a warm-up phase —
    # confirm in the run script.
    min_steps_before_training: 0

    eval_deterministic: true

    batch_size: 512
    replay_buffer_size: 1000000
    no_terminal: false
    wrap_absorbing: false

    save_best: true
    freq_saving: 1
    save_replay_buffer: false
    # Disabled option, kept for reference:
    # save_environment: false

  # Hyperparameters for the Soft-Actor-Critic learner.
  sac_params:
    # NOTE(review): presumably the entropy temperature (the script is named
    # sac_alpha_exp_script.py) — confirm in the script.
    alpha: 0.2
    reward_scale: 1.0
    discount: 0.99
    soft_target_tau: 0.005
    # All three learning rates share the same value (0.0003).
    policy_lr: 0.0003
    qf_lr: 0.0003
    vf_lr: 0.0003
    # Both regularization weights share the same value (0.001).
    policy_mean_reg_weight: 0.001
    policy_std_reg_weight: 0.001

  # Environment selection and construction settings.
  env_specs:
    env_name: 'hopper'
    env_kwargs: {}
    # This parameter defines how many vec envs are created.
    env_num: 4
    # eval_env_seed and training_env_seed are no longer needed: in our
    # implementation the script makes all seeds the same (see the script file).
    # You can change the script if you want these two seeds to take effect.
    eval_env_seed: 0
    training_env_seed: 0