In [None]:
# Extract the custom Hopper package next to the notebook.
# -n: never overwrite files that already exist, so re-running this cell after
#     the archive has been extracted does not stop on unzip's interactive
#     "replace?" prompt (a notebook cell cannot answer it) and does not clobber
#     local edits to the extracted files.
!unzip -n custom_hopper.zip

In [5]:
import gym
from stable_baselines3 import SAC
from env.custom_hopper import *
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.sac.policies import MlpPolicy
from optimize_hyperparam import *
from os.path import exists

<h3>Register and train source domain environment</h3>

In [3]:
# Instantiate the source-domain Hopper from its gym id.
# NOTE(review): the id is presumably registered as a side effect of importing
# env.custom_hopper in the imports cell -- confirm.
SOURCE_ENV_ID = "CustomHopper-source-v0"
env = gym.make(SOURCE_ENV_ID)

In [4]:
# Train-or-load guard for the source-domain SAC agent: reuse the saved
# checkpoint when it exists, otherwise train from scratch and save it, so a
# full re-run of the notebook does not have to retrain every time.
SOURCE_MODEL_PATH = "SAC_source_env"   # checkpoint is stored on disk as "<path>.zip"
SOURCE_TRAIN_TIMESTEPS = 50000         # environment steps used when training from scratch
SOURCE_LOG_INTERVAL = 50               # value forwarded to model.learn(log_interval=...)

if exists(f"{SOURCE_MODEL_PATH}.zip"):
    # Pass env so the loaded agent is bound to the environment, exactly like
    # the freshly trained one in the else-branch; without it the loaded model
    # has no environment attached and the cell leaves `model` in a different
    # state depending on whether the checkpoint was present.
    model = SAC.load(SOURCE_MODEL_PATH, env=env)
    print(f"Learning rate source domain: {model.learning_rate}")
else:
    model = SAC(MlpPolicy, env, verbose=1)
    model.learn(total_timesteps=SOURCE_TRAIN_TIMESTEPS, log_interval=SOURCE_LOG_INTERVAL)
    model.save(SOURCE_MODEL_PATH)

# Summarise the environment the agent is attached to.
print('State space:', env.observation_space)  # state-space
print('Action space:', env.action_space)  # action-space
print('Dynamics parameters:', env.get_parameters())  # masses of each link of the Hopper

NameError: name 'exists' is not defined

<h3>Register and train target domain environment</h3>

In [7]:
# Instantiate the target-domain Hopper from its gym id; this rebinds `env`,
# replacing whatever environment the name referred to before.
# NOTE(review): the id is presumably registered as a side effect of importing
# env.custom_hopper in the imports cell -- confirm.
TARGET_ENV_ID = "CustomHopper-target-v0"
env = gym.make(TARGET_ENV_ID)

In [8]:
# Train-or-load guard for the target-domain SAC agent: reuse the saved
# checkpoint when it exists, otherwise train from scratch and save it, so a
# full re-run of the notebook does not have to retrain every time.
TARGET_MODEL_PATH = "SAC_target_env"   # checkpoint is stored on disk as "<path>.zip"
TARGET_TRAIN_TIMESTEPS = 50000         # environment steps used when training from scratch
TARGET_LOG_INTERVAL = 50               # value forwarded to model.learn(log_interval=...)

if exists(f"{TARGET_MODEL_PATH}.zip"):
    # Pass env so the loaded agent is bound to the environment, exactly like
    # the freshly trained one in the else-branch; without it the loaded model
    # has no environment attached and the cell leaves `model` in a different
    # state depending on whether the checkpoint was present.
    model = SAC.load(TARGET_MODEL_PATH, env=env)
    print(f"Learning rate: target domain: {model.learning_rate}")
else:
    model = SAC(MlpPolicy, env, verbose=1)
    model.learn(total_timesteps=TARGET_TRAIN_TIMESTEPS, log_interval=TARGET_LOG_INTERVAL)
    model.save(TARGET_MODEL_PATH)

# Summarise the environment the agent is attached to.
print('State space:', env.observation_space)  # state-space
print('Action space:', env.action_space)  # action-space
print('Dynamics parameters:', env.get_parameters())  # masses of each link of the Hopper

Learning rate: target domain: 0.0003
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
Dynamics parameters: [3.53429174 3.92699082 2.71433605 5.0893801 ]


<h3>Hyperparameter optimization for source domain</h3>

In [None]:
# Run the SAC hyperparameter search on the source-domain environment, using 4
# parallel jobs and the defaults from standard_config.yml.
# NOTE(review): the log recorded for the identical target-domain command in
# this notebook prints "Overwriting n_timesteps with n=500", i.e. `-n 500`
# appears to set the training timesteps per trial, not the number of trials.
# If 500 trials were intended, check the script's CLI for a separate
# trial-count option -- TODO confirm against optimize_hyperparam.py.
!python3 optimize_hyperparam.py --algo sac --env CustomHopper-source-v0 -n 500 -optimize --n-jobs 4 --conf-file standard_config.yml

<h3>Hyperparameter optimization for target domain</h3>

In [3]:
# Run the SAC hyperparameter search on the target-domain environment, using 4
# parallel jobs and the defaults from standard_config.yml.
# NOTE(review): the recorded output of this cell prints
# "Overwriting n_timesteps with n=500", i.e. `-n 500` appears to replace the
# config's n_timesteps (50000) with 500 training steps per trial rather than
# request 500 trials; the sampled `learning_starts` (e.g. 20000) then exceeds
# the whole training budget. If 500 trials were intended, check the script's
# CLI for a separate trial-count option -- TODO confirm against
# optimize_hyperparam.py.
!python3 optimize_hyperparam.py --algo sac --env CustomHopper-target-v0 -n 500 -optimize --n-jobs 4 --conf-file standard_config.yml

Seed: 1150878785
Loading hyperparameters from: standard_config.yml
Default hyperparameters for environment (ones being tuned will be overridden):
OrderedDict([('learning_rate', 'lin_7.3e-4'),
             ('n_timesteps', 50000),
             ('policy', 'MlpPolicy')])
Using 1 environments
Overwriting n_timesteps with n=500
Doing 1 intermediate evaluations for pruning based on the number of timesteps. (1 evaluation every 100k timesteps)
Optimizing hyperparameters
Sampler: tpe - Pruner: median
[32m[I 2023-05-18 16:50:20,177][0m A new study created in memory with name: no-name-16a3b04f-4b7d-4722-b48a-155e4da2b0b3[0m
  log_std_init = trial.suggest_uniform("log_std_init", -4, 1)
[32m[I 2023-05-18 16:50:22,058][0m Trial 2 finished with value: 34.783894 and parameters: {'gamma': 0.9, 'learning_rate': 0.08192304995376866, 'batch_size': 32, 'buffer_size': 10000, 'learning_starts': 20000, 'train_freq': 32, 'tau': 0.02, 'log_std_init': -0.88828706081688, 'net_arch': 'big'}. Best is trial 2 wi