In [None]:
!unzip custom_hopper.zip

In [5]:
import gym
from stable_baselines3 import SAC
from env.custom_hopper import *
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.sac.policies import MlpPolicy
from optimize_hyperparam import *
from os.path import exists

<h3>Create the source domain environment and train (or load) a SAC agent</h3>

In [3]:
# Build the environment for the source domain from its gym id.
SOURCE_ENV_ID = "CustomHopper-source-v0"
env = gym.make(SOURCE_ENV_ID)

In [None]:
# Obtain a SAC agent for the source domain: reuse the checkpoint written by a
# previous run if it is on disk, otherwise train from scratch and save it.
# The existence check uses the on-disk file name (with ".zip"); save/load are
# given the bare checkpoint name.
if exists("SAC_source_env.zip"):
    # Bind the cached model to `env` so that, whichever branch runs, `model`
    # ends up attached to an environment. Without this, later calls such as
    # model.learn(...) or model.get_env() would only work on runs that retrained.
    model = SAC.load("SAC_source_env", env=env)
    print(f"Learning rate source domain: {model.learning_rate}")
else:
    model = SAC(MlpPolicy, env, verbose=1)
    model.learn(total_timesteps=50000, log_interval=50)
    model.save("SAC_source_env")

# Summary of the environment currently bound to `env`.
print('State space:', env.observation_space)  # state-space
print('Action space:', env.action_space)  # action-space
print('Dynamics parameters:', env.get_parameters())  # masses of each link of the Hopper

<h3>Create the target domain environment and train (or load) a SAC agent</h3>

In [7]:
# Build the environment for the target domain from its gym id.
TARGET_ENV_ID = "CustomHopper-target-v0"
env = gym.make(TARGET_ENV_ID)

In [8]:
# Obtain a SAC agent for the target domain: reuse the checkpoint written by a
# previous run if it is on disk, otherwise train from scratch and save it.
# The existence check uses the on-disk file name (with ".zip"); save/load are
# given the bare checkpoint name.
if exists("SAC_target_env.zip"):
    # Bind the cached model to `env` so that, whichever branch runs, `model`
    # ends up attached to an environment. Without this, later calls such as
    # model.learn(...) or model.get_env() would only work on runs that retrained.
    model = SAC.load("SAC_target_env", env=env)
    print(f"Learning rate: target domain: {model.learning_rate}")
else:
    model = SAC(MlpPolicy, env, verbose=1)
    model.learn(total_timesteps=50000, log_interval=50)
    model.save("SAC_target_env")

# Summary of the environment currently bound to `env`.
print('State space:', env.observation_space)  # state-space
print('Action space:', env.action_space)  # action-space
print('Dynamics parameters:', env.get_parameters())  # masses of each link of the Hopper

Learning rate: target domain: 0.0003
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
Dynamics parameters: [3.53429174 3.92699082 2.71433605 5.0893801 ]


<h3>Hyperparameter optimization for source domain</h3>

In [None]:
# Run the hyperparameter search script as a subprocess for SAC on the
# source-domain environment, reading settings from standard_config.yml.
# NOTE(review): the meaning of -n, -optimize and --n-jobs is defined by
# optimize_hyperparam.py, which is not shown here — confirm before tuning them.
!python3 optimize_hyperparam.py --algo sac --env CustomHopper-source-v0 -n 500 -optimize --n-jobs 4 --conf-file standard_config.yml

<h3>Hyperparameter optimization for target domain</h3>

In [None]:
# Run the hyperparameter search script as a subprocess for SAC on the
# target-domain environment, reading settings from standard_config.yml.
# NOTE(review): the meaning of -n, -optimize and --n-jobs is defined by
# optimize_hyperparam.py, which is not shown here — confirm before tuning them.
!python3 optimize_hyperparam.py --algo sac --env CustomHopper-target-v0 -n 500 -optimize --n-jobs 4 --conf-file standard_config.yml