In [1]:

import warnings
warnings.filterwarnings('ignore')

import gym
from mjrl.utils.gym_env import GymEnv
from mjrl.policies.gaussian_mlp import MLP
from mjrl.baselines.mlp_baseline import MLPBaseline
from mjrl.algos.ppo_clip import PPO
from mjrl.utils.train_agent import train_agent
import myosuite

# ---------------------------------------------------------------------------
# Environment
# ---------------------------------------------------------------------------
# Create the 'ArmReachFixed-v0' task through gym and reset it once; the value
# returned by reset() is intentionally not kept.
env = gym.make('ArmReachFixed-v0')
env.reset()

# ---------------------------------------------------------------------------
# Hyperparameters
# ---------------------------------------------------------------------------
policy_size = (32, 32)         # hidden_sizes handed to the policy MLP
vf_hidden_size = (128, 128)    # hidden_sizes handed to the baseline MLP
seed = 123                     # reused for the policy, the agent and train_agent
rl_step_size = 0.1             # forwarded to PPO as normalized_step_size

# mjrl-side wrapper around the gym environment; its `spec` is what the policy
# and the baseline are built from.
e = GymEnv(env)

# ---------------------------------------------------------------------------
# Policy, baseline and agent
# ---------------------------------------------------------------------------
policy = MLP(
    e.spec,
    hidden_sizes=policy_size,
    seed=seed,
    init_log_std=-0.25,
    min_log_std=-1.0,
)

baseline = MLPBaseline(
    e.spec,
    reg_coef=1e-3,
    batch_size=64,
    hidden_sizes=vf_hidden_size,
    epochs=5,
    learn_rate=1e-3,
)

# NOTE(review): `normalized_step_size` and `tensorboard_log` are passed straight
# through to PPO -- confirm that PPO actually consumes them rather than
# silently ignoring them. The log directory name mentions "objhold" although
# the environment created above is 'ArmReachFixed-v0'; verify this is intended.
agent = PPO(
    e,
    policy,
    baseline,
    normalized_step_size=rl_step_size,
    seed=seed,
    save_logs=True,
    tensorboard_log="./ppo_objhold_tensorboard/",
)

# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
_rule = "========================================"

for _line in (_rule, "Starting policy learning", _rule):
    print(_line)

# Settings for the training run, collected in one place so they are easy to
# scan and tweak.
training_options = {
    "seed": seed,
    "niter": 400,
    "gamma": 0.995,
    "gae_lambda": 0.97,
    "sample_mode": "trajectories",
    "num_traj": 96,
    "num_samples": 0,
    "save_freq": 100,
    "evaluation_rollouts": 10,
}

# NOTE(review): job_name='.' refers to the current directory. The saved output
# of this cell shows a run that resumed from previously stored logs, so stale
# results in that directory may be picked up -- confirm this is intended.
train_agent(job_name='.', agent=agent, **training_options)

for _line in (_rule, "Job Finished.", _rule):
    print(_line)


MyoSuite:> Registering Myo Envs
[36m    MyoSuite: A contact-rich simulation suite for musculoskeletal motor control
        Vittorio Caggiano, Huawei Wang, Guillaume Durandau, Massimo Sartori, Vikash Kumar
        L4DC-2019 | https://sites.google.com/view/myosuite
    [0m
Starting policy learning
Reading: logs\log.csv
Log read from logs\log.csv: had 400 entries
Loaded last saved iteration: 300
Resuming from an existing job folder ...
......................................................................................
ITERATION : 301 
Performing evaluation rollouts ........
[ Sat Mar  9 15:47:18 2024 ]  301 -398.60 -444.47 -100000000.00 
------------------  -------------
VF_error_after         0.00326685
VF_error_before        0.00337235
env_samples         1056
eval_score          -444.472
eval_success           0
iteration            300
kl_dist                0
num_samples         1056
rollout_success        0
running_score       -398.602
rwd_dense             -2.65438
rwd_sparse