In [None]:
import os
from pathlib import Path
import numpy as np

from applied_planning.envs import Lite6CubePointingEnv

BASE_PATH = "/Users/braeden/Development/applied-planning/"
model_path = BASE_PATH + "src/applied_planning/sim/assets/ufactory_lite6/lite6_gripper_narrow.xml"

# create environment
print("Creating environment...")
env = Lite6CubePointingEnv(
    model_path=str(model_path),
    render_mode=None,           # set to "human" to watch training (will be slow!)
    max_steps=200,
    success_threshold=0.05,     # 5cm
    reward_scale=10.0,
    cube_placement_radius=0.3,
    ee_site_name="end_effector"
)

print(f"Observation space: {env.observation_space}")
print(f"Action space: {env.action_space}")

Creating environment...
✓ Added 1 cubes to scene within 0.3m radius
IK solver using site: end_effector
Offscreen renderer initialized (notebook-friendly mode)
Observation space: Box(-inf, inf, (12,), float32)
Action space: Box(-1.0, 1.0, (6,), float32)


In [None]:
obs, info = env.reset()
print(f"Initial observation shape: {obs.shape}")
print(f"Cube position: {info['cube_pos']}")
print(f"EE position: {info['ee_pos']}")
print(f"Initial distance: {info['distance']:.4f} m")

# run a few random steps
for step in range(10):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    print(f"Step {step+1}: distance={info['distance']:.4f} m, reward={reward:.2f}")
    if terminated or truncated:
        break

Initial observation shape: (12,)
Cube position: [0.02319782 0.15650207 0.03      ]
EE position: [ 8.69984251e-02 -2.11781530e-06  7.24896309e-02]
Initial distance: 0.1743 m
Step 1: distance=0.6744 m, reward=-16.75
Step 2: distance=0.1393 m, reward=9.31
Step 3: distance=0.3536 m, reward=-7.82
Step 4: distance=0.4432 m, reward=-6.22
Step 5: distance=0.4873 m, reward=-5.75
Step 6: distance=0.6947 m, reward=-11.10
Step 7: distance=0.6518 m, reward=-5.66
Step 8: distance=0.8198 m, reward=-11.56
Step 9: distance=0.6649 m, reward=-3.55
Step 10: distance=0.2001 m, reward=7.29


In [3]:
from stable_baselines3 import PPO, SAC, TD3
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback

# make environemnt for stable-baselines3
def make_env():
    return Lite6CubePointingEnv(
        model_path=str(model_path),
        render_mode=None,
        max_steps=200,
        success_threshold=0.05,
        reward_scale=10.0,
        cube_placement_radius=0.3,
        ee_site_name="end_effector"
    )

# create vectorized environment
train_env = DummyVecEnv([make_env])
eval_env = DummyVecEnv([make_env])

# setup callbacks
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path="./logs/best_model",
    log_path="./logs/eval",
    eval_freq=5000,
    deterministic=True,
    render=False
)

checkpoint_callback = CheckpointCallback(
    save_freq=10000,
    save_path="./logs/checkpoints",
    name_prefix="cube_pointing_model"
)

# create RL model - using SAC (works well for continuous control)
print("\nCreating SAC model...")
model = SAC(
    "MlpPolicy",
    train_env,
    verbose=1,
    learning_rate=3e-4,
    buffer_size=100000,
    learning_starts=1000,
    batch_size=256,
    tau=0.005,
    gamma=0.99,
    train_freq=1,
    gradient_steps=1,
    tensorboard_log="./logs/tensorboard"
)

print("\n" + "="*60)
print("Training Configuration:")
print("="*60)
print(f"Algorithm: SAC")
print(f"Total timesteps: 100,000")
print(f"Eval frequency: 5,000 steps")
print(f"Checkpoint frequency: 10,000 steps")
print("="*60)

# train the model
print("\nStarting training...")
print("(This will take a while. Press Ctrl+C to stop early)\n")

try:
    model.learn(
        total_timesteps=100000,
        callback=[eval_callback, checkpoint_callback],
        progress_bar=True
    )

    # save final model
    model.save("./logs/cube_pointing_final")
    print("\n✓ Training complete! Model saved to ./logs/cube_pointing_final")

except KeyboardInterrupt:
    print("\n\nTraining interrupted by user")
    model.save("./logs/cube_pointing_interrupted")
    print("Model saved to ./logs/cube_pointing_interrupted")



✓ Added 1 cubes to scene within 0.3m radius
IK solver using site: end_effector
Offscreen renderer initialized (notebook-friendly mode)
✓ Added 1 cubes to scene within 0.3m radius
IK solver using site: end_effector
Offscreen renderer initialized (notebook-friendly mode)

Creating SAC model...
Using cpu device

Training Configuration:
Algorithm: SAC
Total timesteps: 100,000
Eval frequency: 5,000 steps
Checkpoint frequency: 10,000 steps

Starting training...
(This will take a while. Press Ctrl+C to stop early)

Logging to ./logs/tensorboard/SAC_2


Output()

---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 4        |
|    fps             | 2398     |
|    time_elapsed    | 0        |
|    total_timesteps | 800      |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.125    |
| time/              |          |
|    episodes        | 8        |
|    fps             | 435      |
|    time_elapsed    | 3        |
|    total_timesteps | 1549     |
| train/             |          |
|    actor_loss      | 17.1     |
|    critic_loss     | 31       |
|    ent_coef        | 0.851    |
|    ent_coef_loss   | -1.48    |
|    learning_rate   | 0.0003   |
|    n_updates       | 548      |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.167    |
| time/              |          |
|    episodes        | 12       |
|    fps             | 309      |
|    time_elapsed    | 7        |
|    total_timesteps | 2178     |
| train/             |          |
|    actor_loss      | 48.3     |
|    critic_loss     | 19.1     |
|    ent_coef        | 0.714    |
|    ent_coef_loss   | -2.88    |
|    learning_rate   | 0.0003   |
|    n_updates       | 1177     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.188    |
| time/              |          |
|    episodes        | 16       |
|    fps             | 273      |
|    time_elapsed    | 10       |
|    total_timesteps | 2822     |
| train/             |          |
|    actor_loss      | 70.1     |
|    critic_loss     | 17.5     |
|    ent_coef        | 0.595    |
|    ent_coef_loss   | -4.18    |
|    learning_rate   | 0.0003   |
|    n_updates       | 1821     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.15     |
| time/              |          |
|    episodes        | 20       |
|    fps             | 242      |
|    time_elapsed    | 14       |
|    total_timesteps | 3622     |
| train/             |          |
|    actor_loss      | 77.7     |
|    critic_loss     | 17.9     |
|    ent_coef        | 0.476    |
|    ent_coef_loss   | -5.49    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2621     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.125    |
| time/              |          |
|    episodes        | 24       |
|    fps             | 233      |
|    time_elapsed    | 18       |
|    total_timesteps | 4422     |
| train/             |          |
|    actor_loss      | 115      |
|    critic_loss     | 21.4     |
|    ent_coef        | 0.385    |
|    ent_coef_loss   | -6.47    |
|    learning_rate   | 0.0003   |
|    n_updates       | 3421     |
---------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 194       |
|    mean_reward     | -1.28e+03 |
|    success_rate    | 0.2       |
| time/              |           |
|    total_timesteps | 5000      |
| train/             |           |
|    actor_loss      | 123       |
|    critic_loss     | 65.8      |
|    ent_coef        | 0.33      |
|    ent_coef_loss   | -6.47     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3999      |
----------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.143    |
| time/              |          |
|    episodes        | 28       |
|    fps             | 222      |
|    time_elapsed    | 22       |
|    total_timesteps | 5065     |
| train/             |          |
|    actor_loss      | 129      |
|    critic_loss     | 22.8     |
|    ent_coef        | 0.325    |
|    ent_coef_loss   | -7.08    |
|    learning_rate   | 0.0003   |
|    n_updates       | 4064     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.125    |
| time/              |          |
|    episodes        | 32       |
|    fps             | 209      |
|    time_elapsed    | 28       |
|    total_timesteps | 5865     |
| train/             |          |
|    actor_loss      | 155      |
|    critic_loss     | 28       |
|    ent_coef        | 0.263    |
|    ent_coef_loss   | -8.35    |
|    learning_rate   | 0.0003   |
|    n_updates       | 4864     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.139    |
| time/              |          |
|    episodes        | 36       |
|    fps             | 202      |
|    time_elapsed    | 31       |
|    total_timesteps | 6486     |
| train/             |          |
|    actor_loss      | 157      |
|    critic_loss     | 124      |
|    ent_coef        | 0.225    |
|    ent_coef_loss   | -6.99    |
|    learning_rate   | 0.0003   |
|    n_updates       | 5485     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.125    |
| time/              |          |
|    episodes        | 40       |
|    fps             | 201      |
|    time_elapsed    | 36       |
|    total_timesteps | 7286     |
| train/             |          |
|    actor_loss      | 206      |
|    critic_loss     | 36.8     |
|    ent_coef        | 0.186    |
|    ent_coef_loss   | -6.4     |
|    learning_rate   | 0.0003   |
|    n_updates       | 6285     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.114    |
| time/              |          |
|    episodes        | 44       |
|    fps             | 200      |
|    time_elapsed    | 40       |
|    total_timesteps | 8086     |
| train/             |          |
|    actor_loss      | 234      |
|    critic_loss     | 51.4     |
|    ent_coef        | 0.157    |
|    ent_coef_loss   | -5.83    |
|    learning_rate   | 0.0003   |
|    n_updates       | 7085     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.104    |
| time/              |          |
|    episodes        | 48       |
|    fps             | 199      |
|    time_elapsed    | 44       |
|    total_timesteps | 8886     |
| train/             |          |
|    actor_loss      | 243      |
|    critic_loss     | 41.4     |
|    ent_coef        | 0.134    |
|    ent_coef_loss   | -3.02    |
|    learning_rate   | 0.0003   |
|    n_updates       | 7885     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.135    |
| time/              |          |
|    episodes        | 52       |
|    fps             | 199      |
|    time_elapsed    | 46       |
|    total_timesteps | 9376     |
| train/             |          |
|    actor_loss      | 260      |
|    critic_loss     | 50.3     |
|    ent_coef        | 0.123    |
|    ent_coef_loss   | -2.02    |
|    learning_rate   | 0.0003   |
|    n_updates       | 8375     |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 168      |
|    mean_reward     | -821     |
|    success_rate    | 0.2      |
| time/              |          |
|    total_timesteps | 10000    |
| train/             |          |
|    actor_loss      | 249      |
|    critic_loss     | 77.5     |
|    ent_coef        | 0.112    |
|    ent_coef_loss   | -1.57    |
|    learning_rate   | 0.0003   |
|    n_updates       | 8999     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.125    |
| time/              |          |
|    episodes        | 56       |
|    fps             | 196      |
|    time_elapsed    | 51       |
|    total_timesteps | 10176    |
| train/             |          |
|    actor_loss      | 345      |
|    critic_loss     | 117      |
|    ent_coef        | 0.109    |
|    ent_coef_loss   | -0.223   |
|    learning_rate   | 0.0003   |
|    n_updates       | 9175     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.117    |
| time/              |          |
|    episodes        | 60       |
|    fps             | 196      |
|    time_elapsed    | 55       |
|    total_timesteps | 10976    |
| train/             |          |
|    actor_loss      | 299      |
|    critic_loss     | 277      |
|    ent_coef        | 0.102    |
|    ent_coef_loss   | -0.255   |
|    learning_rate   | 0.0003   |
|    n_updates       | 9975     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.109    |
| time/              |          |
|    episodes        | 64       |
|    fps             | 196      |
|    time_elapsed    | 59       |
|    total_timesteps | 11776    |
| train/             |          |
|    actor_loss      | 258      |
|    critic_loss     | 45.1     |
|    ent_coef        | 0.103    |
|    ent_coef_loss   | -0.852   |
|    learning_rate   | 0.0003   |
|    n_updates       | 10775    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.103    |
| time/              |          |
|    episodes        | 68       |
|    fps             | 196      |
|    time_elapsed    | 63       |
|    total_timesteps | 12576    |
| train/             |          |
|    actor_loss      | 281      |
|    critic_loss     | 71.9     |
|    ent_coef        | 0.109    |
|    ent_coef_loss   | 0.95     |
|    learning_rate   | 0.0003   |
|    n_updates       | 11575    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.111    |
| time/              |          |
|    episodes        | 72       |
|    fps             | 196      |
|    time_elapsed    | 67       |
|    total_timesteps | 13180    |
| train/             |          |
|    actor_loss      | 351      |
|    critic_loss     | 86.8     |
|    ent_coef        | 0.116    |
|    ent_coef_loss   | 1.17     |
|    learning_rate   | 0.0003   |
|    n_updates       | 12179    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.105    |
| time/              |          |
|    episodes        | 76       |
|    fps             | 196      |
|    time_elapsed    | 71       |
|    total_timesteps | 13980    |
| train/             |          |
|    actor_loss      | 330      |
|    critic_loss     | 332      |
|    ent_coef        | 0.125    |
|    ent_coef_loss   | -0.186   |
|    learning_rate   | 0.0003   |
|    n_updates       | 12979    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.113    |
| time/              |          |
|    episodes        | 80       |
|    fps             | 195      |
|    time_elapsed    | 74       |
|    total_timesteps | 14587    |
| train/             |          |
|    actor_loss      | 327      |
|    critic_loss     | 46.8     |
|    ent_coef        | 0.123    |
|    ent_coef_loss   | -0.637   |
|    learning_rate   | 0.0003   |
|    n_updates       | 13586    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 89       |
|    mean_reward     | -708     |
|    success_rate    | 0.6      |
| time/              |          |
|    total_timesteps | 15000    |
| train/             |          |
|    actor_loss      | 312      |
|    critic_loss     | 56       |
|    ent_coef        | 0.121    |
|    ent_coef_loss   | -0.68    |
|    learning_rate   | 0.0003   |
|    n_updates       | 13999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.107    |
| time/              |          |
|    episodes        | 84       |
|    fps             | 194      |
|    time_elapsed    | 78       |
|    total_timesteps | 15387    |
| train/             |          |
|    actor_loss      | 331      |
|    critic_loss     | 64.3     |
|    ent_coef        | 0.121    |
|    ent_coef_loss   | 0.764    |
|    learning_rate   | 0.0003   |
|    n_updates       | 14386    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.102    |
| time/              |          |
|    episodes        | 88       |
|    fps             | 193      |
|    time_elapsed    | 83       |
|    total_timesteps | 16187    |
| train/             |          |
|    actor_loss      | 323      |
|    critic_loss     | 153      |
|    ent_coef        | 0.123    |
|    ent_coef_loss   | -0.712   |
|    learning_rate   | 0.0003   |
|    n_updates       | 15186    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.0978   |
| time/              |          |
|    episodes        | 92       |
|    fps             | 193      |
|    time_elapsed    | 87       |
|    total_timesteps | 16987    |
| train/             |          |
|    actor_loss      | 330      |
|    critic_loss     | 92.8     |
|    ent_coef        | 0.126    |
|    ent_coef_loss   | 0.737    |
|    learning_rate   | 0.0003   |
|    n_updates       | 15986    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.104    |
| time/              |          |
|    episodes        | 96       |
|    fps             | 192      |
|    time_elapsed    | 91       |
|    total_timesteps | 17605    |
| train/             |          |
|    actor_loss      | 343      |
|    critic_loss     | 95       |
|    ent_coef        | 0.129    |
|    ent_coef_loss   | -0.411   |
|    learning_rate   | 0.0003   |
|    n_updates       | 16604    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.11     |
| time/              |          |
|    episodes        | 100      |
|    fps             | 191      |
|    time_elapsed    | 95       |
|    total_timesteps | 18259    |
| train/             |          |
|    actor_loss      | 377      |
|    critic_loss     | 496      |
|    ent_coef        | 0.132    |
|    ent_coef_loss   | -0.176   |
|    learning_rate   | 0.0003   |
|    n_updates       | 17258    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.12     |
| time/              |          |
|    episodes        | 104      |
|    fps             | 191      |
|    time_elapsed    | 98       |
|    total_timesteps | 18883    |
| train/             |          |
|    actor_loss      | 387      |
|    critic_loss     | 102      |
|    ent_coef        | 0.131    |
|    ent_coef_loss   | 0.498    |
|    learning_rate   | 0.0003   |
|    n_updates       | 17882    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.12     |
| time/              |          |
|    episodes        | 108      |
|    fps             | 190      |
|    time_elapsed    | 102      |
|    total_timesteps | 19526    |
| train/             |          |
|    actor_loss      | 361      |
|    critic_loss     | 131      |
|    ent_coef        | 0.131    |
|    ent_coef_loss   | -0.524   |
|    learning_rate   | 0.0003   |
|    n_updates       | 18525    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 187      |
|    mean_reward     | -907     |
|    success_rate    | 0.2      |
| time/              |          |
|    total_timesteps | 20000    |
| train/             |          |
|    actor_loss      | 364      |
|    critic_loss     | 110      |
|    ent_coef        | 0.131    |
|    ent_coef_loss   | -1.06    |
|    learning_rate   | 0.0003   |
|    n_updates       | 18999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.11     |
| time/              |          |
|    episodes        | 112      |
|    fps             | 189      |
|    time_elapsed    | 107      |
|    total_timesteps | 20326    |
| train/             |          |
|    actor_loss      | 346      |
|    critic_loss     | 169      |
|    ent_coef        | 0.132    |
|    ent_coef_loss   | -1.16    |
|    learning_rate   | 0.0003   |
|    n_updates       | 19325    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.12     |
| time/              |          |
|    episodes        | 116      |
|    fps             | 189      |
|    time_elapsed    | 110      |
|    total_timesteps | 20909    |
| train/             |          |
|    actor_loss      | 420      |
|    critic_loss     | 174      |
|    ent_coef        | 0.132    |
|    ent_coef_loss   | 2.17     |
|    learning_rate   | 0.0003   |
|    n_updates       | 19908    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.12     |
| time/              |          |
|    episodes        | 120      |
|    fps             | 189      |
|    time_elapsed    | 114      |
|    total_timesteps | 21709    |
| train/             |          |
|    actor_loss      | 364      |
|    critic_loss     | 97.9     |
|    ent_coef        | 0.136    |
|    ent_coef_loss   | 0.297    |
|    learning_rate   | 0.0003   |
|    n_updates       | 20708    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.12     |
| time/              |          |
|    episodes        | 124      |
|    fps             | 189      |
|    time_elapsed    | 119      |
|    total_timesteps | 22509    |
| train/             |          |
|    actor_loss      | 362      |
|    critic_loss     | 537      |
|    ent_coef        | 0.135    |
|    ent_coef_loss   | -0.502   |
|    learning_rate   | 0.0003   |
|    n_updates       | 21508    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.12     |
| time/              |          |
|    episodes        | 128      |
|    fps             | 188      |
|    time_elapsed    | 122      |
|    total_timesteps | 23112    |
| train/             |          |
|    actor_loss      | 398      |
|    critic_loss     | 142      |
|    ent_coef        | 0.136    |
|    ent_coef_loss   | -0.917   |
|    learning_rate   | 0.0003   |
|    n_updates       | 22111    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.13     |
| time/              |          |
|    episodes        | 132      |
|    fps             | 188      |
|    time_elapsed    | 125      |
|    total_timesteps | 23719    |
| train/             |          |
|    actor_loss      | 369      |
|    critic_loss     | 126      |
|    ent_coef        | 0.133    |
|    ent_coef_loss   | -0.233   |
|    learning_rate   | 0.0003   |
|    n_updates       | 22718    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.12     |
| time/              |          |
|    episodes        | 136      |
|    fps             | 186      |
|    time_elapsed    | 131      |
|    total_timesteps | 24519    |
| train/             |          |
|    actor_loss      | 402      |
|    critic_loss     | 92       |
|    ent_coef        | 0.136    |
|    ent_coef_loss   | 0.286    |
|    learning_rate   | 0.0003   |
|    n_updates       | 23518    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 166      |
|    mean_reward     | -782     |
|    success_rate    | 0.2      |
| time/              |          |
|    total_timesteps | 25000    |
| train/             |          |
|    actor_loss      | 367      |
|    critic_loss     | 61.4     |
|    ent_coef        | 0.134    |
|    ent_coef_loss   | -0.267   |
|    learning_rate   | 0.0003   |
|    n_updates       | 23999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.12     |
| time/              |          |
|    episodes        | 140      |
|    fps             | 185      |
|    time_elapsed    | 136      |
|    total_timesteps | 25319    |
| train/             |          |
|    actor_loss      | 427      |
|    critic_loss     | 112      |
|    ent_coef        | 0.132    |
|    ent_coef_loss   | 1.63     |
|    learning_rate   | 0.0003   |
|    n_updates       | 24318    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.12     |
| time/              |          |
|    episodes        | 144      |
|    fps             | 185      |
|    time_elapsed    | 140      |
|    total_timesteps | 26119    |
| train/             |          |
|    actor_loss      | 380      |
|    critic_loss     | 152      |
|    ent_coef        | 0.131    |
|    ent_coef_loss   | 0.506    |
|    learning_rate   | 0.0003   |
|    n_updates       | 25118    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.14     |
| time/              |          |
|    episodes        | 148      |
|    fps             | 185      |
|    time_elapsed    | 142      |
|    total_timesteps | 26569    |
| train/             |          |
|    actor_loss      | 422      |
|    critic_loss     | 196      |
|    ent_coef        | 0.132    |
|    ent_coef_loss   | -0.0593  |
|    learning_rate   | 0.0003   |
|    n_updates       | 25568    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.13     |
| time/              |          |
|    episodes        | 152      |
|    fps             | 185      |
|    time_elapsed    | 146      |
|    total_timesteps | 27172    |
| train/             |          |
|    actor_loss      | 392      |
|    critic_loss     | 135      |
|    ent_coef        | 0.131    |
|    ent_coef_loss   | -0.453   |
|    learning_rate   | 0.0003   |
|    n_updates       | 26171    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.13     |
| time/              |          |
|    episodes        | 156      |
|    fps             | 185      |
|    time_elapsed    | 150      |
|    total_timesteps | 27972    |
| train/             |          |
|    actor_loss      | 436      |
|    critic_loss     | 104      |
|    ent_coef        | 0.131    |
|    ent_coef_loss   | 0.112    |
|    learning_rate   | 0.0003   |
|    n_updates       | 26971    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.13     |
| time/              |          |
|    episodes        | 160      |
|    fps             | 185      |
|    time_elapsed    | 154      |
|    total_timesteps | 28772    |
| train/             |          |
|    actor_loss      | 417      |
|    critic_loss     | 77.5     |
|    ent_coef        | 0.134    |
|    ent_coef_loss   | 0.126    |
|    learning_rate   | 0.0003   |
|    n_updates       | 27771    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.13     |
| time/              |          |
|    episodes        | 164      |
|    fps             | 185      |
|    time_elapsed    | 159      |
|    total_timesteps | 29572    |
| train/             |          |
|    actor_loss      | 408      |
|    critic_loss     | 157      |
|    ent_coef        | 0.138    |
|    ent_coef_loss   | -0.767   |
|    learning_rate   | 0.0003   |
|    n_updates       | 28571    |
---------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 166       |
|    mean_reward     | -1.11e+03 |
|    success_rate    | 0.2       |
| time/              |           |
|    total_timesteps | 30000     |
| train/             |           |
|    actor_loss      | 437       |
|    critic_loss     | 111       |
|    ent_coef        | 0.136     |
|    ent_coef_loss   | -0.235    |
|    learning_rate   | 0.0003    |
|    n_updates       | 28999     |
----------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.14     |
| time/              |          |
|    episodes        | 168      |
|    fps             | 185      |
|    time_elapsed    | 163      |
|    total_timesteps | 30195    |
| train/             |          |
|    actor_loss      | 417      |
|    critic_loss     | 81.5     |
|    ent_coef        | 0.136    |
|    ent_coef_loss   | -0.72    |
|    learning_rate   | 0.0003   |
|    n_updates       | 29194    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.15     |
| time/              |          |
|    episodes        | 172      |
|    fps             | 185      |
|    time_elapsed    | 167      |
|    total_timesteps | 30949    |
| train/             |          |
|    actor_loss      | 438      |
|    critic_loss     | 746      |
|    ent_coef        | 0.135    |
|    ent_coef_loss   | -0.498   |
|    learning_rate   | 0.0003   |
|    n_updates       | 29948    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.15     |
| time/              |          |
|    episodes        | 176      |
|    fps             | 185      |
|    time_elapsed    | 171      |
|    total_timesteps | 31749    |
| train/             |          |
|    actor_loss      | 439      |
|    critic_loss     | 817      |
|    ent_coef        | 0.138    |
|    ent_coef_loss   | 0.109    |
|    learning_rate   | 0.0003   |
|    n_updates       | 30748    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.14     |
| time/              |          |
|    episodes        | 180      |
|    fps             | 185      |
|    time_elapsed    | 175      |
|    total_timesteps | 32549    |
| train/             |          |
|    actor_loss      | 456      |
|    critic_loss     | 70.3     |
|    ent_coef        | 0.143    |
|    ent_coef_loss   | 1.03     |
|    learning_rate   | 0.0003   |
|    n_updates       | 31548    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.14     |
| time/              |          |
|    episodes        | 184      |
|    fps             | 185      |
|    time_elapsed    | 179      |
|    total_timesteps | 33349    |
| train/             |          |
|    actor_loss      | 464      |
|    critic_loss     | 90.3     |
|    ent_coef        | 0.145    |
|    ent_coef_loss   | 0.417    |
|    learning_rate   | 0.0003   |
|    n_updates       | 32348    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.15     |
| time/              |          |
|    episodes        | 188      |
|    fps             | 185      |
|    time_elapsed    | 183      |
|    total_timesteps | 33987    |
| train/             |          |
|    actor_loss      | 450      |
|    critic_loss     | 85.1     |
|    ent_coef        | 0.143    |
|    ent_coef_loss   | -0.436   |
|    learning_rate   | 0.0003   |
|    n_updates       | 32986    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.17     |
| time/              |          |
|    episodes        | 192      |
|    fps             | 185      |
|    time_elapsed    | 186      |
|    total_timesteps | 34584    |
| train/             |          |
|    actor_loss      | 451      |
|    critic_loss     | 76.5     |
|    ent_coef        | 0.148    |
|    ent_coef_loss   | 0.277    |
|    learning_rate   | 0.0003   |
|    n_updates       | 33583    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -1e+03   |
|    success_rate    | 0        |
| time/              |          |
|    total_timesteps | 35000    |
| train/             |          |
|    actor_loss      | 454      |
|    critic_loss     | 75.2     |
|    ent_coef        | 0.146    |
|    ent_coef_loss   | -1.44    |
|    learning_rate   | 0.0003   |
|    n_updates       | 33999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.18     |
| time/              |          |
|    episodes        | 196      |
|    fps             | 184      |
|    time_elapsed    | 190      |
|    total_timesteps | 35182    |
| train/             |          |
|    actor_loss      | 445      |
|    critic_loss     | 714      |
|    ent_coef        | 0.146    |
|    ent_coef_loss   | 1.04     |
|    learning_rate   | 0.0003   |
|    n_updates       | 34181    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.17     |
| time/              |          |
|    episodes        | 200      |
|    fps             | 184      |
|    time_elapsed    | 194      |
|    total_timesteps | 35982    |
| train/             |          |
|    actor_loss      | 447      |
|    critic_loss     | 64.5     |
|    ent_coef        | 0.148    |
|    ent_coef_loss   | -0.402   |
|    learning_rate   | 0.0003   |
|    n_updates       | 34981    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.16     |
| time/              |          |
|    episodes        | 204      |
|    fps             | 184      |
|    time_elapsed    | 199      |
|    total_timesteps | 36782    |
| train/             |          |
|    actor_loss      | 453      |
|    critic_loss     | 721      |
|    ent_coef        | 0.152    |
|    ent_coef_loss   | -0.968   |
|    learning_rate   | 0.0003   |
|    n_updates       | 35781    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.16     |
| time/              |          |
|    episodes        | 208      |
|    fps             | 184      |
|    time_elapsed    | 202      |
|    total_timesteps | 37395    |
| train/             |          |
|    actor_loss      | 452      |
|    critic_loss     | 103      |
|    ent_coef        | 0.149    |
|    ent_coef_loss   | 0.534    |
|    learning_rate   | 0.0003   |
|    n_updates       | 36394    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.17     |
| time/              |          |
|    episodes        | 212      |
|    fps             | 184      |
|    time_elapsed    | 206      |
|    total_timesteps | 38084    |
| train/             |          |
|    actor_loss      | 459      |
|    critic_loss     | 170      |
|    ent_coef        | 0.149    |
|    ent_coef_loss   | 0.571    |
|    learning_rate   | 0.0003   |
|    n_updates       | 37083    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.15     |
| time/              |          |
|    episodes        | 216      |
|    fps             | 184      |
|    time_elapsed    | 210      |
|    total_timesteps | 38884    |
| train/             |          |
|    actor_loss      | 481      |
|    critic_loss     | 129      |
|    ent_coef        | 0.148    |
|    ent_coef_loss   | 1.27     |
|    learning_rate   | 0.0003   |
|    n_updates       | 37883    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.16     |
| time/              |          |
|    episodes        | 220      |
|    fps             | 184      |
|    time_elapsed    | 214      |
|    total_timesteps | 39486    |
| train/             |          |
|    actor_loss      | 490      |
|    critic_loss     | 647      |
|    ent_coef        | 0.151    |
|    ent_coef_loss   | 0.246    |
|    learning_rate   | 0.0003   |
|    n_updates       | 38485    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 162      |
|    mean_reward     | -462     |
|    success_rate    | 0.2      |
| time/              |          |
|    total_timesteps | 40000    |
| train/             |          |
|    actor_loss      | 453      |
|    critic_loss     | 177      |
|    ent_coef        | 0.15     |
|    ent_coef_loss   | 0.00369  |
|    learning_rate   | 0.0003   |
|    n_updates       | 38999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.17     |
| time/              |          |
|    episodes        | 224      |
|    fps             | 183      |
|    time_elapsed    | 218      |
|    total_timesteps | 40097    |
| train/             |          |
|    actor_loss      | 481      |
|    critic_loss     | 691      |
|    ent_coef        | 0.151    |
|    ent_coef_loss   | -1.27    |
|    learning_rate   | 0.0003   |
|    n_updates       | 39096    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.18     |
| time/              |          |
|    episodes        | 228      |
|    fps             | 183      |
|    time_elapsed    | 221      |
|    total_timesteps | 40585    |
| train/             |          |
|    actor_loss      | 522      |
|    critic_loss     | 79.1     |
|    ent_coef        | 0.152    |
|    ent_coef_loss   | 0.437    |
|    learning_rate   | 0.0003   |
|    n_updates       | 39584    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.17     |
| time/              |          |
|    episodes        | 232      |
|    fps             | 183      |
|    time_elapsed    | 225      |
|    total_timesteps | 41385    |
| train/             |          |
|    actor_loss      | 508      |
|    critic_loss     | 86.2     |
|    ent_coef        | 0.157    |
|    ent_coef_loss   | 0.0709   |
|    learning_rate   | 0.0003   |
|    n_updates       | 40384    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.18     |
| time/              |          |
|    episodes        | 236      |
|    fps             | 183      |
|    time_elapsed    | 229      |
|    total_timesteps | 42005    |
| train/             |          |
|    actor_loss      | 484      |
|    critic_loss     | 886      |
|    ent_coef        | 0.161    |
|    ent_coef_loss   | 0.452    |
|    learning_rate   | 0.0003   |
|    n_updates       | 41004    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.18     |
| time/              |          |
|    episodes        | 240      |
|    fps             | 182      |
|    time_elapsed    | 234      |
|    total_timesteps | 42805    |
| train/             |          |
|    actor_loss      | 467      |
|    critic_loss     | 93       |
|    ent_coef        | 0.163    |
|    ent_coef_loss   | 0.432    |
|    learning_rate   | 0.0003   |
|    n_updates       | 41804    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.18     |
| time/              |          |
|    episodes        | 244      |
|    fps             | 182      |
|    time_elapsed    | 239      |
|    total_timesteps | 43605    |
| train/             |          |
|    actor_loss      | 509      |
|    critic_loss     | 94       |
|    ent_coef        | 0.157    |
|    ent_coef_loss   | -0.456   |
|    learning_rate   | 0.0003   |
|    n_updates       | 42604    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.17     |
| time/              |          |
|    episodes        | 248      |
|    fps             | 182      |
|    time_elapsed    | 242      |
|    total_timesteps | 44224    |
| train/             |          |
|    actor_loss      | 483      |
|    critic_loss     | 132      |
|    ent_coef        | 0.157    |
|    ent_coef_loss   | -0.319   |
|    learning_rate   | 0.0003   |
|    n_updates       | 43223    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.17     |
| time/              |          |
|    episodes        | 252      |
|    fps             | 182      |
|    time_elapsed    | 246      |
|    total_timesteps | 44896    |
| train/             |          |
|    actor_loss      | 482      |
|    critic_loss     | 348      |
|    ent_coef        | 0.157    |
|    ent_coef_loss   | -0.214   |
|    learning_rate   | 0.0003   |
|    n_updates       | 43895    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 130      |
|    mean_reward     | -286     |
|    success_rate    | 0.6      |
| time/              |          |
|    total_timesteps | 45000    |
| train/             |          |
|    actor_loss      | 459      |
|    critic_loss     | 116      |
|    ent_coef        | 0.157    |
|    ent_coef_loss   | 0.136    |
|    learning_rate   | 0.0003   |
|    n_updates       | 43999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.18     |
| time/              |          |
|    episodes        | 256      |
|    fps             | 181      |
|    time_elapsed    | 250      |
|    total_timesteps | 45569    |
| train/             |          |
|    actor_loss      | 460      |
|    critic_loss     | 171      |
|    ent_coef        | 0.163    |
|    ent_coef_loss   | 0.116    |
|    learning_rate   | 0.0003   |
|    n_updates       | 44568    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.18     |
| time/              |          |
|    episodes        | 260      |
|    fps             | 181      |
|    time_elapsed    | 255      |
|    total_timesteps | 46369    |
| train/             |          |
|    actor_loss      | 463      |
|    critic_loss     | 112      |
|    ent_coef        | 0.161    |
|    ent_coef_loss   | -0.0382  |
|    learning_rate   | 0.0003   |
|    n_updates       | 45368    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.18     |
| time/              |          |
|    episodes        | 264      |
|    fps             | 181      |
|    time_elapsed    | 259      |
|    total_timesteps | 47169    |
| train/             |          |
|    actor_loss      | 488      |
|    critic_loss     | 196      |
|    ent_coef        | 0.163    |
|    ent_coef_loss   | 0.415    |
|    learning_rate   | 0.0003   |
|    n_updates       | 46168    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.17     |
| time/              |          |
|    episodes        | 268      |
|    fps             | 181      |
|    time_elapsed    | 263      |
|    total_timesteps | 47969    |
| train/             |          |
|    actor_loss      | 485      |
|    critic_loss     | 892      |
|    ent_coef        | 0.161    |
|    ent_coef_loss   | 0.0443   |
|    learning_rate   | 0.0003   |
|    n_updates       | 46968    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.15     |
| time/              |          |
|    episodes        | 272      |
|    fps             | 181      |
|    time_elapsed    | 268      |
|    total_timesteps | 48769    |
| train/             |          |
|    actor_loss      | 487      |
|    critic_loss     | 900      |
|    ent_coef        | 0.165    |
|    ent_coef_loss   | -0.652   |
|    learning_rate   | 0.0003   |
|    n_updates       | 47768    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.16     |
| time/              |          |
|    episodes        | 276      |
|    fps             | 181      |
|    time_elapsed    | 271      |
|    total_timesteps | 49375    |
| train/             |          |
|    actor_loss      | 473      |
|    critic_loss     | 85.5     |
|    ent_coef        | 0.162    |
|    ent_coef_loss   | 0.817    |
|    learning_rate   | 0.0003   |
|    n_updates       | 48374    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 163      |
|    mean_reward     | -462     |
|    success_rate    | 0.2      |
| time/              |          |
|    total_timesteps | 50000    |
| train/             |          |
|    actor_loss      | 484      |
|    critic_loss     | 94.4     |
|    ent_coef        | 0.163    |
|    ent_coef_loss   | -0.184   |
|    learning_rate   | 0.0003   |
|    n_updates       | 48999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.16     |
| time/              |          |
|    episodes        | 280      |
|    fps             | 181      |
|    time_elapsed    | 276      |
|    total_timesteps | 50175    |
| train/             |          |
|    actor_loss      | 486      |
|    critic_loss     | 101      |
|    ent_coef        | 0.163    |
|    ent_coef_loss   | -0.314   |
|    learning_rate   | 0.0003   |
|    n_updates       | 49174    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.16     |
| time/              |          |
|    episodes        | 284      |
|    fps             | 181      |
|    time_elapsed    | 280      |
|    total_timesteps | 50975    |
| train/             |          |
|    actor_loss      | 542      |
|    critic_loss     | 172      |
|    ent_coef        | 0.161    |
|    ent_coef_loss   | -0.861   |
|    learning_rate   | 0.0003   |
|    n_updates       | 49974    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.16     |
| time/              |          |
|    episodes        | 288      |
|    fps             | 181      |
|    time_elapsed    | 285      |
|    total_timesteps | 51768    |
| train/             |          |
|    actor_loss      | 474      |
|    critic_loss     | 80.4     |
|    ent_coef        | 0.16     |
|    ent_coef_loss   | -0.379   |
|    learning_rate   | 0.0003   |
|    n_updates       | 50767    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.16     |
| time/              |          |
|    episodes        | 292      |
|    fps             | 181      |
|    time_elapsed    | 287      |
|    total_timesteps | 52265    |
| train/             |          |
|    actor_loss      | 525      |
|    critic_loss     | 946      |
|    ent_coef        | 0.162    |
|    ent_coef_loss   | -0.828   |
|    learning_rate   | 0.0003   |
|    n_updates       | 51264    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.16     |
| time/              |          |
|    episodes        | 296      |
|    fps             | 181      |
|    time_elapsed    | 290      |
|    total_timesteps | 52713    |
| train/             |          |
|    actor_loss      | 481      |
|    critic_loss     | 1.08e+03 |
|    ent_coef        | 0.167    |
|    ent_coef_loss   | -0.287   |
|    learning_rate   | 0.0003   |
|    n_updates       | 51712    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.17     |
| time/              |          |
|    episodes        | 300      |
|    fps             | 181      |
|    time_elapsed    | 294      |
|    total_timesteps | 53359    |
| train/             |          |
|    actor_loss      | 527      |
|    critic_loss     | 173      |
|    ent_coef        | 0.171    |
|    ent_coef_loss   | -0.212   |
|    learning_rate   | 0.0003   |
|    n_updates       | 52358    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.17     |
| time/              |          |
|    episodes        | 304      |
|    fps             | 181      |
|    time_elapsed    | 298      |
|    total_timesteps | 54159    |
| train/             |          |
|    actor_loss      | 525      |
|    critic_loss     | 233      |
|    ent_coef        | 0.169    |
|    ent_coef_loss   | 0.609    |
|    learning_rate   | 0.0003   |
|    n_updates       | 53158    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.18     |
| time/              |          |
|    episodes        | 308      |
|    fps             | 181      |
|    time_elapsed    | 302      |
|    total_timesteps | 54864    |
| train/             |          |
|    actor_loss      | 536      |
|    critic_loss     | 362      |
|    ent_coef        | 0.163    |
|    ent_coef_loss   | -0.0495  |
|    learning_rate   | 0.0003   |
|    n_updates       | 53863    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 162      |
|    mean_reward     | -380     |
|    success_rate    | 0.2      |
| time/              |          |
|    total_timesteps | 55000    |
| train/             |          |
|    actor_loss      | 490      |
|    critic_loss     | 1.12e+03 |
|    ent_coef        | 0.165    |
|    ent_coef_loss   | 0.569    |
|    learning_rate   | 0.0003   |
|    n_updates       | 53999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.19     |
| time/              |          |
|    episodes        | 312      |
|    fps             | 180      |
|    time_elapsed    | 306      |
|    total_timesteps | 55398    |
| train/             |          |
|    actor_loss      | 508      |
|    critic_loss     | 996      |
|    ent_coef        | 0.166    |
|    ent_coef_loss   | -0.503   |
|    learning_rate   | 0.0003   |
|    n_updates       | 54397    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.21     |
| time/              |          |
|    episodes        | 316      |
|    fps             | 180      |
|    time_elapsed    | 309      |
|    total_timesteps | 55881    |
| train/             |          |
|    actor_loss      | 532      |
|    critic_loss     | 146      |
|    ent_coef        | 0.163    |
|    ent_coef_loss   | 0.121    |
|    learning_rate   | 0.0003   |
|    n_updates       | 54880    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.21     |
| time/              |          |
|    episodes        | 320      |
|    fps             | 180      |
|    time_elapsed    | 313      |
|    total_timesteps | 56614    |
| train/             |          |
|    actor_loss      | 512      |
|    critic_loss     | 89.2     |
|    ent_coef        | 0.168    |
|    ent_coef_loss   | -0.538   |
|    learning_rate   | 0.0003   |
|    n_updates       | 55613    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.21     |
| time/              |          |
|    episodes        | 324      |
|    fps             | 180      |
|    time_elapsed    | 317      |
|    total_timesteps | 57290    |
| train/             |          |
|    actor_loss      | 517      |
|    critic_loss     | 132      |
|    ent_coef        | 0.171    |
|    ent_coef_loss   | 0.491    |
|    learning_rate   | 0.0003   |
|    n_updates       | 56289    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.2      |
| time/              |          |
|    episodes        | 328      |
|    fps             | 180      |
|    time_elapsed    | 321      |
|    total_timesteps | 57900    |
| train/             |          |
|    actor_loss      | 500      |
|    critic_loss     | 144      |
|    ent_coef        | 0.17     |
|    ent_coef_loss   | -0.446   |
|    learning_rate   | 0.0003   |
|    n_updates       | 56899    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.21     |
| time/              |          |
|    episodes        | 332      |
|    fps             | 180      |
|    time_elapsed    | 324      |
|    total_timesteps | 58512    |
| train/             |          |
|    actor_loss      | 507      |
|    critic_loss     | 133      |
|    ent_coef        | 0.175    |
|    ent_coef_loss   | -0.519   |
|    learning_rate   | 0.0003   |
|    n_updates       | 57511    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.22     |
| time/              |          |
|    episodes        | 336      |
|    fps             | 180      |
|    time_elapsed    | 327      |
|    total_timesteps | 59081    |
| train/             |          |
|    actor_loss      | 498      |
|    critic_loss     | 102      |
|    ent_coef        | 0.177    |
|    ent_coef_loss   | 0.037    |
|    learning_rate   | 0.0003   |
|    n_updates       | 58080    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.22     |
| time/              |          |
|    episodes        | 340      |
|    fps             | 180      |
|    time_elapsed    | 331      |
|    total_timesteps | 59881    |
| train/             |          |
|    actor_loss      | 506      |
|    critic_loss     | 97.1     |
|    ent_coef        | 0.179    |
|    ent_coef_loss   | 0.0885   |
|    learning_rate   | 0.0003   |
|    n_updates       | 58880    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -511     |
|    success_rate    | 0        |
| time/              |          |
|    total_timesteps | 60000    |
| train/             |          |
|    actor_loss      | 498      |
|    critic_loss     | 271      |
|    ent_coef        | 0.179    |
|    ent_coef_loss   | -0.177   |
|    learning_rate   | 0.0003   |
|    n_updates       | 58999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.23     |
| time/              |          |
|    episodes        | 344      |
|    fps             | 179      |
|    time_elapsed    | 336      |
|    total_timesteps | 60539    |
| train/             |          |
|    actor_loss      | 521      |
|    critic_loss     | 130      |
|    ent_coef        | 0.181    |
|    ent_coef_loss   | 0.296    |
|    learning_rate   | 0.0003   |
|    n_updates       | 59538    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.23     |
| time/              |          |
|    episodes        | 348      |
|    fps             | 180      |
|    time_elapsed    | 340      |
|    total_timesteps | 61256    |
| train/             |          |
|    actor_loss      | 530      |
|    critic_loss     | 1.05e+03 |
|    ent_coef        | 0.181    |
|    ent_coef_loss   | 0.461    |
|    learning_rate   | 0.0003   |
|    n_updates       | 60255    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.23     |
| time/              |          |
|    episodes        | 352      |
|    fps             | 180      |
|    time_elapsed    | 343      |
|    total_timesteps | 61904    |
| train/             |          |
|    actor_loss      | 508      |
|    critic_loss     | 135      |
|    ent_coef        | 0.18     |
|    ent_coef_loss   | 0.838    |
|    learning_rate   | 0.0003   |
|    n_updates       | 60903    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.24     |
| time/              |          |
|    episodes        | 356      |
|    fps             | 180      |
|    time_elapsed    | 345      |
|    total_timesteps | 62330    |
| train/             |          |
|    actor_loss      | 517      |
|    critic_loss     | 180      |
|    ent_coef        | 0.184    |
|    ent_coef_loss   | -0.33    |
|    learning_rate   | 0.0003   |
|    n_updates       | 61329    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.25     |
| time/              |          |
|    episodes        | 360      |
|    fps             | 180      |
|    time_elapsed    | 349      |
|    total_timesteps | 63015    |
| train/             |          |
|    actor_loss      | 486      |
|    critic_loss     | 127      |
|    ent_coef        | 0.185    |
|    ent_coef_loss   | -0.739   |
|    learning_rate   | 0.0003   |
|    n_updates       | 62014    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.28     |
| time/              |          |
|    episodes        | 364      |
|    fps             | 180      |
|    time_elapsed    | 352      |
|    total_timesteps | 63506    |
| train/             |          |
|    actor_loss      | 505      |
|    critic_loss     | 934      |
|    ent_coef        | 0.186    |
|    ent_coef_loss   | 1.61     |
|    learning_rate   | 0.0003   |
|    n_updates       | 62505    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.28     |
| time/              |          |
|    episodes        | 368      |
|    fps             | 180      |
|    time_elapsed    | 356      |
|    total_timesteps | 64306    |
| train/             |          |
|    actor_loss      | 482      |
|    critic_loss     | 210      |
|    ent_coef        | 0.188    |
|    ent_coef_loss   | 0.349    |
|    learning_rate   | 0.0003   |
|    n_updates       | 63305    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.29     |
| time/              |          |
|    episodes        | 372      |
|    fps             | 180      |
|    time_elapsed    | 360      |
|    total_timesteps | 64909    |
| train/             |          |
|    actor_loss      | 512      |
|    critic_loss     | 122      |
|    ent_coef        | 0.187    |
|    ent_coef_loss   | -0.0657  |
|    learning_rate   | 0.0003   |
|    n_updates       | 63908    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 90       |
|    mean_reward     | -105     |
|    success_rate    | 0.6      |
| time/              |          |
|    total_timesteps | 65000    |
| train/             |          |
|    actor_loss      | 499      |
|    critic_loss     | 243      |
|    ent_coef        | 0.186    |
|    ent_coef_loss   | -0.0536  |
|    learning_rate   | 0.0003   |
|    n_updates       | 63999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.28     |
| time/              |          |
|    episodes        | 376      |
|    fps             | 179      |
|    time_elapsed    | 365      |
|    total_timesteps | 65709    |
| train/             |          |
|    actor_loss      | 501      |
|    critic_loss     | 243      |
|    ent_coef        | 0.186    |
|    ent_coef_loss   | 1.09     |
|    learning_rate   | 0.0003   |
|    n_updates       | 64708    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.28     |
| time/              |          |
|    episodes        | 380      |
|    fps             | 179      |
|    time_elapsed    | 369      |
|    total_timesteps | 66509    |
| train/             |          |
|    actor_loss      | 475      |
|    critic_loss     | 85.1     |
|    ent_coef        | 0.187    |
|    ent_coef_loss   | 0.167    |
|    learning_rate   | 0.0003   |
|    n_updates       | 65508    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.31     |
| time/              |          |
|    episodes        | 384      |
|    fps             | 179      |
|    time_elapsed    | 373      |
|    total_timesteps | 67115    |
| train/             |          |
|    actor_loss      | 487      |
|    critic_loss     | 188      |
|    ent_coef        | 0.187    |
|    ent_coef_loss   | 0.349    |
|    learning_rate   | 0.0003   |
|    n_updates       | 66114    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.31     |
| time/              |          |
|    episodes        | 388      |
|    fps             | 179      |
|    time_elapsed    | 377      |
|    total_timesteps | 67749    |
| train/             |          |
|    actor_loss      | 506      |
|    critic_loss     | 403      |
|    ent_coef        | 0.184    |
|    ent_coef_loss   | -0.563   |
|    learning_rate   | 0.0003   |
|    n_updates       | 66748    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.31     |
| time/              |          |
|    episodes        | 392      |
|    fps             | 179      |
|    time_elapsed    | 379      |
|    total_timesteps | 68213    |
| train/             |          |
|    actor_loss      | 509      |
|    critic_loss     | 317      |
|    ent_coef        | 0.183    |
|    ent_coef_loss   | -0.144   |
|    learning_rate   | 0.0003   |
|    n_updates       | 67212    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.3      |
| time/              |          |
|    episodes        | 396      |
|    fps             | 179      |
|    time_elapsed    | 383      |
|    total_timesteps | 68817    |
| train/             |          |
|    actor_loss      | 472      |
|    critic_loss     | 99.5     |
|    ent_coef        | 0.184    |
|    ent_coef_loss   | 0.3      |
|    learning_rate   | 0.0003   |
|    n_updates       | 67816    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.3      |
| time/              |          |
|    episodes        | 400      |
|    fps             | 179      |
|    time_elapsed    | 386      |
|    total_timesteps | 69447    |
| train/             |          |
|    actor_loss      | 501      |
|    critic_loss     | 248      |
|    ent_coef        | 0.181    |
|    ent_coef_loss   | -0.0332  |
|    learning_rate   | 0.0003   |
|    n_updates       | 68446    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 161      |
|    mean_reward     | -472     |
|    success_rate    | 0.2      |
| time/              |          |
|    total_timesteps | 70000    |
| train/             |          |
|    actor_loss      | 455      |
|    critic_loss     | 77.7     |
|    ent_coef        | 0.184    |
|    ent_coef_loss   | 0.299    |
|    learning_rate   | 0.0003   |
|    n_updates       | 68999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.31     |
| time/              |          |
|    episodes        | 404      |
|    fps             | 179      |
|    time_elapsed    | 390      |
|    total_timesteps | 70084    |
| train/             |          |
|    actor_loss      | 504      |
|    critic_loss     | 83.7     |
|    ent_coef        | 0.184    |
|    ent_coef_loss   | -0.143   |
|    learning_rate   | 0.0003   |
|    n_updates       | 69083    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.29     |
| time/              |          |
|    episodes        | 408      |
|    fps             | 179      |
|    time_elapsed    | 394      |
|    total_timesteps | 70884    |
| train/             |          |
|    actor_loss      | 451      |
|    critic_loss     | 84.8     |
|    ent_coef        | 0.182    |
|    ent_coef_loss   | 0.614    |
|    learning_rate   | 0.0003   |
|    n_updates       | 69883    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.28     |
| time/              |          |
|    episodes        | 412      |
|    fps             | 179      |
|    time_elapsed    | 398      |
|    total_timesteps | 71510    |
| train/             |          |
|    actor_loss      | 502      |
|    critic_loss     | 892      |
|    ent_coef        | 0.181    |
|    ent_coef_loss   | -0.592   |
|    learning_rate   | 0.0003   |
|    n_updates       | 70509    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.28     |
| time/              |          |
|    episodes        | 416      |
|    fps             | 179      |
|    time_elapsed    | 401      |
|    total_timesteps | 72045    |
| train/             |          |
|    actor_loss      | 480      |
|    critic_loss     | 240      |
|    ent_coef        | 0.182    |
|    ent_coef_loss   | -0.347   |
|    learning_rate   | 0.0003   |
|    n_updates       | 71044    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.29     |
| time/              |          |
|    episodes        | 420      |
|    fps             | 179      |
|    time_elapsed    | 404      |
|    total_timesteps | 72589    |
| train/             |          |
|    actor_loss      | 480      |
|    critic_loss     | 95.8     |
|    ent_coef        | 0.175    |
|    ent_coef_loss   | 0.38     |
|    learning_rate   | 0.0003   |
|    n_updates       | 71588    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.28     |
| time/              |          |
|    episodes        | 424      |
|    fps             | 179      |
|    time_elapsed    | 409      |
|    total_timesteps | 73389    |
| train/             |          |
|    actor_loss      | 475      |
|    critic_loss     | 201      |
|    ent_coef        | 0.181    |
|    ent_coef_loss   | -0.0332  |
|    learning_rate   | 0.0003   |
|    n_updates       | 72388    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.27     |
| time/              |          |
|    episodes        | 428      |
|    fps             | 179      |
|    time_elapsed    | 414      |
|    total_timesteps | 74189    |
| train/             |          |
|    actor_loss      | 485      |
|    critic_loss     | 194      |
|    ent_coef        | 0.182    |
|    ent_coef_loss   | 0.11     |
|    learning_rate   | 0.0003   |
|    n_updates       | 73188    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.28     |
| time/              |          |
|    episodes        | 432      |
|    fps             | 178      |
|    time_elapsed    | 417      |
|    total_timesteps | 74683    |
| train/             |          |
|    actor_loss      | 484      |
|    critic_loss     | 291      |
|    ent_coef        | 0.176    |
|    ent_coef_loss   | 0.321    |
|    learning_rate   | 0.0003   |
|    n_updates       | 73682    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -357     |
|    success_rate    | 0        |
| time/              |          |
|    total_timesteps | 75000    |
| train/             |          |
|    actor_loss      | 463      |
|    critic_loss     | 227      |
|    ent_coef        | 0.179    |
|    ent_coef_loss   | 0.731    |
|    learning_rate   | 0.0003   |
|    n_updates       | 73999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.28     |
| time/              |          |
|    episodes        | 436      |
|    fps             | 178      |
|    time_elapsed    | 422      |
|    total_timesteps | 75395    |
| train/             |          |
|    actor_loss      | 450      |
|    critic_loss     | 65.6     |
|    ent_coef        | 0.175    |
|    ent_coef_loss   | 0.711    |
|    learning_rate   | 0.0003   |
|    n_updates       | 74394    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.29     |
| time/              |          |
|    episodes        | 440      |
|    fps             | 178      |
|    time_elapsed    | 426      |
|    total_timesteps | 76055    |
| train/             |          |
|    actor_loss      | 460      |
|    critic_loss     | 124      |
|    ent_coef        | 0.177    |
|    ent_coef_loss   | -0.331   |
|    learning_rate   | 0.0003   |
|    n_updates       | 75054    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.31     |
| time/              |          |
|    episodes        | 444      |
|    fps             | 178      |
|    time_elapsed    | 428      |
|    total_timesteps | 76485    |
| train/             |          |
|    actor_loss      | 465      |
|    critic_loss     | 188      |
|    ent_coef        | 0.179    |
|    ent_coef_loss   | -0.177   |
|    learning_rate   | 0.0003   |
|    n_updates       | 75484    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.3      |
| time/              |          |
|    episodes        | 448      |
|    fps             | 177      |
|    time_elapsed    | 434      |
|    total_timesteps | 77285    |
| train/             |          |
|    actor_loss      | 466      |
|    critic_loss     | 820      |
|    ent_coef        | 0.18     |
|    ent_coef_loss   | 0.459    |
|    learning_rate   | 0.0003   |
|    n_updates       | 76284    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.3      |
| time/              |          |
|    episodes        | 452      |
|    fps             | 177      |
|    time_elapsed    | 437      |
|    total_timesteps | 77906    |
| train/             |          |
|    actor_loss      | 484      |
|    critic_loss     | 273      |
|    ent_coef        | 0.18     |
|    ent_coef_loss   | -0.148   |
|    learning_rate   | 0.0003   |
|    n_updates       | 76905    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.3      |
| time/              |          |
|    episodes        | 456      |
|    fps             | 177      |
|    time_elapsed    | 441      |
|    total_timesteps | 78509    |
| train/             |          |
|    actor_loss      | 476      |
|    critic_loss     | 824      |
|    ent_coef        | 0.174    |
|    ent_coef_loss   | -0.35    |
|    learning_rate   | 0.0003   |
|    n_updates       | 77508    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.32     |
| time/              |          |
|    episodes        | 460      |
|    fps             | 177      |
|    time_elapsed    | 443      |
|    total_timesteps | 78917    |
| train/             |          |
|    actor_loss      | 448      |
|    critic_loss     | 80.7     |
|    ent_coef        | 0.175    |
|    ent_coef_loss   | -0.56    |
|    learning_rate   | 0.0003   |
|    n_updates       | 77916    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.3      |
| time/              |          |
|    episodes        | 464      |
|    fps             | 177      |
|    time_elapsed    | 447      |
|    total_timesteps | 79537    |
| train/             |          |
|    actor_loss      | 432      |
|    critic_loss     | 245      |
|    ent_coef        | 0.177    |
|    ent_coef_loss   | -0.366   |
|    learning_rate   | 0.0003   |
|    n_updates       | 78536    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -508     |
|    success_rate    | 0        |
| time/              |          |
|    total_timesteps | 80000    |
| train/             |          |
|    actor_loss      | 458      |
|    critic_loss     | 858      |
|    ent_coef        | 0.179    |
|    ent_coef_loss   | -0.62    |
|    learning_rate   | 0.0003   |
|    n_updates       | 78999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.32     |
| time/              |          |
|    episodes        | 468      |
|    fps             | 177      |
|    time_elapsed    | 451      |
|    total_timesteps | 80113    |
| train/             |          |
|    actor_loss      | 480      |
|    critic_loss     | 119      |
|    ent_coef        | 0.18     |
|    ent_coef_loss   | -1.05    |
|    learning_rate   | 0.0003   |
|    n_updates       | 79112    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.32     |
| time/              |          |
|    episodes        | 472      |
|    fps             | 177      |
|    time_elapsed    | 455      |
|    total_timesteps | 80855    |
| train/             |          |
|    actor_loss      | 429      |
|    critic_loss     | 880      |
|    ent_coef        | 0.175    |
|    ent_coef_loss   | -0.651   |
|    learning_rate   | 0.0003   |
|    n_updates       | 79854    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.35     |
| time/              |          |
|    episodes        | 476      |
|    fps             | 177      |
|    time_elapsed    | 457      |
|    total_timesteps | 81272    |
| train/             |          |
|    actor_loss      | 440      |
|    critic_loss     | 76.1     |
|    ent_coef        | 0.176    |
|    ent_coef_loss   | -0.127   |
|    learning_rate   | 0.0003   |
|    n_updates       | 80271    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.36     |
| time/              |          |
|    episodes        | 480      |
|    fps             | 177      |
|    time_elapsed    | 461      |
|    total_timesteps | 81889    |
| train/             |          |
|    actor_loss      | 419      |
|    critic_loss     | 86.9     |
|    ent_coef        | 0.177    |
|    ent_coef_loss   | 0.087    |
|    learning_rate   | 0.0003   |
|    n_updates       | 80888    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.34     |
| time/              |          |
|    episodes        | 484      |
|    fps             | 177      |
|    time_elapsed    | 464      |
|    total_timesteps | 82553    |
| train/             |          |
|    actor_loss      | 444      |
|    critic_loss     | 75.9     |
|    ent_coef        | 0.179    |
|    ent_coef_loss   | -0.818   |
|    learning_rate   | 0.0003   |
|    n_updates       | 81552    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.36     |
| time/              |          |
|    episodes        | 488      |
|    fps             | 177      |
|    time_elapsed    | 467      |
|    total_timesteps | 83027    |
| train/             |          |
|    actor_loss      | 426      |
|    critic_loss     | 873      |
|    ent_coef        | 0.177    |
|    ent_coef_loss   | 0.32     |
|    learning_rate   | 0.0003   |
|    n_updates       | 82026    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.35     |
| time/              |          |
|    episodes        | 492      |
|    fps             | 177      |
|    time_elapsed    | 470      |
|    total_timesteps | 83632    |
| train/             |          |
|    actor_loss      | 448      |
|    critic_loss     | 98.3     |
|    ent_coef        | 0.174    |
|    ent_coef_loss   | 0.431    |
|    learning_rate   | 0.0003   |
|    n_updates       | 82631    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.36     |
| time/              |          |
|    episodes        | 496      |
|    fps             | 177      |
|    time_elapsed    | 474      |
|    total_timesteps | 84333    |
| train/             |          |
|    actor_loss      | 402      |
|    critic_loss     | 716      |
|    ent_coef        | 0.176    |
|    ent_coef_loss   | -0.0989  |
|    learning_rate   | 0.0003   |
|    n_updates       | 83332    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.37     |
| time/              |          |
|    episodes        | 500      |
|    fps             | 177      |
|    time_elapsed    | 478      |
|    total_timesteps | 84920    |
| train/             |          |
|    actor_loss      | 426      |
|    critic_loss     | 94.4     |
|    ent_coef        | 0.187    |
|    ent_coef_loss   | 0.257    |
|    learning_rate   | 0.0003   |
|    n_updates       | 83919    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -458     |
|    success_rate    | 0        |
| time/              |          |
|    total_timesteps | 85000    |
| train/             |          |
|    actor_loss      | 409      |
|    critic_loss     | 1.5e+03  |
|    ent_coef        | 0.187    |
|    ent_coef_loss   | 0.272    |
|    learning_rate   | 0.0003   |
|    n_updates       | 83999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.37     |
| time/              |          |
|    episodes        | 504      |
|    fps             | 176      |
|    time_elapsed    | 483      |
|    total_timesteps | 85537    |
| train/             |          |
|    actor_loss      | 421      |
|    critic_loss     | 65.8     |
|    ent_coef        | 0.191    |
|    ent_coef_loss   | 0.443    |
|    learning_rate   | 0.0003   |
|    n_updates       | 84536    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.4      |
| time/              |          |
|    episodes        | 508      |
|    fps             | 176      |
|    time_elapsed    | 484      |
|    total_timesteps | 85773    |
| train/             |          |
|    actor_loss      | 407      |
|    critic_loss     | 56.1     |
|    ent_coef        | 0.188    |
|    ent_coef_loss   | 0.777    |
|    learning_rate   | 0.0003   |
|    n_updates       | 84772    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.4      |
| time/              |          |
|    episodes        | 512      |
|    fps             | 176      |
|    time_elapsed    | 488      |
|    total_timesteps | 86384    |
| train/             |          |
|    actor_loss      | 421      |
|    critic_loss     | 1.34e+03 |
|    ent_coef        | 0.191    |
|    ent_coef_loss   | 0.13     |
|    learning_rate   | 0.0003   |
|    n_updates       | 85383    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.38     |
| time/              |          |
|    episodes        | 516      |
|    fps             | 176      |
|    time_elapsed    | 493      |
|    total_timesteps | 87184    |
| train/             |          |
|    actor_loss      | 402      |
|    critic_loss     | 76       |
|    ent_coef        | 0.193    |
|    ent_coef_loss   | -0.423   |
|    learning_rate   | 0.0003   |
|    n_updates       | 86183    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.38     |
| time/              |          |
|    episodes        | 520      |
|    fps             | 176      |
|    time_elapsed    | 496      |
|    total_timesteps | 87735    |
| train/             |          |
|    actor_loss      | 443      |
|    critic_loss     | 87.3     |
|    ent_coef        | 0.188    |
|    ent_coef_loss   | 0.523    |
|    learning_rate   | 0.0003   |
|    n_updates       | 86734    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.39     |
| time/              |          |
|    episodes        | 524      |
|    fps             | 176      |
|    time_elapsed    | 500      |
|    total_timesteps | 88341    |
| train/             |          |
|    actor_loss      | 425      |
|    critic_loss     | 56.7     |
|    ent_coef        | 0.186    |
|    ent_coef_loss   | 0.272    |
|    learning_rate   | 0.0003   |
|    n_updates       | 87340    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.41     |
| time/              |          |
|    episodes        | 528      |
|    fps             | 176      |
|    time_elapsed    | 504      |
|    total_timesteps | 89008    |
| train/             |          |
|    actor_loss      | 403      |
|    critic_loss     | 51.1     |
|    ent_coef        | 0.187    |
|    ent_coef_loss   | 0.436    |
|    learning_rate   | 0.0003   |
|    n_updates       | 88007    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.4      |
| time/              |          |
|    episodes        | 532      |
|    fps             | 176      |
|    time_elapsed    | 508      |
|    total_timesteps | 89735    |
| train/             |          |
|    actor_loss      | 394      |
|    critic_loss     | 63.4     |
|    ent_coef        | 0.182    |
|    ent_coef_loss   | 0.0786   |
|    learning_rate   | 0.0003   |
|    n_updates       | 88734    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 165      |
|    mean_reward     | -399     |
|    success_rate    | 0.2      |
| time/              |          |
|    total_timesteps | 90000    |
| train/             |          |
|    actor_loss      | 399      |
|    critic_loss     | 109      |
|    ent_coef        | 0.18     |
|    ent_coef_loss   | -0.373   |
|    learning_rate   | 0.0003   |
|    n_updates       | 88999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.39     |
| time/              |          |
|    episodes        | 536      |
|    fps             | 176      |
|    time_elapsed    | 513      |
|    total_timesteps | 90387    |
| train/             |          |
|    actor_loss      | 386      |
|    critic_loss     | 98.2     |
|    ent_coef        | 0.179    |
|    ent_coef_loss   | -0.219   |
|    learning_rate   | 0.0003   |
|    n_updates       | 89386    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.4      |
| time/              |          |
|    episodes        | 540      |
|    fps             | 176      |
|    time_elapsed    | 517      |
|    total_timesteps | 91096    |
| train/             |          |
|    actor_loss      | 390      |
|    critic_loss     | 73.4     |
|    ent_coef        | 0.182    |
|    ent_coef_loss   | -0.578   |
|    learning_rate   | 0.0003   |
|    n_updates       | 90095    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.38     |
| time/              |          |
|    episodes        | 544      |
|    fps             | 176      |
|    time_elapsed    | 521      |
|    total_timesteps | 91873    |
| train/             |          |
|    actor_loss      | 384      |
|    critic_loss     | 730      |
|    ent_coef        | 0.181    |
|    ent_coef_loss   | -0.56    |
|    learning_rate   | 0.0003   |
|    n_updates       | 90872    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.39     |
| time/              |          |
|    episodes        | 548      |
|    fps             | 175      |
|    time_elapsed    | 525      |
|    total_timesteps | 92478    |
| train/             |          |
|    actor_loss      | 385      |
|    critic_loss     | 1.38e+03 |
|    ent_coef        | 0.173    |
|    ent_coef_loss   | -1.07    |
|    learning_rate   | 0.0003   |
|    n_updates       | 91477    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.39     |
| time/              |          |
|    episodes        | 552      |
|    fps             | 175      |
|    time_elapsed    | 529      |
|    total_timesteps | 93117    |
| train/             |          |
|    actor_loss      | 396      |
|    critic_loss     | 104      |
|    ent_coef        | 0.169    |
|    ent_coef_loss   | -0.144   |
|    learning_rate   | 0.0003   |
|    n_updates       | 92116    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.39     |
| time/              |          |
|    episodes        | 556      |
|    fps             | 175      |
|    time_elapsed    | 532      |
|    total_timesteps | 93659    |
| train/             |          |
|    actor_loss      | 378      |
|    critic_loss     | 625      |
|    ent_coef        | 0.168    |
|    ent_coef_loss   | -0.24    |
|    learning_rate   | 0.0003   |
|    n_updates       | 92658    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.38     |
| time/              |          |
|    episodes        | 560      |
|    fps             | 175      |
|    time_elapsed    | 535      |
|    total_timesteps | 94224    |
| train/             |          |
|    actor_loss      | 371      |
|    critic_loss     | 57.5     |
|    ent_coef        | 0.172    |
|    ent_coef_loss   | -1.07    |
|    learning_rate   | 0.0003   |
|    n_updates       | 93223    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.38     |
| time/              |          |
|    episodes        | 564      |
|    fps             | 175      |
|    time_elapsed    | 539      |
|    total_timesteps | 94848    |
| train/             |          |
|    actor_loss      | 389      |
|    critic_loss     | 53.3     |
|    ent_coef        | 0.176    |
|    ent_coef_loss   | -0.365   |
|    learning_rate   | 0.0003   |
|    n_updates       | 93847    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -370     |
|    success_rate    | 0        |
| time/              |          |
|    total_timesteps | 95000    |
| train/             |          |
|    actor_loss      | 415      |
|    critic_loss     | 66       |
|    ent_coef        | 0.178    |
|    ent_coef_loss   | -0.0506  |
|    learning_rate   | 0.0003   |
|    n_updates       | 93999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.39     |
| time/              |          |
|    episodes        | 568      |
|    fps             | 175      |
|    time_elapsed    | 543      |
|    total_timesteps | 95368    |
| train/             |          |
|    actor_loss      | 392      |
|    critic_loss     | 609      |
|    ent_coef        | 0.181    |
|    ent_coef_loss   | 0.233    |
|    learning_rate   | 0.0003   |
|    n_updates       | 94367    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.41     |
| time/              |          |
|    episodes        | 572      |
|    fps             | 175      |
|    time_elapsed    | 546      |
|    total_timesteps | 95829    |
| train/             |          |
|    actor_loss      | 394      |
|    critic_loss     | 52.9     |
|    ent_coef        | 0.175    |
|    ent_coef_loss   | 0.367    |
|    learning_rate   | 0.0003   |
|    n_updates       | 94828    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.4      |
| time/              |          |
|    episodes        | 576      |
|    fps             | 175      |
|    time_elapsed    | 549      |
|    total_timesteps | 96395    |
| train/             |          |
|    actor_loss      | 376      |
|    critic_loss     | 583      |
|    ent_coef        | 0.171    |
|    ent_coef_loss   | -0.726   |
|    learning_rate   | 0.0003   |
|    n_updates       | 95394    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.39     |
| time/              |          |
|    episodes        | 580      |
|    fps             | 175      |
|    time_elapsed    | 554      |
|    total_timesteps | 97195    |
| train/             |          |
|    actor_loss      | 385      |
|    critic_loss     | 47.7     |
|    ent_coef        | 0.165    |
|    ent_coef_loss   | 1.01     |
|    learning_rate   | 0.0003   |
|    n_updates       | 96194    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.38     |
| time/              |          |
|    episodes        | 584      |
|    fps             | 175      |
|    time_elapsed    | 558      |
|    total_timesteps | 97995    |
| train/             |          |
|    actor_loss      | 368      |
|    critic_loss     | 83.6     |
|    ent_coef        | 0.167    |
|    ent_coef_loss   | 1        |
|    learning_rate   | 0.0003   |
|    n_updates       | 96994    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.37     |
| time/              |          |
|    episodes        | 588      |
|    fps             | 175      |
|    time_elapsed    | 561      |
|    total_timesteps | 98568    |
| train/             |          |
|    actor_loss      | 433      |
|    critic_loss     | 503      |
|    ent_coef        | 0.165    |
|    ent_coef_loss   | 0.254    |
|    learning_rate   | 0.0003   |
|    n_updates       | 97567    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.37     |
| time/              |          |
|    episodes        | 592      |
|    fps             | 175      |
|    time_elapsed    | 566      |
|    total_timesteps | 99225    |
| train/             |          |
|    actor_loss      | 378      |
|    critic_loss     | 50.6     |
|    ent_coef        | 0.164    |
|    ent_coef_loss   | -0.104   |
|    learning_rate   | 0.0003   |
|    n_updates       | 98224    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.38     |
| time/              |          |
|    episodes        | 596      |
|    fps             | 175      |
|    time_elapsed    | 567      |
|    total_timesteps | 99472    |
| train/             |          |
|    actor_loss      | 405      |
|    critic_loss     | 31       |
|    ent_coef        | 0.165    |
|    ent_coef_loss   | 0.539    |
|    learning_rate   | 0.0003   |
|    n_updates       | 98471    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.38     |
| time/              |          |
|    episodes        | 600      |
|    fps             | 175      |
|    time_elapsed    | 570      |
|    total_timesteps | 99917    |
| train/             |          |
|    actor_loss      | 351      |
|    critic_loss     | 601      |
|    ent_coef        | 0.164    |
|    ent_coef_loss   | 0.157    |
|    learning_rate   | 0.0003   |
|    n_updates       | 98916    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 200      |
|    mean_reward     | -497     |
|    success_rate    | 0        |
| time/              |          |
|    total_timesteps | 100000   |
| train/             |          |
|    actor_loss      | 389      |
|    critic_loss     | 83.3     |
|    ent_coef        | 0.164    |
|    ent_coef_loss   | 0.998    |
|    learning_rate   | 0.0003   |
|    n_updates       | 98999    |
---------------------------------



✓ Training complete! Model saved to ./logs/cube_pointing_final


In [4]:
# Test trained model
print("\n" + "="*60)
print("Testing trained model...")
print("="*60)

test_env = Lite6CubePointingEnv(
    model_path=str(model_path),
    render_mode="human",  # Render to see the trained agent
    max_steps=200,
    success_threshold=0.05,
    reward_scale=10.0,
    cube_placement_radius=0.3,
    ee_site_name="end_effector"
)

for episode in range(3):
    obs, info = test_env.reset()
    print(f"\nEpisode {episode+1} - Initial distance: {info['distance']:.4f} m")

    episode_reward = 0
    for step in range(200):
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = test_env.step(action)
        episode_reward += reward

        if terminated or truncated:
            print(f"  Finished in {step+1} steps")
            print(f"  Final distance: {info['distance']:.4f} m")
            print(f"  Success: {info['is_success']}")
            print(f"  Total reward: {episode_reward:.2f}")
            break

test_env.close()


Testing trained model...
✓ Added 1 cubes to scene within 0.3m radius
IK solver using site: end_effector
⚠ Viewer could not be launched: `launch_passive` requires that the Python script be run under `mjpython` on macOS
  Running in headless mode
  Tip: Use render_mode='offscreen' for notebook visualization

Episode 1 - Initial distance: 0.1615 m
  Finished in 200 steps
  Final distance: 0.2346 m
  Success: False
  Total reward: -470.67

Episode 2 - Initial distance: 0.1990 m
  Finished in 200 steps
  Final distance: 0.1921 m
  Success: False
  Total reward: -447.75

Episode 3 - Initial distance: 0.2178 m
  Finished in 200 steps
  Final distance: 0.3671 m
  Success: False
  Total reward: -594.53


In [5]:
env.close()

In [6]:
# Load and test the trained model in MuJoCo viewer
import time
from stable_baselines3 import SAC

print("="*60)
print("Testing Trained Model in MuJoCo Viewer")
print("="*60)

# Load the best model (or final model if you prefer)
try:
    model = SAC.load("./logs/best_model/best_model")
    print("✓ Loaded best model from evaluation")
except:
    model = SAC.load("./logs/cube_pointing_final")
    print("✓ Loaded final model")

# Create environment with rendering enabled
test_env = Lite6CubePointingEnv(
    model_path=str(model_path),
    render_mode="human",  # This will open MuJoCo viewer
    max_steps=200,
    success_threshold=0.05,
    reward_scale=10.0,
    cube_placement_radius=0.3,
    ee_site_name="end_effector"
)

print("\nRunning 5 test episodes...")
print("(MuJoCo viewer window should open)\n")

success_count = 0
total_rewards = []

for episode in range(5):
    obs, info = test_env.reset()
    print(f"\nEpisode {episode+1}:")
    print(f"  Cube position: {info['cube_pos']}")
    print(f"  Initial distance: {info['distance']:.4f} m")
    
    episode_reward = 0
    done = False
    
    for step in range(200):
        # Get action from trained model
        action, _states = model.predict(obs, deterministic=True)
        
        # Step environment
        obs, reward, terminated, truncated, info = test_env.step(action)
        episode_reward += reward
        
        # Small delay so we can watch the movement
        time.sleep(0.02)
        
        if terminated or truncated:
            done = True
            break
    
    # Print episode results
    print(f"  Steps taken: {step+1}")
    print(f"  Final distance: {info['distance']:.4f} m")
    print(f"  Success: {info['is_success']}")
    print(f"  Total reward: {episode_reward:.2f}")
    
    if info['is_success']:
        success_count += 1
    total_rewards.append(episode_reward)

test_env.close()

print("\n" + "="*60)
print("Test Summary:")
print("="*60)
print(f"Success rate: {success_count}/5 ({success_count/5*100:.1f}%)")
print(f"Average reward: {np.mean(total_rewards):.2f} ± {np.std(total_rewards):.2f}")
print("="*60)

Testing Trained Model in MuJoCo Viewer
✓ Loaded best model from evaluation
✓ Added 1 cubes to scene within 0.3m radius
IK solver using site: end_effector

Running 5 test episodes...
(MuJoCo viewer window should open)

⚠ Viewer could not be launched: `launch_passive` requires that the Python script be run under `mjpython` on macOS
  Running in headless mode
  Tip: Use render_mode='offscreen' for notebook visualization

Episode 1:
  Cube position: [0.08102471 0.18027519 0.03      ]
  Initial distance: 0.1853 m
  Steps taken: 200
  Final distance: 0.1637 m
  Success: False
  Total reward: -306.67

Episode 2:
  Cube position: [0.15893276 0.0362336  0.03      ]
  Initial distance: 0.0911 m
  Steps taken: 200
  Final distance: 0.0760 m
  Success: False
  Total reward: -351.45

Episode 3:
  Cube position: [ 0.25480012 -0.01995478  0.03      ]
  Initial distance: 0.1742 m
  Steps taken: 200
  Final distance: 0.2640 m
  Success: False
  Total reward: -468.18

Episode 4:
  Cube position: [-0.157