# Cube Stacking with RL

This notebook trains a robot to stack one cube on top of another using reinforcement learning with the Soft Actor-Critic (SAC) algorithm.

**Goal**: Learn to stack cube1 on top of cube2, minimizing vertical distance and maximizing horizontal alignment.

In [None]:
import os
from pathlib import Path
import numpy as np

from applied_planning.envs import Lite6CubeStackingEnv

# Repository root. Set the APPLIED_PLANNING_ROOT environment variable to run
# this notebook from a different checkout; the default is the original
# author's local path.
BASE_PATH = os.environ.get(
    "APPLIED_PLANNING_ROOT",
    "/Users/braeden/Development/applied-planning/",
)

# XML model file handed to the environment (Lite6 with the narrow gripper,
# going by its filename). os.path.join copes with BASE_PATH with or without a
# trailing separator and keeps model_path a plain string.
model_path = os.path.join(
    BASE_PATH,
    "src/applied_planning/sim/assets/ufactory_lite6/lite6_gripper_narrow.xml",
)

# Fail fast with an actionable message rather than whatever error the
# environment constructor would raise for a missing file.
if not os.path.isfile(model_path):
    raise FileNotFoundError(
        f"Robot model not found at {model_path!r}; "
        "set APPLIED_PLANNING_ROOT to the root of your applied-planning checkout."
    )

print("Creating cube stacking environment...")
env = Lite6CubeStackingEnv(
    model_path=str(model_path),
    render_mode=None,               # Set to "human" to watch training (will be slow!)
    max_steps=300,                  # More steps for complex stacking task
    success_threshold=0.08,         # 8cm vertical alignment
    horizontal_threshold=0.05,      # 5cm horizontal alignment
    reward_scale=10.0,
    cube_placement_radius=0.3,
    ee_site_name="end_effector",
    collision_penalty=100.0,
    terminate_on_collision=False
)

print(f"Observation space: {env.observation_space}")    # 16D: cube1(3) + cube2(3) + joints(6) + ee(3) + gripper(1)
print(f"Action space: {env.action_space}")              # 7D: joints(6) + gripper(1)

Creating cube stacking environment...
✓ Added 2 cubes to scene within 0.3m radius
IK solver using site: end_effector
Offscreen renderer initialized (notebook-friendly mode)
Observation space: Box(-inf, inf, (16,), float32)
Action space: Box(-1.0, 1.0, (7,), float32)


In [None]:
# Smoke-test the environment with a short rollout of random actions.
ROLLOUT_SEED = 42

# reset(seed=...) seeds the environment's RNG, but the action space owns a
# separate generator. Seed it as well so the sampled actions (and therefore
# the printed trace) are the same on every run.
env.action_space.seed(ROLLOUT_SEED)

obs, info = env.reset(seed=ROLLOUT_SEED)
print(f"Initial observation shape: {obs.shape}")
print(f"Cube 1 position: {info['cube1_pos']}")
print(f"Cube 2 position: {info['cube2_pos']}")
print(f"Vertical distance: {info['vertical_distance']:.4f} m")
print(f"Horizontal distance: {info['horizontal_distance']:.4f} m")

print("\nRunning random actions...")
for step in range(10):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    print(f"Step {step+1}: v_dist={info['vertical_distance']:.4f} m, "
          f"h_dist={info['horizontal_distance']:.4f} m, "
          f"reward={reward:.2f}, collision={info['has_collision']}")
    # Stop as soon as the episode finishes, whether by termination or by
    # truncation.
    if terminated or truncated:
        print(f"  Episode ended: success={info['is_success']}")
        break

Initial observation shape: (16,)
Cube 1 position: [-0.20456831  0.20575395  0.03      ]
Cube 2 position: [-0.02480644 -0.21832696  0.03      ]
Vertical distance: 0.0000 m
Horizontal distance: 0.4606 m

Running random actions...
Step 1: v_dist=0.0000 m, h_dist=0.4606 m, reward=-100.00, collision=True
Step 2: v_dist=0.0000 m, h_dist=0.4606 m, reward=0.00, collision=False
Step 3: v_dist=0.0000 m, h_dist=0.4606 m, reward=0.00, collision=False
Step 4: v_dist=0.0000 m, h_dist=0.4606 m, reward=-100.00, collision=True
Step 5: v_dist=0.0000 m, h_dist=0.4606 m, reward=0.00, collision=False
Step 6: v_dist=0.0000 m, h_dist=0.4606 m, reward=-100.00, collision=True
Step 7: v_dist=0.0000 m, h_dist=0.4606 m, reward=-100.00, collision=True
Step 8: v_dist=0.0000 m, h_dist=0.4606 m, reward=-100.00, collision=True
Step 9: v_dist=0.0000 m, h_dist=0.4606 m, reward=0.00, collision=False
Step 10: v_dist=0.0000 m, h_dist=0.4606 m, reward=-100.00, collision=True


## Train RL Agent

We use SAC (Soft Actor-Critic), which works well for continuous control tasks like cube stacking.

In [None]:
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback

# make environment for stable-baselines3
def make_env():
    """Build one cube-stacking environment for Stable-Baselines3.

    Intended to be passed (uncalled) to ``DummyVecEnv``, which invokes it to
    create each underlying environment. Reads the module-level ``model_path``.

    Returns:
        A ``Lite6CubeStackingEnv`` wrapped in Stable-Baselines3's ``Monitor``.
    """
    # Function-scope import so this cell does not depend on edits elsewhere.
    from stable_baselines3.common.monitor import Monitor

    stacking_env = Lite6CubeStackingEnv(
        model_path=str(model_path),
        render_mode=None,
        max_steps=300,
        success_threshold=0.08,
        horizontal_threshold=0.05,
        reward_scale=10.0,
        cube_placement_radius=0.3,
        ee_site_name="end_effector",
        collision_penalty=100.0,
        terminate_on_collision=False
    )
    # Monitor adds per-episode return and length to the step info. Without it
    # the training log has no rollout/ep_rew_mean or rollout/ep_len_mean, and
    # evaluation warns that the env is not wrapped in a Monitor.
    return Monitor(stacking_env)

# Training configuration, defined once so the summary printed at the end of
# this cell cannot drift from the values actually passed to the callbacks and
# the model.
LOG_DIR = "./logs/cube_stacking"
TRAIN_SEED = 42
# Only used for the summary below; keep in sync with the total_timesteps
# argument passed to model.learn() in the training cell.
TOTAL_TIMESTEPS = 200_000
EVAL_FREQ = 5_000
CHECKPOINT_FREQ = 10_000

# create vectorized environments (separate instances for training and evaluation)
train_env = DummyVecEnv([make_env])
eval_env = DummyVecEnv([make_env])

# setup callbacks
# Periodic evaluation on eval_env; also saves the best-scoring model so far.
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=f"{LOG_DIR}/best_model",
    log_path=f"{LOG_DIR}/eval",
    eval_freq=EVAL_FREQ,
    deterministic=True,
    render=False
)

# Periodic snapshots so a long run can be resumed or inspected mid-training.
checkpoint_callback = CheckpointCallback(
    save_freq=CHECKPOINT_FREQ,
    save_path=f"{LOG_DIR}/checkpoints",
    name_prefix="cube_stacking_model"
)

# soft actor critic model
print("\nCreating SAC model...")
model = SAC(
    "MlpPolicy",
    train_env,
    verbose=1,
    learning_rate=3e-4,
    buffer_size=100000,
    learning_starts=1000,
    batch_size=256,
    tau=0.005,
    gamma=0.99,
    train_freq=1,
    gradient_steps=1,
    tensorboard_log=f"{LOG_DIR}/tensorboard",
    # Fixed seed so repeated runs are comparable; bit-exact reproducibility
    # can still depend on hardware and library versions.
    seed=TRAIN_SEED
)

print("\n" + "="*60)
print("Training Configuration:")
print("="*60)
print("Algorithm: SAC")
print("Task: Cube Stacking (2 cubes)")
print(f"Total timesteps: {TOTAL_TIMESTEPS:,}")
print(f"Eval frequency: {EVAL_FREQ:,} steps")
print(f"Checkpoint frequency: {CHECKPOINT_FREQ:,} steps")
# Thresholds are set in make_env(); update this line if they change there.
print("Success criteria: vertical_dist < 0.08m, horizontal_dist < 0.05m")
print("="*60)

✓ Added 2 cubes to scene within 0.3m radius
IK solver using site: end_effector
Offscreen renderer initialized (notebook-friendly mode)
✓ Added 2 cubes to scene within 0.3m radius
IK solver using site: end_effector
Offscreen renderer initialized (notebook-friendly mode)

Creating SAC model...
Using cpu device

Training Configuration:
Algorithm: SAC
Task: Cube Stacking (2 cubes)
Total timesteps: 200,000
Eval frequency: 5,000 steps
Checkpoint frequency: 10,000 steps
Success criteria: vertical_dist < 0.08m, horizontal_dist < 0.05m


In [None]:
# Launch the long-running SAC training loop. Interrupting the kernel (Ctrl+C)
# stops it early while still saving the partially trained model.
print("\nStarting training...")
print("(This will take a while. Press Ctrl+C to stop early)\n")

# Evaluation and checkpointing both run through these callbacks.
training_callbacks = [eval_callback, checkpoint_callback]

try:
    model.learn(total_timesteps=200000, callback=training_callbacks, progress_bar=True)

    # learn() returned without being interrupted: persist the final weights.
    model.save("./logs/cube_stacking/cube_stacking_final")
    print("\n✓ Training complete! Model saved to ./logs/cube_stacking/cube_stacking_final")
except KeyboardInterrupt:
    # Manual stop: save under a distinct name so it is not confused with a
    # model from a completed run.
    print("\n\nTraining interrupted by user")
    model.save("./logs/cube_stacking/cube_stacking_interrupted")
    print("Model saved to ./logs/cube_stacking/cube_stacking_interrupted")


Starting training...
(This will take a while. Press Ctrl+C to stop early)

Logging to ./logs/cube_stacking/tensorboard/SAC_1


Output()

---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 4        |
|    fps             | 682      |
|    time_elapsed    | 1        |
|    total_timesteps | 1200     |
| train/             |          |
|    actor_loss      | 12.8     |
|    critic_loss     | 3.4e+03  |
|    ent_coef        | 0.942    |
|    ent_coef_loss   | -0.666   |
|    learning_rate   | 0.0003   |
|    n_updates       | 199      |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 8        |
|    fps             | 263      |
|    time_elapsed    | 9        |
|    total_timesteps | 2400     |
| train/             |          |
|    actor_loss      | -29.3    |
|    critic_loss     | 1.46e+03 |
|    ent_coef        | 0.762    |
|    ent_coef_loss   | -0.398   |
|    learning_rate   | 0.0003   |
|    n_updates       | 1399     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 12       |
|    fps             | 228      |
|    time_elapsed    | 15       |
|    total_timesteps | 3600     |
| train/             |          |
|    actor_loss      | -212     |
|    critic_loss     | 964      |
|    ent_coef        | 0.682    |
|    ent_coef_loss   | -0.544   |
|    learning_rate   | 0.0003   |
|    n_updates       | 2599     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 16       |
|    fps             | 215      |
|    time_elapsed    | 22       |
|    total_timesteps | 4800     |
| train/             |          |
|    actor_loss      | -288     |
|    critic_loss     | 1.16e+03 |
|    ent_coef        | 0.564    |
|    ent_coef_loss   | -0.568   |
|    learning_rate   | 0.0003   |
|    n_updates       | 3799     |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 300      |
|    mean_reward     | 1.05e+04 |
|    success_rate    | 0        |
| time/              |          |
|    total_timesteps | 5000     |
| train/             |          |
|    actor_loss      | -304     |
|    critic_loss     | 870      |
|    ent_coef        | 0.546    |
|    ent_coef_loss   | -0.637   |
|    learning_rate   | 0.0003   |
|    n_updates       | 3999     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 20       |
|    fps             | 204      |
|    time_elapsed    | 29       |
|    total_timesteps | 6000     |
| train/             |          |
|    actor_loss      | -358     |
|    critic_loss     | 823      |
|    ent_coef        | 0.486    |
|    ent_coef_loss   | -0.0536  |
|    learning_rate   | 0.0003   |
|    n_updates       | 4999     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 24       |
|    fps             | 201      |
|    time_elapsed    | 35       |
|    total_timesteps | 7200     |
| train/             |          |
|    actor_loss      | -438     |
|    critic_loss     | 1.02e+03 |
|    ent_coef        | 0.474    |
|    ent_coef_loss   | 0.292    |
|    learning_rate   | 0.0003   |
|    n_updates       | 6199     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 28       |
|    fps             | 196      |
|    time_elapsed    | 42       |
|    total_timesteps | 8400     |
| train/             |          |
|    actor_loss      | -467     |
|    critic_loss     | 920      |
|    ent_coef        | 0.518    |
|    ent_coef_loss   | 0.0502   |
|    learning_rate   | 0.0003   |
|    n_updates       | 7399     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 32       |
|    fps             | 194      |
|    time_elapsed    | 49       |
|    total_timesteps | 9600     |
| train/             |          |
|    actor_loss      | -574     |
|    critic_loss     | 891      |
|    ent_coef        | 0.644    |
|    ent_coef_loss   | -0.0346  |
|    learning_rate   | 0.0003   |
|    n_updates       | 8599     |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 300      |
|    mean_reward     | 6e+03    |
|    success_rate    | 0        |
| time/              |          |
|    total_timesteps | 10000    |
| train/             |          |
|    actor_loss      | -647     |
|    critic_loss     | 999      |
|    ent_coef        | 0.694    |
|    ent_coef_loss   | -0.0124  |
|    learning_rate   | 0.0003   |
|    n_updates       | 8999     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 36       |
|    fps             | 191      |
|    time_elapsed    | 56       |
|    total_timesteps | 10800    |
| train/             |          |
|    actor_loss      | -651     |
|    critic_loss     | 1.05e+03 |
|    ent_coef        | 0.687    |
|    ent_coef_loss   | -0.131   |
|    learning_rate   | 0.0003   |
|    n_updates       | 9799     |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 40       |
|    fps             | 190      |
|    time_elapsed    | 62       |
|    total_timesteps | 12000    |
| train/             |          |
|    actor_loss      | -767     |
|    critic_loss     | 1.31e+03 |
|    ent_coef        | 0.683    |
|    ent_coef_loss   | 0.00622  |
|    learning_rate   | 0.0003   |
|    n_updates       | 10999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 44       |
|    fps             | 190      |
|    time_elapsed    | 69       |
|    total_timesteps | 13200    |
| train/             |          |
|    actor_loss      | -860     |
|    critic_loss     | 996      |
|    ent_coef        | 0.679    |
|    ent_coef_loss   | -0.0984  |
|    learning_rate   | 0.0003   |
|    n_updates       | 12199    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 48       |
|    fps             | 189      |
|    time_elapsed    | 76       |
|    total_timesteps | 14400    |
| train/             |          |
|    actor_loss      | -896     |
|    critic_loss     | 1.12e+03 |
|    ent_coef        | 0.667    |
|    ent_coef_loss   | -0.13    |
|    learning_rate   | 0.0003   |
|    n_updates       | 13399    |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 300      |
|    mean_reward     | 4.77e+03 |
|    success_rate    | 0        |
| time/              |          |
|    total_timesteps | 15000    |
| train/             |          |
|    actor_loss      | -888     |
|    critic_loss     | 1.05e+03 |
|    ent_coef        | 0.654    |
|    ent_coef_loss   | 0.129    |
|    learning_rate   | 0.0003   |
|    n_updates       | 13999    |
---------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 52       |
|    fps             | 186      |
|    time_elapsed    | 83       |
|    total_timesteps | 15600    |
| train/             |          |
|    actor_loss      | -988     |
|    critic_loss     | 1.17e+03 |
|    ent_coef        | 0.657    |
|    ent_coef_loss   | -0.102   |
|    learning_rate   | 0.0003   |
|    n_updates       | 14599    |
---------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 56        |
|    fps             | 185       |
|    time_elapsed    | 90        |
|    total_timesteps | 16800     |
| train/             |           |
|    actor_loss      | -1.08e+03 |
|    critic_loss     | 1.1e+03   |
|    ent_coef        | 0.684     |
|    ent_coef_loss   | 0.0267    |
|    learning_rate   | 0.0003    |
|    n_updates       | 15799     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 60        |
|    fps             | 184       |
|    time_elapsed    | 97        |
|    total_timesteps | 18000     |
| train/             |           |
|    actor_loss      | -1.02e+03 |
|    critic_loss     | 1.17e+03  |
|    ent_coef        | 0.687     |
|    ent_coef_loss   | -0.00125  |
|    learning_rate   | 0.0003    |
|    n_updates       | 16999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 64        |
|    fps             | 184       |
|    time_elapsed    | 103       |
|    total_timesteps | 19200     |
| train/             |           |
|    actor_loss      | -1.03e+03 |
|    critic_loss     | 1.1e+03   |
|    ent_coef        | 0.701     |
|    ent_coef_loss   | -0.0952   |
|    learning_rate   | 0.0003    |
|    n_updates       | 18199     |
----------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 240      |
|    mean_reward     | 4.61e+03 |
|    success_rate    | 0.2      |
| time/              |          |
|    total_timesteps | 20000    |
| train/             |          |
|    actor_loss      | -1.1e+03 |
|    critic_loss     | 1.1e+03  |
|    ent_coef        | 0.699    |
|    ent_coef_loss   | 0.00496  |
|    learning_rate   | 0.0003   |
|    n_updates       | 18999    |
---------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 68        |
|    fps             | 183       |
|    time_elapsed    | 110       |
|    total_timesteps | 20400     |
| train/             |           |
|    actor_loss      | -1.08e+03 |
|    critic_loss     | 986       |
|    ent_coef        | 0.685     |
|    ent_coef_loss   | -0.0753   |
|    learning_rate   | 0.0003    |
|    n_updates       | 19399     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 72        |
|    fps             | 184       |
|    time_elapsed    | 117       |
|    total_timesteps | 21600     |
| train/             |           |
|    actor_loss      | -1.07e+03 |
|    critic_loss     | 1.25e+03  |
|    ent_coef        | 0.729     |
|    ent_coef_loss   | 0.0792    |
|    learning_rate   | 0.0003    |
|    n_updates       | 20599     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 76        |
|    fps             | 184       |
|    time_elapsed    | 123       |
|    total_timesteps | 22800     |
| train/             |           |
|    actor_loss      | -1.14e+03 |
|    critic_loss     | 1.1e+03   |
|    ent_coef        | 0.821     |
|    ent_coef_loss   | 0.00374   |
|    learning_rate   | 0.0003    |
|    n_updates       | 21799     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 80        |
|    fps             | 184       |
|    time_elapsed    | 130       |
|    total_timesteps | 24000     |
| train/             |           |
|    actor_loss      | -1.15e+03 |
|    critic_loss     | 1.25e+03  |
|    ent_coef        | 0.809     |
|    ent_coef_loss   | -0.0376   |
|    learning_rate   | 0.0003    |
|    n_updates       | 22999     |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 2.58e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 25000     |
| train/             |           |
|    actor_loss      | -1.21e+03 |
|    critic_loss     | 874       |
|    ent_coef        | 0.77      |
|    ent_coef_loss   | -0.0638   |
|    learning_rate   | 0.0003    |
|    n_updates       | 23999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 84        |
|    fps             | 183       |
|    time_elapsed    | 137       |
|    total_timesteps | 25200     |
| train/             |           |
|    actor_loss      | -1.23e+03 |
|    critic_loss     | 1.34e+03  |
|    ent_coef        | 0.768     |
|    ent_coef_loss   | 0.0719    |
|    learning_rate   | 0.0003    |
|    n_updates       | 24199     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 88        |
|    fps             | 183       |
|    time_elapsed    | 143       |
|    total_timesteps | 26400     |
| train/             |           |
|    actor_loss      | -1.26e+03 |
|    critic_loss     | 1.33e+03  |
|    ent_coef        | 0.799     |
|    ent_coef_loss   | 0.00481   |
|    learning_rate   | 0.0003    |
|    n_updates       | 25399     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 92        |
|    fps             | 184       |
|    time_elapsed    | 149       |
|    total_timesteps | 27600     |
| train/             |           |
|    actor_loss      | -1.26e+03 |
|    critic_loss     | 1.23e+03  |
|    ent_coef        | 0.828     |
|    ent_coef_loss   | -0.0743   |
|    learning_rate   | 0.0003    |
|    n_updates       | 26599     |
----------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 96       |
|    fps             | 184      |
|    time_elapsed    | 156      |
|    total_timesteps | 28800    |
| train/             |          |
|    actor_loss      | -1.3e+03 |
|    critic_loss     | 1.21e+03 |
|    ent_coef        | 0.83     |
|    ent_coef_loss   | -0.00211 |
|    learning_rate   | 0.0003   |
|    n_updates       | 27799    |
---------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 3.51e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 30000     |
| train/             |           |
|    actor_loss      | -1.35e+03 |
|    critic_loss     | 1.31e+03  |
|    ent_coef        | 0.827     |
|    ent_coef_loss   | 0.0299    |
|    learning_rate   | 0.0003    |
|    n_updates       | 28999     |
----------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 100      |
|    fps             | 183      |
|    time_elapsed    | 163      |
|    total_timesteps | 30000    |
---------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 104       |
|    fps             | 183       |
|    time_elapsed    | 169       |
|    total_timesteps | 31200     |
| train/             |           |
|    actor_loss      | -1.37e+03 |
|    critic_loss     | 1.19e+03  |
|    ent_coef        | 0.815     |
|    ent_coef_loss   | -0.0184   |
|    learning_rate   | 0.0003    |
|    n_updates       | 30199     |
----------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 108      |
|    fps             | 183      |
|    time_elapsed    | 176      |
|    total_timesteps | 32400    |
| train/             |          |
|    actor_loss      | -1.4e+03 |
|    critic_loss     | 1.17e+03 |
|    ent_coef        | 0.832    |
|    ent_coef_loss   | 0.0143   |
|    learning_rate   | 0.0003   |
|    n_updates       | 31399    |
---------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 112       |
|    fps             | 183       |
|    time_elapsed    | 182       |
|    total_timesteps | 33600     |
| train/             |           |
|    actor_loss      | -1.41e+03 |
|    critic_loss     | 1.12e+03  |
|    ent_coef        | 0.838     |
|    ent_coef_loss   | -0.0381   |
|    learning_rate   | 0.0003    |
|    n_updates       | 32599     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 116       |
|    fps             | 183       |
|    time_elapsed    | 189       |
|    total_timesteps | 34800     |
| train/             |           |
|    actor_loss      | -1.52e+03 |
|    critic_loss     | 1.32e+03  |
|    ent_coef        | 0.814     |
|    ent_coef_loss   | 0.108     |
|    learning_rate   | 0.0003    |
|    n_updates       | 33799     |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 7.31e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 35000     |
| train/             |           |
|    actor_loss      | -1.45e+03 |
|    critic_loss     | 1.24e+03  |
|    ent_coef        | 0.812     |
|    ent_coef_loss   | 0.105     |
|    learning_rate   | 0.0003    |
|    n_updates       | 33999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 120       |
|    fps             | 182       |
|    time_elapsed    | 196       |
|    total_timesteps | 36000     |
| train/             |           |
|    actor_loss      | -1.55e+03 |
|    critic_loss     | 1.18e+03  |
|    ent_coef        | 0.814     |
|    ent_coef_loss   | -0.0322   |
|    learning_rate   | 0.0003    |
|    n_updates       | 34999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 124       |
|    fps             | 182       |
|    time_elapsed    | 203       |
|    total_timesteps | 37200     |
| train/             |           |
|    actor_loss      | -1.52e+03 |
|    critic_loss     | 1.35e+03  |
|    ent_coef        | 0.831     |
|    ent_coef_loss   | -0.0443   |
|    learning_rate   | 0.0003    |
|    n_updates       | 36199     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 128       |
|    fps             | 181       |
|    time_elapsed    | 211       |
|    total_timesteps | 38400     |
| train/             |           |
|    actor_loss      | -1.56e+03 |
|    critic_loss     | 1.26e+03  |
|    ent_coef        | 0.828     |
|    ent_coef_loss   | 0.0562    |
|    learning_rate   | 0.0003    |
|    n_updates       | 37399     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 132       |
|    fps             | 181       |
|    time_elapsed    | 217       |
|    total_timesteps | 39600     |
| train/             |           |
|    actor_loss      | -1.53e+03 |
|    critic_loss     | 1.39e+03  |
|    ent_coef        | 0.826     |
|    ent_coef_loss   | 0.0488    |
|    learning_rate   | 0.0003    |
|    n_updates       | 38599     |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 2.23e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 40000     |
| train/             |           |
|    actor_loss      | -1.64e+03 |
|    critic_loss     | 1.27e+03  |
|    ent_coef        | 0.829     |
|    ent_coef_loss   | -0.0871   |
|    learning_rate   | 0.0003    |
|    n_updates       | 38999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 136       |
|    fps             | 181       |
|    time_elapsed    | 224       |
|    total_timesteps | 40800     |
| train/             |           |
|    actor_loss      | -1.57e+03 |
|    critic_loss     | 1.31e+03  |
|    ent_coef        | 0.821     |
|    ent_coef_loss   | 0.0426    |
|    learning_rate   | 0.0003    |
|    n_updates       | 39799     |
----------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 140      |
|    fps             | 181      |
|    time_elapsed    | 231      |
|    total_timesteps | 42000    |
| train/             |          |
|    actor_loss      | -1.6e+03 |
|    critic_loss     | 1.2e+03  |
|    ent_coef        | 0.807    |
|    ent_coef_loss   | -0.0235  |
|    learning_rate   | 0.0003   |
|    n_updates       | 40999    |
---------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 144       |
|    fps             | 181       |
|    time_elapsed    | 237       |
|    total_timesteps | 43200     |
| train/             |           |
|    actor_loss      | -1.66e+03 |
|    critic_loss     | 1.22e+03  |
|    ent_coef        | 0.841     |
|    ent_coef_loss   | 0.00586   |
|    learning_rate   | 0.0003    |
|    n_updates       | 42199     |
----------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.01     |
| time/              |          |
|    episodes        | 148      |
|    fps             | 181      |
|    time_elapsed    | 242      |
|    total_timesteps | 44101    |
| train/             |          |
|    actor_loss      | -1.7e+03 |
|    critic_loss     | 1.17e+03 |
|    ent_coef        | 0.849    |
|    ent_coef_loss   | 0.0252   |
|    learning_rate   | 0.0003   |
|    n_updates       | 43100    |
---------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 4.94e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 45000     |
| train/             |           |
|    actor_loss      | -1.73e+03 |
|    critic_loss     | 1.26e+03  |
|    ent_coef        | 0.869     |
|    ent_coef_loss   | 0.0418    |
|    learning_rate   | 0.0003    |
|    n_updates       | 43999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 152       |
|    fps             | 181       |
|    time_elapsed    | 249       |
|    total_timesteps | 45301     |
| train/             |           |
|    actor_loss      | -1.66e+03 |
|    critic_loss     | 1.3e+03   |
|    ent_coef        | 0.871     |
|    ent_coef_loss   | -0.0298   |
|    learning_rate   | 0.0003    |
|    n_updates       | 44300     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 156       |
|    fps             | 181       |
|    time_elapsed    | 255       |
|    total_timesteps | 46501     |
| train/             |           |
|    actor_loss      | -1.67e+03 |
|    critic_loss     | 1.18e+03  |
|    ent_coef        | 0.844     |
|    ent_coef_loss   | -0.0622   |
|    learning_rate   | 0.0003    |
|    n_updates       | 45500     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 160       |
|    fps             | 181       |
|    time_elapsed    | 262       |
|    total_timesteps | 47701     |
| train/             |           |
|    actor_loss      | -1.74e+03 |
|    critic_loss     | 1.31e+03  |
|    ent_coef        | 0.838     |
|    ent_coef_loss   | -0.0115   |
|    learning_rate   | 0.0003    |
|    n_updates       | 46700     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 164       |
|    fps             | 182       |
|    time_elapsed    | 268       |
|    total_timesteps | 48901     |
| train/             |           |
|    actor_loss      | -1.76e+03 |
|    critic_loss     | 1.11e+03  |
|    ent_coef        | 0.819     |
|    ent_coef_loss   | -0.00215  |
|    learning_rate   | 0.0003    |
|    n_updates       | 47900     |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 5.85e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 50000     |
| train/             |           |
|    actor_loss      | -1.86e+03 |
|    critic_loss     | 1.25e+03  |
|    ent_coef        | 0.804     |
|    ent_coef_loss   | 0.0285    |
|    learning_rate   | 0.0003    |
|    n_updates       | 48999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 168       |
|    fps             | 181       |
|    time_elapsed    | 275       |
|    total_timesteps | 50101     |
| train/             |           |
|    actor_loss      | -1.81e+03 |
|    critic_loss     | 1.28e+03  |
|    ent_coef        | 0.811     |
|    ent_coef_loss   | 0.0426    |
|    learning_rate   | 0.0003    |
|    n_updates       | 49100     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 172       |
|    fps             | 181       |
|    time_elapsed    | 281       |
|    total_timesteps | 51301     |
| train/             |           |
|    actor_loss      | -1.86e+03 |
|    critic_loss     | 1.6e+03   |
|    ent_coef        | 0.83      |
|    ent_coef_loss   | 0.0277    |
|    learning_rate   | 0.0003    |
|    n_updates       | 50300     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 176       |
|    fps             | 182       |
|    time_elapsed    | 288       |
|    total_timesteps | 52501     |
| train/             |           |
|    actor_loss      | -1.83e+03 |
|    critic_loss     | 1.32e+03  |
|    ent_coef        | 0.822     |
|    ent_coef_loss   | 0.0242    |
|    learning_rate   | 0.0003    |
|    n_updates       | 51500     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 180       |
|    fps             | 181       |
|    time_elapsed    | 293       |
|    total_timesteps | 53402     |
| train/             |           |
|    actor_loss      | -1.84e+03 |
|    critic_loss     | 2.2e+03   |
|    ent_coef        | 0.809     |
|    ent_coef_loss   | -0.0571   |
|    learning_rate   | 0.0003    |
|    n_updates       | 52401     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 184       |
|    fps             | 181       |
|    time_elapsed    | 300       |
|    total_timesteps | 54602     |
| train/             |           |
|    actor_loss      | -1.76e+03 |
|    critic_loss     | 1.31e+03  |
|    ent_coef        | 0.808     |
|    ent_coef_loss   | -0.0185   |
|    learning_rate   | 0.0003    |
|    n_updates       | 53601     |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 5.03e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 55000     |
| train/             |           |
|    actor_loss      | -1.82e+03 |
|    critic_loss     | 1.71e+03  |
|    ent_coef        | 0.796     |
|    ent_coef_loss   | 0.00252   |
|    learning_rate   | 0.0003    |
|    n_updates       | 53999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 188       |
|    fps             | 181       |
|    time_elapsed    | 307       |
|    total_timesteps | 55802     |
| train/             |           |
|    actor_loss      | -1.87e+03 |
|    critic_loss     | 1.31e+03  |
|    ent_coef        | 0.772     |
|    ent_coef_loss   | -0.0402   |
|    learning_rate   | 0.0003    |
|    n_updates       | 54801     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 192       |
|    fps             | 181       |
|    time_elapsed    | 313       |
|    total_timesteps | 57002     |
| train/             |           |
|    actor_loss      | -1.78e+03 |
|    critic_loss     | 1.18e+03  |
|    ent_coef        | 0.777     |
|    ent_coef_loss   | 0.0673    |
|    learning_rate   | 0.0003    |
|    n_updates       | 56001     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 196       |
|    fps             | 181       |
|    time_elapsed    | 320       |
|    total_timesteps | 58202     |
| train/             |           |
|    actor_loss      | -1.82e+03 |
|    critic_loss     | 1.47e+03  |
|    ent_coef        | 0.765     |
|    ent_coef_loss   | 0.0266    |
|    learning_rate   | 0.0003    |
|    n_updates       | 57201     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 200       |
|    fps             | 181       |
|    time_elapsed    | 324       |
|    total_timesteps | 59103     |
| train/             |           |
|    actor_loss      | -1.84e+03 |
|    critic_loss     | 1.55e+03  |
|    ent_coef        | 0.78      |
|    ent_coef_loss   | -0.0242   |
|    learning_rate   | 0.0003    |
|    n_updates       | 58102     |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 5.18e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 60000     |
| train/             |           |
|    actor_loss      | -1.93e+03 |
|    critic_loss     | 1.41e+03  |
|    ent_coef        | 0.776     |
|    ent_coef_loss   | 0.00947   |
|    learning_rate   | 0.0003    |
|    n_updates       | 58999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 204       |
|    fps             | 181       |
|    time_elapsed    | 331       |
|    total_timesteps | 60303     |
| train/             |           |
|    actor_loss      | -1.89e+03 |
|    critic_loss     | 1.86e+03  |
|    ent_coef        | 0.782     |
|    ent_coef_loss   | -0.0625   |
|    learning_rate   | 0.0003    |
|    n_updates       | 59302     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 208       |
|    fps             | 181       |
|    time_elapsed    | 338       |
|    total_timesteps | 61503     |
| train/             |           |
|    actor_loss      | -1.88e+03 |
|    critic_loss     | 1.51e+03  |
|    ent_coef        | 0.795     |
|    ent_coef_loss   | -0.0226   |
|    learning_rate   | 0.0003    |
|    n_updates       | 60502     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 212       |
|    fps             | 181       |
|    time_elapsed    | 344       |
|    total_timesteps | 62703     |
| train/             |           |
|    actor_loss      | -1.91e+03 |
|    critic_loss     | 1.15e+03  |
|    ent_coef        | 0.781     |
|    ent_coef_loss   | 0.0105    |
|    learning_rate   | 0.0003    |
|    n_updates       | 61702     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 216       |
|    fps             | 181       |
|    time_elapsed    | 351       |
|    total_timesteps | 63903     |
| train/             |           |
|    actor_loss      | -1.86e+03 |
|    critic_loss     | 1.61e+03  |
|    ent_coef        | 0.776     |
|    ent_coef_loss   | 0.0478    |
|    learning_rate   | 0.0003    |
|    n_updates       | 62902     |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 7.33e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 65000     |
| train/             |           |
|    actor_loss      | -1.98e+03 |
|    critic_loss     | 1.35e+03  |
|    ent_coef        | 0.797     |
|    ent_coef_loss   | -0.0596   |
|    learning_rate   | 0.0003    |
|    n_updates       | 63999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 220       |
|    fps             | 181       |
|    time_elapsed    | 358       |
|    total_timesteps | 65103     |
| train/             |           |
|    actor_loss      | -2.03e+03 |
|    critic_loss     | 1.56e+03  |
|    ent_coef        | 0.796     |
|    ent_coef_loss   | -0.0895   |
|    learning_rate   | 0.0003    |
|    n_updates       | 64102     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 224       |
|    fps             | 181       |
|    time_elapsed    | 365       |
|    total_timesteps | 66303     |
| train/             |           |
|    actor_loss      | -1.82e+03 |
|    critic_loss     | 1.37e+03  |
|    ent_coef        | 0.79      |
|    ent_coef_loss   | -0.115    |
|    learning_rate   | 0.0003    |
|    n_updates       | 65302     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.04      |
| time/              |           |
|    episodes        | 228       |
|    fps             | 181       |
|    time_elapsed    | 370       |
|    total_timesteps | 67204     |
| train/             |           |
|    actor_loss      | -1.96e+03 |
|    critic_loss     | 1.9e+03   |
|    ent_coef        | 0.815     |
|    ent_coef_loss   | 0.0422    |
|    learning_rate   | 0.0003    |
|    n_updates       | 66203     |
----------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.04     |
| time/              |          |
|    episodes        | 232      |
|    fps             | 181      |
|    time_elapsed    | 376      |
|    total_timesteps | 68404    |
| train/             |          |
|    actor_loss      | -2e+03   |
|    critic_loss     | 1.77e+03 |
|    ent_coef        | 0.822    |
|    ent_coef_loss   | 0.0103   |
|    learning_rate   | 0.0003   |
|    n_updates       | 67403    |
---------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.04      |
| time/              |           |
|    episodes        | 236       |
|    fps             | 181       |
|    time_elapsed    | 383       |
|    total_timesteps | 69604     |
| train/             |           |
|    actor_loss      | -1.99e+03 |
|    critic_loss     | 1.67e+03  |
|    ent_coef        | 0.825     |
|    ent_coef_loss   | -0.101    |
|    learning_rate   | 0.0003    |
|    n_updates       | 68603     |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 3.69e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 70000     |
| train/             |           |
|    actor_loss      | -1.94e+03 |
|    critic_loss     | 1.46e+03  |
|    ent_coef        | 0.835     |
|    ent_coef_loss   | -0.0841   |
|    learning_rate   | 0.0003    |
|    n_updates       | 68999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.06      |
| time/              |           |
|    episodes        | 240       |
|    fps             | 180       |
|    time_elapsed    | 388       |
|    total_timesteps | 70206     |
| train/             |           |
|    actor_loss      | -2.03e+03 |
|    critic_loss     | 1.45e+03  |
|    ent_coef        | 0.841     |
|    ent_coef_loss   | 0.0226    |
|    learning_rate   | 0.0003    |
|    n_updates       | 69205     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.06      |
| time/              |           |
|    episodes        | 244       |
|    fps             | 180       |
|    time_elapsed    | 394       |
|    total_timesteps | 71406     |
| train/             |           |
|    actor_loss      | -1.96e+03 |
|    critic_loss     | 1.45e+03  |
|    ent_coef        | 0.83      |
|    ent_coef_loss   | 0.0841    |
|    learning_rate   | 0.0003    |
|    n_updates       | 70405     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.05      |
| time/              |           |
|    episodes        | 248       |
|    fps             | 180       |
|    time_elapsed    | 401       |
|    total_timesteps | 72606     |
| train/             |           |
|    actor_loss      | -1.97e+03 |
|    critic_loss     | 1.43e+03  |
|    ent_coef        | 0.834     |
|    ent_coef_loss   | -0.000228 |
|    learning_rate   | 0.0003    |
|    n_updates       | 71605     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.05      |
| time/              |           |
|    episodes        | 252       |
|    fps             | 180       |
|    time_elapsed    | 408       |
|    total_timesteps | 73806     |
| train/             |           |
|    actor_loss      | -2.05e+03 |
|    critic_loss     | 1.22e+03  |
|    ent_coef        | 0.837     |
|    ent_coef_loss   | 0.0218    |
|    learning_rate   | 0.0003    |
|    n_updates       | 72805     |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 6.18e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 75000     |
| train/             |           |
|    actor_loss      | -2.15e+03 |
|    critic_loss     | 1.99e+03  |
|    ent_coef        | 0.846     |
|    ent_coef_loss   | 0.0314    |
|    learning_rate   | 0.0003    |
|    n_updates       | 73999     |
----------------------------------
---------------------------------
| rollout/           |          |
|    success_rate    | 0.05     |
| time/              |          |
|    episodes        | 256      |
|    fps             | 180      |
|    time_elapsed    | 415      |
|    total_timesteps | 75006    |
| train/             |          |
|    actor_loss      | -2.1e+03 |
|    critic_loss     | 1.47e+03 |
|    ent_coef        | 0.847    |
|    ent_coef_loss   | -0.0567  |
|    learning_rate   | 0.0003   |

----------------------------------
| rollout/           |           |
|    success_rate    | 0.05      |
| time/              |           |
|    episodes        | 260       |
|    fps             | 180       |
|    time_elapsed    | 421       |
|    total_timesteps | 76206     |
| train/             |           |
|    actor_loss      | -2.02e+03 |
|    critic_loss     | 1.75e+03  |
|    ent_coef        | 0.849     |
|    ent_coef_loss   | 0.0345    |
|    learning_rate   | 0.0003    |
|    n_updates       | 75205     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.05      |
| time/              |           |
|    episodes        | 264       |
|    fps             | 180       |
|    time_elapsed    | 428       |
|    total_timesteps | 77406     |
| train/             |           |
|    actor_loss      | -2.16e+03 |
|    critic_loss     | 1.85e+03  |
|    ent_coef        | 0.83      |
|    ent_coef_loss   | -0.0614   |
|    learning_rate   | 0.0003    |
|    n_updates       | 76405     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.05      |
| time/              |           |
|    episodes        | 268       |
|    fps             | 180       |
|    time_elapsed    | 434       |
|    total_timesteps | 78606     |
| train/             |           |
|    actor_loss      | -2.03e+03 |
|    critic_loss     | 3.08e+03  |
|    ent_coef        | 0.871     |
|    ent_coef_loss   | -0.00157  |
|    learning_rate   | 0.0003    |
|    n_updates       | 77605     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.05      |
| time/              |           |
|    episodes        | 272       |
|    fps             | 181       |
|    time_elapsed    | 440       |
|    total_timesteps | 79806     |
| train/             |           |
|    actor_loss      | -2.02e+03 |
|    critic_loss     | 2.38e+03  |
|    ent_coef        | 0.851     |
|    ent_coef_loss   | 0.00114   |
|    learning_rate   | 0.0003    |
|    n_updates       | 78805     |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 8.66e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 80000     |
| train/             |           |
|    actor_loss      | -2.02e+03 |
|    critic_loss     | 2.5e+03   |
|    ent_coef        | 0.838     |
|    ent_coef_loss   | -0.108    |
|    learning_rate   | 0.0003    |
|    n_updates       | 78999     |
----------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.05     |
| time/              |          |
|    episodes        | 276      |
|    fps             | 180      |
|    time_elapsed    | 447      |
|    total_timesteps | 81006    |
| train/             |          |
|    actor_loss      | -2.1e+03 |
|    critic_loss     | 3.23e+03 |
|    ent_coef        | 0.829    |
|    ent_coef_loss   | 0.105    |
|    learning_rate   | 0.0003   |
|    n_updates       | 80005    |
---------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.04      |
| time/              |           |
|    episodes        | 280       |
|    fps             | 181       |
|    time_elapsed    | 453       |
|    total_timesteps | 82206     |
| train/             |           |
|    actor_loss      | -2.09e+03 |
|    critic_loss     | 3.04e+03  |
|    ent_coef        | 0.823     |
|    ent_coef_loss   | 0.0282    |
|    learning_rate   | 0.0003    |
|    n_updates       | 81205     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.04      |
| time/              |           |
|    episodes        | 284       |
|    fps             | 181       |
|    time_elapsed    | 460       |
|    total_timesteps | 83406     |
| train/             |           |
|    actor_loss      | -2.08e+03 |
|    critic_loss     | 1.94e+03  |
|    ent_coef        | 0.847     |
|    ent_coef_loss   | 0.00654   |
|    learning_rate   | 0.0003    |
|    n_updates       | 82405     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.04      |
| time/              |           |
|    episodes        | 288       |
|    fps             | 181       |
|    time_elapsed    | 466       |
|    total_timesteps | 84606     |
| train/             |           |
|    actor_loss      | -2.19e+03 |
|    critic_loss     | 4.1e+03   |
|    ent_coef        | 0.84      |
|    ent_coef_loss   | -0.00137  |
|    learning_rate   | 0.0003    |
|    n_updates       | 83605     |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 4.77e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 85000     |
| train/             |           |
|    actor_loss      | -2.14e+03 |
|    critic_loss     | 2.99e+03  |
|    ent_coef        | 0.84      |
|    ent_coef_loss   | 0.0451    |
|    learning_rate   | 0.0003    |
|    n_updates       | 83999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.04      |
| time/              |           |
|    episodes        | 292       |
|    fps             | 180       |
|    time_elapsed    | 474       |
|    total_timesteps | 85806     |
| train/             |           |
|    actor_loss      | -2.23e+03 |
|    critic_loss     | 2.53e+03  |
|    ent_coef        | 0.849     |
|    ent_coef_loss   | 0.0564    |
|    learning_rate   | 0.0003    |
|    n_updates       | 84805     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.04      |
| time/              |           |
|    episodes        | 296       |
|    fps             | 180       |
|    time_elapsed    | 481       |
|    total_timesteps | 87006     |
| train/             |           |
|    actor_loss      | -2.05e+03 |
|    critic_loss     | 2.59e+03  |
|    ent_coef        | 0.888     |
|    ent_coef_loss   | 0.0452    |
|    learning_rate   | 0.0003    |
|    n_updates       | 86005     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 300       |
|    fps             | 180       |
|    time_elapsed    | 487       |
|    total_timesteps | 88206     |
| train/             |           |
|    actor_loss      | -2.15e+03 |
|    critic_loss     | 2.44e+03  |
|    ent_coef        | 0.905     |
|    ent_coef_loss   | -0.00198  |
|    learning_rate   | 0.0003    |
|    n_updates       | 87205     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 304       |
|    fps             | 180       |
|    time_elapsed    | 495       |
|    total_timesteps | 89406     |
| train/             |           |
|    actor_loss      | -2.04e+03 |
|    critic_loss     | 2.45e+03  |
|    ent_coef        | 0.914     |
|    ent_coef_loss   | 0.0145    |
|    learning_rate   | 0.0003    |
|    n_updates       | 88405     |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 1.44e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 90000     |
| train/             |           |
|    actor_loss      | -2.07e+03 |
|    critic_loss     | 4.19e+03  |
|    ent_coef        | 0.908     |
|    ent_coef_loss   | -0.0344   |
|    learning_rate   | 0.0003    |
|    n_updates       | 88999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 308       |
|    fps             | 179       |
|    time_elapsed    | 503       |
|    total_timesteps | 90606     |
| train/             |           |
|    actor_loss      | -2.02e+03 |
|    critic_loss     | 2.13e+03  |
|    ent_coef        | 0.89      |
|    ent_coef_loss   | -0.0319   |
|    learning_rate   | 0.0003    |
|    n_updates       | 89605     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 312       |
|    fps             | 179       |
|    time_elapsed    | 510       |
|    total_timesteps | 91806     |
| train/             |           |
|    actor_loss      | -2.27e+03 |
|    critic_loss     | 4.91e+03  |
|    ent_coef        | 0.927     |
|    ent_coef_loss   | 0.00983   |
|    learning_rate   | 0.0003    |
|    n_updates       | 90805     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 316       |
|    fps             | 179       |
|    time_elapsed    | 517       |
|    total_timesteps | 93006     |
| train/             |           |
|    actor_loss      | -2.19e+03 |
|    critic_loss     | 2.94e+03  |
|    ent_coef        | 0.938     |
|    ent_coef_loss   | 0.00724   |
|    learning_rate   | 0.0003    |
|    n_updates       | 92005     |
----------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.03     |
| time/              |          |
|    episodes        | 320      |
|    fps             | 179      |
|    time_elapsed    | 524      |
|    total_timesteps | 94206    |
| train/             |          |
|    actor_loss      | -2.2e+03 |
|    critic_loss     | 4.75e+03 |
|    ent_coef        | 0.934    |
|    ent_coef_loss   | -0.0188  |
|    learning_rate   | 0.0003   |
|    n_updates       | 93205    |
---------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 4.02e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 95000     |
| train/             |           |
|    actor_loss      | -2.12e+03 |
|    critic_loss     | 4.05e+03  |
|    ent_coef        | 0.931     |
|    ent_coef_loss   | 0.0137    |
|    learning_rate   | 0.0003    |
|    n_updates       | 93999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 324       |
|    fps             | 179       |
|    time_elapsed    | 532       |
|    total_timesteps | 95406     |
| train/             |           |
|    actor_loss      | -2.19e+03 |
|    critic_loss     | 2.66e+03  |
|    ent_coef        | 0.918     |
|    ent_coef_loss   | -0.0237   |
|    learning_rate   | 0.0003    |
|    n_updates       | 94405     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 328       |
|    fps             | 178       |
|    time_elapsed    | 539       |
|    total_timesteps | 96606     |
| train/             |           |
|    actor_loss      | -2.11e+03 |
|    critic_loss     | 4.81e+03  |
|    ent_coef        | 0.918     |
|    ent_coef_loss   | 0.0175    |
|    learning_rate   | 0.0003    |
|    n_updates       | 95605     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 332       |
|    fps             | 178       |
|    time_elapsed    | 546       |
|    total_timesteps | 97806     |
| train/             |           |
|    actor_loss      | -2.13e+03 |
|    critic_loss     | 3.44e+03  |
|    ent_coef        | 0.971     |
|    ent_coef_loss   | -0.00394  |
|    learning_rate   | 0.0003    |
|    n_updates       | 96805     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 336       |
|    fps             | 178       |
|    time_elapsed    | 554       |
|    total_timesteps | 99006     |
| train/             |           |
|    actor_loss      | -2.32e+03 |
|    critic_loss     | 5.88e+03  |
|    ent_coef        | 0.968     |
|    ent_coef_loss   | 0.0344    |
|    learning_rate   | 0.0003    |
|    n_updates       | 98005     |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | -5.59e+03 |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 100000    |
| train/             |           |
|    actor_loss      | -2.27e+03 |
|    critic_loss     | 8.81e+03  |
|    ent_coef        | 0.996     |
|    ent_coef_loss   | -0.00107  |
|    learning_rate   | 0.0003    |
|    n_updates       | 98999     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 340       |
|    fps             | 178       |
|    time_elapsed    | 562       |
|    total_timesteps | 100206    |
| train/             |           |
|    actor_loss      | -2.33e+03 |
|    critic_loss     | 3.05e+03  |
|    ent_coef        | 1.01      |
|    ent_coef_loss   | -0.00333  |
|    learning_rate   | 0.0003    |
|    n_updates       | 99205     |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 344       |
|    fps             | 178       |
|    time_elapsed    | 569       |
|    total_timesteps | 101406    |
| train/             |           |
|    actor_loss      | -2.47e+03 |
|    critic_loss     | 5.03e+03  |
|    ent_coef        | 1.07      |
|    ent_coef_loss   | -0.0157   |
|    learning_rate   | 0.0003    |
|    n_updates       | 100405    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 348       |
|    fps             | 177       |
|    time_elapsed    | 576       |
|    total_timesteps | 102606    |
| train/             |           |
|    actor_loss      | -2.25e+03 |
|    critic_loss     | 3.99e+03  |
|    ent_coef        | 1.05      |
|    ent_coef_loss   | -0.00693  |
|    learning_rate   | 0.0003    |
|    n_updates       | 101605    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 352       |
|    fps             | 177       |
|    time_elapsed    | 584       |
|    total_timesteps | 103806    |
| train/             |           |
|    actor_loss      | -2.33e+03 |
|    critic_loss     | 2.87e+03  |
|    ent_coef        | 1.07      |
|    ent_coef_loss   | 0.031     |
|    learning_rate   | 0.0003    |
|    n_updates       | 102805    |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 4.06e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 105000    |
| train/             |           |
|    actor_loss      | -2.37e+03 |
|    critic_loss     | 6.44e+03  |
|    ent_coef        | 1.09      |
|    ent_coef_loss   | 0.0204    |
|    learning_rate   | 0.0003    |
|    n_updates       | 103999    |
----------------------------------
----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 356       |
|    fps             | 177       |
|    time_elapsed    | 592       |
|    total_timesteps | 105006    |
| train/             |           |
|    actor_loss      | -2.42e+03 |
|    critic_loss     | 5.95e+03  |
|    ent_coef        | 1.09      |
|    ent_coef_loss   | 0.0316    |
|    learning_rate  

----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 360       |
|    fps             | 177       |
|    time_elapsed    | 599       |
|    total_timesteps | 106206    |
| train/             |           |
|    actor_loss      | -2.54e+03 |
|    critic_loss     | 1.88e+04  |
|    ent_coef        | 1.12      |
|    ent_coef_loss   | 8.63e-05  |
|    learning_rate   | 0.0003    |
|    n_updates       | 105205    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 364       |
|    fps             | 177       |
|    time_elapsed    | 606       |
|    total_timesteps | 107406    |
| train/             |           |
|    actor_loss      | -2.53e+03 |
|    critic_loss     | 8.63e+03  |
|    ent_coef        | 1.18      |
|    ent_coef_loss   | 0.0183    |
|    learning_rate   | 0.0003    |
|    n_updates       | 106405    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 368       |
|    fps             | 176       |
|    time_elapsed    | 613       |
|    total_timesteps | 108606    |
| train/             |           |
|    actor_loss      | -2.55e+03 |
|    critic_loss     | 1.2e+04   |
|    ent_coef        | 1.29      |
|    ent_coef_loss   | -0.00582  |
|    learning_rate   | 0.0003    |
|    n_updates       | 107605    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 372       |
|    fps             | 176       |
|    time_elapsed    | 618       |
|    total_timesteps | 109508    |
| train/             |           |
|    actor_loss      | -2.42e+03 |
|    critic_loss     | 5.76e+03  |
|    ent_coef        | 1.31      |
|    ent_coef_loss   | -0.143    |
|    learning_rate   | 0.0003    |
|    n_updates       | 108507    |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 5.52e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 110000    |
| train/             |           |
|    actor_loss      | -2.63e+03 |
|    critic_loss     | 6.24e+03  |
|    ent_coef        | 1.28      |
|    ent_coef_loss   | 0.0348    |
|    learning_rate   | 0.0003    |
|    n_updates       | 108999    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 376       |
|    fps             | 176       |
|    time_elapsed    | 626       |
|    total_timesteps | 110708    |
| train/             |           |
|    actor_loss      | -2.36e+03 |
|    critic_loss     | 3.98e+03  |
|    ent_coef        | 1.25      |
|    ent_coef_loss   | -0.056    |
|    learning_rate   | 0.0003    |
|    n_updates       | 109707    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 380       |
|    fps             | 176       |
|    time_elapsed    | 633       |
|    total_timesteps | 111908    |
| train/             |           |
|    actor_loss      | -2.66e+03 |
|    critic_loss     | 7.61e+03  |
|    ent_coef        | 1.22      |
|    ent_coef_loss   | -0.0424   |
|    learning_rate   | 0.0003    |
|    n_updates       | 110907    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 384       |
|    fps             | 176       |
|    time_elapsed    | 640       |
|    total_timesteps | 113108    |
| train/             |           |
|    actor_loss      | -2.41e+03 |
|    critic_loss     | 3.85e+03  |
|    ent_coef        | 1.17      |
|    ent_coef_loss   | -0.0486   |
|    learning_rate   | 0.0003    |
|    n_updates       | 112107    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 388       |
|    fps             | 176       |
|    time_elapsed    | 647       |
|    total_timesteps | 114308    |
| train/             |           |
|    actor_loss      | -2.59e+03 |
|    critic_loss     | 4.49e+03  |
|    ent_coef        | 1.18      |
|    ent_coef_loss   | -0.00197  |
|    learning_rate   | 0.0003    |
|    n_updates       | 113307    |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 241       |
|    mean_reward     | 6.42e+03  |
|    success_rate    | 0.2       |
| time/              |           |
|    total_timesteps | 115000    |
| train/             |           |
|    actor_loss      | -2.52e+03 |
|    critic_loss     | 1.93e+04  |
|    ent_coef        | 1.14      |
|    ent_coef_loss   | 0.0383    |
|    learning_rate   | 0.0003    |
|    n_updates       | 113999    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 392       |
|    fps             | 176       |
|    time_elapsed    | 655       |
|    total_timesteps | 115508    |
| train/             |           |
|    actor_loss      | -2.56e+03 |
|    critic_loss     | 3.25e+03  |
|    ent_coef        | 1.13      |
|    ent_coef_loss   | 0.0469    |
|    learning_rate   | 0.0003    |
|    n_updates       | 114507    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.01      |
| time/              |           |
|    episodes        | 396       |
|    fps             | 176       |
|    time_elapsed    | 662       |
|    total_timesteps | 116708    |
| train/             |           |
|    actor_loss      | -2.37e+03 |
|    critic_loss     | 7.08e+03  |
|    ent_coef        | 1.1       |
|    ent_coef_loss   | -0.0423   |
|    learning_rate   | 0.0003    |
|    n_updates       | 115707    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 400       |
|    fps             | 175       |
|    time_elapsed    | 668       |
|    total_timesteps | 117609    |
| train/             |           |
|    actor_loss      | -2.53e+03 |
|    critic_loss     | 6.02e+03  |
|    ent_coef        | 1.08      |
|    ent_coef_loss   | 0.00679   |
|    learning_rate   | 0.0003    |
|    n_updates       | 116608    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 404       |
|    fps             | 175       |
|    time_elapsed    | 675       |
|    total_timesteps | 118809    |
| train/             |           |
|    actor_loss      | -2.66e+03 |
|    critic_loss     | 3.15e+03  |
|    ent_coef        | 1.1       |
|    ent_coef_loss   | 0.0231    |
|    learning_rate   | 0.0003    |
|    n_updates       | 117808    |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 2.38e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 120000    |
| train/             |           |
|    actor_loss      | -2.59e+03 |
|    critic_loss     | 3.72e+03  |
|    ent_coef        | 1.09      |
|    ent_coef_loss   | -0.0121   |
|    learning_rate   | 0.0003    |
|    n_updates       | 118999    |
----------------------------------
----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 408       |
|    fps             | 175       |
|    time_elapsed    | 683       |
|    total_timesteps | 120009    |
| train/             |           |
|    actor_loss      | -2.45e+03 |
|    critic_loss     | 4.66e+03  |
|    ent_coef        | 1.09      |
|    ent_coef_loss   | -0.00936  |
|    learning_rate  

----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 412       |
|    fps             | 175       |
|    time_elapsed    | 690       |
|    total_timesteps | 121209    |
| train/             |           |
|    actor_loss      | -2.51e+03 |
|    critic_loss     | 5.25e+03  |
|    ent_coef        | 1.15      |
|    ent_coef_loss   | -0.0785   |
|    learning_rate   | 0.0003    |
|    n_updates       | 120208    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 416       |
|    fps             | 175       |
|    time_elapsed    | 697       |
|    total_timesteps | 122409    |
| train/             |           |
|    actor_loss      | -2.41e+03 |
|    critic_loss     | 3.56e+03  |
|    ent_coef        | 1.15      |
|    ent_coef_loss   | -0.0379   |
|    learning_rate   | 0.0003    |
|    n_updates       | 121408    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 420       |
|    fps             | 175       |
|    time_elapsed    | 704       |
|    total_timesteps | 123609    |
| train/             |           |
|    actor_loss      | -2.54e+03 |
|    critic_loss     | 3.38e+03  |
|    ent_coef        | 1.15      |
|    ent_coef_loss   | 0.0507    |
|    learning_rate   | 0.0003    |
|    n_updates       | 122608    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 424       |
|    fps             | 175       |
|    time_elapsed    | 711       |
|    total_timesteps | 124809    |
| train/             |           |
|    actor_loss      | -2.57e+03 |
|    critic_loss     | 3.59e+03  |
|    ent_coef        | 1.12      |
|    ent_coef_loss   | 0.0716    |
|    learning_rate   | 0.0003    |
|    n_updates       | 123808    |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 4.03e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 125000    |
| train/             |           |
|    actor_loss      | -2.45e+03 |
|    critic_loss     | 3.43e+03  |
|    ent_coef        | 1.1       |
|    ent_coef_loss   | 0.0249    |
|    learning_rate   | 0.0003    |
|    n_updates       | 123999    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 428       |
|    fps             | 175       |
|    time_elapsed    | 719       |
|    total_timesteps | 126009    |
| train/             |           |
|    actor_loss      | -2.55e+03 |
|    critic_loss     | 3.53e+03  |
|    ent_coef        | 1.09      |
|    ent_coef_loss   | 0.00332   |
|    learning_rate   | 0.0003    |
|    n_updates       | 125008    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 432       |
|    fps             | 175       |
|    time_elapsed    | 726       |
|    total_timesteps | 127209    |
| train/             |           |
|    actor_loss      | -2.54e+03 |
|    critic_loss     | 3.48e+03  |
|    ent_coef        | 1.07      |
|    ent_coef_loss   | -0.00883  |
|    learning_rate   | 0.0003    |
|    n_updates       | 126208    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 436       |
|    fps             | 175       |
|    time_elapsed    | 731       |
|    total_timesteps | 128110    |
| train/             |           |
|    actor_loss      | -2.85e+03 |
|    critic_loss     | 3.29e+03  |
|    ent_coef        | 1.06      |
|    ent_coef_loss   | 0.0248    |
|    learning_rate   | 0.0003    |
|    n_updates       | 127109    |
----------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.03     |
| time/              |          |
|    episodes        | 440      |
|    fps             | 175      |
|    time_elapsed    | 738      |
|    total_timesteps | 129310   |
| train/             |          |
|    actor_loss      | -2.7e+03 |
|    critic_loss     | 2.99e+03 |
|    ent_coef        | 1.07     |
|    ent_coef_loss   | 0.00161  |
|    learning_rate   | 0.0003   |
|    n_updates       | 128309   |
---------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 9.78e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 130000    |
| train/             |           |
|    actor_loss      | -2.77e+03 |
|    critic_loss     | 3.62e+03  |
|    ent_coef        | 1.08      |
|    ent_coef_loss   | 0.000618  |
|    learning_rate   | 0.0003    |
|    n_updates       | 128999    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 444       |
|    fps             | 174       |
|    time_elapsed    | 746       |
|    total_timesteps | 130510    |
| train/             |           |
|    actor_loss      | -2.67e+03 |
|    critic_loss     | 3.85e+03  |
|    ent_coef        | 1.08      |
|    ent_coef_loss   | 0.033     |
|    learning_rate   | 0.0003    |
|    n_updates       | 129509    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 448       |
|    fps             | 174       |
|    time_elapsed    | 753       |
|    total_timesteps | 131710    |
| train/             |           |
|    actor_loss      | -2.53e+03 |
|    critic_loss     | 4.07e+03  |
|    ent_coef        | 1.08      |
|    ent_coef_loss   | -0.0098   |
|    learning_rate   | 0.0003    |
|    n_updates       | 130709    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.04      |
| time/              |           |
|    episodes        | 452       |
|    fps             | 174       |
|    time_elapsed    | 759       |
|    total_timesteps | 132611    |
| train/             |           |
|    actor_loss      | -2.74e+03 |
|    critic_loss     | 3.17e+03  |
|    ent_coef        | 1.08      |
|    ent_coef_loss   | -0.000613 |
|    learning_rate   | 0.0003    |
|    n_updates       | 131610    |
----------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.04     |
| time/              |          |
|    episodes        | 456      |
|    fps             | 174      |
|    time_elapsed    | 766      |
|    total_timesteps | 133811   |
| train/             |          |
|    actor_loss      | -2.6e+03 |
|    critic_loss     | 2.55e+03 |
|    ent_coef        | 1.07     |
|    ent_coef_loss   | -0.0179  |
|    learning_rate   | 0.0003   |
|    n_updates       | 132810   |
---------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 4.43e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 135000    |
| train/             |           |
|    actor_loss      | -2.95e+03 |
|    critic_loss     | 5.13e+03  |
|    ent_coef        | 1.09      |
|    ent_coef_loss   | -0.000204 |
|    learning_rate   | 0.0003    |
|    n_updates       | 133999    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.04      |
| time/              |           |
|    episodes        | 460       |
|    fps             | 174       |
|    time_elapsed    | 774       |
|    total_timesteps | 135011    |
| train/             |           |
|    actor_loss      | -2.71e+03 |
|    critic_loss     | 3.57e+03  |
|    ent_coef        | 1.09      |
|    ent_coef_loss   | -0.0334   |
|    learning_rate   | 0.0003    |
|    n_updates       | 134010    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.04      |
| time/              |           |
|    episodes        | 464       |
|    fps             | 174       |
|    time_elapsed    | 782       |
|    total_timesteps | 136211    |
| train/             |           |
|    actor_loss      | -2.71e+03 |
|    critic_loss     | 3.02e+03  |
|    ent_coef        | 1.13      |
|    ent_coef_loss   | -0.0872   |
|    learning_rate   | 0.0003    |
|    n_updates       | 135210    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.04      |
| time/              |           |
|    episodes        | 468       |
|    fps             | 174       |
|    time_elapsed    | 789       |
|    total_timesteps | 137411    |
| train/             |           |
|    actor_loss      | -2.83e+03 |
|    critic_loss     | 3.76e+03  |
|    ent_coef        | 1.1       |
|    ent_coef_loss   | 0.0333    |
|    learning_rate   | 0.0003    |
|    n_updates       | 136410    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 472       |
|    fps             | 174       |
|    time_elapsed    | 795       |
|    total_timesteps | 138611    |
| train/             |           |
|    actor_loss      | -2.81e+03 |
|    critic_loss     | 2.56e+03  |
|    ent_coef        | 1.09      |
|    ent_coef_loss   | -0.0233   |
|    learning_rate   | 0.0003    |
|    n_updates       | 137610    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 476       |
|    fps             | 174       |
|    time_elapsed    | 802       |
|    total_timesteps | 139811    |
| train/             |           |
|    actor_loss      | -2.77e+03 |
|    critic_loss     | 2.14e+04  |
|    ent_coef        | 1.03      |
|    ent_coef_loss   | 0.0081    |
|    learning_rate   | 0.0003    |
|    n_updates       | 138810    |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | -3.39     |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 140000    |
| train/             |           |
|    actor_loss      | -2.78e+03 |
|    critic_loss     | 3.95e+03  |
|    ent_coef        | 1.02      |
|    ent_coef_loss   | -0.00821  |
|    learning_rate   | 0.0003    |
|    n_updates       | 138999    |
----------------------------------


---------------------------------
| rollout/           |          |
|    success_rate    | 0.03     |
| time/              |          |
|    episodes        | 480      |
|    fps             | 174      |
|    time_elapsed    | 810      |
|    total_timesteps | 141011   |
| train/             |          |
|    actor_loss      | -2.9e+03 |
|    critic_loss     | 3.24e+03 |
|    ent_coef        | 0.984    |
|    ent_coef_loss   | -0.00125 |
|    learning_rate   | 0.0003   |
|    n_updates       | 140010   |
---------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 484       |
|    fps             | 174       |
|    time_elapsed    | 816       |
|    total_timesteps | 142211    |
| train/             |           |
|    actor_loss      | -2.75e+03 |
|    critic_loss     | 2.5e+03   |
|    ent_coef        | 0.957     |
|    ent_coef_loss   | -0.000386 |
|    learning_rate   | 0.0003    |
|    n_updates       | 141210    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 488       |
|    fps             | 174       |
|    time_elapsed    | 823       |
|    total_timesteps | 143411    |
| train/             |           |
|    actor_loss      | -2.86e+03 |
|    critic_loss     | 4.91e+03  |
|    ent_coef        | 0.956     |
|    ent_coef_loss   | -0.0175   |
|    learning_rate   | 0.0003    |
|    n_updates       | 142410    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 492       |
|    fps             | 174       |
|    time_elapsed    | 830       |
|    total_timesteps | 144611    |
| train/             |           |
|    actor_loss      | -2.78e+03 |
|    critic_loss     | 3.17e+03  |
|    ent_coef        | 0.97      |
|    ent_coef_loss   | -0.00648  |
|    learning_rate   | 0.0003    |
|    n_updates       | 143610    |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 5.48e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 145000    |
| train/             |           |
|    actor_loss      | -3e+03    |
|    critic_loss     | 2.79e+03  |
|    ent_coef        | 0.997     |
|    ent_coef_loss   | -0.000489 |
|    learning_rate   | 0.0003    |
|    n_updates       | 143999    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.03      |
| time/              |           |
|    episodes        | 496       |
|    fps             | 173       |
|    time_elapsed    | 838       |
|    total_timesteps | 145811    |
| train/             |           |
|    actor_loss      | -2.93e+03 |
|    critic_loss     | 2.73e+03  |
|    ent_coef        | 1.04      |
|    ent_coef_loss   | -0.0017   |
|    learning_rate   | 0.0003    |
|    n_updates       | 144810    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 500       |
|    fps             | 173       |
|    time_elapsed    | 846       |
|    total_timesteps | 147011    |
| train/             |           |
|    actor_loss      | -2.95e+03 |
|    critic_loss     | 3.8e+03   |
|    ent_coef        | 1.02      |
|    ent_coef_loss   | 0.00528   |
|    learning_rate   | 0.0003    |
|    n_updates       | 146010    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 504       |
|    fps             | 173       |
|    time_elapsed    | 854       |
|    total_timesteps | 148211    |
| train/             |           |
|    actor_loss      | -3.01e+03 |
|    critic_loss     | 4.75e+03  |
|    ent_coef        | 1.01      |
|    ent_coef_loss   | -0.000283 |
|    learning_rate   | 0.0003    |
|    n_updates       | 147210    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 508       |
|    fps             | 173       |
|    time_elapsed    | 861       |
|    total_timesteps | 149411    |
| train/             |           |
|    actor_loss      | -2.86e+03 |
|    critic_loss     | 3.85e+03  |
|    ent_coef        | 1.01      |
|    ent_coef_loss   | -0.00321  |
|    learning_rate   | 0.0003    |
|    n_updates       | 148410    |
----------------------------------


----------------------------------
| eval/              |           |
|    mean_ep_length  | 300       |
|    mean_reward     | 7.31e+03  |
|    success_rate    | 0         |
| time/              |           |
|    total_timesteps | 150000    |
| train/             |           |
|    actor_loss      | -2.84e+03 |
|    critic_loss     | 6.67e+03  |
|    ent_coef        | 1.01      |
|    ent_coef_loss   | -0.00214  |
|    learning_rate   | 0.0003    |
|    n_updates       | 148999    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 512       |
|    fps             | 173       |
|    time_elapsed    | 870       |
|    total_timesteps | 150611    |
| train/             |           |
|    actor_loss      | -3.04e+03 |
|    critic_loss     | 2.6e+03   |
|    ent_coef        | 1.02      |
|    ent_coef_loss   | 0.0027    |
|    learning_rate   | 0.0003    |
|    n_updates       | 149610    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 516       |
|    fps             | 172       |
|    time_elapsed    | 877       |
|    total_timesteps | 151811    |
| train/             |           |
|    actor_loss      | -2.86e+03 |
|    critic_loss     | 4.79e+03  |
|    ent_coef        | 1.02      |
|    ent_coef_loss   | 0.00854   |
|    learning_rate   | 0.0003    |
|    n_updates       | 150810    |
----------------------------------


----------------------------------
| rollout/           |           |
|    success_rate    | 0.02      |
| time/              |           |
|    episodes        | 520       |
|    fps             | 172       |
|    time_elapsed    | 885       |
|    total_timesteps | 153011    |
| train/             |           |
|    actor_loss      | -2.86e+03 |
|    critic_loss     | 3.7e+03   |
|    ent_coef        | 0.983     |
|    ent_coef_loss   | 0.00631   |
|    learning_rate   | 0.0003    |
|    n_updates       | 152010    |
----------------------------------


## Test Trained Model

Visualize the trained agent attempting to stack cubes in MuJoCo.

In [None]:
import time

print("="*60)
print("Testing Trained Model in MuJoCo Viewer")
print("="*60)

# Prefer the "best" checkpoint; if it cannot be loaded for any reason, fall
# back to the final checkpoint so the demo can still run.
try:
    model = SAC.load("./logs/cube_stacking/best_model/best_model")
    print("✓ Loaded best model from evaluation")
except Exception as load_error:
    # `Exception` (not a bare `except:`) lets KeyboardInterrupt / SystemExit
    # propagate, and the reason for the fallback is reported instead of hidden.
    print(f"Could not load best model ({load_error!r}); falling back to final model")
    model = SAC.load("./logs/cube_stacking/cube_stacking_final")
    print("✓ Loaded final model")

# Evaluation environment: identical task settings to the training environment
# created at the top of the notebook, except that rendering is switched on.
test_env = Lite6CubeStackingEnv(
    # --- scene / rendering ---
    model_path=str(model_path),
    render_mode="human",            # "human" opens the MuJoCo viewer window
    ee_site_name="end_effector",
    cube_placement_radius=0.3,
    # --- episode length and success criteria ---
    max_steps=300,
    success_threshold=0.08,         # vertical alignment tolerance
    horizontal_threshold=0.05,      # horizontal alignment tolerance
    # --- reward shaping / collision handling ---
    reward_scale=10.0,
    collision_penalty=100.0,
    terminate_on_collision=False,
)

# Evaluation settings, named once so the loop and the summary cannot drift apart.
N_TEST_EPISODES = 5
MAX_TEST_STEPS = 300   # matches the max_steps=300 the test environment was created with
STEP_DELAY_S = 0.02    # small pause per step so the movement can be watched

print(f"\nRunning {N_TEST_EPISODES} test episodes...")
print("(MuJoCo viewer window should open)\n")

success_count = 0
total_rewards = []
collision_count = 0

try:
    for episode in range(N_TEST_EPISODES):
        obs, info = test_env.reset()
        print(f"\nEpisode {episode+1}:")
        print(f"  Cube 1 position: {info['cube1_pos']}")
        print(f"  Cube 2 position: {info['cube2_pos']}")
        print(f"  Initial vertical distance: {info['vertical_distance']:.4f} m")
        print(f"  Initial horizontal distance: {info['horizontal_distance']:.4f} m")

        episode_reward = 0
        episode_collisions = 0
        steps_taken = 0

        for _ in range(MAX_TEST_STEPS):
            # Deterministic action from the trained policy
            action, _states = model.predict(obs, deterministic=True)

            # Advance the simulation by one step
            obs, reward, terminated, truncated, info = test_env.step(action)
            steps_taken += 1
            episode_reward += reward

            if info['has_collision']:
                episode_collisions += 1

            time.sleep(STEP_DELAY_S)

            if terminated or truncated:
                break

        # `info` here is the one returned by the last step of the episode
        print(f"  Steps taken: {steps_taken}")
        print(f"  Final vertical distance: {info['vertical_distance']:.4f} m")
        print(f"  Final horizontal distance: {info['horizontal_distance']:.4f} m")
        print(f"  Success: {info['is_success']}")
        print(f"  Collisions: {episode_collisions}")
        print(f"  Total reward: {episode_reward:.2f}")

        if info['is_success']:
            success_count += 1
        collision_count += episode_collisions
        total_rewards.append(episode_reward)
finally:
    # Always close the test environment, even if an episode raises or the
    # run is interrupted, so the viewer is not left open.
    test_env.close()

print("\n" + "="*60)
print("Test Summary:")
print("="*60)
print(f"Success rate: {success_count}/{N_TEST_EPISODES} ({success_count/N_TEST_EPISODES*100:.1f}%)")
print(f"Average reward: {np.mean(total_rewards):.2f} ± {np.std(total_rewards):.2f}")
print(f"Total collisions: {collision_count}")
print("="*60)

In [None]:
# Clean up: close the environment bound to `env` (first created in the setup
# cell at the top of the notebook). The separate test environment is closed
# in the testing cell above.
env.close()