In [1]:
import os
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecMonitor

pygame 2.6.0 (SDL 2.28.4, Python 3.8.19)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
from custom_env import CustomEnv
from wrappers import FullyObsSB3MLPWrapper
from callbacks import EvalSaveCallback



In [3]:
# --- Output locations ---------------------------------------------------
# All artifacts of this run live under a single session folder.
session_dir = r"./experiments/door_key"
log_dir = os.path.join(session_dir, "logs")
model_save_path = os.path.join(session_dir, "latest_model")

# makedirs is recursive, so creating the log folder also creates session_dir.
os.makedirs(log_dir, exist_ok=True)

# --- Episode limits -----------------------------------------------------
max_train_episode_steps = 1000
max_eval_episode_steps = 500

# --- Training budget and evaluation schedule ----------------------------
total_timesteps = 200_000
eval_freq = 5_000
n_eval_episodes = 10
eval_deterministic = False

In [4]:
def make_train_env():
    """Build one fresh, wrapped training environment.

    Passed to DummyVecEnv as an environment factory. Being a named function
    that constructs its own env, it does not depend on the notebook-level
    name `train_env` (which is bound to the vectorized env below), so it can
    safely be called more than once.

    Returns:
        The CustomEnv for the door/key map wrapped in FullyObsSB3MLPWrapper.
    """
    base_env = CustomEnv(
        txt_file_path=r'./maps/simple_test_door_key.txt',
        display_size=6,
        # Training uses the randomized display/rotation/flip settings
        # (exact semantics are defined in custom_env.CustomEnv).
        display_mode="random",
        random_rotate=True,
        random_flip=True,
        custom_mission="Find the key and open the door.",
        max_steps=max_train_episode_steps,
    )
    return FullyObsSB3MLPWrapper(base_env)


# Vectorize the environment, then track episode statistics such as rewards
train_env = VecMonitor(DummyVecEnv([make_train_env]))

In [5]:
def make_eval_env():
    """Build one fresh, wrapped evaluation environment.

    Passed to DummyVecEnv as an environment factory. Being a named function
    that constructs its own env, it does not depend on the notebook-level
    name `eval_env` (which is bound to the vectorized env below), so it can
    safely be called more than once.

    Returns:
        The CustomEnv for the door/key map wrapped in FullyObsSB3MLPWrapper.
    """
    base_env = CustomEnv(
        txt_file_path=r'./maps/simple_test_door_key.txt',
        display_size=6,
        # Evaluation uses the fixed "middle" display with rotation and flip
        # disabled (exact semantics are defined in custom_env.CustomEnv).
        display_mode="middle",
        random_rotate=False,
        random_flip=False,
        custom_mission="Find the key and open the door.",
        max_steps=max_eval_episode_steps,
    )
    return FullyObsSB3MLPWrapper(base_env)


# Vectorize the environment, then track episode statistics such as rewards
eval_env = VecMonitor(DummyVecEnv([make_eval_env]))

In [6]:
# Resume from the session checkpoint if one is on disk; otherwise start fresh.
checkpoint_file = f"{model_save_path}.zip"
if not os.path.exists(checkpoint_file):
    # No checkpoint yet: build a new PPO agent with an MLP policy.
    model = PPO("MlpPolicy", train_env, verbose=1)
    print("Initialized new model.")
else:
    # PPO.load is given the path without the ".zip" suffix, as before.
    model = PPO.load(model_save_path, env=train_env)
    print("Loaded model from saved path.")

Using cuda device
Initialized new model.


In [7]:
# Settings handed to the project's EvalSaveCallback, gathered in one place.
# NOTE(review): presumably it evaluates on eval_env every eval_freq steps and
# writes results under log_dir; confirm against callbacks.py.
eval_callback_options = dict(
    eval_env=eval_env,
    log_dir=log_dir,
    eval_freq=eval_freq,
    n_eval_episodes=n_eval_episodes,
    deterministic=eval_deterministic,
    verbose=1,
)
eval_save_callback = EvalSaveCallback(**eval_callback_options)

In [None]:
# Train the model and log performance with the custom callback
model.learn(
    total_timesteps=total_timesteps,
    callback=eval_save_callback,
    progress_bar=True,
)

# Persist the final policy to the path the load-or-create cell checks
# (f"{model_save_path}.zip"); SB3 appends the ".zip" suffix itself.
# Nothing else in this notebook writes to that path, so without this
# the resume branch would never trigger on a later run.
model.save(model_save_path)

Output()

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 472      |
|    ep_rew_mean     | 0.55     |
| time/              |          |
|    fps             | 396      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 401         |
|    ep_rew_mean          | 0.629       |
| time/                   |             |
|    fps                  | 355         |
|    iterations           | 2           |
|    time_elapsed         | 11          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.013241606 |
|    clip_fraction        | 0.113       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.94       |
|    explained_variance   | 0.0813      |
|    learning_rate        | 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 371         |
|    ep_rew_mean          | 0.66        |
| time/                   |             |
|    fps                  | 259         |
|    iterations           | 3           |
|    time_elapsed         | 23          |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.008720116 |
|    clip_fraction        | 0.0713      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.93       |
|    explained_variance   | 0.143       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0336     |
|    n_updates            | 20          |
|    policy_gradient_loss | -0.0082     |
|    value_loss           | 0.00512     |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 322 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 294         |
|    ep_rew_mean          | 0.732       |
| time/                   |             |
|    fps                  | 246         |
|    iterations           | 5           |
|    time_elapsed         | 41          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.014325798 |
|    clip_fraction        | 0.179       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.9        |
|    explained_variance   | 0.475       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00405    |
|    n_updates            | 40          |
|    policy_gradient_loss | -0.0161     |
|    value_loss           | 0.00908     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 267   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 202         |
|    ep_rew_mean          | 0.817       |
| time/                   |             |
|    fps                  | 257         |
|    iterations           | 8           |
|    time_elapsed         | 63          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.009631499 |
|    clip_fraction        | 0.109       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.8        |
|    explained_variance   | 0.425       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00313    |
|    n_updates            | 70          |
|    policy_gradient_loss | -0.0118     |
|    value_loss           | 0.0077      |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 189   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 171         |
|    ep_rew_mean          | 0.846       |
| time/                   |             |
|    fps                  | 262         |
|    iterations           | 10          |
|    time_elapsed         | 78          |
|    total_timesteps      | 20480       |
| train/                  |             |
|    approx_kl            | 0.010857996 |
|    clip_fraction        | 0.125       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.76       |
|    explained_variance   | 0.515       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0339     |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.0126     |
|    value_loss           | 0.00519     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 136   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 107         |
|    ep_rew_mean          | 0.904       |
| time/                   |             |
|    fps                  | 271         |
|    iterations           | 13          |
|    time_elapsed         | 98          |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.009316531 |
|    clip_fraction        | 0.142       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.58       |
|    explained_variance   | 0.355       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0203     |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.0136     |
|    value_loss           | 0.00762     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 109   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 91.9        |
|    ep_rew_mean          | 0.917       |
| time/                   |             |
|    fps                  | 273         |
|    iterations           | 15          |
|    time_elapsed         | 112         |
|    total_timesteps      | 30720       |
| train/                  |             |
|    approx_kl            | 0.011358605 |
|    clip_fraction        | 0.157       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.61       |
|    explained_variance   | 0.616       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0304     |
|    n_updates            | 140         |
|    policy_gradient_loss | -0.0151     |
|    value_loss           | 0.00347     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 92.2  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 93.1        |
|    ep_rew_mean          | 0.916       |
| time/                   |             |
|    fps                  | 276         |
|    iterations           | 18          |
|    time_elapsed         | 133         |
|    total_timesteps      | 36864       |
| train/                  |             |
|    approx_kl            | 0.015556719 |
|    clip_fraction        | 0.173       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.6        |
|    explained_variance   | 0.632       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.032      |
|    n_updates            | 170         |
|    policy_gradient_loss | -0.0117     |
|    value_loss           | 0.00377     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 86.2  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 78.1        |
|    ep_rew_mean          | 0.93        |
| time/                   |             |
|    fps                  | 278         |
|    iterations           | 20          |
|    time_elapsed         | 147         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.013925459 |
|    clip_fraction        | 0.167       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.48       |
|    explained_variance   | 0.624       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0092     |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.0153     |
|    value_loss           | 0.0061      |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 59.9

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 45.2        |
|    ep_rew_mean          | 0.959       |
| time/                   |             |
|    fps                  | 276         |
|    iterations           | 22          |
|    time_elapsed         | 162         |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.012222793 |
|    clip_fraction        | 0.142       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.47       |
|    explained_variance   | 0.579       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0153     |
|    n_updates            | 210         |
|    policy_gradient_loss | -0.0148     |
|    value_loss           | 0.00722     |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 43.7

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 46.5        |
|    ep_rew_mean          | 0.958       |
| time/                   |             |
|    fps                  | 279         |
|    iterations           | 25          |
|    time_elapsed         | 182         |
|    total_timesteps      | 51200       |
| train/                  |             |
|    approx_kl            | 0.021915123 |
|    clip_fraction        | 0.169       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.2        |
|    explained_variance   | 0.397       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0216     |
|    n_updates            | 240         |
|    policy_gradient_loss | -0.012      |
|    value_loss           | 0.00256     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 36.6  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 35.7        |
|    ep_rew_mean          | 0.968       |
| time/                   |             |
|    fps                  | 281         |
|    iterations           | 27          |
|    time_elapsed         | 196         |
|    total_timesteps      | 55296       |
| train/                  |             |
|    approx_kl            | 0.012537618 |
|    clip_fraction        | 0.145       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.21       |
|    explained_variance   | 0.453       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0289     |
|    n_updates            | 260         |
|    policy_gradient_loss | -0.0125     |
|    value_loss           | 0.00313     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 33.3  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 31.6        |
|    ep_rew_mean          | 0.972       |
| time/                   |             |
|    fps                  | 284         |
|    iterations           | 30          |
|    time_elapsed         | 215         |
|    total_timesteps      | 61440       |
| train/                  |             |
|    approx_kl            | 0.015091234 |
|    clip_fraction        | 0.155       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.11       |
|    explained_variance   | 0.733       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0341     |
|    n_updates            | 290         |
|    policy_gradient_loss | -0.0109     |
|    value_loss           | 0.00487     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 29    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 36.4        |
|    ep_rew_mean          | 0.967       |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 32          |
|    time_elapsed         | 228         |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.012553837 |
|    clip_fraction        | 0.138       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.07       |
|    explained_variance   | 0.422       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0186     |
|    n_updates            | 310         |
|    policy_gradient_loss | -0.00604    |
|    value_loss           | 0.00285     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 39.5  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 17.2       |
|    ep_rew_mean          | 0.984      |
| time/                   |            |
|    fps                  | 288        |
|    iterations           | 35         |
|    time_elapsed         | 248        |
|    total_timesteps      | 71680      |
| train/                  |            |
|    approx_kl            | 0.07956502 |
|    clip_fraction        | 0.281      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.978     |
|    explained_variance   | 0.0481     |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0879    |
|    n_updates            | 340        |
|    policy_gradient_loss | -0.00446   |
|    value_loss           | 0.00261    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.9        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 0.99        |
| time/                   |             |
|    fps                  | 289         |
|    iterations           | 37          |
|    time_elapsed         | 261         |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.023942053 |
|    clip_fraction        | 0.204       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.831      |
|    explained_variance   | 0.61        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0326     |
|    n_updates            | 360         |
|    policy_gradient_loss | -0.0219     |
|    value_loss           | 0.00246     |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.6    

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 9.13       |
|    ep_rew_mean          | 0.992      |
| time/                   |            |
|    fps                  | 290        |
|    iterations           | 40         |
|    time_elapsed         | 281        |
|    total_timesteps      | 81920      |
| train/                  |            |
|    approx_kl            | 0.02548345 |
|    clip_fraction        | 0.184      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.731     |
|    explained_variance   | 0.434      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0409    |
|    n_updates            | 390        |
|    policy_gradient_loss | -0.00608   |
|    value_loss           | 0.00191    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.72        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 7.67       |
|    ep_rew_mean          | 0.993      |
| time/                   |            |
|    fps                  | 291        |
|    iterations           | 42         |
|    time_elapsed         | 295        |
|    total_timesteps      | 86016      |
| train/                  |            |
|    approx_kl            | 0.02280941 |
|    clip_fraction        | 0.185      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.542     |
|    explained_variance   | 0.561      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0464    |
|    n_updates            | 410        |
|    policy_gradient_loss | -0.0056    |
|    value_loss           | 0.00123    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 13.5       |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.6        |
|    ep_rew_mean          | 0.989       |
| time/                   |             |
|    fps                  | 292         |
|    iterations           | 44          |
|    time_elapsed         | 308         |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.050774947 |
|    clip_fraction        | 0.241       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.541      |
|    explained_variance   | 0.768       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0271      |
|    n_updates            | 430         |
|    policy_gradient_loss | -0.0159     |
|    value_loss           | 0.00136     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.21  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.78       |
|    ep_rew_mean          | 0.994      |
| time/                   |            |
|    fps                  | 293        |
|    iterations           | 47         |
|    time_elapsed         | 328        |
|    total_timesteps      | 96256      |
| train/                  |            |
|    approx_kl            | 0.06826825 |
|    clip_fraction        | 0.322      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.498     |
|    explained_variance   | 0.73       |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0973    |
|    n_updates            | 460        |
|    policy_gradient_loss | -0.00446   |
|    value_loss           | 0.000706   |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.6        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.57       |
|    ep_rew_mean          | 0.994      |
| time/                   |            |
|    fps                  | 293        |
|    iterations           | 49         |
|    time_elapsed         | 341        |
|    total_timesteps      | 100352     |
| train/                  |            |
|    approx_kl            | 0.08519956 |
|    clip_fraction        | 0.17       |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.621     |
|    explained_variance   | 0.905      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.299      |
|    n_updates            | 480        |
|    policy_gradient_loss | 0.0291     |
|    value_loss           | 0.00108    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 10.5       |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 9.05       |
|    ep_rew_mean          | 0.992      |
| time/                   |            |
|    fps                  | 294        |
|    iterations           | 52         |
|    time_elapsed         | 361        |
|    total_timesteps      | 106496     |
| train/                  |            |
|    approx_kl            | 0.11450315 |
|    clip_fraction        | 0.205      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.393     |
|    explained_variance   | 0.435      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.109     |
|    n_updates            | 510        |
|    policy_gradient_loss | 0.00536    |
|    value_loss           | 0.00173    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.44       |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.21        |
|    ep_rew_mean          | 0.992       |
| time/                   |             |
|    fps                  | 294         |
|    iterations           | 54          |
|    time_elapsed         | 375         |
|    total_timesteps      | 110592      |
| train/                  |             |
|    approx_kl            | 0.107268676 |
|    clip_fraction        | 0.162       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.244      |
|    explained_variance   | 0.434       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.035      |
|    n_updates            | 530         |
|    policy_gradient_loss | -0.017      |
|    value_loss           | 0.000329    |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.5    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.67        |
|    ep_rew_mean          | 0.992       |
| time/                   |             |
|    fps                  | 295         |
|    iterations           | 57          |
|    time_elapsed         | 395         |
|    total_timesteps      | 116736      |
| train/                  |             |
|    approx_kl            | 0.057304613 |
|    clip_fraction        | 0.203       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.482      |
|    explained_variance   | 0.711       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0774     |
|    n_updates            | 560         |
|    policy_gradient_loss | -0.00943    |
|    value_loss           | 0.00146     |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.27  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.16        |
|    ep_rew_mean          | 0.994       |
| time/                   |             |
|    fps                  | 295         |
|    iterations           | 59          |
|    time_elapsed         | 408         |
|    total_timesteps      | 120832      |
| train/                  |             |
|    approx_kl            | 0.037306406 |
|    clip_fraction        | 0.185       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.291      |
|    explained_variance   | 0.668       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0457     |
|    n_updates            | 580         |
|    policy_gradient_loss | -0.0238     |
|    value_loss           | 0.0003      |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.4   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 6.7         |
|    ep_rew_mean          | 0.994       |
| time/                   |             |
|    fps                  | 296         |
|    iterations           | 62          |
|    time_elapsed         | 428         |
|    total_timesteps      | 126976      |
| train/                  |             |
|    approx_kl            | 0.026027108 |
|    clip_fraction        | 0.132       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.278      |
|    explained_variance   | 0.608       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0273     |
|    n_updates            | 610         |
|    policy_gradient_loss | 0.0004      |
|    value_loss           | 0.000801    |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.4     

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.14        |
|    ep_rew_mean          | 0.994       |
| time/                   |             |
|    fps                  | 296         |
|    iterations           | 64          |
|    time_elapsed         | 442         |
|    total_timesteps      | 131072      |
| train/                  |             |
|    approx_kl            | 0.029546645 |
|    clip_fraction        | 0.0882      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.189      |
|    explained_variance   | 0.463       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0679      |
|    n_updates            | 630         |
|    policy_gradient_loss | -0.0034     |
|    value_loss           | 0.000305    |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.46    

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.3        |
|    ep_rew_mean          | 0.994      |
| time/                   |            |
|    fps                  | 296        |
|    iterations           | 66         |
|    time_elapsed         | 456        |
|    total_timesteps      | 135168     |
| train/                  |            |
|    approx_kl            | 0.01763953 |
|    clip_fraction        | 0.051      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.0773    |
|    explained_variance   | 0.744      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00485   |
|    n_updates            | 650        |
|    policy_gradient_loss | -0.00406   |
|    value_loss           | 0.000425   |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 26.2       |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 57.9       |
|    ep_rew_mean          | 0.944      |
| time/                   |            |
|    fps                  | 292        |
|    iterations           | 69         |
|    time_elapsed         | 482        |
|    total_timesteps      | 141312     |
| train/                  |            |
|    approx_kl            | 0.09458825 |
|    clip_fraction        | 0.288      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.431     |
|    explained_variance   | 0.303      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0611     |
|    n_updates            | 680        |
|    policy_gradient_loss | 0.0025     |
|    value_loss           | 0.000609   |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 84.3       |
|    ep_rew_mean

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 10.1      |
|    ep_rew_mean          | 0.991     |
| time/                   |           |
|    fps                  | 293       |
|    iterations           | 71        |
|    time_elapsed         | 495       |
|    total_timesteps      | 145408    |
| train/                  |           |
|    approx_kl            | 0.1673544 |
|    clip_fraction        | 0.319     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.359    |
|    explained_variance   | 0.214     |
|    learning_rate        | 0.0003    |
|    loss                 | -0.0179   |
|    n_updates            | 700       |
|    policy_gradient_loss | 0.0823    |
|    value_loss           | 0.00287   |
---------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 8.37       |
|    ep_rew_mean          | 0.992    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.8        |
|    ep_rew_mean          | 0.987       |
| time/                   |             |
|    fps                  | 293         |
|    iterations           | 74          |
|    time_elapsed         | 515         |
|    total_timesteps      | 151552      |
| train/                  |             |
|    approx_kl            | 0.051810406 |
|    clip_fraction        | 0.204       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.293      |
|    explained_variance   | 0.531       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.036      |
|    n_updates            | 730         |
|    policy_gradient_loss | -0.0231     |
|    value_loss           | 0.00162     |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 18.4    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 6.47        |
|    ep_rew_mean          | 0.994       |
| time/                   |             |
|    fps                  | 293         |
|    iterations           | 76          |
|    time_elapsed         | 530         |
|    total_timesteps      | 155648      |
| train/                  |             |
|    approx_kl            | 0.020214964 |
|    clip_fraction        | 0.193       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.787      |
|    explained_variance   | 0.84        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.018       |
|    n_updates            | 750         |
|    policy_gradient_loss | -9.79e-05   |
|    value_loss           | 0.00139     |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 14.5    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 15.4        |
|    ep_rew_mean          | 0.986       |
| time/                   |             |
|    fps                  | 292         |
|    iterations           | 79          |
|    time_elapsed         | 553         |
|    total_timesteps      | 161792      |
| train/                  |             |
|    approx_kl            | 0.074879035 |
|    clip_fraction        | 0.274       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.653      |
|    explained_variance   | 0.823       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0126      |
|    n_updates            | 780         |
|    policy_gradient_loss | -0.00284    |
|    value_loss           | 0.000992    |
-----------------------------------------
