# PPO training for Task 7 pendulum

Reproduces the PPO workflow from `task_2_lqr_balance_data_gen.ipynb`, but runs directly on the custom PyBullet + Pinocchio environment implemented in `scripts/task_7_env.py`.

In [1]:
import os
from pathlib import Path

import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

In [2]:
import sys

# The notebook may be started either from the repository root (which holds a
# `notebooks/` directory) or from inside `notebooks/`; in the latter case the
# root is taken to be the parent of the working directory.
current = Path.cwd()
PROJECT_ROOT = current if (current / 'notebooks').exists() else current.parent

# Run from the root so that relative paths such as `data/` resolve there.
os.chdir(PROJECT_ROOT)

# Put the root at the front of the import path so `scripts` can be imported.
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

from scripts.task_7_env import Task7PendulumEnv

# Models currently share the data directory; both names are kept so the two
# locations can be separated later without touching the code that uses them.
DATA_DIR = Path('data')
MODELS_DIR = DATA_DIR
for directory in (DATA_DIR, MODELS_DIR):
    directory.mkdir(exist_ok=True)

print('Project root:', PROJECT_ROOT)

Project root: /home/acepeax/Desktop/Studies/MVA/Robotics/Project


pybullet build time: Dec  4 2025 20:11:42


## Environment helpers

In [3]:
# Default settings forwarded to every Task7PendulumEnv built by the factory.
MAX_STEPS = 600        # passed as `max_steps` (presumably the per-episode step cap; confirm in task_7_env.py)
SHOULD_BALANCE = True  # passed as `should_balance`
SIM_SUBSTEPS = 12      # passed as `sim_substeps`


def make_task7_env(gui=False, should_balance=SHOULD_BALANCE, *,
                   max_steps=None, sim_substeps=None):
    """Return a zero-argument factory that builds a Monitor-wrapped Task 7 env.

    The environment is not created here: construction is deferred to the
    returned callable, which is the form expected when a list of factories is
    handed to a vectorised-environment wrapper.

    Args:
        gui: Forwarded to ``Task7PendulumEnv`` as ``gui``.
        should_balance: Forwarded to ``Task7PendulumEnv`` as ``should_balance``.
        max_steps: Optional override for ``max_steps``. ``None`` (the default)
            uses the module-level ``MAX_STEPS`` as read when the factory runs.
        sim_substeps: Optional override for ``sim_substeps``. ``None`` (the
            default) uses the module-level ``SIM_SUBSTEPS`` as read when the
            factory runs.

    Returns:
        A callable taking no arguments that returns ``Monitor(env)``.
    """
    def _init():
        # Resolve the fallbacks here rather than in the signature so that the
        # module-level defaults are looked up at construction time, exactly as
        # before the overrides were introduced.
        env = Task7PendulumEnv(
            max_steps=MAX_STEPS if max_steps is None else max_steps,
            should_balance=should_balance,
            gui=gui,
            sim_substeps=SIM_SUBSTEPS if sim_substeps is None else sim_substeps,
        )
        return Monitor(env)
    return _init

# Training uses a single headless environment, wrapped in DummyVecEnv via a
# one-element list of environment factories.
env_fns = [make_task7_env(gui=False, should_balance=SHOULD_BALANCE)]
train_env = DummyVecEnv(env_fns)

# Show the spaces exposed by the vectorised environment.
for label, space in (
    ('Observation space:', train_env.observation_space),
    ('Action space:', train_env.action_space),
):
    print(label, space)

Observation space: Box(-inf, inf, (14,), float32)
Action space: Discrete(3)


## Train PPO

In [None]:
# PPO hyperparameters, kept together so they can be reviewed at a glance.
ppo_kwargs = dict(
    learning_rate=3e-4,
    n_steps=2048,      # the training log advances total_timesteps by 2048 per iteration
    batch_size=256,
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.0,
)

# NOTE(review): the recorded training output in this notebook contains
# per-iteration metric tables, which suggests an earlier run used a higher
# verbosity than the verbose=0 set here; confirm which setting is intended.
model = PPO(
    policy='MlpPolicy',
    env=train_env,
    verbose=0,
    tensorboard_log='runs/task7_ppo',
    **ppo_kwargs,
)

Using cpu device


In [5]:
# Training budget handed to PPO as `total_timesteps`.
TOTAL_TIMESTEPS = 200_000
# Train the model built in the previous cell. progress_bar=True requests a live
# progress display (the `Output()` widget recorded in this cell's output).
model.learn(total_timesteps=TOTAL_TIMESTEPS, progress_bar=True)

Logging to runs/task7_ppo/PPO_1


Output()

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5.84     |
|    ep_rew_mean     | 4.84     |
| time/              |          |
|    fps             | 274      |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 2048     |
---------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5.6         |
|    ep_rew_mean          | 4.6         |
| time/                   |             |
|    fps                  | 268         |
|    iterations           | 2           |
|    time_elapsed         | 15          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.011344364 |
|    clip_fraction        | 0.187       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -0.00491    |
|    learning_rate        | 0.0003      |
|    loss                 | 3.77        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0272     |
|    value_loss           | 12.2        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.89        |
|    ep_rew_mean          | 6.89        |
| time/                   |             |
|    fps                  | 261         |
|    iterations           | 3           |
|    time_elapsed         | 23          |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.019952781 |
|    clip_fraction        | 0.275       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.05       |
|    explained_variance   | 0.0684      |
|    learning_rate        | 0.0003      |
|    loss                 | 2.96        |
|    n_updates            | 20          |
|    policy_gradient_loss | -0.0348     |
|    value_loss           | 7.07        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.22        |
|    ep_rew_mean          | 8.22        |
| time/                   |             |
|    fps                  | 239         |
|    iterations           | 4           |
|    time_elapsed         | 34          |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.018958151 |
|    clip_fraction        | 0.207       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.999      |
|    explained_variance   | 0.107       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.33        |
|    n_updates            | 30          |
|    policy_gradient_loss | -0.0323     |
|    value_loss           | 11.7        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.1        |
|    ep_rew_mean          | 11.1        |
| time/                   |             |
|    fps                  | 229         |
|    iterations           | 5           |
|    time_elapsed         | 44          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.027848797 |
|    clip_fraction        | 0.244       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.884      |
|    explained_variance   | 0.109       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.23        |
|    n_updates            | 40          |
|    policy_gradient_loss | -0.0398     |
|    value_loss           | 13.8        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.6        |
|    ep_rew_mean          | 15.6        |
| time/                   |             |
|    fps                  | 223         |
|    iterations           | 6           |
|    time_elapsed         | 54          |
|    total_timesteps      | 12288       |
| train/                  |             |
|    approx_kl            | 0.018594494 |
|    clip_fraction        | 0.075       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.82       |
|    explained_variance   | 0.102       |
|    learning_rate        | 0.0003      |
|    loss                 | 10.6        |
|    n_updates            | 50          |
|    policy_gradient_loss | -0.019      |
|    value_loss           | 21          |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 19.4        |
|    ep_rew_mean          | 18.4        |
| time/                   |             |
|    fps                  | 225         |
|    iterations           | 7           |
|    time_elapsed         | 63          |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 0.008494664 |
|    clip_fraction        | 0.0311      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.736      |
|    explained_variance   | 0.143       |
|    learning_rate        | 0.0003      |
|    loss                 | 11.5        |
|    n_updates            | 60          |
|    policy_gradient_loss | -0.0123     |
|    value_loss           | 25.7        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 23.5        |
|    ep_rew_mean          | 22.5        |
| time/                   |             |
|    fps                  | 228         |
|    iterations           | 8           |
|    time_elapsed         | 71          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.005338687 |
|    clip_fraction        | 0.0339      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.669      |
|    explained_variance   | 0.216       |
|    learning_rate        | 0.0003      |
|    loss                 | 12.1        |
|    n_updates            | 70          |
|    policy_gradient_loss | -0.01       |
|    value_loss           | 28.5        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 25.1        |
|    ep_rew_mean          | 24.1        |
| time/                   |             |
|    fps                  | 230         |
|    iterations           | 9           |
|    time_elapsed         | 79          |
|    total_timesteps      | 18432       |
| train/                  |             |
|    approx_kl            | 0.003743227 |
|    clip_fraction        | 0.0111      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.634      |
|    explained_variance   | 0.308       |
|    learning_rate        | 0.0003      |
|    loss                 | 14.3        |
|    n_updates            | 80          |
|    policy_gradient_loss | -0.00621    |
|    value_loss           | 31.6        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 26.7        |
|    ep_rew_mean          | 25.7        |
| time/                   |             |
|    fps                  | 230         |
|    iterations           | 10          |
|    time_elapsed         | 88          |
|    total_timesteps      | 20480       |
| train/                  |             |
|    approx_kl            | 0.003870286 |
|    clip_fraction        | 0.0183      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.589      |
|    explained_variance   | 0.423       |
|    learning_rate        | 0.0003      |
|    loss                 | 13.9        |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.00772    |
|    value_loss           | 26.3        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 30.8        |
|    ep_rew_mean          | 29.8        |
| time/                   |             |
|    fps                  | 227         |
|    iterations           | 11          |
|    time_elapsed         | 98          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.005969581 |
|    clip_fraction        | 0.0361      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.558      |
|    explained_variance   | 0.538       |
|    learning_rate        | 0.0003      |
|    loss                 | 11.7        |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.00843    |
|    value_loss           | 26.3        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 33.3        |
|    ep_rew_mean          | 32.3        |
| time/                   |             |
|    fps                  | 223         |
|    iterations           | 12          |
|    time_elapsed         | 109         |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.005067444 |
|    clip_fraction        | 0.0243      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.513      |
|    explained_variance   | 0.522       |
|    learning_rate        | 0.0003      |
|    loss                 | 16.3        |
|    n_updates            | 110         |
|    policy_gradient_loss | -0.00994    |
|    value_loss           | 33.9        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 36.1         |
|    ep_rew_mean          | 35.1         |
| time/                   |              |
|    fps                  | 222          |
|    iterations           | 13           |
|    time_elapsed         | 119          |
|    total_timesteps      | 26624        |
| train/                  |              |
|    approx_kl            | 0.0035088398 |
|    clip_fraction        | 0.0219       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.482       |
|    explained_variance   | 0.593        |
|    learning_rate        | 0.0003       |
|    loss                 | 14           |
|    n_updates            | 120          |
|    policy_gradient_loss | -0.00557     |
|    value_loss           | 32.5         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 37.3         |
|    ep_rew_mean          | 36.3         |
| time/                   |              |
|    fps                  | 222          |
|    iterations           | 14           |
|    time_elapsed         | 129          |
|    total_timesteps      | 28672        |
| train/                  |              |
|    approx_kl            | 0.0051775146 |
|    clip_fraction        | 0.0309       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.457       |
|    explained_variance   | 0.589        |
|    learning_rate        | 0.0003       |
|    loss                 | 14.2         |
|    n_updates            | 130          |
|    policy_gradient_loss | -0.00809     |
|    value_loss           | 34.4         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 42.1        |
|    ep_rew_mean          | 41.1        |
| time/                   |             |
|    fps                  | 221         |
|    iterations           | 15          |
|    time_elapsed         | 138         |
|    total_timesteps      | 30720       |
| train/                  |             |
|    approx_kl            | 0.004797793 |
|    clip_fraction        | 0.0438      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.431      |
|    explained_variance   | 0.55        |
|    learning_rate        | 0.0003      |
|    loss                 | 20.4        |
|    n_updates            | 140         |
|    policy_gradient_loss | -0.00737    |
|    value_loss           | 39.1        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 45.8         |
|    ep_rew_mean          | 44.8         |
| time/                   |              |
|    fps                  | 220          |
|    iterations           | 16           |
|    time_elapsed         | 148          |
|    total_timesteps      | 32768        |
| train/                  |              |
|    approx_kl            | 0.0027062988 |
|    clip_fraction        | 0.0216       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.406       |
|    explained_variance   | 0.549        |
|    learning_rate        | 0.0003       |
|    loss                 | 21.7         |
|    n_updates            | 150          |
|    policy_gradient_loss | -0.00519     |
|    value_loss           | 45.1         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 48.7        |
|    ep_rew_mean          | 47.7        |
| time/                   |             |
|    fps                  | 220         |
|    iterations           | 17          |
|    time_elapsed         | 158         |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.004004533 |
|    clip_fraction        | 0.0248      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.411      |
|    explained_variance   | 0.492       |
|    learning_rate        | 0.0003      |
|    loss                 | 27.4        |
|    n_updates            | 160         |
|    policy_gradient_loss | -0.00501    |
|    value_loss           | 56.7        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 50.3         |
|    ep_rew_mean          | 49.3         |
| time/                   |              |
|    fps                  | 219          |
|    iterations           | 18           |
|    time_elapsed         | 167          |
|    total_timesteps      | 36864        |
| train/                  |              |
|    approx_kl            | 0.0022897865 |
|    clip_fraction        | 0.0139       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.4         |
|    explained_variance   | 0.499        |
|    learning_rate        | 0.0003       |
|    loss                 | 33.1         |
|    n_updates            | 170          |
|    policy_gradient_loss | -0.00554     |
|    value_loss           | 62           |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 54.8         |
|    ep_rew_mean          | 53.8         |
| time/                   |              |
|    fps                  | 218          |
|    iterations           | 19           |
|    time_elapsed         | 178          |
|    total_timesteps      | 38912        |
| train/                  |              |
|    approx_kl            | 0.0022056317 |
|    clip_fraction        | 0.0117       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.382       |
|    explained_variance   | 0.509        |
|    learning_rate        | 0.0003       |
|    loss                 | 33.3         |
|    n_updates            | 180          |
|    policy_gradient_loss | -0.00443     |
|    value_loss           | 64.5         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 58.9         |
|    ep_rew_mean          | 57.9         |
| time/                   |              |
|    fps                  | 217          |
|    iterations           | 20           |
|    time_elapsed         | 187          |
|    total_timesteps      | 40960        |
| train/                  |              |
|    approx_kl            | 0.0026558712 |
|    clip_fraction        | 0.0172       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.368       |
|    explained_variance   | 0.419        |
|    learning_rate        | 0.0003       |
|    loss                 | 37.6         |
|    n_updates            | 190          |
|    policy_gradient_loss | -0.00458     |
|    value_loss           | 79.7         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 61.9         |
|    ep_rew_mean          | 60.9         |
| time/                   |              |
|    fps                  | 217          |
|    iterations           | 21           |
|    time_elapsed         | 197          |
|    total_timesteps      | 43008        |
| train/                  |              |
|    approx_kl            | 0.0019714092 |
|    clip_fraction        | 0.0149       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.368       |
|    explained_variance   | 0.483        |
|    learning_rate        | 0.0003       |
|    loss                 | 36.7         |
|    n_updates            | 200          |
|    policy_gradient_loss | -0.00422     |
|    value_loss           | 78.3         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 66.2         |
|    ep_rew_mean          | 65.2         |
| time/                   |              |
|    fps                  | 216          |
|    iterations           | 22           |
|    time_elapsed         | 207          |
|    total_timesteps      | 45056        |
| train/                  |              |
|    approx_kl            | 0.0033156052 |
|    clip_fraction        | 0.0285       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.359       |
|    explained_variance   | 0.587        |
|    learning_rate        | 0.0003       |
|    loss                 | 26.6         |
|    n_updates            | 210          |
|    policy_gradient_loss | -0.00577     |
|    value_loss           | 64.8         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 73           |
|    ep_rew_mean          | 72           |
| time/                   |              |
|    fps                  | 216          |
|    iterations           | 23           |
|    time_elapsed         | 217          |
|    total_timesteps      | 47104        |
| train/                  |              |
|    approx_kl            | 0.0019989694 |
|    clip_fraction        | 0.0143       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.37        |
|    explained_variance   | 0.486        |
|    learning_rate        | 0.0003       |
|    loss                 | 41           |
|    n_updates            | 220          |
|    policy_gradient_loss | -0.00331     |
|    value_loss           | 82.1         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 78.4         |
|    ep_rew_mean          | 77.4         |
| time/                   |              |
|    fps                  | 215          |
|    iterations           | 24           |
|    time_elapsed         | 227          |
|    total_timesteps      | 49152        |
| train/                  |              |
|    approx_kl            | 0.0016594635 |
|    clip_fraction        | 0.00913      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.371       |
|    explained_variance   | 0.416        |
|    learning_rate        | 0.0003       |
|    loss                 | 45.6         |
|    n_updates            | 230          |
|    policy_gradient_loss | -0.00261     |
|    value_loss           | 95.6         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 87          |
|    ep_rew_mean          | 86          |
| time/                   |             |
|    fps                  | 215         |
|    iterations           | 25          |
|    time_elapsed         | 237         |
|    total_timesteps      | 51200       |
| train/                  |             |
|    approx_kl            | 0.002487632 |
|    clip_fraction        | 0.0231      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.369      |
|    explained_variance   | 0.526       |
|    learning_rate        | 0.0003      |
|    loss                 | 35.1        |
|    n_updates            | 240         |
|    policy_gradient_loss | -0.00354    |
|    value_loss           | 86.1        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 94.7         |
|    ep_rew_mean          | 93.7         |
| time/                   |              |
|    fps                  | 215          |
|    iterations           | 26           |
|    time_elapsed         | 247          |
|    total_timesteps      | 53248        |
| train/                  |              |
|    approx_kl            | 0.0022256947 |
|    clip_fraction        | 0.019        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.38        |
|    explained_variance   | 0.405        |
|    learning_rate        | 0.0003       |
|    loss                 | 41.3         |
|    n_updates            | 250          |
|    policy_gradient_loss | -0.00419     |
|    value_loss           | 100          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 103          |
|    ep_rew_mean          | 102          |
| time/                   |              |
|    fps                  | 214          |
|    iterations           | 27           |
|    time_elapsed         | 257          |
|    total_timesteps      | 55296        |
| train/                  |              |
|    approx_kl            | 0.0022931683 |
|    clip_fraction        | 0.0247       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.367       |
|    explained_variance   | 0.565        |
|    learning_rate        | 0.0003       |
|    loss                 | 52.4         |
|    n_updates            | 260          |
|    policy_gradient_loss | -0.00501     |
|    value_loss           | 85.9         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 116          |
|    ep_rew_mean          | 115          |
| time/                   |              |
|    fps                  | 214          |
|    iterations           | 28           |
|    time_elapsed         | 267          |
|    total_timesteps      | 57344        |
| train/                  |              |
|    approx_kl            | 0.0018883585 |
|    clip_fraction        | 0.00957      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.363       |
|    explained_variance   | 0.21         |
|    learning_rate        | 0.0003       |
|    loss                 | 63.5         |
|    n_updates            | 270          |
|    policy_gradient_loss | -0.00363     |
|    value_loss           | 130          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 126          |
|    ep_rew_mean          | 125          |
| time/                   |              |
|    fps                  | 214          |
|    iterations           | 29           |
|    time_elapsed         | 277          |
|    total_timesteps      | 59392        |
| train/                  |              |
|    approx_kl            | 0.0015784666 |
|    clip_fraction        | 0.00366      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.38        |
|    explained_variance   | 0.117        |
|    learning_rate        | 0.0003       |
|    loss                 | 58.4         |
|    n_updates            | 280          |
|    policy_gradient_loss | -0.00216     |
|    value_loss           | 115          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 134          |
|    ep_rew_mean          | 133          |
| time/                   |              |
|    fps                  | 214          |
|    iterations           | 30           |
|    time_elapsed         | 286          |
|    total_timesteps      | 61440        |
| train/                  |              |
|    approx_kl            | 0.0027542962 |
|    clip_fraction        | 0.0115       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.363       |
|    explained_variance   | 0.509        |
|    learning_rate        | 0.0003       |
|    loss                 | 50.2         |
|    n_updates            | 290          |
|    policy_gradient_loss | -0.00393     |
|    value_loss           | 99.5         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 144          |
|    ep_rew_mean          | 143          |
| time/                   |              |
|    fps                  | 213          |
|    iterations           | 31           |
|    time_elapsed         | 297          |
|    total_timesteps      | 63488        |
| train/                  |              |
|    approx_kl            | 0.0016180378 |
|    clip_fraction        | 0.0118       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.367       |
|    explained_variance   | 0.514        |
|    learning_rate        | 0.0003       |
|    loss                 | 44.5         |
|    n_updates            | 300          |
|    policy_gradient_loss | -0.00212     |
|    value_loss           | 90.9         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 155          |
|    ep_rew_mean          | 154          |
| time/                   |              |
|    fps                  | 213          |
|    iterations           | 32           |
|    time_elapsed         | 307          |
|    total_timesteps      | 65536        |
| train/                  |              |
|    approx_kl            | 0.0017230567 |
|    clip_fraction        | 0.0122       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.369       |
|    explained_variance   | 0.332        |
|    learning_rate        | 0.0003       |
|    loss                 | 56.1         |
|    n_updates            | 310          |
|    policy_gradient_loss | -0.00291     |
|    value_loss           | 116          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 163          |
|    ep_rew_mean          | 162          |
| time/                   |              |
|    fps                  | 212          |
|    iterations           | 33           |
|    time_elapsed         | 317          |
|    total_timesteps      | 67584        |
| train/                  |              |
|    approx_kl            | 0.0028656945 |
|    clip_fraction        | 0.0249       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.37        |
|    explained_variance   | 0.452        |
|    learning_rate        | 0.0003       |
|    loss                 | 37.3         |
|    n_updates            | 320          |
|    policy_gradient_loss | -0.0039      |
|    value_loss           | 89.4         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 175          |
|    ep_rew_mean          | 174          |
| time/                   |              |
|    fps                  | 212          |
|    iterations           | 34           |
|    time_elapsed         | 327          |
|    total_timesteps      | 69632        |
| train/                  |              |
|    approx_kl            | 0.0024660288 |
|    clip_fraction        | 0.0249       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.357       |
|    explained_variance   | 0.303        |
|    learning_rate        | 0.0003       |
|    loss                 | 45.5         |
|    n_updates            | 330          |
|    policy_gradient_loss | -0.00434     |
|    value_loss           | 125          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 174          |
|    ep_rew_mean          | 173          |
| time/                   |              |
|    fps                  | 211          |
|    iterations           | 35           |
|    time_elapsed         | 338          |
|    total_timesteps      | 71680        |
| train/                  |              |
|    approx_kl            | 0.0013943578 |
|    clip_fraction        | 0.00479      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.355       |
|    explained_variance   | 0.287        |
|    learning_rate        | 0.0003       |
|    loss                 | 43           |
|    n_updates            | 340          |
|    policy_gradient_loss | -0.00248     |
|    value_loss           | 107          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 188          |
|    ep_rew_mean          | 187          |
| time/                   |              |
|    fps                  | 211          |
|    iterations           | 36           |
|    time_elapsed         | 349          |
|    total_timesteps      | 73728        |
| train/                  |              |
|    approx_kl            | 0.0010385385 |
|    clip_fraction        | 0.00449      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.334       |
|    explained_variance   | 0.367        |
|    learning_rate        | 0.0003       |
|    loss                 | 50           |
|    n_updates            | 350          |
|    policy_gradient_loss | -0.00208     |
|    value_loss           | 130          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 205         |
|    ep_rew_mean          | 204         |
| time/                   |             |
|    fps                  | 210         |
|    iterations           | 37          |
|    time_elapsed         | 359         |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.002014001 |
|    clip_fraction        | 0.0147      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.331      |
|    explained_variance   | 0.476       |
|    learning_rate        | 0.0003      |
|    loss                 | 44.5        |
|    n_updates            | 360         |
|    policy_gradient_loss | -0.0021     |
|    value_loss           | 81.5        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 215          |
|    ep_rew_mean          | 214          |
| time/                   |              |
|    fps                  | 210          |
|    iterations           | 38           |
|    time_elapsed         | 369          |
|    total_timesteps      | 77824        |
| train/                  |              |
|    approx_kl            | 0.0027457834 |
|    clip_fraction        | 0.03         |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.351       |
|    explained_variance   | 0.289        |
|    learning_rate        | 0.0003       |
|    loss                 | 73.4         |
|    n_updates            | 370          |
|    policy_gradient_loss | -0.00249     |
|    value_loss           | 95.7         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 218         |
|    ep_rew_mean          | 217         |
| time/                   |             |
|    fps                  | 210         |
|    iterations           | 39          |
|    time_elapsed         | 379         |
|    total_timesteps      | 79872       |
| train/                  |             |
|    approx_kl            | 0.003121477 |
|    clip_fraction        | 0.0287      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.36       |
|    explained_variance   | 0.303       |
|    learning_rate        | 0.0003      |
|    loss                 | 41.2        |
|    n_updates            | 380         |
|    policy_gradient_loss | -0.00306    |
|    value_loss           | 88.7        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 218          |
|    ep_rew_mean          | 217          |
| time/                   |              |
|    fps                  | 210          |
|    iterations           | 40           |
|    time_elapsed         | 389          |
|    total_timesteps      | 81920        |
| train/                  |              |
|    approx_kl            | 0.0026190928 |
|    clip_fraction        | 0.0153       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.338       |
|    explained_variance   | 0.209        |
|    learning_rate        | 0.0003       |
|    loss                 | 65.2         |
|    n_updates            | 390          |
|    policy_gradient_loss | -0.00313     |
|    value_loss           | 123          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 231          |
|    ep_rew_mean          | 230          |
| time/                   |              |
|    fps                  | 209          |
|    iterations           | 41           |
|    time_elapsed         | 399          |
|    total_timesteps      | 83968        |
| train/                  |              |
|    approx_kl            | 0.0022714064 |
|    clip_fraction        | 0.0162       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.326       |
|    explained_variance   | 0.37         |
|    learning_rate        | 0.0003       |
|    loss                 | 75.6         |
|    n_updates            | 400          |
|    policy_gradient_loss | -0.0035      |
|    value_loss           | 132          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 240          |
|    ep_rew_mean          | 239          |
| time/                   |              |
|    fps                  | 209          |
|    iterations           | 42           |
|    time_elapsed         | 409          |
|    total_timesteps      | 86016        |
| train/                  |              |
|    approx_kl            | 0.0053449576 |
|    clip_fraction        | 0.0484       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.333       |
|    explained_variance   | 0.398        |
|    learning_rate        | 0.0003       |
|    loss                 | 44.4         |
|    n_updates            | 410          |
|    policy_gradient_loss | -0.00465     |
|    value_loss           | 103          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 249          |
|    ep_rew_mean          | 248          |
| time/                   |              |
|    fps                  | 209          |
|    iterations           | 43           |
|    time_elapsed         | 420          |
|    total_timesteps      | 88064        |
| train/                  |              |
|    approx_kl            | 0.0019969407 |
|    clip_fraction        | 0.0124       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.328       |
|    explained_variance   | 0.299        |
|    learning_rate        | 0.0003       |
|    loss                 | 33.4         |
|    n_updates            | 420          |
|    policy_gradient_loss | -0.00198     |
|    value_loss           | 80.2         |
------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 256        |
|    ep_rew_mean          | 255        |
| time/                   |            |
|    fps                  | 209        |
|    iterations           | 44         |
|    time_elapsed         | 430        |
|    total_timesteps      | 90112      |
| train/                  |            |
|    approx_kl            | 0.00427718 |
|    clip_fraction        | 0.022      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.322     |
|    explained_variance   | 0.16       |
|    learning_rate        | 0.0003     |
|    loss                 | 41.5       |
|    n_updates            | 430        |
|    policy_gradient_loss | -0.00234   |
|    value_loss           | 94.7       |
----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 262          |
|    ep_rew_mean          | 262          |
| time/                   |              |
|    fps                  | 209          |
|    iterations           | 45           |
|    time_elapsed         | 440          |
|    total_timesteps      | 92160        |
| train/                  |              |
|    approx_kl            | 0.0014445074 |
|    clip_fraction        | 0.017        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.316       |
|    explained_variance   | 0.297        |
|    learning_rate        | 0.0003       |
|    loss                 | 66           |
|    n_updates            | 440          |
|    policy_gradient_loss | -0.00343     |
|    value_loss           | 132          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 271         |
|    ep_rew_mean          | 271         |
| time/                   |             |
|    fps                  | 208         |
|    iterations           | 46          |
|    time_elapsed         | 451         |
|    total_timesteps      | 94208       |
| train/                  |             |
|    approx_kl            | 0.003245529 |
|    clip_fraction        | 0.0223      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.312      |
|    explained_variance   | 0.379       |
|    learning_rate        | 0.0003      |
|    loss                 | 41.4        |
|    n_updates            | 450         |
|    policy_gradient_loss | -0.00274    |
|    value_loss           | 89.7        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 287         |
|    ep_rew_mean          | 286         |
| time/                   |             |
|    fps                  | 208         |
|    iterations           | 47          |
|    time_elapsed         | 462         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.001220983 |
|    clip_fraction        | 0.0165      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.315      |
|    explained_variance   | 0.452       |
|    learning_rate        | 0.0003      |
|    loss                 | 43.7        |
|    n_updates            | 460         |
|    policy_gradient_loss | -0.00148    |
|    value_loss           | 89.2        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 296         |
|    ep_rew_mean          | 295         |
| time/                   |             |
|    fps                  | 208         |
|    iterations           | 48          |
|    time_elapsed         | 472         |
|    total_timesteps      | 98304       |
| train/                  |             |
|    approx_kl            | 0.004684698 |
|    clip_fraction        | 0.0376      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.312      |
|    explained_variance   | 0.321       |
|    learning_rate        | 0.0003      |
|    loss                 | 58.3        |
|    n_updates            | 470         |
|    policy_gradient_loss | -0.00528    |
|    value_loss           | 114         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 304         |
|    ep_rew_mean          | 303         |
| time/                   |             |
|    fps                  | 207         |
|    iterations           | 49          |
|    time_elapsed         | 482         |
|    total_timesteps      | 100352      |
| train/                  |             |
|    approx_kl            | 0.002422871 |
|    clip_fraction        | 0.0131      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.312      |
|    explained_variance   | 0.445       |
|    learning_rate        | 0.0003      |
|    loss                 | 60.3        |
|    n_updates            | 480         |
|    policy_gradient_loss | -0.0019     |
|    value_loss           | 86.8        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 317         |
|    ep_rew_mean          | 316         |
| time/                   |             |
|    fps                  | 207         |
|    iterations           | 50          |
|    time_elapsed         | 493         |
|    total_timesteps      | 102400      |
| train/                  |             |
|    approx_kl            | 0.002618659 |
|    clip_fraction        | 0.0295      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.321      |
|    explained_variance   | 0.121       |
|    learning_rate        | 0.0003      |
|    loss                 | 49.7        |
|    n_updates            | 490         |
|    policy_gradient_loss | -0.00143    |
|    value_loss           | 86.1        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 335          |
|    ep_rew_mean          | 334          |
| time/                   |              |
|    fps                  | 207          |
|    iterations           | 51           |
|    time_elapsed         | 503          |
|    total_timesteps      | 104448       |
| train/                  |              |
|    approx_kl            | 0.0039349543 |
|    clip_fraction        | 0.0255       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.322       |
|    explained_variance   | 0.213        |
|    learning_rate        | 0.0003       |
|    loss                 | 42.7         |
|    n_updates            | 500          |
|    policy_gradient_loss | -0.00261     |
|    value_loss           | 102          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 342          |
|    ep_rew_mean          | 341          |
| time/                   |              |
|    fps                  | 206          |
|    iterations           | 52           |
|    time_elapsed         | 514          |
|    total_timesteps      | 106496       |
| train/                  |              |
|    approx_kl            | 0.0035393955 |
|    clip_fraction        | 0.0335       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.306       |
|    explained_variance   | 0.219        |
|    learning_rate        | 0.0003       |
|    loss                 | 71.7         |
|    n_updates            | 510          |
|    policy_gradient_loss | -0.00209     |
|    value_loss           | 121          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 347          |
|    ep_rew_mean          | 346          |
| time/                   |              |
|    fps                  | 206          |
|    iterations           | 53           |
|    time_elapsed         | 526          |
|    total_timesteps      | 108544       |
| train/                  |              |
|    approx_kl            | 0.0023998623 |
|    clip_fraction        | 0.0165       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.305       |
|    explained_variance   | 0.158        |
|    learning_rate        | 0.0003       |
|    loss                 | 36           |
|    n_updates            | 520          |
|    policy_gradient_loss | -0.00207     |
|    value_loss           | 92.4         |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 347           |
|    ep_rew_mean          | 346           |
| time/                   |               |
|    fps                  | 206           |
|    iterations           | 54            |
|    time_elapsed         | 536           |
|    total_timesteps      | 110592        |
| train/                  |               |
|    approx_kl            | 0.00085083605 |
|    clip_fraction        | 0.00908       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.301        |
|    explained_variance   | 0.504         |
|    learning_rate        | 0.0003        |
|    loss                 | 31.4          |
|    n_updates            | 530           |
|    policy_gradient_loss | -0.00127      |
|    value_loss           | 91.8          |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 357          |
|    ep_rew_mean          | 356          |
| time/                   |              |
|    fps                  | 205          |
|    iterations           | 55           |
|    time_elapsed         | 547          |
|    total_timesteps      | 112640       |
| train/                  |              |
|    approx_kl            | 0.0021061758 |
|    clip_fraction        | 0.0203       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.293       |
|    explained_variance   | 0.384        |
|    learning_rate        | 0.0003       |
|    loss                 | 39.2         |
|    n_updates            | 540          |
|    policy_gradient_loss | -0.00156     |
|    value_loss           | 82.5         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 366          |
|    ep_rew_mean          | 366          |
| time/                   |              |
|    fps                  | 205          |
|    iterations           | 56           |
|    time_elapsed         | 557          |
|    total_timesteps      | 114688       |
| train/                  |              |
|    approx_kl            | 0.0049419794 |
|    clip_fraction        | 0.0464       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.293       |
|    explained_variance   | -0.00707     |
|    learning_rate        | 0.0003       |
|    loss                 | 60.4         |
|    n_updates            | 550          |
|    policy_gradient_loss | -0.00332     |
|    value_loss           | 101          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 382          |
|    ep_rew_mean          | 382          |
| time/                   |              |
|    fps                  | 205          |
|    iterations           | 57           |
|    time_elapsed         | 568          |
|    total_timesteps      | 116736       |
| train/                  |              |
|    approx_kl            | 0.0016806667 |
|    clip_fraction        | 0.0165       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.29        |
|    explained_variance   | 0.242        |
|    learning_rate        | 0.0003       |
|    loss                 | 46.1         |
|    n_updates            | 560          |
|    policy_gradient_loss | -0.00185     |
|    value_loss           | 88.9         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 386          |
|    ep_rew_mean          | 385          |
| time/                   |              |
|    fps                  | 205          |
|    iterations           | 58           |
|    time_elapsed         | 579          |
|    total_timesteps      | 118784       |
| train/                  |              |
|    approx_kl            | 0.0039284048 |
|    clip_fraction        | 0.063        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.304       |
|    explained_variance   | -0.0007      |
|    learning_rate        | 0.0003       |
|    loss                 | 43           |
|    n_updates            | 570          |
|    policy_gradient_loss | -0.00294     |
|    value_loss           | 81.9         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | 399          |
| time/                   |              |
|    fps                  | 204          |
|    iterations           | 59           |
|    time_elapsed         | 589          |
|    total_timesteps      | 120832       |
| train/                  |              |
|    approx_kl            | 0.0037888002 |
|    clip_fraction        | 0.0279       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.298       |
|    explained_variance   | 0.124        |
|    learning_rate        | 0.0003       |
|    loss                 | 49.8         |
|    n_updates            | 580          |
|    policy_gradient_loss | -0.00244     |
|    value_loss           | 102          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 415          |
|    ep_rew_mean          | 415          |
| time/                   |              |
|    fps                  | 204          |
|    iterations           | 60           |
|    time_elapsed         | 600          |
|    total_timesteps      | 122880       |
| train/                  |              |
|    approx_kl            | 0.0017344161 |
|    clip_fraction        | 0.00557      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.281       |
|    explained_variance   | 0.332        |
|    learning_rate        | 0.0003       |
|    loss                 | 71.8         |
|    n_updates            | 590          |
|    policy_gradient_loss | -0.0014      |
|    value_loss           | 140          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 423         |
|    ep_rew_mean          | 422         |
| time/                   |             |
|    fps                  | 204         |
|    iterations           | 61          |
|    time_elapsed         | 611         |
|    total_timesteps      | 124928      |
| train/                  |             |
|    approx_kl            | 0.005351347 |
|    clip_fraction        | 0.0667      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.291      |
|    explained_variance   | -0.00921    |
|    learning_rate        | 0.0003      |
|    loss                 | 19.8        |
|    n_updates            | 600         |
|    policy_gradient_loss | -0.000126   |
|    value_loss           | 84.2        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 435          |
|    ep_rew_mean          | 435          |
| time/                   |              |
|    fps                  | 204          |
|    iterations           | 62           |
|    time_elapsed         | 622          |
|    total_timesteps      | 126976       |
| train/                  |              |
|    approx_kl            | 0.0015534491 |
|    clip_fraction        | 0.0401       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.276       |
|    explained_variance   | 0.019        |
|    learning_rate        | 0.0003       |
|    loss                 | 25.2         |
|    n_updates            | 610          |
|    policy_gradient_loss | -0.00215     |
|    value_loss           | 83.7         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 438          |
|    ep_rew_mean          | 437          |
| time/                   |              |
|    fps                  | 203          |
|    iterations           | 63           |
|    time_elapsed         | 633          |
|    total_timesteps      | 129024       |
| train/                  |              |
|    approx_kl            | 0.0033958643 |
|    clip_fraction        | 0.0292       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.27        |
|    explained_variance   | 0.0111       |
|    learning_rate        | 0.0003       |
|    loss                 | 36.3         |
|    n_updates            | 620          |
|    policy_gradient_loss | -0.00164     |
|    value_loss           | 113          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 446          |
|    ep_rew_mean          | 445          |
| time/                   |              |
|    fps                  | 203          |
|    iterations           | 64           |
|    time_elapsed         | 644          |
|    total_timesteps      | 131072       |
| train/                  |              |
|    approx_kl            | 0.0022021069 |
|    clip_fraction        | 0.0177       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.281       |
|    explained_variance   | 0.489        |
|    learning_rate        | 0.0003       |
|    loss                 | 42.5         |
|    n_updates            | 630          |
|    policy_gradient_loss | -0.00302     |
|    value_loss           | 85.6         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 448          |
|    ep_rew_mean          | 448          |
| time/                   |              |
|    fps                  | 203          |
|    iterations           | 65           |
|    time_elapsed         | 655          |
|    total_timesteps      | 133120       |
| train/                  |              |
|    approx_kl            | 0.0036366764 |
|    clip_fraction        | 0.0309       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.286       |
|    explained_variance   | 0.384        |
|    learning_rate        | 0.0003       |
|    loss                 | 44.9         |
|    n_updates            | 640          |
|    policy_gradient_loss | -0.00217     |
|    value_loss           | 82.2         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 459          |
|    ep_rew_mean          | 459          |
| time/                   |              |
|    fps                  | 202          |
|    iterations           | 66           |
|    time_elapsed         | 666          |
|    total_timesteps      | 135168       |
| train/                  |              |
|    approx_kl            | 0.0016414158 |
|    clip_fraction        | 0.0112       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.289       |
|    explained_variance   | 0.318        |
|    learning_rate        | 0.0003       |
|    loss                 | 47.7         |
|    n_updates            | 650          |
|    policy_gradient_loss | -0.000941    |
|    value_loss           | 105          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 470          |
|    ep_rew_mean          | 469          |
| time/                   |              |
|    fps                  | 202          |
|    iterations           | 67           |
|    time_elapsed         | 677          |
|    total_timesteps      | 137216       |
| train/                  |              |
|    approx_kl            | 0.0020576518 |
|    clip_fraction        | 0.00547      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.279       |
|    explained_variance   | 0.401        |
|    learning_rate        | 0.0003       |
|    loss                 | 25.8         |
|    n_updates            | 660          |
|    policy_gradient_loss | -0.0015      |
|    value_loss           | 89.6         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 479          |
|    ep_rew_mean          | 479          |
| time/                   |              |
|    fps                  | 202          |
|    iterations           | 68           |
|    time_elapsed         | 687          |
|    total_timesteps      | 139264       |
| train/                  |              |
|    approx_kl            | 0.0022485116 |
|    clip_fraction        | 0.0247       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.266       |
|    explained_variance   | 0.127        |
|    learning_rate        | 0.0003       |
|    loss                 | 75.8         |
|    n_updates            | 670          |
|    policy_gradient_loss | -0.00287     |
|    value_loss           | 87.8         |
------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 486        |
|    ep_rew_mean          | 486        |
| time/                   |            |
|    fps                  | 202        |
|    iterations           | 69         |
|    time_elapsed         | 698        |
|    total_timesteps      | 141312     |
| train/                  |            |
|    approx_kl            | 0.00164883 |
|    clip_fraction        | 0.0265     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.282     |
|    explained_variance   | 0.0567     |
|    learning_rate        | 0.0003     |
|    loss                 | 32         |
|    n_updates            | 680        |
|    policy_gradient_loss | -0.00134   |
|    value_loss           | 87.5       |
----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 490          |
|    ep_rew_mean          | 490          |
| time/                   |              |
|    fps                  | 202          |
|    iterations           | 70           |
|    time_elapsed         | 709          |
|    total_timesteps      | 143360       |
| train/                  |              |
|    approx_kl            | 0.0027423752 |
|    clip_fraction        | 0.0183       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.268       |
|    explained_variance   | -0.0292      |
|    learning_rate        | 0.0003       |
|    loss                 | 48.2         |
|    n_updates            | 690          |
|    policy_gradient_loss | -0.000964    |
|    value_loss           | 120          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 496          |
|    ep_rew_mean          | 495          |
| time/                   |              |
|    fps                  | 201          |
|    iterations           | 71           |
|    time_elapsed         | 720          |
|    total_timesteps      | 145408       |
| train/                  |              |
|    approx_kl            | 0.0010568046 |
|    clip_fraction        | 0.00693      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.268       |
|    explained_variance   | 0.364        |
|    learning_rate        | 0.0003       |
|    loss                 | 72.8         |
|    n_updates            | 700          |
|    policy_gradient_loss | -0.00123     |
|    value_loss           | 111          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 505          |
|    ep_rew_mean          | 505          |
| time/                   |              |
|    fps                  | 201          |
|    iterations           | 72           |
|    time_elapsed         | 730          |
|    total_timesteps      | 147456       |
| train/                  |              |
|    approx_kl            | 0.0034874128 |
|    clip_fraction        | 0.0403       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.271       |
|    explained_variance   | 0.132        |
|    learning_rate        | 0.0003       |
|    loss                 | 22.6         |
|    n_updates            | 710          |
|    policy_gradient_loss | -0.00277     |
|    value_loss           | 81.8         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 501          |
|    ep_rew_mean          | 500          |
| time/                   |              |
|    fps                  | 201          |
|    iterations           | 73           |
|    time_elapsed         | 741          |
|    total_timesteps      | 149504       |
| train/                  |              |
|    approx_kl            | 0.0039506853 |
|    clip_fraction        | 0.0338       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.282       |
|    explained_variance   | 0.6          |
|    learning_rate        | 0.0003       |
|    loss                 | 58.6         |
|    n_updates            | 720          |
|    policy_gradient_loss | -0.00249     |
|    value_loss           | 89.9         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 502          |
|    ep_rew_mean          | 501          |
| time/                   |              |
|    fps                  | 201          |
|    iterations           | 74           |
|    time_elapsed         | 752          |
|    total_timesteps      | 151552       |
| train/                  |              |
|    approx_kl            | 0.0019059534 |
|    clip_fraction        | 0.0211       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.278       |
|    explained_variance   | 0.484        |
|    learning_rate        | 0.0003       |
|    loss                 | 64.1         |
|    n_updates            | 730          |
|    policy_gradient_loss | -0.00233     |
|    value_loss           | 113          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 506         |
|    ep_rew_mean          | 505         |
| time/                   |             |
|    fps                  | 201         |
|    iterations           | 75          |
|    time_elapsed         | 763         |
|    total_timesteps      | 153600      |
| train/                  |             |
|    approx_kl            | 0.002597554 |
|    clip_fraction        | 0.0168      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.282      |
|    explained_variance   | 0.0996      |
|    learning_rate        | 0.0003      |
|    loss                 | 48.6        |
|    n_updates            | 740         |
|    policy_gradient_loss | -0.00192    |
|    value_loss           | 109         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 513          |
|    ep_rew_mean          | 513          |
| time/                   |              |
|    fps                  | 201          |
|    iterations           | 76           |
|    time_elapsed         | 773          |
|    total_timesteps      | 155648       |
| train/                  |              |
|    approx_kl            | 0.0039667212 |
|    clip_fraction        | 0.0457       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.272       |
|    explained_variance   | 0.0652       |
|    learning_rate        | 0.0003       |
|    loss                 | 50.3         |
|    n_updates            | 750          |
|    policy_gradient_loss | -0.00286     |
|    value_loss           | 115          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 515          |
|    ep_rew_mean          | 515          |
| time/                   |              |
|    fps                  | 201          |
|    iterations           | 77           |
|    time_elapsed         | 782          |
|    total_timesteps      | 157696       |
| train/                  |              |
|    approx_kl            | 0.0036267946 |
|    clip_fraction        | 0.0174       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.266       |
|    explained_variance   | 0.341        |
|    learning_rate        | 0.0003       |
|    loss                 | 28.3         |
|    n_updates            | 760          |
|    policy_gradient_loss | -0.00309     |
|    value_loss           | 102          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 519          |
|    ep_rew_mean          | 519          |
| time/                   |              |
|    fps                  | 201          |
|    iterations           | 78           |
|    time_elapsed         | 792          |
|    total_timesteps      | 159744       |
| train/                  |              |
|    approx_kl            | 0.0024146396 |
|    clip_fraction        | 0.0193       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.246       |
|    explained_variance   | 0.419        |
|    learning_rate        | 0.0003       |
|    loss                 | 71.2         |
|    n_updates            | 770          |
|    policy_gradient_loss | -0.00204     |
|    value_loss           | 108          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 521          |
|    ep_rew_mean          | 521          |
| time/                   |              |
|    fps                  | 201          |
|    iterations           | 79           |
|    time_elapsed         | 801          |
|    total_timesteps      | 161792       |
| train/                  |              |
|    approx_kl            | 0.0023856512 |
|    clip_fraction        | 0.0251       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.248       |
|    explained_variance   | 0.177        |
|    learning_rate        | 0.0003       |
|    loss                 | 40.4         |
|    n_updates            | 780          |
|    policy_gradient_loss | -0.00234     |
|    value_loss           | 126          |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 526         |
|    ep_rew_mean          | 526         |
| time/                   |             |
|    fps                  | 202         |
|    iterations           | 80          |
|    time_elapsed         | 810         |
|    total_timesteps      | 163840      |
| train/                  |             |
|    approx_kl            | 0.004516804 |
|    clip_fraction        | 0.0322      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.243      |
|    explained_variance   | 0.00425     |
|    learning_rate        | 0.0003      |
|    loss                 | 56.4        |
|    n_updates            | 790         |
|    policy_gradient_loss | -0.00188    |
|    value_loss           | 120         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 522          |
|    ep_rew_mean          | 522          |
| time/                   |              |
|    fps                  | 202          |
|    iterations           | 81           |
|    time_elapsed         | 820          |
|    total_timesteps      | 165888       |
| train/                  |              |
|    approx_kl            | 0.0034624161 |
|    clip_fraction        | 0.0544       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.247       |
|    explained_variance   | 0.236        |
|    learning_rate        | 0.0003       |
|    loss                 | 32.6         |
|    n_updates            | 800          |
|    policy_gradient_loss | -0.00205     |
|    value_loss           | 75.7         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 520          |
|    ep_rew_mean          | 520          |
| time/                   |              |
|    fps                  | 202          |
|    iterations           | 82           |
|    time_elapsed         | 829          |
|    total_timesteps      | 167936       |
| train/                  |              |
|    approx_kl            | 0.0018532802 |
|    clip_fraction        | 0.00669      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.24        |
|    explained_variance   | 0.117        |
|    learning_rate        | 0.0003       |
|    loss                 | 91           |
|    n_updates            | 810          |
|    policy_gradient_loss | -0.00016     |
|    value_loss           | 137          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 520          |
|    ep_rew_mean          | 519          |
| time/                   |              |
|    fps                  | 202          |
|    iterations           | 83           |
|    time_elapsed         | 839          |
|    total_timesteps      | 169984       |
| train/                  |              |
|    approx_kl            | 0.0023882869 |
|    clip_fraction        | 0.0185       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.243       |
|    explained_variance   | 0.411        |
|    learning_rate        | 0.0003       |
|    loss                 | 32.7         |
|    n_updates            | 820          |
|    policy_gradient_loss | -0.00224     |
|    value_loss           | 91.1         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 520          |
|    ep_rew_mean          | 519          |
| time/                   |              |
|    fps                  | 202          |
|    iterations           | 84           |
|    time_elapsed         | 848          |
|    total_timesteps      | 172032       |
| train/                  |              |
|    approx_kl            | 0.0030576512 |
|    clip_fraction        | 0.0232       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.248       |
|    explained_variance   | 0.00835      |
|    learning_rate        | 0.0003       |
|    loss                 | 79.1         |
|    n_updates            | 830          |
|    policy_gradient_loss | -0.00161     |
|    value_loss           | 118          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 526          |
|    ep_rew_mean          | 525          |
| time/                   |              |
|    fps                  | 202          |
|    iterations           | 85           |
|    time_elapsed         | 859          |
|    total_timesteps      | 174080       |
| train/                  |              |
|    approx_kl            | 0.0050577456 |
|    clip_fraction        | 0.0434       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.24        |
|    explained_variance   | 0.184        |
|    learning_rate        | 0.0003       |
|    loss                 | 34.9         |
|    n_updates            | 840          |
|    policy_gradient_loss | -0.00356     |
|    value_loss           | 80.6         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 526          |
|    ep_rew_mean          | 525          |
| time/                   |              |
|    fps                  | 202          |
|    iterations           | 86           |
|    time_elapsed         | 871          |
|    total_timesteps      | 176128       |
| train/                  |              |
|    approx_kl            | 0.0036442396 |
|    clip_fraction        | 0.0237       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.241       |
|    explained_variance   | 0.00745      |
|    learning_rate        | 0.0003       |
|    loss                 | 44           |
|    n_updates            | 850          |
|    policy_gradient_loss | -0.00115     |
|    value_loss           | 119          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 521          |
|    ep_rew_mean          | 521          |
| time/                   |              |
|    fps                  | 202          |
|    iterations           | 87           |
|    time_elapsed         | 881          |
|    total_timesteps      | 178176       |
| train/                  |              |
|    approx_kl            | 0.0064312746 |
|    clip_fraction        | 0.0387       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.238       |
|    explained_variance   | -0.0193      |
|    learning_rate        | 0.0003       |
|    loss                 | 60.9         |
|    n_updates            | 860          |
|    policy_gradient_loss | -0.0012      |
|    value_loss           | 91.1         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 523         |
|    ep_rew_mean          | 523         |
| time/                   |             |
|    fps                  | 201         |
|    iterations           | 88          |
|    time_elapsed         | 893         |
|    total_timesteps      | 180224      |
| train/                  |             |
|    approx_kl            | 0.002935983 |
|    clip_fraction        | 0.0309      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.245      |
|    explained_variance   | 0.24        |
|    learning_rate        | 0.0003      |
|    loss                 | 54.1        |
|    n_updates            | 870         |
|    policy_gradient_loss | -0.000761   |
|    value_loss           | 104         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 528         |
|    ep_rew_mean          | 528         |
| time/                   |             |
|    fps                  | 201         |
|    iterations           | 89          |
|    time_elapsed         | 904         |
|    total_timesteps      | 182272      |
| train/                  |             |
|    approx_kl            | 0.003605581 |
|    clip_fraction        | 0.0537      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.23       |
|    explained_variance   | 0.224       |
|    learning_rate        | 0.0003      |
|    loss                 | 52.3        |
|    n_updates            | 880         |
|    policy_gradient_loss | -0.00277    |
|    value_loss           | 101         |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 529          |
|    ep_rew_mean          | 529          |
| time/                   |              |
|    fps                  | 201          |
|    iterations           | 90           |
|    time_elapsed         | 915          |
|    total_timesteps      | 184320       |
| train/                  |              |
|    approx_kl            | 0.0063353353 |
|    clip_fraction        | 0.0611       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.228       |
|    explained_variance   | 0.755        |
|    learning_rate        | 0.0003       |
|    loss                 | 13.1         |
|    n_updates            | 890          |
|    policy_gradient_loss | -0.00285     |
|    value_loss           | 52.3         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 531          |
|    ep_rew_mean          | 531          |
| time/                   |              |
|    fps                  | 201          |
|    iterations           | 91           |
|    time_elapsed         | 926          |
|    total_timesteps      | 186368       |
| train/                  |              |
|    approx_kl            | 0.0028637191 |
|    clip_fraction        | 0.0244       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.227       |
|    explained_variance   | 0.319        |
|    learning_rate        | 0.0003       |
|    loss                 | 42.6         |
|    n_updates            | 900          |
|    policy_gradient_loss | -0.00164     |
|    value_loss           | 100          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 534          |
|    ep_rew_mean          | 534          |
| time/                   |              |
|    fps                  | 200          |
|    iterations           | 92           |
|    time_elapsed         | 937          |
|    total_timesteps      | 188416       |
| train/                  |              |
|    approx_kl            | 0.0027379442 |
|    clip_fraction        | 0.0203       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.221       |
|    explained_variance   | 0.493        |
|    learning_rate        | 0.0003       |
|    loss                 | 38.3         |
|    n_updates            | 910          |
|    policy_gradient_loss | -0.00195     |
|    value_loss           | 93.9         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 529          |
|    ep_rew_mean          | 529          |
| time/                   |              |
|    fps                  | 200          |
|    iterations           | 93           |
|    time_elapsed         | 948          |
|    total_timesteps      | 190464       |
| train/                  |              |
|    approx_kl            | 0.0038638117 |
|    clip_fraction        | 0.0183       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.23        |
|    explained_variance   | -0.0856      |
|    learning_rate        | 0.0003       |
|    loss                 | 36.2         |
|    n_updates            | 920          |
|    policy_gradient_loss | -0.00141     |
|    value_loss           | 95.2         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 509          |
|    ep_rew_mean          | 509          |
| time/                   |              |
|    fps                  | 200          |
|    iterations           | 94           |
|    time_elapsed         | 960          |
|    total_timesteps      | 192512       |
| train/                  |              |
|    approx_kl            | 0.0022355276 |
|    clip_fraction        | 0.0222       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.22        |
|    explained_variance   | 0.0123       |
|    learning_rate        | 0.0003       |
|    loss                 | 72.1         |
|    n_updates            | 930          |
|    policy_gradient_loss | -0.0027      |
|    value_loss           | 149          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 512          |
|    ep_rew_mean          | 512          |
| time/                   |              |
|    fps                  | 200          |
|    iterations           | 95           |
|    time_elapsed         | 971          |
|    total_timesteps      | 194560       |
| train/                  |              |
|    approx_kl            | 0.0016831752 |
|    clip_fraction        | 0.00688      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.216       |
|    explained_variance   | 0.363        |
|    learning_rate        | 0.0003       |
|    loss                 | 50.1         |
|    n_updates            | 940          |
|    policy_gradient_loss | -0.0017      |
|    value_loss           | 115          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 512          |
|    ep_rew_mean          | 512          |
| time/                   |              |
|    fps                  | 200          |
|    iterations           | 96           |
|    time_elapsed         | 982          |
|    total_timesteps      | 196608       |
| train/                  |              |
|    approx_kl            | 0.0008507465 |
|    clip_fraction        | 0.0214       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.21        |
|    explained_variance   | 0.406        |
|    learning_rate        | 0.0003       |
|    loss                 | 59.2         |
|    n_updates            | 950          |
|    policy_gradient_loss | -0.00251     |
|    value_loss           | 122          |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 512          |
|    ep_rew_mean          | 512          |
| time/                   |              |
|    fps                  | 200          |
|    iterations           | 97           |
|    time_elapsed         | 992          |
|    total_timesteps      | 198656       |
| train/                  |              |
|    approx_kl            | 0.0014845565 |
|    clip_fraction        | 0.0249       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.217       |
|    explained_variance   | -0.0446      |
|    learning_rate        | 0.0003       |
|    loss                 | 54.4         |
|    n_updates            | 960          |
|    policy_gradient_loss | -0.00165     |
|    value_loss           | 91.5         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 514          |
|    ep_rew_mean          | 514          |
| time/                   |              |
|    fps                  | 199          |
|    iterations           | 98           |
|    time_elapsed         | 1004         |
|    total_timesteps      | 200704       |
| train/                  |              |
|    approx_kl            | 0.0028758259 |
|    clip_fraction        | 0.0268       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.22        |
|    explained_variance   | -0.0142      |
|    learning_rate        | 0.0003       |
|    loss                 | 34           |
|    n_updates            | 970          |
|    policy_gradient_loss | -0.00158     |
|    value_loss           | 116          |
------------------------------------------


<stable_baselines3.ppo.ppo.PPO at 0x7220f9193c20>

## Save / load the trained policy

In [None]:
# Persist the trained policy under MODELS_DIR so it can be reloaded without retraining.
# NOTE(review): this cell saves as 'test_ppo_task7_balance', while the load cell
# further down reads 'ppo_task7_balance' — confirm the differing names are intentional.
MODEL_PATH = MODELS_DIR / 'test_ppo_task7_balance'
model.save(MODEL_PATH)
print('Model saved to', MODEL_PATH)

Model saved to data/ppo_task7_balance


In [4]:
# Reload a saved policy from MODELS_DIR, passing train_env as the environment to attach.
# NOTE(review): the save cell above writes 'test_ppo_task7_balance', not
# 'ppo_task7_balance' — confirm which checkpoint is meant to be loaded here.
MODEL_PATH = MODELS_DIR / 'ppo_task7_balance'
model = PPO.load(MODEL_PATH, train_env)

## Rollout helpers

In [5]:
# Close the training environment before the GUI evaluation environment is created below.
# NOTE(review): train_env.close() is called again in the notebook's final cell —
# confirm that closing twice is safe for Task7PendulumEnv.
train_env.close()

def rollout_episode(env, model, max_steps=MAX_STEPS, deterministic=True):
    """Play a single episode with ``model`` on ``env`` and log what happened.

    Args:
        env: Environment exposing ``reset()`` (returning ``(obs, info)``) and
            ``step(action)`` (returning a 5-tuple of
            ``obs, reward, terminated, truncated, info``).
        model: Policy object exposing ``predict(obs, deterministic=...)``.
        max_steps: Upper bound on the number of steps taken in the episode.
        deterministic: Forwarded to ``model.predict``.

    Returns:
        A ``(rewards, infos)`` pair: ``rewards`` is a NumPy array with one
        entry per executed step, ``infos`` is the list of per-step info
        objects returned by ``env.step``.
    """
    reward_log, info_log = [], []
    observation, _ = env.reset()
    steps_taken = 0
    while steps_taken < max_steps:
        chosen_action, _ = model.predict(observation, deterministic=deterministic)
        observation, step_reward, terminated, truncated, step_info = env.step(chosen_action)
        reward_log.append(step_reward)
        info_log.append(step_info)
        steps_taken += 1
        # Stop as soon as the environment signals the end of the episode.
        if terminated or truncated:
            break
    return np.array(reward_log), info_log

# Standalone (non-vectorized) environment with gui=True, used for the visual rollout below.
# It reuses the same MAX_STEPS / SHOULD_BALANCE / SIM_SUBSTEPS settings as training.
eval_env = Task7PendulumEnv(
    max_steps=MAX_STEPS, should_balance=SHOULD_BALANCE, gui=True, sim_substeps=SIM_SUBSTEPS
)

startThreads creating 1 threads.
starting thread 0
started thread 0 
argc=2
argv[0] = --unused
argv[1] = --start_demo_name=Physics Server
ExampleBrowserThreadFunc started
X11 functions dynamically loaded using dlopen/dlsym OK!
X11 functions dynamically loaded using dlopen/dlsym OK!
Creating context
Created GL 3.3 context
Direct GLX rendering context obtained
Making context current
GL_VENDOR=Intel
GL_RENDERER=Mesa Intel(R) Iris(R) Xe Graphics (RPL-U)
GL_VERSION=4.6 (Core Profile) Mesa 25.0.7-0ubuntu0.24.04.1
GL_SHADING_LANGUAGE_VERSION=4.60
pthread_getconcurrency()=0
Version = 4.6 (Core Profile) Mesa 25.0.7-0ubuntu0.24.04.1
Vendor = Intel
Renderer = Mesa Intel(R) Iris(R) Xe Graphics (RPL-U)
b3Printf: Selected demo: Physics Server
startThreads creating 1 threads.
starting thread 0
started thread 0 
MotionThreadFunc thread started
ven = Intel
Workaround for some crash in the Intel OpenGL driver on Linux/Ubuntu
ven = Intel
Workaround for some crash in the Intel OpenGL driver on Linux/Ubunt

In [9]:
# Run one episode in the GUI environment (rollout_episode defaults to
# deterministic=True) and print a short summary.
rewards, infos = rollout_episode(eval_env, model)
print('Episode length:', len(rewards))
print('Total reward:', rewards.sum())
# infos is empty only if no step was executed; fall back to None in that case.
print('Final info:', infos[-1] if infos else None)

KeyboardInterrupt: 

## Export rollouts for datasets

In [None]:
def record_task7_dataset(
    model,
    n_episodes=5,
    max_steps=MAX_STEPS,
    should_balance=SHOULD_BALANCE,
    filename='task7_ppo_dataset',
    max_attempts=None,
):
    """Roll out ``model`` and save full-length successful episodes to an ``.npz`` file.

    A fresh headless ``Task7PendulumEnv`` is created for the recording. Each
    attempt runs the policy deterministically for at most ``max_steps`` steps.
    An attempt is kept only if it lasted exactly ``max_steps`` steps and, when
    the environment ended the episode (``terminated`` or ``truncated``), its
    final ``info`` has a truthy ``'success'`` and no truthy ``'failure'``.
    Rejected attempts are retried.

    Args:
        model: Policy exposing ``predict(obs, deterministic=True)``.
        n_episodes: Number of accepted episodes to record.
        max_steps: Episode length passed to the environment and required of
            every accepted episode.
        should_balance: Forwarded to ``Task7PendulumEnv``.
        filename: Stem of the output file, written as ``DATA_DIR/<filename>.npz``.
        max_attempts: Optional cap on the total number of episodes attempted.
            ``None`` (the default) retries without limit, as before.

    Returns:
        Path of the saved ``.npz`` file. It contains ``observations``
        (float32), ``actions`` (int64), ``rewards`` (float32), ``episode_ids``
        (int32, index of the accepted episode each step belongs to) and
        ``max_steps``.

    Raises:
        RuntimeError: If ``max_attempts`` is set and is exhausted before
            ``n_episodes`` episodes were accepted. Nothing is saved then.
    """
    env = Task7PendulumEnv(
        max_steps=max_steps, should_balance=should_balance, gui=False, sim_substeps=SIM_SUBSTEPS
    )
    all_obs, all_actions, all_rewards, episode_ids = [], [], [], []
    recorded = 0
    attempts = 0
    # try/finally guarantees the environment is closed even when the rollout
    # raises or the user interrupts the cell.
    try:
        while recorded < n_episodes:
            if max_attempts is not None and attempts >= max_attempts:
                raise RuntimeError(
                    f'Only recorded {recorded}/{n_episodes} episodes '
                    f'after {attempts} attempts'
                )
            attempts += 1
            obs, _ = env.reset()
            local_obs, local_actions, local_rewards = [], [], []
            # Stays True if the loop runs out of steps without an end signal.
            success = True
            for _ in range(max_steps):
                # Store the observation the action was computed from.
                local_obs.append(obs.copy())
                action, _ = model.predict(obs, deterministic=True)
                # predict returns an array-like; store and send a plain int.
                action = int(action)
                local_actions.append(action)
                obs, reward, terminated, truncated, info = env.step(action)
                local_rewards.append(reward)
                if terminated or truncated:
                    success = info.get('success', False) and not info.get('failure', False)
                    break
            # Keep only successful episodes that ran for the full horizon.
            if success and len(local_obs) == max_steps:
                all_obs.extend(local_obs)
                all_actions.extend(local_actions)
                all_rewards.extend(local_rewards)
                episode_ids.extend([recorded] * len(local_obs))
                recorded += 1
                print(f'Recorded episode {recorded}/{n_episodes}')
            else:
                print('Episode failed, retrying...')
    finally:
        env.close()
    save_path = DATA_DIR / f'{filename}.npz'
    np.savez(
        save_path,
        observations=np.array(all_obs, dtype=np.float32),
        actions=np.array(all_actions, dtype=np.int64),
        rewards=np.array(all_rewards, dtype=np.float32),
        episode_ids=np.array(episode_ids, dtype=np.int32),
        max_steps=max_steps,
    )
    print('Dataset saved to', save_path)
    return save_path

In [None]:
# Example usage once a good policy is trained
# dataset_path = record_task7_dataset(model, n_episodes=3, filename='task7_ppo_balance_rollouts')

In [None]:
# Release the simulator resources held by the evaluation and training environments.
# NOTE(review): train_env was already closed in the "Rollout helpers" cell —
# confirm a second close() is harmless for Task7PendulumEnv, or drop this call.
eval_env.close()
train_env.close()