# Training Super Mario via Reinforcement Learning

Project for learning reasons, tutorial here: https://www.youtube.com/watch?v=2eeYqJ0uBKE

In [None]:
%pip install gym-super-mario-bros nes_py

In [None]:
%pip install stable-baselines3[extra]

In [1]:
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

from gym.wrappers import GrayScaleObservation 
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv # VecFrameStack contains multiple frames from the past
from matplotlib import pyplot as plt

import os
from stable_baselines3 import PPO # reinforcement learning algorithm
from stable_baselines3.common.callbacks import BaseCallback # callback as savepoints

In [2]:
# setup the game
env = gym_super_mario_bros.make("SuperMarioBros-v0")
env = JoypadSpace(env, SIMPLE_MOVEMENT) # reduce the action space of 256 to 7
# Grayscale for faster training
env = GrayScaleObservation(env, keep_dim=True)
env = DummyVecEnv([lambda: env])
env = VecFrameStack(env, 4, channels_order="last") # 4 Frames to Stack in this example


In [3]:
class TrainAndLoggingCallback(BaseCallback):
    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [4]:
CHECKPOINT_DIR = "./train/"
LOG_DIR = "./logs/"

In [5]:
callback = TrainAndLoggingCallback(check_freq=100000, save_path=CHECKPOINT_DIR)

In [8]:
# create an ai model
model = PPO("CnnPolicy", env, verbose=1, tensorboard_log=LOG_DIR, learning_rate=0.000001, n_steps=512)

Using cuda device
Wrapping the env in a VecTransposeImage.


In [9]:
# training the model
model.learn(total_timesteps=500000, callback=callback, progress_bar=True)

Logging to ./logs/PPO_2


Output()

----------------------------
| time/              |     |
|    fps             | 75  |
|    iterations      | 1   |
|    time_elapsed    | 6   |
|    total_timesteps | 512 |
----------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 61            |
|    iterations           | 2             |
|    time_elapsed         | 16            |
|    total_timesteps      | 1024          |
| train/                  |               |
|    approx_kl            | 4.0899497e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | -5.25e-05     |
|    learning_rate        | 1e-06         |
|    loss                 | 259           |
|    n_updates            | 10            |
|    policy_gradient_loss | -0.000141     |
|    value_loss           | 601           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 59            |
|    iterations           | 3             |
|    time_elapsed         | 26            |
|    total_timesteps      | 1536          |
| train/                  |               |
|    approx_kl            | 0.00010292884 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | -0.0244       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.217         |
|    n_updates            | 20            |
|    policy_gradient_loss | -0.00122      |
|    value_loss           | 2.24          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 4             |
|    time_elapsed         | 35            |
|    total_timesteps      | 2048          |
| train/                  |               |
|    approx_kl            | 2.4444656e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 0.0321        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.288         |
|    n_updates            | 30            |
|    policy_gradient_loss | -0.000505     |
|    value_loss           | 1.48          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 57            |
|    iterations           | 5             |
|    time_elapsed         | 44            |
|    total_timesteps      | 2560          |
| train/                  |               |
|    approx_kl            | 1.3275421e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 0.00976       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.133         |
|    n_updates            | 40            |
|    policy_gradient_loss | -0.000123     |
|    value_loss           | 0.713         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 56            |
|    iterations           | 6             |
|    time_elapsed         | 54            |
|    total_timesteps      | 3072          |
| train/                  |               |
|    approx_kl            | 2.7219183e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | -0.0124       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0872        |
|    n_updates            | 50            |
|    policy_gradient_loss | -0.000268     |
|    value_loss           | 0.416         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 56           |
|    iterations           | 7            |
|    time_elapsed         | 63           |
|    total_timesteps      | 3584         |
| train/                  |              |
|    approx_kl            | 6.041024e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.95        |
|    explained_variance   | -0.00205     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0821       |
|    n_updates            | 60           |
|    policy_gradient_loss | -1.59e-05    |
|    value_loss           | 0.456        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 56            |
|    iterations           | 8             |
|    time_elapsed         | 73            |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 2.1951157e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | -0.00957      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.269         |
|    n_updates            | 70            |
|    policy_gradient_loss | -0.000395     |
|    value_loss           | 0.48          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 55            |
|    iterations           | 9             |
|    time_elapsed         | 82            |
|    total_timesteps      | 4608          |
| train/                  |               |
|    approx_kl            | 1.7889426e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 0.00678       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.104         |
|    n_updates            | 80            |
|    policy_gradient_loss | -0.000271     |
|    value_loss           | 0.474         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 55            |
|    iterations           | 10            |
|    time_elapsed         | 91            |
|    total_timesteps      | 5120          |
| train/                  |               |
|    approx_kl            | 1.3526529e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 0.00834       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0884        |
|    n_updates            | 90            |
|    policy_gradient_loss | -0.000228     |
|    value_loss           | 0.205         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 55            |
|    iterations           | 11            |
|    time_elapsed         | 100           |
|    total_timesteps      | 5632          |
| train/                  |               |
|    approx_kl            | 2.3837783e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 0.00282       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0947        |
|    n_updates            | 100           |
|    policy_gradient_loss | -0.000419     |
|    value_loss           | 0.246         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 55            |
|    iterations           | 12            |
|    time_elapsed         | 109           |
|    total_timesteps      | 6144          |
| train/                  |               |
|    approx_kl            | 2.7066679e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 0.00037       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.106         |
|    n_updates            | 110           |
|    policy_gradient_loss | -0.000474     |
|    value_loss           | 0.264         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 55            |
|    iterations           | 13            |
|    time_elapsed         | 119           |
|    total_timesteps      | 6656          |
| train/                  |               |
|    approx_kl            | 6.4265914e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | -0.00936      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0738        |
|    n_updates            | 120           |
|    policy_gradient_loss | -4.83e-05     |
|    value_loss           | 0.18          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 55            |
|    iterations           | 14            |
|    time_elapsed         | 129           |
|    total_timesteps      | 7168          |
| train/                  |               |
|    approx_kl            | 5.1592244e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.000141      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.108         |
|    n_updates            | 130           |
|    policy_gradient_loss | -0.000842     |
|    value_loss           | 0.242         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 55           |
|    iterations           | 15           |
|    time_elapsed         | 138          |
|    total_timesteps      | 7680         |
| train/                  |              |
|    approx_kl            | 9.782612e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0.0212       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0593       |
|    n_updates            | 140          |
|    policy_gradient_loss | -0.000139    |
|    value_loss           | 0.159        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 55            |
|    iterations           | 16            |
|    time_elapsed         | 147           |
|    total_timesteps      | 8192          |
| train/                  |               |
|    approx_kl            | 4.5523164e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -0.00585      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0464        |
|    n_updates            | 150           |
|    policy_gradient_loss | -0.000675     |
|    value_loss           | 0.154         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 55           |
|    iterations           | 17           |
|    time_elapsed         | 157          |
|    total_timesteps      | 8704         |
| train/                  |              |
|    approx_kl            | 7.064082e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0.0216       |
|    learning_rate        | 1e-06        |
|    loss                 | 136          |
|    n_updates            | 160          |
|    policy_gradient_loss | -0.000372    |
|    value_loss           | 392          |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 55          |
|    iterations           | 18          |
|    time_elapsed         | 166         |
|    total_timesteps      | 9216        |
| train/                  |             |
|    approx_kl            | 1.45511e-05 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.94       |
|    explained_variance   | 0.0209      |
|    learning_rate        | 1e-06       |
|    loss                 | 125         |
|    n_updates            | 170         |
|    policy_gradient_loss | -0.000157   |
|    value_loss           | 162         |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 55           |
|    iterations           | 19           |
|    time_elapsed         | 175          |
|    total_timesteps      | 9728         |
| train/                  |              |
|    approx_kl            | 3.090978e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | -0.0174      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.213        |
|    n_updates            | 180          |
|    policy_gradient_loss | -0.000225    |
|    value_loss           | 1.11         |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 55            |
|    iterations           | 20            |
|    time_elapsed         | 185           |
|    total_timesteps      | 10240         |
| train/                  |               |
|    approx_kl            | 1.4306512e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.0183        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.116         |
|    n_updates            | 190           |
|    policy_gradient_loss | -3.48e-05     |
|    value_loss           | 0.765         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 55           |
|    iterations           | 21           |
|    time_elapsed         | 195          |
|    total_timesteps      | 10752        |
| train/                  |              |
|    approx_kl            | 3.111316e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0.259        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.132        |
|    n_updates            | 200          |
|    policy_gradient_loss | -2.42e-05    |
|    value_loss           | 0.586        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 22            |
|    time_elapsed         | 204           |
|    total_timesteps      | 11264         |
| train/                  |               |
|    approx_kl            | 1.1687749e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.0164        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.182         |
|    n_updates            | 210           |
|    policy_gradient_loss | -0.000131     |
|    value_loss           | 0.497         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 23           |
|    time_elapsed         | 214          |
|    total_timesteps      | 11776        |
| train/                  |              |
|    approx_kl            | 3.366929e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | -0.0217      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.126        |
|    n_updates            | 220          |
|    policy_gradient_loss | -0.000373    |
|    value_loss           | 0.419        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 24            |
|    time_elapsed         | 223           |
|    total_timesteps      | 12288         |
| train/                  |               |
|    approx_kl            | 2.2865366e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -0.00453      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.209         |
|    n_updates            | 230           |
|    policy_gradient_loss | -0.00031      |
|    value_loss           | 0.378         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 25           |
|    time_elapsed         | 232          |
|    total_timesteps      | 12800        |
| train/                  |              |
|    approx_kl            | 7.704715e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0.00142      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.143        |
|    n_updates            | 240          |
|    policy_gradient_loss | -0.000143    |
|    value_loss           | 0.33         |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 26            |
|    time_elapsed         | 242           |
|    total_timesteps      | 13312         |
| train/                  |               |
|    approx_kl            | 2.3172237e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -0.00158      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.124         |
|    n_updates            | 250           |
|    policy_gradient_loss | -0.000221     |
|    value_loss           | 0.243         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 27            |
|    time_elapsed         | 251           |
|    total_timesteps      | 13824         |
| train/                  |               |
|    approx_kl            | 2.2911816e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.0046        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.137         |
|    n_updates            | 260           |
|    policy_gradient_loss | -0.000234     |
|    value_loss           | 0.28          |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 28           |
|    time_elapsed         | 260          |
|    total_timesteps      | 14336        |
| train/                  |              |
|    approx_kl            | 4.179147e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | -0.0132      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0721       |
|    n_updates            | 270          |
|    policy_gradient_loss | -0.000416    |
|    value_loss           | 0.133        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 29            |
|    time_elapsed         | 270           |
|    total_timesteps      | 14848         |
| train/                  |               |
|    approx_kl            | 4.0907995e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -0.0206       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0628        |
|    n_updates            | 280           |
|    policy_gradient_loss | -0.00051      |
|    value_loss           | 0.152         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 30           |
|    time_elapsed         | 279          |
|    total_timesteps      | 15360        |
| train/                  |              |
|    approx_kl            | 0.0001452472 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0.0101       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.146        |
|    n_updates            | 290          |
|    policy_gradient_loss | -0.00118     |
|    value_loss           | 0.294        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 31            |
|    time_elapsed         | 288           |
|    total_timesteps      | 15872         |
| train/                  |               |
|    approx_kl            | 5.5975746e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.0193        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0945        |
|    n_updates            | 300           |
|    policy_gradient_loss | -0.000447     |
|    value_loss           | 0.167         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 32            |
|    time_elapsed         | 298           |
|    total_timesteps      | 16384         |
| train/                  |               |
|    approx_kl            | 3.1865085e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.00622       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0574        |
|    n_updates            | 310           |
|    policy_gradient_loss | -0.000311     |
|    value_loss           | 0.1           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 33            |
|    time_elapsed         | 307           |
|    total_timesteps      | 16896         |
| train/                  |               |
|    approx_kl            | 0.00011146837 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.0614        |
|    learning_rate        | 1e-06         |
|    loss                 | 239           |
|    n_updates            | 320           |
|    policy_gradient_loss | -6.88e-05     |
|    value_loss           | 613           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 34            |
|    time_elapsed         | 317           |
|    total_timesteps      | 17408         |
| train/                  |               |
|    approx_kl            | 1.1129654e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.45          |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0926        |
|    n_updates            | 330           |
|    policy_gradient_loss | -2.45e-05     |
|    value_loss           | 1.17          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 35            |
|    time_elapsed         | 326           |
|    total_timesteps      | 17920         |
| train/                  |               |
|    approx_kl            | 2.0167907e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.133         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.127         |
|    n_updates            | 340           |
|    policy_gradient_loss | -0.00014      |
|    value_loss           | 0.558         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 36            |
|    time_elapsed         | 335           |
|    total_timesteps      | 18432         |
| train/                  |               |
|    approx_kl            | 1.2072967e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.0219        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0496        |
|    n_updates            | 350           |
|    policy_gradient_loss | -5e-05        |
|    value_loss           | 0.36          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 37            |
|    time_elapsed         | 345           |
|    total_timesteps      | 18944         |
| train/                  |               |
|    approx_kl            | 2.7427217e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | 0.0122        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0782        |
|    n_updates            | 360           |
|    policy_gradient_loss | -0.000193     |
|    value_loss           | 0.296         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 38            |
|    time_elapsed         | 354           |
|    total_timesteps      | 19456         |
| train/                  |               |
|    approx_kl            | 1.2980541e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0.0128        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0642        |
|    n_updates            | 370           |
|    policy_gradient_loss | -0.000108     |
|    value_loss           | 0.232         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 39           |
|    time_elapsed         | 364          |
|    total_timesteps      | 19968        |
| train/                  |              |
|    approx_kl            | 3.705814e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | -0.00682     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.138        |
|    n_updates            | 380          |
|    policy_gradient_loss | -0.000531    |
|    value_loss           | 0.282        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 40            |
|    time_elapsed         | 373           |
|    total_timesteps      | 20480         |
| train/                  |               |
|    approx_kl            | 2.8641196e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | -0.0161       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.032         |
|    n_updates            | 390           |
|    policy_gradient_loss | -0.000322     |
|    value_loss           | 0.149         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 41            |
|    time_elapsed         | 382           |
|    total_timesteps      | 20992         |
| train/                  |               |
|    approx_kl            | 2.5125686e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0.0236        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.112         |
|    n_updates            | 400           |
|    policy_gradient_loss | -0.000216     |
|    value_loss           | 0.182         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 42            |
|    time_elapsed         | 392           |
|    total_timesteps      | 21504         |
| train/                  |               |
|    approx_kl            | 4.1785766e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0.0347        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0911        |
|    n_updates            | 410           |
|    policy_gradient_loss | -0.000424     |
|    value_loss           | 0.156         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 43            |
|    time_elapsed         | 401           |
|    total_timesteps      | 22016         |
| train/                  |               |
|    approx_kl            | 1.9603292e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0.00986       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.136         |
|    n_updates            | 420           |
|    policy_gradient_loss | -0.000166     |
|    value_loss           | 0.202         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 44            |
|    time_elapsed         | 410           |
|    total_timesteps      | 22528         |
| train/                  |               |
|    approx_kl            | 2.9115705e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0.0257        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0844        |
|    n_updates            | 430           |
|    policy_gradient_loss | -0.000224     |
|    value_loss           | 0.166         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 45            |
|    time_elapsed         | 420           |
|    total_timesteps      | 23040         |
| train/                  |               |
|    approx_kl            | 4.4895336e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0.59          |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0662        |
|    n_updates            | 440           |
|    policy_gradient_loss | -0.000518     |
|    value_loss           | 0.15          |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 46           |
|    time_elapsed         | 429          |
|    total_timesteps      | 23552        |
| train/                  |              |
|    approx_kl            | 3.066694e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | 0.0205       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0844       |
|    n_updates            | 450          |
|    policy_gradient_loss | -0.000269    |
|    value_loss           | 0.211        |
------------------------------------------


--------------------------------------------
| time/                   |                |
|    fps                  | 54             |
|    iterations           | 47             |
|    time_elapsed         | 438            |
|    total_timesteps      | 24064          |
| train/                  |                |
|    approx_kl            | 0.000116314855 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.93          |
|    explained_variance   | 0.0115         |
|    learning_rate        | 1e-06          |
|    loss                 | 0.0316         |
|    n_updates            | 460            |
|    policy_gradient_loss | -0.00122       |
|    value_loss           | 0.137          |
--------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 48           |
|    time_elapsed         | 448          |
|    total_timesteps      | 24576        |
| train/                  |              |
|    approx_kl            | 9.620329e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | -0.000618    |
|    learning_rate        | 1e-06        |
|    loss                 | 7.27         |
|    n_updates            | 470          |
|    policy_gradient_loss | -4.5e-05     |
|    value_loss           | 9.7          |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 49            |
|    time_elapsed         | 457           |
|    total_timesteps      | 25088         |
| train/                  |               |
|    approx_kl            | 0.00012329652 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0.0292        |
|    learning_rate        | 1e-06         |
|    loss                 | 194           |
|    n_updates            | 480           |
|    policy_gradient_loss | 8.33e-05      |
|    value_loss           | 478           |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 50           |
|    time_elapsed         | 466          |
|    total_timesteps      | 25600        |
| train/                  |              |
|    approx_kl            | 5.645561e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | 0.0272       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.134        |
|    n_updates            | 490          |
|    policy_gradient_loss | -0.000393    |
|    value_loss           | 0.927        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 51            |
|    time_elapsed         | 475           |
|    total_timesteps      | 26112         |
| train/                  |               |
|    approx_kl            | 4.4876128e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | -0.0389       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.076         |
|    n_updates            | 500           |
|    policy_gradient_loss | -0.000332     |
|    value_loss           | 0.409         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 52            |
|    time_elapsed         | 484           |
|    total_timesteps      | 26624         |
| train/                  |               |
|    approx_kl            | 3.1827833e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0.0687        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0842        |
|    n_updates            | 510           |
|    policy_gradient_loss | -0.000223     |
|    value_loss           | 0.282         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 53           |
|    time_elapsed         | 493          |
|    total_timesteps      | 27136        |
| train/                  |              |
|    approx_kl            | 3.602798e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | 0.0209       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0679       |
|    n_updates            | 520          |
|    policy_gradient_loss | -0.000286    |
|    value_loss           | 0.256        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 54            |
|    time_elapsed         | 503           |
|    total_timesteps      | 27648         |
| train/                  |               |
|    approx_kl            | 3.2588723e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | 0.00322       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0651        |
|    n_updates            | 530           |
|    policy_gradient_loss | -0.000326     |
|    value_loss           | 0.209         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 55           |
|    time_elapsed         | 512          |
|    total_timesteps      | 28160        |
| train/                  |              |
|    approx_kl            | 3.641704e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | -0.0199      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0867       |
|    n_updates            | 540          |
|    policy_gradient_loss | -0.00029     |
|    value_loss           | 0.252        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 56            |
|    time_elapsed         | 522           |
|    total_timesteps      | 28672         |
| train/                  |               |
|    approx_kl            | 3.9324164e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | -0.0146       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.177         |
|    n_updates            | 550           |
|    policy_gradient_loss | -0.00058      |
|    value_loss           | 0.561         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 57            |
|    time_elapsed         | 531           |
|    total_timesteps      | 29184         |
| train/                  |               |
|    approx_kl            | 4.1130814e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | -0.0235       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.124         |
|    n_updates            | 560           |
|    policy_gradient_loss | -0.000444     |
|    value_loss           | 0.225         |
-------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 54          |
|    iterations           | 58          |
|    time_elapsed         | 541         |
|    total_timesteps      | 29696       |
| train/                  |             |
|    approx_kl            | 5.36961e-05 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.92       |
|    explained_variance   | 0.0135      |
|    learning_rate        | 1e-06       |
|    loss                 | 0.06        |
|    n_updates            | 570         |
|    policy_gradient_loss | -0.000559   |
|    value_loss           | 0.133       |
-----------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 59            |
|    time_elapsed         | 550           |
|    total_timesteps      | 30208         |
| train/                  |               |
|    approx_kl            | 5.8338046e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | -0.0329       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0574        |
|    n_updates            | 580           |
|    policy_gradient_loss | -6.09e-05     |
|    value_loss           | 0.138         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 60            |
|    time_elapsed         | 559           |
|    total_timesteps      | 30720         |
| train/                  |               |
|    approx_kl            | 5.8517093e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | 0.00316       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0406        |
|    n_updates            | 590           |
|    policy_gradient_loss | -0.000486     |
|    value_loss           | 0.126         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 61            |
|    time_elapsed         | 568           |
|    total_timesteps      | 31232         |
| train/                  |               |
|    approx_kl            | 2.8106268e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | -0.0135       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0685        |
|    n_updates            | 600           |
|    policy_gradient_loss | -0.000225     |
|    value_loss           | 0.12          |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 62           |
|    time_elapsed         | 578          |
|    total_timesteps      | 31744        |
| train/                  |              |
|    approx_kl            | 6.209337e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.92        |
|    explained_variance   | -0.00938     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0523       |
|    n_updates            | 610          |
|    policy_gradient_loss | -0.000493    |
|    value_loss           | 0.103        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 63            |
|    time_elapsed         | 587           |
|    total_timesteps      | 32256         |
| train/                  |               |
|    approx_kl            | 1.2384029e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | 0.572         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0825        |
|    n_updates            | 620           |
|    policy_gradient_loss | -0.000115     |
|    value_loss           | 0.133         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 64            |
|    time_elapsed         | 596           |
|    total_timesteps      | 32768         |
| train/                  |               |
|    approx_kl            | 0.00015639793 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | 0.0879        |
|    learning_rate        | 1e-06         |
|    loss                 | 113           |
|    n_updates            | 630           |
|    policy_gradient_loss | -4.94e-06     |
|    value_loss           | 245           |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 65           |
|    time_elapsed         | 606          |
|    total_timesteps      | 33280        |
| train/                  |              |
|    approx_kl            | 0.0003184816 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.92        |
|    explained_variance   | 0.0869       |
|    learning_rate        | 1e-06        |
|    loss                 | 55.7         |
|    n_updates            | 640          |
|    policy_gradient_loss | -0.000303    |
|    value_loss           | 118          |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 66            |
|    time_elapsed         | 615           |
|    total_timesteps      | 33792         |
| train/                  |               |
|    approx_kl            | 0.00011456141 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | -0.00908      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.167         |
|    n_updates            | 650           |
|    policy_gradient_loss | -0.000347     |
|    value_loss           | 1.16          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 67            |
|    time_elapsed         | 625           |
|    total_timesteps      | 34304         |
| train/                  |               |
|    approx_kl            | 9.1229565e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | 0.316         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.166         |
|    n_updates            | 660           |
|    policy_gradient_loss | -0.00066      |
|    value_loss           | 0.837         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 68           |
|    time_elapsed         | 634          |
|    total_timesteps      | 34816        |
| train/                  |              |
|    approx_kl            | 4.764588e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.92        |
|    explained_variance   | -0.0141      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.128        |
|    n_updates            | 670          |
|    policy_gradient_loss | -0.000201    |
|    value_loss           | 0.622        |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 69           |
|    time_elapsed         | 645          |
|    total_timesteps      | 35328        |
| train/                  |              |
|    approx_kl            | 9.314856e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.92        |
|    explained_variance   | 0.00771      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.132        |
|    n_updates            | 680          |
|    policy_gradient_loss | 3.69e-05     |
|    value_loss           | 0.613        |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 70           |
|    time_elapsed         | 655          |
|    total_timesteps      | 35840        |
| train/                  |              |
|    approx_kl            | 9.640143e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.91        |
|    explained_variance   | 0.0529       |
|    learning_rate        | 1e-06        |
|    loss                 | 50.7         |
|    n_updates            | 690          |
|    policy_gradient_loss | -0.000426    |
|    value_loss           | 83.6         |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 71            |
|    time_elapsed         | 665           |
|    total_timesteps      | 36352         |
| train/                  |               |
|    approx_kl            | 2.3442204e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.91         |
|    explained_variance   | -0.00285      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.134         |
|    n_updates            | 700           |
|    policy_gradient_loss | 0.000147      |
|    value_loss           | 0.88          |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 72           |
|    time_elapsed         | 675          |
|    total_timesteps      | 36864        |
| train/                  |              |
|    approx_kl            | 5.231204e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.91        |
|    explained_variance   | -4.86e-05    |
|    learning_rate        | 1e-06        |
|    loss                 | 0.105        |
|    n_updates            | 710          |
|    policy_gradient_loss | -0.000228    |
|    value_loss           | 0.589        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 73            |
|    time_elapsed         | 685           |
|    total_timesteps      | 37376         |
| train/                  |               |
|    approx_kl            | 3.2544835e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | 0.152         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.128         |
|    n_updates            | 720           |
|    policy_gradient_loss | -0.000139     |
|    value_loss           | 0.463         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 74            |
|    time_elapsed         | 695           |
|    total_timesteps      | 37888         |
| train/                  |               |
|    approx_kl            | 8.4220665e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | -0.00965      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.126         |
|    n_updates            | 730           |
|    policy_gradient_loss | -0.000533     |
|    value_loss           | 0.408         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 75            |
|    time_elapsed         | 705           |
|    total_timesteps      | 38400         |
| train/                  |               |
|    approx_kl            | 6.3800835e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | 0.01          |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0815        |
|    n_updates            | 740           |
|    policy_gradient_loss | -0.00035      |
|    value_loss           | 0.242         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 76           |
|    time_elapsed         | 716          |
|    total_timesteps      | 38912        |
| train/                  |              |
|    approx_kl            | 4.127575e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.92        |
|    explained_variance   | 0.00172      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0713       |
|    n_updates            | 750          |
|    policy_gradient_loss | -0.000274    |
|    value_loss           | 0.202        |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 77           |
|    time_elapsed         | 726          |
|    total_timesteps      | 39424        |
| train/                  |              |
|    approx_kl            | 4.323898e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.92        |
|    explained_variance   | 0.00707      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0735       |
|    n_updates            | 760          |
|    policy_gradient_loss | -0.000361    |
|    value_loss           | 0.181        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 78            |
|    time_elapsed         | 735           |
|    total_timesteps      | 39936         |
| train/                  |               |
|    approx_kl            | 6.2099774e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | -0.0219       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0589        |
|    n_updates            | 770           |
|    policy_gradient_loss | -0.00034      |
|    value_loss           | 0.164         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 79           |
|    time_elapsed         | 745          |
|    total_timesteps      | 40448        |
| train/                  |              |
|    approx_kl            | 9.389396e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.91        |
|    explained_variance   | 0.00216      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0526       |
|    n_updates            | 780          |
|    policy_gradient_loss | -0.00059     |
|    value_loss           | 0.119        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 80            |
|    time_elapsed         | 755           |
|    total_timesteps      | 40960         |
| train/                  |               |
|    approx_kl            | 0.00065147784 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.91         |
|    explained_variance   | 0.0164        |
|    learning_rate        | 1e-06         |
|    loss                 | 121           |
|    n_updates            | 790           |
|    policy_gradient_loss | -0.00172      |
|    value_loss           | 351           |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 81           |
|    time_elapsed         | 764          |
|    total_timesteps      | 41472        |
| train/                  |              |
|    approx_kl            | 9.730342e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.91        |
|    explained_variance   | 0.286        |
|    learning_rate        | 1e-06        |
|    loss                 | 5.75         |
|    n_updates            | 800          |
|    policy_gradient_loss | -7.78e-06    |
|    value_loss           | 14.3         |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 82           |
|    time_elapsed         | 774          |
|    total_timesteps      | 41984        |
| train/                  |              |
|    approx_kl            | 9.970786e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.91        |
|    explained_variance   | 0.696        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0951       |
|    n_updates            | 810          |
|    policy_gradient_loss | -0.000597    |
|    value_loss           | 0.454        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 83            |
|    time_elapsed         | 784           |
|    total_timesteps      | 42496         |
| train/                  |               |
|    approx_kl            | 1.1444092e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.91         |
|    explained_variance   | -0.0175       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0751        |
|    n_updates            | 820           |
|    policy_gradient_loss | 0.000152      |
|    value_loss           | 0.422         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 84            |
|    time_elapsed         | 793           |
|    total_timesteps      | 43008         |
| train/                  |               |
|    approx_kl            | 3.2641925e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.91         |
|    explained_variance   | 0.0511        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0481        |
|    n_updates            | 830           |
|    policy_gradient_loss | -0.000142     |
|    value_loss           | 0.321         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 85            |
|    time_elapsed         | 803           |
|    total_timesteps      | 43520         |
| train/                  |               |
|    approx_kl            | 0.00013939431 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.9          |
|    explained_variance   | 0.105         |
|    learning_rate        | 1e-06         |
|    loss                 | 42.5          |
|    n_updates            | 840           |
|    policy_gradient_loss | 9.47e-05      |
|    value_loss           | 74            |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 86           |
|    time_elapsed         | 812          |
|    total_timesteps      | 44032        |
| train/                  |              |
|    approx_kl            | 0.0017064936 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.89        |
|    explained_variance   | 0.132        |
|    learning_rate        | 1e-06        |
|    loss                 | 240          |
|    n_updates            | 850          |
|    policy_gradient_loss | 0.000516     |
|    value_loss           | 581          |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 54          |
|    iterations           | 87          |
|    time_elapsed         | 822         |
|    total_timesteps      | 44544       |
| train/                  |             |
|    approx_kl            | 0.000153995 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.89       |
|    explained_variance   | 0.0192      |
|    learning_rate        | 1e-06       |
|    loss                 | 0.145       |
|    n_updates            | 860         |
|    policy_gradient_loss | -0.000178   |
|    value_loss           | 3.44        |
-----------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 88            |
|    time_elapsed         | 831           |
|    total_timesteps      | 45056         |
| train/                  |               |
|    approx_kl            | 0.00018839678 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.89         |
|    explained_variance   | 0.438         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.13          |
|    n_updates            | 870           |
|    policy_gradient_loss | -0.000349     |
|    value_loss           | 1.35          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 89            |
|    time_elapsed         | 841           |
|    total_timesteps      | 45568         |
| train/                  |               |
|    approx_kl            | 6.0028047e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.89         |
|    explained_variance   | 0.0033        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.178         |
|    n_updates            | 880           |
|    policy_gradient_loss | 0.000136      |
|    value_loss           | 1.02          |
-------------------------------------------


--------------------------------------------
| time/                   |                |
|    fps                  | 54             |
|    iterations           | 90             |
|    time_elapsed         | 851            |
|    total_timesteps      | 46080          |
| train/                  |                |
|    approx_kl            | 1.13702845e-05 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.89          |
|    explained_variance   | -0.0257        |
|    learning_rate        | 1e-06          |
|    loss                 | 0.0686         |
|    n_updates            | 890            |
|    policy_gradient_loss | 0.000407       |
|    value_loss           | 0.832          |
--------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 91            |
|    time_elapsed         | 861           |
|    total_timesteps      | 46592         |
| train/                  |               |
|    approx_kl            | 5.0259056e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.89         |
|    explained_variance   | 0.364         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.101         |
|    n_updates            | 900           |
|    policy_gradient_loss | -5.78e-05     |
|    value_loss           | 0.598         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 92           |
|    time_elapsed         | 870          |
|    total_timesteps      | 47104        |
| train/                  |              |
|    approx_kl            | 7.944286e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.89        |
|    explained_variance   | -0.00209     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.086        |
|    n_updates            | 910          |
|    policy_gradient_loss | -0.00034     |
|    value_loss           | 0.517        |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 93           |
|    time_elapsed         | 880          |
|    total_timesteps      | 47616        |
| train/                  |              |
|    approx_kl            | 0.0013788218 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.89        |
|    explained_variance   | 0.0974       |
|    learning_rate        | 1e-06        |
|    loss                 | 100          |
|    n_updates            | 920          |
|    policy_gradient_loss | -0.000688    |
|    value_loss           | 319          |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 94           |
|    time_elapsed         | 890          |
|    total_timesteps      | 48128        |
| train/                  |              |
|    approx_kl            | 0.0001160407 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.88        |
|    explained_variance   | 0.472        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.151        |
|    n_updates            | 930          |
|    policy_gradient_loss | -0.000836    |
|    value_loss           | 7.5          |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 95           |
|    time_elapsed         | 900          |
|    total_timesteps      | 48640        |
| train/                  |              |
|    approx_kl            | 9.230524e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.89        |
|    explained_variance   | 0.0169       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0576       |
|    n_updates            | 940          |
|    policy_gradient_loss | -0.000208    |
|    value_loss           | 0.886        |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 96           |
|    time_elapsed         | 910          |
|    total_timesteps      | 49152        |
| train/                  |              |
|    approx_kl            | 6.145483e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.89        |
|    explained_variance   | 0.118        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0801       |
|    n_updates            | 950          |
|    policy_gradient_loss | -5.73e-06    |
|    value_loss           | 0.857        |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 97           |
|    time_elapsed         | 920          |
|    total_timesteps      | 49664        |
| train/                  |              |
|    approx_kl            | 0.0001031406 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.89        |
|    explained_variance   | 0.204        |
|    learning_rate        | 1e-06        |
|    loss                 | 24.8         |
|    n_updates            | 960          |
|    policy_gradient_loss | 3.39e-05     |
|    value_loss           | 70.5         |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 98            |
|    time_elapsed         | 929           |
|    total_timesteps      | 50176         |
| train/                  |               |
|    approx_kl            | 0.00020848296 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.89         |
|    explained_variance   | 0.622         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.121         |
|    n_updates            | 970           |
|    policy_gradient_loss | -0.000649     |
|    value_loss           | 1.27          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 99            |
|    time_elapsed         | 938           |
|    total_timesteps      | 50688         |
| train/                  |               |
|    approx_kl            | 4.6965666e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.89         |
|    explained_variance   | 0.00704       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0961        |
|    n_updates            | 980           |
|    policy_gradient_loss | 0.000444      |
|    value_loss           | 0.886         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 100          |
|    time_elapsed         | 948          |
|    total_timesteps      | 51200        |
| train/                  |              |
|    approx_kl            | 0.0001258296 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.89        |
|    explained_variance   | -0.0177      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.079        |
|    n_updates            | 990          |
|    policy_gradient_loss | -0.000704    |
|    value_loss           | 0.681        |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 101          |
|    time_elapsed         | 957          |
|    total_timesteps      | 51712        |
| train/                  |              |
|    approx_kl            | 0.0001140479 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.89        |
|    explained_variance   | 0.494        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0722       |
|    n_updates            | 1000         |
|    policy_gradient_loss | -0.000513    |
|    value_loss           | 0.499        |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 102          |
|    time_elapsed         | 966          |
|    total_timesteps      | 52224        |
| train/                  |              |
|    approx_kl            | 7.373502e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.89        |
|    explained_variance   | 0.0637       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.081        |
|    n_updates            | 1010         |
|    policy_gradient_loss | -4.25e-05    |
|    value_loss           | 0.387        |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 103          |
|    time_elapsed         | 976          |
|    total_timesteps      | 52736        |
| train/                  |              |
|    approx_kl            | 5.884678e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.89        |
|    explained_variance   | 0.291        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.125        |
|    n_updates            | 1020         |
|    policy_gradient_loss | -0.000129    |
|    value_loss           | 0.436        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 104           |
|    time_elapsed         | 985           |
|    total_timesteps      | 53248         |
| train/                  |               |
|    approx_kl            | 9.3515846e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.89         |
|    explained_variance   | -0.00824      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.142         |
|    n_updates            | 1030          |
|    policy_gradient_loss | -0.000444     |
|    value_loss           | 0.423         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 105           |
|    time_elapsed         | 994           |
|    total_timesteps      | 53760         |
| train/                  |               |
|    approx_kl            | 4.5177527e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.89         |
|    explained_variance   | -0.00343      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.112         |
|    n_updates            | 1040          |
|    policy_gradient_loss | -0.000311     |
|    value_loss           | 0.406         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 106          |
|    time_elapsed         | 1004         |
|    total_timesteps      | 54272        |
| train/                  |              |
|    approx_kl            | 6.182934e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.89        |
|    explained_variance   | 0.0422       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.136        |
|    n_updates            | 1050         |
|    policy_gradient_loss | -0.000255    |
|    value_loss           | 0.385        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 107           |
|    time_elapsed         | 1013          |
|    total_timesteps      | 54784         |
| train/                  |               |
|    approx_kl            | 4.4761342e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.89         |
|    explained_variance   | 0.00158       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.113         |
|    n_updates            | 1060          |
|    policy_gradient_loss | -0.000252     |
|    value_loss           | 0.303         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 108          |
|    time_elapsed         | 1022         |
|    total_timesteps      | 55296        |
| train/                  |              |
|    approx_kl            | 7.851631e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.88        |
|    explained_variance   | 0.168        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.873        |
|    n_updates            | 1070         |
|    policy_gradient_loss | 2.74e-05     |
|    value_loss           | 2.2          |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 109          |
|    time_elapsed         | 1032         |
|    total_timesteps      | 55808        |
| train/                  |              |
|    approx_kl            | 0.0014442133 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.87        |
|    explained_variance   | 0.111        |
|    learning_rate        | 1e-06        |
|    loss                 | 148          |
|    n_updates            | 1080         |
|    policy_gradient_loss | -0.000704    |
|    value_loss           | 375          |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 110          |
|    time_elapsed         | 1041         |
|    total_timesteps      | 56320        |
| train/                  |              |
|    approx_kl            | 8.798821e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.87        |
|    explained_variance   | -0.07        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.211        |
|    n_updates            | 1090         |
|    policy_gradient_loss | -0.0003      |
|    value_loss           | 1.6          |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 54          |
|    iterations           | 111         |
|    time_elapsed         | 1050        |
|    total_timesteps      | 56832       |
| train/                  |             |
|    approx_kl            | 6.73458e-05 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.87       |
|    explained_variance   | 0.235       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.2         |
|    n_updates            | 1100        |
|    policy_gradient_loss | -0.000283   |
|    value_loss           | 0.642       |
-----------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 112           |
|    time_elapsed         | 1060          |
|    total_timesteps      | 57344         |
| train/                  |               |
|    approx_kl            | 5.1407143e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.87         |
|    explained_variance   | -0.0457       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0779        |
|    n_updates            | 1110          |
|    policy_gradient_loss | -5.83e-05     |
|    value_loss           | 0.475         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 113           |
|    time_elapsed         | 1069          |
|    total_timesteps      | 57856         |
| train/                  |               |
|    approx_kl            | 0.00011818437 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.87         |
|    explained_variance   | 0.0778        |
|    learning_rate        | 1e-06         |
|    loss                 | 43            |
|    n_updates            | 1120          |
|    policy_gradient_loss | 2.73e-05      |
|    value_loss           | 86.8          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 114           |
|    time_elapsed         | 1079          |
|    total_timesteps      | 58368         |
| train/                  |               |
|    approx_kl            | 0.00013261661 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.87         |
|    explained_variance   | -0.215        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0879        |
|    n_updates            | 1130          |
|    policy_gradient_loss | -0.000511     |
|    value_loss           | 0.799         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 115           |
|    time_elapsed         | 1089          |
|    total_timesteps      | 58880         |
| train/                  |               |
|    approx_kl            | 2.8225593e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.87         |
|    explained_variance   | -0.0029       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.111         |
|    n_updates            | 1140          |
|    policy_gradient_loss | 0.000199      |
|    value_loss           | 0.489         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 116           |
|    time_elapsed         | 1098          |
|    total_timesteps      | 59392         |
| train/                  |               |
|    approx_kl            | 0.00012221199 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.87         |
|    explained_variance   | 0.0672        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0787        |
|    n_updates            | 1150          |
|    policy_gradient_loss | -0.000515     |
|    value_loss           | 0.41          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 117           |
|    time_elapsed         | 1108          |
|    total_timesteps      | 59904         |
| train/                  |               |
|    approx_kl            | 0.00021157961 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.87         |
|    explained_variance   | 0.134         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.102         |
|    n_updates            | 1160          |
|    policy_gradient_loss | -0.000782     |
|    value_loss           | 0.294         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 118          |
|    time_elapsed         | 1118         |
|    total_timesteps      | 60416        |
| train/                  |              |
|    approx_kl            | 0.0002355742 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.87        |
|    explained_variance   | -0.0519      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0551       |
|    n_updates            | 1170         |
|    policy_gradient_loss | -0.00114     |
|    value_loss           | 0.204        |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 119          |
|    time_elapsed         | 1127         |
|    total_timesteps      | 60928        |
| train/                  |              |
|    approx_kl            | 9.611552e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.87        |
|    explained_variance   | 0.179        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0481       |
|    n_updates            | 1180         |
|    policy_gradient_loss | -0.000498    |
|    value_loss           | 0.182        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 120           |
|    time_elapsed         | 1138          |
|    total_timesteps      | 61440         |
| train/                  |               |
|    approx_kl            | 0.00022383744 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.86         |
|    explained_variance   | 0.0301        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.055         |
|    n_updates            | 1190          |
|    policy_gradient_loss | -0.00108      |
|    value_loss           | 0.168         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 121           |
|    time_elapsed         | 1151          |
|    total_timesteps      | 61952         |
| train/                  |               |
|    approx_kl            | 0.00022715912 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.86         |
|    explained_variance   | -0.0051       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0559        |
|    n_updates            | 1200          |
|    policy_gradient_loss | -0.00143      |
|    value_loss           | 0.117         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 122           |
|    time_elapsed         | 1161          |
|    total_timesteps      | 62464         |
| train/                  |               |
|    approx_kl            | 0.00017324742 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.86         |
|    explained_variance   | -0.00679      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0577        |
|    n_updates            | 1210          |
|    policy_gradient_loss | -0.000997     |
|    value_loss           | 0.144         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 123           |
|    time_elapsed         | 1171          |
|    total_timesteps      | 62976         |
| train/                  |               |
|    approx_kl            | 0.00014909869 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.86         |
|    explained_variance   | -0.021        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0378        |
|    n_updates            | 1220          |
|    policy_gradient_loss | -0.000692     |
|    value_loss           | 0.104         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 124           |
|    time_elapsed         | 1180          |
|    total_timesteps      | 63488         |
| train/                  |               |
|    approx_kl            | 0.00089565525 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.85         |
|    explained_variance   | 0.155         |
|    learning_rate        | 1e-06         |
|    loss                 | 103           |
|    n_updates            | 1230          |
|    policy_gradient_loss | 0.0009        |
|    value_loss           | 236           |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 125          |
|    time_elapsed         | 1190         |
|    total_timesteps      | 64000        |
| train/                  |              |
|    approx_kl            | 0.0005708947 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.83        |
|    explained_variance   | 0.252        |
|    learning_rate        | 1e-06        |
|    loss                 | 74           |
|    n_updates            | 1240         |
|    policy_gradient_loss | 6.17e-05     |
|    value_loss           | 145          |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 126           |
|    time_elapsed         | 1200          |
|    total_timesteps      | 64512         |
| train/                  |               |
|    approx_kl            | 6.0344697e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.83         |
|    explained_variance   | -0.0139       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0532        |
|    n_updates            | 1250          |
|    policy_gradient_loss | 2.35e-05      |
|    value_loss           | 0.845         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 127           |
|    time_elapsed         | 1209          |
|    total_timesteps      | 65024         |
| train/                  |               |
|    approx_kl            | 0.00012062315 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.84         |
|    explained_variance   | 0.00697       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.086         |
|    n_updates            | 1260          |
|    policy_gradient_loss | -0.00032      |
|    value_loss           | 0.569         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 128          |
|    time_elapsed         | 1219         |
|    total_timesteps      | 65536        |
| train/                  |              |
|    approx_kl            | 6.344658e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.84        |
|    explained_variance   | 0.025        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.064        |
|    n_updates            | 1270         |
|    policy_gradient_loss | -0.000147    |
|    value_loss           | 0.447        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 129           |
|    time_elapsed         | 1228          |
|    total_timesteps      | 66048         |
| train/                  |               |
|    approx_kl            | 0.00010009913 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.84         |
|    explained_variance   | 0.0543        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0447        |
|    n_updates            | 1280          |
|    policy_gradient_loss | -0.000418     |
|    value_loss           | 0.292         |
-------------------------------------------


--------------------------------------------
| time/                   |                |
|    fps                  | 53             |
|    iterations           | 130            |
|    time_elapsed         | 1237           |
|    total_timesteps      | 66560          |
| train/                  |                |
|    approx_kl            | 0.000108431675 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -1.84          |
|    explained_variance   | 0.0124         |
|    learning_rate        | 1e-06          |
|    loss                 | 0.0695         |
|    n_updates            | 1290           |
|    policy_gradient_loss | -0.000344      |
|    value_loss           | 0.242          |
--------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 131          |
|    time_elapsed         | 1246         |
|    total_timesteps      | 67072        |
| train/                  |              |
|    approx_kl            | 5.544629e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.84        |
|    explained_variance   | -0.00798     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0386       |
|    n_updates            | 1300         |
|    policy_gradient_loss | -0.000162    |
|    value_loss           | 0.202        |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 132          |
|    time_elapsed         | 1256         |
|    total_timesteps      | 67584        |
| train/                  |              |
|    approx_kl            | 9.159336e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.84        |
|    explained_variance   | -0.000851    |
|    learning_rate        | 1e-06        |
|    loss                 | 0.069        |
|    n_updates            | 1310         |
|    policy_gradient_loss | -0.000433    |
|    value_loss           | 0.171        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 133           |
|    time_elapsed         | 1266          |
|    total_timesteps      | 68096         |
| train/                  |               |
|    approx_kl            | 0.00010001799 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.84         |
|    explained_variance   | -0.0114       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.042         |
|    n_updates            | 1320          |
|    policy_gradient_loss | -0.000447     |
|    value_loss           | 0.135         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 134           |
|    time_elapsed         | 1276          |
|    total_timesteps      | 68608         |
| train/                  |               |
|    approx_kl            | 0.00028706423 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.84         |
|    explained_variance   | 0.288         |
|    learning_rate        | 1e-06         |
|    loss                 | 35.2          |
|    n_updates            | 1330          |
|    policy_gradient_loss | 0.00076       |
|    value_loss           | 71.5          |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 135          |
|    time_elapsed         | 1286         |
|    total_timesteps      | 69120        |
| train/                  |              |
|    approx_kl            | 9.713927e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.83        |
|    explained_variance   | -0.0634      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0919       |
|    n_updates            | 1340         |
|    policy_gradient_loss | -0.000218    |
|    value_loss           | 0.607        |
------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 136          |
|    time_elapsed         | 1295         |
|    total_timesteps      | 69632        |
| train/                  |              |
|    approx_kl            | 8.002762e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.83        |
|    explained_variance   | 0.0228       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0502       |
|    n_updates            | 1350         |
|    policy_gradient_loss | -0.000176    |
|    value_loss           | 0.393        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 137           |
|    time_elapsed         | 1305          |
|    total_timesteps      | 70144         |
| train/                  |               |
|    approx_kl            | 0.00022646668 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.83         |
|    explained_variance   | 0.0471        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.118         |
|    n_updates            | 1360          |
|    policy_gradient_loss | -0.000889     |
|    value_loss           | 0.319         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 138           |
|    time_elapsed         | 1314          |
|    total_timesteps      | 70656         |
| train/                  |               |
|    approx_kl            | 0.00035122212 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.83         |
|    explained_variance   | -0.0387       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0426        |
|    n_updates            | 1370          |
|    policy_gradient_loss | -0.00127      |
|    value_loss           | 0.224         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 139           |
|    time_elapsed         | 1324          |
|    total_timesteps      | 71168         |
| train/                  |               |
|    approx_kl            | 0.00010070368 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.82         |
|    explained_variance   | 0.0228        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0428        |
|    n_updates            | 1380          |
|    policy_gradient_loss | -0.000579     |
|    value_loss           | 0.167         |
-------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 53          |
|    iterations           | 140         |
|    time_elapsed         | 1333        |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.002523345 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.81       |
|    explained_variance   | 0.18        |
|    learning_rate        | 1e-06       |
|    loss                 | 131         |
|    n_updates            | 1390        |
|    policy_gradient_loss | -0.000785   |
|    value_loss           | 416         |
-----------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 141           |
|    time_elapsed         | 1343          |
|    total_timesteps      | 72192         |
| train/                  |               |
|    approx_kl            | 6.0559716e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.8          |
|    explained_variance   | -0.0072       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.184         |
|    n_updates            | 1400          |
|    policy_gradient_loss | 0.000363      |
|    value_loss           | 2.15          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 142           |
|    time_elapsed         | 1353          |
|    total_timesteps      | 72704         |
| train/                  |               |
|    approx_kl            | 9.9143945e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.79         |
|    explained_variance   | 0.05          |
|    learning_rate        | 1e-06         |
|    loss                 | 24.4          |
|    n_updates            | 1410          |
|    policy_gradient_loss | 0.000596      |
|    value_loss           | 71.9          |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 143          |
|    time_elapsed         | 1363         |
|    total_timesteps      | 73216        |
| train/                  |              |
|    approx_kl            | 7.660431e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.8         |
|    explained_variance   | -0.00685     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.135        |
|    n_updates            | 1420         |
|    policy_gradient_loss | 0.000518     |
|    value_loss           | 1.47         |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 144           |
|    time_elapsed         | 1373          |
|    total_timesteps      | 73728         |
| train/                  |               |
|    approx_kl            | 0.00015912019 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.8          |
|    explained_variance   | -0.00908      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.155         |
|    n_updates            | 1430          |
|    policy_gradient_loss | -0.000647     |
|    value_loss           | 1.13          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 145           |
|    time_elapsed         | 1382          |
|    total_timesteps      | 74240         |
| train/                  |               |
|    approx_kl            | 0.00023783848 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.81         |
|    explained_variance   | 0.00112       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.113         |
|    n_updates            | 1440          |
|    policy_gradient_loss | -0.000513     |
|    value_loss           | 0.7           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 146           |
|    time_elapsed         | 1392          |
|    total_timesteps      | 74752         |
| train/                  |               |
|    approx_kl            | 0.00025000726 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.81         |
|    explained_variance   | 0.589         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.15          |
|    n_updates            | 1450          |
|    policy_gradient_loss | -0.00087      |
|    value_loss           | 0.651         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 147          |
|    time_elapsed         | 1402         |
|    total_timesteps      | 75264        |
| train/                  |              |
|    approx_kl            | 8.877495e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.81        |
|    explained_variance   | 0.0241       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.12         |
|    n_updates            | 1460         |
|    policy_gradient_loss | -8.72e-05    |
|    value_loss           | 0.496        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 148           |
|    time_elapsed         | 1412          |
|    total_timesteps      | 75776         |
| train/                  |               |
|    approx_kl            | 0.00016126386 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.81         |
|    explained_variance   | -0.00528      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.132         |
|    n_updates            | 1470          |
|    policy_gradient_loss | -0.000745     |
|    value_loss           | 0.508         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 149          |
|    time_elapsed         | 1421         |
|    total_timesteps      | 76288        |
| train/                  |              |
|    approx_kl            | 2.395804e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.81        |
|    explained_variance   | -0.00626     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0801       |
|    n_updates            | 1480         |
|    policy_gradient_loss | 8.75e-06     |
|    value_loss           | 0.365        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 150           |
|    time_elapsed         | 1432          |
|    total_timesteps      | 76800         |
| train/                  |               |
|    approx_kl            | 0.00027605938 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.81         |
|    explained_variance   | -0.0156       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.081         |
|    n_updates            | 1490          |
|    policy_gradient_loss | -0.00148      |
|    value_loss           | 0.26          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 151           |
|    time_elapsed         | 1442          |
|    total_timesteps      | 77312         |
| train/                  |               |
|    approx_kl            | 0.00016219786 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.82         |
|    explained_variance   | 0.0293        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0519        |
|    n_updates            | 1500          |
|    policy_gradient_loss | -0.000637     |
|    value_loss           | 0.146         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 152          |
|    time_elapsed         | 1451         |
|    total_timesteps      | 77824        |
| train/                  |              |
|    approx_kl            | 0.0002030225 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.82        |
|    explained_variance   | -0.0332      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0409       |
|    n_updates            | 1510         |
|    policy_gradient_loss | -0.00092     |
|    value_loss           | 0.147        |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 153           |
|    time_elapsed         | 1461          |
|    total_timesteps      | 78336         |
| train/                  |               |
|    approx_kl            | 0.00016651908 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.83         |
|    explained_variance   | -0.00982      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0506        |
|    n_updates            | 1520          |
|    policy_gradient_loss | -0.000776     |
|    value_loss           | 0.119         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 154           |
|    time_elapsed         | 1470          |
|    total_timesteps      | 78848         |
| train/                  |               |
|    approx_kl            | 0.00022747181 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.83         |
|    explained_variance   | -0.0143       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0513        |
|    n_updates            | 1530          |
|    policy_gradient_loss | -0.000935     |
|    value_loss           | 0.141         |
-------------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 155          |
|    time_elapsed         | 1480         |
|    total_timesteps      | 79360        |
| train/                  |              |
|    approx_kl            | 4.737149e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.83        |
|    explained_variance   | 0.552        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.562        |
|    n_updates            | 1540         |
|    policy_gradient_loss | -0.000271    |
|    value_loss           | 1.19         |
------------------------------------------


-----------------------------------------
| time/                   |             |
|    fps                  | 53          |
|    iterations           | 156         |
|    time_elapsed         | 1489        |
|    total_timesteps      | 79872       |
| train/                  |             |
|    approx_kl            | 0.002267042 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.81       |
|    explained_variance   | 0.017       |
|    learning_rate        | 1e-06       |
|    loss                 | 136         |
|    n_updates            | 1550        |
|    policy_gradient_loss | -0.000177   |
|    value_loss           | 425         |
-----------------------------------------


------------------------------------------
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 157          |
|    time_elapsed         | 1499         |
|    total_timesteps      | 80384        |
| train/                  |              |
|    approx_kl            | 0.0012375562 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.78        |
|    explained_variance   | 0.343        |
|    learning_rate        | 1e-06        |
|    loss                 | 70.3         |
|    n_updates            | 1560         |
|    policy_gradient_loss | -0.00219     |
|    value_loss           | 142          |
------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 158           |
|    time_elapsed         | 1508          |
|    total_timesteps      | 80896         |
| train/                  |               |
|    approx_kl            | 0.00013253943 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.78         |
|    explained_variance   | 0.0107        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.306         |
|    n_updates            | 1570          |
|    policy_gradient_loss | 0.000668      |
|    value_loss           | 2.17          |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 159           |
|    time_elapsed         | 1517          |
|    total_timesteps      | 81408         |
| train/                  |               |
|    approx_kl            | 0.00016124989 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.79         |
|    explained_variance   | 0.0336        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.114         |
|    n_updates            | 1580          |
|    policy_gradient_loss | -0.000261     |
|    value_loss           | 1.5           |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 160           |
|    time_elapsed         | 1527          |
|    total_timesteps      | 81920         |
| train/                  |               |
|    approx_kl            | 6.3055195e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.8          |
|    explained_variance   | 0.00261       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0958        |
|    n_updates            | 1590          |
|    policy_gradient_loss | 0.000542      |
|    value_loss           | 0.994         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 161           |
|    time_elapsed         | 1536          |
|    total_timesteps      | 82432         |
| train/                  |               |
|    approx_kl            | 0.00021276227 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.8          |
|    explained_variance   | 0.0229        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0478        |
|    n_updates            | 1600          |
|    policy_gradient_loss | -0.000503     |
|    value_loss           | 0.703         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 162           |
|    time_elapsed         | 1546          |
|    total_timesteps      | 82944         |
| train/                  |               |
|    approx_kl            | 0.00019482302 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.8          |
|    explained_variance   | -0.00517      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0889        |
|    n_updates            | 1610          |
|    policy_gradient_loss | -0.000569     |
|    value_loss           | 0.435         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 163           |
|    time_elapsed         | 1556          |
|    total_timesteps      | 83456         |
| train/                  |               |
|    approx_kl            | 0.00040143356 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.8          |
|    explained_variance   | -0.0674       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0552        |
|    n_updates            | 1620          |
|    policy_gradient_loss | -0.00125      |
|    value_loss           | 0.354         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 164           |
|    time_elapsed         | 1567          |
|    total_timesteps      | 83968         |
| train/                  |               |
|    approx_kl            | 5.4578646e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.79         |
|    explained_variance   | -0.0945       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0567        |
|    n_updates            | 1630          |
|    policy_gradient_loss | -0.000175     |
|    value_loss           | 0.288         |
-------------------------------------------


-------------------------------------------
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 165           |
|    time_elapsed         | 1582          |
|    total_timesteps      | 84480         |
| train/                  |               |
|    approx_kl            | 0.00017387734 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.79         |
|    explained_variance   | 0.0144        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0454        |
|    n_updates            | 1640          |
|    policy_gradient_loss | -0.000547     |
|    value_loss           | 0.205         |
-------------------------------------------


In [9]:
# test the best model: 

model = PPO.load('./train/best_model_50000')

state = env.reset()
while True:     
    action, _ = model.predict(state)
    state, reward, done, info = env.step(action)
    env.render()

KeyboardInterrupt: 

: 