In this notebook we would be trying to make a car(agent) learn to solve(self-drive) around a race track(environment) using **Reinforcement Learning**.

## Import Dependencies

In [2]:
# !pip install gym[box2d] pyglet==1.5.26
# follow this: https://www.youtube.com/watch?v=e3DyCg0fgx0
# !pip install stable-baselines3[extra]

In [3]:
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import os

## Test Environment

In [5]:
# https://www.gymlibrary.ml/environments/box2d/car_racing/
# https://github.com/openai/gym/blob/master/gym/envs/box2d/car_racing.py
environment_name = 'CarRacing-v2'
env = gym.make(environment_name, render_mode="human")

In [9]:
env.action_space

Box([-1.  0.  0.], 1.0, (3,), float32)

In [10]:
env.observation_space

Box(0, 255, (96, 96, 3), uint8)

In [11]:
episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score+=reward
    print(f'Episode:{episode} Score:{score}')
env.close()

Episode:1 Score:-37.08609271523239
Episode:2 Score:-34.02777777777824
Episode:3 Score:-23.954372623574336
Episode:4 Score:-23.07692307692328
Episode:5 Score:-30.313588850174643


## Train Model

In [6]:
env = gym.make(environment_name)
env = DummyVecEnv([lambda: env])

In [7]:
model = PPO("CnnPolicy", env, verbose=1)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [8]:
model.learn(total_timesteps=5) # Just to show how a model is trained
# later we will train model over millions of timesteps

-----------------------------
| time/              |      |
|    fps             | 64   |
|    iterations      | 1    |
|    time_elapsed    | 31   |
|    total_timesteps | 2048 |
-----------------------------


<stable_baselines3.ppo.ppo.PPO at 0x18fc6914730>

## Save and Load model

In [22]:
PPO_path = os.path.join('Training', 'Saved Models', 'PPO_model')

In [11]:
model.save(PPO_path)

In [12]:
del model

In [13]:
model = PPO.load(PPO_path, env=env)

Wrapping the env in a VecTransposeImage.


## Evaluation

In [9]:
evaluate_policy(model, env, n_eval_episodes=10)



(4.68673897460103, 39.74052565126847)

## Test model

In [31]:
obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    env.render()
    if done: 
        print('info', info)
        break
env.close()

info [{'TimeLimit.truncated': True, 'terminal_observation': array([[[100, 202, 100],
        [100, 202, 100],
        [100, 202, 100],
        ...,
        [100, 202, 100],
        [100, 202, 100],
        [100, 202, 100]],

       [[100, 202, 100],
        [100, 202, 100],
        [100, 202, 100],
        ...,
        [100, 202, 100],
        [100, 202, 100],
        [100, 202, 100]],

       [[100, 202, 100],
        [100, 202, 100],
        [100, 202, 100],
        ...,
        [100, 202, 100],
        [100, 202, 100],
        [100, 202, 100]],

       ...,

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [  0, 

## Train for longer timesteps

In [39]:
# 430k Time steps
PPO_path = os.path.join('Training', 'Saved Models', 'PPO_430K_Driving_model')

model.learn(total_timesteps=430000)

model.save(PPO_path)

-----------------------------
| time/              |      |
|    fps             | 28   |
|    iterations      | 1    |
|    time_elapsed    | 71   |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 22           |
|    iterations           | 2            |
|    time_elapsed         | 180          |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0062228646 |
|    clip_fraction        | 0.0748       |
|    clip_range           | 0.2          |
|    entropy_loss         | -4.23        |
|    explained_variance   | 0.00124      |
|    learning_rate        | 0.0003       |
|    loss                 | 0.176        |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00741     |
|    std                  | 0.985        |
|    value_loss           | 0.594        |
----------------

-----------------------------------------
| time/                   |             |
|    fps                  | 30          |
|    iterations           | 12          |
|    time_elapsed         | 794         |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.020070236 |
|    clip_fraction        | 0.191       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.95       |
|    explained_variance   | 0.664       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0149     |
|    n_updates            | 110         |
|    policy_gradient_loss | -0.0181     |
|    std                  | 0.898       |
|    value_loss           | 0.125       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 31         |
|    iterations           | 13         |
|    time_elapsed         | 848       

-----------------------------------------
| time/                   |             |
|    fps                  | 34          |
|    iterations           | 23          |
|    time_elapsed         | 1384        |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.026520796 |
|    clip_fraction        | 0.209       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.64       |
|    explained_variance   | 0.92        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00693    |
|    n_updates            | 220         |
|    policy_gradient_loss | -0.0219     |
|    std                  | 0.811       |
|    value_loss           | 0.19        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 34          |
|    iterations           | 24          |
|    time_elapsed         | 1438  

-----------------------------------------
| time/                   |             |
|    fps                  | 26          |
|    iterations           | 34          |
|    time_elapsed         | 2613        |
|    total_timesteps      | 69632       |
| train/                  |             |
|    approx_kl            | 0.036464922 |
|    clip_fraction        | 0.293       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.3        |
|    explained_variance   | 0.934       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0315     |
|    n_updates            | 330         |
|    policy_gradient_loss | -0.036      |
|    std                  | 0.721       |
|    value_loss           | 0.219       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 26         |
|    iterations           | 35         |
|    time_elapsed         | 2667      

----------------------------------------
| time/                   |            |
|    fps                  | 28         |
|    iterations           | 45         |
|    time_elapsed         | 3210       |
|    total_timesteps      | 92160      |
| train/                  |            |
|    approx_kl            | 0.09077087 |
|    clip_fraction        | 0.459      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.95      |
|    explained_variance   | 0.872      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.598      |
|    n_updates            | 440        |
|    policy_gradient_loss | -0.0175    |
|    std                  | 0.651      |
|    value_loss           | 3.04       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 28         |
|    iterations           | 46         |
|    time_elapsed         | 3267       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 29         |
|    iterations           | 56         |
|    time_elapsed         | 3845       |
|    total_timesteps      | 114688     |
| train/                  |            |
|    approx_kl            | 0.18562555 |
|    clip_fraction        | 0.47       |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.87      |
|    explained_variance   | 0.982      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.188      |
|    n_updates            | 550        |
|    policy_gradient_loss | -0.00802   |
|    std                  | 0.632      |
|    value_loss           | 1.47       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 29          |
|    iterations           | 57          |
|    time_elapsed         | 3904        |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 30         |
|    iterations           | 67         |
|    time_elapsed         | 4486       |
|    total_timesteps      | 137216     |
| train/                  |            |
|    approx_kl            | 0.10118689 |
|    clip_fraction        | 0.524      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.75      |
|    explained_variance   | 0.707      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.33       |
|    n_updates            | 660        |
|    policy_gradient_loss | -0.00813   |
|    std                  | 0.614      |
|    value_loss           | 4.46       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 30         |
|    iterations           | 68         |
|    time_elapsed         | 4543       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 31         |
|    iterations           | 78         |
|    time_elapsed         | 5134       |
|    total_timesteps      | 159744     |
| train/                  |            |
|    approx_kl            | 0.13057491 |
|    clip_fraction        | 0.601      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.68      |
|    explained_variance   | 0.761      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.21       |
|    n_updates            | 770        |
|    policy_gradient_loss | 0.0348     |
|    std                  | 0.602      |
|    value_loss           | 8.41       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 31         |
|    iterations           | 79         |
|    time_elapsed         | 5192       |
|    total_times

--------------------------------------
| time/                   |          |
|    fps                  | 31       |
|    iterations           | 89       |
|    time_elapsed         | 5770     |
|    total_timesteps      | 182272   |
| train/                  |          |
|    approx_kl            | 0.230839 |
|    clip_fraction        | 0.602    |
|    clip_range           | 0.2      |
|    entropy_loss         | -2.72    |
|    explained_variance   | 0.883    |
|    learning_rate        | 0.0003   |
|    loss                 | 1.47     |
|    n_updates            | 880      |
|    policy_gradient_loss | 0.0188   |
|    std                  | 0.608    |
|    value_loss           | 8.63     |
--------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 31          |
|    iterations           | 90          |
|    time_elapsed         | 5827        |
|    total_timesteps      | 184320      |
| train

---------------------------------------
| time/                   |           |
|    fps                  | 32        |
|    iterations           | 100       |
|    time_elapsed         | 6397      |
|    total_timesteps      | 204800    |
| train/                  |           |
|    approx_kl            | 0.1340432 |
|    clip_fraction        | 0.562     |
|    clip_range           | 0.2       |
|    entropy_loss         | -2.79     |
|    explained_variance   | 0.751     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.633     |
|    n_updates            | 990       |
|    policy_gradient_loss | 0.00964   |
|    std                  | 0.626     |
|    value_loss           | 4.59      |
---------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 32        |
|    iterations           | 101       |
|    time_elapsed         | 6462      |
|    total_timesteps      | 206848    |


----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 111        |
|    time_elapsed         | 7090       |
|    total_timesteps      | 227328     |
| train/                  |            |
|    approx_kl            | 0.17018896 |
|    clip_fraction        | 0.587      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.81      |
|    explained_variance   | 0.947      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.18       |
|    n_updates            | 1100       |
|    policy_gradient_loss | 0.0215     |
|    std                  | 0.627      |
|    value_loss           | 8.59       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 112        |
|    time_elapsed         | 7155       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 122        |
|    time_elapsed         | 7780       |
|    total_timesteps      | 249856     |
| train/                  |            |
|    approx_kl            | 0.20201394 |
|    clip_fraction        | 0.47       |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.76      |
|    explained_variance   | 0.968      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.447      |
|    n_updates            | 1210       |
|    policy_gradient_loss | 0.00273    |
|    std                  | 0.61       |
|    value_loss           | 2.44       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 123        |
|    time_elapsed         | 7845       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 133        |
|    time_elapsed         | 8440       |
|    total_timesteps      | 272384     |
| train/                  |            |
|    approx_kl            | 0.34634414 |
|    clip_fraction        | 0.647      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.8       |
|    explained_variance   | 0.949      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.623      |
|    n_updates            | 1320       |
|    policy_gradient_loss | 0.0249     |
|    std                  | 0.626      |
|    value_loss           | 10.3       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 134        |
|    time_elapsed         | 8499       |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 32        |
|    iterations           | 144       |
|    time_elapsed         | 9087      |
|    total_timesteps      | 294912    |
| train/                  |           |
|    approx_kl            | 0.2062858 |
|    clip_fraction        | 0.543     |
|    clip_range           | 0.2       |
|    entropy_loss         | -2.79     |
|    explained_variance   | 0.909     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.353     |
|    n_updates            | 1430      |
|    policy_gradient_loss | 0.0115    |
|    std                  | 0.623     |
|    value_loss           | 3.61      |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 145        |
|    time_elapsed         | 9145       |
|    total_timesteps      | 296960 

----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 155        |
|    time_elapsed         | 9703       |
|    total_timesteps      | 317440     |
| train/                  |            |
|    approx_kl            | 0.12471409 |
|    clip_fraction        | 0.563      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.83      |
|    explained_variance   | 0.942      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.271      |
|    n_updates            | 1540       |
|    policy_gradient_loss | 0.0304     |
|    std                  | 0.637      |
|    value_loss           | 6.16       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 156        |
|    time_elapsed         | 9757       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 166        |
|    time_elapsed         | 10296      |
|    total_timesteps      | 339968     |
| train/                  |            |
|    approx_kl            | 0.13184397 |
|    clip_fraction        | 0.492      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.89      |
|    explained_variance   | 0.962      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0637     |
|    n_updates            | 1650       |
|    policy_gradient_loss | 0.0305     |
|    std                  | 0.644      |
|    value_loss           | 2.46       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 167        |
|    time_elapsed         | 10350      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 177        |
|    time_elapsed         | 10928      |
|    total_timesteps      | 362496     |
| train/                  |            |
|    approx_kl            | 0.16847982 |
|    clip_fraction        | 0.545      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.72      |
|    explained_variance   | 0.82       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.439      |
|    n_updates            | 1760       |
|    policy_gradient_loss | 0.0188     |
|    std                  | 0.614      |
|    value_loss           | 7.39       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 178        |
|    time_elapsed         | 10987      |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 33          |
|    iterations           | 188         |
|    time_elapsed         | 11544       |
|    total_timesteps      | 385024      |
| train/                  |             |
|    approx_kl            | 0.071627535 |
|    clip_fraction        | 0.408       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.59       |
|    explained_variance   | 0.954       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.25        |
|    n_updates            | 1870        |
|    policy_gradient_loss | -0.00799    |
|    std                  | 0.589       |
|    value_loss           | 1.68        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 189        |
|    time_elapsed         | 11598     

---------------------------------------
| time/                   |           |
|    fps                  | 33        |
|    iterations           | 199       |
|    time_elapsed         | 12188     |
|    total_timesteps      | 407552    |
| train/                  |           |
|    approx_kl            | 0.1769662 |
|    clip_fraction        | 0.566     |
|    clip_range           | 0.2       |
|    entropy_loss         | -2.53     |
|    explained_variance   | 0.877     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.752     |
|    n_updates            | 1980      |
|    policy_gradient_loss | 0.0227    |
|    std                  | 0.581     |
|    value_loss           | 7.54      |
---------------------------------------
--------------------------------------
| time/                   |          |
|    fps                  | 33       |
|    iterations           | 200      |
|    time_elapsed         | 12247    |
|    total_timesteps      | 409600   |
| trai

----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 210        |
|    time_elapsed         | 12820      |
|    total_timesteps      | 430080     |
| train/                  |            |
|    approx_kl            | 0.16902441 |
|    clip_fraction        | 0.538      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.54      |
|    explained_variance   | 0.936      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.255      |
|    n_updates            | 2090       |
|    policy_gradient_loss | 0.0137     |
|    std                  | 0.58       |
|    value_loss           | 5.66       |
----------------------------------------


In [40]:
model = PPO.load(PPO_path, env=env)

Wrapping the env in a VecTransposeImage.


In [41]:
evaluate_policy(model, env, n_eval_episodes=10, render=True)

(246.6938610434532, 141.88138320834923)

In [8]:
# 2M Time steps
PPO_path = os.path.join('Training', 'Saved Models', 'PPO_2M_Driving_model')

model.learn(total_timesteps=2000000)

model.save(PPO_path)

-----------------------------
| time/              |      |
|    fps             | 57   |
|    iterations      | 1    |
|    time_elapsed    | 35   |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 44           |
|    iterations           | 2            |
|    time_elapsed         | 91           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0060943174 |
|    clip_fraction        | 0.0728       |
|    clip_range           | 0.2          |
|    entropy_loss         | -4.24        |
|    explained_variance   | 0.000735     |
|    learning_rate        | 0.0003       |
|    loss                 | 0.135        |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00551     |
|    std                  | 0.992        |
|    value_loss           | 0.563        |
----------------

-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 13          |
|    time_elapsed         | 708         |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.018434055 |
|    clip_fraction        | 0.178       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.97       |
|    explained_variance   | 0.873       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.079       |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.0173     |
|    std                  | 0.903       |
|    value_loss           | 0.19        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 14          |
|    time_elapsed         | 762   

-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 24          |
|    time_elapsed         | 1308        |
|    total_timesteps      | 49152       |
| train/                  |             |
|    approx_kl            | 0.024409294 |
|    clip_fraction        | 0.225       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.62       |
|    explained_variance   | 0.901       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.144       |
|    n_updates            | 230         |
|    policy_gradient_loss | -0.0296     |
|    std                  | 0.805       |
|    value_loss           | 0.489       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 25          |
|    time_elapsed         | 1363  

-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 35          |
|    time_elapsed         | 1905        |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.033986017 |
|    clip_fraction        | 0.232       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.31       |
|    explained_variance   | 0.972       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.137       |
|    n_updates            | 340         |
|    policy_gradient_loss | -0.0167     |
|    std                  | 0.731       |
|    value_loss           | 0.55        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 36          |
|    time_elapsed         | 1959  

----------------------------------------
| time/                   |            |
|    fps                  | 37         |
|    iterations           | 46         |
|    time_elapsed         | 2499       |
|    total_timesteps      | 94208      |
| train/                  |            |
|    approx_kl            | 0.04276219 |
|    clip_fraction        | 0.29       |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.12      |
|    explained_variance   | 0.971      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.539      |
|    n_updates            | 450        |
|    policy_gradient_loss | -0.00822   |
|    std                  | 0.688      |
|    value_loss           | 2.59       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 47          |
|    time_elapsed         | 2553        |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 37         |
|    iterations           | 57         |
|    time_elapsed         | 3094       |
|    total_timesteps      | 116736     |
| train/                  |            |
|    approx_kl            | 0.12161015 |
|    clip_fraction        | 0.407      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.04      |
|    explained_variance   | 0.974      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.1        |
|    n_updates            | 560        |
|    policy_gradient_loss | 0.00461    |
|    std                  | 0.667      |
|    value_loss           | 1.73       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 58          |
|    time_elapsed         | 3149        |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 37         |
|    iterations           | 68         |
|    time_elapsed         | 3689       |
|    total_timesteps      | 139264     |
| train/                  |            |
|    approx_kl            | 0.11567511 |
|    clip_fraction        | 0.455      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.03      |
|    explained_variance   | 0.952      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.313      |
|    n_updates            | 670        |
|    policy_gradient_loss | 0.019      |
|    std                  | 0.666      |
|    value_loss           | 5.47       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 69          |
|    time_elapsed         | 3742        |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 79          |
|    time_elapsed         | 4280        |
|    total_timesteps      | 161792      |
| train/                  |             |
|    approx_kl            | 0.049921826 |
|    clip_fraction        | 0.366       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.02       |
|    explained_variance   | 0.942       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.482       |
|    n_updates            | 780         |
|    policy_gradient_loss | 0.0141      |
|    std                  | 0.669       |
|    value_loss           | 3.81        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 80          |
|    time_elapsed         | 4334  

----------------------------------------
| time/                   |            |
|    fps                  | 37         |
|    iterations           | 90         |
|    time_elapsed         | 4871       |
|    total_timesteps      | 184320     |
| train/                  |            |
|    approx_kl            | 0.07505749 |
|    clip_fraction        | 0.478      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.03      |
|    explained_variance   | 0.806      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.76       |
|    n_updates            | 890        |
|    policy_gradient_loss | 0.00969    |
|    std                  | 0.675      |
|    value_loss           | 7.98       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 37         |
|    iterations           | 91         |
|    time_elapsed         | 4925       |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 101         |
|    time_elapsed         | 5465        |
|    total_timesteps      | 206848      |
| train/                  |             |
|    approx_kl            | 0.075720444 |
|    clip_fraction        | 0.458       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.04       |
|    explained_variance   | 0.771       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.5         |
|    n_updates            | 1000        |
|    policy_gradient_loss | 0.0122      |
|    std                  | 0.675       |
|    value_loss           | 13.5        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 37         |
|    iterations           | 102        |
|    time_elapsed         | 5520      

-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 112         |
|    time_elapsed         | 6065        |
|    total_timesteps      | 229376      |
| train/                  |             |
|    approx_kl            | 0.051964927 |
|    clip_fraction        | 0.304       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.02       |
|    explained_variance   | 0.933       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.476       |
|    n_updates            | 1110        |
|    policy_gradient_loss | 0.0112      |
|    std                  | 0.675       |
|    value_loss           | 2.96        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 113         |
|    time_elapsed         | 6121  

----------------------------------------
| time/                   |            |
|    fps                  | 26         |
|    iterations           | 123        |
|    time_elapsed         | 9499       |
|    total_timesteps      | 251904     |
| train/                  |            |
|    approx_kl            | 0.18637788 |
|    clip_fraction        | 0.369      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3         |
|    explained_variance   | 0.98       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.161      |
|    n_updates            | 1220       |
|    policy_gradient_loss | 0.0131     |
|    std                  | 0.668      |
|    value_loss           | 2.78       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 26         |
|    iterations           | 124        |
|    time_elapsed         | 9551       |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 27        |
|    iterations           | 134       |
|    time_elapsed         | 10077     |
|    total_timesteps      | 274432    |
| train/                  |           |
|    approx_kl            | 0.1048007 |
|    clip_fraction        | 0.393     |
|    clip_range           | 0.2       |
|    entropy_loss         | -3.08     |
|    explained_variance   | 0.979     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.57      |
|    n_updates            | 1330      |
|    policy_gradient_loss | 0.0233    |
|    std                  | 0.685     |
|    value_loss           | 6.28      |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 27         |
|    iterations           | 135        |
|    time_elapsed         | 10129      |
|    total_timesteps      | 276480 

----------------------------------------
| time/                   |            |
|    fps                  | 27         |
|    iterations           | 145        |
|    time_elapsed         | 10658      |
|    total_timesteps      | 296960     |
| train/                  |            |
|    approx_kl            | 0.18527655 |
|    clip_fraction        | 0.401      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.16      |
|    explained_variance   | 0.978      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.927      |
|    n_updates            | 1440       |
|    policy_gradient_loss | 0.0204     |
|    std                  | 0.701      |
|    value_loss           | 6.71       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 27         |
|    iterations           | 146        |
|    time_elapsed         | 10710      |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 28        |
|    iterations           | 156       |
|    time_elapsed         | 11238     |
|    total_timesteps      | 319488    |
| train/                  |           |
|    approx_kl            | 0.6192689 |
|    clip_fraction        | 0.494     |
|    clip_range           | 0.2       |
|    entropy_loss         | -3.18     |
|    explained_variance   | 0.943     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.365     |
|    n_updates            | 1550      |
|    policy_gradient_loss | 0.0523    |
|    std                  | 0.708     |
|    value_loss           | 4.38      |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 28         |
|    iterations           | 157        |
|    time_elapsed         | 11291      |
|    total_timesteps      | 321536 

----------------------------------------
| time/                   |            |
|    fps                  | 28         |
|    iterations           | 167        |
|    time_elapsed         | 11824      |
|    total_timesteps      | 342016     |
| train/                  |            |
|    approx_kl            | 0.33054507 |
|    clip_fraction        | 0.598      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.19      |
|    explained_variance   | 0.91       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.338      |
|    n_updates            | 1660       |
|    policy_gradient_loss | 0.0263     |
|    std                  | 0.703      |
|    value_loss           | 3.73       |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 28        |
|    iterations           | 168       |
|    time_elapsed         | 11877     |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 29         |
|    iterations           | 178        |
|    time_elapsed         | 12404      |
|    total_timesteps      | 364544     |
| train/                  |            |
|    approx_kl            | 0.64156216 |
|    clip_fraction        | 0.652      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.42      |
|    explained_variance   | 0.941      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.57       |
|    n_updates            | 1770       |
|    policy_gradient_loss | 0.0531     |
|    std                  | 0.757      |
|    value_loss           | 6.76       |
----------------------------------------
--------------------------------------
| time/                   |          |
|    fps                  | 29       |
|    iterations           | 179      |
|    time_elapsed         | 12456    |
|    total_timesteps      

----------------------------------------
| time/                   |            |
|    fps                  | 29         |
|    iterations           | 189        |
|    time_elapsed         | 12982      |
|    total_timesteps      | 387072     |
| train/                  |            |
|    approx_kl            | 0.39793643 |
|    clip_fraction        | 0.643      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.38      |
|    explained_variance   | 0.891      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.508      |
|    n_updates            | 1880       |
|    policy_gradient_loss | 0.0448     |
|    std                  | 0.747      |
|    value_loss           | 7.15       |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 29        |
|    iterations           | 190       |
|    time_elapsed         | 13037     |
|    total_timesteps 

---------------------------------------
| time/                   |           |
|    fps                  | 30        |
|    iterations           | 200       |
|    time_elapsed         | 13566     |
|    total_timesteps      | 409600    |
| train/                  |           |
|    approx_kl            | 0.9428203 |
|    clip_fraction        | 0.618     |
|    clip_range           | 0.2       |
|    entropy_loss         | -3.53     |
|    explained_variance   | 0.945     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.124     |
|    n_updates            | 1990      |
|    policy_gradient_loss | 0.0776    |
|    std                  | 0.785     |
|    value_loss           | 3.08      |
---------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 30        |
|    iterations           | 201       |
|    time_elapsed         | 13618     |
|    total_timesteps      | 411648    |


----------------------------------------
| time/                   |            |
|    fps                  | 30         |
|    iterations           | 211        |
|    time_elapsed         | 14141      |
|    total_timesteps      | 432128     |
| train/                  |            |
|    approx_kl            | 0.49619403 |
|    clip_fraction        | 0.693      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.7       |
|    explained_variance   | 0.922      |
|    learning_rate        | 0.0003     |
|    loss                 | 4.47       |
|    n_updates            | 2100       |
|    policy_gradient_loss | 0.162      |
|    std                  | 0.834      |
|    value_loss           | 15.9       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 30         |
|    iterations           | 212        |
|    time_elapsed         | 14193      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 30         |
|    iterations           | 222        |
|    time_elapsed         | 14718      |
|    total_timesteps      | 454656     |
| train/                  |            |
|    approx_kl            | 0.21182433 |
|    clip_fraction        | 0.51       |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.9       |
|    explained_variance   | 0.943      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.604      |
|    n_updates            | 2210       |
|    policy_gradient_loss | 0.0318     |
|    std                  | 0.89       |
|    value_loss           | 6.96       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 30         |
|    iterations           | 223        |
|    time_elapsed         | 14771      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 31         |
|    iterations           | 233        |
|    time_elapsed         | 15292      |
|    total_timesteps      | 477184     |
| train/                  |            |
|    approx_kl            | 0.03255274 |
|    clip_fraction        | 0.335      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.77      |
|    explained_variance   | 0.512      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.395      |
|    n_updates            | 2320       |
|    policy_gradient_loss | 0.00877    |
|    std                  | 0.847      |
|    value_loss           | 1.36       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 31         |
|    iterations           | 234        |
|    time_elapsed         | 15345      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 31         |
|    iterations           | 244        |
|    time_elapsed         | 15868      |
|    total_timesteps      | 499712     |
| train/                  |            |
|    approx_kl            | 0.12522055 |
|    clip_fraction        | 0.435      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.32      |
|    explained_variance   | 0.805      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0734     |
|    n_updates            | 2430       |
|    policy_gradient_loss | -0.0132    |
|    std                  | 0.727      |
|    value_loss           | 1.28       |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 31        |
|    iterations           | 245       |
|    time_elapsed         | 15920     |
|    total_timesteps 

---------------------------------------
| time/                   |           |
|    fps                  | 31        |
|    iterations           | 255       |
|    time_elapsed         | 16443     |
|    total_timesteps      | 522240    |
| train/                  |           |
|    approx_kl            | 0.2211031 |
|    clip_fraction        | 0.444     |
|    clip_range           | 0.2       |
|    entropy_loss         | -3.1      |
|    explained_variance   | 0.904     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.0651    |
|    n_updates            | 2540      |
|    policy_gradient_loss | -0.0184   |
|    std                  | 0.68      |
|    value_loss           | 0.86      |
---------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 31          |
|    iterations           | 256         |
|    time_elapsed         | 16496       |
|    total_timesteps      | 52

----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 266        |
|    time_elapsed         | 17019      |
|    total_timesteps      | 544768     |
| train/                  |            |
|    approx_kl            | 0.25971904 |
|    clip_fraction        | 0.518      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.83      |
|    explained_variance   | 0.935      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.165      |
|    n_updates            | 2650       |
|    policy_gradient_loss | 0.00759    |
|    std                  | 0.619      |
|    value_loss           | 1.24       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 267        |
|    time_elapsed         | 17072      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 277        |
|    time_elapsed         | 17597      |
|    total_timesteps      | 567296     |
| train/                  |            |
|    approx_kl            | 0.39143094 |
|    clip_fraction        | 0.657      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.72      |
|    explained_variance   | 0.944      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.339      |
|    n_updates            | 2760       |
|    policy_gradient_loss | 0.0386     |
|    std                  | 0.599      |
|    value_loss           | 2.64       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 278        |
|    time_elapsed         | 17649      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 288        |
|    time_elapsed         | 18177      |
|    total_timesteps      | 589824     |
| train/                  |            |
|    approx_kl            | 0.43465054 |
|    clip_fraction        | 0.713      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.9       |
|    explained_variance   | 0.923      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.284      |
|    n_updates            | 2870       |
|    policy_gradient_loss | 0.041      |
|    std                  | 0.638      |
|    value_loss           | 2.33       |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 32        |
|    iterations           | 289       |
|    time_elapsed         | 18230     |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 299        |
|    time_elapsed         | 18757      |
|    total_timesteps      | 612352     |
| train/                  |            |
|    approx_kl            | 0.53012407 |
|    clip_fraction        | 0.634      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.13      |
|    explained_variance   | 0.968      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.375      |
|    n_updates            | 2980       |
|    policy_gradient_loss | 0.00882    |
|    std                  | 0.684      |
|    value_loss           | 2.78       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 300        |
|    time_elapsed         | 18810      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 310        |
|    time_elapsed         | 19340      |
|    total_timesteps      | 634880     |
| train/                  |            |
|    approx_kl            | 0.36881757 |
|    clip_fraction        | 0.717      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.34      |
|    explained_variance   | 0.843      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.21       |
|    n_updates            | 3090       |
|    policy_gradient_loss | 0.0776     |
|    std                  | 0.745      |
|    value_loss           | 5.61       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 32         |
|    iterations           | 311        |
|    time_elapsed         | 19393      |
|    total_times

--------------------------------------
| time/                   |          |
|    fps                  | 32       |
|    iterations           | 321      |
|    time_elapsed         | 19922    |
|    total_timesteps      | 657408   |
| train/                  |          |
|    approx_kl            | 0.30557  |
|    clip_fraction        | 0.558    |
|    clip_range           | 0.2      |
|    entropy_loss         | -3.65    |
|    explained_variance   | 0.976    |
|    learning_rate        | 0.0003   |
|    loss                 | 0.292    |
|    n_updates            | 3200     |
|    policy_gradient_loss | 0.0627   |
|    std                  | 0.823    |
|    value_loss           | 4.22     |
--------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 33        |
|    iterations           | 322       |
|    time_elapsed         | 19975     |
|    total_timesteps      | 659456    |
| train/           

----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 332        |
|    time_elapsed         | 20504      |
|    total_timesteps      | 679936     |
| train/                  |            |
|    approx_kl            | 0.68717784 |
|    clip_fraction        | 0.678      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.91      |
|    explained_variance   | 0.981      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.72       |
|    n_updates            | 3310       |
|    policy_gradient_loss | 0.0649     |
|    std                  | 0.902      |
|    value_loss           | 7.16       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 333        |
|    time_elapsed         | 20557      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 343        |
|    time_elapsed         | 21086      |
|    total_timesteps      | 702464     |
| train/                  |            |
|    approx_kl            | 0.57312036 |
|    clip_fraction        | 0.668      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4         |
|    explained_variance   | 0.946      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.853      |
|    n_updates            | 3420       |
|    policy_gradient_loss | 0.0524     |
|    std                  | 0.928      |
|    value_loss           | 7.53       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 344        |
|    time_elapsed         | 21139      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 354        |
|    time_elapsed         | 21667      |
|    total_timesteps      | 724992     |
| train/                  |            |
|    approx_kl            | 0.17218947 |
|    clip_fraction        | 0.656      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.06      |
|    explained_variance   | 0.825      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.42       |
|    n_updates            | 3530       |
|    policy_gradient_loss | 0.0913     |
|    std                  | 0.954      |
|    value_loss           | 9.3        |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 355        |
|    time_elapsed         | 21721      |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 33        |
|    iterations           | 365       |
|    time_elapsed         | 22253     |
|    total_timesteps      | 747520    |
| train/                  |           |
|    approx_kl            | 0.1427187 |
|    clip_fraction        | 0.542     |
|    clip_range           | 0.2       |
|    entropy_loss         | -4.22     |
|    explained_variance   | 0.896     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.796     |
|    n_updates            | 3640      |
|    policy_gradient_loss | 0.00632   |
|    std                  | 1         |
|    value_loss           | 3.92      |
---------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 33          |
|    iterations           | 366         |
|    time_elapsed         | 22306       |
|    total_timesteps      | 74

----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 376        |
|    time_elapsed         | 22842      |
|    total_timesteps      | 770048     |
| train/                  |            |
|    approx_kl            | 0.14667952 |
|    clip_fraction        | 0.59       |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.38      |
|    explained_variance   | 0.837      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.896      |
|    n_updates            | 3750       |
|    policy_gradient_loss | 0.0211     |
|    std                  | 1.07       |
|    value_loss           | 4.95       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 33          |
|    iterations           | 377         |
|    time_elapsed         | 22895       |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 387        |
|    time_elapsed         | 23431      |
|    total_timesteps      | 792576     |
| train/                  |            |
|    approx_kl            | 0.18219127 |
|    clip_fraction        | 0.557      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.4       |
|    explained_variance   | 0.939      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.11       |
|    n_updates            | 3860       |
|    policy_gradient_loss | 0.0209     |
|    std                  | 1.06       |
|    value_loss           | 5.41       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 388        |
|    time_elapsed         | 23485      |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 33        |
|    iterations           | 398       |
|    time_elapsed         | 24021     |
|    total_timesteps      | 815104    |
| train/                  |           |
|    approx_kl            | 0.1538513 |
|    clip_fraction        | 0.558     |
|    clip_range           | 0.2       |
|    entropy_loss         | -4.49     |
|    explained_variance   | 0.942     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.1       |
|    n_updates            | 3970      |
|    policy_gradient_loss | 0.016     |
|    std                  | 1.1       |
|    value_loss           | 5.78      |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 33         |
|    iterations           | 399        |
|    time_elapsed         | 24074      |
|    total_timesteps      | 817152 

---------------------------------------
| time/                   |           |
|    fps                  | 34        |
|    iterations           | 409       |
|    time_elapsed         | 24613     |
|    total_timesteps      | 837632    |
| train/                  |           |
|    approx_kl            | 0.1163931 |
|    clip_fraction        | 0.514     |
|    clip_range           | 0.2       |
|    entropy_loss         | -4.64     |
|    explained_variance   | 0.977     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.1       |
|    n_updates            | 4080      |
|    policy_gradient_loss | 0.0125    |
|    std                  | 1.16      |
|    value_loss           | 3.97      |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 410        |
|    time_elapsed         | 24666      |
|    total_timesteps      | 839680 

----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 420        |
|    time_elapsed         | 25206      |
|    total_timesteps      | 860160     |
| train/                  |            |
|    approx_kl            | 0.09707971 |
|    clip_fraction        | 0.501      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.71      |
|    explained_variance   | 0.977      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.578      |
|    n_updates            | 4190       |
|    policy_gradient_loss | 0.00862    |
|    std                  | 1.19       |
|    value_loss           | 3.84       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 34          |
|    iterations           | 421         |
|    time_elapsed         | 25260       |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 34          |
|    iterations           | 431         |
|    time_elapsed         | 25797       |
|    total_timesteps      | 882688      |
| train/                  |             |
|    approx_kl            | 0.091091506 |
|    clip_fraction        | 0.564       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.92       |
|    explained_variance   | 0.974       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.883       |
|    n_updates            | 4300        |
|    policy_gradient_loss | 0.0194      |
|    std                  | 1.27        |
|    value_loss           | 4           |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 432        |
|    time_elapsed         | 25851     

-----------------------------------------
| time/                   |             |
|    fps                  | 34          |
|    iterations           | 442         |
|    time_elapsed         | 26390       |
|    total_timesteps      | 905216      |
| train/                  |             |
|    approx_kl            | 0.069077455 |
|    clip_fraction        | 0.438       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.99       |
|    explained_variance   | 0.986       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.915       |
|    n_updates            | 4410        |
|    policy_gradient_loss | -3.57e-05   |
|    std                  | 1.32        |
|    value_loss           | 3.12        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 443        |
|    time_elapsed         | 26444     

-----------------------------------------
| time/                   |             |
|    fps                  | 34          |
|    iterations           | 453         |
|    time_elapsed         | 26984       |
|    total_timesteps      | 927744      |
| train/                  |             |
|    approx_kl            | 0.121102706 |
|    clip_fraction        | 0.52        |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.04       |
|    explained_variance   | 0.956       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.675       |
|    n_updates            | 4520        |
|    policy_gradient_loss | -0.00465    |
|    std                  | 1.33        |
|    value_loss           | 5.05        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 454        |
|    time_elapsed         | 27039     

----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 464        |
|    time_elapsed         | 27575      |
|    total_timesteps      | 950272     |
| train/                  |            |
|    approx_kl            | 0.11061459 |
|    clip_fraction        | 0.546      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.14      |
|    explained_variance   | 0.976      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.09       |
|    n_updates            | 4630       |
|    policy_gradient_loss | 0.00466    |
|    std                  | 1.37       |
|    value_loss           | 4.8        |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 465        |
|    time_elapsed         | 27628      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 475        |
|    time_elapsed         | 28166      |
|    total_timesteps      | 972800     |
| train/                  |            |
|    approx_kl            | 0.09341004 |
|    clip_fraction        | 0.486      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.22      |
|    explained_variance   | 0.974      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.866      |
|    n_updates            | 4740       |
|    policy_gradient_loss | -0.00283   |
|    std                  | 1.42       |
|    value_loss           | 3.68       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 476        |
|    time_elapsed         | 28220      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 486        |
|    time_elapsed         | 28761      |
|    total_timesteps      | 995328     |
| train/                  |            |
|    approx_kl            | 0.07104224 |
|    clip_fraction        | 0.441      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.27      |
|    explained_variance   | 0.982      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.978      |
|    n_updates            | 4850       |
|    policy_gradient_loss | -0.00097   |
|    std                  | 1.45       |
|    value_loss           | 3.69       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 487        |
|    time_elapsed         | 28815      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 497        |
|    time_elapsed         | 29351      |
|    total_timesteps      | 1017856    |
| train/                  |            |
|    approx_kl            | 0.28286424 |
|    clip_fraction        | 0.593      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.22      |
|    explained_variance   | 0.898      |
|    learning_rate        | 0.0003     |
|    loss                 | 3.07       |
|    n_updates            | 4960       |
|    policy_gradient_loss | 0.0467     |
|    std                  | 1.43       |
|    value_loss           | 32.4       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 498        |
|    time_elapsed         | 29405      |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 34          |
|    iterations           | 508         |
|    time_elapsed         | 29944       |
|    total_timesteps      | 1040384     |
| train/                  |             |
|    approx_kl            | 0.102828555 |
|    clip_fraction        | 0.501       |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.27       |
|    explained_variance   | 0.944       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.967       |
|    n_updates            | 5070        |
|    policy_gradient_loss | 0.0127      |
|    std                  | 1.45        |
|    value_loss           | 5.75        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 509        |
|    time_elapsed         | 29997     

----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 519        |
|    time_elapsed         | 30535      |
|    total_timesteps      | 1062912    |
| train/                  |            |
|    approx_kl            | 0.18897802 |
|    clip_fraction        | 0.663      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.34      |
|    explained_variance   | 0.806      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.73       |
|    n_updates            | 5180       |
|    policy_gradient_loss | 0.058      |
|    std                  | 1.48       |
|    value_loss           | 13.2       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 520        |
|    time_elapsed         | 30590      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 530        |
|    time_elapsed         | 31131      |
|    total_timesteps      | 1085440    |
| train/                  |            |
|    approx_kl            | 0.11704459 |
|    clip_fraction        | 0.523      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.29      |
|    explained_variance   | 0.403      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.33       |
|    n_updates            | 5290       |
|    policy_gradient_loss | 0.0245     |
|    std                  | 1.45       |
|    value_loss           | 11.4       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 531        |
|    time_elapsed         | 31185      |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 34          |
|    iterations           | 541         |
|    time_elapsed         | 31725       |
|    total_timesteps      | 1107968     |
| train/                  |             |
|    approx_kl            | 0.056140188 |
|    clip_fraction        | 0.445       |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.36       |
|    explained_variance   | 0.961       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.734       |
|    n_updates            | 5400        |
|    policy_gradient_loss | 0.0013      |
|    std                  | 1.48        |
|    value_loss           | 3.17        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 542        |
|    time_elapsed         | 31778     

---------------------------------------
| time/                   |           |
|    fps                  | 34        |
|    iterations           | 552       |
|    time_elapsed         | 32318     |
|    total_timesteps      | 1130496   |
| train/                  |           |
|    approx_kl            | 0.1164777 |
|    clip_fraction        | 0.511     |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.45     |
|    explained_variance   | 0.987     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.64      |
|    n_updates            | 5510      |
|    policy_gradient_loss | 0.00205   |
|    std                  | 1.54      |
|    value_loss           | 2.63      |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 553        |
|    time_elapsed         | 32373      |
|    total_timesteps      | 1132544

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 563         |
|    time_elapsed         | 32917       |
|    total_timesteps      | 1153024     |
| train/                  |             |
|    approx_kl            | 0.107024536 |
|    clip_fraction        | 0.535       |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.6        |
|    explained_variance   | 0.949       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.615       |
|    n_updates            | 5620        |
|    policy_gradient_loss | 0.00823     |
|    std                  | 1.61        |
|    value_loss           | 5.6         |
-----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 564       |
|    time_elapsed         | 32972     |
| 

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 574         |
|    time_elapsed         | 33520       |
|    total_timesteps      | 1175552     |
| train/                  |             |
|    approx_kl            | 0.066167295 |
|    clip_fraction        | 0.452       |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.59       |
|    explained_variance   | 0.988       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.642       |
|    n_updates            | 5730        |
|    policy_gradient_loss | -0.00399    |
|    std                  | 1.62        |
|    value_loss           | 3           |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 575        |
|    time_elapsed         | 33574     

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 585         |
|    time_elapsed         | 34117       |
|    total_timesteps      | 1198080     |
| train/                  |             |
|    approx_kl            | 0.105848834 |
|    clip_fraction        | 0.481       |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.57       |
|    explained_variance   | 0.969       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.556       |
|    n_updates            | 5840        |
|    policy_gradient_loss | -0.00228    |
|    std                  | 1.61        |
|    value_loss           | 4.03        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 586        |
|    time_elapsed         | 34171     

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 596        |
|    time_elapsed         | 34715      |
|    total_timesteps      | 1220608    |
| train/                  |            |
|    approx_kl            | 0.07837089 |
|    clip_fraction        | 0.484      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.52      |
|    explained_variance   | 0.972      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.1        |
|    n_updates            | 5950       |
|    policy_gradient_loss | 0.00587    |
|    std                  | 1.59       |
|    value_loss           | 6.28       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 597        |
|    time_elapsed         | 34770      |
|    total_times

--------------------------------------
| time/                   |          |
|    fps                  | 35       |
|    iterations           | 607      |
|    time_elapsed         | 35312    |
|    total_timesteps      | 1243136  |
| train/                  |          |
|    approx_kl            | 0.077252 |
|    clip_fraction        | 0.483    |
|    clip_range           | 0.2      |
|    entropy_loss         | -5.49    |
|    explained_variance   | 0.988    |
|    learning_rate        | 0.0003   |
|    loss                 | 1.02     |
|    n_updates            | 6060     |
|    policy_gradient_loss | 0.00641  |
|    std                  | 1.58     |
|    value_loss           | 3.14     |
--------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 608        |
|    time_elapsed         | 35366      |
|    total_timesteps      | 1245184    |
| train/     

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 618        |
|    time_elapsed         | 35909      |
|    total_timesteps      | 1265664    |
| train/                  |            |
|    approx_kl            | 0.09622033 |
|    clip_fraction        | 0.508      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.56      |
|    explained_variance   | 0.989      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.709      |
|    n_updates            | 6170       |
|    policy_gradient_loss | 0.00552    |
|    std                  | 1.62       |
|    value_loss           | 4.22       |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 619       |
|    time_elapsed         | 35963     |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 629        |
|    time_elapsed         | 36507      |
|    total_timesteps      | 1288192    |
| train/                  |            |
|    approx_kl            | 0.23566161 |
|    clip_fraction        | 0.523      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.59      |
|    explained_variance   | 0.976      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.823      |
|    n_updates            | 6280       |
|    policy_gradient_loss | 0.0144     |
|    std                  | 1.64       |
|    value_loss           | 6.68       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 630        |
|    time_elapsed         | 36560      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 640        |
|    time_elapsed         | 37102      |
|    total_timesteps      | 1310720    |
| train/                  |            |
|    approx_kl            | 0.11646259 |
|    clip_fraction        | 0.554      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.68      |
|    explained_variance   | 0.985      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.874      |
|    n_updates            | 6390       |
|    policy_gradient_loss | 0.0262     |
|    std                  | 1.69       |
|    value_loss           | 4.85       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 641        |
|    time_elapsed         | 37156      |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 651         |
|    time_elapsed         | 37709       |
|    total_timesteps      | 1333248     |
| train/                  |             |
|    approx_kl            | 0.083957046 |
|    clip_fraction        | 0.498       |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.81       |
|    explained_variance   | 0.982       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.725       |
|    n_updates            | 6500        |
|    policy_gradient_loss | 0.00353     |
|    std                  | 1.75        |
|    value_loss           | 4.29        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 652        |
|    time_elapsed         | 37775     

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 662        |
|    time_elapsed         | 38370      |
|    total_timesteps      | 1355776    |
| train/                  |            |
|    approx_kl            | 0.07127325 |
|    clip_fraction        | 0.482      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.95      |
|    explained_variance   | 0.971      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.13       |
|    n_updates            | 6610       |
|    policy_gradient_loss | 0.0182     |
|    std                  | 1.83       |
|    value_loss           | 5.18       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 663        |
|    time_elapsed         | 38427      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 673        |
|    time_elapsed         | 39007      |
|    total_timesteps      | 1378304    |
| train/                  |            |
|    approx_kl            | 0.08567266 |
|    clip_fraction        | 0.48       |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.04      |
|    explained_variance   | 0.925      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.437      |
|    n_updates            | 6720       |
|    policy_gradient_loss | -0.00357   |
|    std                  | 1.89       |
|    value_loss           | 7.14       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 674        |
|    time_elapsed         | 39065      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 684        |
|    time_elapsed         | 39651      |
|    total_timesteps      | 1400832    |
| train/                  |            |
|    approx_kl            | 0.19236022 |
|    clip_fraction        | 0.482      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.09      |
|    explained_variance   | 0.962      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.663      |
|    n_updates            | 6830       |
|    policy_gradient_loss | 0.0485     |
|    std                  | 1.93       |
|    value_loss           | 9.71       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 685        |
|    time_elapsed         | 39707      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 695        |
|    time_elapsed         | 40294      |
|    total_timesteps      | 1423360    |
| train/                  |            |
|    approx_kl            | 0.18904504 |
|    clip_fraction        | 0.612      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.13      |
|    explained_variance   | 0.917      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.11       |
|    n_updates            | 6940       |
|    policy_gradient_loss | 0.0748     |
|    std                  | 1.97       |
|    value_loss           | 6.31       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 696        |
|    time_elapsed         | 40352      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 706        |
|    time_elapsed         | 40931      |
|    total_timesteps      | 1445888    |
| train/                  |            |
|    approx_kl            | 0.07931337 |
|    clip_fraction        | 0.481      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.13      |
|    explained_variance   | 0.958      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.78       |
|    n_updates            | 7050       |
|    policy_gradient_loss | 0.0059     |
|    std                  | 1.96       |
|    value_loss           | 5.82       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 707        |
|    time_elapsed         | 40989      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 717        |
|    time_elapsed         | 41575      |
|    total_timesteps      | 1468416    |
| train/                  |            |
|    approx_kl            | 0.08237027 |
|    clip_fraction        | 0.469      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.18      |
|    explained_variance   | 0.939      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.601      |
|    n_updates            | 7160       |
|    policy_gradient_loss | 0.0108     |
|    std                  | 1.98       |
|    value_loss           | 5.01       |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 718       |
|    time_elapsed         | 41634     |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 728        |
|    time_elapsed         | 42219      |
|    total_timesteps      | 1490944    |
| train/                  |            |
|    approx_kl            | 0.09408791 |
|    clip_fraction        | 0.496      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.28      |
|    explained_variance   | 0.985      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.678      |
|    n_updates            | 7270       |
|    policy_gradient_loss | 0.0356     |
|    std                  | 2.07       |
|    value_loss           | 4.52       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 729        |
|    time_elapsed         | 42276      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 739        |
|    time_elapsed         | 42862      |
|    total_timesteps      | 1513472    |
| train/                  |            |
|    approx_kl            | 0.07785866 |
|    clip_fraction        | 0.448      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.4       |
|    explained_variance   | 0.969      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.858      |
|    n_updates            | 7380       |
|    policy_gradient_loss | 0.00427    |
|    std                  | 2.15       |
|    value_loss           | 4.12       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 740         |
|    time_elapsed         | 42921       |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 750         |
|    time_elapsed         | 43476       |
|    total_timesteps      | 1536000     |
| train/                  |             |
|    approx_kl            | 0.063563146 |
|    clip_fraction        | 0.433       |
|    clip_range           | 0.2         |
|    entropy_loss         | -6.42       |
|    explained_variance   | 0.983       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.705       |
|    n_updates            | 7490        |
|    policy_gradient_loss | 0.000173    |
|    std                  | 2.17        |
|    value_loss           | 3.77        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 751         |
|    time_elapsed         | 43529 

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 761        |
|    time_elapsed         | 44086      |
|    total_timesteps      | 1558528    |
| train/                  |            |
|    approx_kl            | 0.32479376 |
|    clip_fraction        | 0.485      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.47      |
|    explained_variance   | 0.957      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.759      |
|    n_updates            | 7600       |
|    policy_gradient_loss | 0.0152     |
|    std                  | 2.23       |
|    value_loss           | 6.84       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 762        |
|    time_elapsed         | 44142      |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 772       |
|    time_elapsed         | 44701     |
|    total_timesteps      | 1581056   |
| train/                  |           |
|    approx_kl            | 0.1689514 |
|    clip_fraction        | 0.506     |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.52     |
|    explained_variance   | 0.972     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.701     |
|    n_updates            | 7710      |
|    policy_gradient_loss | 0.0241    |
|    std                  | 2.27      |
|    value_loss           | 7.09      |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 773        |
|    time_elapsed         | 44758      |
|    total_timesteps      | 1583104

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 783         |
|    time_elapsed         | 45316       |
|    total_timesteps      | 1603584     |
| train/                  |             |
|    approx_kl            | 0.111619696 |
|    clip_fraction        | 0.486       |
|    clip_range           | 0.2         |
|    entropy_loss         | -6.6        |
|    explained_variance   | 0.955       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.7         |
|    n_updates            | 7820        |
|    policy_gradient_loss | 0.0154      |
|    std                  | 2.34        |
|    value_loss           | 8.66        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 784        |
|    time_elapsed         | 45372     

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 794        |
|    time_elapsed         | 45925      |
|    total_timesteps      | 1626112    |
| train/                  |            |
|    approx_kl            | 0.06481965 |
|    clip_fraction        | 0.459      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.58      |
|    explained_variance   | 0.974      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.42       |
|    n_updates            | 7930       |
|    policy_gradient_loss | 0.0157     |
|    std                  | 2.34       |
|    value_loss           | 8.12       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 795        |
|    time_elapsed         | 45980      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 805        |
|    time_elapsed         | 46531      |
|    total_timesteps      | 1648640    |
| train/                  |            |
|    approx_kl            | 0.08460553 |
|    clip_fraction        | 0.468      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.58      |
|    explained_variance   | 0.969      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.493      |
|    n_updates            | 8040       |
|    policy_gradient_loss | 0.0159     |
|    std                  | 2.34       |
|    value_loss           | 6.66       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 806        |
|    time_elapsed         | 46585      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 816        |
|    time_elapsed         | 47122      |
|    total_timesteps      | 1671168    |
| train/                  |            |
|    approx_kl            | 0.06881673 |
|    clip_fraction        | 0.441      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.69      |
|    explained_variance   | 0.985      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.69       |
|    n_updates            | 8150       |
|    policy_gradient_loss | 0.0142     |
|    std                  | 2.42       |
|    value_loss           | 4.87       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 817         |
|    time_elapsed         | 47176       |
|    total_

---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 827       |
|    time_elapsed         | 47713     |
|    total_timesteps      | 1693696   |
| train/                  |           |
|    approx_kl            | 0.8829888 |
|    clip_fraction        | 0.674     |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.73     |
|    explained_variance   | 0.875     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.4       |
|    n_updates            | 8260      |
|    policy_gradient_loss | 0.0776    |
|    std                  | 2.45      |
|    value_loss           | 32        |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 828        |
|    time_elapsed         | 47769      |
|    total_timesteps      | 1695744

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 838        |
|    time_elapsed         | 48306      |
|    total_timesteps      | 1716224    |
| train/                  |            |
|    approx_kl            | 0.12356926 |
|    clip_fraction        | 0.512      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.81      |
|    explained_variance   | 0.929      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.686      |
|    n_updates            | 8370       |
|    policy_gradient_loss | 0.012      |
|    std                  | 2.5        |
|    value_loss           | 9.2        |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 839        |
|    time_elapsed         | 48360      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 849        |
|    time_elapsed         | 48900      |
|    total_timesteps      | 1738752    |
| train/                  |            |
|    approx_kl            | 0.10467425 |
|    clip_fraction        | 0.519      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.73      |
|    explained_variance   | 0.921      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.12       |
|    n_updates            | 8480       |
|    policy_gradient_loss | 0.0134     |
|    std                  | 2.44       |
|    value_loss           | 8.63       |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 850       |
|    time_elapsed         | 48953     |
|    total_timesteps 

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 860         |
|    time_elapsed         | 49489       |
|    total_timesteps      | 1761280     |
| train/                  |             |
|    approx_kl            | 0.122496024 |
|    clip_fraction        | 0.501       |
|    clip_range           | 0.2         |
|    entropy_loss         | -6.8        |
|    explained_variance   | 0.979       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.538       |
|    n_updates            | 8590        |
|    policy_gradient_loss | 0.0118      |
|    std                  | 2.48        |
|    value_loss           | 4.03        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 861        |
|    time_elapsed         | 49542     

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 871        |
|    time_elapsed         | 50080      |
|    total_timesteps      | 1783808    |
| train/                  |            |
|    approx_kl            | 0.11747041 |
|    clip_fraction        | 0.533      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.94      |
|    explained_variance   | 0.935      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.905      |
|    n_updates            | 8700       |
|    policy_gradient_loss | 0.0333     |
|    std                  | 2.66       |
|    value_loss           | 16.3       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 872        |
|    time_elapsed         | 50134      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 882        |
|    time_elapsed         | 50667      |
|    total_timesteps      | 1806336    |
| train/                  |            |
|    approx_kl            | 0.07198158 |
|    clip_fraction        | 0.455      |
|    clip_range           | 0.2        |
|    entropy_loss         | -7.09      |
|    explained_variance   | 0.982      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.824      |
|    n_updates            | 8810       |
|    policy_gradient_loss | 0.00913    |
|    std                  | 2.78       |
|    value_loss           | 5.46       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 883        |
|    time_elapsed         | 50721      |
|    total_times

--------------------------------------
| time/                   |          |
|    fps                  | 35       |
|    iterations           | 893      |
|    time_elapsed         | 51253    |
|    total_timesteps      | 1828864  |
| train/                  |          |
|    approx_kl            | 0.108496 |
|    clip_fraction        | 0.547    |
|    clip_range           | 0.2      |
|    entropy_loss         | -7.1     |
|    explained_variance   | 0.95     |
|    learning_rate        | 0.0003   |
|    loss                 | 0.763    |
|    n_updates            | 8920     |
|    policy_gradient_loss | 0.0317   |
|    std                  | 2.79     |
|    value_loss           | 6.98     |
--------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 894        |
|    time_elapsed         | 51307      |
|    total_timesteps      | 1830912    |
| train/     

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 904        |
|    time_elapsed         | 51841      |
|    total_timesteps      | 1851392    |
| train/                  |            |
|    approx_kl            | 0.47463167 |
|    clip_fraction        | 0.485      |
|    clip_range           | 0.2        |
|    entropy_loss         | -7.13      |
|    explained_variance   | 0.965      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.742      |
|    n_updates            | 9030       |
|    policy_gradient_loss | 0.00404    |
|    std                  | 2.87       |
|    value_loss           | 8.37       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 905        |
|    time_elapsed         | 51895      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 915        |
|    time_elapsed         | 52431      |
|    total_timesteps      | 1873920    |
| train/                  |            |
|    approx_kl            | 0.06082036 |
|    clip_fraction        | 0.407      |
|    clip_range           | 0.2        |
|    entropy_loss         | -7.21      |
|    explained_variance   | 0.988      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.915      |
|    n_updates            | 9140       |
|    policy_gradient_loss | -0.00541   |
|    std                  | 2.97       |
|    value_loss           | 4.18       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 916         |
|    time_elapsed         | 52485       |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 926         |
|    time_elapsed         | 53021       |
|    total_timesteps      | 1896448     |
| train/                  |             |
|    approx_kl            | 0.050804794 |
|    clip_fraction        | 0.429       |
|    clip_range           | 0.2         |
|    entropy_loss         | -7.2        |
|    explained_variance   | 0.983       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.439       |
|    n_updates            | 9250        |
|    policy_gradient_loss | -0.00245    |
|    std                  | 2.97        |
|    value_loss           | 3.5         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 927         |
|    time_elapsed         | 53074 

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 937        |
|    time_elapsed         | 53609      |
|    total_timesteps      | 1918976    |
| train/                  |            |
|    approx_kl            | 0.09473477 |
|    clip_fraction        | 0.517      |
|    clip_range           | 0.2        |
|    entropy_loss         | -7.09      |
|    explained_variance   | 0.972      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.547      |
|    n_updates            | 9360       |
|    policy_gradient_loss | 0.0175     |
|    std                  | 2.85       |
|    value_loss           | 6.72       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 938        |
|    time_elapsed         | 53662      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 948        |
|    time_elapsed         | 54200      |
|    total_timesteps      | 1941504    |
| train/                  |            |
|    approx_kl            | 0.07714959 |
|    clip_fraction        | 0.449      |
|    clip_range           | 0.2        |
|    entropy_loss         | -7.1       |
|    explained_variance   | 0.979      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.553      |
|    n_updates            | 9470       |
|    policy_gradient_loss | -0.00236   |
|    std                  | 2.89       |
|    value_loss           | 4.17       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 949        |
|    time_elapsed         | 54254      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 959        |
|    time_elapsed         | 54791      |
|    total_timesteps      | 1964032    |
| train/                  |            |
|    approx_kl            | 0.06338242 |
|    clip_fraction        | 0.46       |
|    clip_range           | 0.2        |
|    entropy_loss         | -7.13      |
|    explained_variance   | 0.97       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.518      |
|    n_updates            | 9580       |
|    policy_gradient_loss | -0.000642  |
|    std                  | 2.9        |
|    value_loss           | 6.67       |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 960        |
|    time_elapsed         | 54844      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 970        |
|    time_elapsed         | 55382      |
|    total_timesteps      | 1986560    |
| train/                  |            |
|    approx_kl            | 0.03986168 |
|    clip_fraction        | 0.371      |
|    clip_range           | 0.2        |
|    entropy_loss         | -7.21      |
|    explained_variance   | 0.993      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.692      |
|    n_updates            | 9690       |
|    policy_gradient_loss | -0.00339   |
|    std                  | 2.96       |
|    value_loss           | 2.68       |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 971         |
|    time_elapsed         | 55435       |
|    total_