## 1- Installation


In [1]:
pip install gymnasium[box2d] stable_baselines3[extra]

Collecting opencv-python (from stable_baselines3[extra])
  Obtaining dependency information for opencv-python from https://files.pythonhosted.org/packages/c7/ec/9dabb6a9abfdebb3c45b0cc52dec901caafef2b2c7e7d6a839ed86d81e91/opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl.metadata
  Using cached opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting shimmy[atari]~=1.3.0 (from stable_baselines3[extra])
  Obtaining dependency information for shimmy[atari]~=1.3.0 from https://files.pythonhosted.org/packages/dc/f9/07ef16463db14ac1b30f149c379760f5cacf3fc677b295d29a92f3127914/Shimmy-1.3.0-py3-none-any.whl.metadata
  Using cached Shimmy-1.3.0-py3-none-any.whl.metadata (3.7 kB)
Collecting autorom[accept-rom-license]~=0.6.1 (from stable_baselines3[extra])
  Obtaining dependency information for autorom[accept-rom-license]~=0.6.1 from https://files.pythonhosted.org/packages/02/e1/90b168a5dbc89a4e9b2206a99238f3ea3fc281052bb2dc236d4aa15e5e87/AutoROM-0.6.1-py3-none-any.whl.metadata
 

## 2- Importation

In [1]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.env_util import make_vec_env

## 3- Tester l'environnement

In [10]:
env = gym.make('LunarLander-v2', render_mode="human")

In [11]:
episodes=10
for episode in range (0,episodes+1):
    state=env.reset()
    done=False
    score=0
    while not done:
        env.render()
        action=env.action_space.sample()
        observation ,reward ,done ,truncated ,info =env.step(action)
        score+=reward;
    print("Episode : {} ,Score : {}".format(episode,score))
env.close()
        

Episode : 0 ,Score : -106.06068647208392
Episode : 1 ,Score : -30.363187126957698
Episode : 2 ,Score : -345.0444552209814
Episode : 3 ,Score : -77.67371878172969
Episode : 4 ,Score : -118.71002047618778
Episode : 5 ,Score : -64.08999497749542
Episode : 6 ,Score : -187.92720321285282
Episode : 7 ,Score : -289.36140405487805
Episode : 8 ,Score : -365.47787947578826
Episode : 9 ,Score : -252.36384743444887
Episode : 10 ,Score : -174.90442492027617


## 4- Apprentissage par renforcement

In [2]:
env = gym.make('LunarLander-v2')
env = DummyVecEnv([lambda:env])

In [3]:
model = PPO(
    policy="MlpPolicy",
    env=env,
    n_steps=1024,
    batch_size=64,
    n_epochs=4,
    gamma=0.999,
    gae_lambda=0.98,
    ent_coef=0.01,
    verbose=1,tensorboard_log="./PPOLunar_tensorboard/"
)
model.learn(total_timesteps=int(1e6),progress_bar=True)
env.close()

Using cpu device
Logging to ./PPOLunar_tensorboard/PPO_1


Output()

-----------------------------
| time/              |      |
|    fps             | 279  |
|    iterations      | 1    |
|    time_elapsed    | 3    |
|    total_timesteps | 1024 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 323          |
|    iterations           | 2            |
|    time_elapsed         | 6            |
|    total_timesteps      | 2048         |
| train/                  |              |
|    approx_kl            | 0.0028168964 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0.000943     |
|    learning_rate        | 0.0003       |
|    loss                 | 3.69e+03     |
|    n_updates            | 4            |
|    policy_gradient_loss | -0.00518     |
|    value_loss           | 6.48e+03     |
------------------------------------------
----------------

-------------------------------------------
| time/                   |               |
|    fps                  | 460           |
|    iterations           | 13            |
|    time_elapsed         | 28            |
|    total_timesteps      | 13312         |
| train/                  |               |
|    approx_kl            | 0.00094010396 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.38         |
|    explained_variance   | -0.00229      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.45e+03      |
|    n_updates            | 48            |
|    policy_gradient_loss | -0.00244      |
|    value_loss           | 3.42e+03      |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 468          |
|    iterations           | 14           |
|    time_elapsed         | 30      

------------------------------------------
| time/                   |              |
|    fps                  | 476          |
|    iterations           | 24           |
|    time_elapsed         | 51           |
|    total_timesteps      | 24576        |
| train/                  |              |
|    approx_kl            | 0.0004118813 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.33        |
|    explained_variance   | -0.013       |
|    learning_rate        | 0.0003       |
|    loss                 | 1.1e+03      |
|    n_updates            | 92           |
|    policy_gradient_loss | -0.00192     |
|    value_loss           | 2.63e+03     |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 479          |
|    iterations           | 25           |
|    time_elapsed         | 53           |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 499          |
|    iterations           | 35           |
|    time_elapsed         | 71           |
|    total_timesteps      | 35840        |
| train/                  |              |
|    approx_kl            | 0.0008876128 |
|    clip_fraction        | 0.000977     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.27        |
|    explained_variance   | -0.033       |
|    learning_rate        | 0.0003       |
|    loss                 | 660          |
|    n_updates            | 136          |
|    policy_gradient_loss | -0.00269     |
|    value_loss           | 1.27e+03     |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 500          |
|    iterations           | 36           |
|    time_elapsed         | 73           |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 513          |
|    iterations           | 46           |
|    time_elapsed         | 91           |
|    total_timesteps      | 47104        |
| train/                  |              |
|    approx_kl            | 0.0013750456 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.18        |
|    explained_variance   | -2.07e-05    |
|    learning_rate        | 0.0003       |
|    loss                 | 408          |
|    n_updates            | 180          |
|    policy_gradient_loss | -0.00302     |
|    value_loss           | 868          |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 514         |
|    iterations           | 47          |
|    time_elapsed         | 93          |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 514          |
|    iterations           | 57           |
|    time_elapsed         | 113          |
|    total_timesteps      | 58368        |
| train/                  |              |
|    approx_kl            | 0.0003255679 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.21        |
|    explained_variance   | 0.013        |
|    learning_rate        | 0.0003       |
|    loss                 | 408          |
|    n_updates            | 224          |
|    policy_gradient_loss | -0.000981    |
|    value_loss           | 555          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 514           |
|    iterations           | 58            |
|    time_elapsed         | 115           |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 520          |
|    iterations           | 68           |
|    time_elapsed         | 133          |
|    total_timesteps      | 69632        |
| train/                  |              |
|    approx_kl            | 0.0014581883 |
|    clip_fraction        | 0.00293      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.25        |
|    explained_variance   | 0.2          |
|    learning_rate        | 0.0003       |
|    loss                 | 319          |
|    n_updates            | 268          |
|    policy_gradient_loss | -0.00189     |
|    value_loss           | 622          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 520           |
|    iterations           | 69            |
|    time_elapsed         | 135           |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 525          |
|    iterations           | 79           |
|    time_elapsed         | 153          |
|    total_timesteps      | 80896        |
| train/                  |              |
|    approx_kl            | 0.0020172948 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.22        |
|    explained_variance   | 0.188        |
|    learning_rate        | 0.0003       |
|    loss                 | 81.3         |
|    n_updates            | 312          |
|    policy_gradient_loss | -0.00283     |
|    value_loss           | 201          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 526           |
|    iterations           | 80            |
|    time_elapsed         | 155           |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 531          |
|    iterations           | 90           |
|    time_elapsed         | 173          |
|    total_timesteps      | 92160        |
| train/                  |              |
|    approx_kl            | 0.0011008412 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.19        |
|    explained_variance   | 0.741        |
|    learning_rate        | 0.0003       |
|    loss                 | 91.8         |
|    n_updates            | 356          |
|    policy_gradient_loss | -0.00188     |
|    value_loss           | 206          |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 532         |
|    iterations           | 91          |
|    time_elapsed         | 175         |
|    total_times

-------------------------------------------
| time/                   |               |
|    fps                  | 534           |
|    iterations           | 101           |
|    time_elapsed         | 193           |
|    total_timesteps      | 103424        |
| train/                  |               |
|    approx_kl            | 0.00042961992 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.08         |
|    explained_variance   | 0.799         |
|    learning_rate        | 0.0003        |
|    loss                 | 136           |
|    n_updates            | 400           |
|    policy_gradient_loss | -0.000522     |
|    value_loss           | 291           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 534          |
|    iterations           | 102          |
|    time_elapsed         | 195     

-----------------------------------------
| time/                   |             |
|    fps                  | 537         |
|    iterations           | 112         |
|    time_elapsed         | 213         |
|    total_timesteps      | 114688      |
| train/                  |             |
|    approx_kl            | 0.004025527 |
|    clip_fraction        | 0.00806     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.15       |
|    explained_variance   | 0.659       |
|    learning_rate        | 0.0003      |
|    loss                 | 84.6        |
|    n_updates            | 444         |
|    policy_gradient_loss | -0.00253    |
|    value_loss           | 146         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 538          |
|    iterations           | 113          |
|    time_elapsed         | 215          |
|    total_timesteps      | 1

------------------------------------------
| time/                   |              |
|    fps                  | 538          |
|    iterations           | 123          |
|    time_elapsed         | 233          |
|    total_timesteps      | 125952       |
| train/                  |              |
|    approx_kl            | 0.0051633785 |
|    clip_fraction        | 0.0176       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.1         |
|    explained_variance   | 0.669        |
|    learning_rate        | 0.0003       |
|    loss                 | 117          |
|    n_updates            | 488          |
|    policy_gradient_loss | -0.00313     |
|    value_loss           | 263          |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 539         |
|    iterations           | 124         |
|    time_elapsed         | 235         |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 540         |
|    iterations           | 134         |
|    time_elapsed         | 253         |
|    total_timesteps      | 137216      |
| train/                  |             |
|    approx_kl            | 0.008160891 |
|    clip_fraction        | 0.0681      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.12       |
|    explained_variance   | 0.797       |
|    learning_rate        | 0.0003      |
|    loss                 | 70.1        |
|    n_updates            | 532         |
|    policy_gradient_loss | -0.00459    |
|    value_loss           | 96.4        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 541         |
|    iterations           | 135         |
|    time_elapsed         | 255         |
|    total_timesteps      | 138240

----------------------------------------
| time/                   |            |
|    fps                  | 543        |
|    iterations           | 145        |
|    time_elapsed         | 273        |
|    total_timesteps      | 148480     |
| train/                  |            |
|    approx_kl            | 0.00829317 |
|    clip_fraction        | 0.0251     |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.02      |
|    explained_variance   | 0.726      |
|    learning_rate        | 0.0003     |
|    loss                 | 58.3       |
|    n_updates            | 576        |
|    policy_gradient_loss | -0.00426   |
|    value_loss           | 108        |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 542          |
|    iterations           | 146          |
|    time_elapsed         | 275          |
|    total_timesteps      | 149504       |
| tr

------------------------------------------
| time/                   |              |
|    fps                  | 544          |
|    iterations           | 156          |
|    time_elapsed         | 293          |
|    total_timesteps      | 159744       |
| train/                  |              |
|    approx_kl            | 0.0056890175 |
|    clip_fraction        | 0.062        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.92        |
|    explained_variance   | 0.873        |
|    learning_rate        | 0.0003       |
|    loss                 | 16.2         |
|    n_updates            | 620          |
|    policy_gradient_loss | -0.00248     |
|    value_loss           | 31.1         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 544          |
|    iterations           | 157          |
|    time_elapsed         | 295          |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 545          |
|    iterations           | 167          |
|    time_elapsed         | 313          |
|    total_timesteps      | 171008       |
| train/                  |              |
|    approx_kl            | 0.0052773277 |
|    clip_fraction        | 0.019        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.04        |
|    explained_variance   | 0.748        |
|    learning_rate        | 0.0003       |
|    loss                 | 29.4         |
|    n_updates            | 664          |
|    policy_gradient_loss | -0.00779     |
|    value_loss           | 74.1         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 545         |
|    iterations           | 168         |
|    time_elapsed         | 315         |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 547          |
|    iterations           | 178          |
|    time_elapsed         | 333          |
|    total_timesteps      | 182272       |
| train/                  |              |
|    approx_kl            | 0.0054197675 |
|    clip_fraction        | 0.00781      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.02        |
|    explained_variance   | 0.952        |
|    learning_rate        | 0.0003       |
|    loss                 | 13.8         |
|    n_updates            | 708          |
|    policy_gradient_loss | -0.00128     |
|    value_loss           | 26.6         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 546         |
|    iterations           | 179         |
|    time_elapsed         | 335         |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 546          |
|    iterations           | 189          |
|    time_elapsed         | 354          |
|    total_timesteps      | 193536       |
| train/                  |              |
|    approx_kl            | 0.0032590218 |
|    clip_fraction        | 0.0205       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.978       |
|    explained_variance   | 0.98         |
|    learning_rate        | 0.0003       |
|    loss                 | 4.03         |
|    n_updates            | 752          |
|    policy_gradient_loss | -0.000133    |
|    value_loss           | 9.76         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 546          |
|    iterations           | 190          |
|    time_elapsed         | 356          |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 547         |
|    iterations           | 200         |
|    time_elapsed         | 373         |
|    total_timesteps      | 204800      |
| train/                  |             |
|    approx_kl            | 0.008705966 |
|    clip_fraction        | 0.0732      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.04       |
|    explained_variance   | 0.947       |
|    learning_rate        | 0.0003      |
|    loss                 | 10.1        |
|    n_updates            | 796         |
|    policy_gradient_loss | -0.00264    |
|    value_loss           | 26          |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 547          |
|    iterations           | 201          |
|    time_elapsed         | 375          |
|    total_timesteps      | 2

------------------------------------------
| time/                   |              |
|    fps                  | 549          |
|    iterations           | 211          |
|    time_elapsed         | 393          |
|    total_timesteps      | 216064       |
| train/                  |              |
|    approx_kl            | 0.0052493243 |
|    clip_fraction        | 0.00244      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.915       |
|    explained_variance   | 0.969        |
|    learning_rate        | 0.0003       |
|    loss                 | 8.37         |
|    n_updates            | 840          |
|    policy_gradient_loss | -0.0022      |
|    value_loss           | 16.2         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 549          |
|    iterations           | 212          |
|    time_elapsed         | 395          |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 549         |
|    iterations           | 222         |
|    time_elapsed         | 413         |
|    total_timesteps      | 227328      |
| train/                  |             |
|    approx_kl            | 0.003427805 |
|    clip_fraction        | 0.00854     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.996      |
|    explained_variance   | 0.522       |
|    learning_rate        | 0.0003      |
|    loss                 | 153         |
|    n_updates            | 884         |
|    policy_gradient_loss | -0.00129    |
|    value_loss           | 384         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 549          |
|    iterations           | 223          |
|    time_elapsed         | 415          |
|    total_timesteps      | 2

-------------------------------------------
| time/                   |               |
|    fps                  | 550           |
|    iterations           | 233           |
|    time_elapsed         | 433           |
|    total_timesteps      | 238592        |
| train/                  |               |
|    approx_kl            | 0.00028097903 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.872        |
|    explained_variance   | 0.443         |
|    learning_rate        | 0.0003        |
|    loss                 | 268           |
|    n_updates            | 928           |
|    policy_gradient_loss | -0.000112     |
|    value_loss           | 525           |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 550         |
|    iterations           | 234         |
|    time_elapsed         | 435         

-------------------------------------------
| time/                   |               |
|    fps                  | 551           |
|    iterations           | 244           |
|    time_elapsed         | 452           |
|    total_timesteps      | 249856        |
| train/                  |               |
|    approx_kl            | 0.00017443422 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.846        |
|    explained_variance   | 0.316         |
|    learning_rate        | 0.0003        |
|    loss                 | 102           |
|    n_updates            | 972           |
|    policy_gradient_loss | -5.28e-05     |
|    value_loss           | 414           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 551          |
|    iterations           | 245          |
|    time_elapsed         | 454     

-----------------------------------------
| time/                   |             |
|    fps                  | 552         |
|    iterations           | 255         |
|    time_elapsed         | 472         |
|    total_timesteps      | 261120      |
| train/                  |             |
|    approx_kl            | 0.003509261 |
|    clip_fraction        | 0.00562     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.953      |
|    explained_variance   | 0.357       |
|    learning_rate        | 0.0003      |
|    loss                 | 777         |
|    n_updates            | 1016        |
|    policy_gradient_loss | -0.00349    |
|    value_loss           | 840         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 552         |
|    iterations           | 256         |
|    time_elapsed         | 474         |
|    total_timesteps      | 262144

-----------------------------------------
| time/                   |             |
|    fps                  | 553         |
|    iterations           | 266         |
|    time_elapsed         | 492         |
|    total_timesteps      | 272384      |
| train/                  |             |
|    approx_kl            | 0.003749494 |
|    clip_fraction        | 0.0115      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.793      |
|    explained_variance   | -0.1        |
|    learning_rate        | 0.0003      |
|    loss                 | 154         |
|    n_updates            | 1060        |
|    policy_gradient_loss | -0.00245    |
|    value_loss           | 289         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 553          |
|    iterations           | 267          |
|    time_elapsed         | 493          |
|    total_timesteps      | 2

------------------------------------------
| time/                   |              |
|    fps                  | 554          |
|    iterations           | 277          |
|    time_elapsed         | 511          |
|    total_timesteps      | 283648       |
| train/                  |              |
|    approx_kl            | 0.0043718065 |
|    clip_fraction        | 0.0146       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.832       |
|    explained_variance   | 0.403        |
|    learning_rate        | 0.0003       |
|    loss                 | 79.7         |
|    n_updates            | 1104         |
|    policy_gradient_loss | -0.00208     |
|    value_loss           | 274          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 554          |
|    iterations           | 278          |
|    time_elapsed         | 513          |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 554         |
|    iterations           | 288         |
|    time_elapsed         | 531         |
|    total_timesteps      | 294912      |
| train/                  |             |
|    approx_kl            | 0.004146347 |
|    clip_fraction        | 0.0547      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.79       |
|    explained_variance   | -0.00511    |
|    learning_rate        | 0.0003      |
|    loss                 | 172         |
|    n_updates            | 1148        |
|    policy_gradient_loss | -0.00601    |
|    value_loss           | 301         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 555          |
|    iterations           | 289          |
|    time_elapsed         | 533          |
|    total_timesteps      | 2

------------------------------------------
| time/                   |              |
|    fps                  | 555          |
|    iterations           | 299          |
|    time_elapsed         | 551          |
|    total_timesteps      | 306176       |
| train/                  |              |
|    approx_kl            | 0.0021719662 |
|    clip_fraction        | 0.0254       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.778       |
|    explained_variance   | 0.505        |
|    learning_rate        | 0.0003       |
|    loss                 | 37.4         |
|    n_updates            | 1192         |
|    policy_gradient_loss | -0.0028      |
|    value_loss           | 84           |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 555          |
|    iterations           | 300          |
|    time_elapsed         | 552          |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 556        |
|    iterations           | 310        |
|    time_elapsed         | 570        |
|    total_timesteps      | 317440     |
| train/                  |            |
|    approx_kl            | 0.00808713 |
|    clip_fraction        | 0.0486     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.795     |
|    explained_variance   | 0.178      |
|    learning_rate        | 0.0003     |
|    loss                 | 413        |
|    n_updates            | 1236       |
|    policy_gradient_loss | -0.00421   |
|    value_loss           | 1.14e+03   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 556        |
|    iterations           | 311        |
|    time_elapsed         | 572        |
|    total_timesteps      | 318464     |
| train/        

------------------------------------------
| time/                   |              |
|    fps                  | 556          |
|    iterations           | 321          |
|    time_elapsed         | 590          |
|    total_timesteps      | 328704       |
| train/                  |              |
|    approx_kl            | 0.0038865046 |
|    clip_fraction        | 0.0371       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.844       |
|    explained_variance   | 0.69         |
|    learning_rate        | 0.0003       |
|    loss                 | 35.4         |
|    n_updates            | 1280         |
|    policy_gradient_loss | -0.000176    |
|    value_loss           | 46.1         |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 556        |
|    iterations           | 322        |
|    time_elapsed         | 591        |
|    total_timesteps 

------------------------------------------
| time/                   |              |
|    fps                  | 557          |
|    iterations           | 332          |
|    time_elapsed         | 609          |
|    total_timesteps      | 339968       |
| train/                  |              |
|    approx_kl            | 0.0050630216 |
|    clip_fraction        | 0.0586       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.801       |
|    explained_variance   | 0.713        |
|    learning_rate        | 0.0003       |
|    loss                 | 36.6         |
|    n_updates            | 1324         |
|    policy_gradient_loss | -0.00507     |
|    value_loss           | 64.5         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 557          |
|    iterations           | 333          |
|    time_elapsed         | 611          |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 558         |
|    iterations           | 343         |
|    time_elapsed         | 629         |
|    total_timesteps      | 351232      |
| train/                  |             |
|    approx_kl            | 0.003335855 |
|    clip_fraction        | 0.00879     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.898      |
|    explained_variance   | 0.139       |
|    learning_rate        | 0.0003      |
|    loss                 | 138         |
|    n_updates            | 1368        |
|    policy_gradient_loss | -0.00461    |
|    value_loss           | 997         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 558          |
|    iterations           | 344          |
|    time_elapsed         | 631          |
|    total_timesteps      | 3

------------------------------------------
| time/                   |              |
|    fps                  | 558          |
|    iterations           | 354          |
|    time_elapsed         | 648          |
|    total_timesteps      | 362496       |
| train/                  |              |
|    approx_kl            | 0.0007094623 |
|    clip_fraction        | 0.000488     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.782       |
|    explained_variance   | 0.794        |
|    learning_rate        | 0.0003       |
|    loss                 | 18.5         |
|    n_updates            | 1412         |
|    policy_gradient_loss | -0.000718    |
|    value_loss           | 68.9         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 558          |
|    iterations           | 355          |
|    time_elapsed         | 650          |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 559          |
|    iterations           | 365          |
|    time_elapsed         | 668          |
|    total_timesteps      | 373760       |
| train/                  |              |
|    approx_kl            | 0.0010416359 |
|    clip_fraction        | 0.000732     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.732       |
|    explained_variance   | 0.322        |
|    learning_rate        | 0.0003       |
|    loss                 | 1.15e+03     |
|    n_updates            | 1456         |
|    policy_gradient_loss | 0.00199      |
|    value_loss           | 1.15e+03     |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 559          |
|    iterations           | 366          |
|    time_elapsed         | 670          |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 559          |
|    iterations           | 376          |
|    time_elapsed         | 688          |
|    total_timesteps      | 385024       |
| train/                  |              |
|    approx_kl            | 0.0011009863 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.826       |
|    explained_variance   | 0.351        |
|    learning_rate        | 0.0003       |
|    loss                 | 647          |
|    n_updates            | 1500         |
|    policy_gradient_loss | -0.00133     |
|    value_loss           | 1.21e+03     |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 559          |
|    iterations           | 377          |
|    time_elapsed         | 689          |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 560          |
|    iterations           | 387          |
|    time_elapsed         | 707          |
|    total_timesteps      | 396288       |
| train/                  |              |
|    approx_kl            | 0.0025108943 |
|    clip_fraction        | 0.00952      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.784       |
|    explained_variance   | 0.439        |
|    learning_rate        | 0.0003       |
|    loss                 | 145          |
|    n_updates            | 1544         |
|    policy_gradient_loss | -0.00226     |
|    value_loss           | 938          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 560           |
|    iterations           | 388           |
|    time_elapsed         | 709           |
|    t

-----------------------------------------
| time/                   |             |
|    fps                  | 559         |
|    iterations           | 398         |
|    time_elapsed         | 727         |
|    total_timesteps      | 407552      |
| train/                  |             |
|    approx_kl            | 0.001510944 |
|    clip_fraction        | 0.00195     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.755      |
|    explained_variance   | 0.825       |
|    learning_rate        | 0.0003      |
|    loss                 | 25.3        |
|    n_updates            | 1588        |
|    policy_gradient_loss | -0.000853   |
|    value_loss           | 60.2        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 559          |
|    iterations           | 399          |
|    time_elapsed         | 729          |
|    total_timesteps      | 4

------------------------------------------
| time/                   |              |
|    fps                  | 560          |
|    iterations           | 409          |
|    time_elapsed         | 747          |
|    total_timesteps      | 418816       |
| train/                  |              |
|    approx_kl            | 0.0023084085 |
|    clip_fraction        | 0.0112       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.827       |
|    explained_variance   | 0.225        |
|    learning_rate        | 0.0003       |
|    loss                 | 77.3         |
|    n_updates            | 1632         |
|    policy_gradient_loss | 0.000891     |
|    value_loss           | 1.74e+03     |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 560         |
|    iterations           | 410         |
|    time_elapsed         | 749         |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 560          |
|    iterations           | 420          |
|    time_elapsed         | 767          |
|    total_timesteps      | 430080       |
| train/                  |              |
|    approx_kl            | 0.0038968786 |
|    clip_fraction        | 0.0137       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.799       |
|    explained_variance   | 0.858        |
|    learning_rate        | 0.0003       |
|    loss                 | 19.6         |
|    n_updates            | 1676         |
|    policy_gradient_loss | -0.00314     |
|    value_loss           | 75           |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 560          |
|    iterations           | 421          |
|    time_elapsed         | 769          |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 560          |
|    iterations           | 431          |
|    time_elapsed         | 786          |
|    total_timesteps      | 441344       |
| train/                  |              |
|    approx_kl            | 0.0043568914 |
|    clip_fraction        | 0.0466       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.809       |
|    explained_variance   | 0.875        |
|    learning_rate        | 0.0003       |
|    loss                 | 26.4         |
|    n_updates            | 1720         |
|    policy_gradient_loss | -0.00459     |
|    value_loss           | 48.3         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 560          |
|    iterations           | 432          |
|    time_elapsed         | 788          |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 561         |
|    iterations           | 442         |
|    time_elapsed         | 806         |
|    total_timesteps      | 452608      |
| train/                  |             |
|    approx_kl            | 0.004004014 |
|    clip_fraction        | 0.0229      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.64       |
|    explained_variance   | 0.963       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.58        |
|    n_updates            | 1764        |
|    policy_gradient_loss | -0.000809   |
|    value_loss           | 17.4        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 561          |
|    iterations           | 443          |
|    time_elapsed         | 807          |
|    total_timesteps      | 4

------------------------------------------
| time/                   |              |
|    fps                  | 562          |
|    iterations           | 453          |
|    time_elapsed         | 825          |
|    total_timesteps      | 463872       |
| train/                  |              |
|    approx_kl            | 0.0036039585 |
|    clip_fraction        | 0.0122       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.837       |
|    explained_variance   | 0.4          |
|    learning_rate        | 0.0003       |
|    loss                 | 477          |
|    n_updates            | 1808         |
|    policy_gradient_loss | -0.00305     |
|    value_loss           | 1.05e+03     |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 562          |
|    iterations           | 454          |
|    time_elapsed         | 827          |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 562          |
|    iterations           | 464          |
|    time_elapsed         | 845          |
|    total_timesteps      | 475136       |
| train/                  |              |
|    approx_kl            | 0.0006878323 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.843       |
|    explained_variance   | 0.597        |
|    learning_rate        | 0.0003       |
|    loss                 | 279          |
|    n_updates            | 1852         |
|    policy_gradient_loss | -0.00157     |
|    value_loss           | 825          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 562          |
|    iterations           | 465          |
|    time_elapsed         | 846          |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 562          |
|    iterations           | 475          |
|    time_elapsed         | 864          |
|    total_timesteps      | 486400       |
| train/                  |              |
|    approx_kl            | 0.0016423711 |
|    clip_fraction        | 0.00781      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.762       |
|    explained_variance   | 0.829        |
|    learning_rate        | 0.0003       |
|    loss                 | 40.7         |
|    n_updates            | 1896         |
|    policy_gradient_loss | -0.00242     |
|    value_loss           | 115          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 562          |
|    iterations           | 476          |
|    time_elapsed         | 866          |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 562          |
|    iterations           | 487          |
|    time_elapsed         | 886          |
|    total_timesteps      | 498688       |
| train/                  |              |
|    approx_kl            | 0.0023347186 |
|    clip_fraction        | 0.00952      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.763       |
|    explained_variance   | 0.95         |
|    learning_rate        | 0.0003       |
|    loss                 | 18.5         |
|    n_updates            | 1944         |
|    policy_gradient_loss | -0.00192     |
|    value_loss           | 43.9         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 562          |
|    iterations           | 488          |
|    time_elapsed         | 888          |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 562          |
|    iterations           | 498          |
|    time_elapsed         | 906          |
|    total_timesteps      | 509952       |
| train/                  |              |
|    approx_kl            | 0.0035829647 |
|    clip_fraction        | 0.0356       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.811       |
|    explained_variance   | 0.912        |
|    learning_rate        | 0.0003       |
|    loss                 | 27.4         |
|    n_updates            | 1988         |
|    policy_gradient_loss | -0.00255     |
|    value_loss           | 44.2         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 562          |
|    iterations           | 499          |
|    time_elapsed         | 908          |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 562          |
|    iterations           | 509          |
|    time_elapsed         | 926          |
|    total_timesteps      | 521216       |
| train/                  |              |
|    approx_kl            | 0.0007209343 |
|    clip_fraction        | 0.00586      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.649       |
|    explained_variance   | 0.92         |
|    learning_rate        | 0.0003       |
|    loss                 | 7.18         |
|    n_updates            | 2032         |
|    policy_gradient_loss | 0.000632     |
|    value_loss           | 33.7         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 562          |
|    iterations           | 510          |
|    time_elapsed         | 928          |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 562           |
|    iterations           | 520           |
|    time_elapsed         | 946           |
|    total_timesteps      | 532480        |
| train/                  |               |
|    approx_kl            | 0.00020024757 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.849        |
|    explained_variance   | 0.452         |
|    learning_rate        | 0.0003        |
|    loss                 | 790           |
|    n_updates            | 2076          |
|    policy_gradient_loss | -0.000146     |
|    value_loss           | 2.05e+03      |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 562         |
|    iterations           | 521         |
|    time_elapsed         | 947         

-------------------------------------------
| time/                   |               |
|    fps                  | 563           |
|    iterations           | 531           |
|    time_elapsed         | 965           |
|    total_timesteps      | 543744        |
| train/                  |               |
|    approx_kl            | 0.00056415686 |
|    clip_fraction        | 0.000977      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.888        |
|    explained_variance   | 0.421         |
|    learning_rate        | 0.0003        |
|    loss                 | 1.1e+03       |
|    n_updates            | 2120          |
|    policy_gradient_loss | -0.000718     |
|    value_loss           | 2.2e+03       |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 563          |
|    iterations           | 532          |
|    time_elapsed         | 967     

------------------------------------------
| time/                   |              |
|    fps                  | 563          |
|    iterations           | 542          |
|    time_elapsed         | 984          |
|    total_timesteps      | 555008       |
| train/                  |              |
|    approx_kl            | 0.0023379377 |
|    clip_fraction        | 0.00732      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.58        |
|    explained_variance   | 0.835        |
|    learning_rate        | 0.0003       |
|    loss                 | 42.5         |
|    n_updates            | 2164         |
|    policy_gradient_loss | -0.0013      |
|    value_loss           | 118          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 563          |
|    iterations           | 543          |
|    time_elapsed         | 986          |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 563          |
|    iterations           | 553          |
|    time_elapsed         | 1004         |
|    total_timesteps      | 566272       |
| train/                  |              |
|    approx_kl            | 0.0019070454 |
|    clip_fraction        | 0.0244       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.742       |
|    explained_variance   | 0.943        |
|    learning_rate        | 0.0003       |
|    loss                 | 11.5         |
|    n_updates            | 2208         |
|    policy_gradient_loss | -0.00191     |
|    value_loss           | 35.6         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 564          |
|    iterations           | 554          |
|    time_elapsed         | 1005         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 564           |
|    iterations           | 564           |
|    time_elapsed         | 1023          |
|    total_timesteps      | 577536        |
| train/                  |               |
|    approx_kl            | 0.00020008872 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.743        |
|    explained_variance   | 0.703         |
|    learning_rate        | 0.0003        |
|    loss                 | 34            |
|    n_updates            | 2252          |
|    policy_gradient_loss | -0.000829     |
|    value_loss           | 260           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 564           |
|    iterations           | 565           |
|    time_elapsed         | 1025

------------------------------------------
| time/                   |              |
|    fps                  | 564          |
|    iterations           | 575          |
|    time_elapsed         | 1042         |
|    total_timesteps      | 588800       |
| train/                  |              |
|    approx_kl            | 0.0010329202 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.877       |
|    explained_variance   | 0.651        |
|    learning_rate        | 0.0003       |
|    loss                 | 226          |
|    n_updates            | 2296         |
|    policy_gradient_loss | -0.00169     |
|    value_loss           | 662          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 564          |
|    iterations           | 576          |
|    time_elapsed         | 1044         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 565           |
|    iterations           | 586           |
|    time_elapsed         | 1061          |
|    total_timesteps      | 600064        |
| train/                  |               |
|    approx_kl            | 0.00085309905 |
|    clip_fraction        | 0.000244      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.679        |
|    explained_variance   | 0.896         |
|    learning_rate        | 0.0003        |
|    loss                 | 116           |
|    n_updates            | 2340          |
|    policy_gradient_loss | -0.000103     |
|    value_loss           | 312           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 565           |
|    iterations           | 587           |
|    time_elapsed         | 1063

-----------------------------------------
| time/                   |             |
|    fps                  | 565         |
|    iterations           | 597         |
|    time_elapsed         | 1081        |
|    total_timesteps      | 611328      |
| train/                  |             |
|    approx_kl            | 0.004986047 |
|    clip_fraction        | 0.0171      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.805      |
|    explained_variance   | 0.789       |
|    learning_rate        | 0.0003      |
|    loss                 | 16.3        |
|    n_updates            | 2384        |
|    policy_gradient_loss | -0.00386    |
|    value_loss           | 61.6        |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 565           |
|    iterations           | 598           |
|    time_elapsed         | 1082          |
|    total_timesteps    

-----------------------------------------
| time/                   |             |
|    fps                  | 566         |
|    iterations           | 608         |
|    time_elapsed         | 1099        |
|    total_timesteps      | 622592      |
| train/                  |             |
|    approx_kl            | 0.004802005 |
|    clip_fraction        | 0.019       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.706      |
|    explained_variance   | 0.885       |
|    learning_rate        | 0.0003      |
|    loss                 | 26.6        |
|    n_updates            | 2428        |
|    policy_gradient_loss | -0.00318    |
|    value_loss           | 89.9        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 566         |
|    iterations           | 609         |
|    time_elapsed         | 1101        |
|    total_timesteps      | 623616

------------------------------------------
| time/                   |              |
|    fps                  | 567          |
|    iterations           | 619          |
|    time_elapsed         | 1117         |
|    total_timesteps      | 633856       |
| train/                  |              |
|    approx_kl            | 0.0036148122 |
|    clip_fraction        | 0.0156       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.755       |
|    explained_variance   | 0.929        |
|    learning_rate        | 0.0003       |
|    loss                 | 17.4         |
|    n_updates            | 2472         |
|    policy_gradient_loss | -0.00252     |
|    value_loss           | 31.1         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 567           |
|    iterations           | 620           |
|    time_elapsed         | 1119          |
|    t

-----------------------------------------
| time/                   |             |
|    fps                  | 567         |
|    iterations           | 630         |
|    time_elapsed         | 1136        |
|    total_timesteps      | 645120      |
| train/                  |             |
|    approx_kl            | 0.002075245 |
|    clip_fraction        | 0.00659     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.786      |
|    explained_variance   | 0.592       |
|    learning_rate        | 0.0003      |
|    loss                 | 43.4        |
|    n_updates            | 2516        |
|    policy_gradient_loss | -0.00202    |
|    value_loss           | 191         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 567          |
|    iterations           | 631          |
|    time_elapsed         | 1137         |
|    total_timesteps      | 6

------------------------------------------
| time/                   |              |
|    fps                  | 568          |
|    iterations           | 641          |
|    time_elapsed         | 1154         |
|    total_timesteps      | 656384       |
| train/                  |              |
|    approx_kl            | 0.0032136724 |
|    clip_fraction        | 0.0295       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.769       |
|    explained_variance   | 0.959        |
|    learning_rate        | 0.0003       |
|    loss                 | 7.04         |
|    n_updates            | 2560         |
|    policy_gradient_loss | -0.000582    |
|    value_loss           | 21           |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 568          |
|    iterations           | 642          |
|    time_elapsed         | 1156         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 569          |
|    iterations           | 652          |
|    time_elapsed         | 1172         |
|    total_timesteps      | 667648       |
| train/                  |              |
|    approx_kl            | 0.0002367216 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.775       |
|    explained_variance   | 0.457        |
|    learning_rate        | 0.0003       |
|    loss                 | 575          |
|    n_updates            | 2604         |
|    policy_gradient_loss | -0.000652    |
|    value_loss           | 948          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 569           |
|    iterations           | 653           |
|    time_elapsed         | 1174          |
|    t

-----------------------------------------
| time/                   |             |
|    fps                  | 570         |
|    iterations           | 663         |
|    time_elapsed         | 1190        |
|    total_timesteps      | 678912      |
| train/                  |             |
|    approx_kl            | 0.004013717 |
|    clip_fraction        | 0.0244      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.696      |
|    explained_variance   | 0.953       |
|    learning_rate        | 0.0003      |
|    loss                 | 8.13        |
|    n_updates            | 2648        |
|    policy_gradient_loss | -0.00314    |
|    value_loss           | 27.4        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 570          |
|    iterations           | 664          |
|    time_elapsed         | 1192         |
|    total_timesteps      | 6

-----------------------------------------
| time/                   |             |
|    fps                  | 571         |
|    iterations           | 674         |
|    time_elapsed         | 1208        |
|    total_timesteps      | 690176      |
| train/                  |             |
|    approx_kl            | 0.001293661 |
|    clip_fraction        | 0.000244    |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.739      |
|    explained_variance   | 0.634       |
|    learning_rate        | 0.0003      |
|    loss                 | 330         |
|    n_updates            | 2692        |
|    policy_gradient_loss | -0.0013     |
|    value_loss           | 786         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 571          |
|    iterations           | 675          |
|    time_elapsed         | 1210         |
|    total_timesteps      | 6

------------------------------------------
| time/                   |              |
|    fps                  | 571          |
|    iterations           | 685          |
|    time_elapsed         | 1226         |
|    total_timesteps      | 701440       |
| train/                  |              |
|    approx_kl            | 0.0022155154 |
|    clip_fraction        | 0.00195      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.778       |
|    explained_variance   | 0.577        |
|    learning_rate        | 0.0003       |
|    loss                 | 173          |
|    n_updates            | 2736         |
|    policy_gradient_loss | -0.000886    |
|    value_loss           | 736          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 572          |
|    iterations           | 686          |
|    time_elapsed         | 1228         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 572          |
|    iterations           | 696          |
|    time_elapsed         | 1244         |
|    total_timesteps      | 712704       |
| train/                  |              |
|    approx_kl            | 0.0014230933 |
|    clip_fraction        | 0.00195      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.759       |
|    explained_variance   | 0.94         |
|    learning_rate        | 0.0003       |
|    loss                 | 12.2         |
|    n_updates            | 2780         |
|    policy_gradient_loss | 0.000253     |
|    value_loss           | 50.1         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 572         |
|    iterations           | 697         |
|    time_elapsed         | 1246        |
|    total_times

-------------------------------------------
| time/                   |               |
|    fps                  | 573           |
|    iterations           | 707           |
|    time_elapsed         | 1262          |
|    total_timesteps      | 723968        |
| train/                  |               |
|    approx_kl            | 0.00065550976 |
|    clip_fraction        | 0.000977      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.772        |
|    explained_variance   | 0.404         |
|    learning_rate        | 0.0003        |
|    loss                 | 769           |
|    n_updates            | 2824          |
|    policy_gradient_loss | 6.65e-05      |
|    value_loss           | 2.12e+03      |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 573           |
|    iterations           | 708           |
|    time_elapsed         | 1264

------------------------------------------
| time/                   |              |
|    fps                  | 573          |
|    iterations           | 718          |
|    time_elapsed         | 1281         |
|    total_timesteps      | 735232       |
| train/                  |              |
|    approx_kl            | 0.0056566894 |
|    clip_fraction        | 0.022        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.711       |
|    explained_variance   | 0.963        |
|    learning_rate        | 0.0003       |
|    loss                 | 15.9         |
|    n_updates            | 2868         |
|    policy_gradient_loss | -0.00162     |
|    value_loss           | 32.7         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 573          |
|    iterations           | 719          |
|    time_elapsed         | 1282         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 574           |
|    iterations           | 729           |
|    time_elapsed         | 1299          |
|    total_timesteps      | 746496        |
| train/                  |               |
|    approx_kl            | 0.00030151883 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.796        |
|    explained_variance   | 0.539         |
|    learning_rate        | 0.0003        |
|    loss                 | 575           |
|    n_updates            | 2912          |
|    policy_gradient_loss | -3.03e-05     |
|    value_loss           | 1.38e+03      |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 574         |
|    iterations           | 730         |
|    time_elapsed         | 1300        

-----------------------------------------
| time/                   |             |
|    fps                  | 575         |
|    iterations           | 741         |
|    time_elapsed         | 1319        |
|    total_timesteps      | 758784      |
| train/                  |             |
|    approx_kl            | 0.003894319 |
|    clip_fraction        | 0.0203      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.759      |
|    explained_variance   | 0.504       |
|    learning_rate        | 0.0003      |
|    loss                 | 950         |
|    n_updates            | 2960        |
|    policy_gradient_loss | 0.000405    |
|    value_loss           | 1.95e+03    |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 575         |
|    iterations           | 742         |
|    time_elapsed         | 1321        |
|    total_timesteps      | 759808

------------------------------------------
| time/                   |              |
|    fps                  | 575          |
|    iterations           | 752          |
|    time_elapsed         | 1337         |
|    total_timesteps      | 770048       |
| train/                  |              |
|    approx_kl            | 0.0037114178 |
|    clip_fraction        | 0.0149       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.773       |
|    explained_variance   | 0.613        |
|    learning_rate        | 0.0003       |
|    loss                 | 260          |
|    n_updates            | 3004         |
|    policy_gradient_loss | -0.000882    |
|    value_loss           | 695          |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 575         |
|    iterations           | 753         |
|    time_elapsed         | 1339        |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 576          |
|    iterations           | 763          |
|    time_elapsed         | 1355         |
|    total_timesteps      | 781312       |
| train/                  |              |
|    approx_kl            | 0.0008088083 |
|    clip_fraction        | 0.00146      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.654       |
|    explained_variance   | 0.623        |
|    learning_rate        | 0.0003       |
|    loss                 | 797          |
|    n_updates            | 3048         |
|    policy_gradient_loss | -0.00233     |
|    value_loss           | 948          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 576          |
|    iterations           | 764          |
|    time_elapsed         | 1357         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 577          |
|    iterations           | 774          |
|    time_elapsed         | 1373         |
|    total_timesteps      | 792576       |
| train/                  |              |
|    approx_kl            | 0.0021036514 |
|    clip_fraction        | 0.0354       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.696       |
|    explained_variance   | 0.564        |
|    learning_rate        | 0.0003       |
|    loss                 | 178          |
|    n_updates            | 3092         |
|    policy_gradient_loss | 0.000682     |
|    value_loss           | 1.08e+03     |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 577          |
|    iterations           | 775          |
|    time_elapsed         | 1375         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 577          |
|    iterations           | 785          |
|    time_elapsed         | 1391         |
|    total_timesteps      | 803840       |
| train/                  |              |
|    approx_kl            | 0.0023559688 |
|    clip_fraction        | 0.000244     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.703       |
|    explained_variance   | 0.601        |
|    learning_rate        | 0.0003       |
|    loss                 | 362          |
|    n_updates            | 3136         |
|    policy_gradient_loss | -0.00228     |
|    value_loss           | 726          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 577           |
|    iterations           | 786           |
|    time_elapsed         | 1393          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 578          |
|    iterations           | 796          |
|    time_elapsed         | 1409         |
|    total_timesteps      | 815104       |
| train/                  |              |
|    approx_kl            | 0.0033048196 |
|    clip_fraction        | 0.011        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.693       |
|    explained_variance   | 0.943        |
|    learning_rate        | 0.0003       |
|    loss                 | 8.29         |
|    n_updates            | 3180         |
|    policy_gradient_loss | -0.00119     |
|    value_loss           | 60.5         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 578         |
|    iterations           | 797         |
|    time_elapsed         | 1411        |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 578          |
|    iterations           | 807          |
|    time_elapsed         | 1427         |
|    total_timesteps      | 826368       |
| train/                  |              |
|    approx_kl            | 0.0025548576 |
|    clip_fraction        | 0.0325       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.589       |
|    explained_variance   | 0.991        |
|    learning_rate        | 0.0003       |
|    loss                 | 4.83         |
|    n_updates            | 3224         |
|    policy_gradient_loss | -0.0012      |
|    value_loss           | 26.4         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 578         |
|    iterations           | 808         |
|    time_elapsed         | 1429        |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 579         |
|    iterations           | 818         |
|    time_elapsed         | 1445        |
|    total_timesteps      | 837632      |
| train/                  |             |
|    approx_kl            | 0.003965296 |
|    clip_fraction        | 0.0369      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.655      |
|    explained_variance   | 0.958       |
|    learning_rate        | 0.0003      |
|    loss                 | 19.9        |
|    n_updates            | 3268        |
|    policy_gradient_loss | -0.0032     |
|    value_loss           | 54.9        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 579          |
|    iterations           | 819          |
|    time_elapsed         | 1447         |
|    total_timesteps      | 8

------------------------------------------
| time/                   |              |
|    fps                  | 579          |
|    iterations           | 829          |
|    time_elapsed         | 1464         |
|    total_timesteps      | 848896       |
| train/                  |              |
|    approx_kl            | 0.0012763392 |
|    clip_fraction        | 0.000244     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.742       |
|    explained_variance   | 0.658        |
|    learning_rate        | 0.0003       |
|    loss                 | 279          |
|    n_updates            | 3312         |
|    policy_gradient_loss | -0.00123     |
|    value_loss           | 1.05e+03     |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 579          |
|    iterations           | 830          |
|    time_elapsed         | 1465         |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 579         |
|    iterations           | 840         |
|    time_elapsed         | 1483        |
|    total_timesteps      | 860160      |
| train/                  |             |
|    approx_kl            | 0.003763086 |
|    clip_fraction        | 0.0649      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.677      |
|    explained_variance   | 0.664       |
|    learning_rate        | 0.0003      |
|    loss                 | 821         |
|    n_updates            | 3356        |
|    policy_gradient_loss | 0.000328    |
|    value_loss           | 969         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 579          |
|    iterations           | 841          |
|    time_elapsed         | 1484         |
|    total_timesteps      | 8

------------------------------------------
| time/                   |              |
|    fps                  | 580          |
|    iterations           | 851          |
|    time_elapsed         | 1501         |
|    total_timesteps      | 871424       |
| train/                  |              |
|    approx_kl            | 0.0037968191 |
|    clip_fraction        | 0.0493       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.56        |
|    explained_variance   | 0.886        |
|    learning_rate        | 0.0003       |
|    loss                 | 40.7         |
|    n_updates            | 3400         |
|    policy_gradient_loss | -0.00241     |
|    value_loss           | 124          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 580          |
|    iterations           | 852          |
|    time_elapsed         | 1503         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 580          |
|    iterations           | 862          |
|    time_elapsed         | 1519         |
|    total_timesteps      | 882688       |
| train/                  |              |
|    approx_kl            | 0.0011622143 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.565       |
|    explained_variance   | 0.849        |
|    learning_rate        | 0.0003       |
|    loss                 | 325          |
|    n_updates            | 3444         |
|    policy_gradient_loss | -0.00124     |
|    value_loss           | 550          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 580          |
|    iterations           | 863          |
|    time_elapsed         | 1521         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 581           |
|    iterations           | 873           |
|    time_elapsed         | 1537          |
|    total_timesteps      | 893952        |
| train/                  |               |
|    approx_kl            | 0.00034886017 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.683        |
|    explained_variance   | 0.698         |
|    learning_rate        | 0.0003        |
|    loss                 | 182           |
|    n_updates            | 3488          |
|    policy_gradient_loss | -0.000368     |
|    value_loss           | 465           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 581           |
|    iterations           | 874           |
|    time_elapsed         | 1539

-----------------------------------------
| time/                   |             |
|    fps                  | 581         |
|    iterations           | 884         |
|    time_elapsed         | 1555        |
|    total_timesteps      | 905216      |
| train/                  |             |
|    approx_kl            | 0.006319981 |
|    clip_fraction        | 0.0317      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.68       |
|    explained_variance   | 0.741       |
|    learning_rate        | 0.0003      |
|    loss                 | 47.7        |
|    n_updates            | 3532        |
|    policy_gradient_loss | -0.00305    |
|    value_loss           | 238         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 581          |
|    iterations           | 885          |
|    time_elapsed         | 1557         |
|    total_timesteps      | 9

-----------------------------------------
| time/                   |             |
|    fps                  | 582         |
|    iterations           | 895         |
|    time_elapsed         | 1573        |
|    total_timesteps      | 916480      |
| train/                  |             |
|    approx_kl            | 0.007767592 |
|    clip_fraction        | 0.0549      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.684      |
|    explained_variance   | 0.989       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.65        |
|    n_updates            | 3576        |
|    policy_gradient_loss | -0.00247    |
|    value_loss           | 13.2        |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 582           |
|    iterations           | 896           |
|    time_elapsed         | 1575          |
|    total_timesteps    

-------------------------------------------
| time/                   |               |
|    fps                  | 582           |
|    iterations           | 906           |
|    time_elapsed         | 1592          |
|    total_timesteps      | 927744        |
| train/                  |               |
|    approx_kl            | 0.00037280144 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.707        |
|    explained_variance   | 0.712         |
|    learning_rate        | 0.0003        |
|    loss                 | 301           |
|    n_updates            | 3620          |
|    policy_gradient_loss | -0.000721     |
|    value_loss           | 841           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 582          |
|    iterations           | 907          |
|    time_elapsed         | 1593    

------------------------------------------
| time/                   |              |
|    fps                  | 583          |
|    iterations           | 917          |
|    time_elapsed         | 1610         |
|    total_timesteps      | 939008       |
| train/                  |              |
|    approx_kl            | 0.0028763078 |
|    clip_fraction        | 0.0337       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.716       |
|    explained_variance   | 0.685        |
|    learning_rate        | 0.0003       |
|    loss                 | 270          |
|    n_updates            | 3664         |
|    policy_gradient_loss | -0.00101     |
|    value_loss           | 977          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 583           |
|    iterations           | 918           |
|    time_elapsed         | 1612          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 583          |
|    iterations           | 928          |
|    time_elapsed         | 1628         |
|    total_timesteps      | 950272       |
| train/                  |              |
|    approx_kl            | 0.0004397466 |
|    clip_fraction        | 0.000977     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.619       |
|    explained_variance   | 0.701        |
|    learning_rate        | 0.0003       |
|    loss                 | 347          |
|    n_updates            | 3708         |
|    policy_gradient_loss | 5.92e-05     |
|    value_loss           | 991          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 583          |
|    iterations           | 929          |
|    time_elapsed         | 1630         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 583          |
|    iterations           | 939          |
|    time_elapsed         | 1646         |
|    total_timesteps      | 961536       |
| train/                  |              |
|    approx_kl            | 0.0020530566 |
|    clip_fraction        | 0.000488     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.658       |
|    explained_variance   | 0.805        |
|    learning_rate        | 0.0003       |
|    loss                 | 71.5         |
|    n_updates            | 3752         |
|    policy_gradient_loss | -0.00118     |
|    value_loss           | 408          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 583          |
|    iterations           | 940          |
|    time_elapsed         | 1648         |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 584         |
|    iterations           | 950         |
|    time_elapsed         | 1665        |
|    total_timesteps      | 972800      |
| train/                  |             |
|    approx_kl            | 0.008673485 |
|    clip_fraction        | 0.0327      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.725      |
|    explained_variance   | 0.986       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.98        |
|    n_updates            | 3796        |
|    policy_gradient_loss | -0.00131    |
|    value_loss           | 14.5        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 584          |
|    iterations           | 951          |
|    time_elapsed         | 1666         |
|    total_timesteps      | 9

-----------------------------------------
| time/                   |             |
|    fps                  | 584         |
|    iterations           | 961         |
|    time_elapsed         | 1683        |
|    total_timesteps      | 984064      |
| train/                  |             |
|    approx_kl            | 0.001255392 |
|    clip_fraction        | 0.00122     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.703      |
|    explained_variance   | 0.994       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.41        |
|    n_updates            | 3840        |
|    policy_gradient_loss | 0.000675    |
|    value_loss           | 7.75        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 584         |
|    iterations           | 962         |
|    time_elapsed         | 1684        |
|    total_timesteps      | 985088

------------------------------------------
| time/                   |              |
|    fps                  | 585          |
|    iterations           | 972          |
|    time_elapsed         | 1701         |
|    total_timesteps      | 995328       |
| train/                  |              |
|    approx_kl            | 0.0006883679 |
|    clip_fraction        | 0.000732     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.686       |
|    explained_variance   | 0.975        |
|    learning_rate        | 0.0003       |
|    loss                 | 16.8         |
|    n_updates            | 3884         |
|    policy_gradient_loss | -0.000643    |
|    value_loss           | 39.1         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 585          |
|    iterations           | 973          |
|    time_elapsed         | 1702         |
|    total_

## 5- Evaluation du model

In [4]:
eval_env = Monitor(gym.make("LunarLander-v2",render_mode="human"))
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True,render=True)
eval_env.close()
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

mean_reward=263.65 +/- 12.86028496538323


## 6- Enregistrer le modele

In [5]:
model.save("model_lunar")
del model

## 7- Resultat

In [11]:
model=PPO.load("model_lunar",env=Monitor(gym.make("LunarLander-v2",render_mode="human")), tensorboard_log="./ppo_tensorboard/")
vec_env = model.get_env()

Wrapping the env in a DummyVecEnv.


In [12]:
for episode in range (1,11):
    obs = vec_env.reset()
    done=False
    score=0
    while not done:
        action, states = model.predict(obs, deterministic=True)
        obs, reward, done, info = vec_env.step(action)
        vec_env.render()
        score += reward;
    print("Episode : {} ,Score : {}".format(episode,score))
vec_env.close()

Episode : 1 ,Score : [284.29962]
Episode : 2 ,Score : [274.9049]
Episode : 3 ,Score : [259.3791]
Episode : 4 ,Score : [287.54697]
Episode : 5 ,Score : [265.3504]
Episode : 6 ,Score : [254.11987]
Episode : 7 ,Score : [249.16115]
Episode : 8 ,Score : [289.84793]
Episode : 9 ,Score : [260.9319]
Episode : 10 ,Score : [251.52075]


## 8- Les graphes

python -m tensorboard.main --logdir ./PPOLunar_tensorboard/