In [None]:
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy

# Train, save, reload, evaluate and visualise a DQN agent on LunarLander-v3.
#
# The environment is created with render_mode="rgb_array"; the rollout loop
# below asks the vectorized wrapper to render in "human" mode.
env = gym.make("LunarLander-v3", render_mode="rgb_array")

# DQN with an MLP policy and otherwise default hyperparameters. verbose=1
# prints the rollout/time/train tables during learning.
model = DQN("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=int(2e5), progress_bar=True)
model.save("dqn_lunar")
del model  # discard the in-memory agent so the next step really loads from disk

model = DQN.load("dqn_lunar", env=env)

# Fetch the model's (vectorized) environment once and reuse it for both the
# evaluation and the rendered rollout.
vec_env = model.get_env()

try:
    # Average return of the policy over 10 evaluation episodes.
    mean_reward, std_reward = evaluate_policy(model, vec_env, n_eval_episodes=10)
    # Report the result; previously it was computed and silently discarded.
    print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")

    obs = vec_env.reset()
    for _step in range(1000):
        # deterministic=True: act greedily rather than sampling/exploring.
        action, _states = model.predict(obs, deterministic=True)
        obs, rewards, dones, info = vec_env.step(action)
        vec_env.render("human")
finally:
    # Always release the environment's resources, even if evaluation or the
    # rollout raises or the cell is interrupted.
    vec_env.close()


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


Output()

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 84       |
|    ep_rew_mean      | -168     |
|    exploration_rate | 0.984    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1776     |
|    time_elapsed     | 0        |
|    total_timesteps  | 336      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.15     |
|    n_updates        | 58       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 81.1     |
|    ep_rew_mean      | -177     |
|    exploration_rate | 0.969    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1811     |
|    time_elapsed     | 0        |
|    total_timesteps  | 649      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.72     |
|    n_updates        | 137      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 88.8     |
|    ep_rew_mean      | -199     |
|    exploration_rate | 0.949    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 1805     |
|    time_elapsed     | 0        |
|    total_timesteps  | 1066     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.19     |
|    n_updates        | 241      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 89.2     |
|    ep_rew_mean      | -180     |
|    exploration_rate | 0.932    |
| time/               |          |
|    episodes         | 16       |
|    fps              | 1799     |
|    time_elapsed     | 0        |
|    total_timesteps  | 1428     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.16     |
|    n_updates        | 331      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 86.2     |
|    ep_rew_mean      | -182     |
|    exploration_rate | 0.918    |
| time/               |          |
|    episodes         | 20       |
|    fps              | 1827     |
|    time_elapsed     | 0        |
|    total_timesteps  | 1723     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.05     |
|    n_updates        | 405      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 90.6     |
|    ep_rew_mean      | -197     |
|    exploration_rate | 0.897    |
| time/               |          |
|    episodes         | 24       |
|    fps              | 1800     |
|    time_elapsed     | 1        |
|    total_timesteps  | 2175     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.701    |
|    n_updates        | 518      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 92.6     |
|    ep_rew_mean      | -194     |
|    exploration_rate | 0.877    |
| time/               |          |
|    episodes         | 28       |
|    fps              | 1795     |
|    time_elapsed     | 1        |
|    total_timesteps  | 2592     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.15     |
|    n_updates        | 622      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 94.4     |
|    ep_rew_mean      | -209     |
|    exploration_rate | 0.856    |
| time/               |          |
|    episodes         | 32       |
|    fps              | 1795     |
|    time_elapsed     | 1        |
|    total_timesteps  | 3022     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.72     |
|    n_updates        | 730      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 95.6     |
|    ep_rew_mean      | -210     |
|    exploration_rate | 0.836    |
| time/               |          |
|    episodes         | 36       |
|    fps              | 1796     |
|    time_elapsed     | 1        |
|    total_timesteps  | 3443     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.23     |
|    n_updates        | 835      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 95.3     |
|    ep_rew_mean      | -217     |
|    exploration_rate | 0.819    |
| time/               |          |
|    episodes         | 40       |
|    fps              | 1799     |
|    time_elapsed     | 2        |
|    total_timesteps  | 3813     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.41     |
|    n_updates        | 928      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 96.6     |
|    ep_rew_mean      | -218     |
|    exploration_rate | 0.798    |
| time/               |          |
|    episodes         | 44       |
|    fps              | 1785     |
|    time_elapsed     | 2        |
|    total_timesteps  | 4252     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.37     |
|    n_updates        | 1037     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 103      |
|    ep_rew_mean      | -223     |
|    exploration_rate | 0.765    |
| time/               |          |
|    episodes         | 48       |
|    fps              | 1762     |
|    time_elapsed     | 2        |
|    total_timesteps  | 4940     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.49     |
|    n_updates        | 1209     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 104      |
|    ep_rew_mean      | -219     |
|    exploration_rate | 0.744    |
| time/               |          |
|    episodes         | 52       |
|    fps              | 1764     |
|    time_elapsed     | 3        |
|    total_timesteps  | 5387     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.57     |
|    n_updates        | 1321     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 105      |
|    ep_rew_mean      | -217     |
|    exploration_rate | 0.722    |
| time/               |          |
|    episodes         | 56       |
|    fps              | 1758     |
|    time_elapsed     | 3        |
|    total_timesteps  | 5856     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.694    |
|    n_updates        | 1438     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 108      |
|    ep_rew_mean      | -214     |
|    exploration_rate | 0.693    |
| time/               |          |
|    episodes         | 60       |
|    fps              | 1747     |
|    time_elapsed     | 3        |
|    total_timesteps  | 6455     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.3      |
|    n_updates        | 1588     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 114      |
|    ep_rew_mean      | -212     |
|    exploration_rate | 0.652    |
| time/               |          |
|    episodes         | 64       |
|    fps              | 1685     |
|    time_elapsed     | 4        |
|    total_timesteps  | 7321     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.31     |
|    n_updates        | 1805     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 117      |
|    ep_rew_mean      | -211     |
|    exploration_rate | 0.623    |
| time/               |          |
|    episodes         | 68       |
|    fps              | 1677     |
|    time_elapsed     | 4        |
|    total_timesteps  | 7933     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.906    |
|    n_updates        | 1958     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 117      |
|    ep_rew_mean      | -208     |
|    exploration_rate | 0.6      |
| time/               |          |
|    episodes         | 72       |
|    fps              | 1676     |
|    time_elapsed     | 5        |
|    total_timesteps  | 8430     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.863    |
|    n_updates        | 2082     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 122      |
|    ep_rew_mean      | -206     |
|    exploration_rate | 0.561    |
| time/               |          |
|    episodes         | 76       |
|    fps              | 1661     |
|    time_elapsed     | 5        |
|    total_timesteps  | 9246     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.801    |
|    n_updates        | 2286     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 125      |
|    ep_rew_mean      | -201     |
|    exploration_rate | 0.525    |
| time/               |          |
|    episodes         | 80       |
|    fps              | 1641     |
|    time_elapsed     | 6        |
|    total_timesteps  | 9990     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.67     |
|    n_updates        | 2472     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 136      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.457    |
| time/               |          |
|    episodes         | 84       |
|    fps              | 1522     |
|    time_elapsed     | 7        |
|    total_timesteps  | 11428    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.897    |
|    n_updates        | 2831     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 145      |
|    ep_rew_mean      | -196     |
|    exploration_rate | 0.392    |
| time/               |          |
|    episodes         | 88       |
|    fps              | 1465     |
|    time_elapsed     | 8        |
|    total_timesteps  | 12791    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.886    |
|    n_updates        | 3172     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 164      |
|    ep_rew_mean      | -194     |
|    exploration_rate | 0.283    |
| time/               |          |
|    episodes         | 92       |
|    fps              | 1315     |
|    time_elapsed     | 11       |
|    total_timesteps  | 15098    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.33     |
|    n_updates        | 3749     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 190      |
|    ep_rew_mean      | -192     |
|    exploration_rate | 0.134    |
| time/               |          |
|    episodes         | 96       |
|    fps              | 1154     |
|    time_elapsed     | 15       |
|    total_timesteps  | 18237    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.844    |
|    n_updates        | 4534     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 201      |
|    ep_rew_mean      | -187     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 100      |
|    fps              | 1125     |
|    time_elapsed     | 17       |
|    total_timesteps  | 20148    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.02     |
|    n_updates        | 5011     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 221      |
|    ep_rew_mean      | -186     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 104      |
|    fps              | 1067     |
|    time_elapsed     | 21       |
|    total_timesteps  | 22459    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.12     |
|    n_updates        | 5589     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 241      |
|    ep_rew_mean      | -187     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 108      |
|    fps              | 1000     |
|    time_elapsed     | 24       |
|    total_timesteps  | 24792    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.878    |
|    n_updates        | 6172     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 242      |
|    ep_rew_mean      | -190     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 112      |
|    fps              | 1005     |
|    time_elapsed     | 25       |
|    total_timesteps  | 25248    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.45     |
|    n_updates        | 6286     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 244      |
|    ep_rew_mean      | -195     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 116      |
|    fps              | 1009     |
|    time_elapsed     | 25       |
|    total_timesteps  | 25833    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.976    |
|    n_updates        | 6433     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 257      |
|    ep_rew_mean      | -195     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 120      |
|    fps              | 992      |
|    time_elapsed     | 27       |
|    total_timesteps  | 27383    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.27     |
|    n_updates        | 6820     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 284      |
|    ep_rew_mean      | -190     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 124      |
|    fps              | 929      |
|    time_elapsed     | 32       |
|    total_timesteps  | 30604    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.681    |
|    n_updates        | 7625     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 303      |
|    ep_rew_mean      | -191     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 128      |
|    fps              | 903      |
|    time_elapsed     | 36       |
|    total_timesteps  | 32888    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.819    |
|    n_updates        | 8196     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 328      |
|    ep_rew_mean      | -181     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 132      |
|    fps              | 894      |
|    time_elapsed     | 40       |
|    total_timesteps  | 35830    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.651    |
|    n_updates        | 8932     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 347      |
|    ep_rew_mean      | -172     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 136      |
|    fps              | 882      |
|    time_elapsed     | 43       |
|    total_timesteps  | 38109    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.47     |
|    n_updates        | 9502     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 375      |
|    ep_rew_mean      | -160     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 140      |
|    fps              | 875      |
|    time_elapsed     | 47       |
|    total_timesteps  | 41347    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.779    |
|    n_updates        | 10311    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 411      |
|    ep_rew_mean      | -155     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 144      |
|    fps              | 831      |
|    time_elapsed     | 54       |
|    total_timesteps  | 45347    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.958    |
|    n_updates        | 11311    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 439      |
|    ep_rew_mean      | -140     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 148      |
|    fps              | 826      |
|    time_elapsed     | 59       |
|    total_timesteps  | 48801    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.706    |
|    n_updates        | 12175    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 467      |
|    ep_rew_mean      | -137     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 152      |
|    fps              | 821      |
|    time_elapsed     | 63       |
|    total_timesteps  | 52076    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.91     |
|    n_updates        | 12993    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 489      |
|    ep_rew_mean      | -132     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 156      |
|    fps              | 810      |
|    time_elapsed     | 67       |
|    total_timesteps  | 54789    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.02     |
|    n_updates        | 13672    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 520      |
|    ep_rew_mean      | -130     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 160      |
|    fps              | 801      |
|    time_elapsed     | 72       |
|    total_timesteps  | 58448    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.13     |
|    n_updates        | 14586    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 551      |
|    ep_rew_mean      | -124     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 164      |
|    fps              | 787      |
|    time_elapsed     | 79       |
|    total_timesteps  | 62448    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.982    |
|    n_updates        | 15586    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 585      |
|    ep_rew_mean      | -118     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 168      |
|    fps              | 779      |
|    time_elapsed     | 85       |
|    total_timesteps  | 66448    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.51     |
|    n_updates        | 16586    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 620      |
|    ep_rew_mean      | -116     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 172      |
|    fps              | 775      |
|    time_elapsed     | 90       |
|    total_timesteps  | 70448    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.1      |
|    n_updates        | 17586    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 652      |
|    ep_rew_mean      | -111     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 176      |
|    fps              | 771      |
|    time_elapsed     | 96       |
|    total_timesteps  | 74448    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.886    |
|    n_updates        | 18586    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 681      |
|    ep_rew_mean      | -112     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 180      |
|    fps              | 770      |
|    time_elapsed     | 101      |
|    total_timesteps  | 78108    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.47     |
|    n_updates        | 19501    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 707      |
|    ep_rew_mean      | -108     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 184      |
|    fps              | 762      |
|    time_elapsed     | 107      |
|    total_timesteps  | 82108    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.886    |
|    n_updates        | 20501    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 729      |
|    ep_rew_mean      | -104     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 188      |
|    fps              | 756      |
|    time_elapsed     | 113      |
|    total_timesteps  | 85664    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.496    |
|    n_updates        | 21390    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 744      |
|    ep_rew_mean      | -102     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 192      |
|    fps              | 754      |
|    time_elapsed     | 118      |
|    total_timesteps  | 89511    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.954    |
|    n_updates        | 22352    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 753      |
|    ep_rew_mean      | -101     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 196      |
|    fps              | 745      |
|    time_elapsed     | 125      |
|    total_timesteps  | 93511    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.839    |
|    n_updates        | 23352    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 769      |
|    ep_rew_mean      | -101     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 200      |
|    fps              | 745      |
|    time_elapsed     | 130      |
|    total_timesteps  | 97012    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.617    |
|    n_updates        | 24227    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 786      |
|    ep_rew_mean      | -99.3    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 204      |
|    fps              | 744      |
|    time_elapsed     | 135      |
|    total_timesteps  | 101012   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.511    |
|    n_updates        | 25227    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 802      |
|    ep_rew_mean      | -93      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 208      |
|    fps              | 742      |
|    time_elapsed     | 141      |
|    total_timesteps  | 105012   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.97     |
|    n_updates        | 26227    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 835      |
|    ep_rew_mean      | -82.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 212      |
|    fps              | 742      |
|    time_elapsed     | 146      |
|    total_timesteps  | 108768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.569    |
|    n_updates        | 27166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 869      |
|    ep_rew_mean      | -76.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 216      |
|    fps              | 732      |
|    time_elapsed     | 153      |
|    total_timesteps  | 112768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.799    |
|    n_updates        | 28166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 894      |
|    ep_rew_mean      | -71.7    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 220      |
|    fps              | 729      |
|    time_elapsed     | 159      |
|    total_timesteps  | 116768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.564    |
|    n_updates        | 29166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 902      |
|    ep_rew_mean      | -69.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 224      |
|    fps              | 727      |
|    time_elapsed     | 166      |
|    total_timesteps  | 120768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.646    |
|    n_updates        | 30166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 919      |
|    ep_rew_mean      | -66.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 228      |
|    fps              | 727      |
|    time_elapsed     | 171      |
|    total_timesteps  | 124768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.61     |
|    n_updates        | 31166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 929      |
|    ep_rew_mean      | -68.1    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 232      |
|    fps              | 726      |
|    time_elapsed     | 177      |
|    total_timesteps  | 128768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.543    |
|    n_updates        | 32166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 947      |
|    ep_rew_mean      | -71.2    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 236      |
|    fps              | 724      |
|    time_elapsed     | 183      |
|    total_timesteps  | 132768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.765    |
|    n_updates        | 33166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 954      |
|    ep_rew_mean      | -75.1    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 240      |
|    fps              | 725      |
|    time_elapsed     | 188      |
|    total_timesteps  | 136768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.574    |
|    n_updates        | 34166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 954      |
|    ep_rew_mean      | -74.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 244      |
|    fps              | 723      |
|    time_elapsed     | 194      |
|    total_timesteps  | 140768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.935    |
|    n_updates        | 35166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 960      |
|    ep_rew_mean      | -81.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 248      |
|    fps              | 722      |
|    time_elapsed     | 200      |
|    total_timesteps  | 144768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.709    |
|    n_updates        | 36166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 967      |
|    ep_rew_mean      | -81.7    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 252      |
|    fps              | 721      |
|    time_elapsed     | 206      |
|    total_timesteps  | 148768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.977    |
|    n_updates        | 37166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 980      |
|    ep_rew_mean      | -81.9    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 256      |
|    fps              | 721      |
|    time_elapsed     | 211      |
|    total_timesteps  | 152768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.512    |
|    n_updates        | 38166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 983      |
|    ep_rew_mean      | -81.3    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 260      |
|    fps              | 720      |
|    time_elapsed     | 217      |
|    total_timesteps  | 156768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.555    |
|    n_updates        | 39166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 983      |
|    ep_rew_mean      | -82.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 264      |
|    fps              | 720      |
|    time_elapsed     | 223      |
|    total_timesteps  | 160768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.568    |
|    n_updates        | 40166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 983      |
|    ep_rew_mean      | -83.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 268      |
|    fps              | 718      |
|    time_elapsed     | 229      |
|    total_timesteps  | 164768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.409    |
|    n_updates        | 41166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 983      |
|    ep_rew_mean      | -84.2    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 272      |
|    fps              | 715      |
|    time_elapsed     | 235      |
|    total_timesteps  | 168768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.01     |
|    n_updates        | 42166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 983      |
|    ep_rew_mean      | -85.3    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 276      |
|    fps              | 712      |
|    time_elapsed     | 242      |
|    total_timesteps  | 172768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.72     |
|    n_updates        | 43166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 987      |
|    ep_rew_mean      | -84.9    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 280      |
|    fps              | 710      |
|    time_elapsed     | 248      |
|    total_timesteps  | 176768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.343    |
|    n_updates        | 44166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 987      |
|    ep_rew_mean      | -84.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 284      |
|    fps              | 709      |
|    time_elapsed     | 254      |
|    total_timesteps  | 180768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.514    |
|    n_updates        | 45166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 991      |
|    ep_rew_mean      | -86.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 288      |
|    fps              | 710      |
|    time_elapsed     | 260      |
|    total_timesteps  | 184768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.634    |
|    n_updates        | 46166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 993      |
|    ep_rew_mean      | -85.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 292      |
|    fps              | 708      |
|    time_elapsed     | 266      |
|    total_timesteps  | 188768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.354    |
|    n_updates        | 47166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 993      |
|    ep_rew_mean      | -85.9    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 296      |
|    fps              | 708      |
|    time_elapsed     | 272      |
|    total_timesteps  | 192768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.619    |
|    n_updates        | 48166    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 998      |
|    ep_rew_mean      | -85.8    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 300      |
|    fps              | 708      |
|    time_elapsed     | 277      |
|    total_timesteps  | 196768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.508    |
|    n_updates        | 49166    |
----------------------------------


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


: 