In [None]:
# https://stable-baselines3.readthedocs.io/en/master/guide/rl.html
# https://spinningup.openai.com/en/latest/spinningup/rl_intro2.html#a-taxonomy-of-rl-algorithms

# 1. Import dependencies

In [17]:
import os
import gym 
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

# 2. Load and Test Environment

In [18]:
environment_name = "Pong-v4"

In [19]:
env = gym.make(environment_name)

In [20]:
episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))
env.close()

  logger.warn(


Episode:1 Score:-21.0
Episode:2 Score:-21.0
Episode:3 Score:-20.0
Episode:4 Score:-21.0
Episode:5 Score:-20.0


# 3. Train an RL Model

In [21]:
log_path = os.path.join('Training','Logs')
training_log_path = os.path.join(log_path, 'DQN_Pong')

In [22]:
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold
import os
from stable_baselines3.common.atari_wrappers import AtariWrapper

In [23]:
save_path = os.path.join('Training', 'Saved Models')
log_path = os.path.join('Training', 'Logs')

In [28]:
env = gym.make(environment_name)
env = AtariWrapper(env)

In [29]:
stop_callback = StopTrainingOnRewardThreshold(reward_threshold=20, verbose=1)
eval_callback = EvalCallback(env, 
                             callback_on_new_best=stop_callback, 
                             eval_freq=10000, 
                             best_model_save_path=save_path, 
                             verbose=1)

In [30]:
model_pong = DQN('CnnPolicy', env, verbose = 1,
            buffer_size = 100000,
            learning_rate = 0.0001, 
            batch_size = 32,
            learning_starts = 100000,
            target_update_interval = 1000,
            train_freq = 4,
            gradient_steps =  1,
            exploration_fraction = 0.1,
            exploration_final_eps = 0.01,
            optimize_memory_usage = False,
            tensorboard_log=log_path)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [32]:
model_pong.learn(total_timesteps=10000000, callback=eval_callback)

2022-08-18 23:37:10.159773: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Logging to Training/Logs/DQN_24




----------------------------------
| rollout/            |          |
|    ep_len_mean      | 305      |
|    ep_rew_mean      | -20.5    |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 506      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1221     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 300      |
|    ep_rew_mean      | -20.2    |
|    exploration_rate | 0.998    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 665      |
|    time_elapsed     | 3        |
|    total_timesteps  | 2398     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 292      |
|    ep_rew_mean      | -20.5    |
|    exploration_rate | 0.997    |
| time/               |          |
|    episodes       



Eval num_timesteps=10000, episode_reward=-13.00 +/- 16.00
Episode length: 298.20 +/- 99.97
----------------------------------
| eval/               |          |
|    mean_ep_length   | 298      |
|    mean_reward      | -13      |
| rollout/            |          |
|    exploration_rate | 0.99     |
| time/               |          |
|    total_timesteps  | 10000    |
----------------------------------
New best mean reward!
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 296      |
|    ep_rew_mean      | -20.8    |
|    exploration_rate | 0.989    |
| time/               |          |
|    episodes         | 36       |
|    fps              | 676      |
|    time_elapsed     | 15       |
|    total_timesteps  | 10669    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 299      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.988    |
| time/     

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 300      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.967    |
| time/               |          |
|    episodes         | 112      |
|    fps              | 780      |
|    time_elapsed     | 42       |
|    total_timesteps  | 33507    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 301      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.966    |
| time/               |          |
|    episodes         | 116      |
|    fps              | 785      |
|    time_elapsed     | 44       |
|    total_timesteps  | 34689    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 301      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.965    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 302      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.943    |
| time/               |          |
|    episodes         | 192      |
|    fps              | 805      |
|    time_elapsed     | 71       |
|    total_timesteps  | 57696    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 302      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.942    |
| time/               |          |
|    episodes         | 196      |
|    fps              | 807      |
|    time_elapsed     | 72       |
|    total_timesteps  | 58861    |
----------------------------------
Eval num_timesteps=60000, episode_reward=-18.20 +/- 5.60
Episode length: 215.60 +/- 67.38
----------------------------------
| eval/               |          |
|    mean_ep_length   | 216      |
|    mean_reward      | -18.2    |


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 300      |
|    ep_rew_mean      | -21      |
|    exploration_rate | 0.92     |
| time/               |          |
|    episodes         | 268      |
|    fps              | 799      |
|    time_elapsed     | 100      |
|    total_timesteps  | 80627    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 301      |
|    ep_rew_mean      | -21      |
|    exploration_rate | 0.919    |
| time/               |          |
|    episodes         | 272      |
|    fps              | 801      |
|    time_elapsed     | 102      |
|    total_timesteps  | 81894    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 303      |
|    ep_rew_mean      | -21      |
|    exploration_rate | 0.918    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 301      |
|    ep_rew_mean      | -20.9    |
|    exploration_rate | 0.898    |
| time/               |          |
|    episodes         | 344      |
|    fps              | 794      |
|    time_elapsed     | 130      |
|    total_timesteps  | 103460   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0685   |
|    n_updates        | 864      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 300      |
|    ep_rew_mean      | -20.9    |
|    exploration_rate | 0.896    |
| time/               |          |
|    episodes         | 348      |
|    fps              | 789      |
|    time_elapsed     | 132      |
|    total_timesteps  | 104594   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0292   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 300      |
|    ep_rew_mean      | -20.7    |
|    exploration_rate | 0.881    |
| time/               |          |
|    episodes         | 400      |
|    fps              | 724      |
|    time_elapsed     | 166      |
|    total_timesteps  | 120269   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00312  |
|    n_updates        | 5067     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 299      |
|    ep_rew_mean      | -20.7    |
|    exploration_rate | 0.88     |
| time/               |          |
|    episodes         | 404      |
|    fps              | 721      |
|    time_elapsed     | 168      |
|    total_timesteps  | 121396   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00818  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 306      |
|    ep_rew_mean      | -20.8    |
|    exploration_rate | 0.863    |
| time/               |          |
|    episodes         | 460      |
|    fps              | 684      |
|    time_elapsed     | 202      |
|    total_timesteps  | 138708   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0118   |
|    n_updates        | 9676     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 306      |
|    ep_rew_mean      | -20.7    |
|    exploration_rate | 0.862    |
| time/               |          |
|    episodes         | 464      |
|    fps              | 682      |
|    time_elapsed     | 204      |
|    total_timesteps  | 139898   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0089   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 303      |
|    ep_rew_mean      | -20.9    |
|    exploration_rate | 0.846    |
| time/               |          |
|    episodes         | 516      |
|    fps              | 651      |
|    time_elapsed     | 238      |
|    total_timesteps  | 155437   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00819  |
|    n_updates        | 13859    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 304      |
|    ep_rew_mean      | -20.8    |
|    exploration_rate | 0.845    |
| time/               |          |
|    episodes         | 520      |
|    fps              | 650      |
|    time_elapsed     | 240      |
|    total_timesteps  | 156647   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0123   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 306      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.829    |
| time/               |          |
|    episodes         | 572      |
|    fps              | 628      |
|    time_elapsed     | 275      |
|    total_timesteps  | 172947   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00903  |
|    n_updates        | 18236    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 306      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.828    |
| time/               |          |
|    episodes         | 576      |
|    fps              | 627      |
|    time_elapsed     | 277      |
|    total_timesteps  | 174186   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0191   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 312      |
|    ep_rew_mean      | -20.5    |
|    exploration_rate | 0.812    |
| time/               |          |
|    episodes         | 628      |
|    fps              | 608      |
|    time_elapsed     | 312      |
|    total_timesteps  | 190273   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0372   |
|    n_updates        | 22568    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 312      |
|    ep_rew_mean      | -20.5    |
|    exploration_rate | 0.81     |
| time/               |          |
|    episodes         | 632      |
|    fps              | 608      |
|    time_elapsed     | 314      |
|    total_timesteps  | 191479   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0153   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 314      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.793    |
| time/               |          |
|    episodes         | 688      |
|    fps              | 595      |
|    time_elapsed     | 350      |
|    total_timesteps  | 209002   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0156   |
|    n_updates        | 27250    |
----------------------------------
Eval num_timesteps=210000, episode_reward=-16.80 +/- 7.91
Episode length: 247.80 +/- 129.94
----------------------------------
| eval/               |          |
|    mean_ep_length   | 248      |
|    mean_reward      | -16.8    |
| rollout/            |          |
|    exploration_rate | 0.792    |
| time/               |          |
|    total_timesteps  | 210000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0466   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 323      |
|    ep_rew_mean      | -20.7    |
|    exploration_rate | 0.775    |
| time/               |          |
|    episodes         | 744      |
|    fps              | 582      |
|    time_elapsed     | 390      |
|    total_timesteps  | 227444   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0152   |
|    n_updates        | 31860    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 324      |
|    ep_rew_mean      | -20.7    |
|    exploration_rate | 0.774    |
| time/               |          |
|    episodes         | 748      |
|    fps              | 581      |
|    time_elapsed     | 393      |
|    total_timesteps  | 228643   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0212   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 322      |
|    ep_rew_mean      | -20.7    |
|    exploration_rate | 0.757    |
| time/               |          |
|    episodes         | 800      |
|    fps              | 570      |
|    time_elapsed     | 429      |
|    total_timesteps  | 245260   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00556  |
|    n_updates        | 36314    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 322      |
|    ep_rew_mean      | -20.7    |
|    exploration_rate | 0.756    |
| time/               |          |
|    episodes         | 804      |
|    fps              | 570      |
|    time_elapsed     | 432      |
|    total_timesteps  | 246587   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00632  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 322      |
|    ep_rew_mean      | -20.8    |
|    exploration_rate | 0.739    |
| time/               |          |
|    episodes         | 856      |
|    fps              | 559      |
|    time_elapsed     | 470      |
|    total_timesteps  | 263447   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0161   |
|    n_updates        | 40861    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 322      |
|    ep_rew_mean      | -20.8    |
|    exploration_rate | 0.738    |
| time/               |          |
|    episodes         | 860      |
|    fps              | 559      |
|    time_elapsed     | 472      |
|    total_timesteps  | 264709   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0188   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 331      |
|    ep_rew_mean      | -20.9    |
|    exploration_rate | 0.721    |
| time/               |          |
|    episodes         | 912      |
|    fps              | 551      |
|    time_elapsed     | 512      |
|    total_timesteps  | 282179   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00639  |
|    n_updates        | 45544    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 329      |
|    ep_rew_mean      | -20.7    |
|    exploration_rate | 0.719    |
| time/               |          |
|    episodes         | 916      |
|    fps              | 550      |
|    time_elapsed     | 514      |
|    total_timesteps  | 283514   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0154   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 337      |
|    ep_rew_mean      | -20.7    |
|    exploration_rate | 0.702    |
| time/               |          |
|    episodes         | 968      |
|    fps              | 541      |
|    time_elapsed     | 555      |
|    total_timesteps  | 300870   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.011    |
|    n_updates        | 50217    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 339      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.701    |
| time/               |          |
|    episodes         | 972      |
|    fps              | 541      |
|    time_elapsed     | 558      |
|    total_timesteps  | 302320   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0122   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 344      |
|    ep_rew_mean      | -20.5    |
|    exploration_rate | 0.683    |
| time/               |          |
|    episodes         | 1024     |
|    fps              | 532      |
|    time_elapsed     | 601      |
|    total_timesteps  | 320304   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00908  |
|    n_updates        | 55075    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 343      |
|    ep_rew_mean      | -20.5    |
|    exploration_rate | 0.682    |
| time/               |          |
|    episodes         | 1028     |
|    fps              | 532      |
|    time_elapsed     | 603      |
|    total_timesteps  | 321634   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0117   |
|    n_updates      

Eval num_timesteps=340000, episode_reward=-18.60 +/- 3.88
Episode length: 427.00 +/- 106.91
----------------------------------
| eval/               |          |
|    mean_ep_length   | 427      |
|    mean_reward      | -18.6    |
| rollout/            |          |
|    exploration_rate | 0.663    |
| time/               |          |
|    total_timesteps  | 340000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0147   |
|    n_updates        | 59999    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 344      |
|    ep_rew_mean      | -20.5    |
|    exploration_rate | 0.663    |
| time/               |          |
|    episodes         | 1084     |
|    fps              | 524      |
|    time_elapsed     | 649      |
|    total_timesteps  | 340763   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0191   

Eval num_timesteps=360000, episode_reward=-18.40 +/- 2.87
Episode length: 429.80 +/- 92.20
----------------------------------
| eval/               |          |
|    mean_ep_length   | 430      |
|    mean_reward      | -18.4    |
| rollout/            |          |
|    exploration_rate | 0.644    |
| time/               |          |
|    total_timesteps  | 360000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0159   |
|    n_updates        | 64999    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 351      |
|    ep_rew_mean      | -20.4    |
|    exploration_rate | 0.643    |
| time/               |          |
|    episodes         | 1140     |
|    fps              | 518      |
|    time_elapsed     | 695      |
|    total_timesteps  | 360703   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0229   |

Eval num_timesteps=380000, episode_reward=-20.00 +/- 0.63
Episode length: 529.80 +/- 27.85
----------------------------------
| eval/               |          |
|    mean_ep_length   | 530      |
|    mean_reward      | -20      |
| rollout/            |          |
|    exploration_rate | 0.624    |
| time/               |          |
|    total_timesteps  | 380000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00374  |
|    n_updates        | 69999    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 361      |
|    ep_rew_mean      | -20.3    |
|    exploration_rate | 0.623    |
| time/               |          |
|    episodes         | 1196     |
|    fps              | 511      |
|    time_elapsed     | 744      |
|    total_timesteps  | 381055   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00505  |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 372      |
|    ep_rew_mean      | -20.3    |
|    exploration_rate | 0.603    |
| time/               |          |
|    episodes         | 1248     |
|    fps              | 506      |
|    time_elapsed     | 791      |
|    total_timesteps  | 400688   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0074   |
|    n_updates        | 75171    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 372      |
|    ep_rew_mean      | -20.3    |
|    exploration_rate | 0.602    |
| time/               |          |
|    episodes         | 1252     |
|    fps              | 505      |
|    time_elapsed     | 794      |
|    total_timesteps  | 402118   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.024    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 384      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.582    |
| time/               |          |
|    episodes         | 1304     |
|    fps              | 500      |
|    time_elapsed     | 843      |
|    total_timesteps  | 422321   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0114   |
|    n_updates        | 80580    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 383      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.581    |
| time/               |          |
|    episodes         | 1308     |
|    fps              | 500      |
|    time_elapsed     | 846      |
|    total_timesteps  | 423722   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00664  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 391      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.56     |
| time/               |          |
|    episodes         | 1360     |
|    fps              | 495      |
|    time_elapsed     | 895      |
|    total_timesteps  | 444004   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0117   |
|    n_updates        | 86000    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 391      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.559    |
| time/               |          |
|    episodes         | 1364     |
|    fps              | 495      |
|    time_elapsed     | 898      |
|    total_timesteps  | 445495   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0213   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 392      |
|    ep_rew_mean      | -20.3    |
|    exploration_rate | 0.539    |
| time/               |          |
|    episodes         | 1416     |
|    fps              | 491      |
|    time_elapsed     | 949      |
|    total_timesteps  | 466122   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00392  |
|    n_updates        | 91530    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 392      |
|    ep_rew_mean      | -20.2    |
|    exploration_rate | 0.537    |
| time/               |          |
|    episodes         | 1420     |
|    fps              | 491      |
|    time_elapsed     | 952      |
|    total_timesteps  | 467519   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00659  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 397      |
|    ep_rew_mean      | -19.8    |
|    exploration_rate | 0.517    |
| time/               |          |
|    episodes         | 1472     |
|    fps              | 487      |
|    time_elapsed     | 1002     |
|    total_timesteps  | 488227   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00514  |
|    n_updates        | 97056    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 397      |
|    ep_rew_mean      | -19.8    |
|    exploration_rate | 0.515    |
| time/               |          |
|    episodes         | 1476     |
|    fps              | 487      |
|    time_elapsed     | 1005     |
|    total_timesteps  | 489653   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00964  |
|    n_updates      

Eval num_timesteps=510000, episode_reward=-14.60 +/- 4.03
Episode length: 483.20 +/- 117.34
----------------------------------
| eval/               |          |
|    mean_ep_length   | 483      |
|    mean_reward      | -14.6    |
| rollout/            |          |
|    exploration_rate | 0.495    |
| time/               |          |
|    total_timesteps  | 510000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0141   |
|    n_updates        | 102499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 409      |
|    ep_rew_mean      | -19.5    |
|    exploration_rate | 0.493    |
| time/               |          |
|    episodes         | 1528     |
|    fps              | 482      |
|    time_elapsed     | 1061     |
|    total_timesteps  | 511694   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00363  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 422      |
|    ep_rew_mean      | -19.1    |
|    exploration_rate | 0.472    |
| time/               |          |
|    episodes         | 1580     |
|    fps              | 478      |
|    time_elapsed     | 1115     |
|    total_timesteps  | 533725   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00871  |
|    n_updates        | 108431   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 419      |
|    ep_rew_mean      | -19.1    |
|    exploration_rate | 0.47     |
| time/               |          |
|    episodes         | 1584     |
|    fps              | 478      |
|    time_elapsed     | 1118     |
|    total_timesteps  | 535130   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00775  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 410      |
|    ep_rew_mean      | -19.1    |
|    exploration_rate | 0.449    |
| time/               |          |
|    episodes         | 1636     |
|    fps              | 474      |
|    time_elapsed     | 1171     |
|    total_timesteps  | 556154   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0177   |
|    n_updates        | 114038   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 412      |
|    ep_rew_mean      | -19      |
|    exploration_rate | 0.448    |
| time/               |          |
|    episodes         | 1640     |
|    fps              | 474      |
|    time_elapsed     | 1174     |
|    total_timesteps  | 557798   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00933  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 408      |
|    ep_rew_mean      | -19.2    |
|    exploration_rate | 0.427    |
| time/               |          |
|    episodes         | 1692     |
|    fps              | 472      |
|    time_elapsed     | 1226     |
|    total_timesteps  | 579136   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0145   |
|    n_updates        | 119783   |
----------------------------------
Eval num_timesteps=580000, episode_reward=-16.60 +/- 2.06
Episode length: 552.20 +/- 84.22
----------------------------------
| eval/               |          |
|    mean_ep_length   | 552      |
|    mean_reward      | -16.6    |
| rollout/            |          |
|    exploration_rate | 0.426    |
| time/               |          |
|    total_timesteps  | 580000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00882  |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 423      |
|    ep_rew_mean      | -19.2    |
|    exploration_rate | 0.404    |
| time/               |          |
|    episodes         | 1744     |
|    fps              | 467      |
|    time_elapsed     | 1286     |
|    total_timesteps  | 601744   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0125   |
|    n_updates        | 125435   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 422      |
|    ep_rew_mean      | -19.1    |
|    exploration_rate | 0.403    |
| time/               |          |
|    episodes         | 1748     |
|    fps              | 467      |
|    time_elapsed     | 1290     |
|    total_timesteps  | 603448   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0124   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 444      |
|    ep_rew_mean      | -19      |
|    exploration_rate | 0.379    |
| time/               |          |
|    episodes         | 1800     |
|    fps              | 464      |
|    time_elapsed     | 1348     |
|    total_timesteps  | 626861   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0231   |
|    n_updates        | 131715   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 444      |
|    ep_rew_mean      | -19      |
|    exploration_rate | 0.378    |
| time/               |          |
|    episodes         | 1804     |
|    fps              | 464      |
|    time_elapsed     | 1352     |
|    total_timesteps  | 628481   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0125   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 458      |
|    ep_rew_mean      | -18.8    |
|    exploration_rate | 0.356    |
| time/               |          |
|    episodes         | 1852     |
|    fps              | 460      |
|    time_elapsed     | 1412     |
|    total_timesteps  | 650978   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0103   |
|    n_updates        | 137744   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 458      |
|    ep_rew_mean      | -18.8    |
|    exploration_rate | 0.354    |
| time/               |          |
|    episodes         | 1856     |
|    fps              | 460      |
|    time_elapsed     | 1415     |
|    total_timesteps  | 652651   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0183   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 459      |
|    ep_rew_mean      | -18.6    |
|    exploration_rate | 0.33     |
| time/               |          |
|    episodes         | 1908     |
|    fps              | 458      |
|    time_elapsed     | 1475     |
|    total_timesteps  | 676783   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0127   |
|    n_updates        | 144195   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 461      |
|    ep_rew_mean      | -18.5    |
|    exploration_rate | 0.328    |
| time/               |          |
|    episodes         | 1912     |
|    fps              | 458      |
|    time_elapsed     | 1479     |
|    total_timesteps  | 678537   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0256   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 467      |
|    ep_rew_mean      | -18.3    |
|    exploration_rate | 0.306    |
| time/               |          |
|    episodes         | 1960     |
|    fps              | 454      |
|    time_elapsed     | 1542     |
|    total_timesteps  | 701387   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0272   |
|    n_updates        | 150346   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 471      |
|    ep_rew_mean      | -18.2    |
|    exploration_rate | 0.303    |
| time/               |          |
|    episodes         | 1964     |
|    fps              | 454      |
|    time_elapsed     | 1547     |
|    total_timesteps  | 703536   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0144   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 475      |
|    ep_rew_mean      | -18.1    |
|    exploration_rate | 0.279    |
| time/               |          |
|    episodes         | 2016     |
|    fps              | 452      |
|    time_elapsed     | 1609     |
|    total_timesteps  | 727948   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0215   |
|    n_updates        | 156986   |
----------------------------------
Eval num_timesteps=730000, episode_reward=-14.20 +/- 5.67
Episode length: 410.40 +/- 185.09
----------------------------------
| eval/               |          |
|    mean_ep_length   | 410      |
|    mean_reward      | -14.2    |
| rollout/            |          |
|    exploration_rate | 0.277    |
| time/               |          |
|    total_timesteps  | 730000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0172   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 476      |
|    ep_rew_mean      | -18.3    |
|    exploration_rate | 0.254    |
| time/               |          |
|    episodes         | 2068     |
|    fps              | 449      |
|    time_elapsed     | 1676     |
|    total_timesteps  | 753034   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0148   |
|    n_updates        | 163258   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 476      |
|    ep_rew_mean      | -18.3    |
|    exploration_rate | 0.253    |
| time/               |          |
|    episodes         | 2072     |
|    fps              | 449      |
|    time_elapsed     | 1681     |
|    total_timesteps  | 754992   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00988  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 476      |
|    ep_rew_mean      | -17.7    |
|    exploration_rate | 0.228    |
| time/               |          |
|    episodes         | 2124     |
|    fps              | 446      |
|    time_elapsed     | 1744     |
|    total_timesteps  | 779677   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00873  |
|    n_updates        | 169919   |
----------------------------------
Eval num_timesteps=780000, episode_reward=-13.00 +/- 3.63
Episode length: 529.80 +/- 147.15
----------------------------------
| eval/               |          |
|    mean_ep_length   | 530      |
|    mean_reward      | -13      |
| rollout/            |          |
|    exploration_rate | 0.228    |
| time/               |          |
|    total_timesteps  | 780000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0143   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 495      |
|    ep_rew_mean      | -17.3    |
|    exploration_rate | 0.202    |
| time/               |          |
|    episodes         | 2176     |
|    fps              | 443      |
|    time_elapsed     | 1816     |
|    total_timesteps  | 806462   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0113   |
|    n_updates        | 176615   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 498      |
|    ep_rew_mean      | -17.3    |
|    exploration_rate | 0.199    |
| time/               |          |
|    episodes         | 2180     |
|    fps              | 443      |
|    time_elapsed     | 1821     |
|    total_timesteps  | 808660   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00625  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 524      |
|    ep_rew_mean      | -17.2    |
|    exploration_rate | 0.174    |
| time/               |          |
|    episodes         | 2228     |
|    fps              | 441      |
|    time_elapsed     | 1891     |
|    total_timesteps  | 834345   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0188   |
|    n_updates        | 183586   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 520      |
|    ep_rew_mean      | -17.3    |
|    exploration_rate | 0.172    |
| time/               |          |
|    episodes         | 2232     |
|    fps              | 441      |
|    time_elapsed     | 1895     |
|    total_timesteps  | 836160   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00439  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 538      |
|    ep_rew_mean      | -17      |
|    exploration_rate | 0.146    |
| time/               |          |
|    episodes         | 2280     |
|    fps              | 438      |
|    time_elapsed     | 1967     |
|    total_timesteps  | 862500   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0199   |
|    n_updates        | 190624   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 535      |
|    ep_rew_mean      | -16.8    |
|    exploration_rate | 0.144    |
| time/               |          |
|    episodes         | 2284     |
|    fps              | 438      |
|    time_elapsed     | 1973     |
|    total_timesteps  | 864797   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00564  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 560      |
|    ep_rew_mean      | -16.2    |
|    exploration_rate | 0.117    |
| time/               |          |
|    episodes         | 2332     |
|    fps              | 435      |
|    time_elapsed     | 2048     |
|    total_timesteps  | 892132   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0191   |
|    n_updates        | 198032   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 562      |
|    ep_rew_mean      | -16.2    |
|    exploration_rate | 0.115    |
| time/               |          |
|    episodes         | 2336     |
|    fps              | 435      |
|    time_elapsed     | 2053     |
|    total_timesteps  | 894124   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00409  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 584      |
|    ep_rew_mean      | -15.8    |
|    exploration_rate | 0.086    |
| time/               |          |
|    episodes         | 2384     |
|    fps              | 432      |
|    time_elapsed     | 2133     |
|    total_timesteps  | 923221   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00609  |
|    n_updates        | 205805   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 591      |
|    ep_rew_mean      | -15.6    |
|    exploration_rate | 0.083    |
| time/               |          |
|    episodes         | 2388     |
|    fps              | 432      |
|    time_elapsed     | 2140     |
|    total_timesteps  | 926213   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00938  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 636      |
|    ep_rew_mean      | -15.2    |
|    exploration_rate | 0.0519   |
| time/               |          |
|    episodes         | 2436     |
|    fps              | 429      |
|    time_elapsed     | 2227     |
|    total_timesteps  | 957699   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0139   |
|    n_updates        | 214424   |
----------------------------------
Eval num_timesteps=960000, episode_reward=-11.40 +/- 5.31
Episode length: 633.00 +/- 186.71
----------------------------------
| eval/               |          |
|    mean_ep_length   | 633      |
|    mean_reward      | -11.4    |
| rollout/            |          |
|    exploration_rate | 0.0496   |
| time/               |          |
|    total_timesteps  | 960000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0201   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 696      |
|    ep_rew_mean      | -14.7    |
|    exploration_rate | 0.0171   |
| time/               |          |
|    episodes         | 2484     |
|    fps              | 426      |
|    time_elapsed     | 2328     |
|    total_timesteps  | 992801   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00812  |
|    n_updates        | 223200   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 690      |
|    ep_rew_mean      | -15      |
|    exploration_rate | 0.0148   |
| time/               |          |
|    episodes         | 2488     |
|    fps              | 426      |
|    time_elapsed     | 2333     |
|    total_timesteps  | 995196   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00614  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 715      |
|    ep_rew_mean      | -14.2    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2536     |
|    fps              | 423      |
|    time_elapsed     | 2429     |
|    total_timesteps  | 1029183  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00586  |
|    n_updates        | 232295   |
----------------------------------
Eval num_timesteps=1030000, episode_reward=-11.80 +/- 4.53
Episode length: 772.80 +/- 149.78
----------------------------------
| eval/               |          |
|    mean_ep_length   | 773      |
|    mean_reward      | -11.8    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1030000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0172  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 721      |
|    ep_rew_mean      | -14      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2584     |
|    fps              | 420      |
|    time_elapsed     | 2531     |
|    total_timesteps  | 1064894  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00722  |
|    n_updates        | 241223   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 722      |
|    ep_rew_mean      | -14      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2588     |
|    fps              | 420      |
|    time_elapsed     | 2537     |
|    total_timesteps  | 1067440  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0124   |
|    n_updates      

Eval num_timesteps=1100000, episode_reward=-11.40 +/- 5.12
Episode length: 569.20 +/- 278.18
----------------------------------
| eval/               |          |
|    mean_ep_length   | 569      |
|    mean_reward      | -11.4    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1100000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0153   |
|    n_updates        | 249999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 739      |
|    ep_rew_mean      | -14.1    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2636     |
|    fps              | 417      |
|    time_elapsed     | 2640     |
|    total_timesteps  | 1103049  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0135  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 750      |
|    ep_rew_mean      | -13.5    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2684     |
|    fps              | 415      |
|    time_elapsed     | 2742     |
|    total_timesteps  | 1139935  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00959  |
|    n_updates        | 259983   |
----------------------------------
Eval num_timesteps=1140000, episode_reward=-12.20 +/- 3.25
Episode length: 756.80 +/- 87.40
----------------------------------
| eval/               |          |
|    mean_ep_length   | 757      |
|    mean_reward      | -12.2    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1140000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0287   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 795      |
|    ep_rew_mean      | -12.4    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2732     |
|    fps              | 412      |
|    time_elapsed     | 2856     |
|    total_timesteps  | 1178749  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0161   |
|    n_updates        | 269687   |
----------------------------------
Eval num_timesteps=1180000, episode_reward=-6.00 +/- 7.38
Episode length: 768.20 +/- 231.99
----------------------------------
| eval/               |          |
|    mean_ep_length   | 768      |
|    mean_reward      | -6       |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1180000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0306   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 807      |
|    ep_rew_mean      | -11.9    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2780     |
|    fps              | 410      |
|    time_elapsed     | 2969     |
|    total_timesteps  | 1217590  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.022    |
|    n_updates        | 279397   |
----------------------------------
Eval num_timesteps=1220000, episode_reward=-6.80 +/- 5.56
Episode length: 868.80 +/- 295.73
----------------------------------
| eval/               |          |
|    mean_ep_length   | 869      |
|    mean_reward      | -6.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1220000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0242   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 824      |
|    ep_rew_mean      | -11.7    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2828     |
|    fps              | 407      |
|    time_elapsed     | 3086     |
|    total_timesteps  | 1257930  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0289   |
|    n_updates        | 289482   |
----------------------------------
Eval num_timesteps=1260000, episode_reward=-11.00 +/- 2.76
Episode length: 790.40 +/- 99.95
----------------------------------
| eval/               |          |
|    mean_ep_length   | 790      |
|    mean_reward      | -11      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1260000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00537  

Eval num_timesteps=1300000, episode_reward=-10.40 +/- 6.18
Episode length: 633.60 +/- 331.52
----------------------------------
| eval/               |          |
|    mean_ep_length   | 634      |
|    mean_reward      | -10.4    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1300000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00699  |
|    n_updates        | 299999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 861      |
|    ep_rew_mean      | -11.5    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2876     |
|    fps              | 404      |
|    time_elapsed     | 3212     |
|    total_timesteps  | 1300720  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00937 

Eval num_timesteps=1340000, episode_reward=-6.60 +/- 6.92
Episode length: 721.20 +/- 364.17
----------------------------------
| eval/               |          |
|    mean_ep_length   | 721      |
|    mean_reward      | -6.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1340000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0106   |
|    n_updates        | 309999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 874      |
|    ep_rew_mean      | -12      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2924     |
|    fps              | 403      |
|    time_elapsed     | 3330     |
|    total_timesteps  | 1342424  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00345  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 879      |
|    ep_rew_mean      | -12.2    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2968     |
|    fps              | 401      |
|    time_elapsed     | 3443     |
|    total_timesteps  | 1381530  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0137   |
|    n_updates        | 320382   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 883      |
|    ep_rew_mean      | -12.2    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2972     |
|    fps              | 401      |
|    time_elapsed     | 3452     |
|    total_timesteps  | 1385181  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0124   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 908      |
|    ep_rew_mean      | -11.7    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3016     |
|    fps              | 399      |
|    time_elapsed     | 3569     |
|    total_timesteps  | 1425875  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00635  |
|    n_updates        | 331468   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 912      |
|    ep_rew_mean      | -11.4    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3020     |
|    fps              | 399      |
|    time_elapsed     | 3577     |
|    total_timesteps  | 1429620  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0106   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 917      |
|    ep_rew_mean      | -10.5    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3064     |
|    fps              | 397      |
|    time_elapsed     | 3694     |
|    total_timesteps  | 1469262  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00896  |
|    n_updates        | 342315   |
----------------------------------
Eval num_timesteps=1470000, episode_reward=-5.60 +/- 3.26
Episode length: 840.00 +/- 336.39
----------------------------------
| eval/               |          |
|    mean_ep_length   | 840      |
|    mean_reward      | -5.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1470000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.037    

Eval num_timesteps=1510000, episode_reward=-4.60 +/- 2.65
Episode length: 821.80 +/- 417.59
----------------------------------
| eval/               |          |
|    mean_ep_length   | 822      |
|    mean_reward      | -4.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1510000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0127   |
|    n_updates        | 352499   |
----------------------------------
New best mean reward!
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 908      |
|    ep_rew_mean      | -10.8    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3112     |
|    fps              | 395      |
|    time_elapsed     | 3826     |
|    total_timesteps  | 1512940  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 934      |
|    ep_rew_mean      | -10.6    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3156     |
|    fps              | 393      |
|    time_elapsed     | 3953     |
|    total_timesteps  | 1555617  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.011    |
|    n_updates        | 363904   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 936      |
|    ep_rew_mean      | -10.5    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3160     |
|    fps              | 393      |
|    time_elapsed     | 3962     |
|    total_timesteps  | 1559457  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0094   |
|    n_updates      

Eval num_timesteps=1600000, episode_reward=-7.80 +/- 5.11
Episode length: 825.40 +/- 397.26
----------------------------------
| eval/               |          |
|    mean_ep_length   | 825      |
|    mean_reward      | -7.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1600000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.019    |
|    n_updates        | 374999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 969      |
|    ep_rew_mean      | -9.9     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3204     |
|    fps              | 391      |
|    time_elapsed     | 4094     |
|    total_timesteps  | 1602608  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0244   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 974      |
|    ep_rew_mean      | -9.42    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3248     |
|    fps              | 389      |
|    time_elapsed     | 4219     |
|    total_timesteps  | 1645309  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0138   |
|    n_updates        | 386327   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 970      |
|    ep_rew_mean      | -9.44    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3252     |
|    fps              | 390      |
|    time_elapsed     | 4227     |
|    total_timesteps  | 1648938  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0135   |
|    n_updates      

Eval num_timesteps=1690000, episode_reward=-11.20 +/- 3.31
Episode length: 679.00 +/- 271.01
----------------------------------
| eval/               |          |
|    mean_ep_length   | 679      |
|    mean_reward      | -11.2    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1690000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0234   |
|    n_updates        | 397499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 987      |
|    ep_rew_mean      | -9.49    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3296     |
|    fps              | 388      |
|    time_elapsed     | 4362     |
|    total_timesteps  | 1693727  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00765 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 993      |
|    ep_rew_mean      | -9.23    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3340     |
|    fps              | 386      |
|    time_elapsed     | 4489     |
|    total_timesteps  | 1736932  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00876  |
|    n_updates        | 409232   |
----------------------------------
Eval num_timesteps=1740000, episode_reward=-10.60 +/- 3.61
Episode length: 784.00 +/- 252.40
----------------------------------
| eval/               |          |
|    mean_ep_length   | 784      |
|    mean_reward      | -10.6    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1740000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0168  

Eval num_timesteps=1780000, episode_reward=-8.00 +/- 3.03
Episode length: 983.20 +/- 126.40
----------------------------------
| eval/               |          |
|    mean_ep_length   | 983      |
|    mean_reward      | -8       |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1780000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00416  |
|    n_updates        | 419999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 976      |
|    ep_rew_mean      | -10.2    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3388     |
|    fps              | 385      |
|    time_elapsed     | 4629     |
|    total_timesteps  | 1782748  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0493   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 958      |
|    ep_rew_mean      | -11      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3432     |
|    fps              | 383      |
|    time_elapsed     | 4754     |
|    total_timesteps  | 1825145  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0177   |
|    n_updates        | 431286   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 965      |
|    ep_rew_mean      | -10.6    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3436     |
|    fps              | 384      |
|    time_elapsed     | 4763     |
|    total_timesteps  | 1829248  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0111   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.01e+03 |
|    ep_rew_mean      | -9.41    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3476     |
|    fps              | 382      |
|    time_elapsed     | 4897     |
|    total_timesteps  | 1871840  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0249   |
|    n_updates        | 442959   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.01e+03 |
|    ep_rew_mean      | -9.33    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3480     |
|    fps              | 382      |
|    time_elapsed     | 4905     |
|    total_timesteps  | 1875405  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0125   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.02e+03 |
|    ep_rew_mean      | -9.05    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3524     |
|    fps              | 381      |
|    time_elapsed     | 5033     |
|    total_timesteps  | 1919474  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00957  |
|    n_updates        | 454868   |
----------------------------------
Eval num_timesteps=1920000, episode_reward=-6.80 +/- 3.43
Episode length: 855.00 +/- 281.83
----------------------------------
| eval/               |          |
|    mean_ep_length   | 855      |
|    mean_reward      | -6.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1920000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00539  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.02e+03 |
|    ep_rew_mean      | -9.26    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3568     |
|    fps              | 379      |
|    time_elapsed     | 5172     |
|    total_timesteps  | 1965013  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0272   |
|    n_updates        | 466253   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.02e+03 |
|    ep_rew_mean      | -9.19    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3572     |
|    fps              | 380      |
|    time_elapsed     | 5182     |
|    total_timesteps  | 1969260  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0146   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.04e+03 |
|    ep_rew_mean      | -9       |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3612     |
|    fps              | 378      |
|    time_elapsed     | 5311     |
|    total_timesteps  | 2010969  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00588  |
|    n_updates        | 477742   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.03e+03 |
|    ep_rew_mean      | -9.07    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3616     |
|    fps              | 378      |
|    time_elapsed     | 5320     |
|    total_timesteps  | 2014993  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00853  |
|    n_updates      

Eval num_timesteps=2060000, episode_reward=-6.80 +/- 6.55
Episode length: 1001.80 +/- 144.80
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1e+03    |
|    mean_reward      | -6.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2060000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0255   |
|    n_updates        | 489999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.04e+03 |
|    ep_rew_mean      | -9.47    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3660     |
|    fps              | 377      |
|    time_elapsed     | 5461     |
|    total_timesteps  | 2061103  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.014   

Eval num_timesteps=2110000, episode_reward=-8.60 +/- 3.93
Episode length: 942.80 +/- 72.28
----------------------------------
| eval/               |          |
|    mean_ep_length   | 943      |
|    mean_reward      | -8.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2110000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00852  |
|    n_updates        | 502499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.09e+03 |
|    ep_rew_mean      | -8.79    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3704     |
|    fps              | 376      |
|    time_elapsed     | 5614     |
|    total_timesteps  | 2111231  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00881  |

Eval num_timesteps=2160000, episode_reward=-8.20 +/- 7.22
Episode length: 758.80 +/- 350.84
----------------------------------
| eval/               |          |
|    mean_ep_length   | 759      |
|    mean_reward      | -8.2     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2160000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0259   |
|    n_updates        | 514999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.13e+03 |
|    ep_rew_mean      | -8.06    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3748     |
|    fps              | 375      |
|    time_elapsed     | 5761     |
|    total_timesteps  | 2161052  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00523  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.12e+03 |
|    ep_rew_mean      | -8.09    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3792     |
|    fps              | 374      |
|    time_elapsed     | 5900     |
|    total_timesteps  | 2208594  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0117   |
|    n_updates        | 527148   |
----------------------------------
Eval num_timesteps=2210000, episode_reward=-6.60 +/- 6.18
Episode length: 1049.80 +/- 216.55
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.05e+03 |
|    mean_reward      | -6.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2210000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0147  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.1e+03  |
|    ep_rew_mean      | -8.62    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3836     |
|    fps              | 372      |
|    time_elapsed     | 6052     |
|    total_timesteps  | 2257258  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00624  |
|    n_updates        | 539314   |
----------------------------------
Eval num_timesteps=2260000, episode_reward=-7.60 +/- 3.93
Episode length: 994.00 +/- 416.89
----------------------------------
| eval/               |          |
|    mean_ep_length   | 994      |
|    mean_reward      | -7.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2260000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.014    

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.13e+03 |
|    ep_rew_mean      | -7.98    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3880     |
|    fps              | 371      |
|    time_elapsed     | 6206     |
|    total_timesteps  | 2308161  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0249   |
|    n_updates        | 552040   |
----------------------------------
Eval num_timesteps=2310000, episode_reward=-7.00 +/- 9.63
Episode length: 740.80 +/- 319.83
----------------------------------
| eval/               |          |
|    mean_ep_length   | 741      |
|    mean_reward      | -7       |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2310000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0169   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.15e+03 |
|    ep_rew_mean      | -8.39    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3924     |
|    fps              | 370      |
|    time_elapsed     | 6359     |
|    total_timesteps  | 2358920  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00642  |
|    n_updates        | 564729   |
----------------------------------
Eval num_timesteps=2360000, episode_reward=-7.80 +/- 5.19
Episode length: 830.20 +/- 334.58
----------------------------------
| eval/               |          |
|    mean_ep_length   | 830      |
|    mean_reward      | -7.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2360000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00328  

Eval num_timesteps=2410000, episode_reward=-2.80 +/- 6.94
Episode length: 992.00 +/- 517.13
----------------------------------
| eval/               |          |
|    mean_ep_length   | 992      |
|    mean_reward      | -2.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2410000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0176   |
|    n_updates        | 577499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.16e+03 |
|    ep_rew_mean      | -7.29    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3968     |
|    fps              | 369      |
|    time_elapsed     | 6524     |
|    total_timesteps  | 2411029  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0296   

Eval num_timesteps=2460000, episode_reward=-10.20 +/- 4.40
Episode length: 776.40 +/- 291.66
----------------------------------
| eval/               |          |
|    mean_ep_length   | 776      |
|    mean_reward      | -10.2    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2460000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00689  |
|    n_updates        | 589999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.16e+03 |
|    ep_rew_mean      | -6.69    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4012     |
|    fps              | 368      |
|    time_elapsed     | 6680     |
|    total_timesteps  | 2462651  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00857 

Eval num_timesteps=2510000, episode_reward=-6.60 +/- 2.15
Episode length: 1091.40 +/- 510.70
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.09e+03 |
|    mean_reward      | -6.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2510000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0314   |
|    n_updates        | 602499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.18e+03 |
|    ep_rew_mean      | -5.6     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4056     |
|    fps              | 367      |
|    time_elapsed     | 6839     |
|    total_timesteps  | 2514508  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0116  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.2e+03  |
|    ep_rew_mean      | -6.04    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4096     |
|    fps              | 366      |
|    time_elapsed     | 6991     |
|    total_timesteps  | 2563111  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00875  |
|    n_updates        | 615777   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.18e+03 |
|    ep_rew_mean      | -6.32    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4100     |
|    fps              | 366      |
|    time_elapsed     | 7000     |
|    total_timesteps  | 2566965  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00569  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.19e+03 |
|    ep_rew_mean      | -6.93    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4140     |
|    fps              | 365      |
|    time_elapsed     | 7149     |
|    total_timesteps  | 2614749  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0189   |
|    n_updates        | 628687   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.18e+03 |
|    ep_rew_mean      | -6.7     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4144     |
|    fps              | 365      |
|    time_elapsed     | 7160     |
|    total_timesteps  | 2619307  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0212   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.2e+03  |
|    ep_rew_mean      | -5.67    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4184     |
|    fps              | 364      |
|    time_elapsed     | 7314     |
|    total_timesteps  | 2668357  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0118   |
|    n_updates        | 642089   |
----------------------------------
Eval num_timesteps=2670000, episode_reward=-1.40 +/- 6.65
Episode length: 1059.40 +/- 238.21
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.06e+03 |
|    mean_reward      | -1.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2670000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00819 

Eval num_timesteps=2720000, episode_reward=-1.60 +/- 7.66
Episode length: 1003.40 +/- 346.74
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1e+03    |
|    mean_reward      | -1.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2720000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0237   |
|    n_updates        | 654999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.22e+03 |
|    ep_rew_mean      | -4.71    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4228     |
|    fps              | 363      |
|    time_elapsed     | 7488     |
|    total_timesteps  | 2722445  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00841 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.23e+03 |
|    ep_rew_mean      | -5.65    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4268     |
|    fps              | 362      |
|    time_elapsed     | 7644     |
|    total_timesteps  | 2772305  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00952  |
|    n_updates        | 668076   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.22e+03 |
|    ep_rew_mean      | -5.46    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4272     |
|    fps              | 362      |
|    time_elapsed     | 7655     |
|    total_timesteps  | 2777194  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00974  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.23e+03 |
|    ep_rew_mean      | -5.57    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4312     |
|    fps              | 361      |
|    time_elapsed     | 7806     |
|    total_timesteps  | 2824440  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0113   |
|    n_updates        | 681109   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | -5.29    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4316     |
|    fps              | 361      |
|    time_elapsed     | 7819     |
|    total_timesteps  | 2829884  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0112   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.2e+03  |
|    ep_rew_mean      | -5.61    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4356     |
|    fps              | 361      |
|    time_elapsed     | 7969     |
|    total_timesteps  | 2877290  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0104   |
|    n_updates        | 694322   |
----------------------------------
Eval num_timesteps=2880000, episode_reward=-6.20 +/- 2.04
Episode length: 1145.80 +/- 119.04
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.15e+03 |
|    mean_reward      | -6.2     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2880000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00362 

Eval num_timesteps=2930000, episode_reward=-5.00 +/- 1.26
Episode length: 1061.00 +/- 458.98
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.06e+03 |
|    mean_reward      | -5       |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2930000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00735  |
|    n_updates        | 707499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.21e+03 |
|    ep_rew_mean      | -6.97    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4400     |
|    fps              | 359      |
|    time_elapsed     | 8145     |
|    total_timesteps  | 2932309  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0123  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.23e+03 |
|    ep_rew_mean      | -6.84    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4440     |
|    fps              | 359      |
|    time_elapsed     | 8306     |
|    total_timesteps  | 2982379  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00333  |
|    n_updates        | 720594   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.23e+03 |
|    ep_rew_mean      | -6.72    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4444     |
|    fps              | 359      |
|    time_elapsed     | 8317     |
|    total_timesteps  | 2987061  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0147   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | -4.76    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4484     |
|    fps              | 358      |
|    time_elapsed     | 8473     |
|    total_timesteps  | 3036800  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0209   |
|    n_updates        | 734199   |
----------------------------------
Eval num_timesteps=3040000, episode_reward=-4.00 +/- 2.61
Episode length: 1021.40 +/- 518.34
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.02e+03 |
|    mean_reward      | -4       |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3040000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0199  

Eval num_timesteps=3090000, episode_reward=-0.80 +/- 3.19
Episode length: 1244.40 +/- 85.24
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.24e+03 |
|    mean_reward      | -0.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3090000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0183   |
|    n_updates        | 747499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | -5.57    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4528     |
|    fps              | 357      |
|    time_elapsed     | 8649     |
|    total_timesteps  | 3092089  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00465  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | -6.04    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4568     |
|    fps              | 356      |
|    time_elapsed     | 8809     |
|    total_timesteps  | 3142723  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.012    |
|    n_updates        | 760680   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | -6.05    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4572     |
|    fps              | 356      |
|    time_elapsed     | 8819     |
|    total_timesteps  | 3147158  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00412  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | -5.64    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4612     |
|    fps              | 356      |
|    time_elapsed     | 8983     |
|    total_timesteps  | 3199195  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0148   |
|    n_updates        | 774798   |
----------------------------------
Eval num_timesteps=3200000, episode_reward=-0.60 +/- 3.32
Episode length: 1247.20 +/- 385.66
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.25e+03 |
|    mean_reward      | -0.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3200000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0225  

Eval num_timesteps=3250000, episode_reward=0.60 +/- 2.42
Episode length: 1251.40 +/- 247.74
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.25e+03 |
|    mean_reward      | 0.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3250000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00736  |
|    n_updates        | 787499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | -4.66    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4656     |
|    fps              | 355      |
|    time_elapsed     | 9160     |
|    total_timesteps  | 3253937  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00402  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | -5.42    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4696     |
|    fps              | 354      |
|    time_elapsed     | 9321     |
|    total_timesteps  | 3306003  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0186   |
|    n_updates        | 801500   |
----------------------------------
Eval num_timesteps=3310000, episode_reward=-3.40 +/- 6.05
Episode length: 1278.00 +/- 156.93
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.28e+03 |
|    mean_reward      | -3.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3310000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0124  

Eval num_timesteps=3360000, episode_reward=-3.40 +/- 8.75
Episode length: 1018.80 +/- 154.63
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.02e+03 |
|    mean_reward      | -3.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3360000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0177   |
|    n_updates        | 814999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | -4.42    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4740     |
|    fps              | 353      |
|    time_elapsed     | 9505     |
|    total_timesteps  | 3361655  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0105  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | -4.5     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4780     |
|    fps              | 353      |
|    time_elapsed     | 9666     |
|    total_timesteps  | 3412478  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00614  |
|    n_updates        | 828119   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | -4.2     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4784     |
|    fps              | 353      |
|    time_elapsed     | 9678     |
|    total_timesteps  | 3417436  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0107   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | -3.6     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4824     |
|    fps              | 352      |
|    time_elapsed     | 9838     |
|    total_timesteps  | 3467687  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0249   |
|    n_updates        | 841921   |
----------------------------------
Eval num_timesteps=3470000, episode_reward=0.00 +/- 6.90
Episode length: 1026.00 +/- 404.10
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.03e+03 |
|    mean_reward      | 0        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3470000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0054   

Eval num_timesteps=3520000, episode_reward=-4.60 +/- 4.08
Episode length: 1022.40 +/- 238.18
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.02e+03 |
|    mean_reward      | -4.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3520000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00804  |
|    n_updates        | 854999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | -3.49    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4868     |
|    fps              | 351      |
|    time_elapsed     | 10018    |
|    total_timesteps  | 3524328  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0127  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | -3.54    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4908     |
|    fps              | 351      |
|    time_elapsed     | 10180    |
|    total_timesteps  | 3575868  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0163   |
|    n_updates        | 868966   |
----------------------------------
Eval num_timesteps=3580000, episode_reward=-3.00 +/- 3.90
Episode length: 1013.80 +/- 501.71
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.01e+03 |
|    mean_reward      | -3       |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3580000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00972 

Eval num_timesteps=3630000, episode_reward=4.80 +/- 4.17
Episode length: 1199.20 +/- 424.83
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.2e+03  |
|    mean_reward      | 4.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3630000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0143   |
|    n_updates        | 882499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | -2.86    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4952     |
|    fps              | 350      |
|    time_elapsed     | 10359    |
|    total_timesteps  | 3631005  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0127   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | -2.12    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4992     |
|    fps              | 350      |
|    time_elapsed     | 10526    |
|    total_timesteps  | 3685130  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0116   |
|    n_updates        | 896282   |
----------------------------------
Eval num_timesteps=3690000, episode_reward=0.20 +/- 7.83
Episode length: 1073.20 +/- 509.10
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.07e+03 |
|    mean_reward      | 0.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3690000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00555  

Eval num_timesteps=3740000, episode_reward=2.20 +/- 5.53
Episode length: 1276.00 +/- 156.16
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.28e+03 |
|    mean_reward      | 2.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3740000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00896  |
|    n_updates        | 909999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | -1.52    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5036     |
|    fps              | 349      |
|    time_elapsed     | 10712    |
|    total_timesteps  | 3742633  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00971  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | -2.01    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5076     |
|    fps              | 348      |
|    time_elapsed     | 10873    |
|    total_timesteps  | 3793925  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00484  |
|    n_updates        | 923481   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | -1.96    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5080     |
|    fps              | 348      |
|    time_elapsed     | 10884    |
|    total_timesteps  | 3798650  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00613  |
|    n_updates      

Eval num_timesteps=3850000, episode_reward=5.60 +/- 4.84
Episode length: 991.20 +/- 384.50
----------------------------------
| eval/               |          |
|    mean_ep_length   | 991      |
|    mean_reward      | 5.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3850000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0138   |
|    n_updates        | 937499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | -1.45    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5120     |
|    fps              | 348      |
|    time_elapsed     | 11060    |
|    total_timesteps  | 3852547  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0121   |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | -0.75    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5160     |
|    fps              | 347      |
|    time_elapsed     | 11228    |
|    total_timesteps  | 3906566  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0111   |
|    n_updates        | 951641   |
----------------------------------
Eval num_timesteps=3910000, episode_reward=0.60 +/- 7.42
Episode length: 1243.40 +/- 361.53
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.24e+03 |
|    mean_reward      | 0.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3910000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0127   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.36e+03 |
|    ep_rew_mean      | -0.33    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5200     |
|    fps              | 347      |
|    time_elapsed     | 11407    |
|    total_timesteps  | 3961455  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00355  |
|    n_updates        | 965363   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.35e+03 |
|    ep_rew_mean      | 0.03     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5204     |
|    fps              | 347      |
|    time_elapsed     | 11419    |
|    total_timesteps  | 3966608  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0106   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.36e+03 |
|    ep_rew_mean      | -0.18    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5244     |
|    fps              | 346      |
|    time_elapsed     | 11584    |
|    total_timesteps  | 4019686  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00685  |
|    n_updates        | 979921   |
----------------------------------
Eval num_timesteps=4020000, episode_reward=1.60 +/- 3.32
Episode length: 1208.00 +/- 82.99
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.21e+03 |
|    mean_reward      | 1.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4020000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0062   |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.35e+03 |
|    ep_rew_mean      | -1.14    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5284     |
|    fps              | 346      |
|    time_elapsed     | 11761    |
|    total_timesteps  | 4073793  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00247  |
|    n_updates        | 993448   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | -1.1     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5288     |
|    fps              | 346      |
|    time_elapsed     | 11772    |
|    total_timesteps  | 4078322  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0211   |
|    n_updates      

Eval num_timesteps=4130000, episode_reward=5.00 +/- 6.87
Episode length: 984.00 +/- 325.52
----------------------------------
| eval/               |          |
|    mean_ep_length   | 984      |
|    mean_reward      | 5        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4130000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0142   |
|    n_updates        | 1007499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | -0.79    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5328     |
|    fps              | 345      |
|    time_elapsed     | 11943    |
|    total_timesteps  | 4131407  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00666  |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | -0.76    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5368     |
|    fps              | 345      |
|    time_elapsed     | 12109    |
|    total_timesteps  | 4185360  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.014    |
|    n_updates        | 1021339  |
----------------------------------
Eval num_timesteps=4190000, episode_reward=-1.60 +/- 5.46
Episode length: 1043.80 +/- 423.04
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.04e+03 |
|    mean_reward      | -1.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4190000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00301 

Eval num_timesteps=4240000, episode_reward=-2.60 +/- 4.88
Episode length: 1133.80 +/- 410.57
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.13e+03 |
|    mean_reward      | -2.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4240000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0104   |
|    n_updates        | 1034999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | -1.9     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5412     |
|    fps              | 345      |
|    time_elapsed     | 12301    |
|    total_timesteps  | 4245185  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0108  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | -2.24    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5452     |
|    fps              | 344      |
|    time_elapsed     | 12465    |
|    total_timesteps  | 4296097  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0275   |
|    n_updates        | 1049024  |
----------------------------------
Eval num_timesteps=4300000, episode_reward=0.20 +/- 9.17
Episode length: 1103.40 +/- 250.44
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.1e+03  |
|    mean_reward      | 0.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4300000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0137   

Eval num_timesteps=4350000, episode_reward=-2.00 +/- 6.45
Episode length: 999.00 +/- 489.32
----------------------------------
| eval/               |          |
|    mean_ep_length   | 999      |
|    mean_reward      | -2       |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4350000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00248  |
|    n_updates        | 1062499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | -2.09    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5496     |
|    fps              | 344      |
|    time_elapsed     | 12654    |
|    total_timesteps  | 4355491  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00583  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | -1.74    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5536     |
|    fps              | 343      |
|    time_elapsed     | 12816    |
|    total_timesteps  | 4406138  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0113   |
|    n_updates        | 1076534  |
----------------------------------
Eval num_timesteps=4410000, episode_reward=3.40 +/- 6.71
Episode length: 1034.80 +/- 438.50
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.03e+03 |
|    mean_reward      | 3.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4410000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00831  

Eval num_timesteps=4460000, episode_reward=4.20 +/- 8.80
Episode length: 1022.00 +/- 151.67
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.02e+03 |
|    mean_reward      | 4.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4460000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00781  |
|    n_updates        | 1089999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | -0.6     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5580     |
|    fps              | 343      |
|    time_elapsed     | 12998    |
|    total_timesteps  | 4463524  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.024    

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | -0.21    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5620     |
|    fps              | 343      |
|    time_elapsed     | 13165    |
|    total_timesteps  | 4516315  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00243  |
|    n_updates        | 1104078  |
----------------------------------
Eval num_timesteps=4520000, episode_reward=0.60 +/- 3.61
Episode length: 1285.40 +/- 319.12
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.29e+03 |
|    mean_reward      | 0.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4520000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00464  

Eval num_timesteps=4570000, episode_reward=-1.60 +/- 6.02
Episode length: 1220.40 +/- 300.77
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.22e+03 |
|    mean_reward      | -1.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4570000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00271  |
|    n_updates        | 1117499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 0.26     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5664     |
|    fps              | 342      |
|    time_elapsed     | 13354    |
|    total_timesteps  | 4575031  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00887 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | -0.19    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5704     |
|    fps              | 342      |
|    time_elapsed     | 13518    |
|    total_timesteps  | 4627838  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0111   |
|    n_updates        | 1131959  |
----------------------------------
Eval num_timesteps=4630000, episode_reward=-1.20 +/- 4.40
Episode length: 1131.40 +/- 435.93
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.13e+03 |
|    mean_reward      | -1.2     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4630000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00369 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.36e+03 |
|    ep_rew_mean      | 0.03     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5744     |
|    fps              | 341      |
|    time_elapsed     | 13703    |
|    total_timesteps  | 4685528  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0108   |
|    n_updates        | 1146381  |
----------------------------------
Eval num_timesteps=4690000, episode_reward=1.60 +/- 3.50
Episode length: 1293.20 +/- 263.61
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.29e+03 |
|    mean_reward      | 1.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4690000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00423  

Eval num_timesteps=4740000, episode_reward=4.00 +/- 2.76
Episode length: 1014.20 +/- 455.83
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.01e+03 |
|    mean_reward      | 4        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4740000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00773  |
|    n_updates        | 1159999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.39e+03 |
|    ep_rew_mean      | -0.46    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5788     |
|    fps              | 341      |
|    time_elapsed     | 13893    |
|    total_timesteps  | 4745537  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0103   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.37e+03 |
|    ep_rew_mean      | -0.75    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5828     |
|    fps              | 341      |
|    time_elapsed     | 14063    |
|    total_timesteps  | 4798998  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00452  |
|    n_updates        | 1174749  |
----------------------------------
Eval num_timesteps=4800000, episode_reward=-1.40 +/- 7.58
Episode length: 1079.60 +/- 371.56
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.08e+03 |
|    mean_reward      | -1.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4800000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00988 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.36e+03 |
|    ep_rew_mean      | -0.22    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5868     |
|    fps              | 340      |
|    time_elapsed     | 14237    |
|    total_timesteps  | 4853722  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0271   |
|    n_updates        | 1188430  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.35e+03 |
|    ep_rew_mean      | -0.3     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5872     |
|    fps              | 340      |
|    time_elapsed     | 14248    |
|    total_timesteps  | 4858575  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0168   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | -0.11    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5912     |
|    fps              | 340      |
|    time_elapsed     | 14409    |
|    total_timesteps  | 4908872  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0182   |
|    n_updates        | 1202217  |
----------------------------------
Eval num_timesteps=4910000, episode_reward=2.00 +/- 4.47
Episode length: 1077.40 +/- 430.74
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.08e+03 |
|    mean_reward      | 2        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4910000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00549  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | -0.31    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5952     |
|    fps              | 340      |
|    time_elapsed     | 14584    |
|    total_timesteps  | 4962392  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0288   |
|    n_updates        | 1215597  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | -0.22    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5956     |
|    fps              | 340      |
|    time_elapsed     | 14596    |
|    total_timesteps  | 4967599  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0136   |
|    n_updates      

Eval num_timesteps=5020000, episode_reward=0.00 +/- 5.10
Episode length: 997.00 +/- 426.29
----------------------------------
| eval/               |          |
|    mean_ep_length   | 997      |
|    mean_reward      | 0        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5020000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0107   |
|    n_updates        | 1229999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | -1.08    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5996     |
|    fps              | 339      |
|    time_elapsed     | 14768    |
|    total_timesteps  | 5021172  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0303   |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | -1.11    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6036     |
|    fps              | 339      |
|    time_elapsed     | 14937    |
|    total_timesteps  | 5075350  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00669  |
|    n_updates        | 1243837  |
----------------------------------
Eval num_timesteps=5080000, episode_reward=1.40 +/- 5.82
Episode length: 1331.00 +/- 137.21
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.33e+03 |
|    mean_reward      | 1.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5080000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00579  

Eval num_timesteps=5130000, episode_reward=3.40 +/- 6.50
Episode length: 941.40 +/- 270.79
----------------------------------
| eval/               |          |
|    mean_ep_length   | 941      |
|    mean_reward      | 3.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5130000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00656  |
|    n_updates        | 1257499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | -0.16    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6080     |
|    fps              | 339      |
|    time_elapsed     | 15122    |
|    total_timesteps  | 5132710  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00877  |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 0.13     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6120     |
|    fps              | 338      |
|    time_elapsed     | 15317    |
|    total_timesteps  | 5183659  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0104   |
|    n_updates        | 1270914  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 0.08     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6124     |
|    fps              | 338      |
|    time_elapsed     | 15330    |
|    total_timesteps  | 5189071  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00907  |
|    n_updates      

Eval num_timesteps=5240000, episode_reward=8.00 +/- 7.40
Episode length: 947.40 +/- 257.19
----------------------------------
| eval/               |          |
|    mean_ep_length   | 947      |
|    mean_reward      | 8        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5240000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.018    |
|    n_updates        | 1284999  |
----------------------------------
New best mean reward!
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 0.48     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6164     |
|    fps              | 338      |
|    time_elapsed     | 15498    |
|    total_timesteps  | 5241146  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 0.68     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6204     |
|    fps              | 338      |
|    time_elapsed     | 15660    |
|    total_timesteps  | 5294043  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.033    |
|    n_updates        | 1298510  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 1.11     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6208     |
|    fps              | 338      |
|    time_elapsed     | 15671    |
|    total_timesteps  | 5298548  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0179   |
|    n_updates      

Eval num_timesteps=5350000, episode_reward=10.40 +/- 2.33
Episode length: 967.40 +/- 230.18
----------------------------------
| eval/               |          |
|    mean_ep_length   | 967      |
|    mean_reward      | 10.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5350000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0111   |
|    n_updates        | 1312499  |
----------------------------------
New best mean reward!
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 0.51     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6248     |
|    fps              | 337      |
|    time_elapsed     | 15841    |
|    total_timesteps  | 5350903  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 0.09     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6288     |
|    fps              | 337      |
|    time_elapsed     | 16011    |
|    total_timesteps  | 5405971  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00871  |
|    n_updates        | 1326492  |
----------------------------------
Eval num_timesteps=5410000, episode_reward=4.60 +/- 2.24
Episode length: 1249.80 +/- 111.22
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.25e+03 |
|    mean_reward      | 4.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5410000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0286   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.36e+03 |
|    ep_rew_mean      | 0.8      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6328     |
|    fps              | 337      |
|    time_elapsed     | 16187    |
|    total_timesteps  | 5461174  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0116   |
|    n_updates        | 1340293  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.37e+03 |
|    ep_rew_mean      | 1        |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6332     |
|    fps              | 337      |
|    time_elapsed     | 16200    |
|    total_timesteps  | 5466837  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00496  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 1.1      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6372     |
|    fps              | 337      |
|    time_elapsed     | 16364    |
|    total_timesteps  | 5519188  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00945  |
|    n_updates        | 1354796  |
----------------------------------
Eval num_timesteps=5520000, episode_reward=-2.40 +/- 6.65
Episode length: 1073.40 +/- 431.61
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.07e+03 |
|    mean_reward      | -2.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5520000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0152  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.35e+03 |
|    ep_rew_mean      | 1.71     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6412     |
|    fps              | 337      |
|    time_elapsed     | 16535    |
|    total_timesteps  | 5572578  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0108   |
|    n_updates        | 1368144  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 1.64     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6416     |
|    fps              | 337      |
|    time_elapsed     | 16546    |
|    total_timesteps  | 5577192  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0164   |
|    n_updates      

Eval num_timesteps=5630000, episode_reward=3.00 +/- 5.66
Episode length: 984.80 +/- 479.11
----------------------------------
| eval/               |          |
|    mean_ep_length   | 985      |
|    mean_reward      | 3        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5630000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0161   |
|    n_updates        | 1382499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 1.52     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6456     |
|    fps              | 336      |
|    time_elapsed     | 16720    |
|    total_timesteps  | 5631120  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00729  |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 0.66     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6496     |
|    fps              | 336      |
|    time_elapsed     | 16883    |
|    total_timesteps  | 5681506  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00927  |
|    n_updates        | 1395376  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 0.51     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6500     |
|    fps              | 336      |
|    time_elapsed     | 16895    |
|    total_timesteps  | 5686687  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0134   |
|    n_updates      

Eval num_timesteps=5740000, episode_reward=2.60 +/- 3.61
Episode length: 1221.60 +/- 262.69
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.22e+03 |
|    mean_reward      | 2.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5740000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00318  |
|    n_updates        | 1409999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 0.43     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6540     |
|    fps              | 336      |
|    time_elapsed     | 17072    |
|    total_timesteps  | 5741289  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00297  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | -0.08    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6580     |
|    fps              | 336      |
|    time_elapsed     | 17235    |
|    total_timesteps  | 5793771  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0221   |
|    n_updates        | 1423442  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 0.21     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6584     |
|    fps              | 336      |
|    time_elapsed     | 17247    |
|    total_timesteps  | 5798825  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00892  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 1.48     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6624     |
|    fps              | 336      |
|    time_elapsed     | 17406    |
|    total_timesteps  | 5849628  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0208   |
|    n_updates        | 1437406  |
----------------------------------
Eval num_timesteps=5850000, episode_reward=5.20 +/- 4.45
Episode length: 1228.20 +/- 232.43
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.23e+03 |
|    mean_reward      | 5.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5850000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0211   

Eval num_timesteps=5900000, episode_reward=5.80 +/- 6.94
Episode length: 1260.00 +/- 209.27
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.26e+03 |
|    mean_reward      | 5.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5900000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00668  |
|    n_updates        | 1449999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 1.81     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6668     |
|    fps              | 335      |
|    time_elapsed     | 17587    |
|    total_timesteps  | 5904921  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00689  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 2.39     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6708     |
|    fps              | 335      |
|    time_elapsed     | 17753    |
|    total_timesteps  | 5957945  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0113   |
|    n_updates        | 1464486  |
----------------------------------
Eval num_timesteps=5960000, episode_reward=1.80 +/- 5.49
Episode length: 1108.80 +/- 433.33
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.11e+03 |
|    mean_reward      | 1.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5960000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.015    

Eval num_timesteps=6010000, episode_reward=0.00 +/- 8.02
Episode length: 950.00 +/- 330.48
----------------------------------
| eval/               |          |
|    mean_ep_length   | 950      |
|    mean_reward      | 0        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6010000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0067   |
|    n_updates        | 1477499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 1.68     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6752     |
|    fps              | 335      |
|    time_elapsed     | 17934    |
|    total_timesteps  | 6014964  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0182   |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 1.77     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6792     |
|    fps              | 335      |
|    time_elapsed     | 18098    |
|    total_timesteps  | 6067723  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.012    |
|    n_updates        | 1491930  |
----------------------------------
Eval num_timesteps=6070000, episode_reward=0.60 +/- 2.42
Episode length: 1403.80 +/- 115.51
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.4e+03  |
|    mean_reward      | 0.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6070000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00391  

Eval num_timesteps=6120000, episode_reward=7.60 +/- 5.24
Episode length: 944.40 +/- 477.08
----------------------------------
| eval/               |          |
|    mean_ep_length   | 944      |
|    mean_reward      | 7.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6120000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00468  |
|    n_updates        | 1504999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 3.74     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6836     |
|    fps              | 334      |
|    time_elapsed     | 18279    |
|    total_timesteps  | 6123718  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00347  |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 4.21     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6876     |
|    fps              | 334      |
|    time_elapsed     | 18435    |
|    total_timesteps  | 6174596  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00799  |
|    n_updates        | 1518648  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 4.04     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6880     |
|    fps              | 334      |
|    time_elapsed     | 18447    |
|    total_timesteps  | 6179516  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0148   |
|    n_updates      

Eval num_timesteps=6230000, episode_reward=7.20 +/- 5.46
Episode length: 909.00 +/- 372.93
----------------------------------
| eval/               |          |
|    mean_ep_length   | 909      |
|    mean_reward      | 7.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6230000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0123   |
|    n_updates        | 1532499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 3.86     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6920     |
|    fps              | 334      |
|    time_elapsed     | 18616    |
|    total_timesteps  | 6231043  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00866  |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | 4.51     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6960     |
|    fps              | 334      |
|    time_elapsed     | 18775    |
|    total_timesteps  | 6282502  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0103   |
|    n_updates        | 1545625  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | 4.3      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6964     |
|    fps              | 334      |
|    time_elapsed     | 18787    |
|    total_timesteps  | 6287643  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00981  |
|    n_updates      

Eval num_timesteps=6340000, episode_reward=-2.80 +/- 5.34
Episode length: 1191.40 +/- 360.83
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.19e+03 |
|    mean_reward      | -2.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6340000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0338   |
|    n_updates        | 1559999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 3.84     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7004     |
|    fps              | 334      |
|    time_elapsed     | 18962    |
|    total_timesteps  | 6341228  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0129  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 3.76     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7044     |
|    fps              | 334      |
|    time_elapsed     | 19131    |
|    total_timesteps  | 6396347  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.006    |
|    n_updates        | 1574086  |
----------------------------------
Eval num_timesteps=6400000, episode_reward=6.40 +/- 3.83
Episode length: 990.60 +/- 471.36
----------------------------------
| eval/               |          |
|    mean_ep_length   | 991      |
|    mean_reward      | 6.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6400000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00834  |

Eval num_timesteps=6450000, episode_reward=4.20 +/- 4.45
Episode length: 1038.00 +/- 392.84
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.04e+03 |
|    mean_reward      | 4.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6450000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0105   |
|    n_updates        | 1587499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 4.03     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7088     |
|    fps              | 334      |
|    time_elapsed     | 19313    |
|    total_timesteps  | 6452289  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00673  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | 4.12     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7128     |
|    fps              | 333      |
|    time_elapsed     | 19470    |
|    total_timesteps  | 6502649  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00403  |
|    n_updates        | 1600662  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | 4.27     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7132     |
|    fps              | 334      |
|    time_elapsed     | 19483    |
|    total_timesteps  | 6508308  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00333  |
|    n_updates      

Eval num_timesteps=6560000, episode_reward=2.80 +/- 5.27
Episode length: 1243.40 +/- 306.04
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.24e+03 |
|    mean_reward      | 2.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6560000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00746  |
|    n_updates        | 1614999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 4.19     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7172     |
|    fps              | 333      |
|    time_elapsed     | 19660    |
|    total_timesteps  | 6561180  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00981  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 3.51     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7212     |
|    fps              | 333      |
|    time_elapsed     | 19823    |
|    total_timesteps  | 6613688  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00638  |
|    n_updates        | 1628421  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 3.44     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7216     |
|    fps              | 333      |
|    time_elapsed     | 19837    |
|    total_timesteps  | 6619490  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0113   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 3.1      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7256     |
|    fps              | 333      |
|    time_elapsed     | 19998    |
|    total_timesteps  | 6669991  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00506  |
|    n_updates        | 1642497  |
----------------------------------
Eval num_timesteps=6670000, episode_reward=2.20 +/- 8.52
Episode length: 1170.80 +/- 123.29
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.17e+03 |
|    mean_reward      | 2.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6670000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0129   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 3.04     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7296     |
|    fps              | 333      |
|    time_elapsed     | 20172    |
|    total_timesteps  | 6722161  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0114   |
|    n_updates        | 1655540  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 3.14     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7300     |
|    fps              | 333      |
|    time_elapsed     | 20185    |
|    total_timesteps  | 6727489  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00486  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | 3.57     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7340     |
|    fps              | 333      |
|    time_elapsed     | 20343    |
|    total_timesteps  | 6776099  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00394  |
|    n_updates        | 1669024  |
----------------------------------
Eval num_timesteps=6780000, episode_reward=-4.40 +/- 5.95
Episode length: 1232.40 +/- 213.48
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.23e+03 |
|    mean_reward      | -4.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6780000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0135  

Eval num_timesteps=6830000, episode_reward=9.40 +/- 6.89
Episode length: 973.40 +/- 313.99
----------------------------------
| eval/               |          |
|    mean_ep_length   | 973      |
|    mean_reward      | 9.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6830000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00641  |
|    n_updates        | 1682499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 2.31     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7384     |
|    fps              | 332      |
|    time_elapsed     | 20526    |
|    total_timesteps  | 6832511  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0121   |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | 2.77     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7424     |
|    fps              | 332      |
|    time_elapsed     | 20682    |
|    total_timesteps  | 6882686  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0117   |
|    n_updates        | 1695671  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | 2.79     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7428     |
|    fps              | 332      |
|    time_elapsed     | 20692    |
|    total_timesteps  | 6886835  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00852  |
|    n_updates      

Eval num_timesteps=6940000, episode_reward=5.00 +/- 3.79
Episode length: 1284.40 +/- 111.40
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.28e+03 |
|    mean_reward      | 5        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6940000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00396  |
|    n_updates        | 1709999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 3.25     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7468     |
|    fps              | 332      |
|    time_elapsed     | 20868    |
|    total_timesteps  | 6941430  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00423  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 3.63     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7508     |
|    fps              | 332      |
|    time_elapsed     | 21030    |
|    total_timesteps  | 6992302  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00384  |
|    n_updates        | 1723075  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 3.27     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7512     |
|    fps              | 332      |
|    time_elapsed     | 21042    |
|    total_timesteps  | 6997370  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00992  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | 5.06     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7552     |
|    fps              | 332      |
|    time_elapsed     | 21200    |
|    total_timesteps  | 7048781  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00346  |
|    n_updates        | 1737195  |
----------------------------------
Eval num_timesteps=7050000, episode_reward=5.80 +/- 3.76
Episode length: 1069.00 +/- 483.51
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.07e+03 |
|    mean_reward      | 5.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7050000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.018    

Eval num_timesteps=7100000, episode_reward=4.80 +/- 4.96
Episode length: 953.80 +/- 432.03
----------------------------------
| eval/               |          |
|    mean_ep_length   | 954      |
|    mean_reward      | 4.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7100000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0168   |
|    n_updates        | 1749999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | 5.84     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7596     |
|    fps              | 332      |
|    time_elapsed     | 21377    |
|    total_timesteps  | 7104148  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00189  |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 6.34     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7636     |
|    fps              | 332      |
|    time_elapsed     | 21534    |
|    total_timesteps  | 7154779  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0139   |
|    n_updates        | 1763694  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 6.25     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7640     |
|    fps              | 332      |
|    time_elapsed     | 21546    |
|    total_timesteps  | 7159961  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00529  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | 6.33     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7680     |
|    fps              | 332      |
|    time_elapsed     | 21701    |
|    total_timesteps  | 7208856  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0173   |
|    n_updates        | 1777213  |
----------------------------------
Eval num_timesteps=7210000, episode_reward=0.20 +/- 6.91
Episode length: 991.00 +/- 469.88
----------------------------------
| eval/               |          |
|    mean_ep_length   | 991      |
|    mean_reward      | 0.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7210000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00716  |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 6.11     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7720     |
|    fps              | 332      |
|    time_elapsed     | 21868    |
|    total_timesteps  | 7261208  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.013    |
|    n_updates        | 1790301  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | 5.97     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7724     |
|    fps              | 332      |
|    time_elapsed     | 21879    |
|    total_timesteps  | 7265804  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.019    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 6.29     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7764     |
|    fps              | 332      |
|    time_elapsed     | 22037    |
|    total_timesteps  | 7318107  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00618  |
|    n_updates        | 1804526  |
----------------------------------
Eval num_timesteps=7320000, episode_reward=1.40 +/- 7.74
Episode length: 1127.00 +/- 208.87
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.13e+03 |
|    mean_reward      | 1.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7320000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00616  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 4.93     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7804     |
|    fps              | 331      |
|    time_elapsed     | 22208    |
|    total_timesteps  | 7371297  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00982  |
|    n_updates        | 1817824  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 4.91     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7808     |
|    fps              | 331      |
|    time_elapsed     | 22219    |
|    total_timesteps  | 7376027  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0166   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 5.22     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7848     |
|    fps              | 331      |
|    time_elapsed     | 22370    |
|    total_timesteps  | 7423719  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.021    |
|    n_updates        | 1830929  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 5.03     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7852     |
|    fps              | 331      |
|    time_elapsed     | 22382    |
|    total_timesteps  | 7428661  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00541  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | 5.61     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7892     |
|    fps              | 331      |
|    time_elapsed     | 22533    |
|    total_timesteps  | 7477399  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.049    |
|    n_updates        | 1844349  |
----------------------------------
Eval num_timesteps=7480000, episode_reward=1.80 +/- 7.22
Episode length: 1103.20 +/- 214.12
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.1e+03  |
|    mean_reward      | 1.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7480000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0201   

Eval num_timesteps=7530000, episode_reward=5.80 +/- 2.99
Episode length: 1083.20 +/- 354.89
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.08e+03 |
|    mean_reward      | 5.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7530000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0105   |
|    n_updates        | 1857499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | 6.34     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7936     |
|    fps              | 331      |
|    time_elapsed     | 22711    |
|    total_timesteps  | 7533632  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0134   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 5.53     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7976     |
|    fps              | 331      |
|    time_elapsed     | 22868    |
|    total_timesteps  | 7583538  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0037   |
|    n_updates        | 1870884  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 5.21     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7980     |
|    fps              | 331      |
|    time_elapsed     | 22880    |
|    total_timesteps  | 7588560  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00708  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | 5.53     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8020     |
|    fps              | 331      |
|    time_elapsed     | 23037    |
|    total_timesteps  | 7637546  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00252  |
|    n_updates        | 1884386  |
----------------------------------
Eval num_timesteps=7640000, episode_reward=5.00 +/- 7.46
Episode length: 1168.60 +/- 179.87
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.17e+03 |
|    mean_reward      | 5        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7640000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0137   

Eval num_timesteps=7690000, episode_reward=4.80 +/- 7.30
Episode length: 1034.80 +/- 241.74
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.03e+03 |
|    mean_reward      | 4.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7690000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00429  |
|    n_updates        | 1897499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 5.65     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8064     |
|    fps              | 331      |
|    time_elapsed     | 23217    |
|    total_timesteps  | 7694974  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00567  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 5.54     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8104     |
|    fps              | 331      |
|    time_elapsed     | 23381    |
|    total_timesteps  | 7748270  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0155   |
|    n_updates        | 1912067  |
----------------------------------
Eval num_timesteps=7750000, episode_reward=3.40 +/- 7.86
Episode length: 973.00 +/- 268.65
----------------------------------
| eval/               |          |
|    mean_ep_length   | 973      |
|    mean_reward      | 3.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7750000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0316   |

Eval num_timesteps=7800000, episode_reward=8.20 +/- 3.92
Episode length: 1058.80 +/- 298.68
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.06e+03 |
|    mean_reward      | 8.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7800000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00575  |
|    n_updates        | 1924999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 5.79     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8148     |
|    fps              | 331      |
|    time_elapsed     | 23556    |
|    total_timesteps  | 7803965  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0163   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | 5.59     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8188     |
|    fps              | 331      |
|    time_elapsed     | 23717    |
|    total_timesteps  | 7856048  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0101   |
|    n_updates        | 1939011  |
----------------------------------
Eval num_timesteps=7860000, episode_reward=9.40 +/- 3.88
Episode length: 1180.00 +/- 158.72
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.18e+03 |
|    mean_reward      | 9.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7860000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0104   

Eval num_timesteps=7910000, episode_reward=2.40 +/- 6.56
Episode length: 1171.80 +/- 181.42
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.17e+03 |
|    mean_reward      | 2.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7910000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00821  |
|    n_updates        | 1952499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 4.67     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8232     |
|    fps              | 331      |
|    time_elapsed     | 23893    |
|    total_timesteps  | 7911188  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0185   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 4.99     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8272     |
|    fps              | 331      |
|    time_elapsed     | 24059    |
|    total_timesteps  | 7964629  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00963  |
|    n_updates        | 1966157  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 5.11     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8276     |
|    fps              | 331      |
|    time_elapsed     | 24071    |
|    total_timesteps  | 7969545  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.018    |
|    n_updates      

Eval num_timesteps=8020000, episode_reward=2.40 +/- 6.50
Episode length: 1227.20 +/- 191.05
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.23e+03 |
|    mean_reward      | 2.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8020000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0368   |
|    n_updates        | 1979999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 4.42     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8316     |
|    fps              | 330      |
|    time_elapsed     | 24240    |
|    total_timesteps  | 8021228  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00729  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 5.35     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8356     |
|    fps              | 330      |
|    time_elapsed     | 24403    |
|    total_timesteps  | 8073099  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0242   |
|    n_updates        | 1993274  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 5.44     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8360     |
|    fps              | 330      |
|    time_elapsed     | 24415    |
|    total_timesteps  | 8078350  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00405  |
|    n_updates      

Eval num_timesteps=8130000, episode_reward=5.40 +/- 4.36
Episode length: 1078.00 +/- 233.79
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.08e+03 |
|    mean_reward      | 5.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8130000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0277   |
|    n_updates        | 2007499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 5.26     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8400     |
|    fps              | 330      |
|    time_elapsed     | 24588    |
|    total_timesteps  | 8131364  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00546  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 6.1      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8440     |
|    fps              | 330      |
|    time_elapsed     | 24755    |
|    total_timesteps  | 8185319  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00462  |
|    n_updates        | 2021329  |
----------------------------------
Eval num_timesteps=8190000, episode_reward=8.40 +/- 1.50
Episode length: 1023.20 +/- 298.07
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.02e+03 |
|    mean_reward      | 8.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8190000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00392  

Eval num_timesteps=8240000, episode_reward=6.20 +/- 8.93
Episode length: 1018.80 +/- 338.71
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.02e+03 |
|    mean_reward      | 6.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8240000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00838  |
|    n_updates        | 2034999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.36e+03 |
|    ep_rew_mean      | 5.13     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8484     |
|    fps              | 330      |
|    time_elapsed     | 24940    |
|    total_timesteps  | 8244605  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0036   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 5.7      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8524     |
|    fps              | 330      |
|    time_elapsed     | 25097    |
|    total_timesteps  | 8295583  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00662  |
|    n_updates        | 2048895  |
----------------------------------
Eval num_timesteps=8300000, episode_reward=7.60 +/- 7.61
Episode length: 1006.60 +/- 464.04
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.01e+03 |
|    mean_reward      | 7.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8300000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0139   

Eval num_timesteps=8350000, episode_reward=7.80 +/- 3.66
Episode length: 1188.80 +/- 106.03
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.19e+03 |
|    mean_reward      | 7.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8350000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.01     |
|    n_updates        | 2062499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 6.85     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8568     |
|    fps              | 330      |
|    time_elapsed     | 25276    |
|    total_timesteps  | 8352131  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0214   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 6.48     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8608     |
|    fps              | 330      |
|    time_elapsed     | 25435    |
|    total_timesteps  | 8403969  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0111   |
|    n_updates        | 2075992  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 6.12     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8612     |
|    fps              | 330      |
|    time_elapsed     | 25446    |
|    total_timesteps  | 8408929  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00534  |
|    n_updates      

Eval num_timesteps=8460000, episode_reward=7.80 +/- 2.32
Episode length: 1081.00 +/- 191.58
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.08e+03 |
|    mean_reward      | 7.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8460000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0109   |
|    n_updates        | 2089999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 5.78     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8652     |
|    fps              | 330      |
|    time_elapsed     | 25616    |
|    total_timesteps  | 8461080  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0166   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 6.19     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8692     |
|    fps              | 330      |
|    time_elapsed     | 25776    |
|    total_timesteps  | 8513837  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0267   |
|    n_updates        | 2103459  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 5.91     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8696     |
|    fps              | 330      |
|    time_elapsed     | 25788    |
|    total_timesteps  | 8518735  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00657  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 6.74     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8736     |
|    fps              | 330      |
|    time_elapsed     | 25944    |
|    total_timesteps  | 8568338  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00347  |
|    n_updates        | 2117084  |
----------------------------------
Eval num_timesteps=8570000, episode_reward=6.80 +/- 6.62
Episode length: 996.40 +/- 233.01
----------------------------------
| eval/               |          |
|    mean_ep_length   | 996      |
|    mean_reward      | 6.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8570000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00288  |

Eval num_timesteps=8620000, episode_reward=7.00 +/- 8.00
Episode length: 1106.40 +/- 311.70
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.11e+03 |
|    mean_reward      | 7        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8620000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00581  |
|    n_updates        | 2129999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 6.55     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8780     |
|    fps              | 330      |
|    time_elapsed     | 26120    |
|    total_timesteps  | 8623840  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00927  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 6.09     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8820     |
|    fps              | 330      |
|    time_elapsed     | 26282    |
|    total_timesteps  | 8675971  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.011    |
|    n_updates        | 2143992  |
----------------------------------
Eval num_timesteps=8680000, episode_reward=2.00 +/- 9.01
Episode length: 937.20 +/- 482.87
----------------------------------
| eval/               |          |
|    mean_ep_length   | 937      |
|    mean_reward      | 2        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8680000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00385  |

Eval num_timesteps=8730000, episode_reward=7.80 +/- 3.76
Episode length: 1245.00 +/- 251.63
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.24e+03 |
|    mean_reward      | 7.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8730000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0186   |
|    n_updates        | 2157499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 4.96     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8864     |
|    fps              | 330      |
|    time_elapsed     | 26468    |
|    total_timesteps  | 8735015  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0145   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 4.6      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8904     |
|    fps              | 329      |
|    time_elapsed     | 26634    |
|    total_timesteps  | 8787435  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0129   |
|    n_updates        | 2171858  |
----------------------------------
Eval num_timesteps=8790000, episode_reward=6.80 +/- 2.79
Episode length: 1187.20 +/- 199.92
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.19e+03 |
|    mean_reward      | 6.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8790000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0115   

Eval num_timesteps=8840000, episode_reward=8.40 +/- 3.93
Episode length: 1123.60 +/- 145.53
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.12e+03 |
|    mean_reward      | 8.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8840000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0199   |
|    n_updates        | 2184999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 4.4      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8948     |
|    fps              | 329      |
|    time_elapsed     | 26820    |
|    total_timesteps  | 8845345  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00758  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 5.69     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8988     |
|    fps              | 329      |
|    time_elapsed     | 26983    |
|    total_timesteps  | 8896879  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00974  |
|    n_updates        | 2199219  |
----------------------------------
Eval num_timesteps=8900000, episode_reward=5.80 +/- 6.62
Episode length: 1167.80 +/- 397.43
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.17e+03 |
|    mean_reward      | 5.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8900000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0161   

Eval num_timesteps=8950000, episode_reward=6.40 +/- 8.19
Episode length: 1161.80 +/- 192.39
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.16e+03 |
|    mean_reward      | 6.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8950000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00832  |
|    n_updates        | 2212499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 5.1      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9032     |
|    fps              | 329      |
|    time_elapsed     | 27163    |
|    total_timesteps  | 8952996  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0114   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | 6.02     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9072     |
|    fps              | 329      |
|    time_elapsed     | 27320    |
|    total_timesteps  | 9003334  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0109   |
|    n_updates        | 2225833  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 5.93     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9076     |
|    fps              | 329      |
|    time_elapsed     | 27332    |
|    total_timesteps  | 9008313  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00796  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | 6.66     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9116     |
|    fps              | 329      |
|    time_elapsed     | 27486    |
|    total_timesteps  | 9057481  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00306  |
|    n_updates        | 2239370  |
----------------------------------
Eval num_timesteps=9060000, episode_reward=9.40 +/- 4.59
Episode length: 883.80 +/- 438.60
----------------------------------
| eval/               |          |
|    mean_ep_length   | 884      |
|    mean_reward      | 9.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9060000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0372   |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 6.35     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9156     |
|    fps              | 329      |
|    time_elapsed     | 27657    |
|    total_timesteps  | 9112362  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0351   |
|    n_updates        | 2253090  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 6.64     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9160     |
|    fps              | 329      |
|    time_elapsed     | 27667    |
|    total_timesteps  | 9116800  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0138   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 5.63     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9200     |
|    fps              | 329      |
|    time_elapsed     | 27823    |
|    total_timesteps  | 9167494  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00589  |
|    n_updates        | 2266873  |
----------------------------------
Eval num_timesteps=9170000, episode_reward=9.80 +/- 4.35
Episode length: 957.40 +/- 339.46
----------------------------------
| eval/               |          |
|    mean_ep_length   | 957      |
|    mean_reward      | 9.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9170000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00667  |

Eval num_timesteps=9220000, episode_reward=6.20 +/- 6.37
Episode length: 1124.80 +/- 323.37
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.12e+03 |
|    mean_reward      | 6.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9220000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00536  |
|    n_updates        | 2279999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | 6.23     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9244     |
|    fps              | 329      |
|    time_elapsed     | 27994    |
|    total_timesteps  | 9220829  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0149   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | 6.71     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9284     |
|    fps              | 329      |
|    time_elapsed     | 28153    |
|    total_timesteps  | 9272457  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00732  |
|    n_updates        | 2293114  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 6.57     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9288     |
|    fps              | 329      |
|    time_elapsed     | 28166    |
|    total_timesteps  | 9278120  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00655  |
|    n_updates      

Eval num_timesteps=9330000, episode_reward=5.40 +/- 3.20
Episode length: 1050.60 +/- 422.86
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.05e+03 |
|    mean_reward      | 5.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9330000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0133   |
|    n_updates        | 2307499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 6.16     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9328     |
|    fps              | 329      |
|    time_elapsed     | 28338    |
|    total_timesteps  | 9332168  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00465  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 5.68     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9368     |
|    fps              | 329      |
|    time_elapsed     | 28496    |
|    total_timesteps  | 9383074  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00467  |
|    n_updates        | 2320768  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 5.56     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9372     |
|    fps              | 329      |
|    time_elapsed     | 28507    |
|    total_timesteps  | 9387899  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00762  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | 5.89     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9412     |
|    fps              | 329      |
|    time_elapsed     | 28664    |
|    total_timesteps  | 9438206  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00611  |
|    n_updates        | 2334551  |
----------------------------------
Eval num_timesteps=9440000, episode_reward=4.20 +/- 5.60
Episode length: 1050.00 +/- 380.79
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.05e+03 |
|    mean_reward      | 4.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9440000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00826  

Eval num_timesteps=9490000, episode_reward=3.20 +/- 6.24
Episode length: 1142.80 +/- 108.90
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.14e+03 |
|    mean_reward      | 3.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9490000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0128   |
|    n_updates        | 2347499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | 5.85     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9456     |
|    fps              | 329      |
|    time_elapsed     | 28841    |
|    total_timesteps  | 9493638  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00236  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 6.3      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9496     |
|    fps              | 329      |
|    time_elapsed     | 28997    |
|    total_timesteps  | 9543992  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00601  |
|    n_updates        | 2360997  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 6.23     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9500     |
|    fps              | 329      |
|    time_elapsed     | 29008    |
|    total_timesteps  | 9548847  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00749  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 4.81     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9540     |
|    fps              | 329      |
|    time_elapsed     | 29169    |
|    total_timesteps  | 9599962  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00618  |
|    n_updates        | 2374990  |
----------------------------------
Eval num_timesteps=9600000, episode_reward=1.40 +/- 4.32
Episode length: 1399.00 +/- 160.91
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.4e+03  |
|    mean_reward      | 1.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9600000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00994  

Eval num_timesteps=9650000, episode_reward=8.80 +/- 2.79
Episode length: 994.20 +/- 337.99
----------------------------------
| eval/               |          |
|    mean_ep_length   | 994      |
|    mean_reward      | 8.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9650000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00741  |
|    n_updates        | 2387499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 5.54     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9584     |
|    fps              | 329      |
|    time_elapsed     | 29343    |
|    total_timesteps  | 9654810  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00965  |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | 6.69     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9624     |
|    fps              | 328      |
|    time_elapsed     | 29500    |
|    total_timesteps  | 9704360  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00858  |
|    n_updates        | 2401089  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | 6.6      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9628     |
|    fps              | 328      |
|    time_elapsed     | 29511    |
|    total_timesteps  | 9709098  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00771  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | 7.45     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9668     |
|    fps              | 328      |
|    time_elapsed     | 29669    |
|    total_timesteps  | 9759118  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00409  |
|    n_updates        | 2414779  |
----------------------------------
Eval num_timesteps=9760000, episode_reward=5.20 +/- 6.52
Episode length: 969.00 +/- 522.72
----------------------------------
| eval/               |          |
|    mean_ep_length   | 969      |
|    mean_reward      | 5.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9760000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00671  |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 6.89     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9708     |
|    fps              | 328      |
|    time_elapsed     | 29835    |
|    total_timesteps  | 9811054  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0061   |
|    n_updates        | 2427763  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 6.78     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9712     |
|    fps              | 328      |
|    time_elapsed     | 29847    |
|    total_timesteps  | 9816195  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00377  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | 6.9      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9752     |
|    fps              | 328      |
|    time_elapsed     | 29996    |
|    total_timesteps  | 9864267  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0111   |
|    n_updates        | 2441066  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | 6.81     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9756     |
|    fps              | 328      |
|    time_elapsed     | 30008    |
|    total_timesteps  | 9869257  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.015    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.21e+03 |
|    ep_rew_mean      | 6.73     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9796     |
|    fps              | 328      |
|    time_elapsed     | 30158    |
|    total_timesteps  | 9916590  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00626  |
|    n_updates        | 2454147  |
----------------------------------
Eval num_timesteps=9920000, episode_reward=8.20 +/- 1.60
Episode length: 962.00 +/- 222.91
----------------------------------
| eval/               |          |
|    mean_ep_length   | 962      |
|    mean_reward      | 8.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9920000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0183   |

New best mean reward!
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.11e+03 |
|    ep_rew_mean      | 9.07     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9840     |
|    fps              | 328      |
|    time_elapsed     | 30295    |
|    total_timesteps  | 9961642  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0291   |
|    n_updates        | 2465410  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.09e+03 |
|    ep_rew_mean      | 9.47     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9844     |
|    fps              | 328      |
|    time_elapsed     | 30302    |
|    total_timesteps  | 9964299  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0104   

Eval num_timesteps=10000000, episode_reward=19.40 +/- 2.73
Episode length: 615.80 +/- 102.11
----------------------------------
| eval/               |          |
|    mean_ep_length   | 616      |
|    mean_reward      | 19.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 10000000 |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0019   |
|    n_updates        | 2474999  |
----------------------------------


<stable_baselines3.dqn.dqn.DQN at 0x7f807eedba60>

In [34]:
save_path = os.path.join('Training', 'Saved Models','DQN_Pong_10M')
model_pong.save(save_path)

In [36]:
evaluate_policy(model_pong, env, n_eval_episodes=10, render=True)

(19.9, 1.8681541692269403)

In [21]:
env.close()