In [1]:
# https://stable-baselines3.readthedocs.io/en/master/guide/rl.html
# https://spinningup.openai.com/en/latest/spinningup/rl_intro2.html#a-taxonomy-of-rl-algorithms

# 1. Import dependencies

In [15]:
from sys import path as syspath
from os import getcwd, path as ospath

# Put the local `SB3_f` directory (next to this notebook) on the import path.
# NOTE(review): presumably needed so modules inside SB3_f can import each other
# by their top-level name — confirm against the fork's layout.
# The directory is joined with a real path separator, and only names imported in
# this cell are used, so the cell runs on a fresh kernel.
sb3f_dir = ospath.join(ospath.abspath(getcwd()), 'SB3_f')
if sb3f_dir not in syspath:  # keep the cell idempotent when re-run
    syspath.append(sb3f_dir)


In [3]:
import os
import gym 
import matplotlib.pyplot as plt
import numpy as np 
from SB3_f.sb3f import DQN
from SB3_f.sb3f.common.vec_env import DummyVecEnv
from SB3_f.sb3f.common.evaluation import evaluate_policy
from SB3_f.sb3f.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold
from SB3_f.sb3f.common.atari_wrappers import AtariWrapper

# 2. Load and Test Environment

In [4]:
# Gym environment id; passed to gym.make() for both the random-play check and training.
environment_name = "Pong-v4"

In [5]:
# Raw (unwrapped) environment used by the random-agent rollout below.
# It is re-created and wrapped with AtariWrapper before training.
env = gym.make(environment_name)

A.L.E: Arcade Learning Environment (version 0.7.4+069f8bd)
[Powered by Stella]


In [6]:
# Sanity-check the environment by playing a few episodes with uniformly random
# actions and printing the total reward collected in each episode.
episodes = 5
try:
    for episode in range(1, episodes + 1):
        state = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            action = env.action_space.sample()
            n_state, reward, done, info = env.step(action)
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Close the environment even if the rollout raises or is interrupted
    # (e.g. kernel interrupt), so it is not left open.
    env.close()

  logger.warn(


Episode:1 Score:-21.0
Episode:2 Score:-20.0
Episode:3 Score:-21.0
Episode:4 Score:-20.0
Episode:5 Score:-21.0


# 3. Train an RL Model

In [7]:
# Root directory for TensorBoard logs. The per-run sub-directory name is
# supplied later through `tb_log_name` when calling learn().
_log_dir_parts = ('Training', 'Logs Opt Atari', 'Pong')
log_path = os.path.join(*_log_dir_parts)

In [8]:
# Directory handed to EvalCallback as `best_model_save_path`.
_save_dir_parts = ('Training', 'Saved Models')
save_path = os.path.join(*_save_dir_parts)


In [9]:
# Build a fresh environment for training and apply the Atari wrapper
# in a single expression.
env = AtariWrapper(gym.make(environment_name))

In [10]:
# Evaluate on a dedicated environment instance rather than the training `env`.
# If the two share one object, every periodic evaluation resets and steps the
# environment in the middle of a training episode.
eval_env = AtariWrapper(gym.make(environment_name))

# Stop training once a new best mean evaluation reward reaches the threshold.
stop_callback = StopTrainingOnRewardThreshold(reward_threshold=20, verbose=1)

# Evaluate every `eval_freq` steps; the best model so far is written to `save_path`.
eval_callback = EvalCallback(eval_env,
                             callback_on_new_best=stop_callback,
                             eval_freq=10000,
                             best_model_save_path=save_path,
                             verbose=1)

In [11]:
# Pong rewards are -1 or +1, so the optimistic value is set to +1.
# NOTE(review): `opt_val` is an extra positional argument of this DQN fork; its
# exact semantics are not visible in this notebook — confirm in SB3_f.
opt_val = 1

# DQN hyperparameters, collected in one place so they are easy to scan and tweak.
dqn_hyperparams = dict(
    buffer_size=100000,
    learning_rate=0.0001,
    batch_size=32,
    learning_starts=100000,
    target_update_interval=1000,
    train_freq=4,
    gradient_steps=1,
    exploration_fraction=0.1,
    exploration_final_eps=0.01,
    optimize_memory_usage=False,
    tensorboard_log=log_path,
)

model_pong = DQN('CnnPolicy', env, opt_val, verbose=1, **dqn_hyperparams)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [12]:
# Train for up to 10M environment steps; `eval_callback` may stop the run earlier.
model_pong.learn(
    total_timesteps=10_000_000,
    callback=eval_callback,
    tb_log_name='Pong_Opt_10x0.1',
)

2023-01-17 16:25:37.276158: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Logging to Training/Logs Opt Atari/Pong/Pong_Opt_10x0.1_1




----------------------------------
| rollout/            |          |
|    ep_len_mean      | 294      |
|    ep_rew_mean      | -20.5    |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 239      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1174     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 291      |
|    ep_rew_mean      | -20.2    |
|    exploration_rate | 0.998    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 382      |
|    time_elapsed     | 6        |
|    total_timesteps  | 2325     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 296      |
|    ep_rew_mean      | -20      |
|    exploration_rate | 0.996    |
| time/               |          |
|    episodes       



Eval num_timesteps=10000, episode_reward=-20.80 +/- 0.40
Episode length: 253.80 +/- 11.39
----------------------------------
| eval/               |          |
|    mean_ep_length   | 254      |
|    mean_reward      | -20.8    |
| rollout/            |          |
|    exploration_rate | 0.99     |
| time/               |          |
|    total_timesteps  | 10000    |
----------------------------------
New best mean reward!
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 296      |
|    ep_rew_mean      | -20.3    |
|    exploration_rate | 0.989    |
| time/               |          |
|    episodes         | 36       |
|    fps              | 482      |
|    time_elapsed     | 22       |
|    total_timesteps  | 10653    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 295      |
|    ep_rew_mean      | -20.4    |
|    exploration_rate | 0.988    |
| time/      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 295      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.967    |
| time/               |          |
|    episodes         | 112      |
|    fps              | 683      |
|    time_elapsed     | 48       |
|    total_timesteps  | 33056    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 295      |
|    ep_rew_mean      | -20.5    |
|    exploration_rate | 0.966    |
| time/               |          |
|    episodes         | 116      |
|    fps              | 690      |
|    time_elapsed     | 49       |
|    total_timesteps  | 34275    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 295      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.965    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 297      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.944    |
| time/               |          |
|    episodes         | 192      |
|    fps              | 748      |
|    time_elapsed     | 76       |
|    total_timesteps  | 56973    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 296      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.942    |
| time/               |          |
|    episodes         | 196      |
|    fps              | 751      |
|    time_elapsed     | 77       |
|    total_timesteps  | 58104    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 298      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.941    |
| time/               |          |
|    episodes       

Eval num_timesteps=80000, episode_reward=-17.80 +/- 6.40
Episode length: 212.00 +/- 78.58
----------------------------------
| eval/               |          |
|    mean_ep_length   | 212      |
|    mean_reward      | -17.8    |
| rollout/            |          |
|    exploration_rate | 0.921    |
| time/               |          |
|    total_timesteps  | 80000    |
----------------------------------
New best mean reward!
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 300      |
|    ep_rew_mean      | -20.8    |
|    exploration_rate | 0.92     |
| time/               |          |
|    episodes         | 272      |
|    fps              | 767      |
|    time_elapsed     | 105      |
|    total_timesteps  | 81223    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 300      |
|    ep_rew_mean      | -20.8    |
|    exploration_rate | 0.918    |
| time/      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 305      |
|    ep_rew_mean      | -20.9    |
|    exploration_rate | 0.898    |
| time/               |          |
|    episodes         | 344      |
|    fps              | 766      |
|    time_elapsed     | 134      |
|    total_timesteps  | 103161   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0289   |
|    n_updates        | 790      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 306      |
|    ep_rew_mean      | -20.9    |
|    exploration_rate | 0.897    |
| time/               |          |
|    episodes         | 348      |
|    fps              | 763      |
|    time_elapsed     | 136      |
|    total_timesteps  | 104391   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0294   |
|    n_updates      

New best mean reward!
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 308      |
|    ep_rew_mean      | -21.2    |
|    exploration_rate | 0.881    |
| time/               |          |
|    episodes         | 400      |
|    fps              | 707      |
|    time_elapsed     | 170      |
|    total_timesteps  | 120300   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0306   |
|    n_updates        | 5074     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 307      |
|    ep_rew_mean      | -21      |
|    exploration_rate | 0.88     |
| time/               |          |
|    episodes         | 404      |
|    fps              | 704      |
|    time_elapsed     | 172      |
|    total_timesteps  | 121576   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0197   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 304      |
|    ep_rew_mean      | -21      |
|    exploration_rate | 0.863    |
| time/               |          |
|    episodes         | 460      |
|    fps              | 670      |
|    time_elapsed     | 206      |
|    total_timesteps  | 138378   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00338  |
|    n_updates        | 9594     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 304      |
|    ep_rew_mean      | -21      |
|    exploration_rate | 0.862    |
| time/               |          |
|    episodes         | 464      |
|    fps              | 669      |
|    time_elapsed     | 208      |
|    total_timesteps  | 139447   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0245   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 300      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.846    |
| time/               |          |
|    episodes         | 516      |
|    fps              | 638      |
|    time_elapsed     | 242      |
|    total_timesteps  | 155158   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0261   |
|    n_updates        | 13789    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 301      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.845    |
| time/               |          |
|    episodes         | 520      |
|    fps              | 637      |
|    time_elapsed     | 245      |
|    total_timesteps  | 156282   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0121   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 308      |
|    ep_rew_mean      | -20.7    |
|    exploration_rate | 0.829    |
| time/               |          |
|    episodes         | 572      |
|    fps              | 615      |
|    time_elapsed     | 280      |
|    total_timesteps  | 172679   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0121   |
|    n_updates        | 18169    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 309      |
|    ep_rew_mean      | -20.7    |
|    exploration_rate | 0.828    |
| time/               |          |
|    episodes         | 576      |
|    fps              | 614      |
|    time_elapsed     | 282      |
|    total_timesteps  | 173869   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.026    |
|    n_updates      

Eval num_timesteps=190000, episode_reward=-17.80 +/- 6.40
Episode length: 210.40 +/- 80.23
----------------------------------
| eval/               |          |
|    mean_ep_length   | 210      |
|    mean_reward      | -17.8    |
| rollout/            |          |
|    exploration_rate | 0.812    |
| time/               |          |
|    total_timesteps  | 190000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00929  |
|    n_updates        | 22499    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 309      |
|    ep_rew_mean      | -20.8    |
|    exploration_rate | 0.811    |
| time/               |          |
|    episodes         | 632      |
|    fps              | 595      |
|    time_elapsed     | 320      |
|    total_timesteps  | 191221   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0102   |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 309      |
|    ep_rew_mean      | -20.8    |
|    exploration_rate | 0.794    |
| time/               |          |
|    episodes         | 688      |
|    fps              | 584      |
|    time_elapsed     | 356      |
|    total_timesteps  | 208273   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0243   |
|    n_updates        | 27068    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 309      |
|    ep_rew_mean      | -20.8    |
|    exploration_rate | 0.793    |
| time/               |          |
|    episodes         | 692      |
|    fps              | 584      |
|    time_elapsed     | 358      |
|    total_timesteps  | 209543   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0163   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 312      |
|    ep_rew_mean      | -20.7    |
|    exploration_rate | 0.776    |
| time/               |          |
|    episodes         | 744      |
|    fps              | 572      |
|    time_elapsed     | 395      |
|    total_timesteps  | 226059   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0124   |
|    n_updates        | 31514    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 312      |
|    ep_rew_mean      | -20.7    |
|    exploration_rate | 0.775    |
| time/               |          |
|    episodes         | 748      |
|    fps              | 571      |
|    time_elapsed     | 397      |
|    total_timesteps  | 227262   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00805  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 313      |
|    ep_rew_mean      | -20.5    |
|    exploration_rate | 0.759    |
| time/               |          |
|    episodes         | 800      |
|    fps              | 560      |
|    time_elapsed     | 435      |
|    total_timesteps  | 243638   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.013    |
|    n_updates        | 35909    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 315      |
|    ep_rew_mean      | -20.5    |
|    exploration_rate | 0.758    |
| time/               |          |
|    episodes         | 804      |
|    fps              | 559      |
|    time_elapsed     | 437      |
|    total_timesteps  | 244946   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00868  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 321      |
|    ep_rew_mean      | -20.7    |
|    exploration_rate | 0.741    |
| time/               |          |
|    episodes         | 856      |
|    fps              | 550      |
|    time_elapsed     | 475      |
|    total_timesteps  | 261969   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0244   |
|    n_updates        | 40492    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 319      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.74     |
| time/               |          |
|    episodes         | 860      |
|    fps              | 550      |
|    time_elapsed     | 478      |
|    total_timesteps  | 263122   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0333   |
|    n_updates      

Eval num_timesteps=280000, episode_reward=-17.00 +/- 7.01
Episode length: 233.40 +/- 90.36
----------------------------------
| eval/               |          |
|    mean_ep_length   | 233      |
|    mean_reward      | -17      |
| rollout/            |          |
|    exploration_rate | 0.723    |
| time/               |          |
|    total_timesteps  | 280000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0237   |
|    n_updates        | 44999    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 325      |
|    ep_rew_mean      | -20.9    |
|    exploration_rate | 0.722    |
| time/               |          |
|    episodes         | 916      |
|    fps              | 543      |
|    time_elapsed     | 517      |
|    total_timesteps  | 281249   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.023    |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 324      |
|    ep_rew_mean      | -20.6    |
|    exploration_rate | 0.704    |
| time/               |          |
|    episodes         | 972      |
|    fps              | 537      |
|    time_elapsed     | 556      |
|    total_timesteps  | 299451   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0168   |
|    n_updates        | 49862    |
----------------------------------
Eval num_timesteps=300000, episode_reward=-17.40 +/- 6.22
Episode length: 373.80 +/- 156.12
----------------------------------
| eval/               |          |
|    mean_ep_length   | 374      |
|    mean_reward      | -17.4    |
| rollout/            |          |
|    exploration_rate | 0.703    |
| time/               |          |
|    total_timesteps  | 300000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0186   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 326      |
|    ep_rew_mean      | -20.4    |
|    exploration_rate | 0.686    |
| time/               |          |
|    episodes         | 1028     |
|    fps              | 530      |
|    time_elapsed     | 598      |
|    total_timesteps  | 317626   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0188   |
|    n_updates        | 54406    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 325      |
|    ep_rew_mean      | -20.4    |
|    exploration_rate | 0.684    |
| time/               |          |
|    episodes         | 1032     |
|    fps              | 530      |
|    time_elapsed     | 600      |
|    total_timesteps  | 318911   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00517  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 329      |
|    ep_rew_mean      | -20.2    |
|    exploration_rate | 0.667    |
| time/               |          |
|    episodes         | 1084     |
|    fps              | 524      |
|    time_elapsed     | 641      |
|    total_timesteps  | 336368   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0138   |
|    n_updates        | 59091    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 329      |
|    ep_rew_mean      | -20.2    |
|    exploration_rate | 0.666    |
| time/               |          |
|    episodes         | 1088     |
|    fps              | 524      |
|    time_elapsed     | 644      |
|    total_timesteps  | 337671   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0126   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 336      |
|    ep_rew_mean      | -20.3    |
|    exploration_rate | 0.648    |
| time/               |          |
|    episodes         | 1140     |
|    fps              | 518      |
|    time_elapsed     | 685      |
|    total_timesteps  | 355183   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.021    |
|    n_updates        | 63795    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 337      |
|    ep_rew_mean      | -20.2    |
|    exploration_rate | 0.647    |
| time/               |          |
|    episodes         | 1144     |
|    fps              | 517      |
|    time_elapsed     | 688      |
|    total_timesteps  | 356500   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.016    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 345      |
|    ep_rew_mean      | -20.1    |
|    exploration_rate | 0.629    |
| time/               |          |
|    episodes         | 1196     |
|    fps              | 511      |
|    time_elapsed     | 732      |
|    total_timesteps  | 374825   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00774  |
|    n_updates        | 68706    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 345      |
|    ep_rew_mean      | -20.1    |
|    exploration_rate | 0.628    |
| time/               |          |
|    episodes         | 1200     |
|    fps              | 511      |
|    time_elapsed     | 735      |
|    total_timesteps  | 376216   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0241   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 358      |
|    ep_rew_mean      | -19.8    |
|    exploration_rate | 0.609    |
| time/               |          |
|    episodes         | 1252     |
|    fps              | 506      |
|    time_elapsed     | 780      |
|    total_timesteps  | 395122   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0163   |
|    n_updates        | 73780    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 361      |
|    ep_rew_mean      | -19.8    |
|    exploration_rate | 0.607    |
| time/               |          |
|    episodes         | 1256     |
|    fps              | 506      |
|    time_elapsed     | 783      |
|    total_timesteps  | 396768   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0165   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 364      |
|    ep_rew_mean      | -20      |
|    exploration_rate | 0.589    |
| time/               |          |
|    episodes         | 1308     |
|    fps              | 501      |
|    time_elapsed     | 828      |
|    total_timesteps  | 415323   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0202   |
|    n_updates        | 78830    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 366      |
|    ep_rew_mean      | -20      |
|    exploration_rate | 0.587    |
| time/               |          |
|    episodes         | 1312     |
|    fps              | 501      |
|    time_elapsed     | 831      |
|    total_timesteps  | 416936   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0184   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 373      |
|    ep_rew_mean      | -19.8    |
|    exploration_rate | 0.568    |
| time/               |          |
|    episodes         | 1364     |
|    fps              | 497      |
|    time_elapsed     | 878      |
|    total_timesteps  | 436787   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.015    |
|    n_updates        | 84196    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 373      |
|    ep_rew_mean      | -19.8    |
|    exploration_rate | 0.566    |
| time/               |          |
|    episodes         | 1368     |
|    fps              | 497      |
|    time_elapsed     | 881      |
|    total_timesteps  | 438335   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0078   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 381      |
|    ep_rew_mean      | -19.9    |
|    exploration_rate | 0.547    |
| time/               |          |
|    episodes         | 1420     |
|    fps              | 492      |
|    time_elapsed     | 929      |
|    total_timesteps  | 458042   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00942  |
|    n_updates        | 89510    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 381      |
|    ep_rew_mean      | -19.9    |
|    exploration_rate | 0.545    |
| time/               |          |
|    episodes         | 1424     |
|    fps              | 492      |
|    time_elapsed     | 932      |
|    total_timesteps  | 459515   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0132   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 379      |
|    ep_rew_mean      | -19.9    |
|    exploration_rate | 0.525    |
| time/               |          |
|    episodes         | 1476     |
|    fps              | 488      |
|    time_elapsed     | 982      |
|    total_timesteps  | 479544   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0169   |
|    n_updates        | 94885    |
----------------------------------
Eval num_timesteps=480000, episode_reward=-17.60 +/- 3.32
Episode length: 401.60 +/- 61.11
----------------------------------
| eval/               |          |
|    mean_ep_length   | 402      |
|    mean_reward      | -17.6    |
| rollout/            |          |
|    exploration_rate | 0.525    |
| time/               |          |
|    total_timesteps  | 480000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0147   |

Eval num_timesteps=500000, episode_reward=-15.20 +/- 3.43
Episode length: 561.80 +/- 79.15
----------------------------------
| eval/               |          |
|    mean_ep_length   | 562      |
|    mean_reward      | -15.2    |
| rollout/            |          |
|    exploration_rate | 0.505    |
| time/               |          |
|    total_timesteps  | 500000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0146   |
|    n_updates        | 99999    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 388      |
|    ep_rew_mean      | -19.9    |
|    exploration_rate | 0.504    |
| time/               |          |
|    episodes         | 1532     |
|    fps              | 482      |
|    time_elapsed     | 1038     |
|    total_timesteps  | 501254   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.014    |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 395      |
|    ep_rew_mean      | -19.8    |
|    exploration_rate | 0.483    |
| time/               |          |
|    episodes         | 1584     |
|    fps              | 478      |
|    time_elapsed     | 1091     |
|    total_timesteps  | 522212   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0064   |
|    n_updates        | 105552   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 396      |
|    ep_rew_mean      | -19.7    |
|    exploration_rate | 0.481    |
| time/               |          |
|    episodes         | 1588     |
|    fps              | 478      |
|    time_elapsed     | 1094     |
|    total_timesteps  | 523815   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00722  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 419      |
|    ep_rew_mean      | -19.5    |
|    exploration_rate | 0.459    |
| time/               |          |
|    episodes         | 1640     |
|    fps              | 475      |
|    time_elapsed     | 1150     |
|    total_timesteps  | 546520   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00396  |
|    n_updates        | 111629   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 417      |
|    ep_rew_mean      | -19.5    |
|    exploration_rate | 0.458    |
| time/               |          |
|    episodes         | 1644     |
|    fps              | 475      |
|    time_elapsed     | 1153     |
|    total_timesteps  | 547979   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00979  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 428      |
|    ep_rew_mean      | -19.6    |
|    exploration_rate | 0.436    |
| time/               |          |
|    episodes         | 1696     |
|    fps              | 471      |
|    time_elapsed     | 1208     |
|    total_timesteps  | 569690   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00837  |
|    n_updates        | 117422   |
----------------------------------
Eval num_timesteps=570000, episode_reward=-13.60 +/- 4.76
Episode length: 605.20 +/- 213.17
----------------------------------
| eval/               |          |
|    mean_ep_length   | 605      |
|    mean_reward      | -13.6    |
| rollout/            |          |
|    exploration_rate | 0.436    |
| time/               |          |
|    total_timesteps  | 570000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0338   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 432      |
|    ep_rew_mean      | -19.5    |
|    exploration_rate | 0.413    |
| time/               |          |
|    episodes         | 1748     |
|    fps              | 466      |
|    time_elapsed     | 1269     |
|    total_timesteps  | 592705   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0119   |
|    n_updates        | 123176   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 433      |
|    ep_rew_mean      | -19.5    |
|    exploration_rate | 0.411    |
| time/               |          |
|    episodes         | 1752     |
|    fps              | 466      |
|    time_elapsed     | 1273     |
|    total_timesteps  | 594486   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0121   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 448      |
|    ep_rew_mean      | -19      |
|    exploration_rate | 0.388    |
| time/               |          |
|    episodes         | 1804     |
|    fps              | 463      |
|    time_elapsed     | 1332     |
|    total_timesteps  | 618226   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0173   |
|    n_updates        | 129556   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 448      |
|    ep_rew_mean      | -19      |
|    exploration_rate | 0.386    |
| time/               |          |
|    episodes         | 1808     |
|    fps              | 463      |
|    time_elapsed     | 1336     |
|    total_timesteps  | 619979   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0103   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 463      |
|    ep_rew_mean      | -18.8    |
|    exploration_rate | 0.364    |
| time/               |          |
|    episodes         | 1856     |
|    fps              | 459      |
|    time_elapsed     | 1398     |
|    total_timesteps  | 642379   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0203   |
|    n_updates        | 135594   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 463      |
|    ep_rew_mean      | -18.7    |
|    exploration_rate | 0.362    |
| time/               |          |
|    episodes         | 1860     |
|    fps              | 459      |
|    time_elapsed     | 1402     |
|    total_timesteps  | 644266   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0218   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 464      |
|    ep_rew_mean      | -18.4    |
|    exploration_rate | 0.339    |
| time/               |          |
|    episodes         | 1912     |
|    fps              | 456      |
|    time_elapsed     | 1463     |
|    total_timesteps  | 668072   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0125   |
|    n_updates        | 142017   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 464      |
|    ep_rew_mean      | -18.4    |
|    exploration_rate | 0.337    |
| time/               |          |
|    episodes         | 1916     |
|    fps              | 456      |
|    time_elapsed     | 1467     |
|    total_timesteps  | 669965   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0132   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 477      |
|    ep_rew_mean      | -18.3    |
|    exploration_rate | 0.313    |
| time/               |          |
|    episodes         | 1964     |
|    fps              | 452      |
|    time_elapsed     | 1534     |
|    total_timesteps  | 693883   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00902  |
|    n_updates        | 148470   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 477      |
|    ep_rew_mean      | -18.3    |
|    exploration_rate | 0.311    |
| time/               |          |
|    episodes         | 1968     |
|    fps              | 452      |
|    time_elapsed     | 1537     |
|    total_timesteps  | 695571   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00629  |
|    n_updates      

Eval num_timesteps=720000, episode_reward=-15.20 +/- 4.49
Episode length: 590.00 +/- 60.22
----------------------------------
| eval/               |          |
|    mean_ep_length   | 590      |
|    mean_reward      | -15.2    |
| rollout/            |          |
|    exploration_rate | 0.287    |
| time/               |          |
|    total_timesteps  | 720000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0178   |
|    n_updates        | 154999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 494      |
|    ep_rew_mean      | -18.4    |
|    exploration_rate | 0.286    |
| time/               |          |
|    episodes         | 2020     |
|    fps              | 448      |
|    time_elapsed     | 1606     |
|    total_timesteps  | 721343   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0206   |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 504      |
|    ep_rew_mean      | -18.1    |
|    exploration_rate | 0.259    |
| time/               |          |
|    episodes         | 2072     |
|    fps              | 446      |
|    time_elapsed     | 1675     |
|    total_timesteps  | 748170   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0173   |
|    n_updates        | 162042   |
----------------------------------
Eval num_timesteps=750000, episode_reward=-11.60 +/- 5.12
Episode length: 610.00 +/- 133.47
----------------------------------
| eval/               |          |
|    mean_ep_length   | 610      |
|    mean_reward      | -11.6    |
| rollout/            |          |
|    exploration_rate | 0.258    |
| time/               |          |
|    total_timesteps  | 750000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00656  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 531      |
|    ep_rew_mean      | -17.8    |
|    exploration_rate | 0.231    |
| time/               |          |
|    episodes         | 2124     |
|    fps              | 443      |
|    time_elapsed     | 1750     |
|    total_timesteps  | 776338   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0291   |
|    n_updates        | 169084   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 533      |
|    ep_rew_mean      | -17.8    |
|    exploration_rate | 0.229    |
| time/               |          |
|    episodes         | 2128     |
|    fps              | 443      |
|    time_elapsed     | 1755     |
|    total_timesteps  | 778393   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0179   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 541      |
|    ep_rew_mean      | -17.7    |
|    exploration_rate | 0.203    |
| time/               |          |
|    episodes         | 2176     |
|    fps              | 440      |
|    time_elapsed     | 1828     |
|    total_timesteps  | 804672   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00653  |
|    n_updates        | 176167   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 545      |
|    ep_rew_mean      | -17.6    |
|    exploration_rate | 0.201    |
| time/               |          |
|    episodes         | 2180     |
|    fps              | 440      |
|    time_elapsed     | 1833     |
|    total_timesteps  | 807034   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.019    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 554      |
|    ep_rew_mean      | -17.4    |
|    exploration_rate | 0.175    |
| time/               |          |
|    episodes         | 2228     |
|    fps              | 436      |
|    time_elapsed     | 1908     |
|    total_timesteps  | 833782   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.018    |
|    n_updates        | 183445   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 555      |
|    ep_rew_mean      | -17.3    |
|    exploration_rate | 0.172    |
| time/               |          |
|    episodes         | 2232     |
|    fps              | 436      |
|    time_elapsed     | 1913     |
|    total_timesteps  | 835907   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0178   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 555      |
|    ep_rew_mean      | -16.8    |
|    exploration_rate | 0.146    |
| time/               |          |
|    episodes         | 2280     |
|    fps              | 433      |
|    time_elapsed     | 1988     |
|    total_timesteps  | 862507   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.013    |
|    n_updates        | 190626   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 555      |
|    ep_rew_mean      | -16.8    |
|    exploration_rate | 0.144    |
| time/               |          |
|    episodes         | 2284     |
|    fps              | 433      |
|    time_elapsed     | 1993     |
|    total_timesteps  | 864788   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0218   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 574      |
|    ep_rew_mean      | -16.4    |
|    exploration_rate | 0.116    |
| time/               |          |
|    episodes         | 2332     |
|    fps              | 431      |
|    time_elapsed     | 2070     |
|    total_timesteps  | 893303   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00849  |
|    n_updates        | 198325   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 577      |
|    ep_rew_mean      | -16.4    |
|    exploration_rate | 0.113    |
| time/               |          |
|    episodes         | 2336     |
|    fps              | 431      |
|    time_elapsed     | 2076     |
|    total_timesteps  | 895886   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0166   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 611      |
|    ep_rew_mean      | -15.7    |
|    exploration_rate | 0.0834   |
| time/               |          |
|    episodes         | 2384     |
|    fps              | 428      |
|    time_elapsed     | 2160     |
|    total_timesteps  | 925847   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0139   |
|    n_updates        | 206461   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 615      |
|    ep_rew_mean      | -15.7    |
|    exploration_rate | 0.0809   |
| time/               |          |
|    episodes         | 2388     |
|    fps              | 428      |
|    time_elapsed     | 2166     |
|    total_timesteps  | 928417   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0143   |
|    n_updates      

Eval num_timesteps=960000, episode_reward=-12.00 +/- 5.40
Episode length: 580.80 +/- 248.39
----------------------------------
| eval/               |          |
|    mean_ep_length   | 581      |
|    mean_reward      | -12      |
| rollout/            |          |
|    exploration_rate | 0.0496   |
| time/               |          |
|    total_timesteps  | 960000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0152   |
|    n_updates        | 214999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 668      |
|    ep_rew_mean      | -14.8    |
|    exploration_rate | 0.047    |
| time/               |          |
|    episodes         | 2436     |
|    fps              | 425      |
|    time_elapsed     | 2262     |
|    total_timesteps  | 962656   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00759  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 693      |
|    ep_rew_mean      | -15.1    |
|    exploration_rate | 0.0149   |
| time/               |          |
|    episodes         | 2484     |
|    fps              | 423      |
|    time_elapsed     | 2352     |
|    total_timesteps  | 995100   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0472   |
|    n_updates        | 223774   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 692      |
|    ep_rew_mean      | -15.1    |
|    exploration_rate | 0.0123   |
| time/               |          |
|    episodes         | 2488     |
|    fps              | 423      |
|    time_elapsed     | 2358     |
|    total_timesteps  | 997648   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00508  |
|    n_updates      

Eval num_timesteps=1030000, episode_reward=-10.80 +/- 3.31
Episode length: 623.60 +/- 273.09
----------------------------------
| eval/               |          |
|    mean_ep_length   | 624      |
|    mean_reward      | -10.8    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1030000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0151   |
|    n_updates        | 232499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 701      |
|    ep_rew_mean      | -14.7    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2536     |
|    fps              | 419      |
|    time_elapsed     | 2459     |
|    total_timesteps  | 1032779  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0182  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 724      |
|    ep_rew_mean      | -13.6    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2584     |
|    fps              | 417      |
|    time_elapsed     | 2553     |
|    total_timesteps  | 1067486  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00821  |
|    n_updates        | 241871   |
----------------------------------
Eval num_timesteps=1070000, episode_reward=-9.00 +/- 5.33
Episode length: 591.20 +/- 290.25
----------------------------------
| eval/               |          |
|    mean_ep_length   | 591      |
|    mean_reward      | -9       |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1070000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0187   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 733      |
|    ep_rew_mean      | -14.4    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2632     |
|    fps              | 415      |
|    time_elapsed     | 2655     |
|    total_timesteps  | 1102763  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0125   |
|    n_updates        | 250690   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 730      |
|    ep_rew_mean      | -14.4    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2636     |
|    fps              | 415      |
|    time_elapsed     | 2662     |
|    total_timesteps  | 1105752  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0098   |
|    n_updates      

Eval num_timesteps=1140000, episode_reward=-12.40 +/- 6.09
Episode length: 545.60 +/- 245.44
----------------------------------
| eval/               |          |
|    mean_ep_length   | 546      |
|    mean_reward      | -12.4    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1140000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0177   |
|    n_updates        | 259999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 747      |
|    ep_rew_mean      | -14.8    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2684     |
|    fps              | 412      |
|    time_elapsed     | 2768     |
|    total_timesteps  | 1142219  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0121  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 750      |
|    ep_rew_mean      | -14      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2732     |
|    fps              | 410      |
|    time_elapsed     | 2866     |
|    total_timesteps  | 1177785  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0199   |
|    n_updates        | 269446   |
----------------------------------
Eval num_timesteps=1180000, episode_reward=-12.40 +/- 3.44
Episode length: 725.80 +/- 114.39
----------------------------------
| eval/               |          |
|    mean_ep_length   | 726      |
|    mean_reward      | -12.4    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1180000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00807 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 745      |
|    ep_rew_mean      | -14.1    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2780     |
|    fps              | 408      |
|    time_elapsed     | 2968     |
|    total_timesteps  | 1212841  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0231   |
|    n_updates        | 278210   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 732      |
|    ep_rew_mean      | -14.1    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2784     |
|    fps              | 408      |
|    time_elapsed     | 2974     |
|    total_timesteps  | 1215451  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0143   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 767      |
|    ep_rew_mean      | -13.9    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2828     |
|    fps              | 406      |
|    time_elapsed     | 3077     |
|    total_timesteps  | 1251281  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0109   |
|    n_updates        | 287820   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 765      |
|    ep_rew_mean      | -13.9    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2832     |
|    fps              | 406      |
|    time_elapsed     | 3084     |
|    total_timesteps  | 1254239  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00652  |
|    n_updates      

Eval num_timesteps=1290000, episode_reward=-13.40 +/- 3.72
Episode length: 666.60 +/- 65.07
----------------------------------
| eval/               |          |
|    mean_ep_length   | 667      |
|    mean_reward      | -13.4    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1290000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00889  |
|    n_updates        | 297499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 775      |
|    ep_rew_mean      | -13.8    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2880     |
|    fps              | 404      |
|    time_elapsed     | 3189     |
|    total_timesteps  | 1290390  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00986  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 768      |
|    ep_rew_mean      | -13.6    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2928     |
|    fps              | 402      |
|    time_elapsed     | 3296     |
|    total_timesteps  | 1328078  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0146   |
|    n_updates        | 307019   |
----------------------------------
Eval num_timesteps=1330000, episode_reward=-9.20 +/- 3.87
Episode length: 651.20 +/- 297.62
----------------------------------
| eval/               |          |
|    mean_ep_length   | 651      |
|    mean_reward      | -9.2     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1330000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0243   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 771      |
|    ep_rew_mean      | -13.8    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2976     |
|    fps              | 400      |
|    time_elapsed     | 3403     |
|    total_timesteps  | 1364555  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0242   |
|    n_updates        | 316138   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 778      |
|    ep_rew_mean      | -13.6    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 2980     |
|    fps              | 401      |
|    time_elapsed     | 3411     |
|    total_timesteps  | 1368216  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.026    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 777      |
|    ep_rew_mean      | -13.9    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3024     |
|    fps              | 398      |
|    time_elapsed     | 3516     |
|    total_timesteps  | 1402907  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00789  |
|    n_updates        | 325726   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 778      |
|    ep_rew_mean      | -13.8    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3028     |
|    fps              | 398      |
|    time_elapsed     | 3523     |
|    total_timesteps  | 1405874  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0322   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 794      |
|    ep_rew_mean      | -14.1    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3072     |
|    fps              | 397      |
|    time_elapsed     | 3627     |
|    total_timesteps  | 1440799  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0112   |
|    n_updates        | 335199   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 796      |
|    ep_rew_mean      | -14.1    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3076     |
|    fps              | 397      |
|    time_elapsed     | 3634     |
|    total_timesteps  | 1444138  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0234   |
|    n_updates      

Eval num_timesteps=1480000, episode_reward=-11.60 +/- 3.01
Episode length: 639.80 +/- 174.14
----------------------------------
| eval/               |          |
|    mean_ep_length   | 640      |
|    mean_reward      | -11.6    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1480000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0209   |
|    n_updates        | 344999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 794      |
|    ep_rew_mean      | -14.6    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3124     |
|    fps              | 395      |
|    time_elapsed     | 3747     |
|    total_timesteps  | 1482295  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0175  

Eval num_timesteps=1520000, episode_reward=-13.80 +/- 6.85
Episode length: 629.20 +/- 210.87
----------------------------------
| eval/               |          |
|    mean_ep_length   | 629      |
|    mean_reward      | -13.8    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1520000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00998  |
|    n_updates        | 354999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 799      |
|    ep_rew_mean      | -14.4    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3172     |
|    fps              | 393      |
|    time_elapsed     | 3859     |
|    total_timesteps  | 1520730  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0119  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 803      |
|    ep_rew_mean      | -14.7    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3220     |
|    fps              | 392      |
|    time_elapsed     | 3967     |
|    total_timesteps  | 1559118  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00879  |
|    n_updates        | 364779   |
----------------------------------
Eval num_timesteps=1560000, episode_reward=-11.00 +/- 5.59
Episode length: 654.00 +/- 298.73
----------------------------------
| eval/               |          |
|    mean_ep_length   | 654      |
|    mean_reward      | -11      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1560000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00843 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 812      |
|    ep_rew_mean      | -14.5    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3268     |
|    fps              | 391      |
|    time_elapsed     | 4081     |
|    total_timesteps  | 1598273  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0148   |
|    n_updates        | 374568   |
----------------------------------
Eval num_timesteps=1600000, episode_reward=-12.60 +/- 5.46
Episode length: 581.20 +/- 278.14
----------------------------------
| eval/               |          |
|    mean_ep_length   | 581      |
|    mean_reward      | -12.6    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1600000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.005   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 830      |
|    ep_rew_mean      | -13.5    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3316     |
|    fps              | 390      |
|    time_elapsed     | 4197     |
|    total_timesteps  | 1639470  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0158   |
|    n_updates        | 384867   |
----------------------------------
Eval num_timesteps=1640000, episode_reward=-9.40 +/- 6.89
Episode length: 722.20 +/- 267.51
----------------------------------
| eval/               |          |
|    mean_ep_length   | 722      |
|    mean_reward      | -9.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1640000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00988  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 845      |
|    ep_rew_mean      | -13.2    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3364     |
|    fps              | 389      |
|    time_elapsed     | 4313     |
|    total_timesteps  | 1679610  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0212   |
|    n_updates        | 394902   |
----------------------------------
Eval num_timesteps=1680000, episode_reward=-11.80 +/- 5.19
Episode length: 755.00 +/- 192.20
----------------------------------
| eval/               |          |
|    mean_ep_length   | 755      |
|    mean_reward      | -11.8    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1680000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00447 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 836      |
|    ep_rew_mean      | -13.2    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3412     |
|    fps              | 388      |
|    time_elapsed     | 4428     |
|    total_timesteps  | 1719710  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0173   |
|    n_updates        | 404927   |
----------------------------------
Eval num_timesteps=1720000, episode_reward=-13.40 +/- 3.56
Episode length: 685.40 +/- 78.31
----------------------------------
| eval/               |          |
|    mean_ep_length   | 685      |
|    mean_reward      | -13.4    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1720000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.013    

Eval num_timesteps=1760000, episode_reward=-8.80 +/- 3.71
Episode length: 734.00 +/- 330.46
----------------------------------
| eval/               |          |
|    mean_ep_length   | 734      |
|    mean_reward      | -8.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1760000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00388  |
|    n_updates        | 414999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 862      |
|    ep_rew_mean      | -13.1    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3460     |
|    fps              | 386      |
|    time_elapsed     | 4556     |
|    total_timesteps  | 1762689  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00838  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 884      |
|    ep_rew_mean      | -13.4    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3504     |
|    fps              | 385      |
|    time_elapsed     | 4668     |
|    total_timesteps  | 1801447  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00733  |
|    n_updates        | 425361   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 886      |
|    ep_rew_mean      | -13.3    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3508     |
|    fps              | 385      |
|    time_elapsed     | 4677     |
|    total_timesteps  | 1804938  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00822  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 908      |
|    ep_rew_mean      | -13.4    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3552     |
|    fps              | 384      |
|    time_elapsed     | 4797     |
|    total_timesteps  | 1846222  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00782  |
|    n_updates        | 436555   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 907      |
|    ep_rew_mean      | -13.4    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3556     |
|    fps              | 384      |
|    time_elapsed     | 4806     |
|    total_timesteps  | 1849837  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00787  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 926      |
|    ep_rew_mean      | -12.4    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3600     |
|    fps              | 383      |
|    time_elapsed     | 4925     |
|    total_timesteps  | 1889968  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00636  |
|    n_updates        | 447491   |
----------------------------------
Eval num_timesteps=1890000, episode_reward=-13.40 +/- 3.44
Episode length: 845.00 +/- 117.14
----------------------------------
| eval/               |          |
|    mean_ep_length   | 845      |
|    mean_reward      | -13.4    |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1890000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00912 

Eval num_timesteps=1930000, episode_reward=-11.00 +/- 4.60
Episode length: 863.40 +/- 193.04
----------------------------------
| eval/               |          |
|    mean_ep_length   | 863      |
|    mean_reward      | -11      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 1930000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0125   |
|    n_updates        | 457499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 907      |
|    ep_rew_mean      | -12.6    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3648     |
|    fps              | 382      |
|    time_elapsed     | 5057     |
|    total_timesteps  | 1933557  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00861 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 921      |
|    ep_rew_mean      | -12.4    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3692     |
|    fps              | 381      |
|    time_elapsed     | 5179     |
|    total_timesteps  | 1974812  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0128   |
|    n_updates        | 468702   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 919      |
|    ep_rew_mean      | -12.4    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3696     |
|    fps              | 381      |
|    time_elapsed     | 5187     |
|    total_timesteps  | 1978219  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.011    |
|    n_updates      

Eval num_timesteps=2020000, episode_reward=-8.60 +/- 3.20
Episode length: 934.00 +/- 317.58
----------------------------------
| eval/               |          |
|    mean_ep_length   | 934      |
|    mean_reward      | -8.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2020000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00551  |
|    n_updates        | 479999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 952      |
|    ep_rew_mean      | -11.9    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3740     |
|    fps              | 379      |
|    time_elapsed     | 5321     |
|    total_timesteps  | 2021706  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00965  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 976      |
|    ep_rew_mean      | -12.5    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3784     |
|    fps              | 379      |
|    time_elapsed     | 5445     |
|    total_timesteps  | 2064566  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0138   |
|    n_updates        | 491141   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 978      |
|    ep_rew_mean      | -12.5    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3788     |
|    fps              | 379      |
|    time_elapsed     | 5455     |
|    total_timesteps  | 2068759  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0124   |
|    n_updates      

Eval num_timesteps=2110000, episode_reward=-6.60 +/- 4.50
Episode length: 939.60 +/- 442.77
----------------------------------
| eval/               |          |
|    mean_ep_length   | 940      |
|    mean_reward      | -6.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2110000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00625  |
|    n_updates        | 502499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.01e+03 |
|    ep_rew_mean      | -12.1    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3832     |
|    fps              | 377      |
|    time_elapsed     | 5596     |
|    total_timesteps  | 2114307  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00684  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 996      |
|    ep_rew_mean      | -11.8    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3876     |
|    fps              | 376      |
|    time_elapsed     | 5720     |
|    total_timesteps  | 2156269  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0148   |
|    n_updates        | 514067   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 990      |
|    ep_rew_mean      | -11.6    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3880     |
|    fps              | 376      |
|    time_elapsed     | 5729     |
|    total_timesteps  | 2159936  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0103   |
|    n_updates      

Eval num_timesteps=2200000, episode_reward=-8.40 +/- 2.15
Episode length: 899.60 +/- 298.99
----------------------------------
| eval/               |          |
|    mean_ep_length   | 900      |
|    mean_reward      | -8.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2200000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00998  |
|    n_updates        | 524999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 975      |
|    ep_rew_mean      | -11.5    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3924     |
|    fps              | 375      |
|    time_elapsed     | 5863     |
|    total_timesteps  | 2202548  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00844  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 989      |
|    ep_rew_mean      | -10.5    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 3968     |
|    fps              | 374      |
|    time_elapsed     | 5995     |
|    total_timesteps  | 2247280  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.015    |
|    n_updates        | 536819   |
----------------------------------
Eval num_timesteps=2250000, episode_reward=-5.80 +/- 3.60
Episode length: 1070.80 +/- 139.46
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.07e+03 |
|    mean_reward      | -5.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2250000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0108  

New best mean reward!
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.02e+03 |
|    ep_rew_mean      | -9.8     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4012     |
|    fps              | 373      |
|    time_elapsed     | 6135     |
|    total_timesteps  | 2292984  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0033   |
|    n_updates        | 548245   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.03e+03 |
|    ep_rew_mean      | -9.69    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4016     |
|    fps              | 373      |
|    time_elapsed     | 6146     |
|    total_timesteps  | 2297671  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0173   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.05e+03 |
|    ep_rew_mean      | -9.26    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4056     |
|    fps              | 372      |
|    time_elapsed     | 6279     |
|    total_timesteps  | 2340816  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0159   |
|    n_updates        | 560203   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.05e+03 |
|    ep_rew_mean      | -9.43    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4060     |
|    fps              | 372      |
|    time_elapsed     | 6289     |
|    total_timesteps  | 2344984  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0175   |
|    n_updates      

Eval num_timesteps=2390000, episode_reward=-5.80 +/- 7.22
Episode length: 878.20 +/- 257.02
----------------------------------
| eval/               |          |
|    mean_ep_length   | 878      |
|    mean_reward      | -5.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2390000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0192   |
|    n_updates        | 572499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.09e+03 |
|    ep_rew_mean      | -8.8     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4104     |
|    fps              | 371      |
|    time_elapsed     | 6437     |
|    total_timesteps  | 2392922  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00844  

Eval num_timesteps=2440000, episode_reward=-4.00 +/- 2.83
Episode length: 1092.80 +/- 281.60
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.09e+03 |
|    mean_reward      | -4       |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2440000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0191   |
|    n_updates        | 584999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.08e+03 |
|    ep_rew_mean      | -8.68    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4148     |
|    fps              | 370      |
|    time_elapsed     | 6588     |
|    total_timesteps  | 2441111  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0179  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.1e+03  |
|    ep_rew_mean      | -8.27    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4192     |
|    fps              | 369      |
|    time_elapsed     | 6729     |
|    total_timesteps  | 2488647  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0152   |
|    n_updates        | 597161   |
----------------------------------
Eval num_timesteps=2490000, episode_reward=-0.40 +/- 4.96
Episode length: 1143.00 +/- 105.90
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.14e+03 |
|    mean_reward      | -0.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2490000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0189  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.08e+03 |
|    ep_rew_mean      | -8.36    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4236     |
|    fps              | 368      |
|    time_elapsed     | 6879     |
|    total_timesteps  | 2536798  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0205   |
|    n_updates        | 609199   |
----------------------------------
Eval num_timesteps=2540000, episode_reward=-4.40 +/- 7.76
Episode length: 1013.20 +/- 216.62
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.01e+03 |
|    mean_reward      | -4.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2540000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.021   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.08e+03 |
|    ep_rew_mean      | -8.02    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4280     |
|    fps              | 367      |
|    time_elapsed     | 7030     |
|    total_timesteps  | 2584043  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0152   |
|    n_updates        | 621010   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.09e+03 |
|    ep_rew_mean      | -7.92    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4284     |
|    fps              | 367      |
|    time_elapsed     | 7041     |
|    total_timesteps  | 2588643  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0143   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.13e+03 |
|    ep_rew_mean      | -6.96    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4324     |
|    fps              | 366      |
|    time_elapsed     | 7189     |
|    total_timesteps  | 2636189  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0109   |
|    n_updates        | 634047   |
----------------------------------
Eval num_timesteps=2640000, episode_reward=-6.40 +/- 3.50
Episode length: 1012.00 +/- 268.16
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.01e+03 |
|    mean_reward      | -6.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2640000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0121  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.17e+03 |
|    ep_rew_mean      | -6.16    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4368     |
|    fps              | 365      |
|    time_elapsed     | 7348     |
|    total_timesteps  | 2688024  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.029    |
|    n_updates        | 647005   |
----------------------------------
Eval num_timesteps=2690000, episode_reward=-4.60 +/- 2.42
Episode length: 1024.20 +/- 450.46
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.02e+03 |
|    mean_reward      | -4.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2690000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0104  

Eval num_timesteps=2740000, episode_reward=-1.40 +/- 7.34
Episode length: 1103.20 +/- 286.08
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.1e+03  |
|    mean_reward      | -1.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2740000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0078   |
|    n_updates        | 659999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.21e+03 |
|    ep_rew_mean      | -5.25    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4412     |
|    fps              | 364      |
|    time_elapsed     | 7520     |
|    total_timesteps  | 2743002  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00743 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.22e+03 |
|    ep_rew_mean      | -4.93    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4452     |
|    fps              | 363      |
|    time_elapsed     | 7669     |
|    total_timesteps  | 2791053  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00999  |
|    n_updates        | 672763   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.22e+03 |
|    ep_rew_mean      | -5.03    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4456     |
|    fps              | 363      |
|    time_elapsed     | 7679     |
|    total_timesteps  | 2795457  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00608  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.22e+03 |
|    ep_rew_mean      | -4.87    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4496     |
|    fps              | 363      |
|    time_elapsed     | 7832     |
|    total_timesteps  | 2844832  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00862  |
|    n_updates        | 686207   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.21e+03 |
|    ep_rew_mean      | -4.93    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4500     |
|    fps              | 363      |
|    time_elapsed     | 7842     |
|    total_timesteps  | 2849096  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0109   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | -5       |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4540     |
|    fps              | 362      |
|    time_elapsed     | 7998     |
|    total_timesteps  | 2899266  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00867  |
|    n_updates        | 699816   |
----------------------------------
Eval num_timesteps=2900000, episode_reward=-4.80 +/- 3.66
Episode length: 1062.00 +/- 302.02
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.06e+03 |
|    mean_reward      | -4.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2900000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00776 

Eval num_timesteps=2950000, episode_reward=-5.00 +/- 2.28
Episode length: 964.80 +/- 420.30
----------------------------------
| eval/               |          |
|    mean_ep_length   | 965      |
|    mean_reward      | -5       |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 2950000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0127   |
|    n_updates        | 712499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.23e+03 |
|    ep_rew_mean      | -4.48    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4584     |
|    fps              | 361      |
|    time_elapsed     | 8172     |
|    total_timesteps  | 2952455  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0173   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.22e+03 |
|    ep_rew_mean      | -4.04    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4624     |
|    fps              | 360      |
|    time_elapsed     | 8328     |
|    total_timesteps  | 3003261  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0141   |
|    n_updates        | 725815   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.22e+03 |
|    ep_rew_mean      | -4.18    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4628     |
|    fps              | 360      |
|    time_elapsed     | 8338     |
|    total_timesteps  | 3007741  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00869  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.23e+03 |
|    ep_rew_mean      | -3.76    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4668     |
|    fps              | 359      |
|    time_elapsed     | 8491     |
|    total_timesteps  | 3055744  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.011    |
|    n_updates        | 738935   |
----------------------------------
Eval num_timesteps=3060000, episode_reward=-2.60 +/- 4.59
Episode length: 1109.20 +/- 296.39
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.11e+03 |
|    mean_reward      | -2.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3060000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.021   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.2e+03  |
|    ep_rew_mean      | -4       |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4712     |
|    fps              | 359      |
|    time_elapsed     | 8653     |
|    total_timesteps  | 3107754  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00798  |
|    n_updates        | 751938   |
----------------------------------
Eval num_timesteps=3110000, episode_reward=2.00 +/- 7.82
Episode length: 893.80 +/- 428.74
----------------------------------
| eval/               |          |
|    mean_ep_length   | 894      |
|    mean_reward      | 2        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3110000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0171   |

Eval num_timesteps=3160000, episode_reward=-6.00 +/- 1.90
Episode length: 1180.40 +/- 95.16
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.18e+03 |
|    mean_reward      | -6       |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3160000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0159   |
|    n_updates        | 764999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.23e+03 |
|    ep_rew_mean      | -4.28    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4756     |
|    fps              | 358      |
|    time_elapsed     | 8833     |
|    total_timesteps  | 3164540  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0266   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | -4.69    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4796     |
|    fps              | 357      |
|    time_elapsed     | 8987     |
|    total_timesteps  | 3213809  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0124   |
|    n_updates        | 778452   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | -4.68    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4800     |
|    fps              | 357      |
|    time_elapsed     | 8999     |
|    total_timesteps  | 3218654  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0195   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.22e+03 |
|    ep_rew_mean      | -4.58    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4840     |
|    fps              | 356      |
|    time_elapsed     | 9154     |
|    total_timesteps  | 3267442  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0115   |
|    n_updates        | 791860   |
----------------------------------
Eval num_timesteps=3270000, episode_reward=-0.20 +/- 7.08
Episode length: 1149.20 +/- 150.66
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.15e+03 |
|    mean_reward      | -0.2     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3270000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0116  

Eval num_timesteps=3320000, episode_reward=-4.00 +/- 8.12
Episode length: 966.00 +/- 381.72
----------------------------------
| eval/               |          |
|    mean_ep_length   | 966      |
|    mean_reward      | -4       |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3320000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0159   |
|    n_updates        | 804999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | -3.48    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4884     |
|    fps              | 355      |
|    time_elapsed     | 9336     |
|    total_timesteps  | 3322620  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0216   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | -2.83    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4924     |
|    fps              | 355      |
|    time_elapsed     | 9491     |
|    total_timesteps  | 3372532  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0137   |
|    n_updates        | 818132   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | -2.51    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4928     |
|    fps              | 355      |
|    time_elapsed     | 9501     |
|    total_timesteps  | 3377158  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00734  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | -2.09    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 4968     |
|    fps              | 354      |
|    time_elapsed     | 9661     |
|    total_timesteps  | 3428600  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00549  |
|    n_updates        | 832149   |
----------------------------------
Eval num_timesteps=3430000, episode_reward=-3.60 +/- 7.50
Episode length: 1017.80 +/- 262.54
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.02e+03 |
|    mean_reward      | -3.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3430000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00573 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | -1.36    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5008     |
|    fps              | 354      |
|    time_elapsed     | 9833     |
|    total_timesteps  | 3481328  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0097   |
|    n_updates        | 845331   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | -1.73    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5012     |
|    fps              | 354      |
|    time_elapsed     | 9844     |
|    total_timesteps  | 3485949  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.016    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | -1.66    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5052     |
|    fps              | 353      |
|    time_elapsed     | 10004    |
|    total_timesteps  | 3535525  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00623  |
|    n_updates        | 858881   |
----------------------------------
Eval num_timesteps=3540000, episode_reward=-0.80 +/- 7.88
Episode length: 1020.40 +/- 302.26
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.02e+03 |
|    mean_reward      | -0.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3540000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0232  

Eval num_timesteps=3590000, episode_reward=-5.80 +/- 6.73
Episode length: 1114.20 +/- 270.53
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.11e+03 |
|    mean_reward      | -5.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3590000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00713  |
|    n_updates        | 872499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | -2.08    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5096     |
|    fps              | 352      |
|    time_elapsed     | 10187    |
|    total_timesteps  | 3592258  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0091  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | -1.91    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5136     |
|    fps              | 352      |
|    time_elapsed     | 10353    |
|    total_timesteps  | 3645438  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0117   |
|    n_updates        | 886359   |
----------------------------------
Eval num_timesteps=3650000, episode_reward=1.60 +/- 3.77
Episode length: 1056.20 +/- 342.17
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.06e+03 |
|    mean_reward      | 1.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3650000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00821  

Eval num_timesteps=3700000, episode_reward=0.40 +/- 3.77
Episode length: 1120.00 +/- 124.81
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.12e+03 |
|    mean_reward      | 0.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3700000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00552  |
|    n_updates        | 899999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | -0.31    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5180     |
|    fps              | 351      |
|    time_elapsed     | 10531    |
|    total_timesteps  | 3702188  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00641  

Eval num_timesteps=3750000, episode_reward=-1.60 +/- 3.88
Episode length: 1106.00 +/- 298.63
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.11e+03 |
|    mean_reward      | -1.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3750000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00539  |
|    n_updates        | 912499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 0.32     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5224     |
|    fps              | 351      |
|    time_elapsed     | 10693    |
|    total_timesteps  | 3754589  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0161  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.23e+03 |
|    ep_rew_mean      | -0.32    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5264     |
|    fps              | 350      |
|    time_elapsed     | 10852    |
|    total_timesteps  | 3804578  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0255   |
|    n_updates        | 926144   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.23e+03 |
|    ep_rew_mean      | -0.48    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5268     |
|    fps              | 350      |
|    time_elapsed     | 10863    |
|    total_timesteps  | 3809648  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0135   |
|    n_updates      

Eval num_timesteps=3860000, episode_reward=-0.80 +/- 4.87
Episode length: 1255.00 +/- 168.38
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.26e+03 |
|    mean_reward      | -0.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3860000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00401  |
|    n_updates        | 939999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | -1.8     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5308     |
|    fps              | 349      |
|    time_elapsed     | 11039    |
|    total_timesteps  | 3862140  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0053  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | -1.34    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5348     |
|    fps              | 349      |
|    time_elapsed     | 11200    |
|    total_timesteps  | 3912234  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0102   |
|    n_updates        | 953058   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | -1.19    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5352     |
|    fps              | 349      |
|    time_elapsed     | 11210    |
|    total_timesteps  | 3916471  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0143   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | -0.59    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5392     |
|    fps              | 348      |
|    time_elapsed     | 11370    |
|    total_timesteps  | 3967133  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00889  |
|    n_updates        | 966783   |
----------------------------------
Eval num_timesteps=3970000, episode_reward=-3.20 +/- 4.02
Episode length: 1193.60 +/- 261.63
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.19e+03 |
|    mean_reward      | -3.2     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 3970000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0126  

Eval num_timesteps=4020000, episode_reward=1.40 +/- 4.84
Episode length: 1261.40 +/- 138.72
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.26e+03 |
|    mean_reward      | 1.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4020000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00706  |
|    n_updates        | 979999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | 0.29     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5436     |
|    fps              | 348      |
|    time_elapsed     | 11551    |
|    total_timesteps  | 4021913  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00893  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.25e+03 |
|    ep_rew_mean      | 0.35     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5476     |
|    fps              | 347      |
|    time_elapsed     | 11709    |
|    total_timesteps  | 4072254  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00696  |
|    n_updates        | 993063   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | 0.14     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5480     |
|    fps              | 347      |
|    time_elapsed     | 11720    |
|    total_timesteps  | 4076637  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0161   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | -0.96    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5520     |
|    fps              | 347      |
|    time_elapsed     | 11883    |
|    total_timesteps  | 4128936  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00945  |
|    n_updates        | 1007233  |
----------------------------------
Eval num_timesteps=4130000, episode_reward=-0.20 +/- 3.97
Episode length: 1034.40 +/- 431.21
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.03e+03 |
|    mean_reward      | -0.2     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4130000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00927 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | -1.99    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5560     |
|    fps              | 346      |
|    time_elapsed     | 12050    |
|    total_timesteps  | 4181262  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00849  |
|    n_updates        | 1020315  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | -2.02    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5564     |
|    fps              | 347      |
|    time_elapsed     | 12062    |
|    total_timesteps  | 4186301  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00493  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | -1.64    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5604     |
|    fps              | 346      |
|    time_elapsed     | 12225    |
|    total_timesteps  | 4237746  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0176   |
|    n_updates        | 1034436  |
----------------------------------
Eval num_timesteps=4240000, episode_reward=-5.40 +/- 4.27
Episode length: 1027.40 +/- 513.48
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.03e+03 |
|    mean_reward      | -5.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4240000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00851 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | -1.48    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5644     |
|    fps              | 345      |
|    time_elapsed     | 12403    |
|    total_timesteps  | 4291415  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0081   |
|    n_updates        | 1047853  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | -1.65    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5648     |
|    fps              | 346      |
|    time_elapsed     | 12415    |
|    total_timesteps  | 4296799  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0109   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | -1.01    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5688     |
|    fps              | 345      |
|    time_elapsed     | 12576    |
|    total_timesteps  | 4347819  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00648  |
|    n_updates        | 1061954  |
----------------------------------
Eval num_timesteps=4350000, episode_reward=-5.80 +/- 6.85
Episode length: 1016.40 +/- 349.12
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.02e+03 |
|    mean_reward      | -5.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4350000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00671 

Eval num_timesteps=4400000, episode_reward=0.80 +/- 9.99
Episode length: 902.00 +/- 342.63
----------------------------------
| eval/               |          |
|    mean_ep_length   | 902      |
|    mean_reward      | 0.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4400000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00929  |
|    n_updates        | 1074999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | -0.65    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5732     |
|    fps              | 345      |
|    time_elapsed     | 12756    |
|    total_timesteps  | 4404621  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00269  |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | -1.19    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5772     |
|    fps              | 344      |
|    time_elapsed     | 12921    |
|    total_timesteps  | 4456379  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00907  |
|    n_updates        | 1089094  |
----------------------------------
Eval num_timesteps=4460000, episode_reward=1.40 +/- 8.04
Episode length: 1021.80 +/- 314.01
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.02e+03 |
|    mean_reward      | 1.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4460000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0108   

Eval num_timesteps=4510000, episode_reward=-1.80 +/- 4.66
Episode length: 1112.20 +/- 267.99
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.11e+03 |
|    mean_reward      | -1.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4510000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0122   |
|    n_updates        | 1102499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | -1.56    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5816     |
|    fps              | 344      |
|    time_elapsed     | 13106    |
|    total_timesteps  | 4514861  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00451 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | -1.6     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5856     |
|    fps              | 344      |
|    time_elapsed     | 13267    |
|    total_timesteps  | 4566955  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0347   |
|    n_updates        | 1116738  |
----------------------------------
Eval num_timesteps=4570000, episode_reward=4.00 +/- 2.00
Episode length: 1123.60 +/- 362.39
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.12e+03 |
|    mean_reward      | 4        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4570000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0034   

Eval num_timesteps=4620000, episode_reward=4.00 +/- 6.57
Episode length: 1161.80 +/- 255.30
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.16e+03 |
|    mean_reward      | 4        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4620000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0219   |
|    n_updates        | 1129999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 0.05     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5900     |
|    fps              | 343      |
|    time_elapsed     | 13448    |
|    total_timesteps  | 4621925  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00326  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | -0.08    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5940     |
|    fps              | 343      |
|    time_elapsed     | 13608    |
|    total_timesteps  | 4673532  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00297  |
|    n_updates        | 1143382  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | -0.01    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5944     |
|    fps              | 343      |
|    time_elapsed     | 13620    |
|    total_timesteps  | 4678722  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0189   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | 0.13     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 5984     |
|    fps              | 343      |
|    time_elapsed     | 13778    |
|    total_timesteps  | 4726775  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00524  |
|    n_updates        | 1156693  |
----------------------------------
Eval num_timesteps=4730000, episode_reward=0.00 +/- 8.76
Episode length: 935.00 +/- 347.03
----------------------------------
| eval/               |          |
|    mean_ep_length   | 935      |
|    mean_reward      | 0        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4730000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0115   |

Eval num_timesteps=4780000, episode_reward=-1.00 +/- 5.90
Episode length: 1173.60 +/- 88.27
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.17e+03 |
|    mean_reward      | -1       |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4780000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0124   |
|    n_updates        | 1169999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | -0.9     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6028     |
|    fps              | 342      |
|    time_elapsed     | 13960    |
|    total_timesteps  | 4783880  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00775  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | -1.07    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6068     |
|    fps              | 342      |
|    time_elapsed     | 14126    |
|    total_timesteps  | 4835869  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00988  |
|    n_updates        | 1183967  |
----------------------------------
Eval num_timesteps=4840000, episode_reward=1.80 +/- 5.49
Episode length: 1263.40 +/- 250.69
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.26e+03 |
|    mean_reward      | 1.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4840000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.012    

Eval num_timesteps=4890000, episode_reward=-2.60 +/- 6.80
Episode length: 1209.00 +/- 263.48
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.21e+03 |
|    mean_reward      | -2.6     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 4890000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00991  |
|    n_updates        | 1197499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | -1.15    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6112     |
|    fps              | 341      |
|    time_elapsed     | 14312    |
|    total_timesteps  | 4892474  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0188  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 0.59     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6152     |
|    fps              | 341      |
|    time_elapsed     | 14476    |
|    total_timesteps  | 4944854  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00686  |
|    n_updates        | 1211213  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 0.72     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6156     |
|    fps              | 341      |
|    time_elapsed     | 14487    |
|    total_timesteps  | 4949622  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00745  |
|    n_updates      

Eval num_timesteps=5000000, episode_reward=4.20 +/- 6.31
Episode length: 1239.00 +/- 157.34
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.24e+03 |
|    mean_reward      | 4.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5000000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00939  |
|    n_updates        | 1224999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 1.36     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6196     |
|    fps              | 341      |
|    time_elapsed     | 14665    |
|    total_timesteps  | 5002752  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00514  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 1.37     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6236     |
|    fps              | 340      |
|    time_elapsed     | 14826    |
|    total_timesteps  | 5053666  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0147   |
|    n_updates        | 1238416  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 1.17     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6240     |
|    fps              | 340      |
|    time_elapsed     | 14838    |
|    total_timesteps  | 5058703  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00826  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | 0.38     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6280     |
|    fps              | 340      |
|    time_elapsed     | 14994    |
|    total_timesteps  | 5108382  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00247  |
|    n_updates        | 1252095  |
----------------------------------
Eval num_timesteps=5110000, episode_reward=0.60 +/- 5.04
Episode length: 1262.60 +/- 176.05
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.26e+03 |
|    mean_reward      | 0.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5110000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0153   

Eval num_timesteps=5160000, episode_reward=-2.20 +/- 5.31
Episode length: 1021.80 +/- 505.44
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.02e+03 |
|    mean_reward      | -2.2     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5160000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0068   |
|    n_updates        | 1264999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | 0.59     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6324     |
|    fps              | 340      |
|    time_elapsed     | 15174    |
|    total_timesteps  | 5164200  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0176  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 0.2      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6364     |
|    fps              | 340      |
|    time_elapsed     | 15334    |
|    total_timesteps  | 5215927  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0213   |
|    n_updates        | 1278981  |
----------------------------------
Eval num_timesteps=5220000, episode_reward=3.00 +/- 7.54
Episode length: 1014.20 +/- 220.50
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.01e+03 |
|    mean_reward      | 3        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5220000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0298   

Eval num_timesteps=5270000, episode_reward=4.80 +/- 8.66
Episode length: 1258.60 +/- 190.36
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.26e+03 |
|    mean_reward      | 4.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5270000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00411  |
|    n_updates        | 1292499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 0.39     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6408     |
|    fps              | 339      |
|    time_elapsed     | 15521    |
|    total_timesteps  | 5273581  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00353  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | -0.02    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6448     |
|    fps              | 339      |
|    time_elapsed     | 15686    |
|    total_timesteps  | 5326335  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0118   |
|    n_updates        | 1306583  |
----------------------------------
Eval num_timesteps=5330000, episode_reward=-2.00 +/- 6.26
Episode length: 1069.80 +/- 456.80
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.07e+03 |
|    mean_reward      | -2       |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5330000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00879 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | -0.49    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6488     |
|    fps              | 339      |
|    time_elapsed     | 15860    |
|    total_timesteps  | 5381083  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00961  |
|    n_updates        | 1320270  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | -0.83    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6492     |
|    fps              | 339      |
|    time_elapsed     | 15872    |
|    total_timesteps  | 5386131  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00787  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | -1.44    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6532     |
|    fps              | 339      |
|    time_elapsed     | 16035    |
|    total_timesteps  | 5438201  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0066   |
|    n_updates        | 1334550  |
----------------------------------
Eval num_timesteps=5440000, episode_reward=2.60 +/- 6.15
Episode length: 1135.40 +/- 226.02
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.14e+03 |
|    mean_reward      | 2.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5440000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00706  

Eval num_timesteps=5490000, episode_reward=-1.80 +/- 6.05
Episode length: 1227.60 +/- 364.22
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.23e+03 |
|    mean_reward      | -1.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5490000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00296  |
|    n_updates        | 1347499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | -0.57    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6576     |
|    fps              | 338      |
|    time_elapsed     | 16217    |
|    total_timesteps  | 5493455  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0106  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | -0.06    |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6616     |
|    fps              | 338      |
|    time_elapsed     | 16381    |
|    total_timesteps  | 5546081  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0293   |
|    n_updates        | 1361520  |
----------------------------------
Eval num_timesteps=5550000, episode_reward=0.40 +/- 8.71
Episode length: 1061.00 +/- 435.61
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.06e+03 |
|    mean_reward      | 0.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5550000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00484  

Eval num_timesteps=5600000, episode_reward=5.80 +/- 7.08
Episode length: 1269.80 +/- 243.67
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.27e+03 |
|    mean_reward      | 5.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5600000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00984  |
|    n_updates        | 1374999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 0.32     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6660     |
|    fps              | 338      |
|    time_elapsed     | 16567    |
|    total_timesteps  | 5603715  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00971  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 2.1      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6700     |
|    fps              | 338      |
|    time_elapsed     | 16735    |
|    total_timesteps  | 5658116  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0268   |
|    n_updates        | 1389528  |
----------------------------------
Eval num_timesteps=5660000, episode_reward=8.20 +/- 5.31
Episode length: 1119.40 +/- 261.84
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.12e+03 |
|    mean_reward      | 8.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5660000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0148   

Eval num_timesteps=5710000, episode_reward=3.40 +/- 6.41
Episode length: 1250.80 +/- 353.47
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.25e+03 |
|    mean_reward      | 3.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5710000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00533  |
|    n_updates        | 1402499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 3.36     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6744     |
|    fps              | 337      |
|    time_elapsed     | 16919    |
|    total_timesteps  | 5713841  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0126   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 2.78     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6784     |
|    fps              | 337      |
|    time_elapsed     | 17085    |
|    total_timesteps  | 5767515  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0111   |
|    n_updates        | 1416878  |
----------------------------------
Eval num_timesteps=5770000, episode_reward=2.40 +/- 2.65
Episode length: 1087.00 +/- 459.48
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.09e+03 |
|    mean_reward      | 2.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5770000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0161   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 2.13     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6824     |
|    fps              | 337      |
|    time_elapsed     | 17265    |
|    total_timesteps  | 5823937  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00592  |
|    n_updates        | 1430984  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.35e+03 |
|    ep_rew_mean      | 2.04     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6828     |
|    fps              | 337      |
|    time_elapsed     | 17278    |
|    total_timesteps  | 5829394  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0189   |
|    n_updates      

Eval num_timesteps=5880000, episode_reward=0.00 +/- 3.74
Episode length: 1283.60 +/- 278.51
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.28e+03 |
|    mean_reward      | 0        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5880000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00969  |
|    n_updates        | 1444999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.35e+03 |
|    ep_rew_mean      | 2.42     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6868     |
|    fps              | 337      |
|    time_elapsed     | 17450    |
|    total_timesteps  | 5881353  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0307   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 2.13     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6908     |
|    fps              | 336      |
|    time_elapsed     | 17616    |
|    total_timesteps  | 5934700  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00977  |
|    n_updates        | 1458674  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 1.84     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6912     |
|    fps              | 336      |
|    time_elapsed     | 17628    |
|    total_timesteps  | 5939741  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0067   |
|    n_updates      

Eval num_timesteps=5990000, episode_reward=1.40 +/- 6.59
Episode length: 1139.40 +/- 156.60
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.14e+03 |
|    mean_reward      | 1.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 5990000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0072   |
|    n_updates        | 1472499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 0.64     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6952     |
|    fps              | 336      |
|    time_elapsed     | 17805    |
|    total_timesteps  | 5993591  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00817  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 1.59     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 6992     |
|    fps              | 336      |
|    time_elapsed     | 17979    |
|    total_timesteps  | 6046278  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0061   |
|    n_updates        | 1486569  |
----------------------------------
Eval num_timesteps=6050000, episode_reward=6.00 +/- 4.56
Episode length: 1152.60 +/- 100.66
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.15e+03 |
|    mean_reward      | 6        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6050000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0162   

Eval num_timesteps=6100000, episode_reward=1.60 +/- 3.38
Episode length: 1077.40 +/- 396.79
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.08e+03 |
|    mean_reward      | 1.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6100000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0163   |
|    n_updates        | 1499999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 1.59     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7036     |
|    fps              | 335      |
|    time_elapsed     | 18166    |
|    total_timesteps  | 6102335  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00573  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 1.26     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7076     |
|    fps              | 335      |
|    time_elapsed     | 18332    |
|    total_timesteps  | 6156409  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0157   |
|    n_updates        | 1514102  |
----------------------------------
Eval num_timesteps=6160000, episode_reward=-1.80 +/- 2.64
Episode length: 1116.00 +/- 486.81
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.12e+03 |
|    mean_reward      | -1.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6160000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0277  

Eval num_timesteps=6210000, episode_reward=8.20 +/- 3.06
Episode length: 1195.80 +/- 172.02
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.2e+03  |
|    mean_reward      | 8.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6210000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.017    |
|    n_updates        | 1527499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 1.18     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7120     |
|    fps              | 335      |
|    time_elapsed     | 18524    |
|    total_timesteps  | 6213935  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00643  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 1.7      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7160     |
|    fps              | 335      |
|    time_elapsed     | 18691    |
|    total_timesteps  | 6268274  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0106   |
|    n_updates        | 1542068  |
----------------------------------
Eval num_timesteps=6270000, episode_reward=2.80 +/- 5.49
Episode length: 1227.20 +/- 232.76
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.23e+03 |
|    mean_reward      | 2.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6270000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0255   

Eval num_timesteps=6320000, episode_reward=2.40 +/- 7.81
Episode length: 1233.40 +/- 134.39
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.23e+03 |
|    mean_reward      | 2.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6320000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0109   |
|    n_updates        | 1554999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 1.42     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7204     |
|    fps              | 335      |
|    time_elapsed     | 18875    |
|    total_timesteps  | 6323883  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00149  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 1.38     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7244     |
|    fps              | 334      |
|    time_elapsed     | 19047    |
|    total_timesteps  | 6379290  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0116   |
|    n_updates        | 1569822  |
----------------------------------
Eval num_timesteps=6380000, episode_reward=1.00 +/- 5.02
Episode length: 1159.60 +/- 429.48
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.16e+03 |
|    mean_reward      | 1        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6380000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0149   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 2.6      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7284     |
|    fps              | 334      |
|    time_elapsed     | 19226    |
|    total_timesteps  | 6432749  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00623  |
|    n_updates        | 1583187  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 2.55     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7288     |
|    fps              | 334      |
|    time_elapsed     | 19236    |
|    total_timesteps  | 6437174  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00704  |
|    n_updates      

Eval num_timesteps=6490000, episode_reward=1.00 +/- 7.27
Episode length: 1162.60 +/- 406.03
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.16e+03 |
|    mean_reward      | 1        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6490000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0235   |
|    n_updates        | 1597499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 3.67     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7328     |
|    fps              | 334      |
|    time_elapsed     | 19413    |
|    total_timesteps  | 6491239  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00474  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 3.27     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7368     |
|    fps              | 334      |
|    time_elapsed     | 19576    |
|    total_timesteps  | 6542564  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0153   |
|    n_updates        | 1610640  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 3.04     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7372     |
|    fps              | 334      |
|    time_elapsed     | 19588    |
|    total_timesteps  | 6547625  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00355  |
|    n_updates      

Eval num_timesteps=6600000, episode_reward=4.60 +/- 5.92
Episode length: 1100.80 +/- 185.32
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.1e+03  |
|    mean_reward      | 4.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6600000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0185   |
|    n_updates        | 1624999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 1.24     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7412     |
|    fps              | 334      |
|    time_elapsed     | 19762    |
|    total_timesteps  | 6601083  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00409  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 0.99     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7452     |
|    fps              | 333      |
|    time_elapsed     | 19925    |
|    total_timesteps  | 6653753  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0121   |
|    n_updates        | 1638438  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 0.57     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7456     |
|    fps              | 333      |
|    time_elapsed     | 19937    |
|    total_timesteps  | 6658801  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00668  |
|    n_updates      

Eval num_timesteps=6710000, episode_reward=3.60 +/- 5.54
Episode length: 1018.60 +/- 447.25
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.02e+03 |
|    mean_reward      | 3.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6710000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.016    |
|    n_updates        | 1652499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 0.61     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7496     |
|    fps              | 333      |
|    time_elapsed     | 20112    |
|    total_timesteps  | 6712694  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0379   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 0.42     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7536     |
|    fps              | 333      |
|    time_elapsed     | 20274    |
|    total_timesteps  | 6764145  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0213   |
|    n_updates        | 1666036  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 0.54     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7540     |
|    fps              | 333      |
|    time_elapsed     | 20286    |
|    total_timesteps  | 6769359  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00832  |
|    n_updates      

Eval num_timesteps=6820000, episode_reward=6.40 +/- 3.61
Episode length: 1201.20 +/- 185.49
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.2e+03  |
|    mean_reward      | 6.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6820000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0103   |
|    n_updates        | 1679999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 1.73     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7580     |
|    fps              | 333      |
|    time_elapsed     | 20460    |
|    total_timesteps  | 6822448  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00779  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 2.36     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7620     |
|    fps              | 333      |
|    time_elapsed     | 20627    |
|    total_timesteps  | 6875950  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00355  |
|    n_updates        | 1693987  |
----------------------------------
Eval num_timesteps=6880000, episode_reward=3.60 +/- 7.17
Episode length: 1100.00 +/- 240.49
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.1e+03  |
|    mean_reward      | 3.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6880000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0167   

Eval num_timesteps=6930000, episode_reward=3.00 +/- 4.24
Episode length: 1155.60 +/- 238.20
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.16e+03 |
|    mean_reward      | 3        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6930000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0036   |
|    n_updates        | 1707499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 2.63     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7664     |
|    fps              | 333      |
|    time_elapsed     | 20811    |
|    total_timesteps  | 6932726  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0188   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 2.66     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7704     |
|    fps              | 333      |
|    time_elapsed     | 20978    |
|    total_timesteps  | 6987338  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00605  |
|    n_updates        | 1721834  |
----------------------------------
Eval num_timesteps=6990000, episode_reward=4.40 +/- 5.46
Episode length: 1277.40 +/- 218.38
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.28e+03 |
|    mean_reward      | 4.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 6990000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00469  

Eval num_timesteps=7040000, episode_reward=7.40 +/- 3.20
Episode length: 977.80 +/- 290.22
----------------------------------
| eval/               |          |
|    mean_ep_length   | 978      |
|    mean_reward      | 7.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7040000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00653  |
|    n_updates        | 1734999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 2.53     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7748     |
|    fps              | 332      |
|    time_elapsed     | 21160    |
|    total_timesteps  | 7043308  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0141   |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 2.03     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7788     |
|    fps              | 332      |
|    time_elapsed     | 21330    |
|    total_timesteps  | 7097395  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00648  |
|    n_updates        | 1749348  |
----------------------------------
Eval num_timesteps=7100000, episode_reward=6.20 +/- 5.60
Episode length: 1115.60 +/- 361.78
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.12e+03 |
|    mean_reward      | 6.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7100000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00855  

Eval num_timesteps=7150000, episode_reward=1.00 +/- 5.90
Episode length: 1190.80 +/- 121.36
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.19e+03 |
|    mean_reward      | 1        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7150000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00944  |
|    n_updates        | 1762499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 1.69     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7832     |
|    fps              | 332      |
|    time_elapsed     | 21513    |
|    total_timesteps  | 7153203  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00516  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 2.24     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7872     |
|    fps              | 332      |
|    time_elapsed     | 21682    |
|    total_timesteps  | 7207831  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00714  |
|    n_updates        | 1776957  |
----------------------------------
Eval num_timesteps=7210000, episode_reward=-2.40 +/- 6.50
Episode length: 1129.20 +/- 271.21
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.13e+03 |
|    mean_reward      | -2.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7210000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00995 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.35e+03 |
|    ep_rew_mean      | 1.5      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7912     |
|    fps              | 332      |
|    time_elapsed     | 21860    |
|    total_timesteps  | 7263997  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0178   |
|    n_updates        | 1790999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.36e+03 |
|    ep_rew_mean      | 1.61     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7916     |
|    fps              | 332      |
|    time_elapsed     | 21874    |
|    total_timesteps  | 7269211  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00561  |
|    n_updates      

Eval num_timesteps=7320000, episode_reward=2.80 +/- 8.01
Episode length: 921.40 +/- 466.05
----------------------------------
| eval/               |          |
|    mean_ep_length   | 921      |
|    mean_reward      | 2.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7320000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0093   |
|    n_updates        | 1804999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.39e+03 |
|    ep_rew_mean      | 1.64     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7956     |
|    fps              | 332      |
|    time_elapsed     | 22050    |
|    total_timesteps  | 7325260  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00368  |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 1.79     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 7996     |
|    fps              | 332      |
|    time_elapsed     | 22212    |
|    total_timesteps  | 7375500  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00347  |
|    n_updates        | 1818874  |
----------------------------------
Eval num_timesteps=7380000, episode_reward=5.20 +/- 4.83
Episode length: 1071.00 +/- 382.28
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.07e+03 |
|    mean_reward      | 5.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7380000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0129   

Eval num_timesteps=7430000, episode_reward=7.40 +/- 3.77
Episode length: 983.00 +/- 363.89
----------------------------------
| eval/               |          |
|    mean_ep_length   | 983      |
|    mean_reward      | 7.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7430000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0201   |
|    n_updates        | 1832499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 3.06     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8040     |
|    fps              | 331      |
|    time_elapsed     | 22400    |
|    total_timesteps  | 7433611  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00578  |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.27e+03 |
|    ep_rew_mean      | 3.21     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8080     |
|    fps              | 331      |
|    time_elapsed     | 22560    |
|    total_timesteps  | 7483678  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00189  |
|    n_updates        | 1845919  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 3.02     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8084     |
|    fps              | 331      |
|    time_elapsed     | 22573    |
|    total_timesteps  | 7489215  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00469  |
|    n_updates      

Eval num_timesteps=7540000, episode_reward=1.40 +/- 6.95
Episode length: 1194.20 +/- 191.06
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.19e+03 |
|    mean_reward      | 1.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7540000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00469  |
|    n_updates        | 1859999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 2.8      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8124     |
|    fps              | 331      |
|    time_elapsed     | 22741    |
|    total_timesteps  | 7540959  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0163   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 2.76     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8164     |
|    fps              | 331      |
|    time_elapsed     | 22906    |
|    total_timesteps  | 7592191  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00924  |
|    n_updates        | 1873047  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 2.75     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8168     |
|    fps              | 331      |
|    time_elapsed     | 22917    |
|    total_timesteps  | 7596979  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00752  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 1.46     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8208     |
|    fps              | 331      |
|    time_elapsed     | 23080    |
|    total_timesteps  | 7649026  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.013    |
|    n_updates        | 1887256  |
----------------------------------
Eval num_timesteps=7650000, episode_reward=4.20 +/- 5.98
Episode length: 1065.20 +/- 527.96
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.07e+03 |
|    mean_reward      | 4.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7650000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0319   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 1.66     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8248     |
|    fps              | 331      |
|    time_elapsed     | 23255    |
|    total_timesteps  | 7702535  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00831  |
|    n_updates        | 1900633  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 1.35     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8252     |
|    fps              | 331      |
|    time_elapsed     | 23267    |
|    total_timesteps  | 7707547  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00759  |
|    n_updates      

Eval num_timesteps=7760000, episode_reward=1.80 +/- 5.56
Episode length: 1221.40 +/- 479.12
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.22e+03 |
|    mean_reward      | 1.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7760000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0173   |
|    n_updates        | 1914999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 2.82     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8292     |
|    fps              | 331      |
|    time_elapsed     | 23446    |
|    total_timesteps  | 7761478  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0117   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.35e+03 |
|    ep_rew_mean      | 3.63     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8332     |
|    fps              | 330      |
|    time_elapsed     | 23612    |
|    total_timesteps  | 7814441  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00468  |
|    n_updates        | 1928610  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 3.62     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8336     |
|    fps              | 330      |
|    time_elapsed     | 23624    |
|    total_timesteps  | 7819516  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00711  |
|    n_updates      

Eval num_timesteps=7870000, episode_reward=7.20 +/- 2.93
Episode length: 1129.00 +/- 310.27
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.13e+03 |
|    mean_reward      | 7.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7870000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00953  |
|    n_updates        | 1942499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 3.15     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8376     |
|    fps              | 330      |
|    time_elapsed     | 23801    |
|    total_timesteps  | 7872757  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00268  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 2.4      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8416     |
|    fps              | 330      |
|    time_elapsed     | 23968    |
|    total_timesteps  | 7925973  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0085   |
|    n_updates        | 1956493  |
----------------------------------
Eval num_timesteps=7930000, episode_reward=5.00 +/- 5.22
Episode length: 1146.40 +/- 264.99
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.15e+03 |
|    mean_reward      | 5        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7930000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0112   

Eval num_timesteps=7980000, episode_reward=5.60 +/- 7.79
Episode length: 1112.40 +/- 135.38
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.11e+03 |
|    mean_reward      | 5.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 7980000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00144  |
|    n_updates        | 1969999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 1.71     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8460     |
|    fps              | 330      |
|    time_elapsed     | 24150    |
|    total_timesteps  | 7983284  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0114   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 1.71     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8500     |
|    fps              | 330      |
|    time_elapsed     | 24315    |
|    total_timesteps  | 8035802  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0135   |
|    n_updates        | 1983950  |
----------------------------------
Eval num_timesteps=8040000, episode_reward=3.20 +/- 4.83
Episode length: 1180.00 +/- 156.79
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.18e+03 |
|    mean_reward      | 3.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8040000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0102   

Eval num_timesteps=8090000, episode_reward=6.60 +/- 2.42
Episode length: 976.00 +/- 367.08
----------------------------------
| eval/               |          |
|    mean_ep_length   | 976      |
|    mean_reward      | 6.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8090000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00507  |
|    n_updates        | 1997499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 2.21     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8544     |
|    fps              | 330      |
|    time_elapsed     | 24502    |
|    total_timesteps  | 8093894  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0194   |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 3.07     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8584     |
|    fps              | 330      |
|    time_elapsed     | 24664    |
|    total_timesteps  | 8144549  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00497  |
|    n_updates        | 2011137  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+03 |
|    ep_rew_mean      | 2.83     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8588     |
|    fps              | 330      |
|    time_elapsed     | 24676    |
|    total_timesteps  | 8149707  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00571  |
|    n_updates      

Eval num_timesteps=8200000, episode_reward=2.00 +/- 6.54
Episode length: 1040.60 +/- 363.33
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.04e+03 |
|    mean_reward      | 2        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8200000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00633  |
|    n_updates        | 2024999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 3.19     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8628     |
|    fps              | 330      |
|    time_elapsed     | 24855    |
|    total_timesteps  | 8204723  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.004    

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 3.77     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8668     |
|    fps              | 330      |
|    time_elapsed     | 25019    |
|    total_timesteps  | 8256351  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00947  |
|    n_updates        | 2039087  |
----------------------------------
Eval num_timesteps=8260000, episode_reward=4.80 +/- 3.06
Episode length: 1316.40 +/- 174.46
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.32e+03 |
|    mean_reward      | 4.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8260000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00764  

Eval num_timesteps=8310000, episode_reward=2.00 +/- 7.24
Episode length: 1054.00 +/- 398.58
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.05e+03 |
|    mean_reward      | 2        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8310000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0281   |
|    n_updates        | 2052499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 3.9      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8712     |
|    fps              | 329      |
|    time_elapsed     | 25201    |
|    total_timesteps  | 8312483  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0122   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 2.47     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8752     |
|    fps              | 329      |
|    time_elapsed     | 25365    |
|    total_timesteps  | 8365236  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00545  |
|    n_updates        | 2066308  |
----------------------------------
Eval num_timesteps=8370000, episode_reward=6.80 +/- 2.40
Episode length: 1116.00 +/- 222.29
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.12e+03 |
|    mean_reward      | 6.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8370000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0107   

Eval num_timesteps=8420000, episode_reward=1.20 +/- 4.75
Episode length: 1013.20 +/- 516.73
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.01e+03 |
|    mean_reward      | 1.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8420000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00567  |
|    n_updates        | 2079999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 2.89     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8796     |
|    fps              | 329      |
|    time_elapsed     | 25550    |
|    total_timesteps  | 8423637  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00741  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 3.25     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8836     |
|    fps              | 329      |
|    time_elapsed     | 25714    |
|    total_timesteps  | 8474069  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.02     |
|    n_updates        | 2093517  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 3.32     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8840     |
|    fps              | 329      |
|    time_elapsed     | 25725    |
|    total_timesteps  | 8478770  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00444  |
|    n_updates      

Eval num_timesteps=8530000, episode_reward=3.00 +/- 8.56
Episode length: 1085.60 +/- 169.23
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.09e+03 |
|    mean_reward      | 3        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8530000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00614  |
|    n_updates        | 2107499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 2.8      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8880     |
|    fps              | 329      |
|    time_elapsed     | 25901    |
|    total_timesteps  | 8532561  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00441  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 2.21     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8920     |
|    fps              | 329      |
|    time_elapsed     | 26069    |
|    total_timesteps  | 8586751  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0167   |
|    n_updates        | 2121687  |
----------------------------------
Eval num_timesteps=8590000, episode_reward=3.80 +/- 5.91
Episode length: 1143.60 +/- 262.55
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.14e+03 |
|    mean_reward      | 3.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8590000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0126   

Eval num_timesteps=8640000, episode_reward=4.00 +/- 7.40
Episode length: 1109.00 +/- 246.53
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.11e+03 |
|    mean_reward      | 4        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8640000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0142   |
|    n_updates        | 2134999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 2.16     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 8964     |
|    fps              | 329      |
|    time_elapsed     | 26259    |
|    total_timesteps  | 8645079  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00241  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 3.6      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9004     |
|    fps              | 329      |
|    time_elapsed     | 26428    |
|    total_timesteps  | 8698611  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00547  |
|    n_updates        | 2149652  |
----------------------------------
Eval num_timesteps=8700000, episode_reward=4.00 +/- 4.86
Episode length: 1218.20 +/- 144.14
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.22e+03 |
|    mean_reward      | 4        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8700000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00507  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 3.9      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9044     |
|    fps              | 328      |
|    time_elapsed     | 26603    |
|    total_timesteps  | 8751063  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0112   |
|    n_updates        | 2162765  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 4.04     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9048     |
|    fps              | 328      |
|    time_elapsed     | 26614    |
|    total_timesteps  | 8755921  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0135   |
|    n_updates      

Eval num_timesteps=8810000, episode_reward=7.20 +/- 5.00
Episode length: 1007.40 +/- 336.63
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.01e+03 |
|    mean_reward      | 7.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8810000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0115   |
|    n_updates        | 2177499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 3.24     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9088     |
|    fps              | 328      |
|    time_elapsed     | 26796    |
|    total_timesteps  | 8812425  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0345   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.35e+03 |
|    ep_rew_mean      | 3.83     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9128     |
|    fps              | 328      |
|    time_elapsed     | 26962    |
|    total_timesteps  | 8865102  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00644  |
|    n_updates        | 2191275  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 4.25     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9132     |
|    fps              | 328      |
|    time_elapsed     | 26972    |
|    total_timesteps  | 8869396  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0117   |
|    n_updates      

Eval num_timesteps=8920000, episode_reward=2.20 +/- 4.31
Episode length: 1363.20 +/- 97.09
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.36e+03 |
|    mean_reward      | 2.2      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8920000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00992  |
|    n_updates        | 2204999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 4.43     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9172     |
|    fps              | 328      |
|    time_elapsed     | 27147    |
|    total_timesteps  | 8921630  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0174   |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 4.45     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9212     |
|    fps              | 328      |
|    time_elapsed     | 27316    |
|    total_timesteps  | 8975486  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00442  |
|    n_updates        | 2218871  |
----------------------------------
Eval num_timesteps=8980000, episode_reward=3.60 +/- 3.83
Episode length: 1157.00 +/- 330.81
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.16e+03 |
|    mean_reward      | 3.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 8980000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0215   

Eval num_timesteps=9030000, episode_reward=7.80 +/- 3.43
Episode length: 1027.00 +/- 454.99
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.03e+03 |
|    mean_reward      | 7.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9030000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0063   |
|    n_updates        | 2232499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 3.5      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9256     |
|    fps              | 328      |
|    time_elapsed     | 27506    |
|    total_timesteps  | 9034703  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0135   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 4.5      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9296     |
|    fps              | 328      |
|    time_elapsed     | 27670    |
|    total_timesteps  | 9086383  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0177   |
|    n_updates        | 2246595  |
----------------------------------
Eval num_timesteps=9090000, episode_reward=-0.40 +/- 6.53
Episode length: 1088.40 +/- 509.73
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.09e+03 |
|    mean_reward      | -0.4     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9090000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00565 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 4.74     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9336     |
|    fps              | 328      |
|    time_elapsed     | 27850    |
|    total_timesteps  | 9142409  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0043   |
|    n_updates        | 2260602  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 4.76     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9340     |
|    fps              | 328      |
|    time_elapsed     | 27862    |
|    total_timesteps  | 9147398  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0117   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 4.24     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9380     |
|    fps              | 328      |
|    time_elapsed     | 28025    |
|    total_timesteps  | 9198907  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0114   |
|    n_updates        | 2274726  |
----------------------------------
Eval num_timesteps=9200000, episode_reward=5.60 +/- 5.68
Episode length: 1095.40 +/- 225.52
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.1e+03  |
|    mean_reward      | 5.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9200000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0117   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 3.63     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9420     |
|    fps              | 328      |
|    time_elapsed     | 28201    |
|    total_timesteps  | 9252555  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0306   |
|    n_updates        | 2288138  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 3.69     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9424     |
|    fps              | 328      |
|    time_elapsed     | 28211    |
|    total_timesteps  | 9257213  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00773  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 2.83     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9464     |
|    fps              | 328      |
|    time_elapsed     | 28372    |
|    total_timesteps  | 9308447  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0247   |
|    n_updates        | 2302111  |
----------------------------------
Eval num_timesteps=9310000, episode_reward=-0.80 +/- 5.42
Episode length: 1186.00 +/- 200.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.19e+03 |
|    mean_reward      | -0.8     |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9310000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00721 

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 3.53     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9504     |
|    fps              | 327      |
|    time_elapsed     | 28543    |
|    total_timesteps  | 9361255  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00475  |
|    n_updates        | 2315313  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 3.56     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9508     |
|    fps              | 328      |
|    time_elapsed     | 28555    |
|    total_timesteps  | 9366517  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00279  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 4.49     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9548     |
|    fps              | 327      |
|    time_elapsed     | 28719    |
|    total_timesteps  | 9419083  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00778  |
|    n_updates        | 2329770  |
----------------------------------
Eval num_timesteps=9420000, episode_reward=0.00 +/- 3.16
Episode length: 1218.20 +/- 439.88
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.22e+03 |
|    mean_reward      | 0        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9420000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00287  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.35e+03 |
|    ep_rew_mean      | 4.62     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9588     |
|    fps              | 327      |
|    time_elapsed     | 28897    |
|    total_timesteps  | 9473871  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0124   |
|    n_updates        | 2343467  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.35e+03 |
|    ep_rew_mean      | 4.34     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9592     |
|    fps              | 327      |
|    time_elapsed     | 28909    |
|    total_timesteps  | 9479141  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00164  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 4.26     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9632     |
|    fps              | 327      |
|    time_elapsed     | 29067    |
|    total_timesteps  | 9529412  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00842  |
|    n_updates        | 2357352  |
----------------------------------
Eval num_timesteps=9530000, episode_reward=0.40 +/- 2.42
Episode length: 1193.40 +/- 369.34
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.19e+03 |
|    mean_reward      | 0.4      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9530000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0125   

Eval num_timesteps=9580000, episode_reward=7.00 +/- 4.77
Episode length: 1098.00 +/- 255.76
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.1e+03  |
|    mean_reward      | 7        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9580000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0108   |
|    n_updates        | 2369999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 4.18     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9676     |
|    fps              | 327      |
|    time_elapsed     | 29246    |
|    total_timesteps  | 9585122  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00344  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 2.72     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9716     |
|    fps              | 327      |
|    time_elapsed     | 29411    |
|    total_timesteps  | 9637939  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0235   |
|    n_updates        | 2384484  |
----------------------------------
Eval num_timesteps=9640000, episode_reward=5.00 +/- 8.12
Episode length: 958.40 +/- 346.56
----------------------------------
| eval/               |          |
|    mean_ep_length   | 958      |
|    mean_reward      | 5        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9640000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0151   |

Eval num_timesteps=9690000, episode_reward=6.60 +/- 6.22
Episode length: 1021.80 +/- 243.38
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.02e+03 |
|    mean_reward      | 6.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9690000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0155   |
|    n_updates        | 2397499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 2.69     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9760     |
|    fps              | 327      |
|    time_elapsed     | 29596    |
|    total_timesteps  | 9695206  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0148   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 3.32     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9800     |
|    fps              | 327      |
|    time_elapsed     | 29759    |
|    total_timesteps  | 9747549  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00164  |
|    n_updates        | 2411887  |
----------------------------------
Eval num_timesteps=9750000, episode_reward=0.80 +/- 5.04
Episode length: 1347.40 +/- 182.31
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.35e+03 |
|    mean_reward      | 0.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9750000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0193   

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 3.52     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9840     |
|    fps              | 327      |
|    time_elapsed     | 29938    |
|    total_timesteps  | 9802711  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00577  |
|    n_updates        | 2425677  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 3.58     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9844     |
|    fps              | 327      |
|    time_elapsed     | 29950    |
|    total_timesteps  | 9807748  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.018    |
|    n_updates      

Eval num_timesteps=9860000, episode_reward=2.00 +/- 5.73
Episode length: 1075.40 +/- 543.83
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.08e+03 |
|    mean_reward      | 2        |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9860000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0271   |
|    n_updates        | 2439999  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.37e+03 |
|    ep_rew_mean      | 3.17     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9884     |
|    fps              | 327      |
|    time_elapsed     | 30128    |
|    total_timesteps  | 9863982  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00805  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.36e+03 |
|    ep_rew_mean      | 3.26     |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9924     |
|    fps              | 327      |
|    time_elapsed     | 30291    |
|    total_timesteps  | 9915304  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00432  |
|    n_updates        | 2453825  |
----------------------------------
Eval num_timesteps=9920000, episode_reward=4.60 +/- 4.18
Episode length: 1062.40 +/- 470.61
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.06e+03 |
|    mean_reward      | 4.6      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9920000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0058   

Eval num_timesteps=9970000, episode_reward=5.80 +/- 5.84
Episode length: 1036.00 +/- 522.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.04e+03 |
|    mean_reward      | 5.8      |
| rollout/            |          |
|    exploration_rate | 0.01     |
| time/               |          |
|    total_timesteps  | 9970000  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00601  |
|    n_updates        | 2467499  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 3.7      |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 9968     |
|    fps              | 327      |
|    time_elapsed     | 30474    |
|    total_timesteps  | 9972213  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00702  

<sb3f.dqn.dqn.DQN at 0x7f7ca8399040>

In [34]:
# Write the trained Pong agent to Training/Saved Models/Opt_Pong_10M.
# NOTE(review): this rebinds `save_path`, which an earlier cell also assigns;
# re-run that earlier cell before relying on its original value again.
save_path = os.path.join(os.path.join('Training', 'Saved Models'), 'Opt_Pong_10M')
model_pong.save(save_path)

In [36]:
# Score the trained agent on `env` over 10 rendered episodes.
# The call is left as the cell's last expression so its result is displayed;
# presumably that tuple is (mean reward, reward std), as in Stable-Baselines3 — confirm for this fork.
evaluate_policy(
    model_pong,
    env,
    n_eval_episodes=10,
    render=True,
)

(19.9, 1.8681541692269403)

In [21]:
# Close the environment now that evaluation has finished.
env.close()

(210, 160, 3)