**Importy**

In [1]:
import gymnasium as gym
import numpy as np
from stable_baselines3 import TD3
from stable_baselines3.common.vec_env import SubprocVecEnv, VecMonitor, DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
from stable_baselines3.common.evaluation import evaluate_policy
import os

In [None]:
# Parameters
benchmark = "BipedalWalker-v3"
model_ = TD3
max_stepov_na_epizodu = 1000
num_envs = 6

# Custom environment wrapper (optional reward shaping)
class CustomBipedalWalker(gym.Wrapper):
    def __init__(self, env):
        super().__init__(env)

# Function to create monitored env
def make_env():
    env = gym.make(benchmark, max_episode_steps=max_stepov_na_epizodu)
    env = CustomBipedalWalker(env)
    env = Monitor(env)  # Required for episode reward tracking
    return env

# Vectorized environments with monitoring
vec_env = SubprocVecEnv([make_env for _ in range(num_envs)])
vec_env = VecMonitor(vec_env)

# Action noise for TD3
action_noise = NormalActionNoise(
    mean=np.zeros(vec_env.action_space.shape),
    sigma=0.1 * np.ones(vec_env.action_space.shape)
)

# Custom callback for logging average reward
class AvgRewardCallback(BaseCallback):
    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.episode_rewards = []

    def _on_step(self) -> bool:
        if "infos" in self.locals:
            for info in self.locals["infos"]:
                if "episode" in info:
                    reward = info["episode"]["r"]
                    self.episode_rewards.append(reward)
                    if len(self.episode_rewards) >= 100:
                        avg_reward = sum(self.episode_rewards[-100:]) / 100
                        self.logger.record("custom/avg_reward_100ep", avg_reward)
        return True

# Eval environment
eval_env = Monitor(gym.make(benchmark, max_episode_steps=max_stepov_na_epizodu))

def make_eval_env():
    env = gym.make(benchmark, max_episode_steps=max_stepov_na_epizodu)
    env = CustomBipedalWalker(env)
    env = Monitor(env)
    return env

eval_env = DummyVecEnv([make_eval_env])
eval_env = VecMonitor(eval_env)


# Eval callback
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path="./models/",
    log_path="./log/",
    eval_freq=5000,
    deterministic=True,
    render=False
)



In [None]:
# Define model
model = model_(
    'MlpPolicy',
    vec_env,
    verbose=1,
    device="cuda",
    action_noise=action_noise,
    tensorboard_log="./log/" + model_.__name__ + "_" + benchmark,
    batch_size=256,
    learning_rate=0.0003,
    buffer_size = 1_000_000,
    gamma=0.99,
    learning_starts=10000,
    policy_delay=2,
    target_policy_noise=0.2,
    target_noise_clip=0.5,
)

In [None]:
# Train with callbacks
model.learn(
    total_timesteps=2_000_000,
    callback=[eval_callback, AvgRewardCallback()],
    progress_bar=True,
)

In [None]:
model.save(model_.__name__ + "_" + benchmark)

In [None]:
model = model_.load(model_.__name__ + "_" + benchmark) # Načítanie modelu
env = gym.make(benchmark, render_mode="human", max_episode_steps=max_stepov_na_epizodu)


# Spustenie evaluacie
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10, deterministic=True)
print(mean_reward, std_reward)
env.close()

In [None]:
# Parameters
benchmark = "BipedalWalkerHardcore-v3"
model_ = TD3
max_stepov_na_epizodu = 1000
num_envs = 6

# Custom environment wrapper (optional reward shaping)
class CustomBipedalWalker(gym.Wrapper):
    def __init__(self, env):
        super().__init__(env)

# Function to create monitored env
def make_env():
    env = gym.make(benchmark, max_episode_steps=max_stepov_na_epizodu)
    env = CustomBipedalWalker(env)
    env = Monitor(env)  # Required for episode reward tracking
    return env

# Vectorized environments with monitoring
vec_env = SubprocVecEnv([make_env for _ in range(num_envs)])
vec_env = VecMonitor(vec_env)

# Action noise for TD3
action_noise = NormalActionNoise(
    mean=np.zeros(vec_env.action_space.shape),
    sigma=0.1 * np.ones(vec_env.action_space.shape)
)

# Custom callback for logging average reward
class AvgRewardCallback(BaseCallback):
    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.episode_rewards = []

    def _on_step(self) -> bool:
        if "infos" in self.locals:
            for info in self.locals["infos"]:
                if "episode" in info:
                    reward = info["episode"]["r"]
                    self.episode_rewards.append(reward)
                    if len(self.episode_rewards) >= 100:
                        avg_reward = sum(self.episode_rewards[-100:]) / 100
                        self.logger.record("custom/avg_reward_100ep", avg_reward)
        return True

# Eval environment
eval_env = Monitor(gym.make(benchmark, max_episode_steps=max_stepov_na_epizodu))

def make_eval_env():
    env = gym.make(benchmark, max_episode_steps=max_stepov_na_epizodu)
    env = CustomBipedalWalker(env)
    env = Monitor(env)
    return env

eval_env = DummyVecEnv([make_eval_env])
eval_env = VecMonitor(eval_env)


# Eval callback
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path="./models/",
    log_path="./log/",
    eval_freq=5000,
    deterministic=True,
    render=False
)



model = TD3.load("TD3_BipedalWalker-v3", env=vec_env)

# Train with callbacks
model.learn(
    total_timesteps=1_000_000,
    callback=[eval_callback, AvgRewardCallback()],
    progress_bar=True,
)

Logging to ./log/TD3_BipedalWalker-v3\TD3_6


Output()

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 70.2     |
|    ep_rew_mean     | -112     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 2485     |
|    time_elapsed    | 0        |
|    total_timesteps | 480      |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 69.4     |
|    ep_rew_mean     | -109     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 3033     |
|    time_elapsed    | 0        |
|    total_timesteps | 1206     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 76.7     |
|    ep_rew_mean     | -112     |
| time/              |          |
|    episodes        | 12       |
|    fps             | 3224     |
|    time_elapsed    | 0        |
|    total_timesteps | 1734     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 186      |
|    ep_rew_mean     | -105     |
| time/              |          |
|    episodes        | 16       |
|    fps             | 4045     |
|    time_elapsed    | 1        |
|    total_timesteps | 6000     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 216      |
|    ep_rew_mean     | -103     |
| time/              |          |
|    episodes        | 20       |
|    fps             | 4053     |
|    time_elapsed    | 1        |
|    total_timesteps | 6540     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 231      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 24       |
|    fps             | 4095     |
|    time_elapsed    | 1        |
|    total_timesteps | 7626     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 275      |
|    ep_rew_mean     | -98.5    |
| time/              |          |
|    episodes        | 28       |
|    fps             | 4164     |
|    time_elapsed    | 2        |
|    total_timesteps | 9078     |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 256      |
|    ep_rew_mean     | -99.2    |
| time/              |          |
|    episodes        | 32       |
|    fps             | 2434     |
|    time_elapsed    | 4        |
|    total_timesteps | 11226    |
| train/             |          |
|    actor_loss      | -4.66    |
|    critic_loss     | 6.9      |
|    learning_rate   | 0.0003   |
|    n_updates       | 331872   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 299      |
|    ep_rew_mean     | -98.1    |
| time/              |          |
|    episodes        | 36       |
|    fps             | 2122     |
|    time_elapsed    | 5        |
|    total_timesteps | 11820    |
| train/             |          |
|    actor_loss      | -3.81    |
|    critic_loss     | 10.6     |
|    learning_rate   | 0.0003   |
|    n_updates       | 331971   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 296      |
|    ep_rew_mean     | -95.9    |
| time/              |          |
|    episodes        | 40       |
|    fps             | 1806     |
|    time_elapsed    | 6        |
|    total_timesteps | 12642    |
| train/             |          |
|    actor_loss      | -5.61    |
|    critic_loss     | 3.44     |
|    learning_rate   | 0.0003   |
|    n_updates       | 332108   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 294      |
|    ep_rew_mean     | -92.3    |
| time/              |          |
|    episodes        | 44       |
|    fps             | 1391     |
|    time_elapsed    | 10       |
|    total_timesteps | 14598    |
| train/             |          |
|    actor_loss      | -7.46    |
|    critic_loss     | 5.17     |
|    learning_rate   | 0.0003   |
|    n_updates       | 332434   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 310      |
|    ep_rew_mean     | -87.1    |
| time/              |          |
|    episodes        | 48       |
|    fps             | 1275     |
|    time_elapsed    | 12       |
|    total_timesteps | 15618    |
| train/             |          |
|    actor_loss      | -3.44    |
|    critic_loss     | 3.23     |
|    learning_rate   | 0.0003   |
|    n_updates       | 332604   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 307      |
|    ep_rew_mean     | -84.8    |
| time/              |          |
|    episodes        | 52       |
|    fps             | 1193     |
|    time_elapsed    | 13       |
|    total_timesteps | 16536    |
| train/             |          |
|    actor_loss      | -6.63    |
|    critic_loss     | 7.92     |
|    learning_rate   | 0.0003   |
|    n_updates       | 332757   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 301      |
|    ep_rew_mean     | -84      |
| time/              |          |
|    episodes        | 56       |
|    fps             | 1121     |
|    time_elapsed    | 15       |
|    total_timesteps | 17670    |
| train/             |          |
|    actor_loss      | -8.62    |
|    critic_loss     | 5.54     |
|    learning_rate   | 0.0003   |
|    n_updates       | 332946   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 297      |
|    ep_rew_mean     | -82.3    |
| time/              |          |
|    episodes        | 60       |
|    fps             | 1039     |
|    time_elapsed    | 18       |
|    total_timesteps | 19356    |
| train/             |          |
|    actor_loss      | -8.41    |
|    critic_loss     | 4.47     |
|    learning_rate   | 0.0003   |
|    n_updates       | 333227   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 301      |
|    ep_rew_mean     | -82.2    |
| time/              |          |
|    episodes        | 64       |
|    fps             | 977      |
|    time_elapsed    | 21       |
|    total_timesteps | 21054    |
| train/             |          |
|    actor_loss      | -6.3     |
|    critic_loss     | 6.63     |
|    learning_rate   | 0.0003   |
|    n_updates       | 333510   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 301      |
|    ep_rew_mean     | -81.2    |
| time/              |          |
|    episodes        | 68       |
|    fps             | 952      |
|    time_elapsed    | 23       |
|    total_timesteps | 21930    |
| train/             |          |
|    actor_loss      | -6.55    |
|    critic_loss     | 4.96     |
|    learning_rate   | 0.0003   |
|    n_updates       | 333656   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 310      |
|    ep_rew_mean     | -80.7    |
| time/              |          |
|    episodes        | 72       |
|    fps             | 929      |
|    time_elapsed    | 24       |
|    total_timesteps | 22872    |
| train/             |          |
|    actor_loss      | -8.32    |
|    critic_loss     | 8.36     |
|    learning_rate   | 0.0003   |
|    n_updates       | 333813   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 303      |
|    ep_rew_mean     | -80.7    |
| time/              |          |
|    episodes        | 76       |
|    fps             | 914      |
|    time_elapsed    | 25       |
|    total_timesteps | 23466    |
| train/             |          |
|    actor_loss      | -9.14    |
|    critic_loss     | 8.11     |
|    learning_rate   | 0.0003   |
|    n_updates       | 333912   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 299      |
|    ep_rew_mean     | -80.6    |
| time/              |          |
|    episodes        | 80       |
|    fps             | 885      |
|    time_elapsed    | 27       |
|    total_timesteps | 24792    |
| train/             |          |
|    actor_loss      | -6.07    |
|    critic_loss     | 12.1     |
|    learning_rate   | 0.0003   |
|    n_updates       | 334133   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 295      |
|    ep_rew_mean     | -80.2    |
| time/              |          |
|    episodes        | 84       |
|    fps             | 862      |
|    time_elapsed    | 30       |
|    total_timesteps | 26154    |
| train/             |          |
|    actor_loss      | -8.37    |
|    critic_loss     | 4.99     |
|    learning_rate   | 0.0003   |
|    n_updates       | 334360   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 302      |
|    ep_rew_mean     | -79.4    |
| time/              |          |
|    episodes        | 88       |
|    fps             | 848      |
|    time_elapsed    | 31       |
|    total_timesteps | 27162    |
| train/             |          |
|    actor_loss      | -4.93    |
|    critic_loss     | 12.1     |
|    learning_rate   | 0.0003   |
|    n_updates       | 334528   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 296      |
|    ep_rew_mean     | -79.6    |
| time/              |          |
|    episodes        | 92       |
|    fps             | 841      |
|    time_elapsed    | 32       |
|    total_timesteps | 27564    |
| train/             |          |
|    actor_loss      | -9.22    |
|    critic_loss     | 12.5     |
|    learning_rate   | 0.0003   |
|    n_updates       | 334595   |
---------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 296      |
|    ep_rew_mean     | -79.5    |
| time/              |          |
|    episodes        | 96       |
|    fps             | 813      |
|    time_elapsed    | 36       |
|    total_timesteps | 29640    |
| train/             |          |
|    actor_loss      | -6.67    |
|    critic_loss     | 9.86     |
|    learning_rate   | 0.0003   |
|    n_updates       | 334941   |
---------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 535      |
|    mean_reward     | -76.3    |
| time/              |          |
|    total_timesteps | 30000    |
| train/             |          |
|    actor_loss      | -9.75    |
|    critic_loss     | 8.9      |
|    learning_rate   | 0.0003   |
|    n_updates       | 335001   |
---------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -79.40436 |
| rollout/            |           |
|    ep_len_mean      | 299       |
|    ep_rew_mean      | -79.4     |
| time/               |           |
|    episodes         | 100       |
|    fps              | 751       |
|    time_elapsed     | 41        |
|    total_timesteps  | 31062     |
| train/              |           |
|    actor_loss       | -7.1      |
|    critic_loss      | 10.2      |
|    learning_rate    | 0.0003    |
|    n_updates        | 335178    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -77.81638 |
| rollout/            |           |
|    ep_len_mean      | 313       |
|    ep_rew_mean      | -77.8     |
| time/               |           |
|    episodes         | 104       |
|    fps              | 740       |
|    time_elapsed     | 44        |
|    total_timesteps  | 33120     |
| train/              |           |
|    actor_loss       | -6.23     |
|    critic_loss      | 24.7      |
|    learning_rate    | 0.0003    |
|    n_updates        | 335521    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -77.20667 |
| rollout/            |           |
|    ep_len_mean      | 333       |
|    ep_rew_mean      | -77.2     |
| time/               |           |
|    episodes         | 108       |
|    fps              | 727       |
|    time_elapsed     | 49        |
|    total_timesteps  | 35670     |
| train/              |           |
|    actor_loss       | -5.07     |
|    critic_loss      | 13.4      |
|    learning_rate    | 0.0003    |
|    n_updates        | 335946    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -74.57843 |
| rollout/            |           |
|    ep_len_mean      | 350       |
|    ep_rew_mean      | -74.6     |
| time/               |           |
|    episodes         | 112       |
|    fps              | 720       |
|    time_elapsed     | 51        |
|    total_timesteps  | 36780     |
| train/              |           |
|    actor_loss       | -8.73     |
|    critic_loss      | 8.26      |
|    learning_rate    | 0.0003    |
|    n_updates        | 336131    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -73.93396 |
| rollout/            |           |
|    ep_len_mean      | 353       |
|    ep_rew_mean      | -73.9     |
| time/               |           |
|    episodes         | 116       |
|    fps              | 709       |
|    time_elapsed     | 55        |
|    total_timesteps  | 39432     |
| train/              |           |
|    actor_loss       | -7.41     |
|    critic_loss      | 5.64      |
|    learning_rate    | 0.0003    |
|    n_updates        | 336573    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -73.329025 |
| rollout/            |            |
|    ep_len_mean      | 342        |
|    ep_rew_mean      | -73.3      |
| time/               |            |
|    episodes         | 120        |
|    fps              | 702        |
|    time_elapsed     | 58         |
|    total_timesteps  | 40980      |
| train/              |            |
|    actor_loss       | -4.39      |
|    critic_loss      | 11.4       |
|    learning_rate    | 0.0003     |
|    n_updates        | 336831     |
------------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | -72.4971 |
| rollout/            |          |
|    ep_len_mean      | 355      |
|    ep_rew_mean      | -72.5    |
| time/               |          |
|    episodes         | 124      |
|    fps              | 695      |
|    time_elapsed     | 61       |
|    total_timesteps  | 42768    |
| train/              |          |
|    actor_loss       | -2.79    |
|    critic_loss      | 6.68     |
|    learning_rate    | 0.0003   |
|    n_updates        | 337129   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -72.39083 |
| rollout/            |           |
|    ep_len_mean      | 349       |
|    ep_rew_mean      | -72.4     |
| time/               |           |
|    episodes         | 128       |
|    fps              | 689       |
|    time_elapsed     | 64        |
|    total_timesteps  | 44622     |
| train/              |           |
|    actor_loss       | -6.31     |
|    critic_loss      | 9.91      |
|    learning_rate    | 0.0003    |
|    n_updates        | 337438    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -70.55799 |
| rollout/            |           |
|    ep_len_mean      | 364       |
|    ep_rew_mean      | -70.6     |
| time/               |           |
|    episodes         | 132       |
|    fps              | 683       |
|    time_elapsed     | 69        |
|    total_timesteps  | 47346     |
| train/              |           |
|    actor_loss       | -6.54     |
|    critic_loss      | 4.99      |
|    learning_rate    | 0.0003    |
|    n_updates        | 337892    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -69.27291 |
| rollout/            |           |
|    ep_len_mean      | 363       |
|    ep_rew_mean      | -69.3     |
| time/               |           |
|    episodes         | 136       |
|    fps              | 678       |
|    time_elapsed     | 72        |
|    total_timesteps  | 49146     |
| train/              |           |
|    actor_loss       | -5.11     |
|    critic_loss      | 22.5      |
|    learning_rate    | 0.0003    |
|    n_updates        | 338192    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -67.81259 |
| rollout/            |           |
|    ep_len_mean      | 377       |
|    ep_rew_mean      | -67.8     |
| time/               |           |
|    episodes         | 140       |
|    fps              | 674       |
|    time_elapsed     | 76        |
|    total_timesteps  | 51486     |
| train/              |           |
|    actor_loss       | -8.07     |
|    critic_loss      | 7.81      |
|    learning_rate    | 0.0003    |
|    n_updates        | 338582    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -68.48883 |
| rollout/            |           |
|    ep_len_mean      | 381       |
|    ep_rew_mean      | -68.5     |
| time/               |           |
|    episodes         | 144       |
|    fps              | 671       |
|    time_elapsed     | 79        |
|    total_timesteps  | 53490     |
| train/              |           |
|    actor_loss       | -8.45     |
|    critic_loss      | 6.03      |
|    learning_rate    | 0.0003    |
|    n_updates        | 338916    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -68.99107 |
| rollout/            |           |
|    ep_len_mean      | 394       |
|    ep_rew_mean      | -69       |
| time/               |           |
|    episodes         | 148       |
|    fps              | 667       |
|    time_elapsed     | 84        |
|    total_timesteps  | 56256     |
| train/              |           |
|    actor_loss       | -2.78     |
|    critic_loss      | 14.3      |
|    learning_rate    | 0.0003    |
|    n_updates        | 339377    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -69.92446 |
| rollout/            |           |
|    ep_len_mean      | 403       |
|    ep_rew_mean      | -69.9     |
| time/               |           |
|    episodes         | 152       |
|    fps              | 664       |
|    time_elapsed     | 87        |
|    total_timesteps  | 58284     |
| train/              |           |
|    actor_loss       | -6.76     |
|    critic_loss      | 7.51      |
|    learning_rate    | 0.0003    |
|    n_updates        | 339715    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -69.39248 |
| eval/               |           |
|    mean_ep_length   | 445       |
|    mean_reward      | -69       |
| time/               |           |
|    total_timesteps  | 60000     |
| train/              |           |
|    actor_loss       | -7.13     |
|    critic_loss      | 23.8      |
|    learning_rate    | 0.0003    |
|    n_updates        | 340001    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -68.893234 |
| rollout/            |            |
|    ep_len_mean      | 417        |
|    ep_rew_mean      | -68.9      |
| time/               |            |
|    episodes         | 156        |
|    fps              | 647        |
|    time_elapsed     | 94         |
|    total_timesteps  | 61146      |
| train/              |            |
|    actor_loss       | -5.76      |
|    critic_loss      | 7.16       |
|    learning_rate    | 0.0003     |
|    n_updates        | 340192     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -67.112404 |
| rollout/            |            |
|    ep_len_mean      | 447        |
|    ep_rew_mean      | -67.1      |
| time/               |            |
|    episodes         | 160        |
|    fps              | 644        |
|    time_elapsed     | 101        |
|    total_timesteps  | 65280      |
| train/              |            |
|    actor_loss       | -5.24      |
|    critic_loss      | 11.3       |
|    learning_rate    | 0.0003     |
|    n_updates        | 340881     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -64.758286 |
| rollout/            |            |
|    ep_len_mean      | 468        |
|    ep_rew_mean      | -64.8      |
| time/               |            |
|    episodes         | 164        |
|    fps              | 642        |
|    time_elapsed     | 106        |
|    total_timesteps  | 68484      |
| train/              |            |
|    actor_loss       | -6.16      |
|    critic_loss      | 8.25       |
|    learning_rate    | 0.0003     |
|    n_updates        | 341415     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -61.541996 |
| rollout/            |            |
|    ep_len_mean      | 495        |
|    ep_rew_mean      | -61.5      |
| time/               |            |
|    episodes         | 168        |
|    fps              | 640        |
|    time_elapsed     | 111        |
|    total_timesteps  | 71490      |
| train/              |            |
|    actor_loss       | -5.32      |
|    critic_loss      | 9.94       |
|    learning_rate    | 0.0003     |
|    n_updates        | 341916     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -61.509766 |
| rollout/            |            |
|    ep_len_mean      | 489        |
|    ep_rew_mean      | -61.5      |
| time/               |            |
|    episodes         | 172        |
|    fps              | 639        |
|    time_elapsed     | 114        |
|    total_timesteps  | 73110      |
| train/              |            |
|    actor_loss       | -5.22      |
|    critic_loss      | 14.4       |
|    learning_rate    | 0.0003     |
|    n_updates        | 342186     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -61.38907 |
| rollout/            |           |
|    ep_len_mean      | 506       |
|    ep_rew_mean      | -61.4     |
| time/               |           |
|    episodes         | 176       |
|    fps              | 637       |
|    time_elapsed     | 119       |
|    total_timesteps  | 76284     |
| train/              |           |
|    actor_loss       | -6.38     |
|    critic_loss      | 10.4      |
|    learning_rate    | 0.0003    |
|    n_updates        | 342715    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -57.71397 |
| rollout/            |           |
|    ep_len_mean      | 535       |
|    ep_rew_mean      | -57.7     |
| time/               |           |
|    episodes         | 180       |
|    fps              | 635       |
|    time_elapsed     | 126       |
|    total_timesteps  | 80130     |
| train/              |           |
|    actor_loss       | -4.6      |
|    critic_loss      | 9.67      |
|    learning_rate    | 0.0003    |
|    n_updates        | 343356    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -56.333588 |
| rollout/            |            |
|    ep_len_mean      | 558        |
|    ep_rew_mean      | -56.3      |
| time/               |            |
|    episodes         | 184        |
|    fps              | 634        |
|    time_elapsed     | 131        |
|    total_timesteps  | 83280      |
| train/              |            |
|    actor_loss       | -5.48      |
|    critic_loss      | 7.8        |
|    learning_rate    | 0.0003     |
|    n_updates        | 343881     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -55.917206 |
| rollout/            |            |
|    ep_len_mean      | 567        |
|    ep_rew_mean      | -55.9      |
| time/               |            |
|    episodes         | 188        |
|    fps              | 633        |
|    time_elapsed     | 134        |
|    total_timesteps  | 84924      |
| train/              |            |
|    actor_loss       | -6.86      |
|    critic_loss      | 10.5       |
|    learning_rate    | 0.0003     |
|    n_updates        | 344155     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -54.506996 |
| rollout/            |            |
|    ep_len_mean      | 593        |
|    ep_rew_mean      | -54.5      |
| time/               |            |
|    episodes         | 192        |
|    fps              | 629        |
|    time_elapsed     | 141        |
|    total_timesteps  | 89376      |
| train/              |            |
|    actor_loss       | -6.18      |
|    critic_loss      | 10.1       |
|    learning_rate    | 0.0003     |
|    n_updates        | 344897     |
------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 471      |
|    mean_reward     | -71.5    |
| time/              |          |
|    total_timesteps | 90000    |
| train/             |          |
|    actor_loss      | -4.49    |
|    critic_loss     | 8.27     |
|    learning_rate   | 0.0003   |
|    n_updates       | 345001   |
---------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -52.666172 |
| rollout/            |            |
|    ep_len_mean      | 614        |
|    ep_rew_mean      | -52.7      |
| time/               |            |
|    episodes         | 196        |
|    fps              | 620        |
|    time_elapsed     | 147        |
|    total_timesteps  | 91242      |
| train/              |            |
|    actor_loss       | -7.32      |
|    critic_loss      | 11.2       |
|    learning_rate    | 0.0003     |
|    n_updates        | 345208     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -51.78557 |
| rollout/            |           |
|    ep_len_mean      | 620       |
|    ep_rew_mean      | -51.8     |
| time/               |           |
|    episodes         | 200       |
|    fps              | 619       |
|    time_elapsed     | 151       |
|    total_timesteps  | 93738     |
| train/              |           |
|    actor_loss       | -5.31     |
|    critic_loss      | 4.71      |
|    learning_rate    | 0.0003    |
|    n_updates        | 345624    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -52.32429 |
| rollout/            |           |
|    ep_len_mean      | 636       |
|    ep_rew_mean      | -52.3     |
| time/               |           |
|    episodes         | 204       |
|    fps              | 618       |
|    time_elapsed     | 159       |
|    total_timesteps  | 98466     |
| train/              |           |
|    actor_loss       | -1.6      |
|    critic_loss      | 14.6      |
|    learning_rate    | 0.0003    |
|    n_updates        | 346412    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -49.182724 |
| rollout/            |            |
|    ep_len_mean      | 651        |
|    ep_rew_mean      | -49.2      |
| time/               |            |
|    episodes         | 208        |
|    fps              | 617        |
|    time_elapsed     | 161        |
|    total_timesteps  | 99834      |
| train/              |            |
|    actor_loss       | -5.69      |
|    critic_loss      | 3.97       |
|    learning_rate    | 0.0003     |
|    n_updates        | 346640     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -50.390575 |
| rollout/            |            |
|    ep_len_mean      | 650        |
|    ep_rew_mean      | -50.4      |
| time/               |            |
|    episodes         | 212        |
|    fps              | 617        |
|    time_elapsed     | 166        |
|    total_timesteps  | 102930     |
| train/              |            |
|    actor_loss       | -4.13      |
|    critic_loss      | 5.89       |
|    learning_rate    | 0.0003     |
|    n_updates        | 347156     |
------------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | -50.131  |
| rollout/            |          |
|    ep_len_mean      | 659      |
|    ep_rew_mean      | -50.1    |
| time/               |          |
|    episodes         | 216      |
|    fps              | 616      |
|    time_elapsed     | 170      |
|    total_timesteps  | 105270   |
| train/              |          |
|    actor_loss       | -5.8     |
|    critic_loss      | 4.44     |
|    learning_rate    | 0.0003   |
|    n_updates        | 347546   |
----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -48.249424 |
| rollout/            |            |
|    ep_len_mean      | 680        |
|    ep_rew_mean      | -48.2      |
| time/               |            |
|    episodes         | 220        |
|    fps              | 616        |
|    time_elapsed     | 176        |
|    total_timesteps  | 108930     |
| train/              |            |
|    actor_loss       | -2.67      |
|    critic_loss      | 6.09       |
|    learning_rate    | 0.0003     |
|    n_updates        | 348156     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -47.089893 |
| rollout/            |            |
|    ep_len_mean      | 672        |
|    ep_rew_mean      | -47.1      |
| time/               |            |
|    episodes         | 224        |
|    fps              | 615        |
|    time_elapsed     | 179        |
|    total_timesteps  | 110742     |
| train/              |            |
|    actor_loss       | -3.98      |
|    critic_loss      | 10.3       |
|    learning_rate    | 0.0003     |
|    n_updates        | 348458     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -45.279537 |
| rollout/            |            |
|    ep_len_mean      | 683        |
|    ep_rew_mean      | -45.3      |
| time/               |            |
|    episodes         | 228        |
|    fps              | 614        |
|    time_elapsed     | 182        |
|    total_timesteps  | 111948     |
| train/              |            |
|    actor_loss       | -4.79      |
|    critic_loss      | 14.1       |
|    learning_rate    | 0.0003     |
|    n_updates        | 348659     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -45.114902 |
| rollout/            |            |
|    ep_len_mean      | 685        |
|    ep_rew_mean      | -45.1      |
| time/               |            |
|    episodes         | 232        |
|    fps              | 613        |
|    time_elapsed     | 188        |
|    total_timesteps  | 115734     |
| train/              |            |
|    actor_loss       | -4.91      |
|    critic_loss      | 12.3       |
|    learning_rate    | 0.0003     |
|    n_updates        | 349290     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -44.285694 |
| rollout/            |            |
|    ep_len_mean      | 695        |
|    ep_rew_mean      | -44.3      |
| time/               |            |
|    episodes         | 236        |
|    fps              | 613        |
|    time_elapsed     | 192        |
|    total_timesteps  | 117876     |
| train/              |            |
|    actor_loss       | -4.49      |
|    critic_loss      | 13.1       |
|    learning_rate    | 0.0003     |
|    n_updates        | 349647     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -44.852467 |
| eval/               |            |
|    mean_ep_length   | 635        |
|    mean_reward      | -72.9      |
| time/               |            |
|    total_timesteps  | 120000     |
| train/              |            |
|    actor_loss       | -5.53      |
|    critic_loss      | 8.27       |
|    learning_rate    | 0.0003     |
|    n_updates        | 350001     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -44.544907 |
| rollout/            |            |
|    ep_len_mean      | 688        |
|    ep_rew_mean      | -44.5      |
| time/               |            |
|    episodes         | 240        |
|    fps              | 605        |
|    time_elapsed     | 199        |
|    total_timesteps  | 120930     |
| train/              |            |
|    actor_loss       | -4.85      |
|    critic_loss      | 7.59       |
|    learning_rate    | 0.0003     |
|    n_updates        | 350156     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -42.19212 |
| rollout/            |           |
|    ep_len_mean      | 705       |
|    ep_rew_mean      | -42.2     |
| time/               |           |
|    episodes         | 244       |
|    fps              | 605       |
|    time_elapsed     | 204       |
|    total_timesteps  | 123930    |
| train/              |           |
|    actor_loss       | -3.75     |
|    critic_loss      | 7.26      |
|    learning_rate    | 0.0003    |
|    n_updates        | 350656    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -43.145737 |
| rollout/            |            |
|    ep_len_mean      | 710        |
|    ep_rew_mean      | -43.1      |
| time/               |            |
|    episodes         | 248        |
|    fps              | 605        |
|    time_elapsed     | 209        |
|    total_timesteps  | 127026     |
| train/              |            |
|    actor_loss       | -3.13      |
|    critic_loss      | 5.16       |
|    learning_rate    | 0.0003     |
|    n_updates        | 351172     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -41.61769 |
| rollout/            |           |
|    ep_len_mean      | 721       |
|    ep_rew_mean      | -41.6     |
| time/               |           |
|    episodes         | 252       |
|    fps              | 605       |
|    time_elapsed     | 215       |
|    total_timesteps  | 130266    |
| train/              |           |
|    actor_loss       | -5.09     |
|    critic_loss      | 4.73      |
|    learning_rate    | 0.0003    |
|    n_updates        | 351712    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -42.54476 |
| rollout/            |           |
|    ep_len_mean      | 723       |
|    ep_rew_mean      | -42.5     |
| time/               |           |
|    episodes         | 256       |
|    fps              | 605       |
|    time_elapsed     | 218       |
|    total_timesteps  | 132480    |
| train/              |           |
|    actor_loss       | -2.68     |
|    critic_loss      | 9.86      |
|    learning_rate    | 0.0003    |
|    n_updates        | 352081    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -45.096745 |
| rollout/            |            |
|    ep_len_mean      | 701        |
|    ep_rew_mean      | -45.1      |
| time/               |            |
|    episodes         | 260        |
|    fps              | 605        |
|    time_elapsed     | 221        |
|    total_timesteps  | 133938     |
| train/              |            |
|    actor_loss       | -1.83      |
|    critic_loss      | 8.06       |
|    learning_rate    | 0.0003     |
|    n_updates        | 352324     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -47.196144 |
| rollout/            |            |
|    ep_len_mean      | 684        |
|    ep_rew_mean      | -47.2      |
| time/               |            |
|    episodes         | 264        |
|    fps              | 605        |
|    time_elapsed     | 224        |
|    total_timesteps  | 135810     |
| train/              |            |
|    actor_loss       | -3.59      |
|    critic_loss      | 9.52       |
|    learning_rate    | 0.0003     |
|    n_updates        | 352636     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -49.75336 |
| rollout/            |           |
|    ep_len_mean      | 667       |
|    ep_rew_mean      | -49.8     |
| time/               |           |
|    episodes         | 268       |
|    fps              | 605       |
|    time_elapsed     | 228       |
|    total_timesteps  | 138450    |
| train/              |           |
|    actor_loss       | -6.06     |
|    critic_loss      | 4.03      |
|    learning_rate    | 0.0003    |
|    n_updates        | 353076    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -50.810375 |
| rollout/            |            |
|    ep_len_mean      | 667        |
|    ep_rew_mean      | -50.8      |
| time/               |            |
|    episodes         | 272        |
|    fps              | 604        |
|    time_elapsed     | 234        |
|    total_timesteps  | 141534     |
| train/              |            |
|    actor_loss       | -3.63      |
|    critic_loss      | 14.1       |
|    learning_rate    | 0.0003     |
|    n_updates        | 353590     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -49.80635 |
| rollout/            |           |
|    ep_len_mean      | 674       |
|    ep_rew_mean      | -49.8     |
| time/               |           |
|    episodes         | 276       |
|    fps              | 604       |
|    time_elapsed     | 235       |
|    total_timesteps  | 142572    |
| train/              |           |
|    actor_loss       | -1.74     |
|    critic_loss      | 11.8      |
|    learning_rate    | 0.0003    |
|    n_updates        | 353763    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -52.96703 |
| rollout/            |           |
|    ep_len_mean      | 653       |
|    ep_rew_mean      | -53       |
| time/               |           |
|    episodes         | 280       |
|    fps              | 604       |
|    time_elapsed     | 237       |
|    total_timesteps  | 143568    |
| train/              |           |
|    actor_loss       | -5.01     |
|    critic_loss      | 7.46      |
|    learning_rate    | 0.0003    |
|    n_updates        | 353929    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -55.871353 |
| rollout/            |            |
|    ep_len_mean      | 628        |
|    ep_rew_mean      | -55.9      |
| time/               |            |
|    episodes         | 284        |
|    fps              | 604        |
|    time_elapsed     | 238        |
|    total_timesteps  | 144156     |
| train/              |            |
|    actor_loss       | -2.69      |
|    critic_loss      | 8.81       |
|    learning_rate    | 0.0003     |
|    n_updates        | 354027     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -56.308727 |
| rollout/            |            |
|    ep_len_mean      | 632        |
|    ep_rew_mean      | -56.3      |
| time/               |            |
|    episodes         | 288        |
|    fps              | 604        |
|    time_elapsed     | 245        |
|    total_timesteps  | 148764     |
| train/              |            |
|    actor_loss       | -1.5       |
|    critic_loss      | 9.16       |
|    learning_rate    | 0.0003     |
|    n_updates        | 354795     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -56.98253 |
| eval/               |           |
|    mean_ep_length   | 573       |
|    mean_reward      | -76.5     |
| time/               |           |
|    total_timesteps  | 150000    |
| train/              |           |
|    actor_loss       | -3.81     |
|    critic_loss      | 5.72      |
|    learning_rate    | 0.0003    |
|    n_updates        | 355001    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -56.14251 |
| rollout/            |           |
|    ep_len_mean      | 626       |
|    ep_rew_mean      | -56.1     |
| time/               |           |
|    episodes         | 292       |
|    fps              | 598       |
|    time_elapsed     | 250       |
|    total_timesteps  | 150156    |
| train/              |           |
|    actor_loss       | -4.74     |
|    critic_loss      | 4.62      |
|    learning_rate    | 0.0003    |
|    n_updates        | 355027    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -58.49712 |
| rollout/            |           |
|    ep_len_mean      | 609       |
|    ep_rew_mean      | -58.5     |
| time/               |           |
|    episodes         | 296       |
|    fps              | 598       |
|    time_elapsed     | 255       |
|    total_timesteps  | 152814    |
| train/              |           |
|    actor_loss       | -3.88     |
|    critic_loss      | 4.61      |
|    learning_rate    | 0.0003    |
|    n_updates        | 355470    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -58.827423 |
| rollout/            |            |
|    ep_len_mean      | 611        |
|    ep_rew_mean      | -58.8      |
| time/               |            |
|    episodes         | 300        |
|    fps              | 598        |
|    time_elapsed     | 258        |
|    total_timesteps  | 154914     |
| train/              |            |
|    actor_loss       | -2.86      |
|    critic_loss      | 11.4       |
|    learning_rate    | 0.0003     |
|    n_updates        | 355820     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -58.82619 |
| rollout/            |           |
|    ep_len_mean      | 596       |
|    ep_rew_mean      | -58.8     |
| time/               |           |
|    episodes         | 304       |
|    fps              | 598       |
|    time_elapsed     | 261       |
|    total_timesteps  | 156822    |
| train/              |           |
|    actor_loss       | -1.45     |
|    critic_loss      | 13.6      |
|    learning_rate    | 0.0003    |
|    n_updates        | 356138    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -60.44347 |
| rollout/            |           |
|    ep_len_mean      | 584       |
|    ep_rew_mean      | -60.4     |
| time/               |           |
|    episodes         | 308       |
|    fps              | 598       |
|    time_elapsed     | 266       |
|    total_timesteps  | 159414    |
| train/              |           |
|    actor_loss       | -1.27     |
|    critic_loss      | 13.5      |
|    learning_rate    | 0.0003    |
|    n_updates        | 356570    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -59.77127 |
| rollout/            |           |
|    ep_len_mean      | 588       |
|    ep_rew_mean      | -59.8     |
| time/               |           |
|    episodes         | 312       |
|    fps              | 598       |
|    time_elapsed     | 270       |
|    total_timesteps  | 162264    |
| train/              |           |
|    actor_loss       | -2.75     |
|    critic_loss      | 5.49      |
|    learning_rate    | 0.0003    |
|    n_updates        | 357045    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -56.36681 |
| rollout/            |           |
|    ep_len_mean      | 597       |
|    ep_rew_mean      | -56.4     |
| time/               |           |
|    episodes         | 316       |
|    fps              | 599       |
|    time_elapsed     | 275       |
|    total_timesteps  | 164814    |
| train/              |           |
|    actor_loss       | -1.55     |
|    critic_loss      | 5.76      |
|    learning_rate    | 0.0003    |
|    n_updates        | 357470    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -58.19025 |
| rollout/            |           |
|    ep_len_mean      | 589       |
|    ep_rew_mean      | -58.2     |
| time/               |           |
|    episodes         | 320       |
|    fps              | 599       |
|    time_elapsed     | 278       |
|    total_timesteps  | 166914    |
| train/              |           |
|    actor_loss       | -3.1      |
|    critic_loss      | 7.15      |
|    learning_rate    | 0.0003    |
|    n_updates        | 357820    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -59.156094 |
| rollout/            |            |
|    ep_len_mean      | 590        |
|    ep_rew_mean      | -59.2      |
| time/               |            |
|    episodes         | 324        |
|    fps              | 599        |
|    time_elapsed     | 283        |
|    total_timesteps  | 169824     |
| train/              |            |
|    actor_loss       | -3.04      |
|    critic_loss      | 4.36       |
|    learning_rate    | 0.0003     |
|    n_updates        | 358305     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -57.876446 |
| rollout/            |            |
|    ep_len_mean      | 597        |
|    ep_rew_mean      | -57.9      |
| time/               |            |
|    episodes         | 328        |
|    fps              | 599        |
|    time_elapsed     | 288        |
|    total_timesteps  | 172914     |
| train/              |            |
|    actor_loss       | -3.65      |
|    critic_loss      | 6.03       |
|    learning_rate    | 0.0003     |
|    n_updates        | 358820     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -54.55544 |
| rollout/            |           |
|    ep_len_mean      | 614       |
|    ep_rew_mean      | -54.6     |
| time/               |           |
|    episodes         | 332       |
|    fps              | 599       |
|    time_elapsed     | 296       |
|    total_timesteps  | 177456    |
| train/              |           |
|    actor_loss       | -1.44     |
|    critic_loss      | 7.96      |
|    learning_rate    | 0.0003    |
|    n_updates        | 359577    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -54.163242 |
| eval/               |            |
|    mean_ep_length   | 704        |
|    mean_reward      | -19.2      |
| time/               |            |
|    total_timesteps  | 180000     |
| train/              |            |
|    actor_loss       | -2.06      |
|    critic_loss      | 4.29       |
|    learning_rate    | 0.0003     |
|    n_updates        | 360001     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -54.072582 |
| rollout/            |            |
|    ep_len_mean      | 616        |
|    ep_rew_mean      | -54.1      |
| time/               |            |
|    episodes         | 336        |
|    fps              | 593        |
|    time_elapsed     | 304        |
|    total_timesteps  | 180504     |
| train/              |            |
|    actor_loss       | -1.78      |
|    critic_loss      | 9.98       |
|    learning_rate    | 0.0003     |
|    n_updates        | 360085     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -54.491158 |
| rollout/            |            |
|    ep_len_mean      | 629        |
|    ep_rew_mean      | -54.5      |
| time/               |            |
|    episodes         | 340        |
|    fps              | 593        |
|    time_elapsed     | 307        |
|    total_timesteps  | 182466     |
| train/              |            |
|    actor_loss       | -0.41      |
|    critic_loss      | 4.9        |
|    learning_rate    | 0.0003     |
|    n_updates        | 360412     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -56.69252 |
| rollout/            |           |
|    ep_len_mean      | 621       |
|    ep_rew_mean      | -56.7     |
| time/               |           |
|    episodes         | 344       |
|    fps              | 593       |
|    time_elapsed     | 313       |
|    total_timesteps  | 186180    |
| train/              |           |
|    actor_loss       | -0.36     |
|    critic_loss      | 5.62      |
|    learning_rate    | 0.0003    |
|    n_updates        | 361031    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -53.49196 |
| rollout/            |           |
|    ep_len_mean      | 616       |
|    ep_rew_mean      | -53.5     |
| time/               |           |
|    episodes         | 348       |
|    fps              | 593       |
|    time_elapsed     | 317       |
|    total_timesteps  | 188466    |
| train/              |           |
|    actor_loss       | -2.37     |
|    critic_loss      | 4.53      |
|    learning_rate    | 0.0003    |
|    n_updates        | 361412    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -54.463856 |
| rollout/            |            |
|    ep_len_mean      | 616        |
|    ep_rew_mean      | -54.5      |
| time/               |            |
|    episodes         | 352        |
|    fps              | 593        |
|    time_elapsed     | 325        |
|    total_timesteps  | 193098     |
| train/              |            |
|    actor_loss       | -0.228     |
|    critic_loss      | 4.62       |
|    learning_rate    | 0.0003     |
|    n_updates        | 362184     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -52.37108 |
| rollout/            |           |
|    ep_len_mean      | 620       |
|    ep_rew_mean      | -52.4     |
| time/               |           |
|    episodes         | 356       |
|    fps              | 593       |
|    time_elapsed     | 330       |
|    total_timesteps  | 195918    |
| train/              |           |
|    actor_loss       | -0.719    |
|    critic_loss      | 10        |
|    learning_rate    | 0.0003    |
|    n_updates        | 362654    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -48.49849 |
| rollout/            |           |
|    ep_len_mean      | 638       |
|    ep_rew_mean      | -48.5     |
| time/               |           |
|    episodes         | 360       |
|    fps              | 593       |
|    time_elapsed     | 335       |
|    total_timesteps  | 199098    |
| train/              |           |
|    actor_loss       | -1.29     |
|    critic_loss      | 5.79      |
|    learning_rate    | 0.0003    |
|    n_updates        | 363184    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -47.064796 |
| rollout/            |            |
|    ep_len_mean      | 644        |
|    ep_rew_mean      | -47.1      |
| time/               |            |
|    episodes         | 364        |
|    fps              | 593        |
|    time_elapsed     | 340        |
|    total_timesteps  | 201918     |
| train/              |            |
|    actor_loss       | -1.4       |
|    critic_loss      | 13.6       |
|    learning_rate    | 0.0003     |
|    n_updates        | 363654     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -44.490005 |
| rollout/            |            |
|    ep_len_mean      | 656        |
|    ep_rew_mean      | -44.5      |
| time/               |            |
|    episodes         | 368        |
|    fps              | 593        |
|    time_elapsed     | 341        |
|    total_timesteps  | 202794     |
| train/              |            |
|    actor_loss       | -0.78      |
|    critic_loss      | 5.56       |
|    learning_rate    | 0.0003     |
|    n_updates        | 363800     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -42.133835 |
| rollout/            |            |
|    ep_len_mean      | 671        |
|    ep_rew_mean      | -42.1      |
| time/               |            |
|    episodes         | 372        |
|    fps              | 593        |
|    time_elapsed     | 350        |
|    total_timesteps  | 208134     |
| train/              |            |
|    actor_loss       | -1.01      |
|    critic_loss      | 9.94       |
|    learning_rate    | 0.0003     |
|    n_updates        | 364690     |
------------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | -41.2286 |
| rollout/            |          |
|    ep_len_mean      | 673      |
|    ep_rew_mean      | -41.2    |
| time/               |          |
|    episodes         | 376      |
|    fps              | 593      |
|    time_elapsed     | 352      |
|    total_timesteps  | 209406   |
| train/              |          |
|    actor_loss       | -0.679   |
|    critic_loss      | 6.37     |
|    learning_rate    | 0.0003   |
|    n_updates        | 364902   |
----------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 547      |
|    mean_reward     | -58.1    |
| time/              |          |
|    total_timesteps | 210000   |
| train/             |          |
|    actor_loss      | 0.261    |
|    critic_loss     | 6.71     |
|    learning_rate   | 0.0003   |
|    n_updates       | 365001   |
---------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -38.94255 |
| rollout/            |           |
|    ep_len_mean      | 688       |
|    ep_rew_mean      | -38.9     |
| time/               |           |
|    episodes         | 380       |
|    fps              | 590       |
|    time_elapsed     | 362       |
|    total_timesteps  | 213918    |
| train/              |           |
|    actor_loss       | 0.0917    |
|    critic_loss      | 4.42      |
|    learning_rate    | 0.0003    |
|    n_updates        | 365654    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | -36.8676 |
| rollout/            |          |
|    ep_len_mean      | 707      |
|    ep_rew_mean      | -36.9    |
| time/               |          |
|    episodes         | 384      |
|    fps              | 590      |
|    time_elapsed     | 365      |
|    total_timesteps  | 215826   |
| train/              |          |
|    actor_loss       | 2.32     |
|    critic_loss      | 11.1     |
|    learning_rate    | 0.0003   |
|    n_updates        | 365972   |
----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -37.239594 |
| rollout/            |            |
|    ep_len_mean      | 710        |
|    ep_rew_mean      | -37.2      |
| time/               |            |
|    episodes         | 388        |
|    fps              | 590        |
|    time_elapsed     | 371        |
|    total_timesteps  | 219558     |
| train/              |            |
|    actor_loss       | 0.396      |
|    critic_loss      | 9.59       |
|    learning_rate    | 0.0003     |
|    n_updates        | 366594     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -36.199265 |
| rollout/            |            |
|    ep_len_mean      | 723        |
|    ep_rew_mean      | -36.2      |
| time/               |            |
|    episodes         | 392        |
|    fps              | 591        |
|    time_elapsed     | 379        |
|    total_timesteps  | 224598     |
| train/              |            |
|    actor_loss       | -1.7       |
|    critic_loss      | 2.91       |
|    learning_rate    | 0.0003     |
|    n_updates        | 367434     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -35.124973 |
| rollout/            |            |
|    ep_len_mean      | 736        |
|    ep_rew_mean      | -35.1      |
| time/               |            |
|    episodes         | 396        |
|    fps              | 591        |
|    time_elapsed     | 381        |
|    total_timesteps  | 225816     |
| train/              |            |
|    actor_loss       | -1.23      |
|    critic_loss      | 3.21       |
|    learning_rate    | 0.0003     |
|    n_updates        | 367637     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -34.660713 |
| rollout/            |            |
|    ep_len_mean      | 738        |
|    ep_rew_mean      | -34.7      |
| time/               |            |
|    episodes         | 400        |
|    fps              | 591        |
|    time_elapsed     | 386        |
|    total_timesteps  | 228306     |
| train/              |            |
|    actor_loss       | -0.0468    |
|    critic_loss      | 4.29       |
|    learning_rate    | 0.0003     |
|    n_updates        | 368052     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -32.513145 |
| rollout/            |            |
|    ep_len_mean      | 748        |
|    ep_rew_mean      | -32.5      |
| time/               |            |
|    episodes         | 404        |
|    fps              | 591        |
|    time_elapsed     | 391        |
|    total_timesteps  | 231816     |
| train/              |            |
|    actor_loss       | 0.0239     |
|    critic_loss      | 21.2       |
|    learning_rate    | 0.0003     |
|    n_updates        | 368637     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -31.74641 |
| rollout/            |           |
|    ep_len_mean      | 753       |
|    ep_rew_mean      | -31.7     |
| time/               |           |
|    episodes         | 408       |
|    fps              | 591       |
|    time_elapsed     | 395       |
|    total_timesteps  | 234306    |
| train/              |           |
|    actor_loss       | 0.293     |
|    critic_loss      | 6.63      |
|    learning_rate    | 0.0003    |
|    n_updates        | 369052    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -31.484175 |
| rollout/            |            |
|    ep_len_mean      | 744        |
|    ep_rew_mean      | -31.5      |
| time/               |            |
|    episodes         | 412        |
|    fps              | 591        |
|    time_elapsed     | 397        |
|    total_timesteps  | 235410     |
| train/              |            |
|    actor_loss       | 0.543      |
|    critic_loss      | 5.19       |
|    learning_rate    | 0.0003     |
|    n_updates        | 369236     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -34.409496 |
| rollout/            |            |
|    ep_len_mean      | 730        |
|    ep_rew_mean      | -34.4      |
| time/               |            |
|    episodes         | 416        |
|    fps              | 591        |
|    time_elapsed     | 402        |
|    total_timesteps  | 238104     |
| train/              |            |
|    actor_loss       | 1.5        |
|    critic_loss      | 5.26       |
|    learning_rate    | 0.0003     |
|    n_updates        | 369685     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -33.40945 |
| eval/               |           |
|    mean_ep_length   | 485       |
|    mean_reward      | -52       |
| time/               |           |
|    total_timesteps  | 240000    |
| train/              |           |
|    actor_loss       | 2.5       |
|    critic_loss      | 4.14      |
|    learning_rate    | 0.0003    |
|    n_updates        | 370001    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -34.18958 |
| rollout/            |           |
|    ep_len_mean      | 723       |
|    ep_rew_mean      | -34.2     |
| time/               |           |
|    episodes         | 420       |
|    fps              | 588       |
|    time_elapsed     | 409       |
|    total_timesteps  | 240924    |
| train/              |           |
|    actor_loss       | 1.39      |
|    critic_loss      | 5.07      |
|    learning_rate    | 0.0003    |
|    n_updates        | 370155    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -33.40951 |
| rollout/            |           |
|    ep_len_mean      | 730       |
|    ep_rew_mean      | -33.4     |
| time/               |           |
|    episodes         | 424       |
|    fps              | 588       |
|    time_elapsed     | 412       |
|    total_timesteps  | 242796    |
| train/              |           |
|    actor_loss       | 1.09      |
|    critic_loss      | 7.62      |
|    learning_rate    | 0.0003    |
|    n_updates        | 370467    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -35.920227 |
| rollout/            |            |
|    ep_len_mean      | 714        |
|    ep_rew_mean      | -35.9      |
| time/               |            |
|    episodes         | 428        |
|    fps              | 588        |
|    time_elapsed     | 414        |
|    total_timesteps  | 243816     |
| train/              |            |
|    actor_loss       | 1.07       |
|    critic_loss      | 6.09       |
|    learning_rate    | 0.0003     |
|    n_updates        | 370637     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -37.156876 |
| rollout/            |            |
|    ep_len_mean      | 696        |
|    ep_rew_mean      | -37.2      |
| time/               |            |
|    episodes         | 432        |
|    fps              | 588        |
|    time_elapsed     | 415        |
|    total_timesteps  | 244800     |
| train/              |            |
|    actor_loss       | 1.81       |
|    critic_loss      | 8.22       |
|    learning_rate    | 0.0003     |
|    n_updates        | 370801     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -39.109932 |
| rollout/            |            |
|    ep_len_mean      | 686        |
|    ep_rew_mean      | -39.1      |
| time/               |            |
|    episodes         | 436        |
|    fps              | 589        |
|    time_elapsed     | 424        |
|    total_timesteps  | 250104     |
| train/              |            |
|    actor_loss       | 1.93       |
|    critic_loss      | 5.13       |
|    learning_rate    | 0.0003     |
|    n_updates        | 371685     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -36.905285 |
| rollout/            |            |
|    ep_len_mean      | 694        |
|    ep_rew_mean      | -36.9      |
| time/               |            |
|    episodes         | 440        |
|    fps              | 588        |
|    time_elapsed     | 428        |
|    total_timesteps  | 252330     |
| train/              |            |
|    actor_loss       | 1.59       |
|    critic_loss      | 8.61       |
|    learning_rate    | 0.0003     |
|    n_updates        | 372056     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -35.731277 |
| rollout/            |            |
|    ep_len_mean      | 704        |
|    ep_rew_mean      | -35.7      |
| time/               |            |
|    episodes         | 444        |
|    fps              | 589        |
|    time_elapsed     | 435        |
|    total_timesteps  | 256560     |
| train/              |            |
|    actor_loss       | 1.63       |
|    critic_loss      | 5.88       |
|    learning_rate    | 0.0003     |
|    n_updates        | 372761     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -35.710014 |
| rollout/            |            |
|    ep_len_mean      | 704        |
|    ep_rew_mean      | -35.7      |
| time/               |            |
|    episodes         | 448        |
|    fps              | 589        |
|    time_elapsed     | 439        |
|    total_timesteps  | 259218     |
| train/              |            |
|    actor_loss       | -0.437     |
|    critic_loss      | 6.77       |
|    learning_rate    | 0.0003     |
|    n_updates        | 373204     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -31.350342 |
| rollout/            |            |
|    ep_len_mean      | 713        |
|    ep_rew_mean      | -31.4      |
| time/               |            |
|    episodes         | 452        |
|    fps              | 589        |
|    time_elapsed     | 445        |
|    total_timesteps  | 262800     |
| train/              |            |
|    actor_loss       | 2.38       |
|    critic_loss      | 6.41       |
|    learning_rate    | 0.0003     |
|    n_updates        | 373801     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -32.213295 |
| rollout/            |            |
|    ep_len_mean      | 698        |
|    ep_rew_mean      | -32.2      |
| time/               |            |
|    episodes         | 456        |
|    fps              | 589        |
|    time_elapsed     | 448        |
|    total_timesteps  | 264432     |
| train/              |            |
|    actor_loss       | 1.57       |
|    critic_loss      | 3.82       |
|    learning_rate    | 0.0003     |
|    n_updates        | 374073     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -32.071823 |
| rollout/            |            |
|    ep_len_mean      | 687        |
|    ep_rew_mean      | -32.1      |
| time/               |            |
|    episodes         | 460        |
|    fps              | 589        |
|    time_elapsed     | 452        |
|    total_timesteps  | 266514     |
| train/              |            |
|    actor_loss       | 1.14       |
|    critic_loss      | 10.9       |
|    learning_rate    | 0.0003     |
|    n_updates        | 374420     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -34.058933 |
| rollout/            |            |
|    ep_len_mean      | 683        |
|    ep_rew_mean      | -34.1      |
| time/               |            |
|    episodes         | 464        |
|    fps              | 589        |
|    time_elapsed     | 455        |
|    total_timesteps  | 268686     |
| train/              |            |
|    actor_loss       | 4.18       |
|    critic_loss      | 5.77       |
|    learning_rate    | 0.0003     |
|    n_updates        | 374782     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -34.169308 |
| eval/               |            |
|    mean_ep_length   | 346        |
|    mean_reward      | -42.3      |
| time/               |            |
|    total_timesteps  | 270000     |
| train/              |            |
|    actor_loss       | 4.55       |
|    critic_loss      | 2.94       |
|    learning_rate    | 0.0003     |
|    n_updates        | 375001     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -34.054935 |
| rollout/            |            |
|    ep_len_mean      | 675        |
|    ep_rew_mean      | -34.1      |
| time/               |            |
|    episodes         | 468        |
|    fps              | 587        |
|    time_elapsed     | 463        |
|    total_timesteps  | 272430     |
| train/              |            |
|    actor_loss       | 2.79       |
|    critic_loss      | 10.2       |
|    learning_rate    | 0.0003     |
|    n_updates        | 375406     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -35.200943 |
| rollout/            |            |
|    ep_len_mean      | 669        |
|    ep_rew_mean      | -35.2      |
| time/               |            |
|    episodes         | 472        |
|    fps              | 588        |
|    time_elapsed     | 467        |
|    total_timesteps  | 274644     |
| train/              |            |
|    actor_loss       | 1.42       |
|    critic_loss      | 3.15       |
|    learning_rate    | 0.0003     |
|    n_updates        | 375775     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -35.22062 |
| rollout/            |           |
|    ep_len_mean      | 663       |
|    ep_rew_mean      | -35.2     |
| time/               |           |
|    episodes         | 476       |
|    fps              | 588       |
|    time_elapsed     | 471       |
|    total_timesteps  | 277122    |
| train/              |           |
|    actor_loss       | 3.24      |
|    critic_loss      | 3.58      |
|    learning_rate    | 0.0003    |
|    n_updates        | 376188    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | -36.9286 |
| rollout/            |          |
|    ep_len_mean      | 660      |
|    ep_rew_mean      | -36.9    |
| time/               |          |
|    episodes         | 480      |
|    fps              | 588      |
|    time_elapsed     | 474      |
|    total_timesteps  | 279006   |
| train/              |          |
|    actor_loss       | 1.84     |
|    critic_loss      | 8.67     |
|    learning_rate    | 0.0003   |
|    n_updates        | 376502   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -34.35782 |
| rollout/            |           |
|    ep_len_mean      | 663       |
|    ep_rew_mean      | -34.4     |
| time/               |           |
|    episodes         | 484       |
|    fps              | 588       |
|    time_elapsed     | 480       |
|    total_timesteps  | 282534    |
| train/              |           |
|    actor_loss       | 3.54      |
|    critic_loss      | 5.63      |
|    learning_rate    | 0.0003    |
|    n_updates        | 377090    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -33.200882 |
| rollout/            |            |
|    ep_len_mean      | 660        |
|    ep_rew_mean      | -33.2      |
| time/               |            |
|    episodes         | 488        |
|    fps              | 588        |
|    time_elapsed     | 483        |
|    total_timesteps  | 284514     |
| train/              |            |
|    actor_loss       | 2.45       |
|    critic_loss      | 5.58       |
|    learning_rate    | 0.0003     |
|    n_updates        | 377420     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -32.716835 |
| rollout/            |            |
|    ep_len_mean      | 647        |
|    ep_rew_mean      | -32.7      |
| time/               |            |
|    episodes         | 492        |
|    fps              | 588        |
|    time_elapsed     | 490        |
|    total_timesteps  | 288588     |
| train/              |            |
|    actor_loss       | 1.77       |
|    critic_loss      | 5.18       |
|    learning_rate    | 0.0003     |
|    n_updates        | 378099     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -30.14949 |
| rollout/            |           |
|    ep_len_mean      | 647       |
|    ep_rew_mean      | -30.1     |
| time/               |           |
|    episodes         | 496       |
|    fps              | 588       |
|    time_elapsed     | 492       |
|    total_timesteps  | 290088    |
| train/              |           |
|    actor_loss       | 3.28      |
|    critic_loss      | 5.29      |
|    learning_rate    | 0.0003    |
|    n_updates        | 378349    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -30.15019 |
| rollout/            |           |
|    ep_len_mean      | 638       |
|    ep_rew_mean      | -30.2     |
| time/               |           |
|    episodes         | 500       |
|    fps              | 588       |
|    time_elapsed     | 498       |
|    total_timesteps  | 293412    |
| train/              |           |
|    actor_loss       | 3.06      |
|    critic_loss      | 9.11      |
|    learning_rate    | 0.0003    |
|    n_updates        | 378903    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -26.265749 |
| rollout/            |            |
|    ep_len_mean      | 646        |
|    ep_rew_mean      | -26.3      |
| time/               |            |
|    episodes         | 504        |
|    fps              | 588        |
|    time_elapsed     | 501        |
|    total_timesteps  | 295452     |
| train/              |            |
|    actor_loss       | 5.13       |
|    critic_loss      | 3.93       |
|    learning_rate    | 0.0003     |
|    n_updates        | 379243     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -26.554848 |
| eval/               |            |
|    mean_ep_length   | 714        |
|    mean_reward      | -22.8      |
| time/               |            |
|    total_timesteps  | 300000     |
| train/              |            |
|    actor_loss       | 4.51       |
|    critic_loss      | 9.01       |
|    learning_rate    | 0.0003     |
|    n_updates        | 380001     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -25.863092 |
| rollout/            |            |
|    ep_len_mean      | 653        |
|    ep_rew_mean      | -25.9      |
| time/               |            |
|    episodes         | 508        |
|    fps              | 585        |
|    time_elapsed     | 512        |
|    total_timesteps  | 300300     |
| train/              |            |
|    actor_loss       | 2.45       |
|    critic_loss      | 4.64       |
|    learning_rate    | 0.0003     |
|    n_updates        | 380051     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -26.02333 |
| rollout/            |           |
|    ep_len_mean      | 662       |
|    ep_rew_mean      | -26       |
| time/               |           |
|    episodes         | 512       |
|    fps              | 586       |
|    time_elapsed     | 514       |
|    total_timesteps  | 301752    |
| train/              |           |
|    actor_loss       | 2.5       |
|    critic_loss      | 8.02      |
|    learning_rate    | 0.0003    |
|    n_updates        | 380293    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -26.20667 |
| rollout/            |           |
|    ep_len_mean      | 669       |
|    ep_rew_mean      | -26.2     |
| time/               |           |
|    episodes         | 516       |
|    fps              | 586       |
|    time_elapsed     | 520       |
|    total_timesteps  | 305412    |
| train/              |           |
|    actor_loss       | 4.07      |
|    critic_loss      | 2.88      |
|    learning_rate    | 0.0003    |
|    n_updates        | 380903    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -25.140633 |
| rollout/            |            |
|    ep_len_mean      | 682        |
|    ep_rew_mean      | -25.1      |
| time/               |            |
|    episodes         | 520        |
|    fps              | 586        |
|    time_elapsed     | 524        |
|    total_timesteps  | 307836     |
| train/              |            |
|    actor_loss       | 1.66       |
|    critic_loss      | 12         |
|    learning_rate    | 0.0003     |
|    n_updates        | 381307     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -24.286386 |
| rollout/            |            |
|    ep_len_mean      | 698        |
|    ep_rew_mean      | -24.3      |
| time/               |            |
|    episodes         | 524        |
|    fps              | 586        |
|    time_elapsed     | 533        |
|    total_timesteps  | 313038     |
| train/              |            |
|    actor_loss       | 2.33       |
|    critic_loss      | 2.75       |
|    learning_rate    | 0.0003     |
|    n_updates        | 382174     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -23.276035 |
| rollout/            |            |
|    ep_len_mean      | 720        |
|    ep_rew_mean      | -23.3      |
| time/               |            |
|    episodes         | 528        |
|    fps              | 586        |
|    time_elapsed     | 539        |
|    total_timesteps  | 316410     |
| train/              |            |
|    actor_loss       | 4.79       |
|    critic_loss      | 7.59       |
|    learning_rate    | 0.0003     |
|    n_updates        | 382736     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -24.368532 |
| rollout/            |            |
|    ep_len_mean      | 737        |
|    ep_rew_mean      | -24.4      |
| time/               |            |
|    episodes         | 532        |
|    fps              | 586        |
|    time_elapsed     | 544        |
|    total_timesteps  | 319836     |
| train/              |            |
|    actor_loss       | 3.1        |
|    critic_loss      | 4.79       |
|    learning_rate    | 0.0003     |
|    n_updates        | 383307     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -22.850288 |
| rollout/            |            |
|    ep_len_mean      | 740        |
|    ep_rew_mean      | -22.9      |
| time/               |            |
|    episodes         | 536        |
|    fps              | 587        |
|    time_elapsed     | 550        |
|    total_timesteps  | 323244     |
| train/              |            |
|    actor_loss       | 4.68       |
|    critic_loss      | 4.44       |
|    learning_rate    | 0.0003     |
|    n_updates        | 383875     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -23.488945 |
| rollout/            |            |
|    ep_len_mean      | 732        |
|    ep_rew_mean      | -23.5      |
| time/               |            |
|    episodes         | 540        |
|    fps              | 587        |
|    time_elapsed     | 554        |
|    total_timesteps  | 325836     |
| train/              |            |
|    actor_loss       | 5.74       |
|    critic_loss      | 5.19       |
|    learning_rate    | 0.0003     |
|    n_updates        | 384307     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -23.905855 |
| rollout/            |            |
|    ep_len_mean      | 730        |
|    ep_rew_mean      | -23.9      |
| time/               |            |
|    episodes         | 544        |
|    fps              | 587        |
|    time_elapsed     | 560        |
|    total_timesteps  | 329412     |
| train/              |            |
|    actor_loss       | 3.41       |
|    critic_loss      | 5.6        |
|    learning_rate    | 0.0003     |
|    n_updates        | 384903     |
------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 718      |
|    mean_reward     | -20.2    |
| time/              |          |
|    total_timesteps | 330000   |
| train/             |          |
|    actor_loss      | 4.97     |
|    critic_loss     | 3.68     |
|    learning_rate   | 0.0003   |
|    n_updates       | 385001   |
---------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -25.696253 |
| rollout/            |            |
|    ep_len_mean      | 735        |
|    ep_rew_mean      | -25.7      |
| time/               |            |
|    episodes         | 548        |
|    fps              | 584        |
|    time_elapsed     | 570        |
|    total_timesteps  | 333558     |
| train/              |            |
|    actor_loss       | 3.37       |
|    critic_loss      | 3.63       |
|    learning_rate    | 0.0003     |
|    n_updates        | 385594     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -28.535294 |
| rollout/            |            |
|    ep_len_mean      | 731        |
|    ep_rew_mean      | -28.5      |
| time/               |            |
|    episodes         | 552        |
|    fps              | 584        |
|    time_elapsed     | 575        |
|    total_timesteps  | 336336     |
| train/              |            |
|    actor_loss       | 6.14       |
|    critic_loss      | 6.05       |
|    learning_rate    | 0.0003     |
|    n_updates        | 386057     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -29.00557 |
| rollout/            |           |
|    ep_len_mean      | 739       |
|    ep_rew_mean      | -29       |
| time/               |           |
|    episodes         | 556       |
|    fps              | 584       |
|    time_elapsed     | 580       |
|    total_timesteps  | 339060    |
| train/              |           |
|    actor_loss       | 6.64      |
|    critic_loss      | 6.29      |
|    learning_rate    | 0.0003    |
|    n_updates        | 386511    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -31.014727 |
| rollout/            |            |
|    ep_len_mean      | 750        |
|    ep_rew_mean      | -31        |
| time/               |            |
|    episodes         | 560        |
|    fps              | 584        |
|    time_elapsed     | 584        |
|    total_timesteps  | 341454     |
| train/              |            |
|    actor_loss       | 4.75       |
|    critic_loss      | 4.05       |
|    learning_rate    | 0.0003     |
|    n_updates        | 386910     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -28.704824 |
| rollout/            |            |
|    ep_len_mean      | 750        |
|    ep_rew_mean      | -28.7      |
| time/               |            |
|    episodes         | 564        |
|    fps              | 584        |
|    time_elapsed     | 587        |
|    total_timesteps  | 343836     |
| train/              |            |
|    actor_loss       | 4.87       |
|    critic_loss      | 4.08       |
|    learning_rate    | 0.0003     |
|    n_updates        | 387307     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -28.751158 |
| rollout/            |            |
|    ep_len_mean      | 758        |
|    ep_rew_mean      | -28.8      |
| time/               |            |
|    episodes         | 568        |
|    fps              | 585        |
|    time_elapsed     | 596        |
|    total_timesteps  | 348846     |
| train/              |            |
|    actor_loss       | 6.2        |
|    critic_loss      | 7.83       |
|    learning_rate    | 0.0003     |
|    n_updates        | 388142     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -27.62992 |
| rollout/            |           |
|    ep_len_mean      | 762       |
|    ep_rew_mean      | -27.6     |
| time/               |           |
|    episodes         | 572       |
|    fps              | 585       |
|    time_elapsed     | 599       |
|    total_timesteps  | 350682    |
| train/              |           |
|    actor_loss       | 5.85      |
|    critic_loss      | 8.06      |
|    learning_rate    | 0.0003    |
|    n_updates        | 388448    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -27.845848 |
| rollout/            |            |
|    ep_len_mean      | 766        |
|    ep_rew_mean      | -27.8      |
| time/               |            |
|    episodes         | 576        |
|    fps              | 585        |
|    time_elapsed     | 605        |
|    total_timesteps  | 354618     |
| train/              |            |
|    actor_loss       | 4.6        |
|    critic_loss      | 4.93       |
|    learning_rate    | 0.0003     |
|    n_updates        | 389104     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -25.476702 |
| rollout/            |            |
|    ep_len_mean      | 771        |
|    ep_rew_mean      | -25.5      |
| time/               |            |
|    episodes         | 580        |
|    fps              | 585        |
|    time_elapsed     | 609        |
|    total_timesteps  | 356682     |
| train/              |            |
|    actor_loss       | 4.74       |
|    critic_loss      | 4.19       |
|    learning_rate    | 0.0003     |
|    n_updates        | 389448     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -25.946735 |
| eval/               |            |
|    mean_ep_length   | 935        |
|    mean_reward      | -32.7      |
| time/               |            |
|    total_timesteps  | 360000     |
| train/              |            |
|    actor_loss       | 5.14       |
|    critic_loss      | 3.56       |
|    learning_rate    | 0.0003     |
|    n_updates        | 390001     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -27.179626 |
| rollout/            |            |
|    ep_len_mean      | 775        |
|    ep_rew_mean      | -27.2      |
| time/               |            |
|    episodes         | 584        |
|    fps              | 581        |
|    time_elapsed     | 620        |
|    total_timesteps  | 361092     |
| train/              |            |
|    actor_loss       | 5.98       |
|    critic_loss      | 3.74       |
|    learning_rate    | 0.0003     |
|    n_updates        | 390183     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -26.133938 |
| rollout/            |            |
|    ep_len_mean      | 785        |
|    ep_rew_mean      | -26.1      |
| time/               |            |
|    episodes         | 588        |
|    fps              | 581        |
|    time_elapsed     | 625        |
|    total_timesteps  | 363702     |
| train/              |            |
|    actor_loss       | 4.89       |
|    critic_loss      | 5.92       |
|    learning_rate    | 0.0003     |
|    n_updates        | 390618     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -26.976076 |
| rollout/            |            |
|    ep_len_mean      | 798        |
|    ep_rew_mean      | -27        |
| time/               |            |
|    episodes         | 592        |
|    fps              | 582        |
|    time_elapsed     | 633        |
|    total_timesteps  | 368682     |
| train/              |            |
|    actor_loss       | 5.62       |
|    critic_loss      | 2.85       |
|    learning_rate    | 0.0003     |
|    n_updates        | 391448     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -28.557364 |
| rollout/            |            |
|    ep_len_mean      | 802        |
|    ep_rew_mean      | -28.6      |
| time/               |            |
|    episodes         | 596        |
|    fps              | 582        |
|    time_elapsed     | 637        |
|    total_timesteps  | 370998     |
| train/              |            |
|    actor_loss       | 3.46       |
|    critic_loss      | 2.76       |
|    learning_rate    | 0.0003     |
|    n_updates        | 391834     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -27.299736 |
| rollout/            |            |
|    ep_len_mean      | 812        |
|    ep_rew_mean      | -27.3      |
| time/               |            |
|    episodes         | 600        |
|    fps              | 582        |
|    time_elapsed     | 643        |
|    total_timesteps  | 374592     |
| train/              |            |
|    actor_loss       | 4.51       |
|    critic_loss      | 3.62       |
|    learning_rate    | 0.0003     |
|    n_updates        | 392433     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -30.435093 |
| rollout/            |            |
|    ep_len_mean      | 815        |
|    ep_rew_mean      | -30.4      |
| time/               |            |
|    episodes         | 604        |
|    fps              | 582        |
|    time_elapsed     | 649        |
|    total_timesteps  | 378066     |
| train/              |            |
|    actor_loss       | 5.01       |
|    critic_loss      | 10.6       |
|    learning_rate    | 0.0003     |
|    n_updates        | 393012     |
------------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | -29.4475 |
| rollout/            |          |
|    ep_len_mean      | 818      |
|    ep_rew_mean      | -29.4    |
| time/               |          |
|    episodes         | 608      |
|    fps              | 582      |
|    time_elapsed     | 653      |
|    total_timesteps  | 380748   |
| train/              |          |
|    actor_loss       | 4.9      |
|    critic_loss      | 2.15     |
|    learning_rate    | 0.0003   |
|    n_updates        | 393459   |
----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -30.061298 |
| rollout/            |            |
|    ep_len_mean      | 811        |
|    ep_rew_mean      | -30.1      |
| time/               |            |
|    episodes         | 612        |
|    fps              | 582        |
|    time_elapsed     | 659        |
|    total_timesteps  | 384246     |
| train/              |            |
|    actor_loss       | 4.72       |
|    critic_loss      | 6          |
|    learning_rate    | 0.0003     |
|    n_updates        | 394042     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -29.375462 |
| rollout/            |            |
|    ep_len_mean      | 813        |
|    ep_rew_mean      | -29.4      |
| time/               |            |
|    episodes         | 616        |
|    fps              | 582        |
|    time_elapsed     | 663        |
|    total_timesteps  | 386592     |
| train/              |            |
|    actor_loss       | 6.54       |
|    critic_loss      | 3.48       |
|    learning_rate    | 0.0003     |
|    n_updates        | 394433     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -30.238348 |
| rollout/            |            |
|    ep_len_mean      | 809        |
|    ep_rew_mean      | -30.2      |
| time/               |            |
|    episodes         | 620        |
|    fps              | 582        |
|    time_elapsed     | 665        |
|    total_timesteps  | 388272     |
| train/              |            |
|    actor_loss       | 4.54       |
|    critic_loss      | 3.86       |
|    learning_rate    | 0.0003     |
|    n_updates        | 394713     |
------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 842      |
|    mean_reward     | -9.63    |
| time/              |          |
|    total_timesteps | 390000   |
| train/             |          |
|    actor_loss      | 3.8      |
|    critic_loss     | 3.9      |
|    learning_rate   | 0.0003   |
|    n_updates       | 395001   |
---------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -28.373568 |
| rollout/            |            |
|    ep_len_mean      | 809        |
|    ep_rew_mean      | -28.4      |
| time/               |            |
|    episodes         | 624        |
|    fps              | 580        |
|    time_elapsed     | 678        |
|    total_timesteps  | 393966     |
| train/              |            |
|    actor_loss       | 6.05       |
|    critic_loss      | 2.77       |
|    learning_rate    | 0.0003     |
|    n_updates        | 395662     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -27.112906 |
| rollout/            |            |
|    ep_len_mean      | 809        |
|    ep_rew_mean      | -27.1      |
| time/               |            |
|    episodes         | 628        |
|    fps              | 580        |
|    time_elapsed     | 684        |
|    total_timesteps  | 397338     |
| train/              |            |
|    actor_loss       | 4.6        |
|    critic_loss      | 3.25       |
|    learning_rate    | 0.0003     |
|    n_updates        | 396224     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -26.439577 |
| rollout/            |            |
|    ep_len_mean      | 806        |
|    ep_rew_mean      | -26.4      |
| time/               |            |
|    episodes         | 632        |
|    fps              | 580        |
|    time_elapsed     | 689        |
|    total_timesteps  | 400272     |
| train/              |            |
|    actor_loss       | 7.48       |
|    critic_loss      | 3.4        |
|    learning_rate    | 0.0003     |
|    n_updates        | 396713     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -24.416191 |
| rollout/            |            |
|    ep_len_mean      | 819        |
|    ep_rew_mean      | -24.4      |
| time/               |            |
|    episodes         | 636        |
|    fps              | 580        |
|    time_elapsed     | 696        |
|    total_timesteps  | 404748     |
| train/              |            |
|    actor_loss       | 7.11       |
|    critic_loss      | 5.48       |
|    learning_rate    | 0.0003     |
|    n_updates        | 397459     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -25.297945 |
| rollout/            |            |
|    ep_len_mean      | 826        |
|    ep_rew_mean      | -25.3      |
| time/               |            |
|    episodes         | 640        |
|    fps              | 581        |
|    time_elapsed     | 703        |
|    total_timesteps  | 408972     |
| train/              |            |
|    actor_loss       | 4.98       |
|    critic_loss      | 6.63       |
|    learning_rate    | 0.0003     |
|    n_updates        | 398163     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -23.316465 |
| rollout/            |            |
|    ep_len_mean      | 828        |
|    ep_rew_mean      | -23.3      |
| time/               |            |
|    episodes         | 644        |
|    fps              | 581        |
|    time_elapsed     | 709        |
|    total_timesteps  | 412272     |
| train/              |            |
|    actor_loss       | 6.65       |
|    critic_loss      | 2.85       |
|    learning_rate    | 0.0003     |
|    n_updates        | 398713     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -19.76171 |
| rollout/            |           |
|    ep_len_mean      | 833       |
|    ep_rew_mean      | -19.8     |
| time/               |           |
|    episodes         | 648       |
|    fps              | 581       |
|    time_elapsed     | 715       |
|    total_timesteps  | 416064    |
| train/              |           |
|    actor_loss       | 6.3       |
|    critic_loss      | 3.14      |
|    learning_rate    | 0.0003    |
|    n_updates        | 399345    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -20.902855 |
| rollout/            |            |
|    ep_len_mean      | 828        |
|    ep_rew_mean      | -20.9      |
| time/               |            |
|    episodes         | 652        |
|    fps              | 581        |
|    time_elapsed     | 720        |
|    total_timesteps  | 418854     |
| train/              |            |
|    actor_loss       | 8.04       |
|    critic_loss      | 7.04       |
|    learning_rate    | 0.0003     |
|    n_updates        | 399810     |
------------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 820      |
|    mean_reward     | 34.6     |
| time/              |          |
|    total_timesteps | 420000   |
| train/             |          |
|    actor_loss      | 5.34     |
|    critic_loss     | 4.88     |
|    learning_rate   | 0.0003   |
|    n_updates       | 400001   |
---------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -19.98599 |
| rollout/            |           |
|    ep_len_mean      | 837       |
|    ep_rew_mean      | -20       |
| time/               |           |
|    episodes         | 656       |
|    fps              | 578       |
|    time_elapsed     | 730       |
|    total_timesteps  | 422748    |
| train/              |           |
|    actor_loss       | 6.16      |
|    critic_loss      | 3.53      |
|    learning_rate    | 0.0003    |
|    n_updates        | 400459    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -20.293966 |
| rollout/            |            |
|    ep_len_mean      | 836        |
|    ep_rew_mean      | -20.3      |
| time/               |            |
|    episodes         | 660        |
|    fps              | 579        |
|    time_elapsed     | 735        |
|    total_timesteps  | 426108     |
| train/              |            |
|    actor_loss       | 8.33       |
|    critic_loss      | 4.78       |
|    learning_rate    | 0.0003     |
|    n_updates        | 401019     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -18.028181 |
| rollout/            |            |
|    ep_len_mean      | 848        |
|    ep_rew_mean      | -18        |
| time/               |            |
|    episodes         | 664        |
|    fps              | 579        |
|    time_elapsed     | 740        |
|    total_timesteps  | 428964     |
| train/              |            |
|    actor_loss       | 6.09       |
|    critic_loss      | 2.79       |
|    learning_rate    | 0.0003     |
|    n_updates        | 401495     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -17.891447 |
| rollout/            |            |
|    ep_len_mean      | 852        |
|    ep_rew_mean      | -17.9      |
| time/               |            |
|    episodes         | 668        |
|    fps              | 579        |
|    time_elapsed     | 748        |
|    total_timesteps  | 434064     |
| train/              |            |
|    actor_loss       | 6.92       |
|    critic_loss      | 2.71       |
|    learning_rate    | 0.0003     |
|    n_updates        | 402345     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -18.876883 |
| rollout/            |            |
|    ep_len_mean      | 850        |
|    ep_rew_mean      | -18.9      |
| time/               |            |
|    episodes         | 672        |
|    fps              | 579        |
|    time_elapsed     | 751        |
|    total_timesteps  | 435642     |
| train/              |            |
|    actor_loss       | 5.18       |
|    critic_loss      | 3.02       |
|    learning_rate    | 0.0003     |
|    n_updates        | 402608     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -18.525585 |
| rollout/            |            |
|    ep_len_mean      | 853        |
|    ep_rew_mean      | -18.5      |
| time/               |            |
|    episodes         | 676        |
|    fps              | 579        |
|    time_elapsed     | 758        |
|    total_timesteps  | 440064     |
| train/              |            |
|    actor_loss       | 6.41       |
|    critic_loss      | 2.19       |
|    learning_rate    | 0.0003     |
|    n_updates        | 403345     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -18.754402 |
| rollout/            |            |
|    ep_len_mean      | 859        |
|    ep_rew_mean      | -18.8      |
| time/               |            |
|    episodes         | 680        |
|    fps              | 579        |
|    time_elapsed     | 761        |
|    total_timesteps  | 441882     |
| train/              |            |
|    actor_loss       | 6.37       |
|    critic_loss      | 3.03       |
|    learning_rate    | 0.0003     |
|    n_updates        | 403648     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -17.683144 |
| rollout/            |            |
|    ep_len_mean      | 858        |
|    ep_rew_mean      | -17.7      |
| time/               |            |
|    episodes         | 684        |
|    fps              | 580        |
|    time_elapsed     | 768        |
|    total_timesteps  | 446070     |
| train/              |            |
|    actor_loss       | 5.53       |
|    critic_loss      | 3.1        |
|    learning_rate    | 0.0003     |
|    n_updates        | 404346     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -17.181128 |
| rollout/            |            |
|    ep_len_mean      | 851        |
|    ep_rew_mean      | -17.2      |
| time/               |            |
|    episodes         | 688        |
|    fps              | 580        |
|    time_elapsed     | 774        |
|    total_timesteps  | 449514     |
| train/              |            |
|    actor_loss       | 5.68       |
|    critic_loss      | 4.17       |
|    learning_rate    | 0.0003     |
|    n_updates        | 404920     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -17.114977 |
| eval/               |            |
|    mean_ep_length   | 1e+03      |
|    mean_reward      | 47.8       |
| time/               |            |
|    total_timesteps  | 450000     |
| train/              |            |
|    actor_loss       | 6.52       |
|    critic_loss      | 3.03       |
|    learning_rate    | 0.0003     |
|    n_updates        | 405001     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -18.259483 |
| rollout/            |            |
|    ep_len_mean      | 838        |
|    ep_rew_mean      | -18.3      |
| time/               |            |
|    episodes         | 692        |
|    fps              | 577        |
|    time_elapsed     | 783        |
|    total_timesteps  | 452064     |
| train/              |            |
|    actor_loss       | 7.67       |
|    critic_loss      | 2.86       |
|    learning_rate    | 0.0003     |
|    n_updates        | 405345     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -18.392122 |
| rollout/            |            |
|    ep_len_mean      | 831        |
|    ep_rew_mean      | -18.4      |
| time/               |            |
|    episodes         | 696        |
|    fps              | 577        |
|    time_elapsed     | 786        |
|    total_timesteps  | 453888     |
| train/              |            |
|    actor_loss       | 7.46       |
|    critic_loss      | 3.09       |
|    learning_rate    | 0.0003     |
|    n_updates        | 405649     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -16.889376 |
| rollout/            |            |
|    ep_len_mean      | 836        |
|    ep_rew_mean      | -16.9      |
| time/               |            |
|    episodes         | 700        |
|    fps              | 577        |
|    time_elapsed     | 793        |
|    total_timesteps  | 458070     |
| train/              |            |
|    actor_loss       | 5.66       |
|    critic_loss      | 3.01       |
|    learning_rate    | 0.0003     |
|    n_updates        | 406346     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -17.26297 |
| rollout/            |           |
|    ep_len_mean      | 831       |
|    ep_rew_mean      | -17.3     |
| time/               |           |
|    episodes         | 704       |
|    fps              | 577       |
|    time_elapsed     | 796       |
|    total_timesteps  | 459978    |
| train/              |           |
|    actor_loss       | 6.32      |
|    critic_loss      | 3.75      |
|    learning_rate    | 0.0003    |
|    n_updates        | 406664    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -16.134716 |
| rollout/            |            |
|    ep_len_mean      | 827        |
|    ep_rew_mean      | -16.1      |
| time/               |            |
|    episodes         | 708        |
|    fps              | 577        |
|    time_elapsed     | 804        |
|    total_timesteps  | 465126     |
| train/              |            |
|    actor_loss       | 7.49       |
|    critic_loss      | 5.11       |
|    learning_rate    | 0.0003     |
|    n_updates        | 407522     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -14.928597 |
| rollout/            |            |
|    ep_len_mean      | 835        |
|    ep_rew_mean      | -14.9      |
| time/               |            |
|    episodes         | 712        |
|    fps              | 577        |
|    time_elapsed     | 808        |
|    total_timesteps  | 467310     |
| train/              |            |
|    actor_loss       | 5.93       |
|    critic_loss      | 3.76       |
|    learning_rate    | 0.0003     |
|    n_updates        | 407886     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -12.074262 |
| rollout/            |            |
|    ep_len_mean      | 845        |
|    ep_rew_mean      | -12.1      |
| time/               |            |
|    episodes         | 716        |
|    fps              | 578        |
|    time_elapsed     | 815        |
|    total_timesteps  | 471390     |
| train/              |            |
|    actor_loss       | 7.56       |
|    critic_loss      | 3.18       |
|    learning_rate    | 0.0003     |
|    n_updates        | 408566     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -7.218013 |
| rollout/            |           |
|    ep_len_mean      | 855       |
|    ep_rew_mean      | -7.22     |
| time/               |           |
|    episodes         | 720       |
|    fps              | 578       |
|    time_elapsed     | 821       |
|    total_timesteps  | 475416    |
| train/              |           |
|    actor_loss       | 5.96      |
|    critic_loss      | 12        |
|    learning_rate    | 0.0003    |
|    n_updates        | 409237    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -8.2006645 |
| rollout/            |            |
|    ep_len_mean      | 848        |
|    ep_rew_mean      | -8.2       |
| time/               |            |
|    episodes         | 724        |
|    fps              | 578        |
|    time_elapsed     | 825        |
|    total_timesteps  | 477390     |
| train/              |            |
|    actor_loss       | 6.42       |
|    critic_loss      | 3.89       |
|    learning_rate    | 0.0003     |
|    n_updates        | 409566     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -6.6425767 |
| eval/               |            |
|    mean_ep_length   | 738        |
|    mean_reward      | -8.4       |
| time/               |            |
|    total_timesteps  | 480000     |
| train/              |            |
|    actor_loss       | 7.14       |
|    critic_loss      | 4.37       |
|    learning_rate    | 0.0003     |
|    n_updates        | 410001     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -5.420981 |
| rollout/            |           |
|    ep_len_mean      | 848       |
|    ep_rew_mean      | -5.42     |
| time/               |           |
|    episodes         | 728       |
|    fps              | 576       |
|    time_elapsed     | 837       |
|    total_timesteps  | 482838    |
| train/              |           |
|    actor_loss       | 6.23      |
|    critic_loss      | 4.14      |
|    learning_rate    | 0.0003    |
|    n_updates        | 410474    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -4.7041864 |
| rollout/            |            |
|    ep_len_mean      | 844        |
|    ep_rew_mean      | -4.7       |
| time/               |            |
|    episodes         | 732        |
|    fps              | 576        |
|    time_elapsed     | 840        |
|    total_timesteps  | 484830     |
| train/              |            |
|    actor_loss       | 8.34       |
|    critic_loss      | 3.26       |
|    learning_rate    | 0.0003     |
|    n_updates        | 410806     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -5.134849 |
| rollout/            |           |
|    ep_len_mean      | 836       |
|    ep_rew_mean      | -5.13     |
| time/               |           |
|    episodes         | 736       |
|    fps              | 576       |
|    time_elapsed     | 848       |
|    total_timesteps  | 489126    |
| train/              |           |
|    actor_loss       | 8.93      |
|    critic_loss      | 8.31      |
|    learning_rate    | 0.0003    |
|    n_updates        | 411522    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | -4.268117 |
| rollout/            |           |
|    ep_len_mean      | 838       |
|    ep_rew_mean      | -4.27     |
| time/               |           |
|    episodes         | 740       |
|    fps              | 576       |
|    time_elapsed     | 853       |
|    total_timesteps  | 492222    |
| train/              |           |
|    actor_loss       | 7.27      |
|    critic_loss      | 2.71      |
|    learning_rate    | 0.0003    |
|    n_updates        | 412038    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -4.7292933 |
| rollout/            |            |
|    ep_len_mean      | 839        |
|    ep_rew_mean      | -4.73      |
| time/               |            |
|    episodes         | 744        |
|    fps              | 577        |
|    time_elapsed     | 858        |
|    total_timesteps  | 495606     |
| train/              |            |
|    actor_loss       | 4.65       |
|    critic_loss      | 4.81       |
|    learning_rate    | 0.0003     |
|    n_updates        | 412602     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -5.9130225 |
| rollout/            |            |
|    ep_len_mean      | 836        |
|    ep_rew_mean      | -5.91      |
| time/               |            |
|    episodes         | 748        |
|    fps              | 577        |
|    time_elapsed     | 867        |
|    total_timesteps  | 500838     |
| train/              |            |
|    actor_loss       | 5.85       |
|    critic_loss      | 3.47       |
|    learning_rate    | 0.0003     |
|    n_updates        | 413474     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -2.0836816 |
| rollout/            |            |
|    ep_len_mean      | 844        |
|    ep_rew_mean      | -2.08      |
| time/               |            |
|    episodes         | 752        |
|    fps              | 577        |
|    time_elapsed     | 871        |
|    total_timesteps  | 503310     |
| train/              |            |
|    actor_loss       | 6.53       |
|    critic_loss      | 3.52       |
|    learning_rate    | 0.0003     |
|    n_updates        | 413886     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | 0.59475374 |
| rollout/            |            |
|    ep_len_mean      | 852        |
|    ep_rew_mean      | 0.595      |
| time/               |            |
|    episodes         | 756        |
|    fps              | 577        |
|    time_elapsed     | 878        |
|    total_timesteps  | 507606     |
| train/              |            |
|    actor_loss       | 5.62       |
|    critic_loss      | 4.79       |
|    learning_rate    | 0.0003     |
|    n_updates        | 414602     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 1.5126784 |
| eval/               |           |
|    mean_ep_length   | 1e+03     |
|    mean_reward      | 41.9      |
| time/               |           |
|    total_timesteps  | 510000    |
| train/              |           |
|    actor_loss       | 6.02      |
|    critic_loss      | 5.09      |
|    learning_rate    | 0.0003    |
|    n_updates        | 415001    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 1.3688421 |
| rollout/            |           |
|    ep_len_mean      | 842       |
|    ep_rew_mean      | 1.37      |
| time/               |           |
|    episodes         | 760       |
|    fps              | 574       |
|    time_elapsed     | 887       |
|    total_timesteps  | 510174    |
| train/              |           |
|    actor_loss       | 8.03      |
|    critic_loss      | 4.13      |
|    learning_rate    | 0.0003    |
|    n_updates        | 415030    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | 0.37955415 |
| rollout/            |            |
|    ep_len_mean      | 851        |
|    ep_rew_mean      | 0.38       |
| time/               |            |
|    episodes         | 764        |
|    fps              | 575        |
|    time_elapsed     | 893        |
|    total_timesteps  | 514086     |
| train/              |            |
|    actor_loss       | 6.34       |
|    critic_loss      | 3          |
|    learning_rate    | 0.0003     |
|    n_updates        | 415682     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | -1.2571156 |
| rollout/            |            |
|    ep_len_mean      | 839        |
|    ep_rew_mean      | -1.26      |
| time/               |            |
|    episodes         | 768        |
|    fps              | 575        |
|    time_elapsed     | 899        |
|    total_timesteps  | 517620     |
| train/              |            |
|    actor_loss       | 6.62       |
|    critic_loss      | 4.11       |
|    learning_rate    | 0.0003     |
|    n_updates        | 416271     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 0.7697285 |
| rollout/            |           |
|    ep_len_mean      | 847       |
|    ep_rew_mean      | 0.77      |
| time/               |           |
|    episodes         | 772       |
|    fps              | 575       |
|    time_elapsed     | 906       |
|    total_timesteps  | 521448    |
| train/              |           |
|    actor_loss       | 6.26      |
|    critic_loss      | 6.09      |
|    learning_rate    | 0.0003    |
|    n_updates        | 416909    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 2.1180243 |
| rollout/            |           |
|    ep_len_mean      | 857       |
|    ep_rew_mean      | 2.12      |
| time/               |           |
|    episodes         | 776       |
|    fps              | 575       |
|    time_elapsed     | 909       |
|    total_timesteps  | 523632    |
| train/              |           |
|    actor_loss       | 7.32      |
|    critic_loss      | 4.29      |
|    learning_rate    | 0.0003    |
|    n_updates        | 417273    |
-----------------------------------


-------------------------------------
| custom/             |             |
|    avg_reward_100ep | 0.078602985 |
| rollout/            |             |
|    ep_len_mean      | 837         |
|    ep_rew_mean      | 0.0786      |
| time/               |             |
|    episodes         | 780         |
|    fps              | 575         |
|    time_elapsed     | 913         |
|    total_timesteps  | 525666      |
| train/              |             |
|    actor_loss       | 6.39        |
|    critic_loss      | 4.61        |
|    learning_rate    | 0.0003      |
|    n_updates        | 417612      |
-------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | 0.78705704 |
| rollout/            |            |
|    ep_len_mean      | 832        |
|    ep_rew_mean      | 0.787      |
| time/               |            |
|    episodes         | 784        |
|    fps              | 575        |
|    time_elapsed     | 919        |
|    total_timesteps  | 529632     |
| train/              |            |
|    actor_loss       | 8.07       |
|    critic_loss      | 2.33       |
|    learning_rate    | 0.0003     |
|    n_updates        | 418273     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 2.3235536 |
| rollout/            |           |
|    ep_len_mean      | 837       |
|    ep_rew_mean      | 2.32      |
| time/               |           |
|    episodes         | 788       |
|    fps              | 575       |
|    time_elapsed     | 924       |
|    total_timesteps  | 532512    |
| train/              |           |
|    actor_loss       | 6.5       |
|    critic_loss      | 3.05      |
|    learning_rate    | 0.0003    |
|    n_updates        | 418753    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 6.3883476 |
| rollout/            |           |
|    ep_len_mean      | 842       |
|    ep_rew_mean      | 6.39      |
| time/               |           |
|    episodes         | 792       |
|    fps              | 576       |
|    time_elapsed     | 930       |
|    total_timesteps  | 536148    |
| train/              |           |
|    actor_loss       | 5.67      |
|    critic_loss      | 4.61      |
|    learning_rate    | 0.0003    |
|    n_updates        | 419359    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 8.363043 |
| eval/               |          |
|    mean_ep_length   | 888      |
|    mean_reward      | -38      |
| time/               |          |
|    total_timesteps  | 540000   |
| train/              |          |
|    actor_loss       | 8.62     |
|    critic_loss      | 3.43     |
|    learning_rate    | 0.0003   |
|    n_updates        | 420001   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 8.59162  |
| rollout/            |          |
|    ep_len_mean      | 851      |
|    ep_rew_mean      | 8.59     |
| time/               |          |
|    episodes         | 796      |
|    fps              | 573      |
|    time_elapsed     | 942      |
|    total_timesteps  | 540864   |
| train/              |          |
|    actor_loss       | 6.71     |
|    critic_loss      | 4.3      |
|    learning_rate    | 0.0003   |
|    n_updates        | 420145   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 8.218019 |
| rollout/            |          |
|    ep_len_mean      | 862      |
|    ep_rew_mean      | 8.22     |
| time/               |          |
|    episodes         | 800      |
|    fps              | 573      |
|    time_elapsed     | 947      |
|    total_timesteps  | 543582   |
| train/              |          |
|    actor_loss       | 6.83     |
|    critic_loss      | 2.62     |
|    learning_rate    | 0.0003   |
|    n_updates        | 420598   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 9.67533  |
| rollout/            |          |
|    ep_len_mean      | 855      |
|    ep_rew_mean      | 9.68     |
| time/               |          |
|    episodes         | 804      |
|    fps              | 574      |
|    time_elapsed     | 952      |
|    total_timesteps  | 546864   |
| train/              |          |
|    actor_loss       | 7.15     |
|    critic_loss      | 3.56     |
|    learning_rate    | 0.0003   |
|    n_updates        | 421145   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 8.651408 |
| rollout/            |          |
|    ep_len_mean      | 863      |
|    ep_rew_mean      | 8.65     |
| time/               |          |
|    episodes         | 808      |
|    fps              | 574      |
|    time_elapsed     | 958      |
|    total_timesteps  | 550494   |
| train/              |          |
|    actor_loss       | 7.25     |
|    critic_loss      | 3.72     |
|    learning_rate    | 0.0003   |
|    n_updates        | 421750   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 11.621018 |
| rollout/            |           |
|    ep_len_mean      | 870       |
|    ep_rew_mean      | 11.6      |
| time/               |           |
|    episodes         | 812       |
|    fps              | 574       |
|    time_elapsed     | 964       |
|    total_timesteps  | 553956    |
| train/              |           |
|    actor_loss       | 7.5       |
|    critic_loss      | 4.25      |
|    learning_rate    | 0.0003    |
|    n_updates        | 422327    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 13.395133 |
| rollout/            |           |
|    ep_len_mean      | 864       |
|    ep_rew_mean      | 13.4      |
| time/               |           |
|    episodes         | 816       |
|    fps              | 574       |
|    time_elapsed     | 969       |
|    total_timesteps  | 556830    |
| train/              |           |
|    actor_loss       | 5.73      |
|    critic_loss      | 2.85      |
|    learning_rate    | 0.0003    |
|    n_updates        | 422806    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 11.59577 |
| rollout/            |          |
|    ep_len_mean      | 859      |
|    ep_rew_mean      | 11.6     |
| time/               |          |
|    episodes         | 820      |
|    fps              | 574      |
|    time_elapsed     | 975      |
|    total_timesteps  | 560532   |
| train/              |          |
|    actor_loss       | 7.13     |
|    critic_loss      | 2.24     |
|    learning_rate    | 0.0003   |
|    n_updates        | 423423   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 13.291232 |
| rollout/            |           |
|    ep_len_mean      | 860       |
|    ep_rew_mean      | 13.3      |
| time/               |           |
|    episodes         | 824       |
|    fps              | 574       |
|    time_elapsed     | 983       |
|    total_timesteps  | 565218    |
| train/              |           |
|    actor_loss       | 6.76      |
|    critic_loss      | 3.29      |
|    learning_rate    | 0.0003    |
|    n_updates        | 424204    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 12.006521 |
| rollout/            |           |
|    ep_len_mean      | 860       |
|    ep_rew_mean      | 12        |
| time/               |           |
|    episodes         | 828       |
|    fps              | 574       |
|    time_elapsed     | 986       |
|    total_timesteps  | 566874    |
| train/              |           |
|    actor_loss       | 7.42      |
|    critic_loss      | 5.14      |
|    learning_rate    | 0.0003    |
|    n_updates        | 424480    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 12.267317 |
| eval/               |           |
|    mean_ep_length   | 1e+03     |
|    mean_reward      | 49.1      |
| time/               |           |
|    total_timesteps  | 570000    |
| train/              |           |
|    actor_loss       | 7.3       |
|    critic_loss      | 5.51      |
|    learning_rate    | 0.0003    |
|    n_updates        | 425001    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 11.563352 |
| rollout/            |           |
|    ep_len_mean      | 865       |
|    ep_rew_mean      | 11.6      |
| time/               |           |
|    episodes         | 832       |
|    fps              | 572       |
|    time_elapsed     | 998       |
|    total_timesteps  | 571518    |
| train/              |           |
|    actor_loss       | 6.64      |
|    critic_loss      | 2.9       |
|    learning_rate    | 0.0003    |
|    n_updates        | 425254    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 11.087822 |
| rollout/            |           |
|    ep_len_mean      | 866       |
|    ep_rew_mean      | 11.1      |
| time/               |           |
|    episodes         | 836       |
|    fps              | 572       |
|    time_elapsed     | 1004      |
|    total_timesteps  | 575454    |
| train/              |           |
|    actor_loss       | 7.51      |
|    critic_loss      | 3.36      |
|    learning_rate    | 0.0003    |
|    n_updates        | 425910    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 15.85583 |
| rollout/            |          |
|    ep_len_mean      | 866      |
|    ep_rew_mean      | 15.9     |
| time/               |          |
|    episodes         | 840      |
|    fps              | 572      |
|    time_elapsed     | 1009     |
|    total_timesteps  | 578532   |
| train/              |          |
|    actor_loss       | 7.94     |
|    critic_loss      | 3.57     |
|    learning_rate    | 0.0003   |
|    n_updates        | 426423   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 17.72052 |
| rollout/            |          |
|    ep_len_mean      | 870      |
|    ep_rew_mean      | 17.7     |
| time/               |          |
|    episodes         | 844      |
|    fps              | 573      |
|    time_elapsed     | 1017     |
|    total_timesteps  | 583374   |
| train/              |          |
|    actor_loss       | 7.81     |
|    critic_loss      | 3.11     |
|    learning_rate    | 0.0003   |
|    n_updates        | 427230   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 17.367346 |
| rollout/            |           |
|    ep_len_mean      | 870       |
|    ep_rew_mean      | 17.4      |
| time/               |           |
|    episodes         | 848       |
|    fps              | 573       |
|    time_elapsed     | 1024      |
|    total_timesteps  | 587412    |
| train/              |           |
|    actor_loss       | 7.57      |
|    critic_loss      | 9.28      |
|    learning_rate    | 0.0003    |
|    n_updates        | 427903    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 16.774233 |
| rollout/            |           |
|    ep_len_mean      | 870       |
|    ep_rew_mean      | 16.8      |
| time/               |           |
|    episodes         | 852       |
|    fps              | 573       |
|    time_elapsed     | 1029      |
|    total_timesteps  | 590532    |
| train/              |           |
|    actor_loss       | 6.66      |
|    critic_loss      | 6.75      |
|    learning_rate    | 0.0003    |
|    n_updates        | 428423    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 13.502515 |
| rollout/            |           |
|    ep_len_mean      | 863       |
|    ep_rew_mean      | 13.5      |
| time/               |           |
|    episodes         | 856       |
|    fps              | 573       |
|    time_elapsed     | 1035      |
|    total_timesteps  | 593994    |
| train/              |           |
|    actor_loss       | 7.6       |
|    critic_loss      | 7.38      |
|    learning_rate    | 0.0003    |
|    n_updates        | 429000    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 13.791273 |
| rollout/            |           |
|    ep_len_mean      | 866       |
|    ep_rew_mean      | 13.8      |
| time/               |           |
|    episodes         | 860       |
|    fps              | 573       |
|    time_elapsed     | 1040      |
|    total_timesteps  | 596772    |
| train/              |           |
|    actor_loss       | 6.83      |
|    critic_loss      | 3.29      |
|    learning_rate    | 0.0003    |
|    n_updates        | 429463    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 16.741968 |
| eval/               |           |
|    mean_ep_length   | 760       |
|    mean_reward      | 9.42      |
| time/               |           |
|    total_timesteps  | 600000    |
| train/              |           |
|    actor_loss       | 7.85      |
|    critic_loss      | 3.95      |
|    learning_rate    | 0.0003    |
|    n_updates        | 430001    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 17.043034 |
| rollout/            |           |
|    ep_len_mean      | 866       |
|    ep_rew_mean      | 17        |
| time/               |           |
|    episodes         | 864       |
|    fps              | 572       |
|    time_elapsed     | 1049      |
|    total_timesteps  | 600342    |
| train/              |           |
|    actor_loss       | 7.47      |
|    critic_loss      | 3.01      |
|    learning_rate    | 0.0003    |
|    n_updates        | 430058    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 16.572596 |
| rollout/            |           |
|    ep_len_mean      | 856       |
|    ep_rew_mean      | 16.6      |
| time/               |           |
|    episodes         | 868       |
|    fps              | 572       |
|    time_elapsed     | 1052      |
|    total_timesteps  | 602568    |
| train/              |           |
|    actor_loss       | 6.81      |
|    critic_loss      | 3.15      |
|    learning_rate    | 0.0003    |
|    n_updates        | 430429    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 14.896807 |
| rollout/            |           |
|    ep_len_mean      | 849       |
|    ep_rew_mean      | 14.9      |
| time/               |           |
|    episodes         | 872       |
|    fps              | 572       |
|    time_elapsed     | 1058      |
|    total_timesteps  | 605994    |
| train/              |           |
|    actor_loss       | 6.87      |
|    critic_loss      | 4.26      |
|    learning_rate    | 0.0003    |
|    n_updates        | 431000    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 14.205837 |
| rollout/            |           |
|    ep_len_mean      | 843       |
|    ep_rew_mean      | 14.2      |
| time/               |           |
|    episodes         | 876       |
|    fps              | 572       |
|    time_elapsed     | 1062      |
|    total_timesteps  | 608568    |
| train/              |           |
|    actor_loss       | 7.58      |
|    critic_loss      | 1.42      |
|    learning_rate    | 0.0003    |
|    n_updates        | 431429    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 17.343002 |
| rollout/            |           |
|    ep_len_mean      | 859       |
|    ep_rew_mean      | 17.3      |
| time/               |           |
|    episodes         | 880       |
|    fps              | 572       |
|    time_elapsed     | 1069      |
|    total_timesteps  | 612330    |
| train/              |           |
|    actor_loss       | 5.83      |
|    critic_loss      | 6.54      |
|    learning_rate    | 0.0003    |
|    n_updates        | 432056    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 19.716618 |
| rollout/            |           |
|    ep_len_mean      | 872       |
|    ep_rew_mean      | 19.7      |
| time/               |           |
|    episodes         | 884       |
|    fps              | 572       |
|    time_elapsed     | 1077      |
|    total_timesteps  | 617100    |
| train/              |           |
|    actor_loss       | 6.94      |
|    critic_loss      | 3.1       |
|    learning_rate    | 0.0003    |
|    n_updates        | 432851    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 20.690075 |
| rollout/            |           |
|    ep_len_mean      | 875       |
|    ep_rew_mean      | 20.7      |
| time/               |           |
|    episodes         | 888       |
|    fps              | 573       |
|    time_elapsed     | 1082      |
|    total_timesteps  | 620532    |
| train/              |           |
|    actor_loss       | 7.08      |
|    critic_loss      | 5.41      |
|    learning_rate    | 0.0003    |
|    n_updates        | 433423    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 21.433477 |
| rollout/            |           |
|    ep_len_mean      | 882       |
|    ep_rew_mean      | 21.4      |
| time/               |           |
|    episodes         | 892       |
|    fps              | 573       |
|    time_elapsed     | 1089      |
|    total_timesteps  | 624330    |
| train/              |           |
|    actor_loss       | 8.08      |
|    critic_loss      | 3.28      |
|    learning_rate    | 0.0003    |
|    n_updates        | 434056    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 24.35228 |
| rollout/            |          |
|    ep_len_mean      | 885      |
|    ep_rew_mean      | 24.4     |
| time/               |          |
|    episodes         | 896      |
|    fps              | 573      |
|    time_elapsed     | 1096     |
|    total_timesteps  | 628890   |
| train/              |          |
|    actor_loss       | 7.1      |
|    critic_loss      | 1.56     |
|    learning_rate    | 0.0003   |
|    n_updates        | 434816   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 25.157906 |
| eval/               |           |
|    mean_ep_length   | 726       |
|    mean_reward      | -14.9     |
| time/               |           |
|    total_timesteps  | 630000    |
| train/              |           |
|    actor_loss       | 7.08      |
|    critic_loss      | 4.53      |
|    learning_rate    | 0.0003    |
|    n_updates        | 435001    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 25.708542 |
| rollout/            |           |
|    ep_len_mean      | 880       |
|    ep_rew_mean      | 25.7      |
| time/               |           |
|    episodes         | 900       |
|    fps              | 571       |
|    time_elapsed     | 1105      |
|    total_timesteps  | 632352    |
| train/              |           |
|    actor_loss       | 8.09      |
|    critic_loss      | 4.26      |
|    learning_rate    | 0.0003    |
|    n_updates        | 435393    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 26.347345 |
| rollout/            |           |
|    ep_len_mean      | 894       |
|    ep_rew_mean      | 26.3      |
| time/               |           |
|    episodes         | 904       |
|    fps              | 572       |
|    time_elapsed     | 1111      |
|    total_timesteps  | 635994    |
| train/              |           |
|    actor_loss       | 6.12      |
|    critic_loss      | 4.01      |
|    learning_rate    | 0.0003    |
|    n_updates        | 436000    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 26.48888 |
| rollout/            |          |
|    ep_len_mean      | 894      |
|    ep_rew_mean      | 26.5     |
| time/               |          |
|    episodes         | 908      |
|    fps              | 572      |
|    time_elapsed     | 1117     |
|    total_timesteps  | 639540   |
| train/              |          |
|    actor_loss       | 7.32     |
|    critic_loss      | 2.13     |
|    learning_rate    | 0.0003   |
|    n_updates        | 436591   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 23.385025 |
| rollout/            |           |
|    ep_len_mean      | 881       |
|    ep_rew_mean      | 23.4      |
| time/               |           |
|    episodes         | 912       |
|    fps              | 572       |
|    time_elapsed     | 1121      |
|    total_timesteps  | 641994    |
| train/              |           |
|    actor_loss       | 6.55      |
|    critic_loss      | 2.88      |
|    learning_rate    | 0.0003    |
|    n_updates        | 437000    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 19.611792 |
| rollout/            |           |
|    ep_len_mean      | 875       |
|    ep_rew_mean      | 19.6      |
| time/               |           |
|    episodes         | 916       |
|    fps              | 572       |
|    time_elapsed     | 1126      |
|    total_timesteps  | 644532    |
| train/              |           |
|    actor_loss       | 5.7       |
|    critic_loss      | 2.89      |
|    learning_rate    | 0.0003    |
|    n_updates        | 437423    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 21.636675 |
| rollout/            |           |
|    ep_len_mean      | 884       |
|    ep_rew_mean      | 21.6      |
| time/               |           |
|    episodes         | 920       |
|    fps              | 572       |
|    time_elapsed     | 1133      |
|    total_timesteps  | 648870    |
| train/              |           |
|    actor_loss       | 6.58      |
|    critic_loss      | 1.68      |
|    learning_rate    | 0.0003    |
|    n_updates        | 438146    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 20.29816 |
| rollout/            |          |
|    ep_len_mean      | 882      |
|    ep_rew_mean      | 20.3     |
| time/               |          |
|    episodes         | 924      |
|    fps              | 572      |
|    time_elapsed     | 1138     |
|    total_timesteps  | 651690   |
| train/              |          |
|    actor_loss       | 5.93     |
|    critic_loss      | 4.31     |
|    learning_rate    | 0.0003   |
|    n_updates        | 438616   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 21.904427 |
| rollout/            |           |
|    ep_len_mean      | 878       |
|    ep_rew_mean      | 21.9      |
| time/               |           |
|    episodes         | 928       |
|    fps              | 572       |
|    time_elapsed     | 1146      |
|    total_timesteps  | 656532    |
| train/              |           |
|    actor_loss       | 7.01      |
|    critic_loss      | 2.74      |
|    learning_rate    | 0.0003    |
|    n_updates        | 439423    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 22.772837 |
| rollout/            |           |
|    ep_len_mean      | 866       |
|    ep_rew_mean      | 22.8      |
| time/               |           |
|    episodes         | 932       |
|    fps              | 572       |
|    time_elapsed     | 1148      |
|    total_timesteps  | 658092    |
| train/              |           |
|    actor_loss       | 6.44      |
|    critic_loss      | 2.55      |
|    learning_rate    | 0.0003    |
|    n_updates        | 439683    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 25.033657 |
| eval/               |           |
|    mean_ep_length   | 846       |
|    mean_reward      | 53.2      |
| time/               |           |
|    total_timesteps  | 660000    |
| train/              |           |
|    actor_loss       | 6.1       |
|    critic_loss      | 2.44      |
|    learning_rate    | 0.0003    |
|    n_updates        | 440001    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 24.284792 |
| rollout/            |           |
|    ep_len_mean      | 857       |
|    ep_rew_mean      | 24.3      |
| time/               |           |
|    episodes         | 936       |
|    fps              | 571       |
|    time_elapsed     | 1157      |
|    total_timesteps  | 660972    |
| train/              |           |
|    actor_loss       | 6.84      |
|    critic_loss      | 2.43      |
|    learning_rate    | 0.0003    |
|    n_updates        | 440163    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 24.578192 |
| rollout/            |           |
|    ep_len_mean      | 857       |
|    ep_rew_mean      | 24.6      |
| time/               |           |
|    episodes         | 940       |
|    fps              | 571       |
|    time_elapsed     | 1163      |
|    total_timesteps  | 664890    |
| train/              |           |
|    actor_loss       | 7.96      |
|    critic_loss      | 4.48      |
|    learning_rate    | 0.0003    |
|    n_updates        | 440816    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 25.772581 |
| rollout/            |           |
|    ep_len_mean      | 850       |
|    ep_rew_mean      | 25.8      |
| time/               |           |
|    episodes         | 944       |
|    fps              | 571       |
|    time_elapsed     | 1169      |
|    total_timesteps  | 668586    |
| train/              |           |
|    actor_loss       | 8.48      |
|    critic_loss      | 5.44      |
|    learning_rate    | 0.0003    |
|    n_updates        | 441432    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 23.789595 |
| rollout/            |           |
|    ep_len_mean      | 846       |
|    ep_rew_mean      | 23.8      |
| time/               |           |
|    episodes         | 948       |
|    fps              | 571       |
|    time_elapsed     | 1174      |
|    total_timesteps  | 671100    |
| train/              |           |
|    actor_loss       | 8.66      |
|    critic_loss      | 6.69      |
|    learning_rate    | 0.0003    |
|    n_updates        | 441851    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 24.998629 |
| rollout/            |           |
|    ep_len_mean      | 842       |
|    ep_rew_mean      | 25        |
| time/               |           |
|    episodes         | 952       |
|    fps              | 571       |
|    time_elapsed     | 1179      |
|    total_timesteps  | 674586    |
| train/              |           |
|    actor_loss       | 6.37      |
|    critic_loss      | 3.07      |
|    learning_rate    | 0.0003    |
|    n_updates        | 442432    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 29.953676 |
| rollout/            |           |
|    ep_len_mean      | 848       |
|    ep_rew_mean      | 30        |
| time/               |           |
|    episodes         | 956       |
|    fps              | 571       |
|    time_elapsed     | 1187      |
|    total_timesteps  | 678972    |
| train/              |           |
|    actor_loss       | 6.33      |
|    critic_loss      | 2.57      |
|    learning_rate    | 0.0003    |
|    n_updates        | 443163    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 32.361847 |
| rollout/            |           |
|    ep_len_mean      | 859       |
|    ep_rew_mean      | 32.4      |
| time/               |           |
|    episodes         | 960       |
|    fps              | 572       |
|    time_elapsed     | 1194      |
|    total_timesteps  | 683100    |
| train/              |           |
|    actor_loss       | 6.64      |
|    critic_loss      | 2.39      |
|    learning_rate    | 0.0003    |
|    n_updates        | 443851    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 30.330881 |
| rollout/            |           |
|    ep_len_mean      | 852       |
|    ep_rew_mean      | 30.3      |
| time/               |           |
|    episodes         | 964       |
|    fps              | 572       |
|    time_elapsed     | 1198      |
|    total_timesteps  | 685410    |
| train/              |           |
|    actor_loss       | 7.85      |
|    critic_loss      | 5.73      |
|    learning_rate    | 0.0003    |
|    n_updates        | 444236    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 32.62727 |
| rollout/            |          |
|    ep_len_mean      | 863      |
|    ep_rew_mean      | 32.6     |
| time/               |          |
|    episodes         | 968      |
|    fps              | 572      |
|    time_elapsed     | 1204     |
|    total_timesteps  | 689100   |
| train/              |          |
|    actor_loss       | 6.63     |
|    critic_loss      | 1.78     |
|    learning_rate    | 0.0003   |
|    n_updates        | 444851   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 32.676582 |
| eval/               |           |
|    mean_ep_length   | 572       |
|    mean_reward      | 10.7      |
| time/               |           |
|    total_timesteps  | 690000    |
| train/              |           |
|    actor_loss       | 7.29      |
|    critic_loss      | 3.81      |
|    learning_rate    | 0.0003    |
|    n_updates        | 445001    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 35.400673 |
| rollout/            |           |
|    ep_len_mean      | 866       |
|    ep_rew_mean      | 35.4      |
| time/               |           |
|    episodes         | 972       |
|    fps              | 571       |
|    time_elapsed     | 1210      |
|    total_timesteps  | 691410    |
| train/              |           |
|    actor_loss       | 7.37      |
|    critic_loss      | 5.03      |
|    learning_rate    | 0.0003    |
|    n_updates        | 445236    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 38.711857 |
| rollout/            |           |
|    ep_len_mean      | 870       |
|    ep_rew_mean      | 38.7      |
| time/               |           |
|    episodes         | 976       |
|    fps              | 571       |
|    time_elapsed     | 1216      |
|    total_timesteps  | 694746    |
| train/              |           |
|    actor_loss       | 6.06      |
|    critic_loss      | 3.72      |
|    learning_rate    | 0.0003    |
|    n_updates        | 445792    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 35.57783 |
| rollout/            |          |
|    ep_len_mean      | 858      |
|    ep_rew_mean      | 35.6     |
| time/               |          |
|    episodes         | 980      |
|    fps              | 571      |
|    time_elapsed     | 1221     |
|    total_timesteps  | 697728   |
| train/              |          |
|    actor_loss       | 8.37     |
|    critic_loss      | 4.37     |
|    learning_rate    | 0.0003   |
|    n_updates        | 446289   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 34.117928 |
| rollout/            |           |
|    ep_len_mean      | 852       |
|    ep_rew_mean      | 34.1      |
| time/               |           |
|    episodes         | 984       |
|    fps              | 571       |
|    time_elapsed     | 1227      |
|    total_timesteps  | 701490    |
| train/              |           |
|    actor_loss       | 5.86      |
|    critic_loss      | 1.85      |
|    learning_rate    | 0.0003    |
|    n_updates        | 446916    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 33.84159 |
| rollout/            |          |
|    ep_len_mean      | 853      |
|    ep_rew_mean      | 33.8     |
| time/               |          |
|    episodes         | 988      |
|    fps              | 571      |
|    time_elapsed     | 1235     |
|    total_timesteps  | 706212   |
| train/              |          |
|    actor_loss       | 7.29     |
|    critic_loss      | 4.68     |
|    learning_rate    | 0.0003   |
|    n_updates        | 447703   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 34.06282 |
| rollout/            |          |
|    ep_len_mean      | 844      |
|    ep_rew_mean      | 34.1     |
| time/               |          |
|    episodes         | 992      |
|    fps              | 571      |
|    time_elapsed     | 1240     |
|    total_timesteps  | 709410   |
| train/              |          |
|    actor_loss       | 5.96     |
|    critic_loss      | 2.14     |
|    learning_rate    | 0.0003   |
|    n_updates        | 448236   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 33.946945 |
| rollout/            |           |
|    ep_len_mean      | 844       |
|    ep_rew_mean      | 33.9      |
| time/               |           |
|    episodes         | 996       |
|    fps              | 572       |
|    time_elapsed     | 1246      |
|    total_timesteps  | 713028    |
| train/              |           |
|    actor_loss       | 6.84      |
|    critic_loss      | 1.95      |
|    learning_rate    | 0.0003    |
|    n_updates        | 448839    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 33.754795 |
| rollout/            |           |
|    ep_len_mean      | 846       |
|    ep_rew_mean      | 33.8      |
| time/               |           |
|    episodes         | 1000      |
|    fps              | 572       |
|    time_elapsed     | 1253      |
|    total_timesteps  | 716970    |
| train/              |           |
|    actor_loss       | 6.36      |
|    critic_loss      | 2.95      |
|    learning_rate    | 0.0003    |
|    n_updates        | 449496    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 33.479103 |
| eval/               |           |
|    mean_ep_length   | 716       |
|    mean_reward      | 51.4      |
| time/               |           |
|    total_timesteps  | 720000    |
| train/              |           |
|    actor_loss       | 8         |
|    critic_loss      | 3.82      |
|    learning_rate    | 0.0003    |
|    n_updates        | 450001    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 33.25844 |
| rollout/            |          |
|    ep_len_mean      | 846      |
|    ep_rew_mean      | 33.3     |
| time/               |          |
|    episodes         | 1004     |
|    fps              | 570      |
|    time_elapsed     | 1262     |
|    total_timesteps  | 720132   |
| train/              |          |
|    actor_loss       | 7.41     |
|    critic_loss      | 2.73     |
|    learning_rate    | 0.0003   |
|    n_updates        | 450023   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 34.75907 |
| rollout/            |          |
|    ep_len_mean      | 845      |
|    ep_rew_mean      | 34.8     |
| time/               |          |
|    episodes         | 1008     |
|    fps              | 570      |
|    time_elapsed     | 1270     |
|    total_timesteps  | 724638   |
| train/              |          |
|    actor_loss       | 6.32     |
|    critic_loss      | 2.05     |
|    learning_rate    | 0.0003   |
|    n_updates        | 450774   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 35.404713 |
| rollout/            |           |
|    ep_len_mean      | 853       |
|    ep_rew_mean      | 35.4      |
| time/               |           |
|    episodes         | 1012      |
|    fps              | 570       |
|    time_elapsed     | 1276      |
|    total_timesteps  | 728340    |
| train/              |           |
|    actor_loss       | 7.78      |
|    critic_loss      | 1.23      |
|    learning_rate    | 0.0003    |
|    n_updates        | 451391    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 36.708744 |
| rollout/            |           |
|    ep_len_mean      | 858       |
|    ep_rew_mean      | 36.7      |
| time/               |           |
|    episodes         | 1016      |
|    fps              | 570       |
|    time_elapsed     | 1279      |
|    total_timesteps  | 730638    |
| train/              |           |
|    actor_loss       | 7.19      |
|    critic_loss      | 3.13      |
|    learning_rate    | 0.0003    |
|    n_updates        | 451774    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 35.500687 |
| rollout/            |           |
|    ep_len_mean      | 848       |
|    ep_rew_mean      | 35.5      |
| time/               |           |
|    episodes         | 1020      |
|    fps              | 571       |
|    time_elapsed     | 1285      |
|    total_timesteps  | 734340    |
| train/              |           |
|    actor_loss       | 6.17      |
|    critic_loss      | 3         |
|    learning_rate    | 0.0003    |
|    n_updates        | 452391    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 34.72483 |
| rollout/            |          |
|    ep_len_mean      | 846      |
|    ep_rew_mean      | 34.7     |
| time/               |          |
|    episodes         | 1024     |
|    fps              | 571      |
|    time_elapsed     | 1290     |
|    total_timesteps  | 737550   |
| train/              |          |
|    actor_loss       | 7.79     |
|    critic_loss      | 4.56     |
|    learning_rate    | 0.0003   |
|    n_updates        | 452926   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 34.93861 |
| rollout/            |          |
|    ep_len_mean      | 850      |
|    ep_rew_mean      | 34.9     |
| time/               |          |
|    episodes         | 1028     |
|    fps              | 571      |
|    time_elapsed     | 1295     |
|    total_timesteps  | 740562   |
| train/              |          |
|    actor_loss       | 7.96     |
|    critic_loss      | 1.83     |
|    learning_rate    | 0.0003   |
|    n_updates        | 453428   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 35.830517 |
| rollout/            |           |
|    ep_len_mean      | 864       |
|    ep_rew_mean      | 35.8      |
| time/               |           |
|    episodes         | 1032      |
|    fps              | 571       |
|    time_elapsed     | 1301      |
|    total_timesteps  | 743898    |
| train/              |           |
|    actor_loss       | 6.11      |
|    critic_loss      | 4.55      |
|    learning_rate    | 0.0003    |
|    n_updates        | 453984    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 37.459732 |
| rollout/            |           |
|    ep_len_mean      | 874       |
|    ep_rew_mean      | 37.5      |
| time/               |           |
|    episodes         | 1036      |
|    fps              | 571       |
|    time_elapsed     | 1309      |
|    total_timesteps  | 748764    |
| train/              |           |
|    actor_loss       | 7.36      |
|    critic_loss      | 2.37      |
|    learning_rate    | 0.0003    |
|    n_updates        | 454795    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 37.491985 |
| eval/               |           |
|    mean_ep_length   | 945       |
|    mean_reward      | 22.9      |
| time/               |           |
|    total_timesteps  | 750000    |
| train/              |           |
|    actor_loss       | 9.01      |
|    critic_loss      | 6.25      |
|    learning_rate    | 0.0003    |
|    n_updates        | 455001    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 36.21464 |
| rollout/            |          |
|    ep_len_mean      | 874      |
|    ep_rew_mean      | 36.2     |
| time/               |          |
|    episodes         | 1040     |
|    fps              | 570      |
|    time_elapsed     | 1318     |
|    total_timesteps  | 752400   |
| train/              |          |
|    actor_loss       | 7.01     |
|    critic_loss      | 2.85     |
|    learning_rate    | 0.0003   |
|    n_updates        | 455401   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 34.073414 |
| rollout/            |           |
|    ep_len_mean      | 871       |
|    ep_rew_mean      | 34.1      |
| time/               |           |
|    episodes         | 1044      |
|    fps              | 570       |
|    time_elapsed     | 1324      |
|    total_timesteps  | 755748    |
| train/              |           |
|    actor_loss       | 6.64      |
|    critic_loss      | 4.55      |
|    learning_rate    | 0.0003    |
|    n_updates        | 455959    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 36.35248 |
| rollout/            |          |
|    ep_len_mean      | 876      |
|    ep_rew_mean      | 36.4     |
| time/               |          |
|    episodes         | 1048     |
|    fps              | 570      |
|    time_elapsed     | 1328     |
|    total_timesteps  | 758562   |
| train/              |          |
|    actor_loss       | 6.02     |
|    critic_loss      | 1.67     |
|    learning_rate    | 0.0003   |
|    n_updates        | 456428   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 34.924625 |
| rollout/            |           |
|    ep_len_mean      | 874       |
|    ep_rew_mean      | 34.9      |
| time/               |           |
|    episodes         | 1052      |
|    fps              | 570       |
|    time_elapsed     | 1334      |
|    total_timesteps  | 761862    |
| train/              |           |
|    actor_loss       | 7.89      |
|    critic_loss      | 2.44      |
|    learning_rate    | 0.0003    |
|    n_updates        | 456978    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 30.72875 |
| rollout/            |          |
|    ep_len_mean      | 862      |
|    ep_rew_mean      | 30.7     |
| time/               |          |
|    episodes         | 1056     |
|    fps              | 571      |
|    time_elapsed     | 1340     |
|    total_timesteps  | 765636   |
| train/              |          |
|    actor_loss       | 5.91     |
|    critic_loss      | 3.19     |
|    learning_rate    | 0.0003   |
|    n_updates        | 457607   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 28.543003 |
| rollout/            |           |
|    ep_len_mean      | 854       |
|    ep_rew_mean      | 28.5      |
| time/               |           |
|    episodes         | 1060      |
|    fps              | 571       |
|    time_elapsed     | 1344      |
|    total_timesteps  | 767946    |
| train/              |           |
|    actor_loss       | 6.33      |
|    critic_loss      | 2.39      |
|    learning_rate    | 0.0003    |
|    n_updates        | 457992    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 25.512274 |
| rollout/            |           |
|    ep_len_mean      | 847       |
|    ep_rew_mean      | 25.5      |
| time/               |           |
|    episodes         | 1064      |
|    fps              | 571       |
|    time_elapsed     | 1348      |
|    total_timesteps  | 770400    |
| train/              |           |
|    actor_loss       | 6.68      |
|    critic_loss      | 2.82      |
|    learning_rate    | 0.0003    |
|    n_updates        | 458401    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 26.244612 |
| rollout/            |           |
|    ep_len_mean      | 852       |
|    ep_rew_mean      | 26.2      |
| time/               |           |
|    episodes         | 1068      |
|    fps              | 571       |
|    time_elapsed     | 1353      |
|    total_timesteps  | 773778    |
| train/              |           |
|    actor_loss       | 8.21      |
|    critic_loss      | 3.74      |
|    learning_rate    | 0.0003    |
|    n_updates        | 458964    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 28.040258 |
| rollout/            |           |
|    ep_len_mean      | 849       |
|    ep_rew_mean      | 28        |
| time/               |           |
|    episodes         | 1072      |
|    fps              | 571       |
|    time_elapsed     | 1358      |
|    total_timesteps  | 776850    |
| train/              |           |
|    actor_loss       | 5.89      |
|    critic_loss      | 3.7       |
|    learning_rate    | 0.0003    |
|    n_updates        | 459476    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 27.414522 |
| eval/               |           |
|    mean_ep_length   | 782       |
|    mean_reward      | 53.1      |
| time/               |           |
|    total_timesteps  | 780000    |
| train/              |           |
|    actor_loss       | 6.5       |
|    critic_loss      | 2.8       |
|    learning_rate    | 0.0003    |
|    n_updates        | 460001    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 27.33495 |
| rollout/            |          |
|    ep_len_mean      | 856      |
|    ep_rew_mean      | 27.3     |
| time/               |          |
|    episodes         | 1076     |
|    fps              | 570      |
|    time_elapsed     | 1369     |
|    total_timesteps  | 781956   |
| train/              |          |
|    actor_loss       | 6.72     |
|    critic_loss      | 2.28     |
|    learning_rate    | 0.0003   |
|    n_updates        | 460327   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 32.047714 |
| rollout/            |           |
|    ep_len_mean      | 866       |
|    ep_rew_mean      | 32        |
| time/               |           |
|    episodes         | 1080      |
|    fps              | 571       |
|    time_elapsed     | 1375      |
|    total_timesteps  | 785748    |
| train/              |           |
|    actor_loss       | 7.32      |
|    critic_loss      | 2.68      |
|    learning_rate    | 0.0003    |
|    n_updates        | 460959    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 31.223232 |
| rollout/            |           |
|    ep_len_mean      | 872       |
|    ep_rew_mean      | 31.2      |
| time/               |           |
|    episodes         | 1084      |
|    fps              | 571       |
|    time_elapsed     | 1380      |
|    total_timesteps  | 788400    |
| train/              |           |
|    actor_loss       | 6.64      |
|    critic_loss      | 2.54      |
|    learning_rate    | 0.0003    |
|    n_updates        | 461401    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 28.743292 |
| rollout/            |           |
|    ep_len_mean      | 864       |
|    ep_rew_mean      | 28.7      |
| time/               |           |
|    episodes         | 1088      |
|    fps              | 571       |
|    time_elapsed     | 1385      |
|    total_timesteps  | 791760    |
| train/              |           |
|    actor_loss       | 6.8       |
|    critic_loss      | 4.21      |
|    learning_rate    | 0.0003    |
|    n_updates        | 461961    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 24.234077 |
| rollout/            |           |
|    ep_len_mean      | 862       |
|    ep_rew_mean      | 24.2      |
| time/               |           |
|    episodes         | 1092      |
|    fps              | 571       |
|    time_elapsed     | 1389      |
|    total_timesteps  | 794400    |
| train/              |           |
|    actor_loss       | 7.65      |
|    critic_loss      | 2.82      |
|    learning_rate    | 0.0003    |
|    n_updates        | 462401    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 21.633867 |
| rollout/            |           |
|    ep_len_mean      | 853       |
|    ep_rew_mean      | 21.6      |
| time/               |           |
|    episodes         | 1096      |
|    fps              | 571       |
|    time_elapsed     | 1396      |
|    total_timesteps  | 798444    |
| train/              |           |
|    actor_loss       | 7.01      |
|    critic_loss      | 6.01      |
|    learning_rate    | 0.0003    |
|    n_updates        | 463075    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 22.032198 |
| rollout/            |           |
|    ep_len_mean      | 854       |
|    ep_rew_mean      | 22        |
| time/               |           |
|    episodes         | 1100      |
|    fps              | 572       |
|    time_elapsed     | 1402      |
|    total_timesteps  | 802062    |
| train/              |           |
|    actor_loss       | 7.12      |
|    critic_loss      | 1.58      |
|    learning_rate    | 0.0003    |
|    n_updates        | 463678    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 26.06891 |
| rollout/            |          |
|    ep_len_mean      | 851      |
|    ep_rew_mean      | 26.1     |
| time/               |          |
|    episodes         | 1104     |
|    fps              | 572      |
|    time_elapsed     | 1408     |
|    total_timesteps  | 805716   |
| train/              |          |
|    actor_loss       | 7.76     |
|    critic_loss      | 3.27     |
|    learning_rate    | 0.0003   |
|    n_updates        | 464287   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 25.75361 |
| rollout/            |          |
|    ep_len_mean      | 846      |
|    ep_rew_mean      | 25.8     |
| time/               |          |
|    episodes         | 1108     |
|    fps              | 572      |
|    time_elapsed     | 1411     |
|    total_timesteps  | 808062   |
| train/              |          |
|    actor_loss       | 5.86     |
|    critic_loss      | 2.38     |
|    learning_rate    | 0.0003   |
|    n_updates        | 464678   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 26.401878 |
| eval/               |           |
|    mean_ep_length   | 978       |
|    mean_reward      | 55.3      |
| time/               |           |
|    total_timesteps  | 810000    |
| train/              |           |
|    actor_loss       | 4.89      |
|    critic_loss      | 1.47      |
|    learning_rate    | 0.0003    |
|    n_updates        | 465001    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 26.758478 |
| rollout/            |           |
|    ep_len_mean      | 843       |
|    ep_rew_mean      | 26.8      |
| time/               |           |
|    episodes         | 1112      |
|    fps              | 570       |
|    time_elapsed     | 1421      |
|    total_timesteps  | 811716    |
| train/              |           |
|    actor_loss       | 7.27      |
|    critic_loss      | 3.68      |
|    learning_rate    | 0.0003    |
|    n_updates        | 465287    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 27.790648 |
| rollout/            |           |
|    ep_len_mean      | 850       |
|    ep_rew_mean      | 27.8      |
| time/               |           |
|    episodes         | 1116      |
|    fps              | 571       |
|    time_elapsed     | 1428      |
|    total_timesteps  | 815928    |
| train/              |           |
|    actor_loss       | 6.2       |
|    critic_loss      | 3.07      |
|    learning_rate    | 0.0003    |
|    n_updates        | 465989    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 28.09237 |
| rollout/            |          |
|    ep_len_mean      | 860      |
|    ep_rew_mean      | 28.1     |
| time/               |          |
|    episodes         | 1120     |
|    fps              | 571      |
|    time_elapsed     | 1435     |
|    total_timesteps  | 820062   |
| train/              |          |
|    actor_loss       | 10.1     |
|    critic_loss      | 4.31     |
|    learning_rate    | 0.0003   |
|    n_updates        | 466678   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 31.26199 |
| rollout/            |          |
|    ep_len_mean      | 864      |
|    ep_rew_mean      | 31.3     |
| time/               |          |
|    episodes         | 1124     |
|    fps              | 571      |
|    time_elapsed     | 1440     |
|    total_timesteps  | 823374   |
| train/              |          |
|    actor_loss       | 6.39     |
|    critic_loss      | 5.19     |
|    learning_rate    | 0.0003   |
|    n_updates        | 467230   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 28.333374 |
| rollout/            |           |
|    ep_len_mean      | 855       |
|    ep_rew_mean      | 28.3      |
| time/               |           |
|    episodes         | 1128      |
|    fps              | 571       |
|    time_elapsed     | 1445      |
|    total_timesteps  | 826302    |
| train/              |           |
|    actor_loss       | 6.06      |
|    critic_loss      | 1.49      |
|    learning_rate    | 0.0003    |
|    n_updates        | 467718    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 27.200031 |
| rollout/            |           |
|    ep_len_mean      | 854       |
|    ep_rew_mean      | 27.2      |
| time/               |           |
|    episodes         | 1132      |
|    fps              | 571       |
|    time_elapsed     | 1450      |
|    total_timesteps  | 829374    |
| train/              |           |
|    actor_loss       | 6.36      |
|    critic_loss      | 1.46      |
|    learning_rate    | 0.0003    |
|    n_updates        | 468230    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 25.715425 |
| rollout/            |           |
|    ep_len_mean      | 850       |
|    ep_rew_mean      | 25.7      |
| time/               |           |
|    episodes         | 1136      |
|    fps              | 572       |
|    time_elapsed     | 1455      |
|    total_timesteps  | 832758    |
| train/              |           |
|    actor_loss       | 6.39      |
|    critic_loss      | 4.81      |
|    learning_rate    | 0.0003    |
|    n_updates        | 468794    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 25.026627 |
| rollout/            |           |
|    ep_len_mean      | 848       |
|    ep_rew_mean      | 25        |
| time/               |           |
|    episodes         | 1140      |
|    fps              | 572       |
|    time_elapsed     | 1464      |
|    total_timesteps  | 837870    |
| train/              |           |
|    actor_loss       | 6.73      |
|    critic_loss      | 3.69      |
|    learning_rate    | 0.0003    |
|    n_updates        | 469646    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 25.507708 |
| eval/               |           |
|    mean_ep_length   | 1e+03     |
|    mean_reward      | 82.2      |
| time/               |           |
|    total_timesteps  | 840000    |
| train/              |           |
|    actor_loss       | 8.66      |
|    critic_loss      | 1.79      |
|    learning_rate    | 0.0003    |
|    n_updates        | 470001    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 28.009043 |
| rollout/            |           |
|    ep_len_mean      | 859       |
|    ep_rew_mean      | 28        |
| time/               |           |
|    episodes         | 1144      |
|    fps              | 570       |
|    time_elapsed     | 1473      |
|    total_timesteps  | 841374    |
| train/              |           |
|    actor_loss       | 8.11      |
|    critic_loss      | 1.62      |
|    learning_rate    | 0.0003    |
|    n_updates        | 470230    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 29.997936 |
| rollout/            |           |
|    ep_len_mean      | 861       |
|    ep_rew_mean      | 30        |
| time/               |           |
|    episodes         | 1148      |
|    fps              | 571       |
|    time_elapsed     | 1479      |
|    total_timesteps  | 844758    |
| train/              |           |
|    actor_loss       | 7.21      |
|    critic_loss      | 2.65      |
|    learning_rate    | 0.0003    |
|    n_updates        | 470794    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 30.240707 |
| rollout/            |           |
|    ep_len_mean      | 866       |
|    ep_rew_mean      | 30.2      |
| time/               |           |
|    episodes         | 1152      |
|    fps              | 571       |
|    time_elapsed     | 1487      |
|    total_timesteps  | 849546    |
| train/              |           |
|    actor_loss       | 7.72      |
|    critic_loss      | 2.41      |
|    learning_rate    | 0.0003    |
|    n_updates        | 471592    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 32.524002 |
| rollout/            |           |
|    ep_len_mean      | 871       |
|    ep_rew_mean      | 32.5      |
| time/               |           |
|    episodes         | 1156      |
|    fps              | 571       |
|    time_elapsed     | 1490      |
|    total_timesteps  | 851760    |
| train/              |           |
|    actor_loss       | 7.85      |
|    critic_loss      | 2.43      |
|    learning_rate    | 0.0003    |
|    n_updates        | 471961    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 35.51671 |
| rollout/            |          |
|    ep_len_mean      | 875      |
|    ep_rew_mean      | 35.5     |
| time/               |          |
|    episodes         | 1160     |
|    fps              | 571      |
|    time_elapsed     | 1497     |
|    total_timesteps  | 856062   |
| train/              |          |
|    actor_loss       | 6.15     |
|    critic_loss      | 3.6      |
|    learning_rate    | 0.0003   |
|    n_updates        | 472678   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 38.93302 |
| rollout/            |          |
|    ep_len_mean      | 882      |
|    ep_rew_mean      | 38.9     |
| time/               |          |
|    episodes         | 1164     |
|    fps              | 571      |
|    time_elapsed     | 1500     |
|    total_timesteps  | 857964   |
| train/              |          |
|    actor_loss       | 7.64     |
|    critic_loss      | 3.49     |
|    learning_rate    | 0.0003   |
|    n_updates        | 472995   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 38.4854  |
| rollout/            |          |
|    ep_len_mean      | 885      |
|    ep_rew_mean      | 38.5     |
| time/               |          |
|    episodes         | 1168     |
|    fps              | 571      |
|    time_elapsed     | 1509     |
|    total_timesteps  | 863244   |
| train/              |          |
|    actor_loss       | 5.63     |
|    critic_loss      | 3.11     |
|    learning_rate    | 0.0003   |
|    n_updates        | 473875   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 41.123898 |
| rollout/            |           |
|    ep_len_mean      | 892       |
|    ep_rew_mean      | 41.1      |
| time/               |           |
|    episodes         | 1172      |
|    fps              | 572       |
|    time_elapsed     | 1516      |
|    total_timesteps  | 868062    |
| train/              |           |
|    actor_loss       | 7.52      |
|    critic_loss      | 2.27      |
|    learning_rate    | 0.0003    |
|    n_updates        | 474678    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 42.218178 |
| rollout/            |           |
|    ep_len_mean      | 892       |
|    ep_rew_mean      | 42.2      |
| time/               |           |
|    episodes         | 1176      |
|    fps              | 572       |
|    time_elapsed     | 1519      |
|    total_timesteps  | 869760    |
| train/              |           |
|    actor_loss       | 6.43      |
|    critic_loss      | 3.55      |
|    learning_rate    | 0.0003    |
|    n_updates        | 474961    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 43.541103 |
| eval/               |           |
|    mean_ep_length   | 1e+03     |
|    mean_reward      | 27.1      |
| time/               |           |
|    total_timesteps  | 870000    |
| train/              |           |
|    actor_loss       | 7.32      |
|    critic_loss      | 2.76      |
|    learning_rate    | 0.0003    |
|    n_updates        | 475001    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 40.746895 |
| rollout/            |           |
|    ep_len_mean      | 897       |
|    ep_rew_mean      | 40.7      |
| time/               |           |
|    episodes         | 1180      |
|    fps              | 571       |
|    time_elapsed     | 1532      |
|    total_timesteps  | 875244    |
| train/              |           |
|    actor_loss       | 7.57      |
|    critic_loss      | 2.58      |
|    learning_rate    | 0.0003    |
|    n_updates        | 475875    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 43.314453 |
| rollout/            |           |
|    ep_len_mean      | 897       |
|    ep_rew_mean      | 43.3      |
| time/               |           |
|    episodes         | 1184      |
|    fps              | 571       |
|    time_elapsed     | 1540      |
|    total_timesteps  | 880062    |
| train/              |           |
|    actor_loss       | 7.18      |
|    critic_loss      | 2.3       |
|    learning_rate    | 0.0003    |
|    n_updates        | 476678    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 46.888718 |
| rollout/            |           |
|    ep_len_mean      | 905       |
|    ep_rew_mean      | 46.9      |
| time/               |           |
|    episodes         | 1188      |
|    fps              | 571       |
|    time_elapsed     | 1543      |
|    total_timesteps  | 881760    |
| train/              |           |
|    actor_loss       | 7.49      |
|    critic_loss      | 3.95      |
|    learning_rate    | 0.0003    |
|    n_updates        | 476961    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 52.658833 |
| rollout/            |           |
|    ep_len_mean      | 915       |
|    ep_rew_mean      | 52.7      |
| time/               |           |
|    episodes         | 1192      |
|    fps              | 571       |
|    time_elapsed     | 1552      |
|    total_timesteps  | 887244    |
| train/              |           |
|    actor_loss       | 7.4       |
|    critic_loss      | 2.39      |
|    learning_rate    | 0.0003    |
|    n_updates        | 477875    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 56.228424 |
| rollout/            |           |
|    ep_len_mean      | 924       |
|    ep_rew_mean      | 56.2      |
| time/               |           |
|    episodes         | 1196      |
|    fps              | 571       |
|    time_elapsed     | 1559      |
|    total_timesteps  | 892062    |
| train/              |           |
|    actor_loss       | 7.72      |
|    critic_loss      | 4.76      |
|    learning_rate    | 0.0003    |
|    n_updates        | 478678    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 56.797543 |
| rollout/            |           |
|    ep_len_mean      | 926       |
|    ep_rew_mean      | 56.8      |
| time/               |           |
|    episodes         | 1200      |
|    fps              | 571       |
|    time_elapsed     | 1562      |
|    total_timesteps  | 893760    |
| train/              |           |
|    actor_loss       | 6.57      |
|    critic_loss      | 3.95      |
|    learning_rate    | 0.0003    |
|    n_updates        | 478961    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 56.475258 |
| rollout/            |           |
|    ep_len_mean      | 929       |
|    ep_rew_mean      | 56.5      |
| time/               |           |
|    episodes         | 1204      |
|    fps              | 572       |
|    time_elapsed     | 1571      |
|    total_timesteps  | 899244    |
| train/              |           |
|    actor_loss       | 8.17      |
|    critic_loss      | 3.9       |
|    learning_rate    | 0.0003    |
|    n_updates        | 479875    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 58.75375 |
| eval/               |          |
|    mean_ep_length   | 1e+03    |
|    mean_reward      | 20.3     |
| time/               |          |
|    total_timesteps  | 900000   |
| train/              |          |
|    actor_loss       | 7.06     |
|    critic_loss      | 2.8      |
|    learning_rate    | 0.0003   |
|    n_updates        | 480001   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 57.60541 |
| rollout/            |          |
|    ep_len_mean      | 935      |
|    ep_rew_mean      | 57.6     |
| time/               |          |
|    episodes         | 1208     |
|    fps              | 571      |
|    time_elapsed     | 1582     |
|    total_timesteps  | 903432   |
| train/              |          |
|    actor_loss       | 7.3      |
|    critic_loss      | 4.98     |
|    learning_rate    | 0.0003   |
|    n_updates        | 480573   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 59.72763 |
| rollout/            |          |
|    ep_len_mean      | 942      |
|    ep_rew_mean      | 59.7     |
| time/               |          |
|    episodes         | 1212     |
|    fps              | 571      |
|    time_elapsed     | 1585     |
|    total_timesteps  | 905550   |
| train/              |          |
|    actor_loss       | 8.74     |
|    critic_loss      | 3.56     |
|    learning_rate    | 0.0003   |
|    n_updates        | 480926   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 58.83327 |
| rollout/            |          |
|    ep_len_mean      | 942      |
|    ep_rew_mean      | 58.8     |
| time/               |          |
|    episodes         | 1216     |
|    fps              | 571      |
|    time_elapsed     | 1594     |
|    total_timesteps  | 910986   |
| train/              |          |
|    actor_loss       | 6.91     |
|    critic_loss      | 2.72     |
|    learning_rate    | 0.0003   |
|    n_updates        | 481832   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 55.46868 |
| rollout/            |          |
|    ep_len_mean      | 934      |
|    ep_rew_mean      | 55.5     |
| time/               |          |
|    episodes         | 1220     |
|    fps              | 571      |
|    time_elapsed     | 1596     |
|    total_timesteps  | 912018   |
| train/              |          |
|    actor_loss       | 6.44     |
|    critic_loss      | 1.5      |
|    learning_rate    | 0.0003   |
|    n_updates        | 482004   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 55.033924 |
| rollout/            |           |
|    ep_len_mean      | 937       |
|    ep_rew_mean      | 55        |
| time/               |           |
|    episodes         | 1224      |
|    fps              | 571       |
|    time_elapsed     | 1606      |
|    total_timesteps  | 917550    |
| train/              |           |
|    actor_loss       | 6.02      |
|    critic_loss      | 3.84      |
|    learning_rate    | 0.0003    |
|    n_updates        | 482926    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 58.50438 |
| rollout/            |          |
|    ep_len_mean      | 941      |
|    ep_rew_mean      | 58.5     |
| time/               |          |
|    episodes         | 1228     |
|    fps              | 571      |
|    time_elapsed     | 1612     |
|    total_timesteps  | 921126   |
| train/              |          |
|    actor_loss       | 7.19     |
|    critic_loss      | 6.06     |
|    learning_rate    | 0.0003   |
|    n_updates        | 483522   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 62.53897 |
| rollout/            |          |
|    ep_len_mean      | 943      |
|    ep_rew_mean      | 62.5     |
| time/               |          |
|    episodes         | 1232     |
|    fps              | 571      |
|    time_elapsed     | 1617     |
|    total_timesteps  | 923988   |
| train/              |          |
|    actor_loss       | 6.7      |
|    critic_loss      | 8.81     |
|    learning_rate    | 0.0003   |
|    n_updates        | 483999   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 63.05833 |
| rollout/            |          |
|    ep_len_mean      | 949      |
|    ep_rew_mean      | 63.1     |
| time/               |          |
|    episodes         | 1236     |
|    fps              | 571      |
|    time_elapsed     | 1625     |
|    total_timesteps  | 928782   |
| train/              |          |
|    actor_loss       | 5.96     |
|    critic_loss      | 2.86     |
|    learning_rate    | 0.0003   |
|    n_updates        | 484798   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 62.113914 |
| eval/               |           |
|    mean_ep_length   | 1e+03     |
|    mean_reward      | 78.8      |
| time/               |           |
|    total_timesteps  | 930000    |
| train/              |           |
|    actor_loss       | 6.66      |
|    critic_loss      | 2.07      |
|    learning_rate    | 0.0003    |
|    n_updates        | 485001    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 62.574097 |
| rollout/            |           |
|    ep_len_mean      | 946       |
|    ep_rew_mean      | 62.6      |
| time/               |           |
|    episodes         | 1240      |
|    fps              | 569       |
|    time_elapsed     | 1636      |
|    total_timesteps  | 932442    |
| train/              |           |
|    actor_loss       | 7.2       |
|    critic_loss      | 2.54      |
|    learning_rate    | 0.0003    |
|    n_updates        | 485408    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 60.883556 |
| rollout/            |           |
|    ep_len_mean      | 946       |
|    ep_rew_mean      | 60.9      |
| time/               |           |
|    episodes         | 1244      |
|    fps              | 569       |
|    time_elapsed     | 1642      |
|    total_timesteps  | 935988    |
| train/              |           |
|    actor_loss       | 7.01      |
|    critic_loss      | 2.69      |
|    learning_rate    | 0.0003    |
|    n_updates        | 485999    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 59.66818 |
| rollout/            |          |
|    ep_len_mean      | 946      |
|    ep_rew_mean      | 59.7     |
| time/               |          |
|    episodes         | 1248     |
|    fps              | 569      |
|    time_elapsed     | 1649     |
|    total_timesteps  | 940062   |
| train/              |          |
|    actor_loss       | 6.23     |
|    critic_loss      | 2.77     |
|    learning_rate    | 0.0003   |
|    n_updates        | 486678   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 61.26913 |
| rollout/            |          |
|    ep_len_mean      | 940      |
|    ep_rew_mean      | 61.3     |
| time/               |          |
|    episodes         | 1252     |
|    fps              | 569      |
|    time_elapsed     | 1655     |
|    total_timesteps  | 943380   |
| train/              |          |
|    actor_loss       | 6        |
|    critic_loss      | 3.79     |
|    learning_rate    | 0.0003   |
|    n_updates        | 487231   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 65.28395 |
| rollout/            |          |
|    ep_len_mean      | 947      |
|    ep_rew_mean      | 65.3     |
| time/               |          |
|    episodes         | 1256     |
|    fps              | 570      |
|    time_elapsed     | 1660     |
|    total_timesteps  | 946782   |
| train/              |          |
|    actor_loss       | 6.79     |
|    critic_loss      | 4.37     |
|    learning_rate    | 0.0003   |
|    n_updates        | 487798   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 68.071304 |
| rollout/            |           |
|    ep_len_mean      | 953       |
|    ep_rew_mean      | 68.1      |
| time/               |           |
|    episodes         | 1260      |
|    fps              | 570       |
|    time_elapsed     | 1668      |
|    total_timesteps  | 951126    |
| train/              |           |
|    actor_loss       | 6.63      |
|    critic_loss      | 9.49      |
|    learning_rate    | 0.0003    |
|    n_updates        | 488522    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 66.90088 |
| rollout/            |          |
|    ep_len_mean      | 952      |
|    ep_rew_mean      | 66.9     |
| time/               |          |
|    episodes         | 1264     |
|    fps              | 570      |
|    time_elapsed     | 1673     |
|    total_timesteps  | 954018   |
| train/              |          |
|    actor_loss       | 5.53     |
|    critic_loss      | 1.73     |
|    learning_rate    | 0.0003   |
|    n_updates        | 489004   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 66.682   |
| rollout/            |          |
|    ep_len_mean      | 945      |
|    ep_rew_mean      | 66.7     |
| time/               |          |
|    episodes         | 1268     |
|    fps              | 570      |
|    time_elapsed     | 1677     |
|    total_timesteps  | 956442   |
| train/              |          |
|    actor_loss       | 6.41     |
|    critic_loss      | 5.66     |
|    learning_rate    | 0.0003   |
|    n_updates        | 489408   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 63.359997 |
| eval/               |           |
|    mean_ep_length   | 942       |
|    mean_reward      | 60.5      |
| time/               |           |
|    total_timesteps  | 960000    |
| train/              |           |
|    actor_loss       | 6.83      |
|    critic_loss      | 3.16      |
|    learning_rate    | 0.0003    |
|    n_updates        | 490001    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 62.513443 |
| rollout/            |           |
|    ep_len_mean      | 937       |
|    ep_rew_mean      | 62.5      |
| time/               |           |
|    episodes         | 1272      |
|    fps              | 568       |
|    time_elapsed     | 1689      |
|    total_timesteps  | 960990    |
| train/              |           |
|    actor_loss       | 5.4       |
|    critic_loss      | 2.59      |
|    learning_rate    | 0.0003    |
|    n_updates        | 490166    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 62.807518 |
| rollout/            |           |
|    ep_len_mean      | 937       |
|    ep_rew_mean      | 62.8      |
| time/               |           |
|    episodes         | 1276      |
|    fps              | 568       |
|    time_elapsed     | 1695      |
|    total_timesteps  | 964404    |
| train/              |           |
|    actor_loss       | 5.51      |
|    critic_loss      | 12.8      |
|    learning_rate    | 0.0003    |
|    n_updates        | 490735    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 64.46354 |
| rollout/            |          |
|    ep_len_mean      | 937      |
|    ep_rew_mean      | 64.5     |
| time/               |          |
|    episodes         | 1280     |
|    fps              | 568      |
|    time_elapsed     | 1702     |
|    total_timesteps  | 968328   |
| train/              |          |
|    actor_loss       | 5.46     |
|    critic_loss      | 3.56     |
|    learning_rate    | 0.0003   |
|    n_updates        | 491389   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 65.45189 |
| rollout/            |          |
|    ep_len_mean      | 937      |
|    ep_rew_mean      | 65.5     |
| time/               |          |
|    episodes         | 1284     |
|    fps              | 569      |
|    time_elapsed     | 1709     |
|    total_timesteps  | 972990   |
| train/              |          |
|    actor_loss       | 7.12     |
|    critic_loss      | 1.57     |
|    learning_rate    | 0.0003   |
|    n_updates        | 492166   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 62.992115 |
| rollout/            |           |
|    ep_len_mean      | 937       |
|    ep_rew_mean      | 63        |
| time/               |           |
|    episodes         | 1288      |
|    fps              | 569       |
|    time_elapsed     | 1715      |
|    total_timesteps  | 976404    |
| train/              |           |
|    actor_loss       | 6.53      |
|    critic_loss      | 2.19      |
|    learning_rate    | 0.0003    |
|    n_updates        | 492735    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 60.51059 |
| rollout/            |          |
|    ep_len_mean      | 937      |
|    ep_rew_mean      | 60.5     |
| time/               |          |
|    episodes         | 1292     |
|    fps              | 569      |
|    time_elapsed     | 1722     |
|    total_timesteps  | 980328   |
| train/              |          |
|    actor_loss       | 8.39     |
|    critic_loss      | 2.25     |
|    learning_rate    | 0.0003   |
|    n_updates        | 493389   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 59.187096 |
| rollout/            |           |
|    ep_len_mean      | 937       |
|    ep_rew_mean      | 59.2      |
| time/               |           |
|    episodes         | 1296      |
|    fps              | 569       |
|    time_elapsed     | 1730      |
|    total_timesteps  | 984990    |
| train/              |           |
|    actor_loss       | 5.49      |
|    critic_loss      | 1.87      |
|    learning_rate    | 0.0003    |
|    n_updates        | 494166    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 57.6744  |
| rollout/            |          |
|    ep_len_mean      | 933      |
|    ep_rew_mean      | 57.7     |
| time/               |          |
|    episodes         | 1300     |
|    fps              | 569      |
|    time_elapsed     | 1732     |
|    total_timesteps  | 986442   |
| train/              |          |
|    actor_loss       | 7.23     |
|    critic_loss      | 4.79     |
|    learning_rate    | 0.0003   |
|    n_updates        | 494408   |
----------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 502      |
|    mean_reward     | -45.7    |
| time/              |          |
|    total_timesteps | 990000   |
| train/             |          |
|    actor_loss      | 6.42     |
|    critic_loss     | 4.48     |
|    learning_rate   | 0.0003   |
|    n_updates       | 495001   |
---------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 57.091103 |
| rollout/            |           |
|    ep_len_mean      | 931       |
|    ep_rew_mean      | 57.1      |
| time/               |           |
|    episodes         | 1304      |
|    fps              | 568       |
|    time_elapsed     | 1743      |
|    total_timesteps  | 991380    |
| train/              |           |
|    actor_loss       | 6.01      |
|    critic_loss      | 1.51      |
|    learning_rate    | 0.0003    |
|    n_updates        | 495231    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 56.10718 |
| rollout/            |          |
|    ep_len_mean      | 933      |
|    ep_rew_mean      | 56.1     |
| time/               |          |
|    episodes         | 1308     |
|    fps              | 568      |
|    time_elapsed     | 1752     |
|    total_timesteps  | 996414   |
| train/              |          |
|    actor_loss       | 7.1      |
|    critic_loss      | 7.07     |
|    learning_rate    | 0.0003   |
|    n_updates        | 496070   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 57.006214 |
| rollout/            |           |
|    ep_len_mean      | 933       |
|    ep_rew_mean      | 57        |
| time/               |           |
|    episodes         | 1312      |
|    fps              | 568       |
|    time_elapsed     | 1755      |
|    total_timesteps  | 998442    |
| train/              |           |
|    actor_loss       | 6.61      |
|    critic_loss      | 2.71      |
|    learning_rate    | 0.0003    |
|    n_updates        | 496408    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 60.480644 |
| rollout/            |           |
|    ep_len_mean      | 932       |
|    ep_rew_mean      | 60.5      |
| time/               |           |
|    episodes         | 1316      |
|    fps              | 568       |
|    time_elapsed     | 1763      |
|    total_timesteps  | 1002990   |
| train/              |           |
|    actor_loss       | 5.12      |
|    critic_loss      | 2.15      |
|    learning_rate    | 0.0003    |
|    n_updates        | 497166    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 68.035164 |
| rollout/            |           |
|    ep_len_mean      | 940       |
|    ep_rew_mean      | 68        |
| time/               |           |
|    episodes         | 1320      |
|    fps              | 568       |
|    time_elapsed     | 1772      |
|    total_timesteps  | 1008150   |
| train/              |           |
|    actor_loss       | 6.61      |
|    critic_loss      | 2.88      |
|    learning_rate    | 0.0003    |
|    n_updates        | 498026    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 70.20664 |
| rollout/            |          |
|    ep_len_mean      | 945      |
|    ep_rew_mean      | 70.2     |
| time/               |          |
|    episodes         | 1324     |
|    fps              | 568      |
|    time_elapsed     | 1776     |
|    total_timesteps  | 1010442  |
| train/              |          |
|    actor_loss       | 5.27     |
|    critic_loss      | 3.9      |
|    learning_rate    | 0.0003   |
|    n_updates        | 498408   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 67.71637 |
| rollout/            |          |
|    ep_len_mean      | 950      |
|    ep_rew_mean      | 67.7     |
| time/               |          |
|    episodes         | 1328     |
|    fps              | 569      |
|    time_elapsed     | 1783     |
|    total_timesteps  | 1014990  |
| train/              |          |
|    actor_loss       | 5.77     |
|    critic_loss      | 1.57     |
|    learning_rate    | 0.0003   |
|    n_updates        | 499166   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 67.225365 |
| eval/               |           |
|    mean_ep_length   | 884       |
|    mean_reward      | 44.7      |
| time/               |           |
|    total_timesteps  | 1020000   |
| train/              |           |
|    actor_loss       | 5.56      |
|    critic_loss      | 1.98      |
|    learning_rate    | 0.0003    |
|    n_updates        | 500001    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 67.39973 |
| rollout/            |          |
|    ep_len_mean      | 950      |
|    ep_rew_mean      | 67.4     |
| time/               |          |
|    episodes         | 1332     |
|    fps              | 567      |
|    time_elapsed     | 1796     |
|    total_timesteps  | 1020150  |
| train/              |          |
|    actor_loss       | 5.26     |
|    critic_loss      | 1.69     |
|    learning_rate    | 0.0003   |
|    n_updates        | 500026   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 68.48686 |
| rollout/            |          |
|    ep_len_mean      | 951      |
|    ep_rew_mean      | 68.5     |
| time/               |          |
|    episodes         | 1336     |
|    fps              | 567      |
|    time_elapsed     | 1800     |
|    total_timesteps  | 1022442  |
| train/              |          |
|    actor_loss       | 5.79     |
|    critic_loss      | 3.02     |
|    learning_rate    | 0.0003   |
|    n_updates        | 500408   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 68.0705  |
| rollout/            |          |
|    ep_len_mean      | 951      |
|    ep_rew_mean      | 68.1     |
| time/               |          |
|    episodes         | 1340     |
|    fps              | 567      |
|    time_elapsed     | 1808     |
|    total_timesteps  | 1026990  |
| train/              |          |
|    actor_loss       | 4.7      |
|    critic_loss      | 3.5      |
|    learning_rate    | 0.0003   |
|    n_updates        | 501166   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 69.727585 |
| rollout/            |           |
|    ep_len_mean      | 951       |
|    ep_rew_mean      | 69.7      |
| time/               |           |
|    episodes         | 1344      |
|    fps              | 568       |
|    time_elapsed     | 1816      |
|    total_timesteps  | 1032150   |
| train/              |           |
|    actor_loss       | 7.06      |
|    critic_loss      | 5.77      |
|    learning_rate    | 0.0003    |
|    n_updates        | 502026    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 71.10313 |
| rollout/            |          |
|    ep_len_mean      | 951      |
|    ep_rew_mean      | 71.1     |
| time/               |          |
|    episodes         | 1348     |
|    fps              | 568      |
|    time_elapsed     | 1820     |
|    total_timesteps  | 1034442  |
| train/              |          |
|    actor_loss       | 6.37     |
|    critic_loss      | 2.92     |
|    learning_rate    | 0.0003   |
|    n_updates        | 502408   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 72.32848 |
| rollout/            |          |
|    ep_len_mean      | 959      |
|    ep_rew_mean      | 72.3     |
| time/               |          |
|    episodes         | 1352     |
|    fps              | 568      |
|    time_elapsed     | 1828     |
|    total_timesteps  | 1038990  |
| train/              |          |
|    actor_loss       | 6.65     |
|    critic_loss      | 1.8      |
|    learning_rate    | 0.0003   |
|    n_updates        | 503166   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 68.66055 |
| rollout/            |          |
|    ep_len_mean      | 952      |
|    ep_rew_mean      | 68.7     |
| time/               |          |
|    episodes         | 1356     |
|    fps              | 568      |
|    time_elapsed     | 1834     |
|    total_timesteps  | 1042500  |
| train/              |          |
|    actor_loss       | 6.12     |
|    critic_loss      | 1.97     |
|    learning_rate    | 0.0003   |
|    n_updates        | 503751   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 65.61801 |
| rollout/            |          |
|    ep_len_mean      | 952      |
|    ep_rew_mean      | 65.6     |
| time/               |          |
|    episodes         | 1360     |
|    fps              | 568      |
|    time_elapsed     | 1841     |
|    total_timesteps  | 1046328  |
| train/              |          |
|    actor_loss       | 6.79     |
|    critic_loss      | 1.46     |
|    learning_rate    | 0.0003   |
|    n_updates        | 504389   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 66.19883 |
| eval/               |          |
|    mean_ep_length   | 1e+03    |
|    mean_reward      | 85.1     |
| time/               |          |
|    total_timesteps  | 1050000  |
| train/              |          |
|    actor_loss       | 5.22     |
|    critic_loss      | 3.48     |
|    learning_rate    | 0.0003   |
|    n_updates        | 505001   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 67.57186 |
| rollout/            |          |
|    ep_len_mean      | 957      |
|    ep_rew_mean      | 67.6     |
| time/               |          |
|    episodes         | 1364     |
|    fps              | 566      |
|    time_elapsed     | 1852     |
|    total_timesteps  | 1050414  |
| train/              |          |
|    actor_loss       | 5.78     |
|    critic_loss      | 2.33     |
|    learning_rate    | 0.0003   |
|    n_updates        | 505070   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 68.1168  |
| rollout/            |          |
|    ep_len_mean      | 966      |
|    ep_rew_mean      | 68.1     |
| time/               |          |
|    episodes         | 1368     |
|    fps              | 566      |
|    time_elapsed     | 1856     |
|    total_timesteps  | 1052634  |
| train/              |          |
|    actor_loss       | 6.33     |
|    critic_loss      | 2.74     |
|    learning_rate    | 0.0003   |
|    n_updates        | 505440   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 70.05775 |
| rollout/            |          |
|    ep_len_mean      | 974      |
|    ep_rew_mean      | 70.1     |
| time/               |          |
|    episodes         | 1372     |
|    fps              | 567      |
|    time_elapsed     | 1866     |
|    total_timesteps  | 1058328  |
| train/              |          |
|    actor_loss       | 6.14     |
|    critic_loss      | 1.73     |
|    learning_rate    | 0.0003   |
|    n_updates        | 506389   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 69.355606 |
| rollout/            |           |
|    ep_len_mean      | 974       |
|    ep_rew_mean      | 69.4      |
| time/               |           |
|    episodes         | 1376      |
|    fps              | 567       |
|    time_elapsed     | 1873      |
|    total_timesteps  | 1062414   |
| train/              |           |
|    actor_loss       | 5.36      |
|    critic_loss      | 2.07      |
|    learning_rate    | 0.0003    |
|    n_updates        | 507070    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 71.040596 |
| rollout/            |           |
|    ep_len_mean      | 974       |
|    ep_rew_mean      | 71        |
| time/               |           |
|    episodes         | 1380      |
|    fps              | 567       |
|    time_elapsed     | 1876      |
|    total_timesteps  | 1064634   |
| train/              |           |
|    actor_loss       | 6.56      |
|    critic_loss      | 2.8       |
|    learning_rate    | 0.0003    |
|    n_updates        | 507440    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 72.93096 |
| rollout/            |          |
|    ep_len_mean      | 974      |
|    ep_rew_mean      | 72.9     |
| time/               |          |
|    episodes         | 1384     |
|    fps              | 567      |
|    time_elapsed     | 1886     |
|    total_timesteps  | 1070328  |
| train/              |          |
|    actor_loss       | 4.9      |
|    critic_loss      | 4.66     |
|    learning_rate    | 0.0003   |
|    n_updates        | 508389   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 73.94394 |
| rollout/            |          |
|    ep_len_mean      | 970      |
|    ep_rew_mean      | 73.9     |
| time/               |          |
|    episodes         | 1388     |
|    fps              | 567      |
|    time_elapsed     | 1892     |
|    total_timesteps  | 1073994  |
| train/              |          |
|    actor_loss       | 5.64     |
|    critic_loss      | 4.44     |
|    learning_rate    | 0.0003   |
|    n_updates        | 509000   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 74.80418 |
| rollout/            |          |
|    ep_len_mean      | 967      |
|    ep_rew_mean      | 74.8     |
| time/               |          |
|    episodes         | 1392     |
|    fps              | 567      |
|    time_elapsed     | 1897     |
|    total_timesteps  | 1076442  |
| train/              |          |
|    actor_loss       | 5.1      |
|    critic_loss      | 1.4      |
|    learning_rate    | 0.0003   |
|    n_updates        | 509408   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 72.65457 |
| eval/               |          |
|    mean_ep_length   | 913      |
|    mean_reward      | 39       |
| time/               |          |
|    total_timesteps  | 1080000  |
| train/              |          |
|    actor_loss       | 5.59     |
|    critic_loss      | 1.92     |
|    learning_rate    | 0.0003   |
|    n_updates        | 510001   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 72.81156 |
| rollout/            |          |
|    ep_len_mean      | 961      |
|    ep_rew_mean      | 72.8     |
| time/               |          |
|    episodes         | 1396     |
|    fps              | 566      |
|    time_elapsed     | 1908     |
|    total_timesteps  | 1080864  |
| train/              |          |
|    actor_loss       | 4.76     |
|    critic_loss      | 6.14     |
|    learning_rate    | 0.0003   |
|    n_updates        | 510145   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 74.58011 |
| rollout/            |          |
|    ep_len_mean      | 965      |
|    ep_rew_mean      | 74.6     |
| time/               |          |
|    episodes         | 1400     |
|    fps              | 566      |
|    time_elapsed     | 1915     |
|    total_timesteps  | 1084950  |
| train/              |          |
|    actor_loss       | 4.38     |
|    critic_loss      | 3.79     |
|    learning_rate    | 0.0003   |
|    n_updates        | 510826   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 75.723976 |
| rollout/            |           |
|    ep_len_mean      | 967       |
|    ep_rew_mean      | 75.7      |
| time/               |           |
|    episodes         | 1404      |
|    fps              | 566       |
|    time_elapsed     | 1921      |
|    total_timesteps  | 1088442   |
| train/              |           |
|    actor_loss       | 4.99      |
|    critic_loss      | 1.58      |
|    learning_rate    | 0.0003    |
|    n_updates        | 511408    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 76.86206 |
| rollout/            |          |
|    ep_len_mean      | 966      |
|    ep_rew_mean      | 76.9     |
| time/               |          |
|    episodes         | 1408     |
|    fps              | 566      |
|    time_elapsed     | 1929     |
|    total_timesteps  | 1092864  |
| train/              |          |
|    actor_loss       | 4.97     |
|    critic_loss      | 2.1      |
|    learning_rate    | 0.0003   |
|    n_updates        | 512145   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 78.99983 |
| rollout/            |          |
|    ep_len_mean      | 966      |
|    ep_rew_mean      | 79       |
| time/               |          |
|    episodes         | 1412     |
|    fps              | 566      |
|    time_elapsed     | 1935     |
|    total_timesteps  | 1096188  |
| train/              |          |
|    actor_loss       | 5.85     |
|    critic_loss      | 3.34     |
|    learning_rate    | 0.0003   |
|    n_updates        | 512699   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 76.72839 |
| rollout/            |          |
|    ep_len_mean      | 967      |
|    ep_rew_mean      | 76.7     |
| time/               |          |
|    episodes         | 1416     |
|    fps              | 566      |
|    time_elapsed     | 1942     |
|    total_timesteps  | 1100442  |
| train/              |          |
|    actor_loss       | 4.78     |
|    critic_loss      | 2.98     |
|    learning_rate    | 0.0003   |
|    n_updates        | 513408   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 75.33204 |
| rollout/            |          |
|    ep_len_mean      | 967      |
|    ep_rew_mean      | 75.3     |
| time/               |          |
|    episodes         | 1420     |
|    fps              | 566      |
|    time_elapsed     | 1950     |
|    total_timesteps  | 1104864  |
| train/              |          |
|    actor_loss       | 4.55     |
|    critic_loss      | 3.17     |
|    learning_rate    | 0.0003   |
|    n_updates        | 514145   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 71.05329 |
| rollout/            |          |
|    ep_len_mean      | 962      |
|    ep_rew_mean      | 71.1     |
| time/               |          |
|    episodes         | 1424     |
|    fps              | 566      |
|    time_elapsed     | 1953     |
|    total_timesteps  | 1106838  |
| train/              |          |
|    actor_loss       | 4.55     |
|    critic_loss      | 1.94     |
|    learning_rate    | 0.0003   |
|    n_updates        | 514474   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 71.15498 |
| eval/               |          |
|    mean_ep_length   | 1e+03    |
|    mean_reward      | 77.6     |
| time/               |          |
|    total_timesteps  | 1110000  |
| train/              |          |
|    actor_loss       | 4.99     |
|    critic_loss      | 2.01     |
|    learning_rate    | 0.0003   |
|    n_updates        | 515001   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 73.40414 |
| rollout/            |          |
|    ep_len_mean      | 962      |
|    ep_rew_mean      | 73.4     |
| time/               |          |
|    episodes         | 1428     |
|    fps              | 565      |
|    time_elapsed     | 1966     |
|    total_timesteps  | 1111680  |
| train/              |          |
|    actor_loss       | 3.52     |
|    critic_loss      | 4.08     |
|    learning_rate    | 0.0003   |
|    n_updates        | 515281   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 70.87945 |
| rollout/            |          |
|    ep_len_mean      | 955      |
|    ep_rew_mean      | 70.9     |
| time/               |          |
|    episodes         | 1432     |
|    fps              | 565      |
|    time_elapsed     | 1974     |
|    total_timesteps  | 1116528  |
| train/              |          |
|    actor_loss       | 5.7      |
|    critic_loss      | 1.48     |
|    learning_rate    | 0.0003   |
|    n_updates        | 516089   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 69.89949 |
| rollout/            |          |
|    ep_len_mean      | 955      |
|    ep_rew_mean      | 69.9     |
| time/               |          |
|    episodes         | 1436     |
|    fps              | 565      |
|    time_elapsed     | 1978     |
|    total_timesteps  | 1118442  |
| train/              |          |
|    actor_loss       | 4.7      |
|    critic_loss      | 2.46     |
|    learning_rate    | 0.0003   |
|    n_updates        | 516408   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 71.295715 |
| rollout/            |           |
|    ep_len_mean      | 961       |
|    ep_rew_mean      | 71.3      |
| time/               |           |
|    episodes         | 1440      |
|    fps              | 565       |
|    time_elapsed     | 1985      |
|    total_timesteps  | 1122990   |
| train/              |           |
|    actor_loss       | 6.43      |
|    critic_loss      | 2.99      |
|    learning_rate    | 0.0003    |
|    n_updates        | 517166    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 70.53975 |
| rollout/            |          |
|    ep_len_mean      | 961      |
|    ep_rew_mean      | 70.5     |
| time/               |          |
|    episodes         | 1444     |
|    fps              | 565      |
|    time_elapsed     | 1995     |
|    total_timesteps  | 1128528  |
| train/              |          |
|    actor_loss       | 5.06     |
|    critic_loss      | 1.63     |
|    learning_rate    | 0.0003   |
|    n_updates        | 518089   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 73.32067 |
| rollout/            |          |
|    ep_len_mean      | 961      |
|    ep_rew_mean      | 73.3     |
| time/               |          |
|    episodes         | 1448     |
|    fps              | 565      |
|    time_elapsed     | 1998     |
|    total_timesteps  | 1130442  |
| train/              |          |
|    actor_loss       | 5.72     |
|    critic_loss      | 6.97     |
|    learning_rate    | 0.0003   |
|    n_updates        | 518408   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 73.25885 |
| rollout/            |          |
|    ep_len_mean      | 961      |
|    ep_rew_mean      | 73.3     |
| time/               |          |
|    episodes         | 1452     |
|    fps              | 565      |
|    time_elapsed     | 2006     |
|    total_timesteps  | 1134990  |
| train/              |          |
|    actor_loss       | 5.19     |
|    critic_loss      | 3.08     |
|    learning_rate    | 0.0003   |
|    n_updates        | 519166   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 73.66572 |
| eval/               |          |
|    mean_ep_length   | 1e+03    |
|    mean_reward      | 99.8     |
| time/               |          |
|    total_timesteps  | 1140000  |
| train/              |          |
|    actor_loss       | 4.59     |
|    critic_loss      | 3.62     |
|    learning_rate    | 0.0003   |
|    n_updates        | 520001   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 75.66833 |
| rollout/            |          |
|    ep_len_mean      | 967      |
|    ep_rew_mean      | 75.7     |
| time/               |          |
|    episodes         | 1456     |
|    fps              | 564      |
|    time_elapsed     | 2020     |
|    total_timesteps  | 1140528  |
| train/              |          |
|    actor_loss       | 3.51     |
|    critic_loss      | 1.68     |
|    learning_rate    | 0.0003   |
|    n_updates        | 520089   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 76.81614 |
| rollout/            |          |
|    ep_len_mean      | 967      |
|    ep_rew_mean      | 76.8     |
| time/               |          |
|    episodes         | 1460     |
|    fps              | 564      |
|    time_elapsed     | 2023     |
|    total_timesteps  | 1142442  |
| train/              |          |
|    actor_loss       | 4.76     |
|    critic_loss      | 5.14     |
|    learning_rate    | 0.0003   |
|    n_updates        | 520408   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 77.68397 |
| rollout/            |          |
|    ep_len_mean      | 966      |
|    ep_rew_mean      | 77.7     |
| time/               |          |
|    episodes         | 1464     |
|    fps              | 564      |
|    time_elapsed     | 2031     |
|    total_timesteps  | 1146864  |
| train/              |          |
|    actor_loss       | 4.5      |
|    critic_loss      | 1.11     |
|    learning_rate    | 0.0003   |
|    n_updates        | 521145   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 81.58472 |
| rollout/            |          |
|    ep_len_mean      | 970      |
|    ep_rew_mean      | 81.6     |
| time/               |          |
|    episodes         | 1468     |
|    fps              | 564      |
|    time_elapsed     | 2040     |
|    total_timesteps  | 1152528  |
| train/              |          |
|    actor_loss       | 4.01     |
|    critic_loss      | 2.59     |
|    learning_rate    | 0.0003   |
|    n_updates        | 522089   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 81.0281  |
| rollout/            |          |
|    ep_len_mean      | 970      |
|    ep_rew_mean      | 81       |
| time/               |          |
|    episodes         | 1472     |
|    fps              | 564      |
|    time_elapsed     | 2042     |
|    total_timesteps  | 1153680  |
| train/              |          |
|    actor_loss       | 3.25     |
|    critic_loss      | 2.14     |
|    learning_rate    | 0.0003   |
|    n_updates        | 522281   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 80.95296 |
| rollout/            |          |
|    ep_len_mean      | 963      |
|    ep_rew_mean      | 81       |
| time/               |          |
|    episodes         | 1476     |
|    fps              | 564      |
|    time_elapsed     | 2051     |
|    total_timesteps  | 1158582  |
| train/              |          |
|    actor_loss       | 4.35     |
|    critic_loss      | 2.45     |
|    learning_rate    | 0.0003   |
|    n_updates        | 523098   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 77.29852 |
| rollout/            |          |
|    ep_len_mean      | 963      |
|    ep_rew_mean      | 77.3     |
| time/               |          |
|    episodes         | 1480     |
|    fps              | 564      |
|    time_elapsed     | 2057     |
|    total_timesteps  | 1161900  |
| train/              |          |
|    actor_loss       | 3.77     |
|    critic_loss      | 2.97     |
|    learning_rate    | 0.0003   |
|    n_updates        | 523651   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 78.69551 |
| rollout/            |          |
|    ep_len_mean      | 963      |
|    ep_rew_mean      | 78.7     |
| time/               |          |
|    episodes         | 1484     |
|    fps              | 564      |
|    time_elapsed     | 2062     |
|    total_timesteps  | 1164990  |
| train/              |          |
|    actor_loss       | 6.16     |
|    critic_loss      | 4.51     |
|    learning_rate    | 0.0003   |
|    n_updates        | 524166   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 77.82492 |
| eval/               |          |
|    mean_ep_length   | 964      |
|    mean_reward      | 117      |
| time/               |          |
|    total_timesteps  | 1170000  |
| train/              |          |
|    actor_loss       | 4.49     |
|    critic_loss      | 3.81     |
|    learning_rate    | 0.0003   |
|    n_updates        | 525001   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 79.64969 |
| rollout/            |          |
|    ep_len_mean      | 965      |
|    ep_rew_mean      | 79.6     |
| time/               |          |
|    episodes         | 1488     |
|    fps              | 563      |
|    time_elapsed     | 2076     |
|    total_timesteps  | 1170582  |
| train/              |          |
|    actor_loss       | 3.96     |
|    critic_loss      | 2.53     |
|    learning_rate    | 0.0003   |
|    n_updates        | 525098   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 79.638   |
| rollout/            |          |
|    ep_len_mean      | 964      |
|    ep_rew_mean      | 79.6     |
| time/               |          |
|    episodes         | 1492     |
|    fps              | 563      |
|    time_elapsed     | 2081     |
|    total_timesteps  | 1173612  |
| train/              |          |
|    actor_loss       | 3.75     |
|    critic_loss      | 2.12     |
|    learning_rate    | 0.0003   |
|    n_updates        | 525603   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 81.21696 |
| rollout/            |          |
|    ep_len_mean      | 963      |
|    ep_rew_mean      | 81.2     |
| time/               |          |
|    episodes         | 1496     |
|    fps              | 563      |
|    time_elapsed     | 2087     |
|    total_timesteps  | 1176864  |
| train/              |          |
|    actor_loss       | 5.37     |
|    critic_loss      | 1.44     |
|    learning_rate    | 0.0003   |
|    n_updates        | 526145   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 81.77634 |
| rollout/            |          |
|    ep_len_mean      | 963      |
|    ep_rew_mean      | 81.8     |
| time/               |          |
|    episodes         | 1500     |
|    fps              | 563      |
|    time_elapsed     | 2092     |
|    total_timesteps  | 1180068  |
| train/              |          |
|    actor_loss       | 3.63     |
|    critic_loss      | 2.25     |
|    learning_rate    | 0.0003   |
|    n_updates        | 526679   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 76.78466 |
| rollout/            |          |
|    ep_len_mean      | 948      |
|    ep_rew_mean      | 76.8     |
| time/               |          |
|    episodes         | 1504     |
|    fps              | 563      |
|    time_elapsed     | 2097     |
|    total_timesteps  | 1182912  |
| train/              |          |
|    actor_loss       | 4.19     |
|    critic_loss      | 2.78     |
|    learning_rate    | 0.0003   |
|    n_updates        | 527153   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 78.66899 |
| rollout/            |          |
|    ep_len_mean      | 950      |
|    ep_rew_mean      | 78.7     |
| time/               |          |
|    episodes         | 1508     |
|    fps              | 564      |
|    time_elapsed     | 2106     |
|    total_timesteps  | 1188330  |
| train/              |          |
|    actor_loss       | 5.34     |
|    critic_loss      | 2.7      |
|    learning_rate    | 0.0003   |
|    n_updates        | 528056   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 78.52338 |
| rollout/            |          |
|    ep_len_mean      | 950      |
|    ep_rew_mean      | 78.5     |
| time/               |          |
|    episodes         | 1512     |
|    fps              | 564      |
|    time_elapsed     | 2112     |
|    total_timesteps  | 1191474  |
| train/              |          |
|    actor_loss       | 4.1      |
|    critic_loss      | 1.3      |
|    learning_rate    | 0.0003   |
|    n_updates        | 528580   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 80.413826 |
| rollout/            |           |
|    ep_len_mean      | 946       |
|    ep_rew_mean      | 80.4      |
| time/               |           |
|    episodes         | 1516      |
|    fps              | 564       |
|    time_elapsed     | 2117      |
|    total_timesteps  | 1194582   |
| train/              |           |
|    actor_loss       | 4.46      |
|    critic_loss      | 1.08      |
|    learning_rate    | 0.0003    |
|    n_updates        | 529098    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 77.58968 |
| rollout/            |          |
|    ep_len_mean      | 946      |
|    ep_rew_mean      | 77.6     |
| time/               |          |
|    episodes         | 1520     |
|    fps              | 564      |
|    time_elapsed     | 2124     |
|    total_timesteps  | 1198698  |
| train/              |          |
|    actor_loss       | 4.24     |
|    critic_loss      | 3.49     |
|    learning_rate    | 0.0003   |
|    n_updates        | 529784   |
----------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | 120      |
| time/              |          |
|    total_timesteps | 1200000  |
| train/             |          |
|    actor_loss      | 4.54     |
|    critic_loss     | 1.63     |
|    learning_rate   | 0.0003   |
|    n_updates       | 530001   |
---------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 81.70752 |
| rollout/            |          |
|    ep_len_mean      | 949      |
|    ep_rew_mean      | 81.7     |
| time/               |          |
|    episodes         | 1524     |
|    fps              | 563      |
|    time_elapsed     | 2136     |
|    total_timesteps  | 1203264  |
| train/              |          |
|    actor_loss       | 3.51     |
|    critic_loss      | 1.31     |
|    learning_rate    | 0.0003   |
|    n_updates        | 530545   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 82.18052 |
| rollout/            |          |
|    ep_len_mean      | 949      |
|    ep_rew_mean      | 82.2     |
| time/               |          |
|    episodes         | 1528     |
|    fps              | 563      |
|    time_elapsed     | 2142     |
|    total_timesteps  | 1206582  |
| train/              |          |
|    actor_loss       | 7.05     |
|    critic_loss      | 2.69     |
|    learning_rate    | 0.0003   |
|    n_updates        | 531098   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 83.41357 |
| rollout/            |          |
|    ep_len_mean      | 949      |
|    ep_rew_mean      | 83.4     |
| time/               |          |
|    episodes         | 1532     |
|    fps              | 563      |
|    time_elapsed     | 2149     |
|    total_timesteps  | 1210494  |
| train/              |          |
|    actor_loss       | 3.92     |
|    critic_loss      | 1.66     |
|    learning_rate    | 0.0003   |
|    n_updates        | 531750   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 84.01399 |
| rollout/            |          |
|    ep_len_mean      | 947      |
|    ep_rew_mean      | 84       |
| time/               |          |
|    episodes         | 1536     |
|    fps              | 563      |
|    time_elapsed     | 2153     |
|    total_timesteps  | 1212912  |
| train/              |          |
|    actor_loss       | 4.71     |
|    critic_loss      | 7.77     |
|    learning_rate    | 0.0003   |
|    n_updates        | 532153   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 83.010475 |
| rollout/            |           |
|    ep_len_mean      | 947       |
|    ep_rew_mean      | 83        |
| time/               |           |
|    episodes         | 1540      |
|    fps              | 563       |
|    time_elapsed     | 2161      |
|    total_timesteps  | 1217160   |
| train/              |           |
|    actor_loss       | 3.14      |
|    critic_loss      | 5.32      |
|    learning_rate    | 0.0003    |
|    n_updates        | 532861    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 85.67139 |
| rollout/            |          |
|    ep_len_mean      | 947      |
|    ep_rew_mean      | 85.7     |
| time/               |          |
|    episodes         | 1544     |
|    fps              | 563      |
|    time_elapsed     | 2170     |
|    total_timesteps  | 1222494  |
| train/              |          |
|    actor_loss       | 3.79     |
|    critic_loss      | 2.16     |
|    learning_rate    | 0.0003   |
|    n_updates        | 533750   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 79.29475 |
| rollout/            |          |
|    ep_len_mean      | 936      |
|    ep_rew_mean      | 79.3     |
| time/               |          |
|    episodes         | 1548     |
|    fps              | 563      |
|    time_elapsed     | 2174     |
|    total_timesteps  | 1224642  |
| train/              |          |
|    actor_loss       | 3.86     |
|    critic_loss      | 1.5      |
|    learning_rate    | 0.0003   |
|    n_updates        | 534108   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 81.208046 |
| rollout/            |           |
|    ep_len_mean      | 936       |
|    ep_rew_mean      | 81.2      |
| time/               |           |
|    episodes         | 1552      |
|    fps              | 563       |
|    time_elapsed     | 2181      |
|    total_timesteps  | 1228698   |
| train/              |           |
|    actor_loss       | 4.27      |
|    critic_loss      | 2.45      |
|    learning_rate    | 0.0003    |
|    n_updates        | 534784    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 82.625   |
| eval/               |          |
|    mean_ep_length   | 1e+03    |
|    mean_reward      | 73.7     |
| time/               |          |
|    total_timesteps  | 1230000  |
| train/              |          |
|    actor_loss       | 4.87     |
|    critic_loss      | 2.15     |
|    learning_rate    | 0.0003   |
|    n_updates        | 535001   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 80.74592 |
| rollout/            |          |
|    ep_len_mean      | 936      |
|    ep_rew_mean      | 80.7     |
| time/               |          |
|    episodes         | 1556     |
|    fps              | 561      |
|    time_elapsed     | 2195     |
|    total_timesteps  | 1233474  |
| train/              |          |
|    actor_loss       | 3.99     |
|    critic_loss      | 2.37     |
|    learning_rate    | 0.0003   |
|    n_updates        | 535580   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 83.39238 |
| rollout/            |          |
|    ep_len_mean      | 936      |
|    ep_rew_mean      | 83.4     |
| time/               |          |
|    episodes         | 1560     |
|    fps              | 561      |
|    time_elapsed     | 2200     |
|    total_timesteps  | 1236642  |
| train/              |          |
|    actor_loss       | 4.6      |
|    critic_loss      | 2.85     |
|    learning_rate    | 0.0003   |
|    n_updates        | 536108   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 81.14786 |
| rollout/            |          |
|    ep_len_mean      | 933      |
|    ep_rew_mean      | 81.1     |
| time/               |          |
|    episodes         | 1564     |
|    fps              | 561      |
|    time_elapsed     | 2207     |
|    total_timesteps  | 1240494  |
| train/              |          |
|    actor_loss       | 3.21     |
|    critic_loss      | 2.34     |
|    learning_rate    | 0.0003   |
|    n_updates        | 536750   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 82.078705 |
| rollout/            |           |
|    ep_len_mean      | 933       |
|    ep_rew_mean      | 82.1      |
| time/               |           |
|    episodes         | 1568      |
|    fps              | 561       |
|    time_elapsed     | 2215      |
|    total_timesteps  | 1245054   |
| train/              |           |
|    actor_loss       | 5.67      |
|    critic_loss      | 3.41      |
|    learning_rate    | 0.0003    |
|    n_updates        | 537510    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 80.49848 |
| rollout/            |          |
|    ep_len_mean      | 933      |
|    ep_rew_mean      | 80.5     |
| time/               |          |
|    episodes         | 1572     |
|    fps              | 561      |
|    time_elapsed     | 2218     |
|    total_timesteps  | 1246812  |
| train/              |          |
|    actor_loss       | 3.94     |
|    critic_loss      | 2.1      |
|    learning_rate    | 0.0003   |
|    n_updates        | 537803   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 80.84411 |
| rollout/            |          |
|    ep_len_mean      | 929      |
|    ep_rew_mean      | 80.8     |
| time/               |          |
|    episodes         | 1576     |
|    fps              | 561      |
|    time_elapsed     | 2226     |
|    total_timesteps  | 1251054  |
| train/              |          |
|    actor_loss       | 2.78     |
|    critic_loss      | 1.82     |
|    learning_rate    | 0.0003   |
|    n_updates        | 538510   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 82.95925 |
| rollout/            |          |
|    ep_len_mean      | 929      |
|    ep_rew_mean      | 83       |
| time/               |          |
|    episodes         | 1580     |
|    fps              | 561      |
|    time_elapsed     | 2235     |
|    total_timesteps  | 1255968  |
| train/              |          |
|    actor_loss       | 5.11     |
|    critic_loss      | 3.78     |
|    learning_rate    | 0.0003   |
|    n_updates        | 539329   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 79.11566 |
| rollout/            |          |
|    ep_len_mean      | 927      |
|    ep_rew_mean      | 79.1     |
| time/               |          |
|    episodes         | 1584     |
|    fps              | 561      |
|    time_elapsed     | 2239     |
|    total_timesteps  | 1258698  |
| train/              |          |
|    actor_loss       | 2.57     |
|    critic_loss      | 1.15     |
|    learning_rate    | 0.0003   |
|    n_updates        | 539784   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 81.2699  |
| eval/               |          |
|    mean_ep_length   | 1e+03    |
|    mean_reward      | 94.6     |
| time/               |          |
|    total_timesteps  | 1260000  |
| train/              |          |
|    actor_loss       | 3.28     |
|    critic_loss      | 3.95     |
|    learning_rate    | 0.0003   |
|    n_updates        | 540001   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 81.2875  |
| rollout/            |          |
|    ep_len_mean      | 929      |
|    ep_rew_mean      | 81.3     |
| time/               |          |
|    episodes         | 1588     |
|    fps              | 560      |
|    time_elapsed     | 2252     |
|    total_timesteps  | 1262844  |
| train/              |          |
|    actor_loss       | 4        |
|    critic_loss      | 1.08     |
|    learning_rate    | 0.0003   |
|    n_updates        | 540475   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 84.34251 |
| rollout/            |          |
|    ep_len_mean      | 933      |
|    ep_rew_mean      | 84.3     |
| time/               |          |
|    episodes         | 1592     |
|    fps              | 560      |
|    time_elapsed     | 2260     |
|    total_timesteps  | 1267968  |
| train/              |          |
|    actor_loss       | 3.04     |
|    critic_loss      | 3.15     |
|    learning_rate    | 0.0003   |
|    n_updates        | 541329   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 83.338936 |
| rollout/            |           |
|    ep_len_mean      | 939       |
|    ep_rew_mean      | 83.3      |
| time/               |           |
|    episodes         | 1596      |
|    fps              | 560       |
|    time_elapsed     | 2265      |
|    total_timesteps  | 1270698   |
| train/              |           |
|    actor_loss       | 4.62      |
|    critic_loss      | 3.84      |
|    learning_rate    | 0.0003    |
|    n_updates        | 541784    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 84.589035 |
| rollout/            |           |
|    ep_len_mean      | 939       |
|    ep_rew_mean      | 84.6      |
| time/               |           |
|    episodes         | 1600      |
|    fps              | 560       |
|    time_elapsed     | 2272      |
|    total_timesteps  | 1274844   |
| train/              |           |
|    actor_loss       | 3.06      |
|    critic_loss      | 1.59      |
|    learning_rate    | 0.0003    |
|    n_updates        | 542475    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 89.95281 |
| rollout/            |          |
|    ep_len_mean      | 955      |
|    ep_rew_mean      | 90       |
| time/               |          |
|    episodes         | 1604     |
|    fps              | 561      |
|    time_elapsed     | 2281     |
|    total_timesteps  | 1279968  |
| train/              |          |
|    actor_loss       | 3.02     |
|    critic_loss      | 3.67     |
|    learning_rate    | 0.0003   |
|    n_updates        | 543329   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 89.32156 |
| rollout/            |          |
|    ep_len_mean      | 956      |
|    ep_rew_mean      | 89.3     |
| time/               |          |
|    episodes         | 1608     |
|    fps              | 561      |
|    time_elapsed     | 2285     |
|    total_timesteps  | 1282698  |
| train/              |          |
|    actor_loss       | 3.41     |
|    critic_loss      | 1.22     |
|    learning_rate    | 0.0003   |
|    n_updates        | 543784   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 87.86345 |
| rollout/            |          |
|    ep_len_mean      | 956      |
|    ep_rew_mean      | 87.9     |
| time/               |          |
|    episodes         | 1612     |
|    fps              | 561      |
|    time_elapsed     | 2292     |
|    total_timesteps  | 1286844  |
| train/              |          |
|    actor_loss       | 3.43     |
|    critic_loss      | 2.23     |
|    learning_rate    | 0.0003   |
|    n_updates        | 544475   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 88.30803 |
| eval/               |          |
|    mean_ep_length   | 852      |
|    mean_reward      | 17.1     |
| time/               |          |
|    total_timesteps  | 1290000  |
| train/              |          |
|    actor_loss       | 3.09     |
|    critic_loss      | 2.85     |
|    learning_rate    | 0.0003   |
|    n_updates        | 545001   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 87.507324 |
| rollout/            |           |
|    ep_len_mean      | 960       |
|    ep_rew_mean      | 87.5      |
| time/               |           |
|    episodes         | 1616      |
|    fps              | 560       |
|    time_elapsed     | 2305      |
|    total_timesteps  | 1291968   |
| train/              |           |
|    actor_loss       | 3.94      |
|    critic_loss      | 1.24      |
|    learning_rate    | 0.0003    |
|    n_updates        | 545329    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 88.75151 |
| rollout/            |          |
|    ep_len_mean      | 960      |
|    ep_rew_mean      | 88.8     |
| time/               |          |
|    episodes         | 1620     |
|    fps              | 560      |
|    time_elapsed     | 2310     |
|    total_timesteps  | 1294698  |
| train/              |          |
|    actor_loss       | 3.26     |
|    critic_loss      | 1.55     |
|    learning_rate    | 0.0003   |
|    n_updates        | 545784   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 87.68008 |
| rollout/            |          |
|    ep_len_mean      | 956      |
|    ep_rew_mean      | 87.7     |
| time/               |          |
|    episodes         | 1624     |
|    fps              | 560      |
|    time_elapsed     | 2317     |
|    total_timesteps  | 1298844  |
| train/              |          |
|    actor_loss       | 4.89     |
|    critic_loss      | 4.05     |
|    learning_rate    | 0.0003   |
|    n_updates        | 546475   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 86.81138 |
| rollout/            |          |
|    ep_len_mean      | 956      |
|    ep_rew_mean      | 86.8     |
| time/               |          |
|    episodes         | 1628     |
|    fps              | 560      |
|    time_elapsed     | 2322     |
|    total_timesteps  | 1301760  |
| train/              |          |
|    actor_loss       | 3.27     |
|    critic_loss      | 1.35     |
|    learning_rate    | 0.0003   |
|    n_updates        | 546961   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 85.859795 |
| rollout/            |           |
|    ep_len_mean      | 959       |
|    ep_rew_mean      | 85.9      |
| time/               |           |
|    episodes         | 1632      |
|    fps              | 560       |
|    time_elapsed     | 2330      |
|    total_timesteps  | 1306698   |
| train/              |           |
|    actor_loss       | 2.63      |
|    critic_loss      | 2.4       |
|    learning_rate    | 0.0003    |
|    n_updates        | 547784    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 87.35474 |
| rollout/            |          |
|    ep_len_mean      | 961      |
|    ep_rew_mean      | 87.4     |
| time/               |          |
|    episodes         | 1636     |
|    fps              | 560      |
|    time_elapsed     | 2336     |
|    total_timesteps  | 1309968  |
| train/              |          |
|    actor_loss       | 2.24     |
|    critic_loss      | 3.71     |
|    learning_rate    | 0.0003   |
|    n_updates        | 548329   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 89.98204 |
| rollout/            |          |
|    ep_len_mean      | 961      |
|    ep_rew_mean      | 90       |
| time/               |          |
|    episodes         | 1640     |
|    fps              | 560      |
|    time_elapsed     | 2342     |
|    total_timesteps  | 1313760  |
| train/              |          |
|    actor_loss       | 3.29     |
|    critic_loss      | 3.53     |
|    learning_rate    | 0.0003   |
|    n_updates        | 548961   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 84.74155 |
| rollout/            |          |
|    ep_len_mean      | 953      |
|    ep_rew_mean      | 84.7     |
| time/               |          |
|    episodes         | 1644     |
|    fps              | 560      |
|    time_elapsed     | 2348     |
|    total_timesteps  | 1317300  |
| train/              |          |
|    actor_loss       | 2.56     |
|    critic_loss      | 4.9      |
|    learning_rate    | 0.0003   |
|    n_updates        | 549551   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 88.28489 |
| rollout/            |          |
|    ep_len_mean      | 964      |
|    ep_rew_mean      | 88.3     |
| time/               |          |
|    episodes         | 1648     |
|    fps              | 560      |
|    time_elapsed     | 2353     |
|    total_timesteps  | 1319778  |
| train/              |          |
|    actor_loss       | 3.6      |
|    critic_loss      | 1.95     |
|    learning_rate    | 0.0003   |
|    n_updates        | 549964   |
----------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 983      |
|    mean_reward     | 60       |
| time/              |          |
|    total_timesteps | 1320000  |
| train/             |          |
|    actor_loss      | 1.69     |
|    critic_loss     | 1.22     |
|    learning_rate   | 0.0003   |
|    n_updates       | 550001   |
---------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 86.96451 |
| rollout/            |          |
|    ep_len_mean      | 964      |
|    ep_rew_mean      | 87       |
| time/               |          |
|    episodes         | 1652     |
|    fps              | 559      |
|    time_elapsed     | 2366     |
|    total_timesteps  | 1324812  |
| train/              |          |
|    actor_loss       | 2.37     |
|    critic_loss      | 2.4      |
|    learning_rate    | 0.0003   |
|    n_updates        | 550803   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 90.0311  |
| rollout/            |          |
|    ep_len_mean      | 964      |
|    ep_rew_mean      | 90       |
| time/               |          |
|    episodes         | 1656     |
|    fps              | 559      |
|    time_elapsed     | 2374     |
|    total_timesteps  | 1329300  |
| train/              |          |
|    actor_loss       | 2.38     |
|    critic_loss      | 1.1      |
|    learning_rate    | 0.0003   |
|    n_updates        | 551551   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 89.494316 |
| rollout/            |           |
|    ep_len_mean      | 964       |
|    ep_rew_mean      | 89.5      |
| time/               |           |
|    episodes         | 1660      |
|    fps              | 559       |
|    time_elapsed     | 2378      |
|    total_timesteps  | 1331778   |
| train/              |           |
|    actor_loss       | 2.13      |
|    critic_loss      | 4.77      |
|    learning_rate    | 0.0003    |
|    n_updates        | 551964    |
-----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 95.0475  |
| rollout/            |          |
|    ep_len_mean      | 970      |
|    ep_rew_mean      | 95       |
| time/               |          |
|    episodes         | 1664     |
|    fps              | 560      |
|    time_elapsed     | 2386     |
|    total_timesteps  | 1336812  |
| train/              |          |
|    actor_loss       | 1.63     |
|    critic_loss      | 1.07     |
|    learning_rate    | 0.0003   |
|    n_updates        | 552803   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 92.43638 |
| rollout/            |          |
|    ep_len_mean      | 970      |
|    ep_rew_mean      | 92.4     |
| time/               |          |
|    episodes         | 1668     |
|    fps              | 560      |
|    time_elapsed     | 2394     |
|    total_timesteps  | 1341300  |
| train/              |          |
|    actor_loss       | 2.68     |
|    critic_loss      | 1.8      |
|    learning_rate    | 0.0003   |
|    n_updates        | 553551   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 96.70828 |
| rollout/            |          |
|    ep_len_mean      | 970      |
|    ep_rew_mean      | 96.7     |
| time/               |          |
|    episodes         | 1672     |
|    fps              | 560      |
|    time_elapsed     | 2398     |
|    total_timesteps  | 1343778  |
| train/              |          |
|    actor_loss       | 4.14     |
|    critic_loss      | 2.22     |
|    learning_rate    | 0.0003   |
|    n_updates        | 553964   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 98.15436 |
| rollout/            |          |
|    ep_len_mean      | 973      |
|    ep_rew_mean      | 98.2     |
| time/               |          |
|    episodes         | 1676     |
|    fps              | 560      |
|    time_elapsed     | 2407     |
|    total_timesteps  | 1348698  |
| train/              |          |
|    actor_loss       | 1.72     |
|    critic_loss      | 1.18     |
|    learning_rate    | 0.0003   |
|    n_updates        | 554784   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 98.98141 |
| eval/               |          |
|    mean_ep_length   | 1e+03    |
|    mean_reward      | 143      |
| time/               |          |
|    total_timesteps  | 1350000  |
| train/              |          |
|    actor_loss       | 2.18     |
|    critic_loss      | 3.63     |
|    learning_rate    | 0.0003   |
|    n_updates        | 555001   |
----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | 100.754196 |
| rollout/            |            |
|    ep_len_mean      | 973        |
|    ep_rew_mean      | 101        |
| time/               |            |
|    episodes         | 1680       |
|    fps              | 559        |
|    time_elapsed     | 2419       |
|    total_timesteps  | 1353054    |
| train/              |            |
|    actor_loss       | 2.7        |
|    critic_loss      | 1.79       |
|    learning_rate    | 0.0003     |
|    n_updates        | 555510     |
------------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 97.65607 |
| rollout/            |          |
|    ep_len_mean      | 975      |
|    ep_rew_mean      | 97.7     |
| time/               |          |
|    episodes         | 1684     |
|    fps              | 559      |
|    time_elapsed     | 2424     |
|    total_timesteps  | 1355760  |
| train/              |          |
|    actor_loss       | 1.82     |
|    critic_loss      | 1.7      |
|    learning_rate    | 0.0003   |
|    n_updates        | 555961   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 96.89014 |
| rollout/            |          |
|    ep_len_mean      | 975      |
|    ep_rew_mean      | 96.9     |
| time/               |          |
|    episodes         | 1688     |
|    fps              | 559      |
|    time_elapsed     | 2432     |
|    total_timesteps  | 1360698  |
| train/              |          |
|    actor_loss       | 2.34     |
|    critic_loss      | 1.54     |
|    learning_rate    | 0.0003   |
|    n_updates        | 556784   |
----------------------------------


----------------------------------
| custom/             |          |
|    avg_reward_100ep | 96.81796 |
| rollout/            |          |
|    ep_len_mean      | 975      |
|    ep_rew_mean      | 96.8     |
| time/               |          |
|    episodes         | 1692     |
|    fps              | 559      |
|    time_elapsed     | 2439     |
|    total_timesteps  | 1365054  |
| train/              |          |
|    actor_loss       | 2.26     |
|    critic_loss      | 1.51     |
|    learning_rate    | 0.0003   |
|    n_updates        | 557510   |
----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 100.21501 |
| rollout/            |           |
|    ep_len_mean      | 975       |
|    ep_rew_mean      | 100       |
| time/               |           |
|    episodes         | 1696      |
|    fps              | 559       |
|    time_elapsed     | 2444      |
|    total_timesteps  | 1367760   |
| train/              |           |
|    actor_loss       | 1.94      |
|    critic_loss      | 1.67      |
|    learning_rate    | 0.0003    |
|    n_updates        | 557961    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | 101.262245 |
| rollout/            |            |
|    ep_len_mean      | 973        |
|    ep_rew_mean      | 101        |
| time/               |            |
|    episodes         | 1700       |
|    fps              | 559        |
|    time_elapsed     | 2452       |
|    total_timesteps  | 1372698    |
| train/              |            |
|    actor_loss       | 1.94       |
|    critic_loss      | 1.46       |
|    learning_rate    | 0.0003     |
|    n_updates        | 558784     |
------------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | 101.361305 |
| rollout/            |            |
|    ep_len_mean      | 973        |
|    ep_rew_mean      | 101        |
| time/               |            |
|    episodes         | 1704       |
|    fps              | 559        |
|    time_elapsed     | 2458       |
|    total_timesteps  | 1376100    |
| train/              |            |
|    actor_loss       | 2.05       |
|    critic_loss      | 2.78       |
|    learning_rate    | 0.0003     |
|    n_updates        | 559351     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 102.44507 |
| rollout/            |           |
|    ep_len_mean      | 973       |
|    ep_rew_mean      | 102       |
| time/               |           |
|    episodes         | 1708      |
|    fps              | 559       |
|    time_elapsed     | 2465      |
|    total_timesteps  | 1379760   |
| train/              |           |
|    actor_loss       | 2.28      |
|    critic_loss      | 1.2       |
|    learning_rate    | 0.0003    |
|    n_updates        | 559961    |
-----------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 1e+03    |
|    mean_reward     | 86.1     |
| time/              |          |
|    total_timesteps | 1380000  |
| train/             |          |
|    actor_loss      | 2.87     |
|    critic_loss     | 1.53     |
|    learning_rate   | 0.0003   |
|    n_updates       | 560001   |
---------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 100.12738 |
| rollout/            |           |
|    ep_len_mean      | 962       |
|    ep_rew_mean      | 100       |
| time/               |           |
|    episodes         | 1712      |
|    fps              | 558       |
|    time_elapsed     | 2477      |
|    total_timesteps  | 1383468   |
| train/              |           |
|    actor_loss       | 1.89      |
|    critic_loss      | 7.39      |
|    learning_rate    | 0.0003    |
|    n_updates        | 560579    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 102.17136 |
| rollout/            |           |
|    ep_len_mean      | 962       |
|    ep_rew_mean      | 102       |
| time/               |           |
|    episodes         | 1716      |
|    fps              | 558       |
|    time_elapsed     | 2483      |
|    total_timesteps  | 1386912   |
| train/              |           |
|    actor_loss       | 1.96      |
|    critic_loss      | 2.04      |
|    learning_rate    | 0.0003    |
|    n_updates        | 561153    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 104.93859 |
| rollout/            |           |
|    ep_len_mean      | 962       |
|    ep_rew_mean      | 105       |
| time/               |           |
|    episodes         | 1720      |
|    fps              | 558       |
|    time_elapsed     | 2490      |
|    total_timesteps  | 1390812   |
| train/              |           |
|    actor_loss       | 1.55      |
|    critic_loss      | 1.23      |
|    learning_rate    | 0.0003    |
|    n_updates        | 561803    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | 106.808495 |
| rollout/            |            |
|    ep_len_mean      | 960        |
|    ep_rew_mean      | 107        |
| time/               |            |
|    episodes         | 1724       |
|    fps              | 558        |
|    time_elapsed     | 2494       |
|    total_timesteps  | 1393032    |
| train/              |            |
|    actor_loss       | 2.82       |
|    critic_loss      | 1.32       |
|    learning_rate    | 0.0003     |
|    n_updates        | 562173     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 106.94876 |
| rollout/            |           |
|    ep_len_mean      | 956       |
|    ep_rew_mean      | 107       |
| time/               |           |
|    episodes         | 1728      |
|    fps              | 558       |
|    time_elapsed     | 2503      |
|    total_timesteps  | 1397760   |
| train/              |           |
|    actor_loss       | 1.07      |
|    critic_loss      | 3.02      |
|    learning_rate    | 0.0003    |
|    n_updates        | 562961    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 108.27085 |
| rollout/            |           |
|    ep_len_mean      | 960       |
|    ep_rew_mean      | 108       |
| time/               |           |
|    episodes         | 1732      |
|    fps              | 558       |
|    time_elapsed     | 2512      |
|    total_timesteps  | 1402698   |
| train/              |           |
|    actor_loss       | 0.472     |
|    critic_loss      | 1.8       |
|    learning_rate    | 0.0003    |
|    n_updates        | 563784    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 108.50542 |
| rollout/            |           |
|    ep_len_mean      | 960       |
|    ep_rew_mean      | 109       |
| time/               |           |
|    episodes         | 1736      |
|    fps              | 558       |
|    time_elapsed     | 2516      |
|    total_timesteps  | 1405032   |
| train/              |           |
|    actor_loss       | 0.741     |
|    critic_loss      | 1.59      |
|    learning_rate    | 0.0003    |
|    n_updates        | 564173    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 111.32651 |
| rollout/            |           |
|    ep_len_mean      | 960       |
|    ep_rew_mean      | 111       |
| time/               |           |
|    episodes         | 1740      |
|    fps              | 558       |
|    time_elapsed     | 2525      |
|    total_timesteps  | 1409760   |
| train/              |           |
|    actor_loss       | 1.49      |
|    critic_loss      | 1.47      |
|    learning_rate    | 0.0003    |
|    n_updates        | 564961    |
-----------------------------------


---------------------------------
| eval/              |          |
|    mean_ep_length  | 866      |
|    mean_reward     | 77       |
| time/              |          |
|    total_timesteps | 1410000  |
| train/             |          |
|    actor_loss      | 1.21     |
|    critic_loss     | 2.25     |
|    learning_rate   | 0.0003   |
|    n_updates       | 565001   |
---------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 116.10888 |
| rollout/            |           |
|    ep_len_mean      | 964       |
|    ep_rew_mean      | 116       |
| time/               |           |
|    episodes         | 1744      |
|    fps              | 557       |
|    time_elapsed     | 2537      |
|    total_timesteps  | 1414482   |
| train/              |           |
|    actor_loss       | 1.32      |
|    critic_loss      | 1.35      |
|    learning_rate    | 0.0003    |
|    n_updates        | 565748    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 120.05374 |
| rollout/            |           |
|    ep_len_mean      | 964       |
|    ep_rew_mean      | 120       |
| time/               |           |
|    episodes         | 1748      |
|    fps              | 557       |
|    time_elapsed     | 2542      |
|    total_timesteps  | 1417032   |
| train/              |           |
|    actor_loss       | 0.901     |
|    critic_loss      | 2.21      |
|    learning_rate    | 0.0003    |
|    n_updates        | 566173    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 120.45275 |
| rollout/            |           |
|    ep_len_mean      | 964       |
|    ep_rew_mean      | 120       |
| time/               |           |
|    episodes         | 1752      |
|    fps              | 557       |
|    time_elapsed     | 2548      |
|    total_timesteps  | 1420812   |
| train/              |           |
|    actor_loss       | 1.18      |
|    critic_loss      | 1.45      |
|    learning_rate    | 0.0003    |
|    n_updates        | 566803    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 119.63678 |
| rollout/            |           |
|    ep_len_mean      | 964       |
|    ep_rew_mean      | 120       |
| time/               |           |
|    episodes         | 1756      |
|    fps              | 557       |
|    time_elapsed     | 2558      |
|    total_timesteps  | 1426482   |
| train/              |           |
|    actor_loss       | 1.84      |
|    critic_loss      | 1.55      |
|    learning_rate    | 0.0003    |
|    n_updates        | 567748    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | 121.203125 |
| rollout/            |            |
|    ep_len_mean      | 964        |
|    ep_rew_mean      | 121        |
| time/               |            |
|    episodes         | 1760       |
|    fps              | 557        |
|    time_elapsed     | 2563       |
|    total_timesteps  | 1429032    |
| train/              |            |
|    actor_loss       | 1.09       |
|    critic_loss      | 1.65       |
|    learning_rate    | 0.0003     |
|    n_updates        | 568173     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 120.25022 |
| rollout/            |           |
|    ep_len_mean      | 964       |
|    ep_rew_mean      | 120       |
| time/               |           |
|    episodes         | 1764      |
|    fps              | 557       |
|    time_elapsed     | 2570      |
|    total_timesteps  | 1432812   |
| train/              |           |
|    actor_loss       | -0.0758   |
|    critic_loss      | 2.33      |
|    learning_rate    | 0.0003    |
|    n_updates        | 568803    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 121.58003 |
| rollout/            |           |
|    ep_len_mean      | 961       |
|    ep_rew_mean      | 122       |
| time/               |           |
|    episodes         | 1768      |
|    fps              | 557       |
|    time_elapsed     | 2578      |
|    total_timesteps  | 1437300   |
| train/              |           |
|    actor_loss       | 0.317     |
|    critic_loss      | 3.64      |
|    learning_rate    | 0.0003    |
|    n_updates        | 569551    |
-----------------------------------


------------------------------------
| custom/             |            |
|    avg_reward_100ep | 120.843414 |
| eval/               |            |
|    mean_ep_length   | 798        |
|    mean_reward      | 22.3       |
| time/               |            |
|    total_timesteps  | 1440000    |
| train/              |            |
|    actor_loss       | 1.56       |
|    critic_loss      | 1.7        |
|    learning_rate    | 0.0003     |
|    n_updates        | 570001     |
------------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 118.54398 |
| rollout/            |           |
|    ep_len_mean      | 960       |
|    ep_rew_mean      | 119       |
| time/               |           |
|    episodes         | 1772      |
|    fps              | 556       |
|    time_elapsed     | 2587      |
|    total_timesteps  | 1440564   |
| train/              |           |
|    actor_loss       | 0.709     |
|    critic_loss      | 3.1       |
|    learning_rate    | 0.0003    |
|    n_updates        | 570095    |
-----------------------------------


-----------------------------------
| custom/             |           |
|    avg_reward_100ep | 118.47898 |
| rollout/            |           |
|    ep_len_mean      | 968       |
|    ep_rew_mean      | 118       |
| time/               |           |
|    episodes         | 1776      |
|    fps              | 556       |
|    time_elapsed     | 2595      |
|    total_timesteps  | 1444698   |
| train/              |           |
|    actor_loss       | -0.0979   |
|    critic_loss      | 3.5       |
|    learning_rate    | 0.0003    |
|    n_updates        | 570784    |
-----------------------------------


FileNotFoundError: [Errno 2] No such file or directory: b'./log/TD3_BipedalWalker-v3\\TD3_6\\events.out.tfevents.1746464115.DESKTOP-V8JHN9B.18696.0'

: 

In [1]:
model.save(model_.__name__ + "_" + benchmark + "_hardcore")

NameError: name 'model' is not defined