In [7]:
import gymnasium as gym
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import CheckpointCallback, CallbackList
from stable_baselines3.common.logger import configure
from tqdm import trange

def make_env():
    env = gym.make("CarRacing-v3", render_mode="rgb_array", continuous=True)
    env = Monitor(env)  
    return env

env = DummyVecEnv([make_env])
env = VecTransposeImage(env)
log_dir = "./sac_logs/"
new_logger = configure(log_dir, ["stdout", "tensorboard"])

checkpoint_callback = CheckpointCallback(
    save_freq=100_000,
    save_path="./checkpoints/",
    name_prefix="sac_carracing"
)

model = SAC(
    policy="CnnPolicy",
    env=env,
    verbose=0,
    buffer_size= 80_000,
    learning_rate=3e-4,
    batch_size=64,
    train_freq=1,
    gradient_steps=1,
    ent_coef= 0,
    tensorboard_log=log_dir,
    device="cuda"
)

model.set_logger(new_logger)
total_timesteps = 1_000_000
steps_per_iter = 10_000  
print("Starting training with tqdm and TensorBoard logging...")

for _ in trange(total_timesteps // steps_per_iter, desc="Training Progress"):
    model.learn(total_timesteps=steps_per_iter, reset_num_timesteps=False, callback=checkpoint_callback)

model.save("sac_carracing_sb3")
print("Training complete. Model saved as 'sac_carracing_sb3'.")


Logging to ./sac_logs/
Starting training with tqdm and TensorBoard logging...


Training Progress:   0%|                                                                       | 0/100 [00:00<?, ?it/s]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -90.9    |
| time/              |          |
|    episodes        | 4        |
|    fps             | 28       |
|    time_elapsed    | 141      |
|    total_timesteps | 4000     |
| train/             |          |
|    actor_loss      | 1.67     |
|    critic_loss     | 6.65e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 3899     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -92      |
| time/              |          |
|    episodes        | 8        |
|    fps             | 28       |
|    time_elapsed    | 284      |
|    total_timesteps | 8000     |
| train/             |          |
|    actor_loss      | 3.17     |
|    critic_loss     | 4.11e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:   1%|▌                                                           | 1/100 [05:54<9:45:00, 354.55s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -92.4    |
| time/              |          |
|    episodes        | 12       |
|    fps             | 28       |
|    time_elapsed    | 70       |
|    total_timesteps | 12000    |
| train/             |          |
|    actor_loss      | 4.37     |
|    critic_loss     | 0.000273 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 11899    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -92.6    |
| time/              |          |
|    episodes        | 16       |
|    fps             | 28       |
|    time_elapsed    | 210      |
|    total_timesteps | 16000    |
| train/             |          |
|    actor_loss      | 4.74     |
|    critic_loss     | 0.000689 |
|    ent_coef        | 0        |
|    learning_

Training Progress:   2%|█▏                                                          | 2/100 [11:45<9:35:54, 352.59s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -78.3    |
| time/              |          |
|    episodes        | 24       |
|    fps             | 28       |
|    time_elapsed    | 142      |
|    total_timesteps | 24000    |
| train/             |          |
|    actor_loss      | -0.565   |
|    critic_loss     | 0.00475  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 23899    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -75.5    |
| time/              |          |
|    episodes        | 28       |
|    fps             | 27       |
|    time_elapsed    | 288      |
|    total_timesteps | 28000    |
| train/             |          |
|    actor_loss      | -1.4     |
|    critic_loss     | 0.0411   |
|    ent_coef        | 0        |
|    learning_

Training Progress:   3%|█▊                                                          | 3/100 [17:48<9:37:31, 357.23s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -76.1    |
| time/              |          |
|    episodes        | 32       |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 32000    |
| train/             |          |
|    actor_loss      | -1.28    |
|    critic_loss     | 0.0221   |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 31899    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -76.9    |
| time/              |          |
|    episodes        | 36       |
|    fps             | 27       |
|    time_elapsed    | 216      |
|    total_timesteps | 36000    |
| train/             |          |
|    actor_loss      | -0.587   |
|    critic_loss     | 0.0219   |
|    ent_coef        | 0        |
|    learning_

Training Progress:   4%|██▍                                                         | 4/100 [23:49<9:33:49, 358.64s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -78.1    |
| time/              |          |
|    episodes        | 44       |
|    fps             | 28       |
|    time_elapsed    | 142      |
|    total_timesteps | 44000    |
| train/             |          |
|    actor_loss      | 3.18     |
|    critic_loss     | 0.184    |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 43899    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -78.7    |
| time/              |          |
|    episodes        | 48       |
|    fps             | 27       |
|    time_elapsed    | 287      |
|    total_timesteps | 48000    |
| train/             |          |
|    actor_loss      | 5.78     |
|    critic_loss     | 0.0597   |
|    ent_coef        | 0        |
|    learning_

Training Progress:   5%|███                                                         | 5/100 [29:49<9:28:54, 359.31s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -79.1    |
| time/              |          |
|    episodes        | 52       |
|    fps             | 28       |
|    time_elapsed    | 71       |
|    total_timesteps | 52000    |
| train/             |          |
|    actor_loss      | 4.54     |
|    critic_loss     | 0.169    |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 51899    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -79.4    |
| time/              |          |
|    episodes        | 56       |
|    fps             | 27       |
|    time_elapsed    | 216      |
|    total_timesteps | 56000    |
| train/             |          |
|    actor_loss      | 6.72     |
|    critic_loss     | 0.0375   |
|    ent_coef        | 0        |
|    learning_

Training Progress:   6%|███▌                                                        | 6/100 [35:49<9:23:18, 359.56s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -80      |
| time/              |          |
|    episodes        | 64       |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 64000    |
| train/             |          |
|    actor_loss      | 8.73     |
|    critic_loss     | 0.0396   |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 63899    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -80.2    |
| time/              |          |
|    episodes        | 68       |
|    fps             | 27       |
|    time_elapsed    | 286      |
|    total_timesteps | 68000    |
| train/             |          |
|    actor_loss      | 7.92     |
|    critic_loss     | 0.0273   |
|    ent_coef        | 0        |
|    learning_

Training Progress:   7%|████▏                                                       | 7/100 [41:48<9:16:49, 359.24s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -80.4    |
| time/              |          |
|    episodes        | 72       |
|    fps             | 27       |
|    time_elapsed    | 71       |
|    total_timesteps | 72000    |
| train/             |          |
|    actor_loss      | 8.44     |
|    critic_loss     | 0.0152   |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 71899    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -80.6    |
| time/              |          |
|    episodes        | 76       |
|    fps             | 27       |
|    time_elapsed    | 216      |
|    total_timesteps | 76000    |
| train/             |          |
|    actor_loss      | 8.26     |
|    critic_loss     | 0.0206   |
|    ent_coef        | 0        |
|    learning_

Training Progress:   8%|████▊                                                       | 8/100 [47:48<9:11:00, 359.35s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -80.8    |
| time/              |          |
|    episodes        | 84       |
|    fps             | 27       |
|    time_elapsed    | 144      |
|    total_timesteps | 84000    |
| train/             |          |
|    actor_loss      | 8.72     |
|    critic_loss     | 0.00488  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 83899    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -80.9    |
| time/              |          |
|    episodes        | 88       |
|    fps             | 27       |
|    time_elapsed    | 289      |
|    total_timesteps | 88000    |
| train/             |          |
|    actor_loss      | 9.01     |
|    critic_loss     | 0.0446   |
|    ent_coef        | 0        |
|    learning_

Training Progress:   9%|█████▍                                                      | 9/100 [53:49<9:05:58, 359.99s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -81.1    |
| time/              |          |
|    episodes        | 92       |
|    fps             | 27       |
|    time_elapsed    | 71       |
|    total_timesteps | 92000    |
| train/             |          |
|    actor_loss      | 8.8      |
|    critic_loss     | 0.109    |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 91899    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -81.2    |
| time/              |          |
|    episodes        | 96       |
|    fps             | 27       |
|    time_elapsed    | 215      |
|    total_timesteps | 96000    |
| train/             |          |
|    actor_loss      | 9.45     |
|    critic_loss     | 0.0361   |
|    ent_coef        | 0        |
|    learning_

Training Progress:  10%|█████▉                                                     | 10/100 [59:48<8:59:42, 359.81s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -80.9    |
| time/              |          |
|    episodes        | 104      |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 104000   |
| train/             |          |
|    actor_loss      | 9.68     |
|    critic_loss     | 0.00763  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 103899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -80.6    |
| time/              |          |
|    episodes        | 108      |
|    fps             | 28       |
|    time_elapsed    | 285      |
|    total_timesteps | 108000   |
| train/             |          |
|    actor_loss      | 9.7      |
|    critic_loss     | 0.000162 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  11%|██████▎                                                  | 11/100 [1:05:46<8:52:40, 359.11s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -80.3    |
| time/              |          |
|    episodes        | 112      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 112000   |
| train/             |          |
|    actor_loss      | 9.76     |
|    critic_loss     | 1.7e-05  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 111899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -79.8    |
| time/              |          |
|    episodes        | 116      |
|    fps             | 27       |
|    time_elapsed    | 220      |
|    total_timesteps | 116000   |
| train/             |          |
|    actor_loss      | 9.84     |
|    critic_loss     | 0.00339  |
|    ent_coef        | 0        |
|    learning_

Training Progress:  12%|██████▊                                                  | 12/100 [1:11:51<8:49:26, 360.98s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -82.5    |
| time/              |          |
|    episodes        | 124      |
|    fps             | 27       |
|    time_elapsed    | 147      |
|    total_timesteps | 124000   |
| train/             |          |
|    actor_loss      | 9.77     |
|    critic_loss     | 9.22e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 123899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 128      |
|    fps             | 27       |
|    time_elapsed    | 292      |
|    total_timesteps | 128000   |
| train/             |          |
|    actor_loss      | 9.91     |
|    critic_loss     | 0.000172 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  13%|███████▍                                                 | 13/100 [1:17:56<8:45:06, 362.14s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 132      |
|    fps             | 27       |
|    time_elapsed    | 71       |
|    total_timesteps | 132000   |
| train/             |          |
|    actor_loss      | 9.89     |
|    critic_loss     | 1.42e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 131899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 136      |
|    fps             | 27       |
|    time_elapsed    | 214      |
|    total_timesteps | 136000   |
| train/             |          |
|    actor_loss      | 9.59     |
|    critic_loss     | 0.000209 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  14%|███████▉                                                 | 14/100 [1:23:55<8:37:41, 361.18s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 144      |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 144000   |
| train/             |          |
|    actor_loss      | 9.65     |
|    critic_loss     | 0.000398 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 143899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 148      |
|    fps             | 27       |
|    time_elapsed    | 286      |
|    total_timesteps | 148000   |
| train/             |          |
|    actor_loss      | 9.92     |
|    critic_loss     | 0.000226 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  15%|████████▌                                                | 15/100 [1:29:53<8:30:22, 360.27s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 152      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 152000   |
| train/             |          |
|    actor_loss      | 9.72     |
|    critic_loss     | 0.00215  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 151899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 156      |
|    fps             | 27       |
|    time_elapsed    | 216      |
|    total_timesteps | 156000   |
| train/             |          |
|    actor_loss      | 9.26     |
|    critic_loss     | 0.000236 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  16%|█████████                                                | 16/100 [1:35:53<8:24:13, 360.16s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 164      |
|    fps             | 27       |
|    time_elapsed    | 144      |
|    total_timesteps | 164000   |
| train/             |          |
|    actor_loss      | 9.68     |
|    critic_loss     | 0.000621 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 163899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 168      |
|    fps             | 27       |
|    time_elapsed    | 288      |
|    total_timesteps | 168000   |
| train/             |          |
|    actor_loss      | 9.97     |
|    critic_loss     | 1.19e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  17%|█████████▋                                               | 17/100 [1:41:54<8:18:27, 360.33s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 172      |
|    fps             | 28       |
|    time_elapsed    | 71       |
|    total_timesteps | 172000   |
| train/             |          |
|    actor_loss      | 9.93     |
|    critic_loss     | 6.63e-06 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 171899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 176      |
|    fps             | 27       |
|    time_elapsed    | 215      |
|    total_timesteps | 176000   |
| train/             |          |
|    actor_loss      | 9.92     |
|    critic_loss     | 8.54e-06 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  18%|██████████▎                                              | 18/100 [1:47:53<8:12:07, 360.09s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 184      |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 184000   |
| train/             |          |
|    actor_loss      | 9.65     |
|    critic_loss     | 0.000137 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 183899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 188      |
|    fps             | 27       |
|    time_elapsed    | 287      |
|    total_timesteps | 188000   |
| train/             |          |
|    actor_loss      | 9.98     |
|    critic_loss     | 2.49e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  19%|██████████▊                                              | 19/100 [1:53:52<8:05:31, 359.65s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 192      |
|    fps             | 27       |
|    time_elapsed    | 71       |
|    total_timesteps | 192000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 2.3e-06  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 191899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 196      |
|    fps             | 27       |
|    time_elapsed    | 215      |
|    total_timesteps | 196000   |
| train/             |          |
|    actor_loss      | 9.63     |
|    critic_loss     | 0.00115  |
|    ent_coef        | 0        |
|    learning_

Training Progress:  20%|███████████▍                                             | 20/100 [1:59:51<7:59:21, 359.52s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 204      |
|    fps             | 27       |
|    time_elapsed    | 144      |
|    total_timesteps | 204000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 1.51e-06 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 203899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 208      |
|    fps             | 27       |
|    time_elapsed    | 288      |
|    total_timesteps | 208000   |
| train/             |          |
|    actor_loss      | 9.96     |
|    critic_loss     | 5.2e-05  |
|    ent_coef        | 0        |
|    learning_

Training Progress:  21%|███████████▉                                             | 21/100 [2:05:52<7:54:05, 360.06s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 212      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 212000   |
| train/             |          |
|    actor_loss      | 9.48     |
|    critic_loss     | 0.00231  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 211899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 216      |
|    fps             | 27       |
|    time_elapsed    | 218      |
|    total_timesteps | 216000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 4.86e-06 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  22%|████████████▌                                            | 22/100 [2:11:53<7:48:06, 360.08s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 224      |
|    fps             | 27       |
|    time_elapsed    | 144      |
|    total_timesteps | 224000   |
| train/             |          |
|    actor_loss      | 9.82     |
|    critic_loss     | 0.00109  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 223899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 228      |
|    fps             | 27       |
|    time_elapsed    | 288      |
|    total_timesteps | 228000   |
| train/             |          |
|    actor_loss      | 9.89     |
|    critic_loss     | 3.59e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  23%|█████████████                                            | 23/100 [2:17:53<7:42:05, 360.07s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 232      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 232000   |
| train/             |          |
|    actor_loss      | 9.88     |
|    critic_loss     | 0.000196 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 231899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 236      |
|    fps             | 27       |
|    time_elapsed    | 215      |
|    total_timesteps | 236000   |
| train/             |          |
|    actor_loss      | 9.78     |
|    critic_loss     | 0.000457 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  24%|█████████████▋                                           | 24/100 [2:23:52<7:35:49, 359.87s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 244      |
|    fps             | 28       |
|    time_elapsed    | 142      |
|    total_timesteps | 244000   |
| train/             |          |
|    actor_loss      | 9.82     |
|    critic_loss     | 0.00137  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 243899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 248      |
|    fps             | 28       |
|    time_elapsed    | 285      |
|    total_timesteps | 248000   |
| train/             |          |
|    actor_loss      | 9.69     |
|    critic_loss     | 0.000299 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  25%|██████████████▎                                          | 25/100 [2:29:49<7:28:50, 359.07s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 252      |
|    fps             | 27       |
|    time_elapsed    | 71       |
|    total_timesteps | 252000   |
| train/             |          |
|    actor_loss      | 9.71     |
|    critic_loss     | 0.00188  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 251899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 256      |
|    fps             | 27       |
|    time_elapsed    | 214      |
|    total_timesteps | 256000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 1.9e-06  |
|    ent_coef        | 0        |
|    learning_

Training Progress:  26%|██████████████▊                                          | 26/100 [2:35:47<7:22:32, 358.82s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 264      |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 264000   |
| train/             |          |
|    actor_loss      | 9.72     |
|    critic_loss     | 0.000146 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 263899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 268      |
|    fps             | 27       |
|    time_elapsed    | 286      |
|    total_timesteps | 268000   |
| train/             |          |
|    actor_loss      | 9.74     |
|    critic_loss     | 4.95e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  27%|███████████████▍                                         | 27/100 [2:41:45<7:16:17, 358.59s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 272      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 272000   |
| train/             |          |
|    actor_loss      | 9.6      |
|    critic_loss     | 0.000326 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 271899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 276      |
|    fps             | 27       |
|    time_elapsed    | 215      |
|    total_timesteps | 276000   |
| train/             |          |
|    actor_loss      | 9.64     |
|    critic_loss     | 0.000206 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  28%|███████████████▉                                         | 28/100 [2:47:45<7:10:32, 358.79s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 284      |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 284000   |
| train/             |          |
|    actor_loss      | 9.87     |
|    critic_loss     | 4.93e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 283899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 288      |
|    fps             | 27       |
|    time_elapsed    | 286      |
|    total_timesteps | 288000   |
| train/             |          |
|    actor_loss      | 9.91     |
|    critic_loss     | 0.000609 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  29%|████████████████▌                                        | 29/100 [2:53:43<7:04:15, 358.53s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 292      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 292000   |
| train/             |          |
|    actor_loss      | 9.87     |
|    critic_loss     | 7.09e-06 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 291899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 296      |
|    fps             | 27       |
|    time_elapsed    | 215      |
|    total_timesteps | 296000   |
| train/             |          |
|    actor_loss      | 9.8      |
|    critic_loss     | 0.000835 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  30%|█████████████████                                        | 30/100 [2:59:42<6:58:25, 358.65s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 304      |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 304000   |
| train/             |          |
|    actor_loss      | 9.74     |
|    critic_loss     | 0.000108 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 303899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 308      |
|    fps             | 27       |
|    time_elapsed    | 286      |
|    total_timesteps | 308000   |
| train/             |          |
|    actor_loss      | 9.93     |
|    critic_loss     | 0.000138 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  31%|█████████████████▋                                       | 31/100 [3:05:40<6:52:15, 358.49s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 312      |
|    fps             | 28       |
|    time_elapsed    | 71       |
|    total_timesteps | 312000   |
| train/             |          |
|    actor_loss      | 9.88     |
|    critic_loss     | 8.8e-05  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 311899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 316      |
|    fps             | 27       |
|    time_elapsed    | 214      |
|    total_timesteps | 316000   |
| train/             |          |
|    actor_loss      | 9.88     |
|    critic_loss     | 8.17e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  32%|██████████████████▏                                      | 32/100 [3:11:38<6:46:11, 358.41s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 324      |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 324000   |
| train/             |          |
|    actor_loss      | 9.77     |
|    critic_loss     | 0.000252 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 323899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 328      |
|    fps             | 27       |
|    time_elapsed    | 287      |
|    total_timesteps | 328000   |
| train/             |          |
|    actor_loss      | 9.79     |
|    critic_loss     | 0.000174 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  33%|██████████████████▊                                      | 33/100 [3:17:38<6:40:40, 358.81s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 332      |
|    fps             | 28       |
|    time_elapsed    | 71       |
|    total_timesteps | 332000   |
| train/             |          |
|    actor_loss      | 9.82     |
|    critic_loss     | 0.000137 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 331899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 336      |
|    fps             | 27       |
|    time_elapsed    | 216      |
|    total_timesteps | 336000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 1.84e-06 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  34%|███████████████████▍                                     | 34/100 [3:23:40<6:35:51, 359.87s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 344      |
|    fps             | 27       |
|    time_elapsed    | 146      |
|    total_timesteps | 344000   |
| train/             |          |
|    actor_loss      | 9.84     |
|    critic_loss     | 0.000505 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 343899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 348      |
|    fps             | 27       |
|    time_elapsed    | 290      |
|    total_timesteps | 348000   |
| train/             |          |
|    actor_loss      | 9.98     |
|    critic_loss     | 0.000101 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  35%|███████████████████▉                                     | 35/100 [3:29:43<6:30:50, 360.77s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 352      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 352000   |
| train/             |          |
|    actor_loss      | 9.72     |
|    critic_loss     | 0.000133 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 351899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 356      |
|    fps             | 27       |
|    time_elapsed    | 216      |
|    total_timesteps | 356000   |
| train/             |          |
|    actor_loss      | 9.9      |
|    critic_loss     | 5.94e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  36%|████████████████████▌                                    | 36/100 [3:35:45<6:25:15, 361.19s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 364      |
|    fps             | 27       |
|    time_elapsed    | 144      |
|    total_timesteps | 364000   |
| train/             |          |
|    actor_loss      | 9.89     |
|    critic_loss     | 0.000567 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 363899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.3    |
| time/              |          |
|    episodes        | 368      |
|    fps             | 27       |
|    time_elapsed    | 289      |
|    total_timesteps | 368000   |
| train/             |          |
|    actor_loss      | 9.87     |
|    critic_loss     | 1.61e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  37%|█████████████████████                                    | 37/100 [3:41:47<6:19:24, 361.34s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.3    |
| time/              |          |
|    episodes        | 372      |
|    fps             | 28       |
|    time_elapsed    | 71       |
|    total_timesteps | 372000   |
| train/             |          |
|    actor_loss      | 9.68     |
|    critic_loss     | 8.85e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 371899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.2    |
| time/              |          |
|    episodes        | 376      |
|    fps             | 27       |
|    time_elapsed    | 214      |
|    total_timesteps | 376000   |
| train/             |          |
|    actor_loss      | 9.62     |
|    critic_loss     | 0.000116 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  38%|█████████████████████▋                                   | 38/100 [3:47:46<6:12:37, 360.61s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.2    |
| time/              |          |
|    episodes        | 384      |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 384000   |
| train/             |          |
|    actor_loss      | 9.69     |
|    critic_loss     | 0.00243  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 383899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.2    |
| time/              |          |
|    episodes        | 388      |
|    fps             | 27       |
|    time_elapsed    | 286      |
|    total_timesteps | 388000   |
| train/             |          |
|    actor_loss      | 9.78     |
|    critic_loss     | 0.0001   |
|    ent_coef        | 0        |
|    learning_

Training Progress:  39%|██████████████████████▏                                  | 39/100 [3:53:44<6:05:58, 359.97s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.2    |
| time/              |          |
|    episodes        | 392      |
|    fps             | 28       |
|    time_elapsed    | 71       |
|    total_timesteps | 392000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 2.73e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 391899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.3    |
| time/              |          |
|    episodes        | 396      |
|    fps             | 27       |
|    time_elapsed    | 215      |
|    total_timesteps | 396000   |
| train/             |          |
|    actor_loss      | 9.56     |
|    critic_loss     | 7.06e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  40%|██████████████████████▊                                  | 40/100 [3:59:43<5:59:47, 359.79s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.3    |
| time/              |          |
|    episodes        | 404      |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 404000   |
| train/             |          |
|    actor_loss      | 9.78     |
|    critic_loss     | 7.57e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 403899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 408      |
|    fps             | 27       |
|    time_elapsed    | 286      |
|    total_timesteps | 408000   |
| train/             |          |
|    actor_loss      | 9.93     |
|    critic_loss     | 2.01e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  41%|███████████████████████▎                                 | 41/100 [4:05:42<5:53:17, 359.28s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 412      |
|    fps             | 28       |
|    time_elapsed    | 71       |
|    total_timesteps | 412000   |
| train/             |          |
|    actor_loss      | 9.91     |
|    critic_loss     | 6.56e-06 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 411899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 416      |
|    fps             | 27       |
|    time_elapsed    | 214      |
|    total_timesteps | 416000   |
| train/             |          |
|    actor_loss      | 9.97     |
|    critic_loss     | 3.38e-06 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  42%|███████████████████████▉                                 | 42/100 [4:11:39<5:46:53, 358.85s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 424      |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 424000   |
| train/             |          |
|    actor_loss      | 9.61     |
|    critic_loss     | 0.00121  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 423899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 428      |
|    fps             | 27       |
|    time_elapsed    | 286      |
|    total_timesteps | 428000   |
| train/             |          |
|    actor_loss      | 9.65     |
|    critic_loss     | 2.65e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  43%|████████████████████████▌                                | 43/100 [4:17:38<5:40:42, 358.63s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 432      |
|    fps             | 27       |
|    time_elapsed    | 71       |
|    total_timesteps | 432000   |
| train/             |          |
|    actor_loss      | 9.96     |
|    critic_loss     | 1.57e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 431899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 436      |
|    fps             | 28       |
|    time_elapsed    | 214      |
|    total_timesteps | 436000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 5.39e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  44%|█████████████████████████                                | 44/100 [4:23:36<5:34:36, 358.51s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 444      |
|    fps             | 28       |
|    time_elapsed    | 142      |
|    total_timesteps | 444000   |
| train/             |          |
|    actor_loss      | 9.74     |
|    critic_loss     | 4.96e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 443899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 448      |
|    fps             | 28       |
|    time_elapsed    | 284      |
|    total_timesteps | 448000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 3.71e-06 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  45%|█████████████████████████▋                               | 45/100 [4:29:32<5:27:53, 357.69s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 452      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 452000   |
| train/             |          |
|    actor_loss      | 9.62     |
|    critic_loss     | 0.000894 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 451899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 456      |
|    fps             | 27       |
|    time_elapsed    | 215      |
|    total_timesteps | 456000   |
| train/             |          |
|    actor_loss      | 9.92     |
|    critic_loss     | 4.98e-06 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  46%|██████████████████████████▏                              | 46/100 [4:35:31<5:22:17, 358.10s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 464      |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 464000   |
| train/             |          |
|    actor_loss      | 9.64     |
|    critic_loss     | 0.000417 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 463899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.9    |
| time/              |          |
|    episodes        | 468      |
|    fps             | 27       |
|    time_elapsed    | 287      |
|    total_timesteps | 468000   |
| train/             |          |
|    actor_loss      | 9.96     |
|    critic_loss     | 4.07e-06 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  47%|██████████████████████████▊                              | 47/100 [4:41:29<5:16:24, 358.20s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.9    |
| time/              |          |
|    episodes        | 472      |
|    fps             | 28       |
|    time_elapsed    | 70       |
|    total_timesteps | 472000   |
| train/             |          |
|    actor_loss      | 9.82     |
|    critic_loss     | 0.000431 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 471899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -84      |
| time/              |          |
|    episodes        | 476      |
|    fps             | 28       |
|    time_elapsed    | 213      |
|    total_timesteps | 476000   |
| train/             |          |
|    actor_loss      | 9.65     |
|    critic_loss     | 8.14e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  48%|███████████████████████████▎                             | 48/100 [4:47:26<5:10:08, 357.85s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -84      |
| time/              |          |
|    episodes        | 484      |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 484000   |
| train/             |          |
|    actor_loss      | 9.8      |
|    critic_loss     | 0.000357 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 483899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -84      |
| time/              |          |
|    episodes        | 488      |
|    fps             | 27       |
|    time_elapsed    | 293      |
|    total_timesteps | 488000   |
| train/             |          |
|    actor_loss      | 9.86     |
|    critic_loss     | 0.000275 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  49%|███████████████████████████▉                             | 49/100 [4:53:37<5:07:23, 361.63s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.9    |
| time/              |          |
|    episodes        | 492      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 492000   |
| train/             |          |
|    actor_loss      | 9.78     |
|    critic_loss     | 6.66e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 491899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.9    |
| time/              |          |
|    episodes        | 496      |
|    fps             | 27       |
|    time_elapsed    | 217      |
|    total_timesteps | 496000   |
| train/             |          |
|    actor_loss      | 9.83     |
|    critic_loss     | 2.91e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  50%|████████████████████████████▌                            | 50/100 [4:59:41<5:02:00, 362.41s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.9    |
| time/              |          |
|    episodes        | 504      |
|    fps             | 27       |
|    time_elapsed    | 145      |
|    total_timesteps | 504000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 4.75e-06 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 503899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.9    |
| time/              |          |
|    episodes        | 508      |
|    fps             | 27       |
|    time_elapsed    | 291      |
|    total_timesteps | 508000   |
| train/             |          |
|    actor_loss      | 9.7      |
|    critic_loss     | 0.000345 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  51%|█████████████████████████████                            | 51/100 [5:05:45<4:56:26, 362.99s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 512      |
|    fps             | 27       |
|    time_elapsed    | 73       |
|    total_timesteps | 512000   |
| train/             |          |
|    actor_loss      | 9.96     |
|    critic_loss     | 0.000196 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 511899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 516      |
|    fps             | 27       |
|    time_elapsed    | 219      |
|    total_timesteps | 516000   |
| train/             |          |
|    actor_loss      | 9.86     |
|    critic_loss     | 3.69e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  52%|█████████████████████████████▋                           | 52/100 [5:11:49<4:50:43, 363.41s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 524      |
|    fps             | 27       |
|    time_elapsed    | 144      |
|    total_timesteps | 524000   |
| train/             |          |
|    actor_loss      | 9.96     |
|    critic_loss     | 6.69e-06 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 523899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 528      |
|    fps             | 27       |
|    time_elapsed    | 290      |
|    total_timesteps | 528000   |
| train/             |          |
|    actor_loss      | 9.87     |
|    critic_loss     | 0.000118 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  53%|██████████████████████████████▏                          | 53/100 [5:17:52<4:44:34, 363.30s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 532      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 532000   |
| train/             |          |
|    actor_loss      | 9.71     |
|    critic_loss     | 0.000803 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 531899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 536      |
|    fps             | 27       |
|    time_elapsed    | 217      |
|    total_timesteps | 536000   |
| train/             |          |
|    actor_loss      | 9.87     |
|    critic_loss     | 0.000128 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  54%|██████████████████████████████▊                          | 54/100 [5:23:56<4:38:36, 363.39s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 544      |
|    fps             | 27       |
|    time_elapsed    | 144      |
|    total_timesteps | 544000   |
| train/             |          |
|    actor_loss      | 9.92     |
|    critic_loss     | 6.82e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 543899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 548      |
|    fps             | 27       |
|    time_elapsed    | 289      |
|    total_timesteps | 548000   |
| train/             |          |
|    actor_loss      | 9.86     |
|    critic_loss     | 5.08e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  55%|███████████████████████████████▎                         | 55/100 [5:29:59<4:32:22, 363.17s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 552      |
|    fps             | 27       |
|    time_elapsed    | 73       |
|    total_timesteps | 552000   |
| train/             |          |
|    actor_loss      | 9.93     |
|    critic_loss     | 5.85e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 551899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 556      |
|    fps             | 27       |
|    time_elapsed    | 219      |
|    total_timesteps | 556000   |
| train/             |          |
|    actor_loss      | 9.82     |
|    critic_loss     | 0.000163 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  56%|███████████████████████████████▉                         | 56/100 [5:36:03<4:26:29, 363.40s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 564      |
|    fps             | 27       |
|    time_elapsed    | 144      |
|    total_timesteps | 564000   |
| train/             |          |
|    actor_loss      | 9.82     |
|    critic_loss     | 0.000511 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 563899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 568      |
|    fps             | 27       |
|    time_elapsed    | 289      |
|    total_timesteps | 568000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 1.38e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  57%|████████████████████████████████▍                        | 57/100 [5:42:05<4:20:14, 363.13s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 572      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 572000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 7.42e-06 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 571899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 576      |
|    fps             | 27       |
|    time_elapsed    | 217      |
|    total_timesteps | 576000   |
| train/             |          |
|    actor_loss      | 9.73     |
|    critic_loss     | 0.000152 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  58%|█████████████████████████████████                        | 58/100 [5:48:10<4:14:26, 363.48s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 584      |
|    fps             | 27       |
|    time_elapsed    | 144      |
|    total_timesteps | 584000   |
| train/             |          |
|    actor_loss      | 9.88     |
|    critic_loss     | 0.000126 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 583899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 588      |
|    fps             | 27       |
|    time_elapsed    | 289      |
|    total_timesteps | 588000   |
| train/             |          |
|    actor_loss      | 9.83     |
|    critic_loss     | 0.000103 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  59%|█████████████████████████████████▋                       | 59/100 [5:54:12<4:08:12, 363.24s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 592      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 592000   |
| train/             |          |
|    actor_loss      | 9.91     |
|    critic_loss     | 4.91e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 591899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 596      |
|    fps             | 27       |
|    time_elapsed    | 216      |
|    total_timesteps | 596000   |
| train/             |          |
|    actor_loss      | 9.93     |
|    critic_loss     | 0.00013  |
|    ent_coef        | 0        |
|    learning_

Training Progress:  60%|██████████████████████████████████▏                      | 60/100 [6:00:14<4:01:52, 362.82s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 604      |
|    fps             | 27       |
|    time_elapsed    | 145      |
|    total_timesteps | 604000   |
| train/             |          |
|    actor_loss      | 9.7      |
|    critic_loss     | 0.000194 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 603899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 608      |
|    fps             | 27       |
|    time_elapsed    | 289      |
|    total_timesteps | 608000   |
| train/             |          |
|    actor_loss      | 9.88     |
|    critic_loss     | 5.95e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  61%|██████████████████████████████████▊                      | 61/100 [6:06:16<3:55:39, 362.56s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 612      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 612000   |
| train/             |          |
|    actor_loss      | 9.96     |
|    critic_loss     | 4.68e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 611899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 616      |
|    fps             | 27       |
|    time_elapsed    | 217      |
|    total_timesteps | 616000   |
| train/             |          |
|    actor_loss      | 9.61     |
|    critic_loss     | 0.000396 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  62%|███████████████████████████████████▎                     | 62/100 [6:12:19<3:49:37, 362.57s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.3    |
| time/              |          |
|    episodes        | 624      |
|    fps             | 27       |
|    time_elapsed    | 144      |
|    total_timesteps | 624000   |
| train/             |          |
|    actor_loss      | 9.79     |
|    critic_loss     | 7.46e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 623899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 628      |
|    fps             | 27       |
|    time_elapsed    | 289      |
|    total_timesteps | 628000   |
| train/             |          |
|    actor_loss      | 9.96     |
|    critic_loss     | 9.53e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  63%|███████████████████████████████████▉                     | 63/100 [6:18:21<3:43:32, 362.51s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.3    |
| time/              |          |
|    episodes        | 632      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 632000   |
| train/             |          |
|    actor_loss      | 9.79     |
|    critic_loss     | 0.000245 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 631899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 636      |
|    fps             | 27       |
|    time_elapsed    | 218      |
|    total_timesteps | 636000   |
| train/             |          |
|    actor_loss      | 9.8      |
|    critic_loss     | 0.000371 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  64%|████████████████████████████████████▍                    | 64/100 [6:24:24<3:37:35, 362.65s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 644      |
|    fps             | 27       |
|    time_elapsed    | 145      |
|    total_timesteps | 644000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 8.92e-06 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 643899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 648      |
|    fps             | 27       |
|    time_elapsed    | 291      |
|    total_timesteps | 648000   |
| train/             |          |
|    actor_loss      | 9.98     |
|    critic_loss     | 1.92e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  65%|█████████████████████████████████████                    | 65/100 [6:30:29<3:31:53, 363.25s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 652      |
|    fps             | 27       |
|    time_elapsed    | 73       |
|    total_timesteps | 652000   |
| train/             |          |
|    actor_loss      | 9.97     |
|    critic_loss     | 7.25e-06 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 651899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 656      |
|    fps             | 27       |
|    time_elapsed    | 218      |
|    total_timesteps | 656000   |
| train/             |          |
|    actor_loss      | 9.92     |
|    critic_loss     | 8.13e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  66%|█████████████████████████████████████▌                   | 66/100 [6:36:32<3:25:57, 363.44s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 664      |
|    fps             | 27       |
|    time_elapsed    | 145      |
|    total_timesteps | 664000   |
| train/             |          |
|    actor_loss      | 9.84     |
|    critic_loss     | 3.3e-06  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 663899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 668      |
|    fps             | 27       |
|    time_elapsed    | 290      |
|    total_timesteps | 668000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 8.4e-06  |
|    ent_coef        | 0        |
|    learning_

Training Progress:  67%|██████████████████████████████████████▏                  | 67/100 [6:42:36<3:19:52, 363.42s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 672      |
|    fps             | 27       |
|    time_elapsed    | 73       |
|    total_timesteps | 672000   |
| train/             |          |
|    actor_loss      | 9.13     |
|    critic_loss     | 0.000431 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 671899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 676      |
|    fps             | 27       |
|    time_elapsed    | 218      |
|    total_timesteps | 676000   |
| train/             |          |
|    actor_loss      | 9.46     |
|    critic_loss     | 0.000638 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  68%|██████████████████████████████████████▊                  | 68/100 [6:48:41<3:14:01, 363.80s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 684      |
|    fps             | 27       |
|    time_elapsed    | 144      |
|    total_timesteps | 684000   |
| train/             |          |
|    actor_loss      | 9.92     |
|    critic_loss     | 0.000305 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 683899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 688      |
|    fps             | 27       |
|    time_elapsed    | 289      |
|    total_timesteps | 688000   |
| train/             |          |
|    actor_loss      | 9.91     |
|    critic_loss     | 8.03e-06 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  69%|███████████████████████████████████████▎                 | 69/100 [6:54:44<3:07:50, 363.56s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 692      |
|    fps             | 27       |
|    time_elapsed    | 73       |
|    total_timesteps | 692000   |
| train/             |          |
|    actor_loss      | 9.92     |
|    critic_loss     | 3.23e-06 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 691899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 696      |
|    fps             | 27       |
|    time_elapsed    | 218      |
|    total_timesteps | 696000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 3.14e-06 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  70%|███████████████████████████████████████▉                 | 70/100 [7:00:47<3:01:48, 363.63s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 704      |
|    fps             | 27       |
|    time_elapsed    | 145      |
|    total_timesteps | 704000   |
| train/             |          |
|    actor_loss      | 9.92     |
|    critic_loss     | 2e-05    |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 703899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 708      |
|    fps             | 27       |
|    time_elapsed    | 290      |
|    total_timesteps | 708000   |
| train/             |          |
|    actor_loss      | 9.84     |
|    critic_loss     | 5.67e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  71%|████████████████████████████████████████▍                | 71/100 [7:06:52<2:55:55, 363.98s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 712      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 712000   |
| train/             |          |
|    actor_loss      | 9.83     |
|    critic_loss     | 8.05e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 711899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 716      |
|    fps             | 27       |
|    time_elapsed    | 218      |
|    total_timesteps | 716000   |
| train/             |          |
|    actor_loss      | 9.77     |
|    critic_loss     | 0.000916 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  72%|█████████████████████████████████████████                | 72/100 [7:12:56<2:49:51, 363.99s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 724      |
|    fps             | 27       |
|    time_elapsed    | 146      |
|    total_timesteps | 724000   |
| train/             |          |
|    actor_loss      | 9.92     |
|    critic_loss     | 2.87e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 723899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 728      |
|    fps             | 27       |
|    time_elapsed    | 292      |
|    total_timesteps | 728000   |
| train/             |          |
|    actor_loss      | 9.84     |
|    critic_loss     | 0.000202 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  73%|█████████████████████████████████████████▌               | 73/100 [7:19:01<2:43:54, 364.23s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 732      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 732000   |
| train/             |          |
|    actor_loss      | 9.7      |
|    critic_loss     | 0.000233 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 731899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 736      |
|    fps             | 27       |
|    time_elapsed    | 217      |
|    total_timesteps | 736000   |
| train/             |          |
|    actor_loss      | 9.66     |
|    critic_loss     | 2.97e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  74%|██████████████████████████████████████████▏              | 74/100 [7:25:03<2:37:35, 363.67s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 744      |
|    fps             | 27       |
|    time_elapsed    | 145      |
|    total_timesteps | 744000   |
| train/             |          |
|    actor_loss      | 9.89     |
|    critic_loss     | 9e-05    |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 743899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 748      |
|    fps             | 27       |
|    time_elapsed    | 291      |
|    total_timesteps | 748000   |
| train/             |          |
|    actor_loss      | 9.79     |
|    critic_loss     | 2.8e-05  |
|    ent_coef        | 0        |
|    learning_

Training Progress:  75%|██████████████████████████████████████████▊              | 75/100 [7:31:08<2:31:38, 363.94s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 752      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 752000   |
| train/             |          |
|    actor_loss      | 9.59     |
|    critic_loss     | 6.69e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 751899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 756      |
|    fps             | 27       |
|    time_elapsed    | 217      |
|    total_timesteps | 756000   |
| train/             |          |
|    actor_loss      | 9.73     |
|    critic_loss     | 2.84e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  76%|███████████████████████████████████████████▎             | 76/100 [7:37:10<2:25:23, 363.46s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 764      |
|    fps             | 27       |
|    time_elapsed    | 145      |
|    total_timesteps | 764000   |
| train/             |          |
|    actor_loss      | 9.97     |
|    critic_loss     | 6.56e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 763899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 768      |
|    fps             | 27       |
|    time_elapsed    | 291      |
|    total_timesteps | 768000   |
| train/             |          |
|    actor_loss      | 9.84     |
|    critic_loss     | 0.000255 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  77%|███████████████████████████████████████████▉             | 77/100 [7:43:14<2:19:21, 363.56s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 772      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 772000   |
| train/             |          |
|    actor_loss      | 9.85     |
|    critic_loss     | 3.75e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 771899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 776      |
|    fps             | 27       |
|    time_elapsed    | 218      |
|    total_timesteps | 776000   |
| train/             |          |
|    actor_loss      | 9.68     |
|    critic_loss     | 0.000309 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  78%|████████████████████████████████████████████▍            | 78/100 [7:49:18<2:13:23, 363.80s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.3    |
| time/              |          |
|    episodes        | 784      |
|    fps             | 27       |
|    time_elapsed    | 145      |
|    total_timesteps | 784000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 1.2e-06  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 783899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.3    |
| time/              |          |
|    episodes        | 788      |
|    fps             | 27       |
|    time_elapsed    | 291      |
|    total_timesteps | 788000   |
| train/             |          |
|    actor_loss      | 9.96     |
|    critic_loss     | 1.69e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  79%|█████████████████████████████████████████████            | 79/100 [7:55:22<2:07:21, 363.86s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.3    |
| time/              |          |
|    episodes        | 792      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 792000   |
| train/             |          |
|    actor_loss      | 9.82     |
|    critic_loss     | 0.000138 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 791899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.3    |
| time/              |          |
|    episodes        | 796      |
|    fps             | 27       |
|    time_elapsed    | 218      |
|    total_timesteps | 796000   |
| train/             |          |
|    actor_loss      | 9.9      |
|    critic_loss     | 0.000962 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  80%|█████████████████████████████████████████████▌           | 80/100 [8:01:29<2:01:31, 364.56s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 804      |
|    fps             | 27       |
|    time_elapsed    | 146      |
|    total_timesteps | 804000   |
| train/             |          |
|    actor_loss      | 9.67     |
|    critic_loss     | 0.000262 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 803899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 808      |
|    fps             | 27       |
|    time_elapsed    | 291      |
|    total_timesteps | 808000   |
| train/             |          |
|    actor_loss      | 9.55     |
|    critic_loss     | 0.00161  |
|    ent_coef        | 0        |
|    learning_

Training Progress:  81%|██████████████████████████████████████████████▏          | 81/100 [8:07:34<1:55:29, 364.70s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.3    |
| time/              |          |
|    episodes        | 812      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 812000   |
| train/             |          |
|    actor_loss      | 9.12     |
|    critic_loss     | 0.0012   |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 811899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 816      |
|    fps             | 27       |
|    time_elapsed    | 217      |
|    total_timesteps | 816000   |
| train/             |          |
|    actor_loss      | 9.77     |
|    critic_loss     | 0.000486 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  82%|██████████████████████████████████████████████▋          | 82/100 [8:13:36<1:49:14, 364.11s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 824      |
|    fps             | 27       |
|    time_elapsed    | 146      |
|    total_timesteps | 824000   |
| train/             |          |
|    actor_loss      | 9.74     |
|    critic_loss     | 9.78e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 823899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 828      |
|    fps             | 27       |
|    time_elapsed    | 292      |
|    total_timesteps | 828000   |
| train/             |          |
|    actor_loss      | 9.7      |
|    critic_loss     | 0.00286  |
|    ent_coef        | 0        |
|    learning_

Training Progress:  83%|███████████████████████████████████████████████▎         | 83/100 [8:19:41<1:43:14, 364.40s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 832      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 832000   |
| train/             |          |
|    actor_loss      | 9.61     |
|    critic_loss     | 0.0025   |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 831899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 836      |
|    fps             | 27       |
|    time_elapsed    | 217      |
|    total_timesteps | 836000   |
| train/             |          |
|    actor_loss      | 9.91     |
|    critic_loss     | 4.63e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  84%|███████████████████████████████████████████████▉         | 84/100 [8:25:44<1:37:02, 363.90s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 844      |
|    fps             | 27       |
|    time_elapsed    | 145      |
|    total_timesteps | 844000   |
| train/             |          |
|    actor_loss      | 9.9      |
|    critic_loss     | 0.000168 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 843899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 848      |
|    fps             | 27       |
|    time_elapsed    | 290      |
|    total_timesteps | 848000   |
| train/             |          |
|    actor_loss      | 9.67     |
|    critic_loss     | 0.000132 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  85%|████████████████████████████████████████████████▍        | 85/100 [8:31:48<1:30:56, 363.80s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 852      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 852000   |
| train/             |          |
|    actor_loss      | 9.92     |
|    critic_loss     | 9.14e-06 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 851899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 856      |
|    fps             | 27       |
|    time_elapsed    | 217      |
|    total_timesteps | 856000   |
| train/             |          |
|    actor_loss      | 9.82     |
|    critic_loss     | 0.000239 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  86%|█████████████████████████████████████████████████        | 86/100 [8:37:52<1:24:54, 363.86s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 864      |
|    fps             | 27       |
|    time_elapsed    | 145      |
|    total_timesteps | 864000   |
| train/             |          |
|    actor_loss      | 9.92     |
|    critic_loss     | 5.13e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 863899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 868      |
|    fps             | 27       |
|    time_elapsed    | 290      |
|    total_timesteps | 868000   |
| train/             |          |
|    actor_loss      | 9.79     |
|    critic_loss     | 0.000118 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  87%|█████████████████████████████████████████████████▌       | 87/100 [8:43:55<1:18:46, 363.57s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 872      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 872000   |
| train/             |          |
|    actor_loss      | 9.88     |
|    critic_loss     | 1.09e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 871899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 876      |
|    fps             | 27       |
|    time_elapsed    | 218      |
|    total_timesteps | 876000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 6.03e-06 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  88%|██████████████████████████████████████████████████▏      | 88/100 [8:49:57<1:12:40, 363.36s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 884      |
|    fps             | 27       |
|    time_elapsed    | 145      |
|    total_timesteps | 884000   |
| train/             |          |
|    actor_loss      | 9.97     |
|    critic_loss     | 6.34e-06 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 883899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 888      |
|    fps             | 27       |
|    time_elapsed    | 290      |
|    total_timesteps | 888000   |
| train/             |          |
|    actor_loss      | 9.89     |
|    critic_loss     | 2.83e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  89%|██████████████████████████████████████████████████▋      | 89/100 [8:56:01<1:06:36, 363.32s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 892      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 892000   |
| train/             |          |
|    actor_loss      | 9.75     |
|    critic_loss     | 6.49e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 891899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 896      |
|    fps             | 27       |
|    time_elapsed    | 218      |
|    total_timesteps | 896000   |
| train/             |          |
|    actor_loss      | 9.78     |
|    critic_loss     | 0.000482 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  90%|███████████████████████████████████████████████████▎     | 90/100 [9:02:04<1:00:34, 363.42s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 904      |
|    fps             | 27       |
|    time_elapsed    | 145      |
|    total_timesteps | 904000   |
| train/             |          |
|    actor_loss      | 9.82     |
|    critic_loss     | 5.2e-05  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 903899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 908      |
|    fps             | 27       |
|    time_elapsed    | 289      |
|    total_timesteps | 908000   |
| train/             |          |
|    actor_loss      | 9.79     |
|    critic_loss     | 9.74e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  91%|█████████████████████████████████████████████████████▋     | 91/100 [9:08:06<54:25, 362.88s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 912      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 912000   |
| train/             |          |
|    actor_loss      | 9.88     |
|    critic_loss     | 8.4e-05  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 911899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 916      |
|    fps             | 27       |
|    time_elapsed    | 214      |
|    total_timesteps | 916000   |
| train/             |          |
|    actor_loss      | 9.6      |
|    critic_loss     | 5.1e-05  |
|    ent_coef        | 0        |
|    learning_

Training Progress:  92%|██████████████████████████████████████████████████████▎    | 92/100 [9:14:04<48:10, 361.33s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 924      |
|    fps             | 28       |
|    time_elapsed    | 142      |
|    total_timesteps | 924000   |
| train/             |          |
|    actor_loss      | 9.62     |
|    critic_loss     | 0.000104 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 923899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 928      |
|    fps             | 27       |
|    time_elapsed    | 286      |
|    total_timesteps | 928000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 1.43e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  93%|██████████████████████████████████████████████████████▊    | 93/100 [9:20:01<42:00, 360.01s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 932      |
|    fps             | 28       |
|    time_elapsed    | 71       |
|    total_timesteps | 932000   |
| train/             |          |
|    actor_loss      | 9.64     |
|    critic_loss     | 0.000202 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 931899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.8    |
| time/              |          |
|    episodes        | 936      |
|    fps             | 27       |
|    time_elapsed    | 215      |
|    total_timesteps | 936000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 1.66e-06 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  94%|███████████████████████████████████████████████████████▍   | 94/100 [9:26:00<35:58, 359.80s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 944      |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 944000   |
| train/             |          |
|    actor_loss      | 10       |
|    critic_loss     | 2.63e-06 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 943899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 948      |
|    fps             | 27       |
|    time_elapsed    | 287      |
|    total_timesteps | 948000   |
| train/             |          |
|    actor_loss      | 9.76     |
|    critic_loss     | 6.53e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  95%|████████████████████████████████████████████████████████   | 95/100 [9:32:00<29:59, 359.93s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 952      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 952000   |
| train/             |          |
|    actor_loss      | 9.9      |
|    critic_loss     | 0.000295 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 951899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.7    |
| time/              |          |
|    episodes        | 956      |
|    fps             | 27       |
|    time_elapsed    | 216      |
|    total_timesteps | 956000   |
| train/             |          |
|    actor_loss      | 9.89     |
|    critic_loss     | 0.000239 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  96%|████████████████████████████████████████████████████████▋  | 96/100 [9:37:59<23:58, 359.74s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 964      |
|    fps             | 28       |
|    time_elapsed    | 141      |
|    total_timesteps | 964000   |
| train/             |          |
|    actor_loss      | 9.64     |
|    critic_loss     | 0.00132  |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 963899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.6    |
| time/              |          |
|    episodes        | 968      |
|    fps             | 28       |
|    time_elapsed    | 284      |
|    total_timesteps | 968000   |
| train/             |          |
|    actor_loss      | 9.93     |
|    critic_loss     | 4.13e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  97%|█████████████████████████████████████████████████████████▏ | 97/100 [9:43:56<17:56, 358.79s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 972      |
|    fps             | 27       |
|    time_elapsed    | 71       |
|    total_timesteps | 972000   |
| train/             |          |
|    actor_loss      | 9.75     |
|    critic_loss     | 0.000638 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 971899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 976      |
|    fps             | 27       |
|    time_elapsed    | 215      |
|    total_timesteps | 976000   |
| train/             |          |
|    actor_loss      | 9.89     |
|    critic_loss     | 4.88e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  98%|█████████████████████████████████████████████████████████▊ | 98/100 [9:49:54<11:57, 358.62s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 984      |
|    fps             | 27       |
|    time_elapsed    | 143      |
|    total_timesteps | 984000   |
| train/             |          |
|    actor_loss      | 9.88     |
|    critic_loss     | 1.85e-05 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 983899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.4    |
| time/              |          |
|    episodes        | 988      |
|    fps             | 27       |
|    time_elapsed    | 287      |
|    total_timesteps | 988000   |
| train/             |          |
|    actor_loss      | 9.68     |
|    critic_loss     | 2.69e-05 |
|    ent_coef        | 0        |
|    learning_

Training Progress:  99%|██████████████████████████████████████████████████████████▍| 99/100 [9:55:54<05:58, 358.92s/it]

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 992      |
|    fps             | 27       |
|    time_elapsed    | 72       |
|    total_timesteps | 992000   |
| train/             |          |
|    actor_loss      | 9.91     |
|    critic_loss     | 0.000867 |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 991899   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -83.5    |
| time/              |          |
|    episodes        | 996      |
|    fps             | 27       |
|    time_elapsed    | 215      |
|    total_timesteps | 996000   |
| train/             |          |
|    actor_loss      | 9.92     |
|    critic_loss     | 1.93e-06 |
|    ent_coef        | 0        |
|    learning_

Training Progress: 100%|█████████████████████████████████████████████████████████| 100/100 [10:01:55<00:00, 361.15s/it]

Training complete. Model saved as 'sac_carracing_sb3'.





In [1]:
import gymnasium as gym
from stable_baselines3 import SAC
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
import numpy as np

seeds = [0, 1, 10, 42, 100, 123, 999]
n_eval_episodes = 5

def make_env(seed):
    def _init():
        env = gym.make("CarRacing-v3", render_mode="rgb_array", continuous=True)
        env.reset(seed=seed)
        env.action_space.seed(seed)
        return env
    return _init

mean_rewards = []
std_rewards = []

for seed in seeds:
    vec_env = DummyVecEnv([make_env(seed)])
    vec_env = VecTransposeImage(vec_env)
    model = SAC.load("sac_carracing_sb3", env=vec_env)
    mean_reward, std_reward = evaluate_policy(
        model, vec_env, n_eval_episodes=n_eval_episodes, render=False, deterministic=True
    )
    mean_rewards.append(mean_reward)
    std_rewards.append(std_reward)

overall_mean = np.mean(mean_rewards)
overall_std = np.mean(std_rewards)

print(f"\nOverall average across seeds: Mean reward = {overall_mean:.2f} ± {overall_std:.2f}")





Overall average across seeds: Mean reward = -83.48 ± 1.43
