### Atari Games

In [1]:
from pyvirtualdisplay import Display

virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()

<pyvirtualdisplay.display.Display at 0x7d4ad015dad0>

In [1]:
import gymnasium as gym
import cv2
import torch
from torch import nn
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
import gymnasium as gym
from tqdm import tqdm
import os

env = gym.make('ALE/SpaceInvaders-v5', frameskip=4)

A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


In [1]:
import torch
import torch.nn as nn

class DQN(nn.Module):
    def __init__(self, num_actions):
        super(DQN, self).__init__()
        self.conv_block = nn.Sequential(
            nn.Conv2d(4, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Flatten(),
        )

        self.fc_block = nn.Sequential(
            nn.Linear(in_features=1024, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=num_actions)
        )

    def forward(self, input):
        x = self.conv_block(input) 
        out = self.fc_block(x)
        return out

### Własna inplemntacja 

In [61]:
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import cv2
from tqdm.auto import tqdm

class DQLearn:
    def __init__(self, net, env, gamma=0.99, epsilon=1.0, epsilon_min=0.1, epsilon_decay=0.995,
                 lr=1e-4, batch_size=128, memory_size=10000):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.net = net.to(self.device)
        self.target_net = type(net)(env.action_space.n).to(self.device)
        self.target_net.load_state_dict(self.net.state_dict())
        self.target_net.eval()

        self.env = env
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay

        self.batch_size = batch_size
        self.memory = deque(maxlen=memory_size)

        self.optimizer = optim.Adam(self.net.parameters(), lr=lr)
        self.loss_fn = nn.MSELoss()

        self.frame_stack = deque(maxlen=4)

    def preprocess_frame(self, frame):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        frame = cv2.resize(frame, (64, 64))
        return frame.astype(np.float32) / 255.0

    def stack_frames(self, frame, is_new_episode):
        frame = self.preprocess_frame(frame)
        if is_new_episode:
            self.frame_stack = deque([np.zeros((64, 64), dtype=np.float32)] * 4, maxlen=4)
            for _ in range(4):
                self.frame_stack.append(frame)
        else:
            self.frame_stack.append(frame)

        stacked_state = np.stack(self.frame_stack, axis=0)  # shape: (4, 64, 64)
        return stacked_state

    def select_action(self, state):
        if np.random.rand() < self.epsilon:
            return self.env.action_space.sample()
        state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(self.device)
        q_values = self.net(state_tensor)
        return torch.argmax(q_values, dim=1).item()

    def store_transition(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_step(self):
        if len(self.memory) < self.batch_size:
            return

        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        states = torch.tensor(states, dtype=torch.float32).to(self.device)
        actions = torch.tensor(actions).unsqueeze(1).to(self.device)
        rewards = torch.tensor(rewards, dtype=torch.float32).unsqueeze(1).to(self.device)
        next_states = torch.tensor(next_states, dtype=torch.float32).to(self.device)
        dones = torch.tensor(dones, dtype=torch.float32).unsqueeze(1).to(self.device)

        q_values = self.net(states).gather(1, actions)
        with torch.no_grad():
            next_q = self.target_net(next_states).max(1)[0].unsqueeze(1)
            q_target = rewards + (1 - dones) * self.gamma * next_q

        loss = self.loss_fn(q_values, q_target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def update_target_net(self):
        self.target_net.load_state_dict(self.net.state_dict())

    def train(self, episodes=500, update_target_every=10):
        for ep in tqdm(range(episodes)):
            frame = self.env.reset()[0]
            state = self.stack_frames(frame, is_new_episode=True)
            total_reward = 0

            done = False
            while not done:
                action = self.select_action(state)
                next_frame, reward, terminated, truncated, _ = self.env.step(action)
                done = terminated or truncated
                next_state = self.stack_frames(next_frame, is_new_episode=False)

                self.store_transition(state, action, reward, next_state, done)
                self.train_step()
                state = next_state
                total_reward += reward

            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

            if ep % update_target_every == 0:
                self.update_target_net()

            print(f"Episode {ep} | Total Reward: {total_reward:.2f} | Epsilon: {self.epsilon:.3f}")

In [62]:
dql = DQLearn(net=DQN(env.action_space.n), env=env)

In [63]:
dql.train()

  0%|          | 1/500 [03:14<26:59:32, 194.73s/it]

Episode 0 | Total Reward: 10.00 | Epsilon: 0.995


  0%|          | 2/500 [08:01<34:27:59, 249.16s/it]

Episode 1 | Total Reward: 45.00 | Epsilon: 0.990


  0%|          | 2/500 [08:08<33:49:13, 244.49s/it]


KeyboardInterrupt: 

### Szkoenie modelu za pomoca PPO z stable_baselines

In [None]:
from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
import gymnasium as gym
from tqdm import tqdm
import os

os.makedirs("checkpoints", exist_ok=True)

env = gym.make("ALE/SpaceInvaders-v5")
env = Monitor(env)

env = DummyVecEnv([lambda: env])

env = VecFrameStack(env, n_stack=4)

model = DQN(
    "CnnPolicy",
    env,
    buffer_size=50_000, 
    verbose=1
)

total_timesteps = 1_000_000
epoch_size = 1000
num_epochs = total_timesteps // epoch_size

for epoch in tqdm(range(1, num_epochs + 1), desc="Training Progress", ncols=100):
    model.learn(total_timesteps=epoch_size, reset_num_timesteps=False)
    
    if epoch % 50 == 0:
        model.save(f"checkpoints/ppo_spaceinvaders_epoch_{epoch}")

model.save("ppo_spaceinvaders_final")


In [None]:
os.makedirs("checkpoints", exist_ok=True)

env = gym.make("ALE/SpaceInvaders-v5")
env = Monitor(env)

env = DummyVecEnv([lambda: env])

env = VecFrameStack(env, n_stack=4)

model = DQN("CnnPolicy", env, buffer_size=30000, verbose=1)

total_timesteps = 1_000_000
epoch_size = 1000
num_epochs = total_timesteps // epoch_size

for epoch in tqdm(range(1, num_epochs + 1), desc="Training Progress", ncols=100):
    model.learn(total_timesteps=epoch_size, reset_num_timesteps=False)
    
    if epoch % 50 == 0:
        model.save(f"checkpoints/ppo_spaceinvaders_epoch_{epoch}")

model.save("ppo_spaceinvaders_final")


----------------------------------
Training Progress:  91%|███████████████████████████████████▎   | 907/1000 [1:59:54<13:03,  8.43s/it]
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 507      |
|    ep_rew_mean      | 329      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1828     |
|    fps              | 120      |
|    time_elapsed     | 2        |
|    total_timesteps  | 907292   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.628    |
|    n_updates        | 214322   |
----------------------------------

In [3]:
env = gym.make("ALE/SpaceInvaders-v5")
env = Monitor(env)
env = DummyVecEnv([lambda: env])
env = VecFrameStack(env, n_stack=4)

model = DQN.load("/home/plorenc/Desktop/AiR_ISS/ML/RL/checkpoints/ppo_spaceinvaders_epoch_10.zip", env=env)

total_timesteps = 1_000_000
epoch_size = 1000
num_epochs = total_timesteps // epoch_size

for epoch in tqdm(range(1, 1000 + 1), desc="Training Progress", ncols=100):
    model.learn(total_timesteps=epoch_size, reset_num_timesteps=False)
    
    if epoch % 50 == 0:
        model.save(f"checkpoints/ppo_spaceinvaders_epoch_{epoch}")

model.save("ppo_spaceinvaders_final")


model.save("checkpoints/ppo_spaceinvaders_epoch_150")


Training Progress:  46%|█████████████████                    | 460/1000 [1:05:14<1:19:18,  8.81s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 550      |
|    ep_rew_mean      | 360      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2820     |
|    fps              | 118      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1470296  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.796    |
|    n_updates        | 355073   |
----------------------------------


Training Progress:  46%|█████████████████                    | 462/1000 [1:05:31<1:17:49,  8.68s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 555      |
|    ep_rew_mean      | 368      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2824     |
|    fps              | 114      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1472692  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.32     |
|    n_updates        | 355672   |
----------------------------------


Training Progress:  46%|█████████████████▏                   | 463/1000 [1:05:40<1:17:58,  8.71s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 545      |
|    ep_rew_mean      | 358      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2828     |
|    fps              | 117      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1473962  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.38     |
|    n_updates        | 355990   |
----------------------------------


Training Progress:  47%|█████████████████▎                   | 467/1000 [1:06:14<1:16:40,  8.63s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 557      |
|    ep_rew_mean      | 368      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2832     |
|    fps              | 120      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1477148  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.647    |
|    n_updates        | 356786   |
----------------------------------


Training Progress:  47%|█████████████████▎                   | 468/1000 [1:06:23<1:16:17,  8.60s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 543      |
|    ep_rew_mean      | 357      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2836     |
|    fps              | 121      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1478754  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.342    |
|    n_updates        | 357188   |
----------------------------------


Training Progress:  47%|█████████████████▍                   | 470/1000 [1:06:39<1:14:22,  8.42s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 540      |
|    ep_rew_mean      | 354      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2840     |
|    fps              | 123      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1480388  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.64     |
|    n_updates        | 357596   |
----------------------------------


Training Progress:  47%|█████████████████▍                   | 472/1000 [1:06:56<1:14:00,  8.41s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 535      |
|    ep_rew_mean      | 349      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2844     |
|    fps              | 116      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1482400  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.619    |
|    n_updates        | 358099   |
----------------------------------


Training Progress:  47%|█████████████████▌                   | 474/1000 [1:07:13<1:14:35,  8.51s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 535      |
|    ep_rew_mean      | 350      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2848     |
|    fps              | 119      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1484267  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.45     |
|    n_updates        | 358566   |
----------------------------------


Training Progress:  48%|█████████████████▌                   | 476/1000 [1:07:32<1:18:15,  8.96s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 527      |
|    ep_rew_mean      | 344      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2852     |
|    fps              | 104      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1486165  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 9.73     |
|    n_updates        | 359041   |
----------------------------------


Training Progress:  48%|█████████████████▋                   | 479/1000 [1:07:58<1:15:51,  8.74s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 532      |
|    ep_rew_mean      | 344      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2856     |
|    fps              | 116      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1489088  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.798    |
|    n_updates        | 359771   |
----------------------------------


Training Progress:  48%|█████████████████▊                   | 481/1000 [1:08:15<1:15:00,  8.67s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 539      |
|    ep_rew_mean      | 347      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2860     |
|    fps              | 116      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1491568  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.743    |
|    n_updates        | 360391   |
----------------------------------


Training Progress:  48%|█████████████████▊                   | 483/1000 [1:08:33<1:14:50,  8.69s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 545      |
|    ep_rew_mean      | 354      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2864     |
|    fps              | 114      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1493467  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.61     |
|    n_updates        | 360866   |
----------------------------------


Training Progress:  48%|█████████████████▉                   | 485/1000 [1:08:50<1:14:25,  8.67s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 542      |
|    ep_rew_mean      | 348      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2868     |
|    fps              | 113      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1495505  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.61     |
|    n_updates        | 361376   |
----------------------------------


Training Progress:  49%|██████████████████                   | 487/1000 [1:09:07<1:14:09,  8.67s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 533      |
|    ep_rew_mean      | 340      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2872     |
|    fps              | 116      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1497304  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.96     |
|    n_updates        | 361825   |
----------------------------------


Training Progress:  49%|██████████████████                   | 488/1000 [1:09:16<1:13:48,  8.65s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 527      |
|    ep_rew_mean      | 334      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2876     |
|    fps              | 120      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1498332  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.88     |
|    n_updates        | 362082   |
----------------------------------


Training Progress:  49%|██████████████████                   | 489/1000 [1:09:24<1:12:49,  8.55s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 524      |
|    ep_rew_mean      | 329      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2880     |
|    fps              | 119      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1499435  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.567    |
|    n_updates        | 362358   |
----------------------------------


Training Progress:  49%|██████████████████▏                  | 492/1000 [1:09:49<1:10:31,  8.33s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 536      |
|    ep_rew_mean      | 337      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2884     |
|    fps              | 120      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1502308  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.13     |
|    n_updates        | 363076   |
----------------------------------


Training Progress:  49%|██████████████████▎                  | 494/1000 [1:10:06<1:11:25,  8.47s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 540      |
|    ep_rew_mean      | 340      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2888     |
|    fps              | 118      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1504871  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.89     |
|    n_updates        | 363717   |
----------------------------------


Training Progress:  50%|██████████████████▎                  | 496/1000 [1:10:23<1:10:51,  8.44s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 532      |
|    ep_rew_mean      | 332      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2892     |
|    fps              | 118      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1506714  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.492    |
|    n_updates        | 364178   |
----------------------------------


Training Progress:  50%|██████████████████▍                  | 499/1000 [1:10:48<1:10:17,  8.42s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 542      |
|    ep_rew_mean      | 339      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2896     |
|    fps              | 121      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1509956  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.544    |
|    n_updates        | 364988   |
----------------------------------


Training Progress:  50%|██████████████████▌                  | 501/1000 [1:11:05<1:10:23,  8.46s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 531      |
|    ep_rew_mean      | 329      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2900     |
|    fps              | 119      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1511606  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.682    |
|    n_updates        | 365401   |
----------------------------------


Training Progress:  50%|██████████████████▋                  | 504/1000 [1:11:30<1:09:23,  8.39s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 529      |
|    ep_rew_mean      | 329      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2904     |
|    fps              | 114      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1514154  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.708    |
|    n_updates        | 366038   |
----------------------------------


Training Progress:  51%|██████████████████▋                  | 506/1000 [1:11:48<1:09:51,  8.49s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 529      |
|    ep_rew_mean      | 331      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2908     |
|    fps              | 119      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1516600  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.12     |
|    n_updates        | 366649   |
----------------------------------


Training Progress:  51%|██████████████████▊                  | 508/1000 [1:12:05<1:10:19,  8.58s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 519      |
|    ep_rew_mean      | 323      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2912     |
|    fps              | 117      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1518357  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.925    |
|    n_updates        | 367089   |
----------------------------------


Training Progress:  51%|██████████████████▊                  | 510/1000 [1:12:22<1:09:20,  8.49s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 524      |
|    ep_rew_mean      | 329      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2916     |
|    fps              | 117      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1520747  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.962    |
|    n_updates        | 367686   |
----------------------------------


Training Progress:  51%|██████████████████▉                  | 513/1000 [1:12:47<1:08:15,  8.41s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 530      |
|    ep_rew_mean      | 332      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2920     |
|    fps              | 121      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1523265  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.781    |
|    n_updates        | 368316   |
----------------------------------


Training Progress:  52%|███████████████████                  | 515/1000 [1:13:03<1:07:27,  8.34s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 526      |
|    ep_rew_mean      | 326      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2924     |
|    fps              | 118      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1525281  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.651    |
|    n_updates        | 368820   |
----------------------------------


Training Progress:  52%|███████████████████                  | 516/1000 [1:13:12<1:07:27,  8.36s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 529      |
|    ep_rew_mean      | 331      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2928     |
|    fps              | 120      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1526850  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.209    |
|    n_updates        | 369212   |
----------------------------------


Training Progress:  52%|███████████████████▏                 | 518/1000 [1:13:29<1:07:16,  8.37s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 512      |
|    ep_rew_mean      | 319      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2932     |
|    fps              | 120      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1528327  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.522    |
|    n_updates        | 369581   |
----------------------------------


Training Progress:  52%|███████████████████▏                 | 520/1000 [1:13:45<1:07:13,  8.40s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 513      |
|    ep_rew_mean      | 322      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2936     |
|    fps              | 126      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1530068  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.1      |
|    n_updates        | 370016   |
----------------------------------


Training Progress:  52%|███████████████████▎                 | 521/1000 [1:13:54<1:06:37,  8.34s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 515      |
|    ep_rew_mean      | 327      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2940     |
|    fps              | 114      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1531858  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.07     |
|    n_updates        | 370464   |
----------------------------------


Training Progress:  52%|███████████████████▍                 | 524/1000 [1:14:19<1:06:23,  8.37s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 518      |
|    ep_rew_mean      | 328      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2944     |
|    fps              | 123      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1534176  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.69     |
|    n_updates        | 371043   |
----------------------------------


Training Progress:  53%|███████████████████▍                 | 526/1000 [1:14:36<1:06:05,  8.37s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 518      |
|    ep_rew_mean      | 328      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2948     |
|    fps              | 119      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1536117  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.37     |
|    n_updates        | 371529   |
----------------------------------


Training Progress:  53%|███████████████████▌                 | 528/1000 [1:14:52<1:05:32,  8.33s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 519      |
|    ep_rew_mean      | 330      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2952     |
|    fps              | 121      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1538107  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.565    |
|    n_updates        | 372026   |
----------------------------------


Training Progress:  53%|███████████████████▋                 | 531/1000 [1:15:17<1:04:34,  8.26s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 520      |
|    ep_rew_mean      | 327      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2956     |
|    fps              | 118      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1541041  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.505    |
|    n_updates        | 372760   |
----------------------------------


Training Progress:  53%|███████████████████▋                 | 532/1000 [1:15:25<1:04:25,  8.26s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 511      |
|    ep_rew_mean      | 322      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2960     |
|    fps              | 122      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1542714  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.08     |
|    n_updates        | 373178   |
----------------------------------


Training Progress:  53%|███████████████████▊                 | 534/1000 [1:15:42<1:04:06,  8.25s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 510      |
|    ep_rew_mean      | 319      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2964     |
|    fps              | 123      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1544517  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.594    |
|    n_updates        | 373629   |
----------------------------------


Training Progress:  54%|███████████████████▊                 | 536/1000 [1:15:58<1:04:19,  8.32s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 509      |
|    ep_rew_mean      | 318      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2968     |
|    fps              | 115      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1546372  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.13     |
|    n_updates        | 374092   |
----------------------------------


Training Progress:  54%|███████████████████▉                 | 539/1000 [1:16:25<1:06:40,  8.68s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 518      |
|    ep_rew_mean      | 324      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2972     |
|    fps              | 120      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1549153  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.929    |
|    n_updates        | 374788   |
----------------------------------


Training Progress:  54%|████████████████████                 | 541/1000 [1:16:43<1:06:47,  8.73s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 534      |
|    ep_rew_mean      | 339      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2976     |
|    fps              | 110      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1551758  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.93     |
|    n_updates        | 375439   |
----------------------------------


Training Progress:  54%|████████████████████                 | 543/1000 [1:17:00<1:07:16,  8.83s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 544      |
|    ep_rew_mean      | 351      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2980     |
|    fps              | 112      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1553852  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.76     |
|    n_updates        | 375962   |
----------------------------------


Training Progress:  55%|████████████████████▏                | 546/1000 [1:17:27<1:05:52,  8.71s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 544      |
|    ep_rew_mean      | 350      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2984     |
|    fps              | 116      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1556663  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.366    |
|    n_updates        | 376665   |
----------------------------------


Training Progress:  55%|████████████████████▎                | 548/1000 [1:17:44<1:04:39,  8.58s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 538      |
|    ep_rew_mean      | 347      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2988     |
|    fps              | 119      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1558653  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.653    |
|    n_updates        | 377163   |
----------------------------------


Training Progress:  55%|████████████████████▎                | 550/1000 [1:18:01<1:06:00,  8.80s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 542      |
|    ep_rew_mean      | 352      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2992     |
|    fps              | 120      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1560892  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.18     |
|    n_updates        | 377722   |
----------------------------------


Training Progress:  55%|████████████████████▍                | 553/1000 [1:18:26<1:03:19,  8.50s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 537      |
|    ep_rew_mean      | 348      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2996     |
|    fps              | 120      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1563703  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.726    |
|    n_updates        | 378425   |
----------------------------------


Training Progress:  56%|████████████████████▌                | 555/1000 [1:18:43<1:02:13,  8.39s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 542      |
|    ep_rew_mean      | 353      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3000     |
|    fps              | 119      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1565769  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.43     |
|    n_updates        | 378942   |
----------------------------------


Training Progress:  56%|████████████████████▋                | 558/1000 [1:19:08<1:01:47,  8.39s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 539      |
|    ep_rew_mean      | 350      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3004     |
|    fps              | 133      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1568016  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.19     |
|    n_updates        | 379503   |
----------------------------------


Training Progress:  56%|████████████████████▋                | 559/1000 [1:19:17<1:01:41,  8.39s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 533      |
|    ep_rew_mean      | 343      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3008     |
|    fps              | 120      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1569885  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.597    |
|    n_updates        | 379971   |
----------------------------------


Training Progress:  56%|████████████████████▊                | 561/1000 [1:19:33<1:01:12,  8.36s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 535      |
|    ep_rew_mean      | 345      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3012     |
|    fps              | 120      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1571868  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.76     |
|    n_updates        | 380466   |
----------------------------------


Training Progress:  56%|████████████████████▊                | 563/1000 [1:19:50<1:00:27,  8.30s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 529      |
|    ep_rew_mean      | 337      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3016     |
|    fps              | 120      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1573664  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.723    |
|    n_updates        | 380915   |
----------------------------------


Training Progress:  56%|████████████████████▉                | 565/1000 [1:20:07<1:00:26,  8.34s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 527      |
|    ep_rew_mean      | 339      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3020     |
|    fps              | 121      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1575920  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.481    |
|    n_updates        | 381479   |
----------------------------------


Training Progress:  57%|████████████████████▉                | 567/1000 [1:20:23<1:00:01,  8.32s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 525      |
|    ep_rew_mean      | 339      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3024     |
|    fps              | 122      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1577735  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.59     |
|    n_updates        | 381933   |
----------------------------------


Training Progress:  57%|██████████████████████▏                | 569/1000 [1:20:40<59:58,  8.35s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 523      |
|    ep_rew_mean      | 336      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3028     |
|    fps              | 119      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1579197  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.805    |
|    n_updates        | 382299   |
----------------------------------


Training Progress:  57%|█████████████████████▏               | 572/1000 [1:21:05<1:00:26,  8.47s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 539      |
|    ep_rew_mean      | 348      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3032     |
|    fps              | 118      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1582191  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.682    |
|    n_updates        | 383047   |
----------------------------------


Training Progress:  57%|██████████████████████▍                | 574/1000 [1:21:22<59:23,  8.37s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 548      |
|    ep_rew_mean      | 352      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3036     |
|    fps              | 121      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1584864  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.73     |
|    n_updates        | 383715   |
----------------------------------


Training Progress:  58%|██████████████████████▌                | 577/1000 [1:21:47<58:34,  8.31s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 553      |
|    ep_rew_mean      | 354      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3040     |
|    fps              | 122      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1587120  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.27     |
|    n_updates        | 384279   |
----------------------------------


Training Progress:  58%|██████████████████████▌                | 579/1000 [1:22:03<58:09,  8.29s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 551      |
|    ep_rew_mean      | 354      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3044     |
|    fps              | 122      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1589280  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.675    |
|    n_updates        | 384819   |
----------------------------------


Training Progress:  58%|██████████████████████▋                | 581/1000 [1:22:20<57:57,  8.30s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 549      |
|    ep_rew_mean      | 352      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3048     |
|    fps              | 127      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1591020  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.731    |
|    n_updates        | 385254   |
----------------------------------


Training Progress:  58%|██████████████████████▋                | 583/1000 [1:22:36<57:20,  8.25s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 552      |
|    ep_rew_mean      | 354      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3052     |
|    fps              | 120      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1593334  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.693    |
|    n_updates        | 385833   |
----------------------------------


Training Progress:  58%|██████████████████████▊                | 585/1000 [1:22:53<57:32,  8.32s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 549      |
|    ep_rew_mean      | 357      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3056     |
|    fps              | 121      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1595956  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.441    |
|    n_updates        | 386488   |
----------------------------------


Training Progress:  59%|██████████████████████▉                | 588/1000 [1:23:18<56:47,  8.27s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 559      |
|    ep_rew_mean      | 364      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3060     |
|    fps              | 120      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1598623  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.432    |
|    n_updates        | 387155   |
----------------------------------


Training Progress:  59%|███████████████████████                | 590/1000 [1:23:34<56:30,  8.27s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 560      |
|    ep_rew_mean      | 367      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3064     |
|    fps              | 121      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1600546  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.425    |
|    n_updates        | 387636   |
----------------------------------


Training Progress:  59%|███████████████████████                | 592/1000 [1:23:51<56:28,  8.30s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 559      |
|    ep_rew_mean      | 369      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3068     |
|    fps              | 120      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1602314  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.738    |
|    n_updates        | 388078   |
----------------------------------


Training Progress:  59%|███████████████████████▏               | 594/1000 [1:24:08<57:21,  8.48s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 556      |
|    ep_rew_mean      | 371      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3072     |
|    fps              | 113      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1604764  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.09     |
|    n_updates        | 388690   |
----------------------------------


Training Progress:  60%|███████████████████████▏               | 596/1000 [1:24:26<58:45,  8.73s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 549      |
|    ep_rew_mean      | 364      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3076     |
|    fps              | 111      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1606670  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.5      |
|    n_updates        | 389167   |
----------------------------------


Training Progress:  60%|███████████████████████▎               | 598/1000 [1:24:44<59:00,  8.81s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 545      |
|    ep_rew_mean      | 360      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3080     |
|    fps              | 118      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1608315  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.13     |
|    n_updates        | 389578   |
----------------------------------


Training Progress:  60%|███████████████████████▍               | 600/1000 [1:25:02<58:46,  8.82s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 541      |
|    ep_rew_mean      | 358      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3084     |
|    fps              | 125      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1610766  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.411    |
|    n_updates        | 390191   |
----------------------------------


Training Progress:  60%|███████████████████████▍               | 602/1000 [1:25:17<55:33,  8.37s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 541      |
|    ep_rew_mean      | 357      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3088     |
|    fps              | 118      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1612729  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.549    |
|    n_updates        | 390682   |
----------------------------------


Training Progress:  60%|███████████████████████▌               | 604/1000 [1:25:34<54:31,  8.26s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 539      |
|    ep_rew_mean      | 358      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3092     |
|    fps              | 127      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1614757  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.31     |
|    n_updates        | 391189   |
----------------------------------


Training Progress:  61%|███████████████████████▋               | 606/1000 [1:25:50<53:18,  8.12s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 531      |
|    ep_rew_mean      | 354      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3096     |
|    fps              | 123      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1616803  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.783    |
|    n_updates        | 391700   |
----------------------------------


Training Progress:  61%|███████████████████████▋               | 608/1000 [1:26:06<52:54,  8.10s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 529      |
|    ep_rew_mean      | 352      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3100     |
|    fps              | 125      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1618673  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.787    |
|    n_updates        | 392168   |
----------------------------------


Training Progress:  61%|███████████████████████▊               | 610/1000 [1:26:22<52:06,  8.02s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 525      |
|    ep_rew_mean      | 348      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3104     |
|    fps              | 127      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1620530  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.82     |
|    n_updates        | 392632   |
----------------------------------


Training Progress:  61%|███████████████████████▊               | 612/1000 [1:26:38<51:13,  7.92s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 528      |
|    ep_rew_mean      | 351      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3108     |
|    fps              | 126      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1622679  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.2      |
|    n_updates        | 393169   |
----------------------------------


Training Progress:  61%|███████████████████████▉               | 614/1000 [1:26:53<50:51,  7.91s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 527      |
|    ep_rew_mean      | 351      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3112     |
|    fps              | 122      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1624545  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.684    |
|    n_updates        | 393636   |
----------------------------------


Training Progress:  62%|████████████████████████               | 617/1000 [1:27:18<51:29,  8.07s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 543      |
|    ep_rew_mean      | 365      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3116     |
|    fps              | 117      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1627958  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.441    |
|    n_updates        | 394489   |
----------------------------------


Training Progress:  62%|████████████████████████▏              | 619/1000 [1:27:34<51:38,  8.13s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 540      |
|    ep_rew_mean      | 365      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3120     |
|    fps              | 126      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1629951  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.55     |
|    n_updates        | 394987   |
----------------------------------


Training Progress:  62%|████████████████████████▎              | 622/1000 [1:27:58<50:09,  7.96s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 545      |
|    ep_rew_mean      | 368      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3124     |
|    fps              | 127      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1632255  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.2      |
|    n_updates        | 395563   |
----------------------------------


Training Progress:  62%|████████████████████████▎              | 624/1000 [1:28:14<49:43,  7.94s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 551      |
|    ep_rew_mean      | 375      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3128     |
|    fps              | 125      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1634341  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.12     |
|    n_updates        | 396085   |
----------------------------------


Training Progress:  63%|████████████████████████▍              | 626/1000 [1:28:29<49:16,  7.91s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 545      |
|    ep_rew_mean      | 367      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3132     |
|    fps              | 127      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1636725  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.597    |
|    n_updates        | 396681   |
----------------------------------


Training Progress:  63%|████████████████████████▌              | 629/1000 [1:28:53<49:32,  8.01s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 542      |
|    ep_rew_mean      | 369      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3136     |
|    fps              | 115      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1639053  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.53     |
|    n_updates        | 397263   |
----------------------------------


Training Progress:  63%|████████████████████████▌              | 631/1000 [1:29:10<50:50,  8.27s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 544      |
|    ep_rew_mean      | 372      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3140     |
|    fps              | 116      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1641529  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.676    |
|    n_updates        | 397882   |
----------------------------------


Training Progress:  63%|████████████████████████▋              | 633/1000 [1:29:28<51:57,  8.49s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 540      |
|    ep_rew_mean      | 368      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3144     |
|    fps              | 112      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1643270  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.53     |
|    n_updates        | 398317   |
----------------------------------


Training Progress:  64%|████████████████████████▊              | 635/1000 [1:29:45<52:26,  8.62s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 546      |
|    ep_rew_mean      | 377      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3148     |
|    fps              | 118      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1645662  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.942    |
|    n_updates        | 398915   |
----------------------------------


Training Progress:  64%|████████████████████████▊              | 637/1000 [1:30:02<51:14,  8.47s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 546      |
|    ep_rew_mean      | 376      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3152     |
|    fps              | 119      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1647977  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.372    |
|    n_updates        | 399494   |
----------------------------------


Training Progress:  64%|████████████████████████▉              | 640/1000 [1:30:27<50:20,  8.39s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 541      |
|    ep_rew_mean      | 374      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3156     |
|    fps              | 120      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1650029  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.12     |
|    n_updates        | 400007   |
----------------------------------


Training Progress:  64%|█████████████████████████              | 642/1000 [1:30:44<49:59,  8.38s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 534      |
|    ep_rew_mean      | 369      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3160     |
|    fps              | 120      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1652033  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.362    |
|    n_updates        | 400508   |
----------------------------------


Training Progress:  64%|█████████████████████████              | 644/1000 [1:31:01<50:15,  8.47s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 535      |
|    ep_rew_mean      | 369      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3164     |
|    fps              | 108      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1654009  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.36     |
|    n_updates        | 401002   |
----------------------------------


Training Progress:  65%|█████████████████████████▏             | 646/1000 [1:31:18<50:34,  8.57s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 546      |
|    ep_rew_mean      | 378      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3168     |
|    fps              | 119      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1656912  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.14     |
|    n_updates        | 401727   |
----------------------------------


Training Progress:  65%|█████████████████████████▎             | 648/1000 [1:31:35<49:40,  8.47s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 538      |
|    ep_rew_mean      | 371      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3172     |
|    fps              | 118      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1658528  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.552    |
|    n_updates        | 402131   |
----------------------------------


Training Progress:  65%|█████████████████████████▎             | 650/1000 [1:31:52<50:10,  8.60s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 538      |
|    ep_rew_mean      | 371      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3176     |
|    fps              | 116      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1660444  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.39     |
|    n_updates        | 402610   |
----------------------------------


Training Progress:  65%|█████████████████████████▍             | 652/1000 [1:32:10<49:54,  8.60s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 545      |
|    ep_rew_mean      | 379      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3180     |
|    fps              | 117      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1662771  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.944    |
|    n_updates        | 403192   |
----------------------------------


Training Progress:  66%|█████████████████████████▌             | 655/1000 [1:32:35<49:01,  8.53s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 546      |
|    ep_rew_mean      | 380      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3184     |
|    fps              | 116      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1665339  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.64     |
|    n_updates        | 403834   |
----------------------------------


Training Progress:  66%|█████████████████████████▌             | 657/1000 [1:32:52<48:38,  8.51s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 551      |
|    ep_rew_mean      | 382      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3188     |
|    fps              | 119      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1667846  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.45     |
|    n_updates        | 404461   |
----------------------------------


Training Progress:  66%|█████████████████████████▋             | 660/1000 [1:33:17<47:43,  8.42s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 553      |
|    ep_rew_mean      | 381      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3192     |
|    fps              | 121      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1670035  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.42     |
|    n_updates        | 405008   |
----------------------------------


Training Progress:  66%|█████████████████████████▊             | 662/1000 [1:33:34<47:18,  8.40s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 552      |
|    ep_rew_mean      | 380      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3196     |
|    fps              | 125      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1672023  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.94     |
|    n_updates        | 405505   |
----------------------------------


Training Progress:  66%|█████████████████████████▊             | 663/1000 [1:33:42<47:09,  8.40s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 547      |
|    ep_rew_mean      | 374      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3200     |
|    fps              | 120      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1673374  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.678    |
|    n_updates        | 405843   |
----------------------------------


Training Progress:  66%|█████████████████████████▉             | 665/1000 [1:33:59<47:04,  8.43s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 547      |
|    ep_rew_mean      | 375      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3204     |
|    fps              | 121      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1675268  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.231    |
|    n_updates        | 406316   |
----------------------------------


Training Progress:  67%|██████████████████████████             | 667/1000 [1:34:16<46:27,  8.37s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 552      |
|    ep_rew_mean      | 380      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3208     |
|    fps              | 120      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1677900  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.34     |
|    n_updates        | 406974   |
----------------------------------


Training Progress:  67%|██████████████████████████             | 669/1000 [1:34:33<46:18,  8.40s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 552      |
|    ep_rew_mean      | 379      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3212     |
|    fps              | 119      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1679754  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.05     |
|    n_updates        | 407438   |
----------------------------------


Training Progress:  67%|██████████████████████████▏            | 672/1000 [1:34:58<45:44,  8.37s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 541      |
|    ep_rew_mean      | 369      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3216     |
|    fps              | 115      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1682009  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.725    |
|    n_updates        | 408002   |
----------------------------------


Training Progress:  67%|██████████████████████████▏            | 673/1000 [1:35:07<46:53,  8.60s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 532      |
|    ep_rew_mean      | 358      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3220     |
|    fps              | 108      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1683142  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.465    |
|    n_updates        | 408285   |
----------------------------------


Training Progress:  67%|██████████████████████████▎            | 674/1000 [1:35:16<47:24,  8.73s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 523      |
|    ep_rew_mean      | 349      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3224     |
|    fps              | 116      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1684566  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.269    |
|    n_updates        | 408641   |
----------------------------------


Training Progress:  68%|██████████████████████████▎            | 676/1000 [1:35:33<45:59,  8.52s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 522      |
|    ep_rew_mean      | 346      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3228     |
|    fps              | 122      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1686496  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.299    |
|    n_updates        | 409123   |
----------------------------------


Training Progress:  68%|██████████████████████████▍            | 678/1000 [1:35:49<44:52,  8.36s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 517      |
|    ep_rew_mean      | 342      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3232     |
|    fps              | 122      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1688413  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.4      |
|    n_updates        | 409603   |
----------------------------------


Training Progress:  68%|██████████████████████████▌            | 681/1000 [1:36:13<43:42,  8.22s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 527      |
|    ep_rew_mean      | 353      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3236     |
|    fps              | 122      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1691716  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.622    |
|    n_updates        | 410428   |
----------------------------------


Training Progress:  68%|██████████████████████████▋            | 683/1000 [1:36:30<43:30,  8.24s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 523      |
|    ep_rew_mean      | 350      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3240     |
|    fps              | 117      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1693800  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.667    |
|    n_updates        | 410949   |
----------------------------------


Training Progress:  69%|██████████████████████████▊            | 687/1000 [1:37:05<45:34,  8.74s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 537      |
|    ep_rew_mean      | 359      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3244     |
|    fps              | 131      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1697003  |
----------------------------------


Training Progress:  69%|██████████████████████████▉            | 690/1000 [1:37:30<43:18,  8.38s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 547      |
|    ep_rew_mean      | 361      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3248     |
|    fps              | 125      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1700393  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.46     |
|    n_updates        | 412598   |
----------------------------------


Training Progress:  69%|███████████████████████████            | 693/1000 [1:37:55<42:24,  8.29s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 552      |
|    ep_rew_mean      | 367      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3252     |
|    fps              | 124      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1703160  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.556    |
|    n_updates        | 413289   |
----------------------------------


Training Progress:  70%|███████████████████████████            | 695/1000 [1:38:11<42:10,  8.30s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 551      |
|    ep_rew_mean      | 365      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3256     |
|    fps              | 122      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1705153  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.441    |
|    n_updates        | 413788   |
----------------------------------


Training Progress:  70%|███████████████████████████▏           | 697/1000 [1:38:29<43:11,  8.55s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 553      |
|    ep_rew_mean      | 366      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3260     |
|    fps              | 116      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1707374  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.191    |
|    n_updates        | 414343   |
----------------------------------


Training Progress:  70%|███████████████████████████▎           | 700/1000 [1:38:54<42:57,  8.59s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 562      |
|    ep_rew_mean      | 373      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3264     |
|    fps              | 122      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1710205  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.8      |
|    n_updates        | 415051   |
----------------------------------


Training Progress:  70%|███████████████████████████▍           | 702/1000 [1:39:11<41:51,  8.43s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 551      |
|    ep_rew_mean      | 362      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3268     |
|    fps              | 126      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1712048  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.618    |
|    n_updates        | 415511   |
----------------------------------


Training Progress:  70%|███████████████████████████▍           | 704/1000 [1:39:28<41:16,  8.37s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 558      |
|    ep_rew_mean      | 366      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3272     |
|    fps              | 122      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1714288  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.44     |
|    n_updates        | 416071   |
----------------------------------


Training Progress:  71%|███████████████████████████▌           | 706/1000 [1:39:44<40:47,  8.33s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 563      |
|    ep_rew_mean      | 369      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3276     |
|    fps              | 122      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1716725  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.386    |
|    n_updates        | 416681   |
----------------------------------


Training Progress:  71%|███████████████████████████▌           | 708/1000 [1:40:01<40:19,  8.29s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 560      |
|    ep_rew_mean      | 364      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3280     |
|    fps              | 120      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1718820  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.612    |
|    n_updates        | 417204   |
----------------------------------


Training Progress:  71%|███████████████████████████▋           | 710/1000 [1:40:17<40:08,  8.31s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 553      |
|    ep_rew_mean      | 357      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3284     |
|    fps              | 120      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1720591  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.894    |
|    n_updates        | 417647   |
----------------------------------


Training Progress:  71%|███████████████████████████▊           | 712/1000 [1:40:34<40:00,  8.34s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 544      |
|    ep_rew_mean      | 354      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3288     |
|    fps              | 120      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1722199  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.588    |
|    n_updates        | 418049   |
----------------------------------


Training Progress:  71%|███████████████████████████▊           | 714/1000 [1:40:51<39:47,  8.35s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 548      |
|    ep_rew_mean      | 358      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3292     |
|    fps              | 120      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1724838  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.02     |
|    n_updates        | 418709   |
----------------------------------


Training Progress:  72%|███████████████████████████▉           | 717/1000 [1:41:15<38:58,  8.26s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 551      |
|    ep_rew_mean      | 361      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3296     |
|    fps              | 121      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1727120  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.472    |
|    n_updates        | 419279   |
----------------------------------


Training Progress:  72%|████████████████████████████           | 719/1000 [1:41:32<38:52,  8.30s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 563      |
|    ep_rew_mean      | 370      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3300     |
|    fps              | 120      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1729632  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.19     |
|    n_updates        | 419907   |
----------------------------------


Training Progress:  72%|████████████████████████████           | 721/1000 [1:41:49<38:39,  8.31s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 561      |
|    ep_rew_mean      | 370      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3304     |
|    fps              | 121      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1731390  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.587    |
|    n_updates        | 420347   |
----------------------------------


Training Progress:  72%|████████████████████████████▏          | 723/1000 [1:42:05<38:07,  8.26s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 558      |
|    ep_rew_mean      | 368      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3308     |
|    fps              | 120      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1733731  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.356    |
|    n_updates        | 420932   |
----------------------------------


Training Progress:  73%|████████████████████████████▎          | 726/1000 [1:42:30<37:46,  8.27s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 565      |
|    ep_rew_mean      | 375      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3312     |
|    fps              | 122      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1736244  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.19     |
|    n_updates        | 421560   |
----------------------------------


Training Progress:  73%|████████████████████████████▍          | 729/1000 [1:42:55<37:27,  8.29s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 572      |
|    ep_rew_mean      | 383      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3316     |
|    fps              | 120      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1739225  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.723    |
|    n_updates        | 422306   |
----------------------------------


Training Progress:  73%|████████████████████████████▌          | 732/1000 [1:43:20<37:11,  8.33s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 590      |
|    ep_rew_mean      | 400      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3320     |
|    fps              | 124      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1742107  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.642    |
|    n_updates        | 423026   |
----------------------------------


Training Progress:  73%|████████████████████████████▋          | 734/1000 [1:43:36<36:45,  8.29s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 602      |
|    ep_rew_mean      | 410      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3324     |
|    fps              | 118      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1744756  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.568    |
|    n_updates        | 423688   |
----------------------------------


Training Progress:  74%|████████████████████████████▊          | 738/1000 [1:44:09<35:10,  8.05s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 616      |
|    ep_rew_mean      | 420      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3328     |
|    fps              | 130      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1748056  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.805    |
|    n_updates        | 424513   |
----------------------------------


Training Progress:  74%|████████████████████████████▊          | 740/1000 [1:44:24<34:30,  7.96s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 617      |
|    ep_rew_mean      | 423      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3332     |
|    fps              | 125      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1750141  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.701    |
|    n_updates        | 425035   |
----------------------------------


Training Progress:  74%|████████████████████████████▉          | 742/1000 [1:44:40<34:05,  7.93s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 611      |
|    ep_rew_mean      | 414      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3336     |
|    fps              | 128      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1752797  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.786    |
|    n_updates        | 425699   |
----------------------------------


Training Progress:  74%|█████████████████████████████          | 745/1000 [1:45:04<33:22,  7.85s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 619      |
|    ep_rew_mean      | 419      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3340     |
|    fps              | 128      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1755687  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.424    |
|    n_updates        | 426421   |
----------------------------------


Training Progress:  75%|█████████████████████████████▏         | 748/1000 [1:45:27<32:57,  7.85s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 613      |
|    ep_rew_mean      | 420      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3344     |
|    fps              | 129      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1758302  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.12     |
|    n_updates        | 427075   |
----------------------------------


Training Progress:  75%|█████████████████████████████▎         | 751/1000 [1:45:51<32:55,  7.94s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 612      |
|    ep_rew_mean      | 417      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3348     |
|    fps              | 128      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1761566  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.764    |
|    n_updates        | 427891   |
----------------------------------


Training Progress:  75%|█████████████████████████████▎         | 753/1000 [1:46:07<32:27,  7.88s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 605      |
|    ep_rew_mean      | 414      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3352     |
|    fps              | 128      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1763694  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.77     |
|    n_updates        | 428423   |
----------------------------------


Training Progress:  76%|█████████████████████████████▍         | 756/1000 [1:46:30<32:00,  7.87s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 612      |
|    ep_rew_mean      | 416      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3356     |
|    fps              | 125      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1766337  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.561    |
|    n_updates        | 429084   |
----------------------------------


Training Progress:  76%|█████████████████████████████▌         | 758/1000 [1:46:46<31:51,  7.90s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 613      |
|    ep_rew_mean      | 418      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3360     |
|    fps              | 128      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1768662  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.523    |
|    n_updates        | 429665   |
----------------------------------


Training Progress:  76%|█████████████████████████████▋         | 761/1000 [1:47:10<31:22,  7.87s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 617      |
|    ep_rew_mean      | 418      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3364     |
|    fps              | 126      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1771873  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.651    |
|    n_updates        | 430468   |
----------------------------------


Training Progress:  76%|█████████████████████████████▊         | 765/1000 [1:47:42<30:56,  7.90s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 633      |
|    ep_rew_mean      | 430      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3368     |
|    fps              | 129      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1775381  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.3      |
|    n_updates        | 431345   |
----------------------------------


Training Progress:  77%|█████████████████████████████▉         | 767/1000 [1:47:57<30:39,  7.89s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 629      |
|    ep_rew_mean      | 430      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3372     |
|    fps              | 128      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1777224  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.87     |
|    n_updates        | 431805   |
----------------------------------


Training Progress:  77%|█████████████████████████████▉         | 769/1000 [1:48:13<30:21,  7.89s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 627      |
|    ep_rew_mean      | 430      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3376     |
|    fps              | 128      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1779470  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.306    |
|    n_updates        | 432367   |
----------------------------------


Training Progress:  77%|██████████████████████████████         | 770/1000 [1:48:21<30:13,  7.88s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 621      |
|    ep_rew_mean      | 424      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3380     |
|    fps              | 126      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1780900  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.59     |
|    n_updates        | 432724   |
----------------------------------


Training Progress:  77%|██████████████████████████████         | 772/1000 [1:48:37<29:59,  7.89s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 621      |
|    ep_rew_mean      | 427      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3384     |
|    fps              | 127      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1782690  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.208    |
|    n_updates        | 433172   |
----------------------------------


Training Progress:  77%|██████████████████████████████▏        | 774/1000 [1:48:52<29:38,  7.87s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 624      |
|    ep_rew_mean      | 427      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3388     |
|    fps              | 127      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1784561  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.401    |
|    n_updates        | 433640   |
----------------------------------


Training Progress:  78%|██████████████████████████████▎        | 776/1000 [1:49:08<29:28,  7.90s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 618      |
|    ep_rew_mean      | 424      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3392     |
|    fps              | 127      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1786668  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.42     |
|    n_updates        | 434166   |
----------------------------------


Training Progress:  78%|██████████████████████████████▎        | 778/1000 [1:49:24<29:06,  7.87s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 616      |
|    ep_rew_mean      | 419      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3396     |
|    fps              | 126      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1788737  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.527    |
|    n_updates        | 434684   |
----------------------------------


Training Progress:  78%|██████████████████████████████▍        | 780/1000 [1:49:40<28:49,  7.86s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 612      |
|    ep_rew_mean      | 419      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3400     |
|    fps              | 123      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1790787  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.85     |
|    n_updates        | 435196   |
----------------------------------


Training Progress:  78%|██████████████████████████████▍        | 782/1000 [1:49:56<29:22,  8.08s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 608      |
|    ep_rew_mean      | 414      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3404     |
|    fps              | 113      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1792213  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.69     |
|    n_updates        | 435553   |
----------------------------------


Training Progress:  78%|██████████████████████████████▌        | 784/1000 [1:50:13<29:59,  8.33s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 606      |
|    ep_rew_mean      | 410      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3408     |
|    fps              | 120      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1794288  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.03     |
|    n_updates        | 436071   |
----------------------------------


Training Progress:  79%|██████████████████████████████▋        | 786/1000 [1:50:30<29:57,  8.40s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 598      |
|    ep_rew_mean      | 402      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3412     |
|    fps              | 122      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1796087  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.563    |
|    n_updates        | 436521   |
----------------------------------


Training Progress:  79%|██████████████████████████████▋        | 788/1000 [1:50:47<29:44,  8.42s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 594      |
|    ep_rew_mean      | 397      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3416     |
|    fps              | 120      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1798639  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.86     |
|    n_updates        | 437159   |
----------------------------------


Training Progress:  79%|██████████████████████████████▊        | 790/1000 [1:51:04<29:15,  8.36s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 583      |
|    ep_rew_mean      | 386      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3420     |
|    fps              | 121      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1800373  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.491    |
|    n_updates        | 437593   |
----------------------------------


Training Progress:  79%|██████████████████████████████▉        | 792/1000 [1:51:20<28:53,  8.33s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 575      |
|    ep_rew_mean      | 381      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3424     |
|    fps              | 120      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1802214  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.681    |
|    n_updates        | 438053   |
----------------------------------


Training Progress:  80%|███████████████████████████████        | 795/1000 [1:51:46<28:50,  8.44s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 570      |
|    ep_rew_mean      | 379      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3428     |
|    fps              | 116      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1805014  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.651    |
|    n_updates        | 438753   |
----------------------------------


Training Progress:  80%|███████████████████████████████        | 797/1000 [1:52:03<28:57,  8.56s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 570      |
|    ep_rew_mean      | 379      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3432     |
|    fps              | 113      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1807149  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.702    |
|    n_updates        | 439287   |
----------------------------------


Training Progress:  80%|███████████████████████████████        | 798/1000 [1:52:12<28:55,  8.59s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 558      |
|    ep_rew_mean      | 367      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3436     |
|    fps              | 115      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1808617  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 5.1      |
|    n_updates        | 439654   |
----------------------------------


Training Progress:  80%|███████████████████████████████▏       | 801/1000 [1:52:38<28:22,  8.55s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 557      |
|    ep_rew_mean      | 364      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3440     |
|    fps              | 119      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1811340  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.659    |
|    n_updates        | 440334   |
----------------------------------


Training Progress:  80%|███████████████████████████████▎       | 802/1000 [1:52:46<28:08,  8.53s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 542      |
|    ep_rew_mean      | 348      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3444     |
|    fps              | 118      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1812457  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.526    |
|    n_updates        | 440614   |
----------------------------------


Training Progress:  80%|███████████████████████████████▎       | 804/1000 [1:53:03<27:27,  8.41s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 527      |
|    ep_rew_mean      | 340      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3448     |
|    fps              | 121      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1814255  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.83     |
|    n_updates        | 441063   |
----------------------------------


Training Progress:  80%|███████████████████████████████▍       | 805/1000 [1:53:11<27:14,  8.38s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 518      |
|    ep_rew_mean      | 328      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3452     |
|    fps              | 117      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1815477  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.534    |
|    n_updates        | 441369   |
----------------------------------


Training Progress:  81%|███████████████████████████████▍       | 807/1000 [1:53:28<27:29,  8.54s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 511      |
|    ep_rew_mean      | 323      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3456     |
|    fps              | 113      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1817394  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 5.54     |
|    n_updates        | 441848   |
----------------------------------


Training Progress:  81%|███████████████████████████████▌       | 809/1000 [1:53:46<27:26,  8.62s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 511      |
|    ep_rew_mean      | 323      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3460     |
|    fps              | 118      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1819744  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.4      |
|    n_updates        | 442435   |
----------------------------------


Training Progress:  81%|███████████████████████████████▋       | 811/1000 [1:54:03<26:50,  8.52s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 491      |
|    ep_rew_mean      | 310      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3464     |
|    fps              | 123      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1821016  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.84     |
|    n_updates        | 442753   |
----------------------------------


Training Progress:  81%|███████████████████████████████▋       | 812/1000 [1:54:11<26:31,  8.46s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 475      |
|    ep_rew_mean      | 302      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3468     |
|    fps              | 119      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1822913  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.611    |
|    n_updates        | 443228   |
----------------------------------


Training Progress:  82%|███████████████████████████████▊       | 816/1000 [1:54:45<25:52,  8.44s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 490      |
|    ep_rew_mean      | 311      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3472     |
|    fps              | 120      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1826193  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.782    |
|    n_updates        | 444048   |
----------------------------------


Training Progress:  82%|███████████████████████████████▉       | 818/1000 [1:55:02<25:42,  8.47s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 487      |
|    ep_rew_mean      | 307      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3476     |
|    fps              | 117      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1828217  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.248    |
|    n_updates        | 444554   |
----------------------------------


Training Progress:  82%|███████████████████████████████▉       | 820/1000 [1:55:19<26:12,  8.73s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 496      |
|    ep_rew_mean      | 314      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3480     |
|    fps              | 112      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1830464  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.883    |
|    n_updates        | 445115   |
----------------------------------


Training Progress:  82%|████████████████████████████████       | 822/1000 [1:55:37<25:40,  8.65s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 502      |
|    ep_rew_mean      | 318      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3484     |
|    fps              | 117      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1832879  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.8      |
|    n_updates        | 445719   |
----------------------------------


Training Progress:  82%|████████████████████████████████▏      | 824/1000 [1:55:54<25:03,  8.54s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 501      |
|    ep_rew_mean      | 316      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3488     |
|    fps              | 118      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1834688  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.595    |
|    n_updates        | 446171   |
----------------------------------


Training Progress:  82%|████████████████████████████████▏      | 825/1000 [1:56:02<24:50,  8.52s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 493      |
|    ep_rew_mean      | 306      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3492     |
|    fps              | 119      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1835936  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.36     |
|    n_updates        | 446483   |
----------------------------------


Training Progress:  83%|████████████████████████████████▎      | 827/1000 [1:56:20<25:05,  8.70s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 492      |
|    ep_rew_mean      | 312      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3496     |
|    fps              | 108      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1837914  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.368    |
|    n_updates        | 446978   |
----------------------------------


Training Progress:  83%|████████████████████████████████▎      | 829/1000 [1:56:37<25:02,  8.79s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 488      |
|    ep_rew_mean      | 306      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3500     |
|    fps              | 121      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1839573  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.44     |
|    n_updates        | 447393   |
----------------------------------


Training Progress:  83%|████████████████████████████████▎      | 830/1000 [1:56:46<24:33,  8.67s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 486      |
|    ep_rew_mean      | 304      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3504     |
|    fps              | 118      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1840810  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.96     |
|    n_updates        | 447702   |
----------------------------------


Training Progress:  83%|████████████████████████████████▍      | 833/1000 [1:57:11<23:48,  8.55s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 490      |
|    ep_rew_mean      | 308      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3508     |
|    fps              | 117      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1843256  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.631    |
|    n_updates        | 448313   |
----------------------------------


Training Progress:  84%|████████████████████████████████▌      | 835/1000 [1:57:29<23:56,  8.70s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 490      |
|    ep_rew_mean      | 309      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3512     |
|    fps              | 108      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1845105  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 5.17     |
|    n_updates        | 448776   |
----------------------------------


Training Progress:  84%|████████████████████████████████▋      | 837/1000 [1:57:47<24:16,  8.93s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 486      |
|    ep_rew_mean      | 308      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3516     |
|    fps              | 104      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1847221  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.444    |
|    n_updates        | 449305   |
----------------------------------


Training Progress:  84%|████████████████████████████████▋      | 839/1000 [1:58:06<24:19,  9.06s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 495      |
|    ep_rew_mean      | 319      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3520     |
|    fps              | 109      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1849910  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.29     |
|    n_updates        | 449977   |
----------------------------------


Training Progress:  84%|████████████████████████████████▊      | 841/1000 [1:58:24<24:12,  9.13s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 495      |
|    ep_rew_mean      | 318      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3524     |
|    fps              | 108      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1851711  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.01     |
|    n_updates        | 450427   |
----------------------------------


Training Progress:  84%|████████████████████████████████▉      | 844/1000 [1:58:51<23:35,  9.07s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 494      |
|    ep_rew_mean      | 315      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3528     |
|    fps              | 116      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1854424  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.935    |
|    n_updates        | 451105   |
----------------------------------


Training Progress:  85%|████████████████████████████████▉      | 846/1000 [1:59:08<22:39,  8.83s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 494      |
|    ep_rew_mean      | 316      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3532     |
|    fps              | 122      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1856596  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.391    |
|    n_updates        | 451648   |
----------------------------------


Training Progress:  85%|█████████████████████████████████      | 848/1000 [1:59:25<21:32,  8.50s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 501      |
|    ep_rew_mean      | 327      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3536     |
|    fps              | 122      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1858747  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.41     |
|    n_updates        | 452186   |
----------------------------------


Training Progress:  85%|█████████████████████████████████▏     | 851/1000 [1:59:50<21:10,  8.53s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 497      |
|    ep_rew_mean      | 326      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3540     |
|    fps              | 121      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1861038  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3        |
|    n_updates        | 452759   |
----------------------------------


Training Progress:  85%|█████████████████████████████████▎     | 853/1000 [2:00:07<20:50,  8.51s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 513      |
|    ep_rew_mean      | 343      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3544     |
|    fps              | 116      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1863772  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.259    |
|    n_updates        | 453442   |
----------------------------------


Training Progress:  86%|█████████████████████████████████▍     | 857/1000 [2:00:41<20:09,  8.46s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 528      |
|    ep_rew_mean      | 354      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3548     |
|    fps              | 118      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1867037  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.709    |
|    n_updates        | 454259   |
----------------------------------


Training Progress:  86%|█████████████████████████████████▌     | 859/1000 [2:00:58<19:48,  8.43s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 537      |
|    ep_rew_mean      | 362      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3552     |
|    fps              | 120      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1869145  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.435    |
|    n_updates        | 454786   |
----------------------------------


Training Progress:  86%|█████████████████████████████████▌     | 861/1000 [2:01:14<19:26,  8.39s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 544      |
|    ep_rew_mean      | 369      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3556     |
|    fps              | 118      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1871756  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.709    |
|    n_updates        | 455438   |
----------------------------------


Training Progress:  86%|█████████████████████████████████▋     | 863/1000 [2:01:31<19:01,  8.33s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 542      |
|    ep_rew_mean      | 367      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3560     |
|    fps              | 124      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1873942  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.8      |
|    n_updates        | 455985   |
----------------------------------


Training Progress:  86%|█████████████████████████████████▋     | 865/1000 [2:01:47<18:26,  8.19s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 546      |
|    ep_rew_mean      | 372      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3564     |
|    fps              | 123      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1875577  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.912    |
|    n_updates        | 456394   |
----------------------------------


Training Progress:  87%|█████████████████████████████████▊     | 867/1000 [2:02:03<18:02,  8.14s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 545      |
|    ep_rew_mean      | 367      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3568     |
|    fps              | 125      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1877374  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.76     |
|    n_updates        | 456843   |
----------------------------------


Training Progress:  87%|█████████████████████████████████▉     | 869/1000 [2:02:20<17:47,  8.15s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 537      |
|    ep_rew_mean      | 365      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3572     |
|    fps              | 119      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1879879  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.708    |
|    n_updates        | 457469   |
----------------------------------


Training Progress:  87%|█████████████████████████████████▉     | 871/1000 [2:02:36<17:44,  8.25s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 535      |
|    ep_rew_mean      | 363      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3576     |
|    fps              | 124      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1881729  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.489    |
|    n_updates        | 457932   |
----------------------------------


Training Progress:  87%|██████████████████████████████████     | 873/1000 [2:02:53<17:20,  8.19s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 530      |
|    ep_rew_mean      | 358      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3580     |
|    fps              | 123      |
|    time_elapsed     | 4        |
|    total_timesteps  | 1883508  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.456    |
|    n_updates        | 458376   |
----------------------------------


Training Progress:  88%|██████████████████████████████████▏    | 876/1000 [2:03:17<16:44,  8.10s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 534      |
|    ep_rew_mean      | 361      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3584     |
|    fps              | 124      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1886270  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.363    |
|    n_updates        | 459067   |
----------------------------------


Training Progress:  88%|██████████████████████████████████▏    | 878/1000 [2:03:33<16:28,  8.10s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 534      |
|    ep_rew_mean      | 362      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3588     |
|    fps              | 124      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1888090  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.54     |
|    n_updates        | 459522   |
----------------------------------


Training Progress:  88%|██████████████████████████████████▎    | 880/1000 [2:03:50<16:27,  8.23s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 550      |
|    ep_rew_mean      | 377      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3592     |
|    fps              | 121      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1890943  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.552    |
|    n_updates        | 460235   |
----------------------------------


Training Progress:  88%|██████████████████████████████████▍    | 882/1000 [2:04:06<16:04,  8.18s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 551      |
|    ep_rew_mean      | 375      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3596     |
|    fps              | 121      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1892976  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.332    |
|    n_updates        | 460743   |
----------------------------------


Training Progress:  88%|██████████████████████████████████▌    | 885/1000 [2:04:30<15:35,  8.14s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 555      |
|    ep_rew_mean      | 378      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3600     |
|    fps              | 124      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1895085  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.252    |
|    n_updates        | 461271   |
----------------------------------


Training Progress:  89%|██████████████████████████████████▌    | 887/1000 [2:04:46<15:13,  8.08s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 569      |
|    ep_rew_mean      | 389      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3604     |
|    fps              | 124      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1897676  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.476    |
|    n_updates        | 461918   |
----------------------------------


Training Progress:  89%|██████████████████████████████████▋    | 889/1000 [2:05:03<14:59,  8.10s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 564      |
|    ep_rew_mean      | 385      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3608     |
|    fps              | 122      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1899705  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.59     |
|    n_updates        | 462426   |
----------------------------------


Training Progress:  89%|██████████████████████████████████▋    | 891/1000 [2:05:19<14:41,  8.09s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 565      |
|    ep_rew_mean      | 386      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3612     |
|    fps              | 124      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1901625  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.1      |
|    n_updates        | 462906   |
----------------------------------


Training Progress:  89%|██████████████████████████████████▊    | 893/1000 [2:05:35<14:21,  8.05s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 567      |
|    ep_rew_mean      | 387      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3616     |
|    fps              | 123      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1903879  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.241    |
|    n_updates        | 463469   |
----------------------------------


Training Progress:  90%|██████████████████████████████████▉    | 896/1000 [2:05:59<13:58,  8.07s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 568      |
|    ep_rew_mean      | 386      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3620     |
|    fps              | 125      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1906687  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.285    |
|    n_updates        | 464171   |
----------------------------------


Training Progress:  90%|███████████████████████████████████    | 900/1000 [2:06:32<13:40,  8.20s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 584      |
|    ep_rew_mean      | 398      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3624     |
|    fps              | 125      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1910156  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.942    |
|    n_updates        | 465038   |
----------------------------------


Training Progress:  90%|███████████████████████████████████▏   | 902/1000 [2:06:48<13:17,  8.14s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 578      |
|    ep_rew_mean      | 395      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3628     |
|    fps              | 125      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1912242  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.443    |
|    n_updates        | 465560   |
----------------------------------


Training Progress:  90%|███████████████████████████████████▏   | 903/1000 [2:06:56<13:06,  8.11s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 571      |
|    ep_rew_mean      | 388      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3632     |
|    fps              | 123      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1913688  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.09     |
|    n_updates        | 465921   |
----------------------------------


Training Progress:  90%|███████████████████████████████████▎   | 905/1000 [2:07:12<12:49,  8.10s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 567      |
|    ep_rew_mean      | 382      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3636     |
|    fps              | 124      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1915470  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.715    |
|    n_updates        | 466367   |
----------------------------------


Training Progress:  91%|███████████████████████████████████▎   | 907/1000 [2:07:29<12:38,  8.16s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 567      |
|    ep_rew_mean      | 381      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3640     |
|    fps              | 122      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1917751  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.479    |
|    n_updates        | 466937   |
----------------------------------


Training Progress:  91%|███████████████████████████████████▍   | 909/1000 [2:07:45<12:21,  8.15s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 560      |
|    ep_rew_mean      | 371      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3644     |
|    fps              | 123      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1919725  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.06     |
|    n_updates        | 467431   |
----------------------------------


Training Progress:  91%|███████████████████████████████████▌   | 911/1000 [2:08:01<12:03,  8.13s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 540      |
|    ep_rew_mean      | 356      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3648     |
|    fps              | 136      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1921002  |
----------------------------------


Training Progress:  91%|███████████████████████████████████▌   | 912/1000 [2:08:10<12:04,  8.23s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 537      |
|    ep_rew_mean      | 353      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3652     |
|    fps              | 122      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1922881  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.469    |
|    n_updates        | 468220   |
----------------------------------


Training Progress:  91%|███████████████████████████████████▋   | 914/1000 [2:08:26<11:44,  8.20s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 529      |
|    ep_rew_mean      | 346      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3656     |
|    fps              | 124      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1924695  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.893    |
|    n_updates        | 468673   |
----------------------------------


Training Progress:  92%|███████████████████████████████████▋   | 916/1000 [2:08:42<11:23,  8.14s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 529      |
|    ep_rew_mean      | 348      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3660     |
|    fps              | 123      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1926878  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.34     |
|    n_updates        | 469219   |
----------------------------------


Training Progress:  92%|███████████████████████████████████▊   | 918/1000 [2:08:58<11:08,  8.15s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 533      |
|    ep_rew_mean      | 352      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3664     |
|    fps              | 123      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1928889  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.73     |
|    n_updates        | 469722   |
----------------------------------


Training Progress:  92%|███████████████████████████████████▉   | 921/1000 [2:09:23<10:51,  8.25s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 539      |
|    ep_rew_mean      | 358      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3668     |
|    fps              | 118      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1931237  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.788    |
|    n_updates        | 470309   |
----------------------------------


Training Progress:  92%|████████████████████████████████████   | 924/1000 [2:09:48<10:29,  8.29s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 542      |
|    ep_rew_mean      | 356      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3672     |
|    fps              | 122      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1934080  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.593    |
|    n_updates        | 471019   |
----------------------------------


Training Progress:  93%|████████████████████████████████████   | 926/1000 [2:10:04<10:07,  8.21s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 548      |
|    ep_rew_mean      | 359      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3676     |
|    fps              | 123      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1936489  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.588    |
|    n_updates        | 471622   |
----------------------------------


Training Progress:  93%|████████████████████████████████████▏  | 928/1000 [2:10:21<09:49,  8.19s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 546      |
|    ep_rew_mean      | 357      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3680     |
|    fps              | 123      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1938082  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.39     |
|    n_updates        | 472020   |
----------------------------------


Training Progress:  93%|████████████████████████████████████▎  | 930/1000 [2:10:37<09:31,  8.16s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 537      |
|    ep_rew_mean      | 349      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3684     |
|    fps              | 131      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1940011  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.541    |
|    n_updates        | 472502   |
----------------------------------


Training Progress:  93%|████████████████████████████████████▎  | 932/1000 [2:10:53<09:12,  8.13s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 547      |
|    ep_rew_mean      | 356      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3688     |
|    fps              | 120      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1942754  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.6      |
|    n_updates        | 473188   |
----------------------------------


Training Progress:  93%|████████████████████████████████████▍  | 934/1000 [2:11:10<09:00,  8.19s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 535      |
|    ep_rew_mean      | 346      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3692     |
|    fps              | 124      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1944481  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.71     |
|    n_updates        | 473620   |
----------------------------------


Training Progress:  94%|████████████████████████████████████▌  | 936/1000 [2:11:26<08:42,  8.16s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 539      |
|    ep_rew_mean      | 348      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3696     |
|    fps              | 123      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1946899  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.47     |
|    n_updates        | 474224   |
----------------------------------


Training Progress:  94%|████████████████████████████████████▌  | 938/1000 [2:11:42<08:27,  8.18s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 537      |
|    ep_rew_mean      | 346      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3700     |
|    fps              | 122      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1948747  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.495    |
|    n_updates        | 474686   |
----------------------------------


Training Progress:  94%|████████████████████████████████████▋  | 940/1000 [2:11:59<08:06,  8.12s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 533      |
|    ep_rew_mean      | 346      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3704     |
|    fps              | 125      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1950981  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.37     |
|    n_updates        | 475245   |
----------------------------------


Training Progress:  94%|████████████████████████████████████▊  | 943/1000 [2:12:23<07:45,  8.17s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 536      |
|    ep_rew_mean      | 350      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3708     |
|    fps              | 119      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1953310  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.518    |
|    n_updates        | 475827   |
----------------------------------


Training Progress:  94%|████████████████████████████████████▊  | 945/1000 [2:12:40<07:32,  8.22s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 543      |
|    ep_rew_mean      | 354      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3712     |
|    fps              | 124      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1955881  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.26     |
|    n_updates        | 476470   |
----------------------------------


Training Progress:  95%|████████████████████████████████████▉  | 948/1000 [2:13:04<07:01,  8.12s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 545      |
|    ep_rew_mean      | 355      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3716     |
|    fps              | 122      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1958405  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.659    |
|    n_updates        | 477101   |
----------------------------------


Training Progress:  95%|█████████████████████████████████████  | 950/1000 [2:13:21<06:53,  8.27s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 536      |
|    ep_rew_mean      | 347      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3720     |
|    fps              | 125      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1960297  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.89     |
|    n_updates        | 477574   |
----------------------------------


Training Progress:  95%|█████████████████████████████████████▏ | 953/1000 [2:13:45<06:21,  8.12s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 528      |
|    ep_rew_mean      | 345      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3724     |
|    fps              | 204      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1963004  |
----------------------------------


Training Progress:  96%|█████████████████████████████████████▏ | 955/1000 [2:14:01<06:04,  8.10s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 531      |
|    ep_rew_mean      | 346      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3728     |
|    fps              | 124      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1965330  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.285    |
|    n_updates        | 478832   |
----------------------------------


Training Progress:  96%|█████████████████████████████████████▎ | 958/1000 [2:14:25<05:39,  8.09s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 544      |
|    ep_rew_mean      | 358      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3732     |
|    fps              | 130      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1968079  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.48     |
|    n_updates        | 479519   |
----------------------------------


Training Progress:  96%|█████████████████████████████████████▍ | 960/1000 [2:14:41<05:26,  8.16s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 554      |
|    ep_rew_mean      | 366      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3736     |
|    fps              | 125      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1970834  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.44     |
|    n_updates        | 480208   |
----------------------------------


Training Progress:  96%|█████████████████████████████████████▌ | 963/1000 [2:15:06<05:00,  8.13s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 554      |
|    ep_rew_mean      | 366      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3740     |
|    fps              | 125      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1973112  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.01     |
|    n_updates        | 480777   |
----------------------------------


Training Progress:  96%|█████████████████████████████████████▋ | 965/1000 [2:15:22<04:44,  8.12s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 554      |
|    ep_rew_mean      | 367      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3744     |
|    fps              | 127      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1975086  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.499    |
|    n_updates        | 481271   |
----------------------------------


Training Progress:  97%|█████████████████████████████████████▋ | 966/1000 [2:15:30<04:35,  8.10s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 554      |
|    ep_rew_mean      | 367      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3748     |
|    fps              | 123      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1976406  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.9      |
|    n_updates        | 481601   |
----------------------------------


Training Progress:  97%|█████████████████████████████████████▊ | 969/1000 [2:15:54<04:11,  8.10s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 562      |
|    ep_rew_mean      | 374      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3752     |
|    fps              | 124      |
|    time_elapsed     | 0        |
|    total_timesteps  | 1979043  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.52     |
|    n_updates        | 482260   |
----------------------------------


Training Progress:  97%|█████████████████████████████████████▊ | 971/1000 [2:16:10<03:54,  8.07s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 571      |
|    ep_rew_mean      | 387      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3756     |
|    fps              | 125      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1981798  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.875    |
|    n_updates        | 482949   |
----------------------------------


Training Progress:  97%|█████████████████████████████████████▉ | 974/1000 [2:16:35<03:32,  8.17s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 579      |
|    ep_rew_mean      | 391      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3760     |
|    fps              | 118      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1984771  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.94     |
|    n_updates        | 483692   |
----------------------------------


Training Progress:  98%|██████████████████████████████████████ | 977/1000 [2:17:00<03:07,  8.16s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 585      |
|    ep_rew_mean      | 396      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3764     |
|    fps              | 124      |
|    time_elapsed     | 3        |
|    total_timesteps  | 1987409  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.85     |
|    n_updates        | 484352   |
----------------------------------


Training Progress:  98%|██████████████████████████████████████▏| 979/1000 [2:17:16<02:50,  8.12s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 586      |
|    ep_rew_mean      | 397      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3768     |
|    fps              | 125      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1989833  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.397    |
|    n_updates        | 484958   |
----------------------------------


Training Progress:  98%|██████████████████████████████████████▎| 982/1000 [2:17:40<02:26,  8.14s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 582      |
|    ep_rew_mean      | 395      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3772     |
|    fps              | 122      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1992310  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.804    |
|    n_updates        | 485577   |
----------------------------------


Training Progress:  98%|██████████████████████████████████████▍| 984/1000 [2:17:57<02:11,  8.19s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 577      |
|    ep_rew_mean      | 394      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3776     |
|    fps              | 124      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1994207  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.813    |
|    n_updates        | 486051   |
----------------------------------


Training Progress:  99%|██████████████████████████████████████▍| 986/1000 [2:18:13<01:54,  8.18s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 580      |
|    ep_rew_mean      | 399      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3780     |
|    fps              | 122      |
|    time_elapsed     | 1        |
|    total_timesteps  | 1996124  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.404    |
|    n_updates        | 486530   |
----------------------------------


Training Progress:  99%|██████████████████████████████████████▌| 988/1000 [2:18:30<01:39,  8.28s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 586      |
|    ep_rew_mean      | 403      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3784     |
|    fps              | 118      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1998660  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.493    |
|    n_updates        | 487164   |
----------------------------------


Training Progress:  99%|██████████████████████████████████████▌| 989/1000 [2:18:38<01:31,  8.34s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 571      |
|    ep_rew_mean      | 389      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3788     |
|    fps              | 119      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1999889  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.86     |
|    n_updates        | 487472   |
----------------------------------


Training Progress:  99%|██████████████████████████████████████▋| 991/1000 [2:18:55<01:15,  8.34s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 574      |
|    ep_rew_mean      | 392      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3792     |
|    fps              | 119      |
|    time_elapsed     | 7        |
|    total_timesteps  | 2001916  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.692    |
|    n_updates        | 487978   |
----------------------------------


Training Progress:  99%|██████████████████████████████████████▊| 994/1000 [2:19:20<00:50,  8.36s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 575      |
|    ep_rew_mean      | 393      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3796     |
|    fps              | 119      |
|    time_elapsed     | 3        |
|    total_timesteps  | 2004421  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.501    |
|    n_updates        | 488605   |
----------------------------------


Training Progress: 100%|██████████████████████████████████████▉| 997/1000 [2:19:45<00:24,  8.33s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 583      |
|    ep_rew_mean      | 400      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3800     |
|    fps              | 119      |
|    time_elapsed     | 0        |
|    total_timesteps  | 2007035  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.09     |
|    n_updates        | 489258   |
----------------------------------


Training Progress: 100%|██████████████████████████████████████▉| 999/1000 [2:20:02<00:08,  8.36s/it]

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 584      |
|    ep_rew_mean      | 400      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3804     |
|    fps              | 118      |
|    time_elapsed     | 3        |
|    total_timesteps  | 2009391  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.469    |
|    n_updates        | 489847   |
----------------------------------


Training Progress: 100%|██████████████████████████████████████| 1000/1000 [2:20:11<00:00,  8.41s/it]


In [None]:
from stable_baselines3 import DQN 
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
import gymnasium as gym

# Tworzenie środowiska z renderowaniem
env = gym.make("ALE/SpaceInvaders-v5", render_mode="human")
env = Monitor(env)

# Konieczne opakowania
env = DummyVecEnv([lambda: env])
env = VecFrameStack(env, n_stack=4)

# Załaduj model
model = DQN.load("/home/plorenc/Desktop/AiR_ISS/ML/RL/checkpoints/ppo_spaceinvaders_epoch_750.zip", env=env)

# Uruchomienie agenta
obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()


Wrapping the env in a VecTransposeImage.




KeyboardInterrupt: 

: 

### Optymalizacja paramterow podczas uczenia z optuna

In [None]:
import os
import gymnasium as gym
import optuna
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback
from tqdm import tqdm

# Stwórz foldery
os.makedirs("optuna_checkpoints", exist_ok=True)

# Funkcja tworząca środowisko
def make_env():
    return Monitor(gym.make("ALE/SpaceInvaders-v5"))

# Funkcja celu dla Optuny
def optimize_ppo(trial):
    env = make_env()

    # Przestrzeń hiperparametrów do przeszukania
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)
    gamma = trial.suggest_uniform('gamma', 0.9, 0.9999)
    n_steps = trial.suggest_categorical('n_steps', [128, 256, 512, 1024])
    ent_coef = trial.suggest_loguniform('ent_coef', 0.00000001, 0.1)
    clip_range = trial.suggest_uniform('clip_range', 0.1, 0.4)
    gae_lambda = trial.suggest_uniform('gae_lambda', 0.8, 1.0)

    model = PPO(
        "CnnPolicy",
        env,
        learning_rate=learning_rate,
        gamma=gamma,
        n_steps=n_steps,
        ent_coef=ent_coef,
        clip_range=clip_range,
        gae_lambda=gae_lambda,
        verbose=0,
    )

    model.learn(total_timesteps=100_000)
    
    mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=5, render=False)

    return mean_reward

study = optuna.create_study(direction='maximize')
study.optimize(optimize_ppo, n_trials=20)  

print("✅ Najlepsze parametry:")
for key, value in study.best_params.items():
    print(f"{key}: {value}")

best_params = study.best_params
env = make_env()
best_model = PPO(
    "CnnPolicy",
    env,
    **best_params,
    verbose=1,
)

total_timesteps = 1_000_000
epoch_size = 10000
num_epochs = total_timesteps // epoch_size

for epoch in tqdm(range(1, num_epochs + 1), desc="Training (best params)", ncols=100):
    best_model.learn(total_timesteps=epoch_size, reset_num_timesteps=False)

    if epoch % 50 == 0:
        best_model.save(f"optuna_checkpoints/ppo_best_epoch_{epoch}")

best_model.save("ppo_spaceinvaders_best_final")


[I 2025-06-12 21:04:37,928] Trial 16 finished with value: 285.0 and parameters: {'learning_rate': 2.965437188017218e-05, 'gamma': 0.9787983489982238, 'n_steps': 1024, 'ent_coef': 0.00477870995327333, 'clip_range': 0.1935358383718346, 'gae_lambda': 0.8044171446097719}. Best is trial 4 with value: 288.0.


[I 2025-06-12 21:12:26,854] Trial 17 finished with value: 139.0 and parameters: {'learning_rate': 0.00011701857799450871, 'gamma': 0.9272375397750325, 'n_steps': 128, 'ent_coef': 1.9209513067547926e-07, 'clip_range': 0.27323501778012177, 'gae_lambda': 0.9396958849567045}. Best is trial 4 with value: 288.0.


[I 2025-06-12 21:20:23,110] Trial 18 finished with value: 285.0 and parameters: {'learning_rate': 1.0306674375625183e-05, 'gamma': 0.9537869744560241, 'n_steps': 1024, 'ent_coef': 1.7426922520679073e-08, 'clip_range': 0.2060662947543671, 'gae_lambda': 0.8979545255068435}. Best is trial 4 with value: 288.0.


[I 2025-06-12 21:28:14,208] Trial 19 finished with value: 297.0 and parameters: {'learning_rate': 0.0003603034714510459, 'gamma': 0.9746210073550151, 'n_steps': 256, 'ent_coef': 5.510438562118013e-05, 'clip_range': 0.14403864725593685, 'gae_lambda': 0.8416304787950954}. Best is trial 19 with value: 297.0.

