In [47]:
!pip install gym[atari]
!pip install autorom[accept-rom-license]
!pip install opencv-python
!pip install torch 
!pip install stable-baselines3
!pip install shimmy





[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip






[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [48]:
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
import cv2
from collections import deque
from tqdm import tqdm
from stable_baselines3.common.vec_env import DummyVecEnv  # For parallel environments

# Frame preprocessing function
def preprocess_frame(frame):
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)  # Convert to grayscale
    frame = cv2.resize(frame, (84, 84))  # Resize to 84x84
    return frame / 255.0  # Normalize pixel values to [0, 1]

# Define the DQN Network
class DQN(nn.Module):
    def __init__(self, input_shape, num_actions):
        super(DQN, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU()
        )
        conv_out_size = self._get_conv_out(input_shape)
        self.fc = nn.Sequential(
            nn.Linear(conv_out_size, 512),
            nn.ReLU(),
            nn.Linear(512, num_actions)
        )

    def _get_conv_out(self, shape):
        o = torch.zeros(1, *shape)
        o = self.conv(o)
        return int(np.prod(o.size()))

    def forward(self, x):
        conv_out = self.conv(x).view(x.size()[0], -1)
        return self.fc(conv_out)

# Experience Replay Buffer
class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        return np.array(states), actions, rewards, np.array(next_states), dones

    def __len__(self):
        return len(self.buffer)

# Main DQN agent class
class DQNAgent:
    def __init__(self, env, buffer_size=1000000, batch_size=64, gamma=0.99, lr=1e-4, epsilon=1.0, epsilon_min=0.1, epsilon_decay=0.995):
        self.env = env
        self.memory = ReplayBuffer(buffer_size)
        self.batch_size = batch_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.num_actions = env.action_space.n

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Initialize DQN and target DQN
        input_shape = (4, 84, 84)  # 4 stacked frames of 84x84
        self.policy_net = DQN(input_shape, self.num_actions).to(self.device)
        self.target_net = DQN(input_shape, self.num_actions).to(self.device)
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()

        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=lr)

    def select_action(self, state):
        if random.random() < self.epsilon:
            return self.env.action_space.sample()
        else:
            state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
            with torch.no_grad():
                q_values = self.policy_net(state)
            return q_values.max(1)[1].item()

    def store_experience(self, state, action, reward, next_state, done):
        self.memory.push(state, action, reward, next_state, done)

    def update_target_net(self):
        self.target_net.load_state_dict(self.policy_net.state_dict())

    def train_step(self):
        if len(self.memory) < self.batch_size:
            return

        # Parallelized batch processing
        states, actions, rewards, next_states, dones = self.memory.sample(self.batch_size)

        states = torch.FloatTensor(states).to(self.device)
        actions = torch.LongTensor(actions).to(self.device)
        rewards = torch.FloatTensor(rewards).to(self.device)
        next_states = torch.FloatTensor(next_states).to(self.device)
        dones = torch.FloatTensor(dones).to(self.device)

        # Q-value prediction
        q_values = self.policy_net(states)
        next_q_values = self.target_net(next_states)

        q_value = q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
        next_q_value = next_q_values.max(1)[0]

        # Bellman equation
        expected_q_value = rewards + (1 - dones) * self.gamma * next_q_value

        # Compute loss
        loss = nn.MSELoss()(q_value, expected_q_value.detach())

        # Optimize the model
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()


# Create a parallelized environment
def make_env(env_name):
    def _init():
        env = gym.make(env_name)
        return env
    return _init



In [63]:
env_name="PongNoFrameskip-v4"
num_episodes=150
target_update=10
num_envs=8
history=[]
envs = DummyVecEnv([make_env(env_name) for _ in range(num_envs)])  # Vectorized environments
agent = DQNAgent(envs)

from torch.cuda.amp import GradScaler, autocast

# Initialize a gradient scaler
scaler = GradScaler("cuda")

for episode in tqdm(range(num_episodes)):
    states = envs.reset()
    states = np.stack([preprocess_frame(s) for s in states], axis=0)
    states = np.stack([states] * 4, axis=1)  # Stack frames for input
    episode_rewards = np.zeros(num_envs)

    while True:
        actions = [agent.select_action(state) for state in states]
        next_states, rewards, dones, _ = envs.step(actions)
        next_states = np.stack([preprocess_frame(ns) for ns in next_states], axis=0)
        next_states = np.concatenate([states[:, 1:], np.expand_dims(next_states, axis=1)], axis=1)

        # Store experiences and perform mixed-precision training
        for i in range(num_envs):
            agent.store_experience(states[i], actions[i], rewards[i], next_states[i], dones[i])

        # Mixed precision training step
        with autocast():  # Enable autocast for mixed precision
            agent.train_step()  # Ensure train_step uses scaler.scale(loss).backward() internally if computing loss

        # Update states and rewards
        states = next_states
        episode_rewards += rewards
        agent.epsilon = max(agent.epsilon_min, agent.epsilon * agent.epsilon_decay)    

        if dones.any():
            break

    

    if episode % target_update == 0:
        agent.update_target_net()

        
    for i in range(num_envs): 
        print(f"Episode: {episode}, Reward: {episode_rewards[i]:.2f}, Epsilon: {agent.epsilon:.2f}")
        history.append(episode_rewards[i])

envs.close()

torch.save(agent.policy_net.state_dict(), 'dqn_policy_net.pth')  # Save the policy network
torch.save(agent.target_net.state_dict(), 'dqn_target_net.pth')  # Save the target network
torch.save(agent.optimizer.state_dict(), 'dqn_optimizer.pth')     # Save the optimizer state

# Save the history list as a .npy file
np.save('dqn_history.npy', history)  # Save the history list    

  scaler = GradScaler("cuda")
  if not isinstance(terminated, (bool, np.bool8)):
  with autocast():  # Enable autocast for mixed precision
  dones = torch.FloatTensor(dones).to(self.device)
  1%|          | 1/150 [02:10<5:25:00, 130.88s/it]

Episode: 0, Reward: -21.00, Epsilon: 0.10
Episode: 0, Reward: -21.00, Epsilon: 0.10
Episode: 0, Reward: -15.00, Epsilon: 0.10
Episode: 0, Reward: -19.00, Epsilon: 0.10
Episode: 0, Reward: -21.00, Epsilon: 0.10
Episode: 0, Reward: -21.00, Epsilon: 0.10
Episode: 0, Reward: -21.00, Epsilon: 0.10
Episode: 0, Reward: -21.00, Epsilon: 0.10


  1%|▏         | 2/150 [04:27<5:31:07, 134.24s/it]

Episode: 1, Reward: -19.00, Epsilon: 0.10
Episode: 1, Reward: -19.00, Epsilon: 0.10
Episode: 1, Reward: -20.00, Epsilon: 0.10
Episode: 1, Reward: -20.00, Epsilon: 0.10
Episode: 1, Reward: -21.00, Epsilon: 0.10
Episode: 1, Reward: -19.00, Epsilon: 0.10
Episode: 1, Reward: -19.00, Epsilon: 0.10
Episode: 1, Reward: -19.00, Epsilon: 0.10


  2%|▏         | 3/150 [06:36<5:22:41, 131.71s/it]

Episode: 2, Reward: -20.00, Epsilon: 0.10
Episode: 2, Reward: -16.00, Epsilon: 0.10
Episode: 2, Reward: -20.00, Epsilon: 0.10
Episode: 2, Reward: -21.00, Epsilon: 0.10
Episode: 2, Reward: -17.00, Epsilon: 0.10
Episode: 2, Reward: -20.00, Epsilon: 0.10
Episode: 2, Reward: -12.00, Epsilon: 0.10
Episode: 2, Reward: -18.00, Epsilon: 0.10


  3%|▎         | 4/150 [08:29<5:02:50, 124.45s/it]

Episode: 3, Reward: -15.00, Epsilon: 0.10
Episode: 3, Reward: -19.00, Epsilon: 0.10
Episode: 3, Reward: -16.00, Epsilon: 0.10
Episode: 3, Reward: -21.00, Epsilon: 0.10
Episode: 3, Reward: -21.00, Epsilon: 0.10
Episode: 3, Reward: -16.00, Epsilon: 0.10
Episode: 3, Reward: -15.00, Epsilon: 0.10
Episode: 3, Reward: -19.00, Epsilon: 0.10


  3%|▎         | 5/150 [10:18<4:47:28, 118.95s/it]

Episode: 4, Reward: -21.00, Epsilon: 0.10
Episode: 4, Reward: -19.00, Epsilon: 0.10
Episode: 4, Reward: -21.00, Epsilon: 0.10
Episode: 4, Reward: -19.00, Epsilon: 0.10
Episode: 4, Reward: -21.00, Epsilon: 0.10
Episode: 4, Reward: -21.00, Epsilon: 0.10
Episode: 4, Reward: -16.00, Epsilon: 0.10
Episode: 4, Reward: -21.00, Epsilon: 0.10


  4%|▍         | 6/150 [12:08<4:37:54, 115.79s/it]

Episode: 5, Reward: -21.00, Epsilon: 0.10
Episode: 5, Reward: -20.00, Epsilon: 0.10
Episode: 5, Reward: -16.00, Epsilon: 0.10
Episode: 5, Reward: -16.00, Epsilon: 0.10
Episode: 5, Reward: -18.00, Epsilon: 0.10
Episode: 5, Reward: -17.00, Epsilon: 0.10
Episode: 5, Reward: -16.00, Epsilon: 0.10
Episode: 5, Reward: -16.00, Epsilon: 0.10


  5%|▍         | 7/150 [13:57<4:30:49, 113.63s/it]

Episode: 6, Reward: -21.00, Epsilon: 0.10
Episode: 6, Reward: -14.00, Epsilon: 0.10
Episode: 6, Reward: -20.00, Epsilon: 0.10
Episode: 6, Reward: -19.00, Epsilon: 0.10
Episode: 6, Reward: -20.00, Epsilon: 0.10
Episode: 6, Reward: -20.00, Epsilon: 0.10
Episode: 6, Reward: -20.00, Epsilon: 0.10
Episode: 6, Reward: -20.00, Epsilon: 0.10


  5%|▌         | 8/150 [15:50<4:28:42, 113.54s/it]

Episode: 7, Reward: -18.00, Epsilon: 0.10
Episode: 7, Reward: -14.00, Epsilon: 0.10
Episode: 7, Reward: -18.00, Epsilon: 0.10
Episode: 7, Reward: -21.00, Epsilon: 0.10
Episode: 7, Reward: -18.00, Epsilon: 0.10
Episode: 7, Reward: -14.00, Epsilon: 0.10
Episode: 7, Reward: -18.00, Epsilon: 0.10
Episode: 7, Reward: -15.00, Epsilon: 0.10


  6%|▌         | 9/150 [17:44<4:26:33, 113.43s/it]

Episode: 8, Reward: -18.00, Epsilon: 0.10
Episode: 8, Reward: -18.00, Epsilon: 0.10
Episode: 8, Reward: -12.00, Epsilon: 0.10
Episode: 8, Reward: -16.00, Epsilon: 0.10
Episode: 8, Reward: -11.00, Epsilon: 0.10
Episode: 8, Reward: -18.00, Epsilon: 0.10
Episode: 8, Reward: -21.00, Epsilon: 0.10
Episode: 8, Reward: -18.00, Epsilon: 0.10


  7%|▋         | 10/150 [19:33<4:21:32, 112.09s/it]

Episode: 9, Reward: -18.00, Epsilon: 0.10
Episode: 9, Reward: -20.00, Epsilon: 0.10
Episode: 9, Reward: -16.00, Epsilon: 0.10
Episode: 9, Reward: -21.00, Epsilon: 0.10
Episode: 9, Reward: -17.00, Epsilon: 0.10
Episode: 9, Reward: -20.00, Epsilon: 0.10
Episode: 9, Reward: -20.00, Epsilon: 0.10
Episode: 9, Reward: -21.00, Epsilon: 0.10


  7%|▋         | 11/150 [21:26<4:20:21, 112.38s/it]

Episode: 10, Reward: -20.00, Epsilon: 0.10
Episode: 10, Reward: -20.00, Epsilon: 0.10
Episode: 10, Reward: -20.00, Epsilon: 0.10
Episode: 10, Reward: -20.00, Epsilon: 0.10
Episode: 10, Reward: -21.00, Epsilon: 0.10
Episode: 10, Reward: -20.00, Epsilon: 0.10
Episode: 10, Reward: -18.00, Epsilon: 0.10
Episode: 10, Reward: -18.00, Epsilon: 0.10


  8%|▊         | 12/150 [23:15<4:16:17, 111.43s/it]

Episode: 11, Reward: -21.00, Epsilon: 0.10
Episode: 11, Reward: -19.00, Epsilon: 0.10
Episode: 11, Reward: -19.00, Epsilon: 0.10
Episode: 11, Reward: -19.00, Epsilon: 0.10
Episode: 11, Reward: -18.00, Epsilon: 0.10
Episode: 11, Reward: -19.00, Epsilon: 0.10
Episode: 11, Reward: -19.00, Epsilon: 0.10
Episode: 11, Reward: -20.00, Epsilon: 0.10


  9%|▊         | 13/150 [25:09<4:16:31, 112.35s/it]

Episode: 12, Reward: -19.00, Epsilon: 0.10
Episode: 12, Reward: -18.00, Epsilon: 0.10
Episode: 12, Reward: -21.00, Epsilon: 0.10
Episode: 12, Reward: -18.00, Epsilon: 0.10
Episode: 12, Reward: -17.00, Epsilon: 0.10
Episode: 12, Reward: -17.00, Epsilon: 0.10
Episode: 12, Reward: -18.00, Epsilon: 0.10
Episode: 12, Reward: -21.00, Epsilon: 0.10


  9%|▉         | 14/150 [27:07<4:18:22, 113.99s/it]

Episode: 13, Reward: -17.00, Epsilon: 0.10
Episode: 13, Reward: -21.00, Epsilon: 0.10
Episode: 13, Reward: -18.00, Epsilon: 0.10
Episode: 13, Reward: -19.00, Epsilon: 0.10
Episode: 13, Reward: -19.00, Epsilon: 0.10
Episode: 13, Reward: -16.00, Epsilon: 0.10
Episode: 13, Reward: -18.00, Epsilon: 0.10
Episode: 13, Reward: -20.00, Epsilon: 0.10


 10%|█         | 15/150 [29:05<4:18:53, 115.06s/it]

Episode: 14, Reward: -20.00, Epsilon: 0.10
Episode: 14, Reward: -20.00, Epsilon: 0.10
Episode: 14, Reward: -20.00, Epsilon: 0.10
Episode: 14, Reward: -20.00, Epsilon: 0.10
Episode: 14, Reward: -20.00, Epsilon: 0.10
Episode: 14, Reward: -17.00, Epsilon: 0.10
Episode: 14, Reward: -21.00, Epsilon: 0.10
Episode: 14, Reward: -20.00, Epsilon: 0.10


 11%|█         | 16/150 [30:56<4:14:39, 114.03s/it]

Episode: 15, Reward: -20.00, Epsilon: 0.10
Episode: 15, Reward: -21.00, Epsilon: 0.10
Episode: 15, Reward: -17.00, Epsilon: 0.10
Episode: 15, Reward: -18.00, Epsilon: 0.10
Episode: 15, Reward: -20.00, Epsilon: 0.10
Episode: 15, Reward: -20.00, Epsilon: 0.10
Episode: 15, Reward: -17.00, Epsilon: 0.10
Episode: 15, Reward: -17.00, Epsilon: 0.10


 11%|█▏        | 17/150 [32:50<4:12:16, 113.81s/it]

Episode: 16, Reward: -19.00, Epsilon: 0.10
Episode: 16, Reward: -21.00, Epsilon: 0.10
Episode: 16, Reward: -19.00, Epsilon: 0.10
Episode: 16, Reward: -16.00, Epsilon: 0.10
Episode: 16, Reward: -17.00, Epsilon: 0.10
Episode: 16, Reward: -20.00, Epsilon: 0.10
Episode: 16, Reward: -14.00, Epsilon: 0.10
Episode: 16, Reward: -19.00, Epsilon: 0.10


 12%|█▏        | 18/150 [34:40<4:08:16, 112.85s/it]

Episode: 17, Reward: -21.00, Epsilon: 0.10
Episode: 17, Reward: -21.00, Epsilon: 0.10
Episode: 17, Reward: -21.00, Epsilon: 0.10
Episode: 17, Reward: -21.00, Epsilon: 0.10
Episode: 17, Reward: -21.00, Epsilon: 0.10
Episode: 17, Reward: -21.00, Epsilon: 0.10
Episode: 17, Reward: -21.00, Epsilon: 0.10
Episode: 17, Reward: -21.00, Epsilon: 0.10


 13%|█▎        | 19/150 [36:30<4:04:02, 111.78s/it]

Episode: 18, Reward: -21.00, Epsilon: 0.10
Episode: 18, Reward: -21.00, Epsilon: 0.10
Episode: 18, Reward: -21.00, Epsilon: 0.10
Episode: 18, Reward: -21.00, Epsilon: 0.10
Episode: 18, Reward: -21.00, Epsilon: 0.10
Episode: 18, Reward: -21.00, Epsilon: 0.10
Episode: 18, Reward: -21.00, Epsilon: 0.10
Episode: 18, Reward: -21.00, Epsilon: 0.10


 13%|█▎        | 20/150 [38:19<4:00:22, 110.94s/it]

Episode: 19, Reward: -21.00, Epsilon: 0.10
Episode: 19, Reward: -21.00, Epsilon: 0.10
Episode: 19, Reward: -21.00, Epsilon: 0.10
Episode: 19, Reward: -21.00, Epsilon: 0.10
Episode: 19, Reward: -21.00, Epsilon: 0.10
Episode: 19, Reward: -21.00, Epsilon: 0.10
Episode: 19, Reward: -21.00, Epsilon: 0.10
Episode: 19, Reward: -21.00, Epsilon: 0.10


 14%|█▍        | 21/150 [40:08<3:57:34, 110.50s/it]

Episode: 20, Reward: -21.00, Epsilon: 0.10
Episode: 20, Reward: -17.00, Epsilon: 0.10
Episode: 20, Reward: -20.00, Epsilon: 0.10
Episode: 20, Reward: -21.00, Epsilon: 0.10
Episode: 20, Reward: -19.00, Epsilon: 0.10
Episode: 20, Reward: -17.00, Epsilon: 0.10
Episode: 20, Reward: -20.00, Epsilon: 0.10
Episode: 20, Reward: -21.00, Epsilon: 0.10


 15%|█▍        | 22/150 [41:57<3:54:38, 109.99s/it]

Episode: 21, Reward: -18.00, Epsilon: 0.10
Episode: 21, Reward: -21.00, Epsilon: 0.10
Episode: 21, Reward: -20.00, Epsilon: 0.10
Episode: 21, Reward: -18.00, Epsilon: 0.10
Episode: 21, Reward: -20.00, Epsilon: 0.10
Episode: 21, Reward: -18.00, Epsilon: 0.10
Episode: 21, Reward: -21.00, Epsilon: 0.10
Episode: 21, Reward: -20.00, Epsilon: 0.10


 15%|█▌        | 23/150 [43:57<3:58:59, 112.91s/it]

Episode: 22, Reward: -19.00, Epsilon: 0.10
Episode: 22, Reward: -20.00, Epsilon: 0.10
Episode: 22, Reward: -20.00, Epsilon: 0.10
Episode: 22, Reward: -15.00, Epsilon: 0.10
Episode: 22, Reward: -17.00, Epsilon: 0.10
Episode: 22, Reward: -20.00, Epsilon: 0.10
Episode: 22, Reward: -14.00, Epsilon: 0.10
Episode: 22, Reward: -20.00, Epsilon: 0.10


 16%|█▌        | 24/150 [45:54<4:00:02, 114.30s/it]

Episode: 23, Reward: -19.00, Epsilon: 0.10
Episode: 23, Reward: -19.00, Epsilon: 0.10
Episode: 23, Reward: -19.00, Epsilon: 0.10
Episode: 23, Reward: -19.00, Epsilon: 0.10
Episode: 23, Reward: -18.00, Epsilon: 0.10
Episode: 23, Reward: -18.00, Epsilon: 0.10
Episode: 23, Reward: -19.00, Epsilon: 0.10
Episode: 23, Reward: -21.00, Epsilon: 0.10


 17%|█▋        | 25/150 [47:43<3:54:50, 112.72s/it]

Episode: 24, Reward: -21.00, Epsilon: 0.10
Episode: 24, Reward: -21.00, Epsilon: 0.10
Episode: 24, Reward: -21.00, Epsilon: 0.10
Episode: 24, Reward: -21.00, Epsilon: 0.10
Episode: 24, Reward: -21.00, Epsilon: 0.10
Episode: 24, Reward: -21.00, Epsilon: 0.10
Episode: 24, Reward: -21.00, Epsilon: 0.10
Episode: 24, Reward: -21.00, Epsilon: 0.10


 17%|█▋        | 26/150 [49:32<3:50:51, 111.71s/it]

Episode: 25, Reward: -21.00, Epsilon: 0.10
Episode: 25, Reward: -21.00, Epsilon: 0.10
Episode: 25, Reward: -21.00, Epsilon: 0.10
Episode: 25, Reward: -21.00, Epsilon: 0.10
Episode: 25, Reward: -21.00, Epsilon: 0.10
Episode: 25, Reward: -21.00, Epsilon: 0.10
Episode: 25, Reward: -21.00, Epsilon: 0.10
Episode: 25, Reward: -21.00, Epsilon: 0.10


 18%|█▊        | 27/150 [51:30<3:52:36, 113.47s/it]

Episode: 26, Reward: -16.00, Epsilon: 0.10
Episode: 26, Reward: -17.00, Epsilon: 0.10
Episode: 26, Reward: -19.00, Epsilon: 0.10
Episode: 26, Reward: -19.00, Epsilon: 0.10
Episode: 26, Reward: -19.00, Epsilon: 0.10
Episode: 26, Reward: -15.00, Epsilon: 0.10
Episode: 26, Reward: -21.00, Epsilon: 0.10
Episode: 26, Reward: -18.00, Epsilon: 0.10


 19%|█▊        | 28/150 [53:22<3:49:33, 112.90s/it]

Episode: 27, Reward: -17.00, Epsilon: 0.10
Episode: 27, Reward: -17.00, Epsilon: 0.10
Episode: 27, Reward: -18.00, Epsilon: 0.10
Episode: 27, Reward: -20.00, Epsilon: 0.10
Episode: 27, Reward: -21.00, Epsilon: 0.10
Episode: 27, Reward: -17.00, Epsilon: 0.10
Episode: 27, Reward: -21.00, Epsilon: 0.10
Episode: 27, Reward: -18.00, Epsilon: 0.10


 19%|█▉        | 29/150 [55:11<3:45:18, 111.72s/it]

Episode: 28, Reward: -21.00, Epsilon: 0.10
Episode: 28, Reward: -19.00, Epsilon: 0.10
Episode: 28, Reward: -19.00, Epsilon: 0.10
Episode: 28, Reward: -20.00, Epsilon: 0.10
Episode: 28, Reward: -19.00, Epsilon: 0.10
Episode: 28, Reward: -17.00, Epsilon: 0.10
Episode: 28, Reward: -20.00, Epsilon: 0.10
Episode: 28, Reward: -19.00, Epsilon: 0.10


 20%|██        | 30/150 [57:00<3:42:12, 111.10s/it]

Episode: 29, Reward: -21.00, Epsilon: 0.10
Episode: 29, Reward: -19.00, Epsilon: 0.10
Episode: 29, Reward: -21.00, Epsilon: 0.10
Episode: 29, Reward: -21.00, Epsilon: 0.10
Episode: 29, Reward: -21.00, Epsilon: 0.10
Episode: 29, Reward: -20.00, Epsilon: 0.10
Episode: 29, Reward: -21.00, Epsilon: 0.10
Episode: 29, Reward: -17.00, Epsilon: 0.10


 21%|██        | 31/150 [58:50<3:39:32, 110.69s/it]

Episode: 30, Reward: -20.00, Epsilon: 0.10
Episode: 30, Reward: -20.00, Epsilon: 0.10
Episode: 30, Reward: -17.00, Epsilon: 0.10
Episode: 30, Reward: -17.00, Epsilon: 0.10
Episode: 30, Reward: -17.00, Epsilon: 0.10
Episode: 30, Reward: -19.00, Epsilon: 0.10
Episode: 30, Reward: -19.00, Epsilon: 0.10
Episode: 30, Reward: -21.00, Epsilon: 0.10


 21%|██▏       | 32/150 [1:01:00<3:49:02, 116.46s/it]

Episode: 31, Reward: -21.00, Epsilon: 0.10
Episode: 31, Reward: -20.00, Epsilon: 0.10
Episode: 31, Reward: -21.00, Epsilon: 0.10
Episode: 31, Reward: -20.00, Epsilon: 0.10
Episode: 31, Reward: -20.00, Epsilon: 0.10
Episode: 31, Reward: -16.00, Epsilon: 0.10
Episode: 31, Reward: -20.00, Epsilon: 0.10
Episode: 31, Reward: -20.00, Epsilon: 0.10


 22%|██▏       | 33/150 [1:02:59<3:48:23, 117.12s/it]

Episode: 32, Reward: -21.00, Epsilon: 0.10
Episode: 32, Reward: -18.00, Epsilon: 0.10
Episode: 32, Reward: -18.00, Epsilon: 0.10
Episode: 32, Reward: -18.00, Epsilon: 0.10
Episode: 32, Reward: -18.00, Epsilon: 0.10
Episode: 32, Reward: -18.00, Epsilon: 0.10
Episode: 32, Reward: -21.00, Epsilon: 0.10
Episode: 32, Reward: -18.00, Epsilon: 0.10


 23%|██▎       | 34/150 [1:04:52<3:44:30, 116.13s/it]

Episode: 33, Reward: -16.00, Epsilon: 0.10
Episode: 33, Reward: -18.00, Epsilon: 0.10
Episode: 33, Reward: -21.00, Epsilon: 0.10
Episode: 33, Reward: -16.00, Epsilon: 0.10
Episode: 33, Reward: -21.00, Epsilon: 0.10
Episode: 33, Reward: -18.00, Epsilon: 0.10
Episode: 33, Reward: -16.00, Epsilon: 0.10
Episode: 33, Reward: -18.00, Epsilon: 0.10


 23%|██▎       | 35/150 [1:06:50<3:43:38, 116.68s/it]

Episode: 34, Reward: -19.00, Epsilon: 0.10
Episode: 34, Reward: -19.00, Epsilon: 0.10
Episode: 34, Reward: -17.00, Epsilon: 0.10
Episode: 34, Reward: -21.00, Epsilon: 0.10
Episode: 34, Reward: -21.00, Epsilon: 0.10
Episode: 34, Reward: -20.00, Epsilon: 0.10
Episode: 34, Reward: -20.00, Epsilon: 0.10
Episode: 34, Reward: -19.00, Epsilon: 0.10


 24%|██▍       | 36/150 [1:08:40<3:37:38, 114.55s/it]

Episode: 35, Reward: -21.00, Epsilon: 0.10
Episode: 35, Reward: -21.00, Epsilon: 0.10
Episode: 35, Reward: -21.00, Epsilon: 0.10
Episode: 35, Reward: -21.00, Epsilon: 0.10
Episode: 35, Reward: -21.00, Epsilon: 0.10
Episode: 35, Reward: -21.00, Epsilon: 0.10
Episode: 35, Reward: -21.00, Epsilon: 0.10
Episode: 35, Reward: -21.00, Epsilon: 0.10


 25%|██▍       | 37/150 [1:10:30<3:33:02, 113.12s/it]

Episode: 36, Reward: -19.00, Epsilon: 0.10
Episode: 36, Reward: -19.00, Epsilon: 0.10
Episode: 36, Reward: -21.00, Epsilon: 0.10
Episode: 36, Reward: -16.00, Epsilon: 0.10
Episode: 36, Reward: -21.00, Epsilon: 0.10
Episode: 36, Reward: -21.00, Epsilon: 0.10
Episode: 36, Reward: -20.00, Epsilon: 0.10
Episode: 36, Reward: -19.00, Epsilon: 0.10


 25%|██▌       | 38/150 [1:12:24<3:31:40, 113.40s/it]

Episode: 37, Reward: -19.00, Epsilon: 0.10
Episode: 37, Reward: -20.00, Epsilon: 0.10
Episode: 37, Reward: -20.00, Epsilon: 0.10
Episode: 37, Reward: -20.00, Epsilon: 0.10
Episode: 37, Reward: -19.00, Epsilon: 0.10
Episode: 37, Reward: -21.00, Epsilon: 0.10
Episode: 37, Reward: -20.00, Epsilon: 0.10
Episode: 37, Reward: -18.00, Epsilon: 0.10


 26%|██▌       | 39/150 [1:14:22<3:32:32, 114.89s/it]

Episode: 38, Reward: -20.00, Epsilon: 0.10
Episode: 38, Reward: -21.00, Epsilon: 0.10
Episode: 38, Reward: -15.00, Epsilon: 0.10
Episode: 38, Reward: -19.00, Epsilon: 0.10
Episode: 38, Reward: -21.00, Epsilon: 0.10
Episode: 38, Reward: -19.00, Epsilon: 0.10
Episode: 38, Reward: -19.00, Epsilon: 0.10
Episode: 38, Reward: -21.00, Epsilon: 0.10


 27%|██▋       | 40/150 [1:16:12<3:27:54, 113.40s/it]

Episode: 39, Reward: -21.00, Epsilon: 0.10
Episode: 39, Reward: -15.00, Epsilon: 0.10
Episode: 39, Reward: -19.00, Epsilon: 0.10
Episode: 39, Reward: -17.00, Epsilon: 0.10
Episode: 39, Reward: -19.00, Epsilon: 0.10
Episode: 39, Reward: -16.00, Epsilon: 0.10
Episode: 39, Reward: -15.00, Epsilon: 0.10
Episode: 39, Reward: -17.00, Epsilon: 0.10


 27%|██▋       | 41/150 [1:18:04<3:25:23, 113.06s/it]

Episode: 40, Reward: -21.00, Epsilon: 0.10
Episode: 40, Reward: -20.00, Epsilon: 0.10
Episode: 40, Reward: -20.00, Epsilon: 0.10
Episode: 40, Reward: -18.00, Epsilon: 0.10
Episode: 40, Reward: -20.00, Epsilon: 0.10
Episode: 40, Reward: -18.00, Epsilon: 0.10
Episode: 40, Reward: -17.00, Epsilon: 0.10
Episode: 40, Reward: -20.00, Epsilon: 0.10


 28%|██▊       | 42/150 [1:20:03<3:26:28, 114.71s/it]

Episode: 41, Reward: -21.00, Epsilon: 0.10
Episode: 41, Reward: -21.00, Epsilon: 0.10
Episode: 41, Reward: -20.00, Epsilon: 0.10
Episode: 41, Reward: -20.00, Epsilon: 0.10
Episode: 41, Reward: -19.00, Epsilon: 0.10
Episode: 41, Reward: -19.00, Epsilon: 0.10
Episode: 41, Reward: -21.00, Epsilon: 0.10
Episode: 41, Reward: -20.00, Epsilon: 0.10


 29%|██▊       | 43/150 [1:21:53<3:22:02, 113.30s/it]

Episode: 42, Reward: -21.00, Epsilon: 0.10
Episode: 42, Reward: -21.00, Epsilon: 0.10
Episode: 42, Reward: -21.00, Epsilon: 0.10
Episode: 42, Reward: -21.00, Epsilon: 0.10
Episode: 42, Reward: -21.00, Epsilon: 0.10
Episode: 42, Reward: -21.00, Epsilon: 0.10
Episode: 42, Reward: -21.00, Epsilon: 0.10
Episode: 42, Reward: -21.00, Epsilon: 0.10


 29%|██▉       | 44/150 [1:23:43<3:18:17, 112.24s/it]

Episode: 43, Reward: -21.00, Epsilon: 0.10
Episode: 43, Reward: -21.00, Epsilon: 0.10
Episode: 43, Reward: -18.00, Epsilon: 0.10
Episode: 43, Reward: -21.00, Epsilon: 0.10
Episode: 43, Reward: -21.00, Epsilon: 0.10
Episode: 43, Reward: -21.00, Epsilon: 0.10
Episode: 43, Reward: -20.00, Epsilon: 0.10
Episode: 43, Reward: -19.00, Epsilon: 0.10


 30%|███       | 45/150 [1:25:37<3:17:15, 112.72s/it]

Episode: 44, Reward: -21.00, Epsilon: 0.10
Episode: 44, Reward: -21.00, Epsilon: 0.10
Episode: 44, Reward: -20.00, Epsilon: 0.10
Episode: 44, Reward: -20.00, Epsilon: 0.10
Episode: 44, Reward: -20.00, Epsilon: 0.10
Episode: 44, Reward: -20.00, Epsilon: 0.10
Episode: 44, Reward: -18.00, Epsilon: 0.10
Episode: 44, Reward: -21.00, Epsilon: 0.10


 31%|███       | 46/150 [1:27:35<3:18:18, 114.41s/it]

Episode: 45, Reward: -21.00, Epsilon: 0.10
Episode: 45, Reward: -20.00, Epsilon: 0.10
Episode: 45, Reward: -20.00, Epsilon: 0.10
Episode: 45, Reward: -21.00, Epsilon: 0.10
Episode: 45, Reward: -20.00, Epsilon: 0.10
Episode: 45, Reward: -20.00, Epsilon: 0.10
Episode: 45, Reward: -21.00, Epsilon: 0.10
Episode: 45, Reward: -20.00, Epsilon: 0.10


 31%|███▏      | 47/150 [1:29:25<3:13:59, 113.01s/it]

Episode: 46, Reward: -19.00, Epsilon: 0.10
Episode: 46, Reward: -21.00, Epsilon: 0.10
Episode: 46, Reward: -21.00, Epsilon: 0.10
Episode: 46, Reward: -19.00, Epsilon: 0.10
Episode: 46, Reward: -19.00, Epsilon: 0.10
Episode: 46, Reward: -21.00, Epsilon: 0.10
Episode: 46, Reward: -19.00, Epsilon: 0.10
Episode: 46, Reward: -19.00, Epsilon: 0.10


 32%|███▏      | 48/150 [1:31:14<3:10:25, 112.01s/it]

Episode: 47, Reward: -21.00, Epsilon: 0.10
Episode: 47, Reward: -19.00, Epsilon: 0.10
Episode: 47, Reward: -16.00, Epsilon: 0.10
Episode: 47, Reward: -19.00, Epsilon: 0.10
Episode: 47, Reward: -18.00, Epsilon: 0.10
Episode: 47, Reward: -15.00, Epsilon: 0.10
Episode: 47, Reward: -19.00, Epsilon: 0.10
Episode: 47, Reward: -21.00, Epsilon: 0.10


 33%|███▎      | 49/150 [1:33:05<3:07:43, 111.52s/it]

Episode: 48, Reward: -19.00, Epsilon: 0.10
Episode: 48, Reward: -15.00, Epsilon: 0.10
Episode: 48, Reward: -17.00, Epsilon: 0.10
Episode: 48, Reward: -16.00, Epsilon: 0.10
Episode: 48, Reward: -17.00, Epsilon: 0.10
Episode: 48, Reward: -19.00, Epsilon: 0.10
Episode: 48, Reward: -21.00, Epsilon: 0.10
Episode: 48, Reward: -19.00, Epsilon: 0.10


 33%|███▎      | 50/150 [1:34:55<3:05:08, 111.08s/it]

Episode: 49, Reward: -20.00, Epsilon: 0.10
Episode: 49, Reward: -21.00, Epsilon: 0.10
Episode: 49, Reward: -21.00, Epsilon: 0.10
Episode: 49, Reward: -20.00, Epsilon: 0.10
Episode: 49, Reward: -19.00, Epsilon: 0.10
Episode: 49, Reward: -19.00, Epsilon: 0.10
Episode: 49, Reward: -19.00, Epsilon: 0.10
Episode: 49, Reward: -21.00, Epsilon: 0.10


 34%|███▍      | 51/150 [1:36:44<3:02:22, 110.53s/it]

Episode: 50, Reward: -21.00, Epsilon: 0.10
Episode: 50, Reward: -21.00, Epsilon: 0.10
Episode: 50, Reward: -21.00, Epsilon: 0.10
Episode: 50, Reward: -21.00, Epsilon: 0.10
Episode: 50, Reward: -21.00, Epsilon: 0.10
Episode: 50, Reward: -21.00, Epsilon: 0.10
Episode: 50, Reward: -21.00, Epsilon: 0.10
Episode: 50, Reward: -21.00, Epsilon: 0.10


 35%|███▍      | 52/150 [1:38:43<3:04:38, 113.04s/it]

Episode: 51, Reward: -21.00, Epsilon: 0.10
Episode: 51, Reward: -21.00, Epsilon: 0.10
Episode: 51, Reward: -17.00, Epsilon: 0.10
Episode: 51, Reward: -17.00, Epsilon: 0.10
Episode: 51, Reward: -21.00, Epsilon: 0.10
Episode: 51, Reward: -21.00, Epsilon: 0.10
Episode: 51, Reward: -17.00, Epsilon: 0.10
Episode: 51, Reward: -17.00, Epsilon: 0.10


 35%|███▌      | 53/150 [1:40:32<3:01:02, 111.98s/it]

Episode: 52, Reward: -21.00, Epsilon: 0.10
Episode: 52, Reward: -21.00, Epsilon: 0.10
Episode: 52, Reward: -21.00, Epsilon: 0.10
Episode: 52, Reward: -21.00, Epsilon: 0.10
Episode: 52, Reward: -21.00, Epsilon: 0.10
Episode: 52, Reward: -21.00, Epsilon: 0.10
Episode: 52, Reward: -21.00, Epsilon: 0.10
Episode: 52, Reward: -21.00, Epsilon: 0.10


 36%|███▌      | 54/150 [1:42:22<2:57:50, 111.16s/it]

Episode: 53, Reward: -21.00, Epsilon: 0.10
Episode: 53, Reward: -21.00, Epsilon: 0.10
Episode: 53, Reward: -21.00, Epsilon: 0.10
Episode: 53, Reward: -21.00, Epsilon: 0.10
Episode: 53, Reward: -19.00, Epsilon: 0.10
Episode: 53, Reward: -21.00, Epsilon: 0.10
Episode: 53, Reward: -21.00, Epsilon: 0.10
Episode: 53, Reward: -21.00, Epsilon: 0.10


 37%|███▋      | 55/150 [1:44:11<2:55:08, 110.62s/it]

Episode: 54, Reward: -21.00, Epsilon: 0.10
Episode: 54, Reward: -21.00, Epsilon: 0.10
Episode: 54, Reward: -21.00, Epsilon: 0.10
Episode: 54, Reward: -21.00, Epsilon: 0.10
Episode: 54, Reward: -21.00, Epsilon: 0.10
Episode: 54, Reward: -21.00, Epsilon: 0.10
Episode: 54, Reward: -21.00, Epsilon: 0.10
Episode: 54, Reward: -21.00, Epsilon: 0.10


 37%|███▋      | 56/150 [1:46:00<2:52:39, 110.21s/it]

Episode: 55, Reward: -20.00, Epsilon: 0.10
Episode: 55, Reward: -20.00, Epsilon: 0.10
Episode: 55, Reward: -21.00, Epsilon: 0.10
Episode: 55, Reward: -19.00, Epsilon: 0.10
Episode: 55, Reward: -19.00, Epsilon: 0.10
Episode: 55, Reward: -21.00, Epsilon: 0.10
Episode: 55, Reward: -19.00, Epsilon: 0.10
Episode: 55, Reward: -21.00, Epsilon: 0.10


 38%|███▊      | 57/150 [1:47:53<2:52:02, 111.00s/it]

Episode: 56, Reward: -21.00, Epsilon: 0.10
Episode: 56, Reward: -19.00, Epsilon: 0.10
Episode: 56, Reward: -21.00, Epsilon: 0.10
Episode: 56, Reward: -21.00, Epsilon: 0.10
Episode: 56, Reward: -19.00, Epsilon: 0.10
Episode: 56, Reward: -20.00, Epsilon: 0.10
Episode: 56, Reward: -21.00, Epsilon: 0.10
Episode: 56, Reward: -21.00, Epsilon: 0.10


 39%|███▊      | 58/150 [1:49:43<2:49:45, 110.72s/it]

Episode: 57, Reward: -21.00, Epsilon: 0.10
Episode: 57, Reward: -21.00, Epsilon: 0.10
Episode: 57, Reward: -21.00, Epsilon: 0.10
Episode: 57, Reward: -21.00, Epsilon: 0.10
Episode: 57, Reward: -21.00, Epsilon: 0.10
Episode: 57, Reward: -21.00, Epsilon: 0.10
Episode: 57, Reward: -21.00, Epsilon: 0.10
Episode: 57, Reward: -21.00, Epsilon: 0.10


 39%|███▉      | 59/150 [1:51:41<2:51:09, 112.85s/it]

Episode: 58, Reward: -21.00, Epsilon: 0.10
Episode: 58, Reward: -20.00, Epsilon: 0.10
Episode: 58, Reward: -19.00, Epsilon: 0.10
Episode: 58, Reward: -18.00, Epsilon: 0.10
Episode: 58, Reward: -21.00, Epsilon: 0.10
Episode: 58, Reward: -21.00, Epsilon: 0.10
Episode: 58, Reward: -21.00, Epsilon: 0.10
Episode: 58, Reward: -20.00, Epsilon: 0.10


 40%|████      | 60/150 [1:53:30<2:47:45, 111.84s/it]

Episode: 59, Reward: -18.00, Epsilon: 0.10
Episode: 59, Reward: -19.00, Epsilon: 0.10
Episode: 59, Reward: -21.00, Epsilon: 0.10
Episode: 59, Reward: -18.00, Epsilon: 0.10
Episode: 59, Reward: -17.00, Epsilon: 0.10
Episode: 59, Reward: -16.00, Epsilon: 0.10
Episode: 59, Reward: -20.00, Epsilon: 0.10
Episode: 59, Reward: -21.00, Epsilon: 0.10


 41%|████      | 61/150 [1:55:29<2:48:40, 113.72s/it]

Episode: 60, Reward: -21.00, Epsilon: 0.10
Episode: 60, Reward: -19.00, Epsilon: 0.10
Episode: 60, Reward: -19.00, Epsilon: 0.10
Episode: 60, Reward: -18.00, Epsilon: 0.10
Episode: 60, Reward: -19.00, Epsilon: 0.10
Episode: 60, Reward: -19.00, Epsilon: 0.10
Episode: 60, Reward: -20.00, Epsilon: 0.10
Episode: 60, Reward: -19.00, Epsilon: 0.10


 41%|████▏     | 62/150 [1:57:26<2:48:24, 114.82s/it]

Episode: 61, Reward: -20.00, Epsilon: 0.10
Episode: 61, Reward: -20.00, Epsilon: 0.10
Episode: 61, Reward: -20.00, Epsilon: 0.10
Episode: 61, Reward: -18.00, Epsilon: 0.10
Episode: 61, Reward: -19.00, Epsilon: 0.10
Episode: 61, Reward: -19.00, Epsilon: 0.10
Episode: 61, Reward: -19.00, Epsilon: 0.10
Episode: 61, Reward: -21.00, Epsilon: 0.10


 42%|████▏     | 63/150 [1:59:15<2:44:07, 113.19s/it]

Episode: 62, Reward: -21.00, Epsilon: 0.10
Episode: 62, Reward: -17.00, Epsilon: 0.10
Episode: 62, Reward: -21.00, Epsilon: 0.10
Episode: 62, Reward: -20.00, Epsilon: 0.10
Episode: 62, Reward: -17.00, Epsilon: 0.10
Episode: 62, Reward: -21.00, Epsilon: 0.10
Episode: 62, Reward: -21.00, Epsilon: 0.10
Episode: 62, Reward: -21.00, Epsilon: 0.10


 43%|████▎     | 64/150 [2:01:05<2:40:34, 112.03s/it]

Episode: 63, Reward: -17.00, Epsilon: 0.10
Episode: 63, Reward: -19.00, Epsilon: 0.10
Episode: 63, Reward: -18.00, Epsilon: 0.10
Episode: 63, Reward: -20.00, Epsilon: 0.10
Episode: 63, Reward: -12.00, Epsilon: 0.10
Episode: 63, Reward: -21.00, Epsilon: 0.10
Episode: 63, Reward: -19.00, Epsilon: 0.10
Episode: 63, Reward: -20.00, Epsilon: 0.10


 43%|████▎     | 65/150 [2:02:58<2:39:22, 112.50s/it]

Episode: 64, Reward: -21.00, Epsilon: 0.10
Episode: 64, Reward: -18.00, Epsilon: 0.10
Episode: 64, Reward: -18.00, Epsilon: 0.10
Episode: 64, Reward: -18.00, Epsilon: 0.10
Episode: 64, Reward: -18.00, Epsilon: 0.10
Episode: 64, Reward: -18.00, Epsilon: 0.10
Episode: 64, Reward: -16.00, Epsilon: 0.10
Episode: 64, Reward: -19.00, Epsilon: 0.10


 44%|████▍     | 66/150 [2:04:48<2:36:12, 111.58s/it]

Episode: 65, Reward: -19.00, Epsilon: 0.10
Episode: 65, Reward: -19.00, Epsilon: 0.10
Episode: 65, Reward: -19.00, Epsilon: 0.10
Episode: 65, Reward: -21.00, Epsilon: 0.10
Episode: 65, Reward: -19.00, Epsilon: 0.10
Episode: 65, Reward: -19.00, Epsilon: 0.10
Episode: 65, Reward: -19.00, Epsilon: 0.10
Episode: 65, Reward: -16.00, Epsilon: 0.10


 45%|████▍     | 67/150 [2:06:45<2:36:44, 113.31s/it]

Episode: 66, Reward: -20.00, Epsilon: 0.10
Episode: 66, Reward: -20.00, Epsilon: 0.10
Episode: 66, Reward: -19.00, Epsilon: 0.10
Episode: 66, Reward: -20.00, Epsilon: 0.10
Episode: 66, Reward: -15.00, Epsilon: 0.10
Episode: 66, Reward: -19.00, Epsilon: 0.10
Episode: 66, Reward: -21.00, Epsilon: 0.10
Episode: 66, Reward: -19.00, Epsilon: 0.10


 45%|████▌     | 68/150 [2:08:34<2:33:06, 112.03s/it]

Episode: 67, Reward: -21.00, Epsilon: 0.10
Episode: 67, Reward: -17.00, Epsilon: 0.10
Episode: 67, Reward: -20.00, Epsilon: 0.10
Episode: 67, Reward: -21.00, Epsilon: 0.10
Episode: 67, Reward: -20.00, Epsilon: 0.10
Episode: 67, Reward: -21.00, Epsilon: 0.10
Episode: 67, Reward: -18.00, Epsilon: 0.10
Episode: 67, Reward: -18.00, Epsilon: 0.10


 46%|████▌     | 69/150 [2:10:27<2:31:39, 112.33s/it]

Episode: 68, Reward: -20.00, Epsilon: 0.10
Episode: 68, Reward: -21.00, Epsilon: 0.10
Episode: 68, Reward: -20.00, Epsilon: 0.10
Episode: 68, Reward: -18.00, Epsilon: 0.10
Episode: 68, Reward: -18.00, Epsilon: 0.10
Episode: 68, Reward: -20.00, Epsilon: 0.10
Episode: 68, Reward: -20.00, Epsilon: 0.10
Episode: 68, Reward: -18.00, Epsilon: 0.10


 47%|████▋     | 70/150 [2:12:16<2:28:34, 111.43s/it]

Episode: 69, Reward: -20.00, Epsilon: 0.10
Episode: 69, Reward: -19.00, Epsilon: 0.10
Episode: 69, Reward: -17.00, Epsilon: 0.10
Episode: 69, Reward: -18.00, Epsilon: 0.10
Episode: 69, Reward: -21.00, Epsilon: 0.10
Episode: 69, Reward: -16.00, Epsilon: 0.10
Episode: 69, Reward: -19.00, Epsilon: 0.10
Episode: 69, Reward: -20.00, Epsilon: 0.10


 47%|████▋     | 71/150 [2:14:10<2:27:36, 112.11s/it]

Episode: 70, Reward: -19.00, Epsilon: 0.10
Episode: 70, Reward: -20.00, Epsilon: 0.10
Episode: 70, Reward: -20.00, Epsilon: 0.10
Episode: 70, Reward: -18.00, Epsilon: 0.10
Episode: 70, Reward: -21.00, Epsilon: 0.10
Episode: 70, Reward: -21.00, Epsilon: 0.10
Episode: 70, Reward: -18.00, Epsilon: 0.10
Episode: 70, Reward: -21.00, Epsilon: 0.10


 48%|████▊     | 72/150 [2:16:03<2:26:04, 112.36s/it]

Episode: 71, Reward: -18.00, Epsilon: 0.10
Episode: 71, Reward: -16.00, Epsilon: 0.10
Episode: 71, Reward: -17.00, Epsilon: 0.10
Episode: 71, Reward: -20.00, Epsilon: 0.10
Episode: 71, Reward: -17.00, Epsilon: 0.10
Episode: 71, Reward: -20.00, Epsilon: 0.10
Episode: 71, Reward: -21.00, Epsilon: 0.10
Episode: 71, Reward: -18.00, Epsilon: 0.10


 49%|████▊     | 73/150 [2:18:01<2:26:16, 113.98s/it]

Episode: 72, Reward: -20.00, Epsilon: 0.10
Episode: 72, Reward: -20.00, Epsilon: 0.10
Episode: 72, Reward: -19.00, Epsilon: 0.10
Episode: 72, Reward: -19.00, Epsilon: 0.10
Episode: 72, Reward: -21.00, Epsilon: 0.10
Episode: 72, Reward: -17.00, Epsilon: 0.10
Episode: 72, Reward: -15.00, Epsilon: 0.10
Episode: 72, Reward: -21.00, Epsilon: 0.10


 49%|████▉     | 74/150 [2:19:50<2:22:29, 112.50s/it]

Episode: 73, Reward: -21.00, Epsilon: 0.10
Episode: 73, Reward: -21.00, Epsilon: 0.10
Episode: 73, Reward: -21.00, Epsilon: 0.10
Episode: 73, Reward: -19.00, Epsilon: 0.10
Episode: 73, Reward: -21.00, Epsilon: 0.10
Episode: 73, Reward: -21.00, Epsilon: 0.10
Episode: 73, Reward: -21.00, Epsilon: 0.10
Episode: 73, Reward: -21.00, Epsilon: 0.10


 50%|█████     | 75/150 [2:21:39<2:19:17, 111.43s/it]

Episode: 74, Reward: -21.00, Epsilon: 0.10
Episode: 74, Reward: -21.00, Epsilon: 0.10
Episode: 74, Reward: -21.00, Epsilon: 0.10
Episode: 74, Reward: -21.00, Epsilon: 0.10
Episode: 74, Reward: -21.00, Epsilon: 0.10
Episode: 74, Reward: -21.00, Epsilon: 0.10
Episode: 74, Reward: -21.00, Epsilon: 0.10
Episode: 74, Reward: -21.00, Epsilon: 0.10


 51%|█████     | 76/150 [2:23:28<2:16:31, 110.70s/it]

Episode: 75, Reward: -21.00, Epsilon: 0.10
Episode: 75, Reward: -20.00, Epsilon: 0.10
Episode: 75, Reward: -21.00, Epsilon: 0.10
Episode: 75, Reward: -20.00, Epsilon: 0.10
Episode: 75, Reward: -21.00, Epsilon: 0.10
Episode: 75, Reward: -21.00, Epsilon: 0.10
Episode: 75, Reward: -21.00, Epsilon: 0.10
Episode: 75, Reward: -20.00, Epsilon: 0.10


 51%|█████▏    | 77/150 [2:25:17<2:14:03, 110.19s/it]

Episode: 76, Reward: -21.00, Epsilon: 0.10
Episode: 76, Reward: -21.00, Epsilon: 0.10
Episode: 76, Reward: -21.00, Epsilon: 0.10
Episode: 76, Reward: -21.00, Epsilon: 0.10
Episode: 76, Reward: -21.00, Epsilon: 0.10
Episode: 76, Reward: -21.00, Epsilon: 0.10
Episode: 76, Reward: -21.00, Epsilon: 0.10
Episode: 76, Reward: -21.00, Epsilon: 0.10


 52%|█████▏    | 78/150 [2:27:05<2:11:40, 109.73s/it]

Episode: 77, Reward: -20.00, Epsilon: 0.10
Episode: 77, Reward: -21.00, Epsilon: 0.10
Episode: 77, Reward: -21.00, Epsilon: 0.10
Episode: 77, Reward: -21.00, Epsilon: 0.10
Episode: 77, Reward: -20.00, Epsilon: 0.10
Episode: 77, Reward: -19.00, Epsilon: 0.10
Episode: 77, Reward: -21.00, Epsilon: 0.10
Episode: 77, Reward: -19.00, Epsilon: 0.10


 53%|█████▎    | 79/150 [2:28:54<2:09:16, 109.24s/it]

Episode: 78, Reward: -21.00, Epsilon: 0.10
Episode: 78, Reward: -19.00, Epsilon: 0.10
Episode: 78, Reward: -21.00, Epsilon: 0.10
Episode: 78, Reward: -17.00, Epsilon: 0.10
Episode: 78, Reward: -19.00, Epsilon: 0.10
Episode: 78, Reward: -19.00, Epsilon: 0.10
Episode: 78, Reward: -19.00, Epsilon: 0.10
Episode: 78, Reward: -21.00, Epsilon: 0.10


 53%|█████▎    | 80/150 [2:30:42<2:07:05, 108.93s/it]

Episode: 79, Reward: -19.00, Epsilon: 0.10
Episode: 79, Reward: -15.00, Epsilon: 0.10
Episode: 79, Reward: -17.00, Epsilon: 0.10
Episode: 79, Reward: -21.00, Epsilon: 0.10
Episode: 79, Reward: -17.00, Epsilon: 0.10
Episode: 79, Reward: -19.00, Epsilon: 0.10
Episode: 79, Reward: -17.00, Epsilon: 0.10
Episode: 79, Reward: -21.00, Epsilon: 0.10


 54%|█████▍    | 81/150 [2:32:34<2:06:22, 109.89s/it]

Episode: 80, Reward: -20.00, Epsilon: 0.10
Episode: 80, Reward: -20.00, Epsilon: 0.10
Episode: 80, Reward: -20.00, Epsilon: 0.10
Episode: 80, Reward: -18.00, Epsilon: 0.10
Episode: 80, Reward: -21.00, Epsilon: 0.10
Episode: 80, Reward: -21.00, Epsilon: 0.10
Episode: 80, Reward: -20.00, Epsilon: 0.10
Episode: 80, Reward: -20.00, Epsilon: 0.10


 55%|█████▍    | 82/150 [2:34:22<2:03:52, 109.30s/it]

Episode: 81, Reward: -21.00, Epsilon: 0.10
Episode: 81, Reward: -17.00, Epsilon: 0.10
Episode: 81, Reward: -17.00, Epsilon: 0.10
Episode: 81, Reward: -17.00, Epsilon: 0.10
Episode: 81, Reward: -16.00, Epsilon: 0.10
Episode: 81, Reward: -20.00, Epsilon: 0.10
Episode: 81, Reward: -17.00, Epsilon: 0.10
Episode: 81, Reward: -17.00, Epsilon: 0.10


 55%|█████▌    | 83/150 [2:36:10<2:01:43, 109.00s/it]

Episode: 82, Reward: -15.00, Epsilon: 0.10
Episode: 82, Reward: -21.00, Epsilon: 0.10
Episode: 82, Reward: -17.00, Epsilon: 0.10
Episode: 82, Reward: -21.00, Epsilon: 0.10
Episode: 82, Reward: -21.00, Epsilon: 0.10
Episode: 82, Reward: -21.00, Epsilon: 0.10
Episode: 82, Reward: -21.00, Epsilon: 0.10
Episode: 82, Reward: -16.00, Epsilon: 0.10


 56%|█████▌    | 84/150 [2:37:58<1:59:37, 108.76s/it]

Episode: 83, Reward: -21.00, Epsilon: 0.10
Episode: 83, Reward: -21.00, Epsilon: 0.10
Episode: 83, Reward: -21.00, Epsilon: 0.10
Episode: 83, Reward: -21.00, Epsilon: 0.10
Episode: 83, Reward: -21.00, Epsilon: 0.10
Episode: 83, Reward: -21.00, Epsilon: 0.10
Episode: 83, Reward: -21.00, Epsilon: 0.10
Episode: 83, Reward: -21.00, Epsilon: 0.10


 57%|█████▋    | 85/150 [2:39:50<1:58:55, 109.78s/it]

Episode: 84, Reward: -18.00, Epsilon: 0.10
Episode: 84, Reward: -20.00, Epsilon: 0.10
Episode: 84, Reward: -21.00, Epsilon: 0.10
Episode: 84, Reward: -21.00, Epsilon: 0.10
Episode: 84, Reward: -18.00, Epsilon: 0.10
Episode: 84, Reward: -18.00, Epsilon: 0.10
Episode: 84, Reward: -16.00, Epsilon: 0.10
Episode: 84, Reward: -18.00, Epsilon: 0.10


 57%|█████▋    | 86/150 [2:41:39<1:56:42, 109.41s/it]

Episode: 85, Reward: -21.00, Epsilon: 0.10
Episode: 85, Reward: -21.00, Epsilon: 0.10
Episode: 85, Reward: -21.00, Epsilon: 0.10
Episode: 85, Reward: -21.00, Epsilon: 0.10
Episode: 85, Reward: -21.00, Epsilon: 0.10
Episode: 85, Reward: -21.00, Epsilon: 0.10
Episode: 85, Reward: -21.00, Epsilon: 0.10
Episode: 85, Reward: -21.00, Epsilon: 0.10


 58%|█████▊    | 87/150 [2:43:36<1:57:21, 111.78s/it]

Episode: 86, Reward: -19.00, Epsilon: 0.10
Episode: 86, Reward: -21.00, Epsilon: 0.10
Episode: 86, Reward: -16.00, Epsilon: 0.10
Episode: 86, Reward: -20.00, Epsilon: 0.10
Episode: 86, Reward: -19.00, Epsilon: 0.10
Episode: 86, Reward: -19.00, Epsilon: 0.10
Episode: 86, Reward: -20.00, Epsilon: 0.10
Episode: 86, Reward: -15.00, Epsilon: 0.10


 59%|█████▊    | 88/150 [2:45:31<1:56:30, 112.74s/it]

Episode: 87, Reward: -19.00, Epsilon: 0.10
Episode: 87, Reward: -19.00, Epsilon: 0.10
Episode: 87, Reward: -19.00, Epsilon: 0.10
Episode: 87, Reward: -16.00, Epsilon: 0.10
Episode: 87, Reward: -21.00, Epsilon: 0.10
Episode: 87, Reward: -19.00, Epsilon: 0.10
Episode: 87, Reward: -15.00, Epsilon: 0.10
Episode: 87, Reward: -15.00, Epsilon: 0.10


 59%|█████▉    | 89/150 [2:47:34<1:57:39, 115.73s/it]

Episode: 88, Reward: -19.00, Epsilon: 0.10
Episode: 88, Reward: -21.00, Epsilon: 0.10
Episode: 88, Reward: -19.00, Epsilon: 0.10
Episode: 88, Reward: -19.00, Epsilon: 0.10
Episode: 88, Reward: -19.00, Epsilon: 0.10
Episode: 88, Reward: -19.00, Epsilon: 0.10
Episode: 88, Reward: -21.00, Epsilon: 0.10
Episode: 88, Reward: -19.00, Epsilon: 0.10


 60%|██████    | 90/150 [2:49:26<1:54:40, 114.67s/it]

Episode: 89, Reward: -17.00, Epsilon: 0.10
Episode: 89, Reward: -17.00, Epsilon: 0.10
Episode: 89, Reward: -21.00, Epsilon: 0.10
Episode: 89, Reward: -21.00, Epsilon: 0.10
Episode: 89, Reward: -21.00, Epsilon: 0.10
Episode: 89, Reward: -21.00, Epsilon: 0.10
Episode: 89, Reward: -19.00, Epsilon: 0.10
Episode: 89, Reward: -17.00, Epsilon: 0.10


 61%|██████    | 91/150 [2:51:17<1:51:40, 113.56s/it]

Episode: 90, Reward: -18.00, Epsilon: 0.10
Episode: 90, Reward: -21.00, Epsilon: 0.10
Episode: 90, Reward: -21.00, Epsilon: 0.10
Episode: 90, Reward: -21.00, Epsilon: 0.10
Episode: 90, Reward: -21.00, Epsilon: 0.10
Episode: 90, Reward: -21.00, Epsilon: 0.10
Episode: 90, Reward: -21.00, Epsilon: 0.10
Episode: 90, Reward: -21.00, Epsilon: 0.10


 61%|██████▏   | 92/150 [2:53:06<1:48:29, 112.24s/it]

Episode: 91, Reward: -21.00, Epsilon: 0.10
Episode: 91, Reward: -21.00, Epsilon: 0.10
Episode: 91, Reward: -21.00, Epsilon: 0.10
Episode: 91, Reward: -21.00, Epsilon: 0.10
Episode: 91, Reward: -21.00, Epsilon: 0.10
Episode: 91, Reward: -21.00, Epsilon: 0.10
Episode: 91, Reward: -21.00, Epsilon: 0.10
Episode: 91, Reward: -21.00, Epsilon: 0.10


 62%|██████▏   | 93/150 [2:54:56<1:45:48, 111.37s/it]

Episode: 92, Reward: -19.00, Epsilon: 0.10
Episode: 92, Reward: -21.00, Epsilon: 0.10
Episode: 92, Reward: -17.00, Epsilon: 0.10
Episode: 92, Reward: -19.00, Epsilon: 0.10
Episode: 92, Reward: -19.00, Epsilon: 0.10
Episode: 92, Reward: -19.00, Epsilon: 0.10
Episode: 92, Reward: -17.00, Epsilon: 0.10
Episode: 92, Reward: -15.00, Epsilon: 0.10


 63%|██████▎   | 94/150 [2:56:45<1:43:16, 110.65s/it]

Episode: 93, Reward: -19.00, Epsilon: 0.10
Episode: 93, Reward: -19.00, Epsilon: 0.10
Episode: 93, Reward: -15.00, Epsilon: 0.10
Episode: 93, Reward: -16.00, Epsilon: 0.10
Episode: 93, Reward: -17.00, Epsilon: 0.10
Episode: 93, Reward: -21.00, Epsilon: 0.10
Episode: 93, Reward: -16.00, Epsilon: 0.10
Episode: 93, Reward: -19.00, Epsilon: 0.10


 63%|██████▎   | 95/150 [2:58:33<1:40:45, 109.91s/it]

Episode: 94, Reward: -16.00, Epsilon: 0.10
Episode: 94, Reward: -13.00, Epsilon: 0.10
Episode: 94, Reward: -21.00, Epsilon: 0.10
Episode: 94, Reward: -19.00, Epsilon: 0.10
Episode: 94, Reward: -17.00, Epsilon: 0.10
Episode: 94, Reward: -15.00, Epsilon: 0.10
Episode: 94, Reward: -17.00, Epsilon: 0.10
Episode: 94, Reward: -20.00, Epsilon: 0.10


 64%|██████▍   | 96/150 [3:00:22<1:38:36, 109.57s/it]

Episode: 95, Reward: -21.00, Epsilon: 0.10
Episode: 95, Reward: -21.00, Epsilon: 0.10
Episode: 95, Reward: -21.00, Epsilon: 0.10
Episode: 95, Reward: -21.00, Epsilon: 0.10
Episode: 95, Reward: -21.00, Epsilon: 0.10
Episode: 95, Reward: -21.00, Epsilon: 0.10
Episode: 95, Reward: -21.00, Epsilon: 0.10
Episode: 95, Reward: -21.00, Epsilon: 0.10


 65%|██████▍   | 97/150 [3:02:10<1:36:34, 109.33s/it]

Episode: 96, Reward: -21.00, Epsilon: 0.10
Episode: 96, Reward: -21.00, Epsilon: 0.10
Episode: 96, Reward: -21.00, Epsilon: 0.10
Episode: 96, Reward: -21.00, Epsilon: 0.10
Episode: 96, Reward: -21.00, Epsilon: 0.10
Episode: 96, Reward: -21.00, Epsilon: 0.10
Episode: 96, Reward: -21.00, Epsilon: 0.10
Episode: 96, Reward: -21.00, Epsilon: 0.10


 65%|██████▌   | 98/150 [3:03:59<1:34:39, 109.22s/it]

Episode: 97, Reward: -21.00, Epsilon: 0.10
Episode: 97, Reward: -21.00, Epsilon: 0.10
Episode: 97, Reward: -21.00, Epsilon: 0.10
Episode: 97, Reward: -21.00, Epsilon: 0.10
Episode: 97, Reward: -21.00, Epsilon: 0.10
Episode: 97, Reward: -21.00, Epsilon: 0.10
Episode: 97, Reward: -21.00, Epsilon: 0.10
Episode: 97, Reward: -21.00, Epsilon: 0.10


 66%|██████▌   | 99/150 [3:05:52<1:33:45, 110.30s/it]

Episode: 98, Reward: -14.00, Epsilon: 0.10
Episode: 98, Reward: -17.00, Epsilon: 0.10
Episode: 98, Reward: -17.00, Epsilon: 0.10
Episode: 98, Reward: -21.00, Epsilon: 0.10
Episode: 98, Reward: -15.00, Epsilon: 0.10
Episode: 98, Reward: -18.00, Epsilon: 0.10
Episode: 98, Reward: -17.00, Epsilon: 0.10
Episode: 98, Reward: -18.00, Epsilon: 0.10


 67%|██████▋   | 100/150 [3:07:50<1:33:43, 112.47s/it]

Episode: 99, Reward: -18.00, Epsilon: 0.10
Episode: 99, Reward: -21.00, Epsilon: 0.10
Episode: 99, Reward: -17.00, Epsilon: 0.10
Episode: 99, Reward: -21.00, Epsilon: 0.10
Episode: 99, Reward: -20.00, Epsilon: 0.10
Episode: 99, Reward: -18.00, Epsilon: 0.10
Episode: 99, Reward: -21.00, Epsilon: 0.10
Episode: 99, Reward: -21.00, Epsilon: 0.10


 67%|██████▋   | 101/150 [3:09:47<1:33:04, 113.97s/it]

Episode: 100, Reward: -13.00, Epsilon: 0.10
Episode: 100, Reward: -21.00, Epsilon: 0.10
Episode: 100, Reward: -19.00, Epsilon: 0.10
Episode: 100, Reward: -20.00, Epsilon: 0.10
Episode: 100, Reward: -20.00, Epsilon: 0.10
Episode: 100, Reward: -17.00, Epsilon: 0.10
Episode: 100, Reward: -20.00, Epsilon: 0.10
Episode: 100, Reward: -19.00, Epsilon: 0.10


 68%|██████▊   | 102/150 [3:11:36<1:29:58, 112.47s/it]

Episode: 101, Reward: -20.00, Epsilon: 0.10
Episode: 101, Reward: -17.00, Epsilon: 0.10
Episode: 101, Reward: -15.00, Epsilon: 0.10
Episode: 101, Reward: -16.00, Epsilon: 0.10
Episode: 101, Reward: -20.00, Epsilon: 0.10
Episode: 101, Reward: -21.00, Epsilon: 0.10
Episode: 101, Reward: -17.00, Epsilon: 0.10
Episode: 101, Reward: -17.00, Epsilon: 0.10


 69%|██████▊   | 103/150 [3:13:25<1:27:17, 111.44s/it]

Episode: 102, Reward: -21.00, Epsilon: 0.10
Episode: 102, Reward: -21.00, Epsilon: 0.10
Episode: 102, Reward: -21.00, Epsilon: 0.10
Episode: 102, Reward: -20.00, Epsilon: 0.10
Episode: 102, Reward: -20.00, Epsilon: 0.10
Episode: 102, Reward: -21.00, Epsilon: 0.10
Episode: 102, Reward: -20.00, Epsilon: 0.10
Episode: 102, Reward: -21.00, Epsilon: 0.10


 69%|██████▉   | 104/150 [3:15:14<1:24:53, 110.73s/it]

Episode: 103, Reward: -19.00, Epsilon: 0.10
Episode: 103, Reward: -20.00, Epsilon: 0.10
Episode: 103, Reward: -20.00, Epsilon: 0.10
Episode: 103, Reward: -21.00, Epsilon: 0.10
Episode: 103, Reward: -21.00, Epsilon: 0.10
Episode: 103, Reward: -19.00, Epsilon: 0.10
Episode: 103, Reward: -21.00, Epsilon: 0.10
Episode: 103, Reward: -21.00, Epsilon: 0.10


 70%|███████   | 105/150 [3:17:12<1:24:34, 112.76s/it]

Episode: 104, Reward: -21.00, Epsilon: 0.10
Episode: 104, Reward: -19.00, Epsilon: 0.10
Episode: 104, Reward: -19.00, Epsilon: 0.10
Episode: 104, Reward: -19.00, Epsilon: 0.10
Episode: 104, Reward: -21.00, Epsilon: 0.10
Episode: 104, Reward: -17.00, Epsilon: 0.10
Episode: 104, Reward: -19.00, Epsilon: 0.10
Episode: 104, Reward: -17.00, Epsilon: 0.10


 71%|███████   | 106/150 [3:19:05<1:22:42, 112.78s/it]

Episode: 105, Reward: -18.00, Epsilon: 0.10
Episode: 105, Reward: -16.00, Epsilon: 0.10
Episode: 105, Reward: -21.00, Epsilon: 0.10
Episode: 105, Reward: -20.00, Epsilon: 0.10
Episode: 105, Reward: -20.00, Epsilon: 0.10
Episode: 105, Reward: -20.00, Epsilon: 0.10
Episode: 105, Reward: -15.00, Epsilon: 0.10
Episode: 105, Reward: -15.00, Epsilon: 0.10


 71%|███████▏  | 107/150 [3:20:53<1:19:56, 111.54s/it]

Episode: 106, Reward: -19.00, Epsilon: 0.10
Episode: 106, Reward: -17.00, Epsilon: 0.10
Episode: 106, Reward: -21.00, Epsilon: 0.10
Episode: 106, Reward: -15.00, Epsilon: 0.10
Episode: 106, Reward: -20.00, Epsilon: 0.10
Episode: 106, Reward: -17.00, Epsilon: 0.10
Episode: 106, Reward: -21.00, Epsilon: 0.10
Episode: 106, Reward: -20.00, Epsilon: 0.10


 72%|███████▏  | 108/150 [3:22:50<1:19:08, 113.06s/it]

Episode: 107, Reward: -18.00, Epsilon: 0.10
Episode: 107, Reward: -19.00, Epsilon: 0.10
Episode: 107, Reward: -18.00, Epsilon: 0.10
Episode: 107, Reward: -20.00, Epsilon: 0.10
Episode: 107, Reward: -21.00, Epsilon: 0.10
Episode: 107, Reward: -18.00, Epsilon: 0.10
Episode: 107, Reward: -19.00, Epsilon: 0.10
Episode: 107, Reward: -19.00, Epsilon: 0.10


 73%|███████▎  | 109/150 [3:24:49<1:18:32, 114.94s/it]

Episode: 108, Reward: -19.00, Epsilon: 0.10
Episode: 108, Reward: -19.00, Epsilon: 0.10
Episode: 108, Reward: -20.00, Epsilon: 0.10
Episode: 108, Reward: -19.00, Epsilon: 0.10
Episode: 108, Reward: -19.00, Epsilon: 0.10
Episode: 108, Reward: -18.00, Epsilon: 0.10
Episode: 108, Reward: -20.00, Epsilon: 0.10
Episode: 108, Reward: -19.00, Epsilon: 0.10


 73%|███████▎  | 110/150 [3:26:47<1:17:09, 115.74s/it]

Episode: 109, Reward: -18.00, Epsilon: 0.10
Episode: 109, Reward: -19.00, Epsilon: 0.10
Episode: 109, Reward: -20.00, Epsilon: 0.10
Episode: 109, Reward: -19.00, Epsilon: 0.10
Episode: 109, Reward: -18.00, Epsilon: 0.10
Episode: 109, Reward: -18.00, Epsilon: 0.10
Episode: 109, Reward: -21.00, Epsilon: 0.10
Episode: 109, Reward: -20.00, Epsilon: 0.10


 74%|███████▍  | 111/150 [3:28:35<1:13:51, 113.62s/it]

Episode: 110, Reward: -18.00, Epsilon: 0.10
Episode: 110, Reward: -19.00, Epsilon: 0.10
Episode: 110, Reward: -21.00, Epsilon: 0.10
Episode: 110, Reward: -18.00, Epsilon: 0.10
Episode: 110, Reward: -17.00, Epsilon: 0.10
Episode: 110, Reward: -18.00, Epsilon: 0.10
Episode: 110, Reward: -14.00, Epsilon: 0.10
Episode: 110, Reward: -21.00, Epsilon: 0.10


 75%|███████▍  | 112/150 [3:30:24<1:10:59, 112.10s/it]

Episode: 111, Reward: -21.00, Epsilon: 0.10
Episode: 111, Reward: -21.00, Epsilon: 0.10
Episode: 111, Reward: -21.00, Epsilon: 0.10
Episode: 111, Reward: -21.00, Epsilon: 0.10
Episode: 111, Reward: -21.00, Epsilon: 0.10
Episode: 111, Reward: -16.00, Epsilon: 0.10
Episode: 111, Reward: -21.00, Epsilon: 0.10
Episode: 111, Reward: -21.00, Epsilon: 0.10


 75%|███████▌  | 113/150 [3:32:13<1:08:30, 111.10s/it]

Episode: 112, Reward: -21.00, Epsilon: 0.10
Episode: 112, Reward: -21.00, Epsilon: 0.10
Episode: 112, Reward: -21.00, Epsilon: 0.10
Episode: 112, Reward: -17.00, Epsilon: 0.10
Episode: 112, Reward: -21.00, Epsilon: 0.10
Episode: 112, Reward: -17.00, Epsilon: 0.10
Episode: 112, Reward: -17.00, Epsilon: 0.10
Episode: 112, Reward: -19.00, Epsilon: 0.10


 76%|███████▌  | 114/150 [3:34:01<1:06:12, 110.34s/it]

Episode: 113, Reward: -21.00, Epsilon: 0.10
Episode: 113, Reward: -21.00, Epsilon: 0.10
Episode: 113, Reward: -21.00, Epsilon: 0.10
Episode: 113, Reward: -21.00, Epsilon: 0.10
Episode: 113, Reward: -21.00, Epsilon: 0.10
Episode: 113, Reward: -19.00, Epsilon: 0.10
Episode: 113, Reward: -21.00, Epsilon: 0.10
Episode: 113, Reward: -19.00, Epsilon: 0.10


 77%|███████▋  | 115/150 [3:35:50<1:04:03, 109.81s/it]

Episode: 114, Reward: -21.00, Epsilon: 0.10
Episode: 114, Reward: -21.00, Epsilon: 0.10
Episode: 114, Reward: -21.00, Epsilon: 0.10
Episode: 114, Reward: -21.00, Epsilon: 0.10
Episode: 114, Reward: -21.00, Epsilon: 0.10
Episode: 114, Reward: -21.00, Epsilon: 0.10
Episode: 114, Reward: -21.00, Epsilon: 0.10
Episode: 114, Reward: -21.00, Epsilon: 0.10


 77%|███████▋  | 116/150 [3:37:39<1:02:02, 109.48s/it]

Episode: 115, Reward: -20.00, Epsilon: 0.10
Episode: 115, Reward: -20.00, Epsilon: 0.10
Episode: 115, Reward: -21.00, Epsilon: 0.10
Episode: 115, Reward: -21.00, Epsilon: 0.10
Episode: 115, Reward: -20.00, Epsilon: 0.10
Episode: 115, Reward: -20.00, Epsilon: 0.10
Episode: 115, Reward: -21.00, Epsilon: 0.10
Episode: 115, Reward: -20.00, Epsilon: 0.10


 78%|███████▊  | 117/150 [3:39:27<1:00:04, 109.23s/it]

Episode: 116, Reward: -21.00, Epsilon: 0.10
Episode: 116, Reward: -21.00, Epsilon: 0.10
Episode: 116, Reward: -21.00, Epsilon: 0.10
Episode: 116, Reward: -21.00, Epsilon: 0.10
Episode: 116, Reward: -21.00, Epsilon: 0.10
Episode: 116, Reward: -21.00, Epsilon: 0.10
Episode: 116, Reward: -17.00, Epsilon: 0.10
Episode: 116, Reward: -21.00, Epsilon: 0.10


 79%|███████▊  | 118/150 [3:41:16<58:10, 109.07s/it]  

Episode: 117, Reward: -19.00, Epsilon: 0.10
Episode: 117, Reward: -19.00, Epsilon: 0.10
Episode: 117, Reward: -19.00, Epsilon: 0.10
Episode: 117, Reward: -16.00, Epsilon: 0.10
Episode: 117, Reward: -21.00, Epsilon: 0.10
Episode: 117, Reward: -19.00, Epsilon: 0.10
Episode: 117, Reward: -21.00, Epsilon: 0.10
Episode: 117, Reward: -21.00, Epsilon: 0.10


 79%|███████▉  | 119/150 [3:43:05<56:19, 109.02s/it]

Episode: 118, Reward: -21.00, Epsilon: 0.10
Episode: 118, Reward: -21.00, Epsilon: 0.10
Episode: 118, Reward: -21.00, Epsilon: 0.10
Episode: 118, Reward: -21.00, Epsilon: 0.10
Episode: 118, Reward: -21.00, Epsilon: 0.10
Episode: 118, Reward: -21.00, Epsilon: 0.10
Episode: 118, Reward: -21.00, Epsilon: 0.10
Episode: 118, Reward: -21.00, Epsilon: 0.10


 80%|████████  | 120/150 [3:44:54<54:29, 108.97s/it]

Episode: 119, Reward: -17.00, Epsilon: 0.10
Episode: 119, Reward: -21.00, Epsilon: 0.10
Episode: 119, Reward: -20.00, Epsilon: 0.10
Episode: 119, Reward: -16.00, Epsilon: 0.10
Episode: 119, Reward: -20.00, Epsilon: 0.10
Episode: 119, Reward: -18.00, Epsilon: 0.10
Episode: 119, Reward: -19.00, Epsilon: 0.10
Episode: 119, Reward: -21.00, Epsilon: 0.10


 81%|████████  | 121/150 [3:46:43<52:39, 108.94s/it]

Episode: 120, Reward: -17.00, Epsilon: 0.10
Episode: 120, Reward: -16.00, Epsilon: 0.10
Episode: 120, Reward: -14.00, Epsilon: 0.10
Episode: 120, Reward: -19.00, Epsilon: 0.10
Episode: 120, Reward: -21.00, Epsilon: 0.10
Episode: 120, Reward: -13.00, Epsilon: 0.10
Episode: 120, Reward: -17.00, Epsilon: 0.10
Episode: 120, Reward: -18.00, Epsilon: 0.10


 81%|████████▏ | 122/150 [3:48:31<50:49, 108.92s/it]

Episode: 121, Reward: -17.00, Epsilon: 0.10
Episode: 121, Reward: -17.00, Epsilon: 0.10
Episode: 121, Reward: -17.00, Epsilon: 0.10
Episode: 121, Reward: -17.00, Epsilon: 0.10
Episode: 121, Reward: -17.00, Epsilon: 0.10
Episode: 121, Reward: -21.00, Epsilon: 0.10
Episode: 121, Reward: -20.00, Epsilon: 0.10
Episode: 121, Reward: -14.00, Epsilon: 0.10


 82%|████████▏ | 123/150 [3:50:29<50:08, 111.43s/it]

Episode: 122, Reward: -21.00, Epsilon: 0.10
Episode: 122, Reward: -19.00, Epsilon: 0.10
Episode: 122, Reward: -20.00, Epsilon: 0.10
Episode: 122, Reward: -21.00, Epsilon: 0.10
Episode: 122, Reward: -20.00, Epsilon: 0.10
Episode: 122, Reward: -21.00, Epsilon: 0.10
Episode: 122, Reward: -21.00, Epsilon: 0.10
Episode: 122, Reward: -19.00, Epsilon: 0.10


 83%|████████▎ | 124/150 [3:52:18<47:57, 110.66s/it]

Episode: 123, Reward: -21.00, Epsilon: 0.10
Episode: 123, Reward: -21.00, Epsilon: 0.10
Episode: 123, Reward: -21.00, Epsilon: 0.10
Episode: 123, Reward: -21.00, Epsilon: 0.10
Episode: 123, Reward: -21.00, Epsilon: 0.10
Episode: 123, Reward: -21.00, Epsilon: 0.10
Episode: 123, Reward: -21.00, Epsilon: 0.10
Episode: 123, Reward: -21.00, Epsilon: 0.10


 83%|████████▎ | 125/150 [3:54:15<46:56, 112.67s/it]

Episode: 124, Reward: -19.00, Epsilon: 0.10
Episode: 124, Reward: -19.00, Epsilon: 0.10
Episode: 124, Reward: -16.00, Epsilon: 0.10
Episode: 124, Reward: -21.00, Epsilon: 0.10
Episode: 124, Reward: -20.00, Epsilon: 0.10
Episode: 124, Reward: -21.00, Epsilon: 0.10
Episode: 124, Reward: -19.00, Epsilon: 0.10
Episode: 124, Reward: -16.00, Epsilon: 0.10


 84%|████████▍ | 126/150 [3:56:13<45:41, 114.24s/it]

Episode: 125, Reward: -21.00, Epsilon: 0.10
Episode: 125, Reward: -11.00, Epsilon: 0.10
Episode: 125, Reward: -21.00, Epsilon: 0.10
Episode: 125, Reward: -21.00, Epsilon: 0.10
Episode: 125, Reward: -17.00, Epsilon: 0.10
Episode: 125, Reward: -15.00, Epsilon: 0.10
Episode: 125, Reward: -21.00, Epsilon: 0.10
Episode: 125, Reward: -19.00, Epsilon: 0.10


 85%|████████▍ | 127/150 [3:58:02<43:09, 112.60s/it]

Episode: 126, Reward: -17.00, Epsilon: 0.10
Episode: 126, Reward: -21.00, Epsilon: 0.10
Episode: 126, Reward: -19.00, Epsilon: 0.10
Episode: 126, Reward: -21.00, Epsilon: 0.10
Episode: 126, Reward: -19.00, Epsilon: 0.10
Episode: 126, Reward: -19.00, Epsilon: 0.10
Episode: 126, Reward: -17.00, Epsilon: 0.10
Episode: 126, Reward: -21.00, Epsilon: 0.10


 85%|████████▌ | 128/150 [3:59:50<40:50, 111.41s/it]

Episode: 127, Reward: -19.00, Epsilon: 0.10
Episode: 127, Reward: -21.00, Epsilon: 0.10
Episode: 127, Reward: -19.00, Epsilon: 0.10
Episode: 127, Reward: -19.00, Epsilon: 0.10
Episode: 127, Reward: -18.00, Epsilon: 0.10
Episode: 127, Reward: -19.00, Epsilon: 0.10
Episode: 127, Reward: -19.00, Epsilon: 0.10
Episode: 127, Reward: -19.00, Epsilon: 0.10


 86%|████████▌ | 129/150 [4:01:48<39:36, 113.18s/it]

Episode: 128, Reward: -21.00, Epsilon: 0.10
Episode: 128, Reward: -19.00, Epsilon: 0.10
Episode: 128, Reward: -21.00, Epsilon: 0.10
Episode: 128, Reward: -21.00, Epsilon: 0.10
Episode: 128, Reward: -20.00, Epsilon: 0.10
Episode: 128, Reward: -19.00, Epsilon: 0.10
Episode: 128, Reward: -21.00, Epsilon: 0.10
Episode: 128, Reward: -19.00, Epsilon: 0.10


 87%|████████▋ | 130/150 [4:03:36<37:17, 111.87s/it]

Episode: 129, Reward: -18.00, Epsilon: 0.10
Episode: 129, Reward: -19.00, Epsilon: 0.10
Episode: 129, Reward: -19.00, Epsilon: 0.10
Episode: 129, Reward: -15.00, Epsilon: 0.10
Episode: 129, Reward: -21.00, Epsilon: 0.10
Episode: 129, Reward: -19.00, Epsilon: 0.10
Episode: 129, Reward: -21.00, Epsilon: 0.10
Episode: 129, Reward: -10.00, Epsilon: 0.10


 87%|████████▋ | 131/150 [4:05:25<35:07, 110.91s/it]

Episode: 130, Reward: -21.00, Epsilon: 0.10
Episode: 130, Reward: -21.00, Epsilon: 0.10
Episode: 130, Reward: -21.00, Epsilon: 0.10
Episode: 130, Reward: -21.00, Epsilon: 0.10
Episode: 130, Reward: -21.00, Epsilon: 0.10
Episode: 130, Reward: -21.00, Epsilon: 0.10
Episode: 130, Reward: -21.00, Epsilon: 0.10
Episode: 130, Reward: -21.00, Epsilon: 0.10


 88%|████████▊ | 132/150 [4:07:23<33:52, 112.91s/it]

Episode: 131, Reward: -21.00, Epsilon: 0.10
Episode: 131, Reward: -20.00, Epsilon: 0.10
Episode: 131, Reward: -19.00, Epsilon: 0.10
Episode: 131, Reward: -21.00, Epsilon: 0.10
Episode: 131, Reward: -17.00, Epsilon: 0.10
Episode: 131, Reward: -17.00, Epsilon: 0.10
Episode: 131, Reward: -19.00, Epsilon: 0.10
Episode: 131, Reward: -19.00, Epsilon: 0.10


 89%|████████▊ | 133/150 [4:09:18<32:12, 113.70s/it]

Episode: 132, Reward: -20.00, Epsilon: 0.10
Episode: 132, Reward: -19.00, Epsilon: 0.10
Episode: 132, Reward: -16.00, Epsilon: 0.10
Episode: 132, Reward: -16.00, Epsilon: 0.10
Episode: 132, Reward: -19.00, Epsilon: 0.10
Episode: 132, Reward: -21.00, Epsilon: 0.10
Episode: 132, Reward: -16.00, Epsilon: 0.10
Episode: 132, Reward: -20.00, Epsilon: 0.10


 89%|████████▉ | 134/150 [4:11:11<30:16, 113.53s/it]

Episode: 133, Reward: -18.00, Epsilon: 0.10
Episode: 133, Reward: -21.00, Epsilon: 0.10
Episode: 133, Reward: -18.00, Epsilon: 0.10
Episode: 133, Reward: -18.00, Epsilon: 0.10
Episode: 133, Reward: -19.00, Epsilon: 0.10
Episode: 133, Reward: -18.00, Epsilon: 0.10
Episode: 133, Reward: -18.00, Epsilon: 0.10
Episode: 133, Reward: -14.00, Epsilon: 0.10


 90%|█████████ | 135/150 [4:13:04<28:21, 113.41s/it]

Episode: 134, Reward: -18.00, Epsilon: 0.10
Episode: 134, Reward: -19.00, Epsilon: 0.10
Episode: 134, Reward: -17.00, Epsilon: 0.10
Episode: 134, Reward: -18.00, Epsilon: 0.10
Episode: 134, Reward: -19.00, Epsilon: 0.10
Episode: 134, Reward: -21.00, Epsilon: 0.10
Episode: 134, Reward: -20.00, Epsilon: 0.10
Episode: 134, Reward: -13.00, Epsilon: 0.10


 91%|█████████ | 136/150 [4:15:02<26:43, 114.53s/it]

Episode: 135, Reward: -20.00, Epsilon: 0.10
Episode: 135, Reward: -20.00, Epsilon: 0.10
Episode: 135, Reward: -20.00, Epsilon: 0.10
Episode: 135, Reward: -21.00, Epsilon: 0.10
Episode: 135, Reward: -20.00, Epsilon: 0.10
Episode: 135, Reward: -20.00, Epsilon: 0.10
Episode: 135, Reward: -20.00, Epsilon: 0.10
Episode: 135, Reward: -20.00, Epsilon: 0.10


 91%|█████████▏| 137/150 [4:16:53<24:36, 113.59s/it]

Episode: 136, Reward: -19.00, Epsilon: 0.10
Episode: 136, Reward: -21.00, Epsilon: 0.10
Episode: 136, Reward: -20.00, Epsilon: 0.10
Episode: 136, Reward: -19.00, Epsilon: 0.10
Episode: 136, Reward: -15.00, Epsilon: 0.10
Episode: 136, Reward: -18.00, Epsilon: 0.10
Episode: 136, Reward: -21.00, Epsilon: 0.10
Episode: 136, Reward: -21.00, Epsilon: 0.10


 92%|█████████▏| 138/150 [4:18:42<22:25, 112.12s/it]

Episode: 137, Reward: -21.00, Epsilon: 0.10
Episode: 137, Reward: -21.00, Epsilon: 0.10
Episode: 137, Reward: -21.00, Epsilon: 0.10
Episode: 137, Reward: -21.00, Epsilon: 0.10
Episode: 137, Reward: -21.00, Epsilon: 0.10
Episode: 137, Reward: -21.00, Epsilon: 0.10
Episode: 137, Reward: -21.00, Epsilon: 0.10
Episode: 137, Reward: -21.00, Epsilon: 0.10


 93%|█████████▎| 139/150 [4:20:30<20:21, 111.03s/it]

Episode: 138, Reward: -21.00, Epsilon: 0.10
Episode: 138, Reward: -21.00, Epsilon: 0.10
Episode: 138, Reward: -21.00, Epsilon: 0.10
Episode: 138, Reward: -21.00, Epsilon: 0.10
Episode: 138, Reward: -21.00, Epsilon: 0.10
Episode: 138, Reward: -21.00, Epsilon: 0.10
Episode: 138, Reward: -21.00, Epsilon: 0.10
Episode: 138, Reward: -21.00, Epsilon: 0.10


 93%|█████████▎| 140/150 [4:22:23<18:34, 111.46s/it]

Episode: 139, Reward: -18.00, Epsilon: 0.10
Episode: 139, Reward: -21.00, Epsilon: 0.10
Episode: 139, Reward: -18.00, Epsilon: 0.10
Episode: 139, Reward: -19.00, Epsilon: 0.10
Episode: 139, Reward: -18.00, Epsilon: 0.10
Episode: 139, Reward: -18.00, Epsilon: 0.10
Episode: 139, Reward: -18.00, Epsilon: 0.10
Episode: 139, Reward: -18.00, Epsilon: 0.10


 94%|█████████▍| 141/150 [4:24:15<16:45, 111.77s/it]

Episode: 140, Reward: -18.00, Epsilon: 0.10
Episode: 140, Reward: -17.00, Epsilon: 0.10
Episode: 140, Reward: -15.00, Epsilon: 0.10
Episode: 140, Reward: -18.00, Epsilon: 0.10
Episode: 140, Reward: -20.00, Epsilon: 0.10
Episode: 140, Reward: -21.00, Epsilon: 0.10
Episode: 140, Reward: -21.00, Epsilon: 0.10
Episode: 140, Reward: -13.00, Epsilon: 0.10


 95%|█████████▍| 142/150 [4:26:04<14:46, 110.79s/it]

Episode: 141, Reward: -21.00, Epsilon: 0.10
Episode: 141, Reward: -21.00, Epsilon: 0.10
Episode: 141, Reward: -21.00, Epsilon: 0.10
Episode: 141, Reward: -21.00, Epsilon: 0.10
Episode: 141, Reward: -21.00, Epsilon: 0.10
Episode: 141, Reward: -21.00, Epsilon: 0.10
Episode: 141, Reward: -21.00, Epsilon: 0.10
Episode: 141, Reward: -21.00, Epsilon: 0.10


 95%|█████████▌| 143/150 [4:27:53<12:51, 110.26s/it]

Episode: 142, Reward: -20.00, Epsilon: 0.10
Episode: 142, Reward: -21.00, Epsilon: 0.10
Episode: 142, Reward: -20.00, Epsilon: 0.10
Episode: 142, Reward: -20.00, Epsilon: 0.10
Episode: 142, Reward: -20.00, Epsilon: 0.10
Episode: 142, Reward: -20.00, Epsilon: 0.10
Episode: 142, Reward: -20.00, Epsilon: 0.10
Episode: 142, Reward: -21.00, Epsilon: 0.10


 96%|█████████▌| 144/150 [4:29:42<10:59, 109.90s/it]

Episode: 143, Reward: -20.00, Epsilon: 0.10
Episode: 143, Reward: -21.00, Epsilon: 0.10
Episode: 143, Reward: -21.00, Epsilon: 0.10
Episode: 143, Reward: -20.00, Epsilon: 0.10
Episode: 143, Reward: -18.00, Epsilon: 0.10
Episode: 143, Reward: -21.00, Epsilon: 0.10
Episode: 143, Reward: -21.00, Epsilon: 0.10
Episode: 143, Reward: -21.00, Epsilon: 0.10


 97%|█████████▋| 145/150 [4:31:31<09:07, 109.58s/it]

Episode: 144, Reward: -21.00, Epsilon: 0.10
Episode: 144, Reward: -21.00, Epsilon: 0.10
Episode: 144, Reward: -21.00, Epsilon: 0.10
Episode: 144, Reward: -21.00, Epsilon: 0.10
Episode: 144, Reward: -21.00, Epsilon: 0.10
Episode: 144, Reward: -21.00, Epsilon: 0.10
Episode: 144, Reward: -21.00, Epsilon: 0.10
Episode: 144, Reward: -21.00, Epsilon: 0.10


 97%|█████████▋| 146/150 [4:33:20<07:17, 109.40s/it]

Episode: 145, Reward: -21.00, Epsilon: 0.10
Episode: 145, Reward: -21.00, Epsilon: 0.10
Episode: 145, Reward: -21.00, Epsilon: 0.10
Episode: 145, Reward: -21.00, Epsilon: 0.10
Episode: 145, Reward: -21.00, Epsilon: 0.10
Episode: 145, Reward: -21.00, Epsilon: 0.10
Episode: 145, Reward: -21.00, Epsilon: 0.10
Episode: 145, Reward: -21.00, Epsilon: 0.10


 98%|█████████▊| 147/150 [4:35:08<05:27, 109.23s/it]

Episode: 146, Reward: -21.00, Epsilon: 0.10
Episode: 146, Reward: -19.00, Epsilon: 0.10
Episode: 146, Reward: -19.00, Epsilon: 0.10
Episode: 146, Reward: -19.00, Epsilon: 0.10
Episode: 146, Reward: -19.00, Epsilon: 0.10
Episode: 146, Reward: -20.00, Epsilon: 0.10
Episode: 146, Reward: -19.00, Epsilon: 0.10
Episode: 146, Reward: -21.00, Epsilon: 0.10


 99%|█████████▊| 148/150 [4:36:57<03:38, 109.12s/it]

Episode: 147, Reward: -19.00, Epsilon: 0.10
Episode: 147, Reward: -15.00, Epsilon: 0.10
Episode: 147, Reward: -19.00, Epsilon: 0.10
Episode: 147, Reward: -19.00, Epsilon: 0.10
Episode: 147, Reward: -19.00, Epsilon: 0.10
Episode: 147, Reward: -19.00, Epsilon: 0.10
Episode: 147, Reward: -21.00, Epsilon: 0.10
Episode: 147, Reward: -21.00, Epsilon: 0.10


 99%|█████████▉| 149/150 [4:38:46<01:49, 109.14s/it]

Episode: 148, Reward: -21.00, Epsilon: 0.10
Episode: 148, Reward: -21.00, Epsilon: 0.10
Episode: 148, Reward: -21.00, Epsilon: 0.10
Episode: 148, Reward: -21.00, Epsilon: 0.10
Episode: 148, Reward: -21.00, Epsilon: 0.10
Episode: 148, Reward: -21.00, Epsilon: 0.10
Episode: 148, Reward: -21.00, Epsilon: 0.10
Episode: 148, Reward: -21.00, Epsilon: 0.10


100%|██████████| 150/150 [4:40:35<00:00, 112.24s/it]

Episode: 149, Reward: -21.00, Epsilon: 0.10
Episode: 149, Reward: -21.00, Epsilon: 0.10
Episode: 149, Reward: -21.00, Epsilon: 0.10
Episode: 149, Reward: -21.00, Epsilon: 0.10
Episode: 149, Reward: -21.00, Epsilon: 0.10
Episode: 149, Reward: -17.00, Epsilon: 0.10
Episode: 149, Reward: -21.00, Epsilon: 0.10
Episode: 149, Reward: -16.00, Epsilon: 0.10





In [None]:
# Create a new instance of your DQNAgent
# agent = DQNAgent(envs)  # Initialize with the same environment and parameters

# # Load the state dictionaries into your model
# agent.policy_net.load_state_dict(torch.load('dqn_policy_net.pth'))
# agent.target_net.load_state_dict(torch.load('dqn_target_net.pth'))
# agent.optimizer.load_state_dict(torch.load('dqn_optimizer.pth'))

# # Load the history list
# history = np.load('dqn_history.npy', allow_pickle=True)  # Load the history list
