In [31]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from reverse import *
from tqdm import tqdm
import os

In [19]:
"""
Reversi (Othello) trained with DQN:
hyperparameters and global settings.
"""
BOARD_SIZE = 8
N_STATE = BOARD_SIZE ** 2    # length of the flattened board vector (8 * 8 = 64)
N_ACTION = N_STATE + 1       # one action per square plus one extra "pass" action (65)

# Optimisation / training-schedule settings.
LR = 0.001                   # learning rate handed to the Adam optimiser
EPISODE = 10000              # base number of self-play games
BATCH_SIZE = 32              # transitions drawn per gradient step
GAMMA = 0.9                  # discount factor used in the TD target
ALPHA = 0.8                  # not referenced by the cells visible in this notebook
TRANSITIONS_CAPACITY = 200   # number of rows in each agent's replay buffer
UPDATE_DELAY = 10            # gradient steps between target-network refreshes

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [20]:

# Debug override: uncomment the next line to force CPU execution.
# device='cpu'
print(device)  # report which device was selected

cuda


In [33]:
class NET(nn.Module):
    """Dueling Q-network that scores every action for a flattened board.

    Pipeline: a linear layer lifts the N_STATE inputs to 128 features, three
    1-D convolutions (1 -> 4 -> 8 -> 16 channels, kernel 3, padding 1, so the
    length stays 128) process them as a single-channel sequence, and two
    separate heads (16 * 128 -> 512 -> output) produce the state value and
    the per-action advantages.

    Returns:
        Tensor of shape (batch, N_ACTION); each row holds the Q-value of
        every action, computed as value + advantage - mean(advantage).
    """

    def __init__(self):
        super().__init__()

        # NOTE: attribute names, Sequential nesting and creation order define
        # the state_dict keys and the seeded initialisation; keep them stable
        # so saved checkpoints continue to load.
        self.linear1 = nn.Sequential(
            nn.Linear(in_features=N_STATE, out_features=128),
            nn.LeakyReLU(),
        )

        self.conv1 = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=4, kernel_size=3, stride=1, padding=1),
            # In-place activation: overwrites the convolution output buffer.
            nn.LeakyReLU(inplace=True),
        )

        self.conv2 = nn.Sequential(
            nn.Conv1d(in_channels=4, out_channels=8, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(),
        )

        self.conv3 = nn.Sequential(
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(),
        )

        # Hidden layers of the value head and the advantage head.
        self.linear2_val = nn.Sequential(
            nn.Linear(in_features=16 * 128, out_features=512),
            nn.LeakyReLU(),
        )

        self.linear2_adv = nn.Sequential(
            nn.Linear(in_features=16 * 128, out_features=512),
            nn.LeakyReLU(),
        )

        # Output layers: one advantage per action, one scalar state value.
        self.linear3_adv = nn.Sequential(
            nn.Linear(in_features=512, out_features=N_ACTION),
        )

        self.linear3_val = nn.Sequential(
            nn.Linear(in_features=512, out_features=1),
        )

    def forward(self, x):
        """Compute Q-values for a batch of states.

        Arguments:
            x [tensor] -- float tensor of shape (batch, N_STATE)

        Returns:
            res [tensor] -- (batch, N_ACTION) Q-values
        """
        batch = x.shape[0]

        hidden = self.linear1(x)
        # Present the 128 features as a one-channel sequence for Conv1d.
        hidden = hidden.view(batch, 1, -1)
        hidden = self.conv3(self.conv2(self.conv1(hidden)))
        # Collapse (channels, length) into a single feature vector per sample.
        features = hidden.flatten(start_dim=1)

        adv = self.linear3_adv(self.linear2_adv(features))   # (batch, N_ACTION)
        val = self.linear3_val(self.linear2_val(features))   # (batch, 1)

        # Dueling aggregation; the (batch, 1) terms broadcast across actions.
        res = val + adv - adv.mean(1, keepdim=True)

        return res


In [37]:
class DQN(object):
    """Q-learning agent for one side of a two-player self-play Reversi match.

    The agent owns an online network ``Q`` (trained), a target network ``Q_``
    (a periodically refreshed copy of ``Q`` that the *opponent* bootstraps
    from), and a fixed-size ring buffer of transitions.
    """

    def __init__(self, color):
        """Create the networks, the optimiser and an empty replay buffer.

        color: 1 for the first player, -1 for the second player. It is not
               used by the constructor itself; it is only referenced by the
               disabled warm-start code below.

        transitions       : replay buffer; each row is laid out as
                            [state (N_STATE) | action | reward | next state (N_STATE)]
        transitions_index : total number of transitions stored so far (the
                            write position is this value modulo the capacity)
        learn_iter        : gradient steps taken; every UPDATE_DELAY steps the
                            parameters of Q are copied into Q_
        """
        self.transitions = np.zeros((TRANSITIONS_CAPACITY, 2 * N_STATE + 2))
        self.transitions_index = 0
        self.learn_iter = 0

        self.Q, self.Q_ = NET().to(device), NET().to(device)
        # Warm start (disabled): load 'model_offensive.pth' when color == 1 or
        # 'model_defensive.pth' when color == -1 into self.Q here.

        # Start the target network as an exact copy of the online network.
        # The opponent reads Q_ before this agent's first Learn() call, so it
        # must not be an unrelated randomly initialised network.
        self.Q_.load_state_dict(self.Q.state_dict())

        self.optimizer = torch.optim.Adam(self.Q.parameters(), lr=LR)
        self.criteria = nn.MSELoss()

    def Choose_Action_EpsilonGreedy(self, x, game_state, color, Epsilon=0.1):
        """Pick the next move with an epsilon-greedy policy over legal squares.

        With probability ``Epsilon`` a legal square is drawn uniformly at
        random; otherwise the legal square with the highest Q-value is chosen.

        Arguments:
            x -- current state; array-like holding N_STATE values, fed to Q
            game_state -- game object providing ``Get_Valid_Pos``,
                          ``black_chess``, ``white_chess`` and ``board_size``
            color [int] -- 1 for black, -1 for white
            Epsilon [float] -- exploration probability

        Returns:
            action [int] -- flattened square index ``row * board_size + col``,
                            or ``N_ACTION - 1`` (64 on an 8x8 board) when the
                            player has no legal move and must pass

        Raises:
            ValueError -- if ``color`` is neither 1 nor -1
        """
        if color == 1:
            positions = game_state.Get_Valid_Pos(game_state.black_chess, game_state.white_chess)
        elif color == -1:
            positions = game_state.Get_Valid_Pos(game_state.white_chess, game_state.black_chess)
        else:
            raise ValueError("color must be 1 (black) or -1 (white), got {!r}".format(color))

        # Flatten (row, col) pairs to indices into the network output.
        legal = [int(game_state.board_size * pos[0] + pos[1]) for pos in positions]
        if not legal:
            return N_ACTION - 1  # nothing to play: pass

        if np.random.uniform() < Epsilon:
            # Explore: uniform choice among the legal squares.
            return int(np.random.choice(legal, 1)[0])

        # Exploit: no gradients are needed for action selection.
        with torch.no_grad():
            state = torch.tensor(x, dtype=torch.float, device=device).view(1, -1)
            q_values = self.Q(state)[0]          # one score per action
            best = int(torch.argmax(q_values[legal]))
        return legal[best]

    def Store_transition(self, s, a, r, s_):
        """Write one transition into the ring buffer, overwriting the oldest.

        Arguments:
            s -- current state (N_STATE values)
            a -- action that was taken
            r -- reward received
            s_ -- successor state (N_STATE values)
        """
        # ravel() lets board-shaped states be stored as well as flat ones.
        transition = np.hstack((np.ravel(s), a, r, np.ravel(s_)))
        self.transitions[self.transitions_index % TRANSITIONS_CAPACITY] = transition
        self.transitions_index += 1

    def Learn(self, oppo_Q_, n_steps=10):
        """Run ``n_steps`` mini-batch updates of Q on replayed transitions.

        The successor state belongs to the opponent, so the target is the
        negamax form ``r - GAMMA * max_a oppo_Q_(s_, a)``.

        Arguments:
            oppo_Q_ -- the opponent's target network, used to value s_
            n_steps [int] -- number of gradient steps (default 10)

        Does nothing when no transition has been stored yet.
        """
        # Only rows that were actually written may be sampled; the remaining
        # rows are all-zero placeholders, not real experience.
        n_stored = min(self.transitions_index, TRANSITIONS_CAPACITY)
        if n_stored == 0:
            return

        for _ in range(n_steps):
            # Periodically refresh the target network from the online one.
            if self.learn_iter % UPDATE_DELAY == 0:
                self.Q_.load_state_dict(self.Q.state_dict())
            self.learn_iter += 1

            # Sample BATCH_SIZE stored rows (with replacement).
            rows = np.random.choice(n_stored, BATCH_SIZE)
            batch = self.transitions[rows, :]

            batch_s = torch.tensor(batch[:, :N_STATE], dtype=torch.float, device=device)
            batch_a = torch.tensor(batch[:, N_STATE: N_STATE + 1], dtype=torch.long, device=device)
            batch_r = torch.tensor(batch[:, N_STATE + 1: N_STATE + 2], dtype=torch.float, device=device)
            batch_s_ = torch.tensor(batch[:, N_STATE + 2:], dtype=torch.float, device=device)

            # Q-value of the action that was actually taken in each row.
            q_taken = self.Q(batch_s).gather(1, batch_a)

            # The target is a constant: no gradient flows into oppo_Q_.
            # NOTE(review): the buffer stores no "done" flag, so terminal
            # successor states are bootstrapped as well, and the max runs over
            # all actions including illegal ones -- confirm this is intended.
            with torch.no_grad():
                oppo_best = oppo_Q_(batch_s_).max(1)[0].view(-1, 1)
                target = batch_r - GAMMA * oppo_best

            loss = self.criteria(q_taken, target)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

In [None]:
if __name__ == "__main__":
    offensive = DQN(1)
    defensive = DQN(-1)

    # Turn order within a round: (agent to move, its opponent, colour).
    sides = (
        (offensive, defensive, 1),
        (defensive, offensive, -1),
    )

    for episode in tqdm(range(EPISODE * 50)):
        game_state = Game()
        round_ = 0
        episode_over = False

        while not episode_over:
            for agent, rival, color in sides:
                if color == 1:
                    # A round starts with the first player's move.
                    round_ += 1

                s = game_state.Get_State()
                a = agent.Choose_Action_EpsilonGreedy(s, game_state, color)
                game_state.Add(color, a)
                r = game_state.Gameover() * 100.0
                s_ = game_state.Get_State()

                # Each side keeps its own replay buffer; the reward is stored
                # from the mover's point of view (negated for the second player).
                agent.Store_transition(s, a, color * r, s_)

                # The game stops on a non-zero result; only the first player's
                # turn additionally enforces the 100-round cap.
                hit_round_cap = color == 1 and round_ > 100
                if r != 0 or hit_round_cap:
                    # Only the side that made the last move trains, using the
                    # opponent's target network to value the successor state.
                    agent.Learn(rival.Q_)
                    if episode % 200 == 0:
                        print('Episode:{} | Reward:{}'.format(episode, r))
                    episode_over = True
                    break

        # Checkpoint both online networks every 100 episodes.
        if (episode + 1) % 100 == 0:
            torch.save(offensive.Q.state_dict(), 'model_offensive.pth')
            torch.save(defensive.Q.state_dict(), 'model_defensive.pth')

  0%|          | 2/500000 [00:00<23:19:25,  5.95it/s]

Episode:0 | Reward:-100.0


  0%|          | 202/500000 [00:28<21:38:54,  6.41it/s]

Episode:200 | Reward:100.0


  0%|          | 402/500000 [00:56<21:21:08,  6.50it/s]

Episode:400 | Reward:-100.0


  0%|          | 602/500000 [01:24<21:51:07,  6.35it/s]

Episode:600 | Reward:-100.0


  0%|          | 802/500000 [01:52<22:27:06,  6.18it/s]

Episode:800 | Reward:100.0


  0%|          | 1002/500000 [02:21<21:15:01,  6.52it/s]

Episode:1000 | Reward:-100.0


  0%|          | 1202/500000 [02:49<21:40:37,  6.39it/s]

Episode:1200 | Reward:100.0


  0%|          | 1402/500000 [03:18<21:24:06,  6.47it/s]

Episode:1400 | Reward:-100.0


  0%|          | 1602/500000 [03:46<21:36:06,  6.41it/s]

Episode:1600 | Reward:-100.0


  0%|          | 1802/500000 [04:15<22:19:30,  6.20it/s]

Episode:1800 | Reward:0.0


  0%|          | 2002/500000 [04:43<22:36:06,  6.12it/s]

Episode:2000 | Reward:-100.0


  0%|          | 2202/500000 [05:12<22:15:55,  6.21it/s]

Episode:2200 | Reward:-100.0


  0%|          | 2402/500000 [05:40<20:56:41,  6.60it/s]

Episode:2400 | Reward:-100.0


  1%|          | 2602/500000 [06:08<18:16:44,  7.56it/s]

Episode:2600 | Reward:100.0


  1%|          | 2802/500000 [06:32<22:04:48,  6.25it/s]

Episode:2800 | Reward:-100.0


  1%|          | 3002/500000 [07:00<21:46:27,  6.34it/s]

Episode:3000 | Reward:-100.0


  1%|          | 3202/500000 [07:29<21:56:46,  6.29it/s]

Episode:3200 | Reward:-100.0


  1%|          | 3402/500000 [07:57<21:00:52,  6.56it/s]

Episode:3400 | Reward:-100.0


  1%|          | 3602/500000 [08:26<21:24:58,  6.44it/s]

Episode:3600 | Reward:-100.0


  1%|          | 3802/500000 [08:53<19:33:09,  7.05it/s]

Episode:3800 | Reward:100.0


  1%|          | 4002/500000 [09:22<19:36:23,  7.03it/s]

Episode:4000 | Reward:100.0


  1%|          | 4202/500000 [09:50<22:24:59,  6.14it/s]

Episode:4200 | Reward:-100.0


  1%|          | 4402/500000 [10:19<21:15:24,  6.48it/s]

Episode:4400 | Reward:-100.0


  1%|          | 4602/500000 [10:47<20:51:34,  6.60it/s]

Episode:4600 | Reward:-100.0


  1%|          | 4802/500000 [11:16<21:44:58,  6.32it/s]

Episode:4800 | Reward:-100.0


  1%|          | 5002/500000 [11:44<21:07:11,  6.51it/s]

Episode:5000 | Reward:-100.0


  1%|          | 5202/500000 [12:13<21:22:52,  6.43it/s]

Episode:5200 | Reward:-100.0


  1%|          | 5402/500000 [12:41<18:02:36,  7.61it/s]

Episode:5400 | Reward:-100.0


  1%|          | 5602/500000 [13:10<22:16:37,  6.16it/s]

Episode:5600 | Reward:-100.0


  1%|          | 5802/500000 [13:38<21:48:27,  6.29it/s]

Episode:5800 | Reward:100.0


  1%|          | 6002/500000 [14:07<21:06:37,  6.50it/s]

Episode:6000 | Reward:100.0


  1%|          | 6202/500000 [14:35<21:12:52,  6.47it/s]

Episode:6200 | Reward:-100.0


  1%|▏         | 6402/500000 [15:04<21:29:14,  6.38it/s]

Episode:6400 | Reward:-100.0


  1%|▏         | 6602/500000 [15:32<20:51:31,  6.57it/s]

Episode:6600 | Reward:-100.0


  1%|▏         | 6802/500000 [16:01<21:10:49,  6.47it/s]

Episode:6800 | Reward:100.0


  1%|▏         | 7002/500000 [16:29<22:33:43,  6.07it/s]

Episode:7000 | Reward:100.0


  1%|▏         | 7202/500000 [16:58<21:56:18,  6.24it/s]

Episode:7200 | Reward:100.0


  1%|▏         | 7402/500000 [17:25<20:15:37,  6.75it/s]

Episode:7400 | Reward:-100.0


  2%|▏         | 7602/500000 [17:54<20:50:16,  6.56it/s]

Episode:7600 | Reward:-100.0


  2%|▏         | 7802/500000 [18:22<20:55:05,  6.54it/s]

Episode:7800 | Reward:-100.0


  2%|▏         | 8002/500000 [18:51<20:36:21,  6.63it/s]

Episode:8000 | Reward:-100.0


  2%|▏         | 8202/500000 [19:19<20:50:43,  6.55it/s]

Episode:8200 | Reward:-100.0


  2%|▏         | 8402/500000 [19:48<21:18:34,  6.41it/s]

Episode:8400 | Reward:100.0


  2%|▏         | 8602/500000 [20:16<20:41:13,  6.60it/s]

Episode:8600 | Reward:100.0


  2%|▏         | 8802/500000 [20:45<21:09:30,  6.45it/s]

Episode:8800 | Reward:-100.0


  2%|▏         | 9002/500000 [21:13<20:51:12,  6.54it/s]

Episode:9000 | Reward:100.0


  2%|▏         | 9202/500000 [21:42<20:43:46,  6.58it/s]

Episode:9200 | Reward:-100.0


  2%|▏         | 9402/500000 [22:09<21:14:42,  6.41it/s]

Episode:9400 | Reward:-100.0


  2%|▏         | 9602/500000 [22:37<20:52:38,  6.52it/s]

Episode:9600 | Reward:-100.0


  2%|▏         | 9802/500000 [23:06<21:13:21,  6.42it/s]

Episode:9800 | Reward:100.0


  2%|▏         | 10002/500000 [23:33<21:05:34,  6.45it/s]

Episode:10000 | Reward:-100.0


  2%|▏         | 10202/500000 [24:02<21:00:13,  6.48it/s]

Episode:10200 | Reward:-100.0


  2%|▏         | 10402/500000 [24:30<21:10:52,  6.42it/s]

Episode:10400 | Reward:-100.0


  2%|▏         | 10602/500000 [24:59<20:02:25,  6.78it/s]

Episode:10600 | Reward:-100.0


  2%|▏         | 10802/500000 [25:27<21:56:38,  6.19it/s]

Episode:10800 | Reward:100.0


  2%|▏         | 11002/500000 [25:56<20:51:43,  6.51it/s]

Episode:11000 | Reward:-100.0


  2%|▏         | 11202/500000 [26:24<21:40:13,  6.27it/s]

Episode:11200 | Reward:-100.0


  2%|▏         | 11402/500000 [26:53<21:09:55,  6.41it/s]

Episode:11400 | Reward:100.0


  2%|▏         | 11602/500000 [27:21<21:39:20,  6.26it/s]

Episode:11600 | Reward:100.0


  2%|▏         | 11802/500000 [27:49<20:44:00,  6.54it/s]

Episode:11800 | Reward:-100.0


  2%|▏         | 12002/500000 [28:17<21:23:39,  6.34it/s]

Episode:12000 | Reward:100.0


  2%|▏         | 12202/500000 [28:45<21:36:13,  6.27it/s]

Episode:12200 | Reward:100.0


  2%|▏         | 12402/500000 [29:13<17:40:08,  7.67it/s]

Episode:12400 | Reward:100.0


  3%|▎         | 12602/500000 [29:39<21:17:58,  6.36it/s]

Episode:12600 | Reward:-100.0


  3%|▎         | 12802/500000 [30:07<20:54:22,  6.47it/s]

Episode:12800 | Reward:-100.0


  3%|▎         | 13002/500000 [30:36<20:38:51,  6.55it/s]

Episode:13000 | Reward:-100.0


  3%|▎         | 13202/500000 [31:04<19:50:46,  6.81it/s]

Episode:13200 | Reward:-100.0


  3%|▎         | 13402/500000 [31:33<21:20:45,  6.33it/s]

Episode:13400 | Reward:-100.0


  3%|▎         | 13602/500000 [32:01<21:38:59,  6.24it/s]

Episode:13600 | Reward:-100.0


  3%|▎         | 13802/500000 [32:29<20:50:47,  6.48it/s]

Episode:13800 | Reward:-100.0


  3%|▎         | 14002/500000 [32:58<20:57:39,  6.44it/s]

Episode:14000 | Reward:100.0


  3%|▎         | 14202/500000 [33:26<17:36:44,  7.66it/s]

Episode:14200 | Reward:-100.0


  3%|▎         | 14402/500000 [33:54<20:19:32,  6.64it/s]

Episode:14400 | Reward:100.0


  3%|▎         | 14602/500000 [34:22<20:36:51,  6.54it/s]

Episode:14600 | Reward:100.0


  3%|▎         | 14802/500000 [34:51<20:59:23,  6.42it/s]

Episode:14800 | Reward:-100.0


  3%|▎         | 15002/500000 [35:19<20:55:09,  6.44it/s]

Episode:15000 | Reward:100.0


  3%|▎         | 15202/500000 [35:47<21:52:35,  6.16it/s]

Episode:15200 | Reward:100.0


  3%|▎         | 15402/500000 [36:16<21:00:35,  6.41it/s]

Episode:15400 | Reward:100.0


  3%|▎         | 15602/500000 [36:44<21:07:43,  6.37it/s]

Episode:15600 | Reward:100.0


  3%|▎         | 15802/500000 [37:12<21:09:31,  6.36it/s]

Episode:15800 | Reward:100.0


  3%|▎         | 16002/500000 [37:40<21:09:15,  6.36it/s]

Episode:16000 | Reward:100.0


  3%|▎         | 16202/500000 [38:09<21:19:32,  6.30it/s]

Episode:16200 | Reward:-100.0


  3%|▎         | 16402/500000 [38:38<20:23:03,  6.59it/s]

Episode:16400 | Reward:100.0


  3%|▎         | 16602/500000 [39:05<21:49:59,  6.15it/s]

Episode:16600 | Reward:100.0


  3%|▎         | 16802/500000 [39:34<20:43:45,  6.47it/s]

Episode:16800 | Reward:-100.0


  3%|▎         | 17002/500000 [40:02<20:17:40,  6.61it/s]

Episode:17000 | Reward:100.0


  3%|▎         | 17202/500000 [40:31<21:38:23,  6.20it/s]

Episode:17200 | Reward:100.0


  3%|▎         | 17402/500000 [40:59<20:42:04,  6.48it/s]

Episode:17400 | Reward:-100.0


  4%|▎         | 17602/500000 [41:27<21:15:46,  6.30it/s]

Episode:17600 | Reward:100.0


  4%|▎         | 17802/500000 [41:55<20:26:51,  6.55it/s]

Episode:17800 | Reward:-100.0


  4%|▎         | 18002/500000 [42:24<21:04:14,  6.35it/s]

Episode:18000 | Reward:100.0


  4%|▎         | 18202/500000 [42:52<21:31:31,  6.22it/s]

Episode:18200 | Reward:100.0


  4%|▎         | 18402/500000 [43:21<20:46:02,  6.44it/s]

Episode:18400 | Reward:100.0


  4%|▎         | 18602/500000 [43:48<19:23:27,  6.90it/s]

Episode:18600 | Reward:-100.0


  4%|▍         | 18802/500000 [44:16<20:09:37,  6.63it/s]

Episode:18800 | Reward:-100.0


  4%|▍         | 19002/500000 [44:45<20:29:03,  6.52it/s]

Episode:19000 | Reward:100.0


  4%|▍         | 19202/500000 [45:13<21:35:24,  6.19it/s]

Episode:19200 | Reward:-100.0


  4%|▍         | 19402/500000 [45:41<20:44:32,  6.44it/s]

Episode:19400 | Reward:-100.0


  4%|▍         | 19602/500000 [46:09<19:55:32,  6.70it/s]

Episode:19600 | Reward:100.0


  4%|▍         | 19802/500000 [46:38<20:42:34,  6.44it/s]

Episode:19800 | Reward:-100.0


  4%|▍         | 20002/500000 [47:06<20:21:41,  6.55it/s]

Episode:20000 | Reward:-100.0


  4%|▍         | 20202/500000 [47:34<20:55:56,  6.37it/s]

Episode:20200 | Reward:-100.0


  4%|▍         | 20402/500000 [48:02<20:21:53,  6.54it/s]

Episode:20400 | Reward:100.0


  4%|▍         | 20602/500000 [48:30<20:24:49,  6.52it/s]

Episode:20600 | Reward:100.0


  4%|▍         | 20802/500000 [48:59<20:19:01,  6.55it/s]

Episode:20800 | Reward:100.0


  4%|▍         | 21002/500000 [49:27<19:58:50,  6.66it/s]

Episode:21000 | Reward:100.0


  4%|▍         | 21202/500000 [49:55<20:38:41,  6.44it/s]

Episode:21200 | Reward:100.0


  4%|▍         | 21402/500000 [50:24<20:30:22,  6.48it/s]

Episode:21400 | Reward:100.0


  4%|▍         | 21602/500000 [50:52<20:48:08,  6.39it/s]

Episode:21600 | Reward:100.0


  4%|▍         | 21802/500000 [51:20<20:50:07,  6.38it/s]

Episode:21800 | Reward:100.0


  4%|▍         | 22002/500000 [51:48<21:10:21,  6.27it/s]

Episode:22000 | Reward:-100.0


  4%|▍         | 22202/500000 [52:16<20:22:24,  6.51it/s]

Episode:22200 | Reward:100.0


  4%|▍         | 22402/500000 [52:41<20:02:34,  6.62it/s]

Episode:22400 | Reward:-100.0


  5%|▍         | 22602/500000 [53:08<17:34:09,  7.55it/s]

Episode:22600 | Reward:100.0


  5%|▍         | 22802/500000 [53:36<20:48:26,  6.37it/s]

Episode:22800 | Reward:100.0


  5%|▍         | 23002/500000 [54:03<20:03:13,  6.61it/s]

Episode:23000 | Reward:-100.0


  5%|▍         | 23202/500000 [54:31<20:30:21,  6.46it/s]

Episode:23200 | Reward:-100.0


  5%|▍         | 23402/500000 [55:00<20:26:53,  6.47it/s]

Episode:23400 | Reward:100.0


  5%|▍         | 23602/500000 [55:27<21:34:13,  6.13it/s]

Episode:23600 | Reward:-100.0


  5%|▍         | 23802/500000 [55:55<20:26:40,  6.47it/s]

Episode:23800 | Reward:-100.0


  5%|▍         | 24002/500000 [56:23<20:04:28,  6.59it/s]

Episode:24000 | Reward:-100.0


  5%|▍         | 24202/500000 [56:52<21:17:10,  6.21it/s]

Episode:24200 | Reward:100.0


  5%|▍         | 24402/500000 [57:20<17:28:05,  7.56it/s]

Episode:24400 | Reward:-100.0


  5%|▍         | 24602/500000 [57:47<20:46:54,  6.35it/s]

Episode:24600 | Reward:-100.0


  5%|▍         | 24802/500000 [58:16<21:57:33,  6.01it/s]

Episode:24800 | Reward:100.0


  5%|▌         | 25002/500000 [58:44<20:03:58,  6.58it/s]

Episode:25000 | Reward:-100.0


  5%|▌         | 25202/500000 [59:12<20:39:35,  6.38it/s]

Episode:25200 | Reward:100.0


  5%|▌         | 25402/500000 [59:41<20:14:06,  6.52it/s]

Episode:25400 | Reward:-100.0


  5%|▌         | 25602/500000 [1:00:09<20:26:32,  6.45it/s]

Episode:25600 | Reward:100.0


  5%|▌         | 25802/500000 [1:00:38<20:11:17,  6.52it/s]

Episode:25800 | Reward:100.0


  5%|▌         | 26002/500000 [1:01:06<19:30:22,  6.75it/s]

Episode:26000 | Reward:100.0


  5%|▌         | 26202/500000 [1:01:33<20:14:44,  6.50it/s]

Episode:26200 | Reward:-100.0


  5%|▌         | 26402/500000 [1:02:01<20:19:24,  6.47it/s]

Episode:26400 | Reward:-100.0


  5%|▌         | 26602/500000 [1:02:30<20:21:49,  6.46it/s]

Episode:26600 | Reward:100.0


  5%|▌         | 26802/500000 [1:02:58<19:08:17,  6.87it/s]

Episode:26800 | Reward:100.0


  5%|▌         | 27002/500000 [1:03:26<20:37:46,  6.37it/s]

Episode:27000 | Reward:100.0


  5%|▌         | 27202/500000 [1:03:54<20:12:30,  6.50it/s]

Episode:27200 | Reward:-100.0


  5%|▌         | 27402/500000 [1:04:23<20:31:45,  6.39it/s]

Episode:27400 | Reward:100.0


  6%|▌         | 27602/500000 [1:04:51<20:26:42,  6.42it/s]

Episode:27600 | Reward:100.0


  6%|▌         | 27802/500000 [1:05:19<20:49:47,  6.30it/s]

Episode:27800 | Reward:100.0


  6%|▌         | 28002/500000 [1:05:48<20:08:44,  6.51it/s]

Episode:28000 | Reward:100.0


  6%|▌         | 28202/500000 [1:06:16<20:13:40,  6.48it/s]

Episode:28200 | Reward:100.0


  6%|▌         | 28402/500000 [1:06:45<20:38:18,  6.35it/s]

Episode:28400 | Reward:100.0


  6%|▌         | 28602/500000 [1:07:13<17:16:04,  7.58it/s]

Episode:28600 | Reward:100.0


  6%|▌         | 28802/500000 [1:07:40<19:59:30,  6.55it/s]

Episode:28800 | Reward:-100.0


  6%|▌         | 29002/500000 [1:08:08<20:22:00,  6.42it/s]

Episode:29000 | Reward:100.0


  6%|▌         | 29202/500000 [1:08:37<20:38:05,  6.34it/s]

Episode:29200 | Reward:-100.0


  6%|▌         | 29402/500000 [1:09:05<20:52:00,  6.26it/s]

Episode:29400 | Reward:-100.0


  6%|▌         | 29602/500000 [1:09:34<20:18:04,  6.44it/s]

Episode:29600 | Reward:-100.0


  6%|▌         | 29802/500000 [1:10:02<20:25:46,  6.39it/s]

Episode:29800 | Reward:-100.0


  6%|▌         | 30002/500000 [1:10:30<19:54:07,  6.56it/s]

Episode:30000 | Reward:-100.0


  6%|▌         | 30202/500000 [1:10:58<19:59:28,  6.53it/s]

Episode:30200 | Reward:100.0


  6%|▌         | 30402/500000 [1:11:26<17:38:40,  7.39it/s]

Episode:30400 | Reward:100.0


  6%|▌         | 30602/500000 [1:11:54<18:59:31,  6.87it/s]

Episode:30600 | Reward:100.0


  6%|▌         | 30802/500000 [1:12:23<19:45:20,  6.60it/s]

Episode:30800 | Reward:100.0


  6%|▌         | 31002/500000 [1:12:51<19:54:00,  6.55it/s]

Episode:31000 | Reward:-100.0


  6%|▌         | 31202/500000 [1:13:17<18:01:11,  7.23it/s]

Episode:31200 | Reward:100.0


  6%|▋         | 31402/500000 [1:13:46<20:07:46,  6.47it/s]

Episode:31400 | Reward:100.0


  6%|▋         | 31602/500000 [1:14:14<19:59:17,  6.51it/s]

Episode:31600 | Reward:100.0


  6%|▋         | 31802/500000 [1:14:43<21:06:59,  6.16it/s]

Episode:31800 | Reward:100.0


  6%|▋         | 32002/500000 [1:15:11<19:42:49,  6.59it/s]

Episode:32000 | Reward:-100.0


  6%|▋         | 32202/500000 [1:15:39<20:21:02,  6.39it/s]

Episode:32200 | Reward:100.0


  6%|▋         | 32402/500000 [1:16:08<19:37:50,  6.62it/s]

Episode:32400 | Reward:100.0


  7%|▋         | 32602/500000 [1:16:36<20:11:01,  6.43it/s]

Episode:32600 | Reward:100.0


  7%|▋         | 32802/500000 [1:17:04<20:21:24,  6.38it/s]

Episode:32800 | Reward:-100.0


  7%|▋         | 33002/500000 [1:17:33<19:50:21,  6.54it/s]

Episode:33000 | Reward:-100.0


  7%|▋         | 33202/500000 [1:18:01<20:15:17,  6.40it/s]

Episode:33200 | Reward:-100.0


  7%|▋         | 33402/500000 [1:18:30<20:07:14,  6.44it/s]

Episode:33400 | Reward:-100.0


  7%|▋         | 33602/500000 [1:18:58<21:00:03,  6.17it/s]

Episode:33600 | Reward:-100.0


  7%|▋         | 33802/500000 [1:19:25<20:05:26,  6.45it/s]

Episode:33800 | Reward:-100.0


  7%|▋         | 34002/500000 [1:19:53<20:40:55,  6.26it/s]

Episode:34000 | Reward:100.0


  7%|▋         | 34201/500000 [1:20:20<23:00:18,  5.62it/s]

Episode:34200 | Reward:100.0


  7%|▋         | 34402/500000 [1:20:47<20:42:04,  6.25it/s]

Episode:34400 | Reward:0.0


  7%|▋         | 34602/500000 [1:21:13<17:58:37,  7.19it/s]

Episode:34600 | Reward:100.0


  7%|▋         | 34802/500000 [1:21:41<19:22:32,  6.67it/s]

Episode:34800 | Reward:100.0


  7%|▋         | 35002/500000 [1:22:08<20:19:47,  6.35it/s]

Episode:35000 | Reward:0.0


  7%|▋         | 35202/500000 [1:22:37<19:49:54,  6.51it/s]

Episode:35200 | Reward:-100.0


  7%|▋         | 35402/500000 [1:23:05<20:39:33,  6.25it/s]

Episode:35400 | Reward:-100.0


  7%|▋         | 35602/500000 [1:23:34<19:52:37,  6.49it/s]

Episode:35600 | Reward:-100.0


  7%|▋         | 35802/500000 [1:24:02<20:25:23,  6.31it/s]

Episode:35800 | Reward:-100.0


  7%|▋         | 36002/500000 [1:24:31<19:53:44,  6.48it/s]

Episode:36000 | Reward:-100.0


  7%|▋         | 36202/500000 [1:24:56<16:52:01,  7.64it/s]

Episode:36200 | Reward:-100.0


  7%|▋         | 36402/500000 [1:25:23<19:51:31,  6.48it/s]

Episode:36400 | Reward:-100.0


  7%|▋         | 36602/500000 [1:25:52<21:03:11,  6.11it/s]

Episode:36600 | Reward:-100.0


  7%|▋         | 36802/500000 [1:26:20<19:27:04,  6.61it/s]

Episode:36800 | Reward:100.0


  7%|▋         | 37002/500000 [1:26:47<20:25:50,  6.29it/s]

Episode:37000 | Reward:100.0


  7%|▋         | 37202/500000 [1:27:16<20:05:31,  6.40it/s]

Episode:37200 | Reward:100.0


  7%|▋         | 37402/500000 [1:27:44<19:11:59,  6.69it/s]

Episode:37400 | Reward:-100.0


  8%|▊         | 37602/500000 [1:28:12<18:09:40,  7.07it/s]

Episode:37600 | Reward:100.0


  8%|▊         | 37802/500000 [1:28:41<20:18:39,  6.32it/s]

Episode:37800 | Reward:100.0


  8%|▊         | 38002/500000 [1:29:09<19:10:05,  6.70it/s]

Episode:38000 | Reward:100.0


  8%|▊         | 38202/500000 [1:29:37<20:01:39,  6.40it/s]

Episode:38200 | Reward:100.0


  8%|▊         | 38402/500000 [1:30:06<17:08:14,  7.48it/s]

Episode:38400 | Reward:-100.0


  8%|▊         | 38602/500000 [1:30:32<20:06:07,  6.38it/s]

Episode:38600 | Reward:-100.0


  8%|▊         | 38802/500000 [1:31:01<19:22:16,  6.61it/s]

Episode:38800 | Reward:-100.0


  8%|▊         | 39002/500000 [1:31:29<20:02:01,  6.39it/s]

Episode:39000 | Reward:-100.0


  8%|▊         | 39202/500000 [1:31:57<19:59:50,  6.40it/s]

Episode:39200 | Reward:-100.0


  8%|▊         | 39402/500000 [1:32:26<19:45:29,  6.48it/s]

Episode:39400 | Reward:-100.0


  8%|▊         | 39602/500000 [1:32:53<18:21:36,  6.97it/s]

Episode:39600 | Reward:100.0


  8%|▊         | 39802/500000 [1:33:18<19:26:47,  6.57it/s]

Episode:39800 | Reward:100.0


  8%|▊         | 40002/500000 [1:33:46<19:30:54,  6.55it/s]

Episode:40000 | Reward:-100.0


  8%|▊         | 40202/500000 [1:34:15<19:32:19,  6.54it/s]

Episode:40200 | Reward:100.0


  8%|▊         | 40402/500000 [1:34:43<19:13:20,  6.64it/s]

Episode:40400 | Reward:-100.0


  8%|▊         | 40602/500000 [1:35:11<20:24:19,  6.25it/s]

Episode:40600 | Reward:100.0


  8%|▊         | 40802/500000 [1:35:40<19:37:42,  6.50it/s]

Episode:40800 | Reward:100.0


  8%|▊         | 41002/500000 [1:36:08<19:16:45,  6.61it/s]

Episode:41000 | Reward:-100.0


  8%|▊         | 41202/500000 [1:36:36<20:03:24,  6.35it/s]

Episode:41200 | Reward:100.0


  8%|▊         | 41402/500000 [1:37:04<20:35:45,  6.19it/s]

Episode:41400 | Reward:-100.0


  8%|▊         | 41602/500000 [1:37:32<20:00:14,  6.37it/s]

Episode:41600 | Reward:-100.0


  8%|▊         | 41802/500000 [1:38:00<20:02:03,  6.35it/s]

Episode:41800 | Reward:-100.0


  8%|▊         | 42002/500000 [1:38:27<20:11:40,  6.30it/s]

Episode:42000 | Reward:-100.0


  8%|▊         | 42202/500000 [1:38:56<20:08:13,  6.32it/s]

Episode:42200 | Reward:-100.0


  8%|▊         | 42402/500000 [1:39:24<19:54:07,  6.39it/s]

Episode:42400 | Reward:-100.0


  9%|▊         | 42602/500000 [1:39:53<19:25:45,  6.54it/s]

Episode:42600 | Reward:100.0


  9%|▊         | 42802/500000 [1:40:21<19:31:47,  6.50it/s]

Episode:42800 | Reward:-100.0


  9%|▊         | 43002/500000 [1:40:50<19:26:27,  6.53it/s]

Episode:43000 | Reward:-100.0


  9%|▊         | 43202/500000 [1:41:18<19:19:24,  6.57it/s]

Episode:43200 | Reward:-100.0


  9%|▊         | 43402/500000 [1:41:46<19:26:12,  6.53it/s]

Episode:43400 | Reward:100.0


  9%|▊         | 43602/500000 [1:42:15<19:17:37,  6.57it/s]

Episode:43600 | Reward:-100.0


  9%|▉         | 43802/500000 [1:42:44<19:51:59,  6.38it/s]

Episode:43800 | Reward:-100.0


  9%|▉         | 44002/500000 [1:43:12<19:20:15,  6.55it/s]

Episode:44000 | Reward:100.0


  9%|▉         | 44202/500000 [1:43:41<21:02:19,  6.02it/s]

Episode:44200 | Reward:100.0


  9%|▉         | 44402/500000 [1:44:09<19:36:51,  6.45it/s]

Episode:44400 | Reward:100.0


  9%|▉         | 44602/500000 [1:44:38<20:04:08,  6.30it/s]

Episode:44600 | Reward:-100.0


  9%|▉         | 44802/500000 [1:45:06<19:08:38,  6.60it/s]

Episode:44800 | Reward:100.0


  9%|▉         | 45002/500000 [1:45:35<18:59:22,  6.66it/s]

Episode:45000 | Reward:-100.0


  9%|▉         | 45202/500000 [1:46:03<20:25:59,  6.18it/s]

Episode:45200 | Reward:-100.0


  9%|▉         | 45402/500000 [1:46:32<20:29:58,  6.16it/s]

Episode:45400 | Reward:100.0


  9%|▉         | 45602/500000 [1:47:00<16:35:10,  7.61it/s]

Episode:45600 | Reward:100.0


  9%|▉         | 45802/500000 [1:47:27<19:30:37,  6.47it/s]

Episode:45800 | Reward:-100.0


  9%|▉         | 46002/500000 [1:47:55<18:48:31,  6.70it/s]

Episode:46000 | Reward:-100.0


  9%|▉         | 46202/500000 [1:48:23<19:50:08,  6.35it/s]

Episode:46200 | Reward:-100.0


  9%|▉         | 46402/500000 [1:48:52<19:07:57,  6.59it/s]

Episode:46400 | Reward:100.0


  9%|▉         | 46602/500000 [1:49:19<20:49:24,  6.05it/s]

Episode:46600 | Reward:-100.0


  9%|▉         | 46802/500000 [1:49:48<19:20:24,  6.51it/s]

Episode:46800 | Reward:-100.0


  9%|▉         | 47002/500000 [1:50:16<19:30:50,  6.45it/s]

Episode:47000 | Reward:-100.0


  9%|▉         | 47202/500000 [1:50:45<19:40:47,  6.39it/s]

Episode:47200 | Reward:-100.0


  9%|▉         | 47402/500000 [1:51:13<19:18:07,  6.51it/s]

Episode:47400 | Reward:100.0


 10%|▉         | 47602/500000 [1:51:42<19:47:13,  6.35it/s]

Episode:47600 | Reward:100.0


 10%|▉         | 47802/500000 [1:52:10<19:43:28,  6.37it/s]

Episode:47800 | Reward:-100.0


 10%|▉         | 48002/500000 [1:52:39<19:18:25,  6.50it/s]

Episode:48000 | Reward:100.0


 10%|▉         | 48202/500000 [1:53:07<16:51:17,  7.45it/s]

Episode:48200 | Reward:100.0


 10%|▉         | 48402/500000 [1:53:33<19:23:57,  6.47it/s]

Episode:48400 | Reward:-100.0


 10%|▉         | 48602/500000 [1:54:02<17:54:49,  7.00it/s]

Episode:48600 | Reward:-100.0


 10%|▉         | 48802/500000 [1:54:30<24:51:59,  5.04it/s]

Episode:48800 | Reward:-100.0


 10%|▉         | 49002/500000 [1:54:57<18:32:24,  6.76it/s]

Episode:49000 | Reward:-100.0


 10%|▉         | 49202/500000 [1:55:25<24:41:17,  5.07it/s]

Episode:49200 | Reward:-100.0


 10%|▉         | 49402/500000 [1:55:54<18:38:41,  6.71it/s]

Episode:49400 | Reward:100.0


 10%|▉         | 49602/500000 [1:56:23<19:21:31,  6.46it/s]

Episode:49600 | Reward:100.0


 10%|▉         | 49802/500000 [1:56:51<19:15:07,  6.50it/s]

Episode:49800 | Reward:-100.0


 10%|█         | 50002/500000 [1:57:20<20:05:51,  6.22it/s]

Episode:50000 | Reward:100.0


 10%|█         | 50202/500000 [1:57:48<19:45:44,  6.32it/s]

Episode:50200 | Reward:-100.0


 10%|█         | 50402/500000 [1:58:16<19:11:31,  6.51it/s]

Episode:50400 | Reward:100.0


 10%|█         | 50602/500000 [1:58:45<19:29:47,  6.40it/s]

Episode:50600 | Reward:100.0


 10%|█         | 50802/500000 [1:59:14<19:52:49,  6.28it/s]

Episode:50800 | Reward:-100.0


 10%|█         | 51002/500000 [1:59:42<20:22:15,  6.12it/s]

Episode:51000 | Reward:-100.0


 10%|█         | 51202/500000 [2:00:11<18:55:43,  6.59it/s]

Episode:51200 | Reward:100.0


 10%|█         | 51402/500000 [2:00:39<18:57:19,  6.57it/s]

Episode:51400 | Reward:-100.0


 10%|█         | 51602/500000 [2:01:08<19:07:06,  6.51it/s]

Episode:51600 | Reward:100.0


 10%|█         | 51802/500000 [2:01:36<19:41:35,  6.32it/s]

Episode:51800 | Reward:-100.0


 10%|█         | 52002/500000 [2:02:05<19:11:50,  6.48it/s]

Episode:52000 | Reward:100.0


 10%|█         | 52202/500000 [2:02:33<25:03:30,  4.96it/s]

Episode:52200 | Reward:-100.0


 10%|█         | 52402/500000 [2:03:00<16:18:11,  7.63it/s]

Episode:52400 | Reward:-100.0


 11%|█         | 52602/500000 [2:03:26<18:59:49,  6.54it/s]

Episode:52600 | Reward:-100.0


 11%|█         | 52802/500000 [2:03:55<19:14:55,  6.45it/s]

Episode:52800 | Reward:100.0


 11%|█         | 53002/500000 [2:04:23<19:21:28,  6.41it/s]

Episode:53000 | Reward:100.0


 11%|█         | 53202/500000 [2:04:52<19:37:58,  6.32it/s]

Episode:53200 | Reward:-100.0


 11%|█         | 53402/500000 [2:05:20<18:51:44,  6.58it/s]

Episode:53400 | Reward:100.0


 11%|█         | 53602/500000 [2:05:49<18:30:44,  6.70it/s]

Episode:53600 | Reward:-100.0


 11%|█         | 53802/500000 [2:06:17<18:59:33,  6.53it/s]

Episode:53800 | Reward:100.0


 11%|█         | 54002/500000 [2:06:45<16:21:52,  7.57it/s]

Episode:54000 | Reward:100.0


 11%|█         | 54202/500000 [2:07:11<19:29:02,  6.36it/s]

Episode:54200 | Reward:100.0


 11%|█         | 54402/500000 [2:07:40<18:51:37,  6.56it/s]

Episode:54400 | Reward:-100.0


 11%|█         | 54602/500000 [2:08:08<19:13:21,  6.44it/s]

Episode:54600 | Reward:-100.0


 11%|█         | 54802/500000 [2:08:37<16:16:47,  7.60it/s]

Episode:54800 | Reward:-100.0


 11%|█         | 55002/500000 [2:09:03<19:06:51,  6.47it/s]

Episode:55000 | Reward:100.0


 11%|█         | 55202/500000 [2:09:32<20:05:15,  6.15it/s]

Episode:55200 | Reward:-100.0


 11%|█         | 55402/500000 [2:10:00<19:06:19,  6.46it/s]

Episode:55400 | Reward:-100.0


 11%|█         | 55602/500000 [2:10:28<19:01:54,  6.49it/s]

Episode:55600 | Reward:-100.0


 11%|█         | 55802/500000 [2:10:57<19:13:32,  6.42it/s]

Episode:55800 | Reward:-100.0


 11%|█         | 56002/500000 [2:11:26<18:57:36,  6.50it/s]

Episode:56000 | Reward:-100.0


 11%|█         | 56202/500000 [2:11:54<19:22:50,  6.36it/s]

Episode:56200 | Reward:100.0


 11%|█▏        | 56402/500000 [2:12:21<19:22:38,  6.36it/s]

Episode:56400 | Reward:100.0


 11%|█▏        | 56602/500000 [2:12:50<20:13:39,  6.09it/s]

Episode:56600 | Reward:100.0


 11%|█▏        | 56802/500000 [2:13:19<19:01:30,  6.47it/s]

Episode:56800 | Reward:-100.0


 11%|█▏        | 57002/500000 [2:13:47<19:17:39,  6.38it/s]

Episode:57000 | Reward:100.0


 11%|█▏        | 57202/500000 [2:14:16<18:58:56,  6.48it/s]

Episode:57200 | Reward:100.0


 11%|█▏        | 57402/500000 [2:14:44<19:01:53,  6.46it/s]

Episode:57400 | Reward:100.0


 12%|█▏        | 57602/500000 [2:15:13<18:41:44,  6.57it/s]

Episode:57600 | Reward:-100.0


 12%|█▏        | 57802/500000 [2:15:41<18:48:29,  6.53it/s]

Episode:57800 | Reward:100.0


 12%|█▏        | 58002/500000 [2:16:09<18:51:47,  6.51it/s]

Episode:58000 | Reward:100.0


 12%|█▏        | 58202/500000 [2:16:36<19:10:23,  6.40it/s]

Episode:58200 | Reward:-100.0


 12%|█▏        | 58402/500000 [2:17:05<20:21:32,  6.03it/s]

Episode:58400 | Reward:-100.0


 12%|█▏        | 58602/500000 [2:17:33<20:03:16,  6.11it/s]

Episode:58600 | Reward:100.0


 12%|█▏        | 58802/500000 [2:18:02<18:49:27,  6.51it/s]

Episode:58800 | Reward:100.0


 12%|█▏        | 59002/500000 [2:18:30<18:43:15,  6.54it/s]

Episode:59000 | Reward:100.0


 12%|█▏        | 59202/500000 [2:18:59<18:57:13,  6.46it/s]

Episode:59200 | Reward:-100.0


 12%|█▏        | 59402/500000 [2:19:28<18:52:12,  6.49it/s]

Episode:59400 | Reward:100.0


 12%|█▏        | 59602/500000 [2:19:54<19:07:37,  6.40it/s]

Episode:59600 | Reward:-100.0


 12%|█▏        | 59802/500000 [2:20:22<19:02:53,  6.42it/s]

Episode:59800 | Reward:100.0


 12%|█▏        | 60002/500000 [2:20:51<18:49:49,  6.49it/s]

Episode:60000 | Reward:-100.0


 12%|█▏        | 60202/500000 [2:21:20<19:15:03,  6.35it/s]

Episode:60200 | Reward:-100.0


 12%|█▏        | 60402/500000 [2:21:48<18:30:23,  6.60it/s]

Episode:60400 | Reward:-100.0


 12%|█▏        | 60602/500000 [2:22:15<18:38:13,  6.55it/s]

Episode:60600 | Reward:-100.0


 12%|█▏        | 60802/500000 [2:22:43<18:44:13,  6.51it/s]

Episode:60800 | Reward:100.0


 12%|█▏        | 61002/500000 [2:23:11<16:55:02,  7.21it/s]

Episode:61000 | Reward:100.0


 12%|█▏        | 61202/500000 [2:23:38<18:44:43,  6.50it/s]

Episode:61200 | Reward:100.0


 12%|█▏        | 61402/500000 [2:24:06<19:36:49,  6.21it/s]

Episode:61400 | Reward:-100.0


 12%|█▏        | 61602/500000 [2:24:35<19:20:18,  6.30it/s]

Episode:61600 | Reward:-100.0


 12%|█▏        | 61802/500000 [2:25:04<18:46:58,  6.48it/s]

Episode:61800 | Reward:100.0


 12%|█▏        | 62002/500000 [2:25:31<19:02:05,  6.39it/s]

Episode:62000 | Reward:-100.0


 12%|█▏        | 62202/500000 [2:26:00<18:48:16,  6.47it/s]

Episode:62200 | Reward:100.0


 12%|█▏        | 62402/500000 [2:26:28<18:42:17,  6.50it/s]

Episode:62400 | Reward:-100.0


 13%|█▎        | 62602/500000 [2:26:56<18:38:17,  6.52it/s]

Episode:62600 | Reward:100.0


 13%|█▎        | 62802/500000 [2:27:25<18:53:26,  6.43it/s]

Episode:62800 | Reward:-100.0


 13%|█▎        | 63002/500000 [2:27:53<19:07:28,  6.35it/s]

Episode:63000 | Reward:100.0


 13%|█▎        | 63202/500000 [2:28:21<18:41:36,  6.49it/s]

Episode:63200 | Reward:100.0


 13%|█▎        | 63402/500000 [2:28:50<18:25:07,  6.58it/s]

Episode:63400 | Reward:-100.0


 13%|█▎        | 63602/500000 [2:29:18<18:43:56,  6.47it/s]

Episode:63600 | Reward:100.0


 13%|█▎        | 63802/500000 [2:29:45<18:24:37,  6.58it/s]

Episode:63800 | Reward:-100.0


 13%|█▎        | 64002/500000 [2:30:11<16:02:34,  7.55it/s]

Episode:64000 | Reward:-100.0


 13%|█▎        | 64202/500000 [2:30:39<18:42:22,  6.47it/s]

Episode:64200 | Reward:100.0


 13%|█▎        | 64402/500000 [2:31:08<19:15:40,  6.28it/s]

Episode:64400 | Reward:100.0


 13%|█▎        | 64602/500000 [2:31:34<19:37:00,  6.17it/s]

Episode:64600 | Reward:100.0


 13%|█▎        | 64802/500000 [2:32:03<18:44:59,  6.45it/s]

Episode:64800 | Reward:-100.0


 13%|█▎        | 65002/500000 [2:32:31<16:29:03,  7.33it/s]

Episode:65000 | Reward:100.0


 13%|█▎        | 65202/500000 [2:32:57<17:20:03,  6.97it/s]

Episode:65200 | Reward:100.0


 13%|█▎        | 65402/500000 [2:33:24<18:48:15,  6.42it/s]

Episode:65400 | Reward:100.0


 13%|█▎        | 65602/500000 [2:33:53<18:34:13,  6.50it/s]

Episode:65600 | Reward:100.0


 13%|█▎        | 65802/500000 [2:34:20<19:05:45,  6.32it/s]

Episode:65800 | Reward:-100.0


 13%|█▎        | 66002/500000 [2:34:49<18:42:07,  6.45it/s]

Episode:66000 | Reward:100.0


 13%|█▎        | 66202/500000 [2:35:18<18:14:31,  6.61it/s]

Episode:66200 | Reward:-100.0


 13%|█▎        | 66402/500000 [2:35:46<18:44:59,  6.42it/s]

Episode:66400 | Reward:100.0


 13%|█▎        | 66602/500000 [2:36:14<18:05:02,  6.66it/s]

Episode:66600 | Reward:-100.0


 13%|█▎        | 66802/500000 [2:36:43<19:10:44,  6.27it/s]

Episode:66800 | Reward:-100.0


 13%|█▎        | 67002/500000 [2:37:12<18:48:36,  6.39it/s]

Episode:67000 | Reward:-100.0


 13%|█▎        | 67202/500000 [2:37:41<19:16:04,  6.24it/s]

Episode:67200 | Reward:100.0


 13%|█▎        | 67402/500000 [2:38:09<18:19:09,  6.56it/s]

Episode:67400 | Reward:100.0


 14%|█▎        | 67602/500000 [2:38:37<18:28:04,  6.50it/s]

Episode:67600 | Reward:100.0


 14%|█▎        | 67802/500000 [2:39:06<15:23:43,  7.80it/s]

Episode:67800 | Reward:-100.0


 14%|█▎        | 68002/500000 [2:39:34<18:20:01,  6.55it/s]

Episode:68000 | Reward:100.0


 14%|█▎        | 68202/500000 [2:40:02<18:52:42,  6.35it/s]

Episode:68200 | Reward:100.0


 14%|█▎        | 68402/500000 [2:40:30<18:21:43,  6.53it/s]

Episode:68400 | Reward:100.0


 14%|█▎        | 68602/500000 [2:40:59<19:02:00,  6.30it/s]

Episode:68600 | Reward:-100.0


 14%|█▍        | 68802/500000 [2:41:27<18:30:30,  6.47it/s]

Episode:68800 | Reward:-100.0


 14%|█▍        | 69002/500000 [2:41:55<19:21:36,  6.18it/s]

Episode:69000 | Reward:100.0


 14%|█▍        | 69202/500000 [2:42:23<19:19:47,  6.19it/s]

Episode:69200 | Reward:-100.0


 14%|█▍        | 69402/500000 [2:42:52<18:28:48,  6.47it/s]

Episode:69400 | Reward:-100.0


 14%|█▍        | 69602/500000 [2:43:21<18:44:46,  6.38it/s]

Episode:69600 | Reward:100.0


 14%|█▍        | 69802/500000 [2:43:49<19:00:17,  6.29it/s]

Episode:69800 | Reward:100.0


 14%|█▍        | 70002/500000 [2:44:17<16:22:04,  7.30it/s]

Episode:70000 | Reward:100.0


 14%|█▍        | 70202/500000 [2:44:44<18:24:23,  6.49it/s]

Episode:70200 | Reward:100.0


 14%|█▍        | 70402/500000 [2:45:13<18:40:29,  6.39it/s]

Episode:70400 | Reward:100.0


 14%|█▍        | 70602/500000 [2:45:41<18:57:00,  6.29it/s]

Episode:70600 | Reward:100.0


 14%|█▍        | 70802/500000 [2:46:10<18:51:05,  6.32it/s]

Episode:70800 | Reward:-100.0


 14%|█▍        | 71002/500000 [2:46:38<17:59:37,  6.62it/s]

Episode:71000 | Reward:100.0


 14%|█▍        | 71202/500000 [2:47:07<17:56:17,  6.64it/s]

Episode:71200 | Reward:100.0


 14%|█▍        | 71402/500000 [2:47:35<17:12:33,  6.92it/s]

Episode:71400 | Reward:-100.0


 14%|█▍        | 71602/500000 [2:48:00<15:11:15,  7.84it/s]

Episode:71600 | Reward:100.0


 14%|█▍        | 71802/500000 [2:48:28<18:12:53,  6.53it/s]

Episode:71800 | Reward:-100.0


 14%|█▍        | 72002/500000 [2:48:57<18:32:41,  6.41it/s]

Episode:72000 | Reward:100.0


 14%|█▍        | 72202/500000 [2:49:25<18:08:45,  6.55it/s]

Episode:72200 | Reward:-100.0


 14%|█▍        | 72402/500000 [2:49:53<19:20:20,  6.14it/s]

Episode:72400 | Reward:-100.0


 15%|█▍        | 72602/500000 [2:50:21<18:15:23,  6.50it/s]

Episode:72600 | Reward:100.0


 15%|█▍        | 72802/500000 [2:50:49<17:51:09,  6.65it/s]

Episode:72800 | Reward:-100.0


 15%|█▍        | 73002/500000 [2:51:18<18:18:07,  6.48it/s]

Episode:73000 | Reward:100.0


 15%|█▍        | 73202/500000 [2:51:43<19:31:47,  6.07it/s]

Episode:73200 | Reward:100.0


 15%|█▍        | 73402/500000 [2:52:12<19:12:06,  6.17it/s]

Episode:73400 | Reward:-100.0


 15%|█▍        | 73602/500000 [2:52:40<18:07:22,  6.54it/s]

Episode:73600 | Reward:100.0


 15%|█▍        | 73802/500000 [2:53:09<18:32:26,  6.39it/s]

Episode:73800 | Reward:100.0


 15%|█▍        | 74002/500000 [2:53:37<18:19:16,  6.46it/s]

Episode:74000 | Reward:-100.0


 15%|█▍        | 74202/500000 [2:54:06<18:37:40,  6.35it/s]

Episode:74200 | Reward:100.0


 15%|█▍        | 74402/500000 [2:54:34<17:48:36,  6.64it/s]

Episode:74400 | Reward:-100.0


 15%|█▍        | 74602/500000 [2:55:03<18:25:58,  6.41it/s]

Episode:74600 | Reward:100.0


 15%|█▍        | 74802/500000 [2:55:31<18:19:49,  6.44it/s]

Episode:74800 | Reward:-100.0


 15%|█▌        | 75002/500000 [2:55:59<18:28:50,  6.39it/s]

Episode:75000 | Reward:-100.0


 15%|█▌        | 75202/500000 [2:56:27<17:50:27,  6.61it/s]

Episode:75200 | Reward:-100.0


 15%|█▌        | 75402/500000 [2:56:56<22:13:48,  5.31it/s]

Episode:75400 | Reward:-100.0


 15%|█▌        | 75602/500000 [2:57:24<18:08:44,  6.50it/s]

Episode:75600 | Reward:-100.0


 15%|█▌        | 75802/500000 [2:57:53<18:36:17,  6.33it/s]

Episode:75800 | Reward:-100.0


 15%|█▌        | 76002/500000 [2:58:21<17:53:48,  6.58it/s]

Episode:76000 | Reward:-100.0


 15%|█▌        | 76202/500000 [2:58:49<17:39:38,  6.67it/s]

Episode:76200 | Reward:100.0


 15%|█▌        | 76402/500000 [2:59:18<18:34:20,  6.34it/s]

Episode:76400 | Reward:-100.0


 15%|█▌        | 76602/500000 [2:59:46<17:47:26,  6.61it/s]

Episode:76600 | Reward:-100.0


 15%|█▌        | 76802/500000 [3:00:14<18:09:52,  6.47it/s]

Episode:76800 | Reward:100.0


 15%|█▌        | 77002/500000 [3:00:43<17:42:28,  6.64it/s]

Episode:77000 | Reward:100.0


 15%|█▌        | 77202/500000 [3:01:11<18:13:21,  6.44it/s]

Episode:77200 | Reward:100.0


 15%|█▌        | 77402/500000 [3:01:40<17:46:35,  6.60it/s]

Episode:77400 | Reward:100.0


 16%|█▌        | 77602/500000 [3:02:08<18:31:57,  6.33it/s]

Episode:77600 | Reward:100.0


 16%|█▌        | 77802/500000 [3:02:37<18:11:17,  6.45it/s]

Episode:77800 | Reward:100.0


 16%|█▌        | 78002/500000 [3:03:06<18:19:11,  6.40it/s]

Episode:78000 | Reward:-100.0


 16%|█▌        | 78202/500000 [3:03:30<15:12:36,  7.70it/s]

Episode:78200 | Reward:-100.0


 16%|█▌        | 78402/500000 [3:03:57<18:27:55,  6.34it/s]

Episode:78400 | Reward:-100.0


 16%|█▌        | 78602/500000 [3:04:26<17:57:39,  6.52it/s]

Episode:78600 | Reward:100.0


 16%|█▌        | 78802/500000 [3:04:54<18:20:24,  6.38it/s]

Episode:78800 | Reward:100.0


 16%|█▌        | 79002/500000 [3:05:23<18:26:54,  6.34it/s]

Episode:79000 | Reward:100.0


 16%|█▌        | 79202/500000 [3:05:52<19:30:48,  5.99it/s]

Episode:79200 | Reward:100.0


 16%|█▌        | 79402/500000 [3:06:20<18:07:31,  6.45it/s]

Episode:79400 | Reward:100.0


 16%|█▌        | 79602/500000 [3:06:48<18:29:39,  6.31it/s]

Episode:79600 | Reward:100.0


 16%|█▌        | 79802/500000 [3:07:17<18:23:37,  6.35it/s]

Episode:79800 | Reward:100.0


 16%|█▌        | 80002/500000 [3:07:44<18:21:35,  6.35it/s]

Episode:80000 | Reward:100.0


 16%|█▌        | 80202/500000 [3:08:12<18:24:10,  6.34it/s]

Episode:80200 | Reward:-100.0


 16%|█▌        | 80402/500000 [3:08:40<18:16:58,  6.38it/s]

Episode:80400 | Reward:100.0


 16%|█▌        | 80602/500000 [3:09:09<18:29:53,  6.30it/s]

Episode:80600 | Reward:-100.0


 16%|█▌        | 80802/500000 [3:09:37<17:45:33,  6.56it/s]

Episode:80800 | Reward:-100.0


 16%|█▌        | 81002/500000 [3:10:05<17:58:24,  6.48it/s]

Episode:81000 | Reward:-100.0


 16%|█▌        | 81202/500000 [3:10:31<14:47:05,  7.87it/s]

Episode:81200 | Reward:-100.0


 16%|█▋        | 81402/500000 [3:10:58<17:46:45,  6.54it/s]

Episode:81400 | Reward:-100.0


 16%|█▋        | 81602/500000 [3:11:27<18:05:20,  6.42it/s]

Episode:81600 | Reward:-100.0


 16%|█▋        | 81802/500000 [3:11:55<18:35:31,  6.25it/s]

Episode:81800 | Reward:-100.0


 16%|█▋        | 82002/500000 [3:12:23<18:42:16,  6.21it/s]

Episode:82000 | Reward:100.0


 16%|█▋        | 82202/500000 [3:12:52<18:24:44,  6.30it/s]

Episode:82200 | Reward:-100.0


 16%|█▋        | 82402/500000 [3:13:20<15:19:36,  7.57it/s]

Episode:82400 | Reward:-100.0


 17%|█▋        | 82602/500000 [3:13:47<17:44:45,  6.53it/s]

Episode:82600 | Reward:-100.0


 17%|█▋        | 82802/500000 [3:14:16<17:40:45,  6.55it/s]

Episode:82800 | Reward:-100.0


 17%|█▋        | 83002/500000 [3:14:44<18:13:33,  6.36it/s]

Episode:83000 | Reward:-100.0


 17%|█▋        | 83202/500000 [3:15:13<17:47:22,  6.51it/s]

Episode:83200 | Reward:100.0


 17%|█▋        | 83402/500000 [3:15:42<17:34:23,  6.59it/s]

Episode:83400 | Reward:-100.0


 17%|█▋        | 83602/500000 [3:16:10<17:59:26,  6.43it/s]

Episode:83600 | Reward:100.0


 17%|█▋        | 83802/500000 [3:16:37<17:28:54,  6.61it/s]

Episode:83800 | Reward:100.0


 17%|█▋        | 84002/500000 [3:17:06<28:08:44,  4.11it/s]

Episode:84000 | Reward:-100.0


 17%|█▋        | 84202/500000 [3:17:34<17:58:39,  6.42it/s]

Episode:84200 | Reward:-100.0


 17%|█▋        | 84402/500000 [3:18:03<17:46:43,  6.49it/s]

Episode:84400 | Reward:-100.0


 17%|█▋        | 84602/500000 [3:18:31<17:49:52,  6.47it/s]

Episode:84600 | Reward:100.0


 17%|█▋        | 84802/500000 [3:18:59<18:09:43,  6.35it/s]

Episode:84800 | Reward:-100.0


 17%|█▋        | 85002/500000 [3:19:27<19:18:26,  5.97it/s]

Episode:85000 | Reward:-100.0


 17%|█▋        | 85202/500000 [3:19:56<17:35:53,  6.55it/s]

Episode:85200 | Reward:100.0


 17%|█▋        | 85402/500000 [3:20:24<17:48:41,  6.47it/s]

Episode:85400 | Reward:-100.0


 17%|█▋        | 85602/500000 [3:20:51<15:36:12,  7.38it/s]

Episode:85600 | Reward:-100.0


 17%|█▋        | 85802/500000 [3:21:19<18:06:03,  6.36it/s]

Episode:85800 | Reward:-100.0


 17%|█▋        | 86002/500000 [3:21:47<15:21:31,  7.49it/s]

Episode:86000 | Reward:-100.0


 17%|█▋        | 86202/500000 [3:22:14<17:49:23,  6.45it/s]

Episode:86200 | Reward:-100.0


 17%|█▋        | 86402/500000 [3:22:43<17:21:07,  6.62it/s]

Episode:86400 | Reward:100.0


 17%|█▋        | 86602/500000 [3:23:12<18:08:05,  6.33it/s]

Episode:86600 | Reward:100.0


 17%|█▋        | 86802/500000 [3:23:40<18:09:12,  6.32it/s]

Episode:86800 | Reward:-100.0


 17%|█▋        | 87002/500000 [3:24:07<15:26:33,  7.43it/s]

Episode:87000 | Reward:100.0


 17%|█▋        | 87202/500000 [3:24:34<18:06:32,  6.33it/s]

Episode:87200 | Reward:100.0


 17%|█▋        | 87402/500000 [3:25:03<17:59:45,  6.37it/s]

Episode:87400 | Reward:100.0


 18%|█▊        | 87602/500000 [3:25:32<17:41:28,  6.48it/s]

Episode:87600 | Reward:100.0


 18%|█▊        | 87802/500000 [3:26:00<17:09:18,  6.67it/s]

Episode:87800 | Reward:100.0


 18%|█▊        | 88002/500000 [3:26:29<17:58:53,  6.36it/s]

Episode:88000 | Reward:100.0


 18%|█▊        | 88202/500000 [3:26:57<18:16:53,  6.26it/s]

Episode:88200 | Reward:-100.0


 18%|█▊        | 88402/500000 [3:27:26<17:50:06,  6.41it/s]

Episode:88400 | Reward:100.0


 18%|█▊        | 88602/500000 [3:27:55<17:44:27,  6.44it/s]

Episode:88600 | Reward:-100.0


 18%|█▊        | 88802/500000 [3:28:23<17:52:07,  6.39it/s]

Episode:88800 | Reward:100.0


 18%|█▊        | 89002/500000 [3:28:51<17:22:46,  6.57it/s]

Episode:89000 | Reward:-100.0


 18%|█▊        | 89202/500000 [3:29:20<17:26:58,  6.54it/s]

Episode:89200 | Reward:-100.0


 18%|█▊        | 89402/500000 [3:29:48<17:41:06,  6.45it/s]

Episode:89400 | Reward:-100.0


 18%|█▊        | 89602/500000 [3:30:16<17:28:08,  6.53it/s]

Episode:89600 | Reward:-100.0


 18%|█▊        | 89802/500000 [3:30:44<17:12:02,  6.62it/s]

Episode:89800 | Reward:0.0


 18%|█▊        | 90001/500000 [3:31:13<18:28:12,  6.17it/s]

Episode:90000 | Reward:-100.0


 18%|█▊        | 90202/500000 [3:31:42<17:42:10,  6.43it/s]

Episode:90200 | Reward:-100.0


 18%|█▊        | 90402/500000 [3:32:10<17:56:40,  6.34it/s]

Episode:90400 | Reward:-100.0


 18%|█▊        | 90602/500000 [3:32:38<17:55:28,  6.34it/s]

Episode:90600 | Reward:-100.0


 18%|█▊        | 90802/500000 [3:33:06<17:29:10,  6.50it/s]

Episode:90800 | Reward:-100.0


 18%|█▊        | 91002/500000 [3:33:35<17:32:05,  6.48it/s]

Episode:91000 | Reward:-100.0


 18%|█▊        | 91202/500000 [3:34:04<17:35:45,  6.45it/s]

Episode:91200 | Reward:100.0


 18%|█▊        | 91402/500000 [3:34:32<17:09:47,  6.61it/s]

Episode:91400 | Reward:-100.0


 18%|█▊        | 91602/500000 [3:35:01<17:28:55,  6.49it/s]

Episode:91600 | Reward:-100.0


 18%|█▊        | 91802/500000 [3:35:29<17:26:32,  6.50it/s]

Episode:91800 | Reward:-100.0


 18%|█▊        | 92002/500000 [3:35:57<18:01:39,  6.29it/s]

Episode:92000 | Reward:100.0


 18%|█▊        | 92202/500000 [3:36:26<17:54:16,  6.33it/s]

Episode:92200 | Reward:100.0


 18%|█▊        | 92402/500000 [3:36:53<17:31:50,  6.46it/s]

Episode:92400 | Reward:-100.0


 19%|█▊        | 92602/500000 [3:37:20<17:40:21,  6.40it/s]

Episode:92600 | Reward:-100.0


 19%|█▊        | 92802/500000 [3:37:49<17:19:05,  6.53it/s]

Episode:92800 | Reward:-100.0


 19%|█▊        | 93002/500000 [3:38:17<17:13:26,  6.56it/s]

Episode:93000 | Reward:-100.0


 19%|█▊        | 93202/500000 [3:38:46<17:29:29,  6.46it/s]

Episode:93200 | Reward:-100.0


 19%|█▊        | 93402/500000 [3:39:13<17:28:18,  6.46it/s]

Episode:93400 | Reward:-100.0


 19%|█▊        | 93602/500000 [3:39:42<17:40:44,  6.39it/s]

Episode:93600 | Reward:-100.0


 19%|█▉        | 93802/500000 [3:40:10<17:28:06,  6.46it/s]

Episode:93800 | Reward:-100.0


 19%|█▉        | 94002/500000 [3:40:38<17:47:10,  6.34it/s]

Episode:94000 | Reward:-100.0


 19%|█▉        | 94202/500000 [3:41:05<17:29:54,  6.44it/s]

Episode:94200 | Reward:100.0


 19%|█▉        | 94402/500000 [3:41:34<17:24:57,  6.47it/s]

Episode:94400 | Reward:100.0


 19%|█▉        | 94602/500000 [3:42:00<17:40:17,  6.37it/s]

Episode:94600 | Reward:100.0


 19%|█▉        | 94802/500000 [3:42:28<16:43:41,  6.73it/s]

Episode:94800 | Reward:100.0


 19%|█▉        | 95002/500000 [3:42:57<17:31:16,  6.42it/s]

Episode:95000 | Reward:-100.0


 19%|█▉        | 95202/500000 [3:43:26<17:29:15,  6.43it/s]

Episode:95200 | Reward:100.0


 19%|█▉        | 95402/500000 [3:43:54<17:01:39,  6.60it/s]

Episode:95400 | Reward:100.0


 19%|█▉        | 95602/500000 [3:44:22<17:28:57,  6.43it/s]

Episode:95600 | Reward:100.0


 19%|█▉        | 95802/500000 [3:44:51<18:03:20,  6.22it/s]

Episode:95800 | Reward:-100.0


 19%|█▉        | 96002/500000 [3:45:19<17:13:13,  6.52it/s]

Episode:96000 | Reward:-100.0


 19%|█▉        | 96202/500000 [3:45:48<17:54:36,  6.26it/s]

Episode:96200 | Reward:-100.0


 19%|█▉        | 96402/500000 [3:46:16<16:28:19,  6.81it/s]

Episode:96400 | Reward:100.0


 19%|█▉        | 96602/500000 [3:46:45<17:31:44,  6.39it/s]

Episode:96600 | Reward:0.0


 19%|█▉        | 96802/500000 [3:47:13<17:19:45,  6.46it/s]

Episode:96800 | Reward:100.0


 19%|█▉        | 97002/500000 [3:47:42<17:02:30,  6.57it/s]

Episode:97000 | Reward:-100.0


 19%|█▉        | 97202/500000 [3:48:10<17:24:14,  6.43it/s]

Episode:97200 | Reward:-100.0


 19%|█▉        | 97402/500000 [3:48:39<17:09:50,  6.52it/s]

Episode:97400 | Reward:-100.0


 20%|█▉        | 97602/500000 [3:49:05<17:26:27,  6.41it/s]

Episode:97600 | Reward:-100.0


 20%|█▉        | 97802/500000 [3:49:34<17:21:22,  6.44it/s]

Episode:97800 | Reward:100.0


 20%|█▉        | 98002/500000 [3:50:02<17:12:16,  6.49it/s]

Episode:98000 | Reward:-100.0


 20%|█▉        | 98202/500000 [3:50:31<16:57:15,  6.58it/s]

Episode:98200 | Reward:100.0


 20%|█▉        | 98402/500000 [3:50:59<16:58:31,  6.57it/s]

Episode:98400 | Reward:-100.0


 20%|█▉        | 98602/500000 [3:51:28<16:59:36,  6.56it/s]

Episode:98600 | Reward:-100.0


 20%|█▉        | 98802/500000 [3:51:57<17:28:17,  6.38it/s]

Episode:98800 | Reward:100.0


 20%|█▉        | 99002/500000 [3:52:25<17:04:19,  6.52it/s]

Episode:99000 | Reward:-100.0


 20%|█▉        | 99202/500000 [3:52:53<17:18:05,  6.43it/s]

Episode:99200 | Reward:100.0


 20%|█▉        | 99402/500000 [3:53:21<18:48:27,  5.92it/s]

Episode:99400 | Reward:-100.0


 20%|█▉        | 99602/500000 [3:53:48<17:02:23,  6.53it/s]

Episode:99600 | Reward:100.0


 20%|█▉        | 99802/500000 [3:54:17<17:24:02,  6.39it/s]

Episode:99800 | Reward:100.0


 20%|██        | 100002/500000 [3:54:45<16:59:24,  6.54it/s]

Episode:100000 | Reward:-100.0


 20%|██        | 100202/500000 [3:55:13<17:07:04,  6.49it/s]

Episode:100200 | Reward:-100.0


 20%|██        | 100402/500000 [3:55:43<17:42:58,  6.27it/s]

Episode:100400 | Reward:-100.0


 20%|██        | 100602/500000 [3:56:11<17:19:04,  6.41it/s]

Episode:100600 | Reward:-100.0


 20%|██        | 100802/500000 [3:56:39<17:16:40,  6.42it/s]

Episode:100800 | Reward:-100.0


 20%|██        | 101002/500000 [3:57:07<17:11:56,  6.44it/s]

Episode:101000 | Reward:-100.0


 20%|██        | 101202/500000 [3:57:36<17:24:54,  6.36it/s]

Episode:101200 | Reward:100.0


 20%|██        | 101402/500000 [3:58:05<17:34:51,  6.30it/s]

Episode:101400 | Reward:-100.0


 20%|██        | 101602/500000 [3:58:33<16:41:09,  6.63it/s]

Episode:101600 | Reward:100.0


 20%|██        | 101802/500000 [3:59:01<17:20:20,  6.38it/s]

Episode:101800 | Reward:-100.0


 20%|██        | 102001/500000 [3:59:30<28:29:55,  3.88it/s]

Episode:102000 | Reward:100.0


 20%|██        | 102202/500000 [3:59:59<17:35:13,  6.28it/s]

Episode:102200 | Reward:-100.0


 20%|██        | 102402/500000 [4:00:27<17:08:56,  6.44it/s]

Episode:102400 | Reward:100.0


 21%|██        | 102602/500000 [4:00:52<15:23:58,  7.17it/s]

Episode:102600 | Reward:-100.0


 21%|██        | 102802/500000 [4:01:20<16:39:24,  6.62it/s]

Episode:102800 | Reward:100.0


 21%|██        | 103002/500000 [4:01:49<16:55:59,  6.51it/s]

Episode:103000 | Reward:-100.0


 21%|██        | 103202/500000 [4:02:17<16:56:33,  6.51it/s]

Episode:103200 | Reward:-100.0


 21%|██        | 103402/500000 [4:02:45<17:14:15,  6.39it/s]

Episode:103400 | Reward:100.0


 21%|██        | 103602/500000 [4:03:14<17:01:23,  6.47it/s]

Episode:103600 | Reward:100.0


 21%|██        | 103802/500000 [4:03:43<17:19:08,  6.35it/s]

Episode:103800 | Reward:-100.0


 21%|██        | 104002/500000 [4:04:11<17:45:18,  6.20it/s]

Episode:104000 | Reward:100.0


 21%|██        | 104202/500000 [4:04:39<14:20:12,  7.67it/s]

Episode:104200 | Reward:-100.0


 21%|██        | 104402/500000 [4:05:07<17:02:19,  6.45it/s]

Episode:104400 | Reward:-100.0


 21%|██        | 104602/500000 [4:05:36<17:10:02,  6.40it/s]

Episode:104600 | Reward:100.0


 21%|██        | 104802/500000 [4:06:04<18:24:34,  5.96it/s]

Episode:104800 | Reward:100.0


 21%|██        | 105002/500000 [4:06:33<17:50:36,  6.15it/s]

Episode:105000 | Reward:-100.0


 21%|██        | 105202/500000 [4:07:01<16:42:24,  6.56it/s]

Episode:105200 | Reward:100.0


 21%|██        | 105402/500000 [4:07:30<17:10:05,  6.38it/s]

Episode:105400 | Reward:-100.0


 21%|██        | 105602/500000 [4:07:59<16:33:31,  6.62it/s]

Episode:105600 | Reward:100.0


 21%|██        | 105802/500000 [4:08:27<18:02:03,  6.07it/s]

Episode:105800 | Reward:100.0


 21%|██        | 106002/500000 [4:08:54<14:09:26,  7.73it/s]

Episode:106000 | Reward:-100.0


 21%|██        | 106202/500000 [4:09:22<17:08:58,  6.38it/s]

Episode:106200 | Reward:-100.0


 21%|██▏       | 106402/500000 [4:09:50<17:07:58,  6.38it/s]

Episode:106400 | Reward:100.0


 21%|██▏       | 106602/500000 [4:10:19<17:08:43,  6.37it/s]

Episode:106600 | Reward:-100.0


 21%|██▏       | 106802/500000 [4:10:47<17:08:03,  6.37it/s]

Episode:106800 | Reward:-100.0


 21%|██▏       | 107002/500000 [4:11:16<17:35:08,  6.21it/s]

Episode:107000 | Reward:100.0


 21%|██▏       | 107202/500000 [4:11:45<16:37:28,  6.56it/s]

Episode:107200 | Reward:-100.0


 21%|██▏       | 107402/500000 [4:12:13<16:53:44,  6.45it/s]

Episode:107400 | Reward:-100.0


 22%|██▏       | 107602/500000 [4:12:41<16:47:58,  6.49it/s]

Episode:107600 | Reward:100.0


 22%|██▏       | 107802/500000 [4:13:10<16:54:53,  6.44it/s]

Episode:107800 | Reward:100.0


 22%|██▏       | 108001/500000 [4:13:38<16:47:16,  6.49it/s]

Episode:108000 | Reward:0.0


 22%|██▏       | 108202/500000 [4:14:08<16:09:59,  6.73it/s]

Episode:108200 | Reward:-100.0


 22%|██▏       | 108402/500000 [4:14:36<16:49:57,  6.46it/s]

Episode:108400 | Reward:-100.0


 22%|██▏       | 108602/500000 [4:15:04<16:45:44,  6.49it/s]

Episode:108600 | Reward:100.0


 22%|██▏       | 108802/500000 [4:15:33<17:04:37,  6.36it/s]

Episode:108800 | Reward:100.0


 22%|██▏       | 109002/500000 [4:16:02<17:53:15,  6.07it/s]

Episode:109000 | Reward:-100.0


 22%|██▏       | 109202/500000 [4:16:30<17:50:14,  6.09it/s]

Episode:109200 | Reward:-100.0


 22%|██▏       | 109402/500000 [4:16:59<16:39:00,  6.52it/s]

Episode:109400 | Reward:-100.0


 22%|██▏       | 109602/500000 [4:17:27<17:10:33,  6.31it/s]

Episode:109600 | Reward:-100.0


 22%|██▏       | 109802/500000 [4:17:52<14:17:59,  7.58it/s]

Episode:109800 | Reward:100.0


 22%|██▏       | 110002/500000 [4:18:20<16:48:02,  6.45it/s]

Episode:110000 | Reward:-100.0


 22%|██▏       | 110202/500000 [4:18:48<16:01:10,  6.76it/s]

Episode:110200 | Reward:100.0


 22%|██▏       | 110402/500000 [4:19:15<16:58:05,  6.38it/s]

Episode:110400 | Reward:0.0


 22%|██▏       | 110601/500000 [4:19:44<28:54:15,  3.74it/s]

Episode:110600 | Reward:-100.0


 22%|██▏       | 110802/500000 [4:20:12<17:44:13,  6.10it/s]

Episode:110800 | Reward:100.0


 22%|██▏       | 111002/500000 [4:20:41<16:16:25,  6.64it/s]

Episode:111000 | Reward:-100.0


 22%|██▏       | 111202/500000 [4:21:09<16:41:40,  6.47it/s]

Episode:111200 | Reward:0.0


 22%|██▏       | 111402/500000 [4:21:37<15:55:51,  6.78it/s]

Episode:111400 | Reward:-100.0


 22%|██▏       | 111602/500000 [4:22:06<17:13:20,  6.26it/s]

Episode:111600 | Reward:-100.0


 22%|██▏       | 111802/500000 [4:22:34<17:20:22,  6.22it/s]

Episode:111800 | Reward:-100.0


 22%|██▏       | 112002/500000 [4:23:02<17:02:08,  6.33it/s]

Episode:112000 | Reward:-100.0


 22%|██▏       | 112202/500000 [4:23:28<17:03:19,  6.32it/s]

Episode:112200 | Reward:100.0


 22%|██▏       | 112402/500000 [4:23:56<13:59:23,  7.70it/s]

Episode:112400 | Reward:-100.0


 23%|██▎       | 112602/500000 [4:24:22<14:14:38,  7.55it/s]

Episode:112600 | Reward:-100.0


 23%|██▎       | 112802/500000 [4:24:49<16:56:04,  6.35it/s]

Episode:112800 | Reward:100.0


 23%|██▎       | 113002/500000 [4:25:17<16:52:02,  6.37it/s]

Episode:113000 | Reward:-100.0


 23%|██▎       | 113202/500000 [4:25:45<16:49:32,  6.39it/s]

Episode:113200 | Reward:-100.0


 23%|██▎       | 113402/500000 [4:26:14<16:31:25,  6.50it/s]

Episode:113400 | Reward:100.0


 23%|██▎       | 113602/500000 [4:26:42<16:54:20,  6.35it/s]

Episode:113600 | Reward:100.0


 23%|██▎       | 113802/500000 [4:27:11<17:07:51,  6.26it/s]

Episode:113800 | Reward:100.0


 23%|██▎       | 114002/500000 [4:27:39<16:12:24,  6.62it/s]

Episode:114000 | Reward:100.0


 23%|██▎       | 114202/500000 [4:28:08<16:23:06,  6.54it/s]

Episode:114200 | Reward:-100.0


 23%|██▎       | 114402/500000 [4:28:36<16:28:01,  6.50it/s]

Episode:114400 | Reward:100.0


 23%|██▎       | 114602/500000 [4:29:05<16:44:05,  6.40it/s]

Episode:114600 | Reward:0.0


 23%|██▎       | 114802/500000 [4:29:33<16:34:26,  6.46it/s]

Episode:114800 | Reward:100.0


 23%|██▎       | 115002/500000 [4:30:03<16:37:06,  6.44it/s]

Episode:115000 | Reward:-100.0


 23%|██▎       | 115202/500000 [4:30:31<16:36:21,  6.44it/s]

Episode:115200 | Reward:-100.0


 23%|██▎       | 115402/500000 [4:30:58<16:31:01,  6.47it/s]

Episode:115400 | Reward:-100.0


 23%|██▎       | 115602/500000 [4:31:26<16:29:30,  6.47it/s]

Episode:115600 | Reward:100.0


 23%|██▎       | 115802/500000 [4:31:56<16:49:12,  6.34it/s]

Episode:115800 | Reward:100.0


 23%|██▎       | 116002/500000 [4:32:22<16:25:37,  6.49it/s]

Episode:116000 | Reward:100.0


 23%|██▎       | 116202/500000 [4:32:51<16:28:16,  6.47it/s]

Episode:116200 | Reward:100.0


 23%|██▎       | 116402/500000 [4:33:19<16:44:29,  6.36it/s]

Episode:116400 | Reward:100.0


 23%|██▎       | 116602/500000 [4:33:47<16:31:17,  6.45it/s]

Episode:116600 | Reward:-100.0


 23%|██▎       | 116802/500000 [4:34:17<16:35:48,  6.41it/s]

Episode:116800 | Reward:-100.0


 23%|██▎       | 117002/500000 [4:34:45<17:05:07,  6.23it/s]

Episode:117000 | Reward:-100.0


 23%|██▎       | 117202/500000 [4:35:14<16:29:39,  6.45it/s]

Episode:117200 | Reward:0.0


 23%|██▎       | 117402/500000 [4:35:42<16:13:49,  6.55it/s]

Episode:117400 | Reward:100.0


 24%|██▎       | 117602/500000 [4:36:11<16:16:55,  6.52it/s]

Episode:117600 | Reward:100.0


 24%|██▎       | 117802/500000 [4:36:40<17:17:54,  6.14it/s]

Episode:117800 | Reward:-100.0


 24%|██▎       | 118002/500000 [4:37:08<16:39:23,  6.37it/s]

Episode:118000 | Reward:-100.0


 24%|██▎       | 118202/500000 [4:37:37<17:00:27,  6.24it/s]

Episode:118200 | Reward:0.0


 24%|██▎       | 118402/500000 [4:38:05<16:11:52,  6.54it/s]

Episode:118400 | Reward:100.0


 24%|██▎       | 118602/500000 [4:38:34<16:06:11,  6.58it/s]

Episode:118600 | Reward:-100.0


 24%|██▍       | 118802/500000 [4:39:03<16:57:49,  6.24it/s]

Episode:118800 | Reward:100.0


 24%|██▍       | 119002/500000 [4:39:31<16:28:17,  6.43it/s]

Episode:119000 | Reward:-100.0


 24%|██▍       | 119202/500000 [4:40:00<16:01:50,  6.60it/s]

Episode:119200 | Reward:100.0


 24%|██▍       | 119402/500000 [4:40:28<16:53:54,  6.26it/s]

Episode:119400 | Reward:100.0


 24%|██▍       | 119602/500000 [4:40:55<16:17:11,  6.49it/s]

Episode:119600 | Reward:-100.0


 24%|██▍       | 119802/500000 [4:41:24<16:01:39,  6.59it/s]

Episode:119800 | Reward:-100.0


 24%|██▍       | 120002/500000 [4:41:52<17:13:24,  6.13it/s]

Episode:120000 | Reward:100.0


 24%|██▍       | 120202/500000 [4:42:21<16:33:31,  6.37it/s]

Episode:120200 | Reward:-100.0


 24%|██▍       | 120402/500000 [4:42:49<16:20:13,  6.45it/s]

Episode:120400 | Reward:100.0


 24%|██▍       | 120602/500000 [4:43:17<16:19:29,  6.46it/s]

Episode:120600 | Reward:100.0


 24%|██▍       | 120802/500000 [4:43:46<17:26:45,  6.04it/s]

Episode:120800 | Reward:-100.0


 24%|██▍       | 121002/500000 [4:44:15<16:18:46,  6.45it/s]

Episode:121000 | Reward:100.0


 24%|██▍       | 121202/500000 [4:44:43<16:09:23,  6.51it/s]

Episode:121200 | Reward:-100.0


 24%|██▍       | 121402/500000 [4:45:12<16:59:01,  6.19it/s]

Episode:121400 | Reward:-100.0


 24%|██▍       | 121602/500000 [4:45:40<16:13:50,  6.48it/s]

Episode:121600 | Reward:-100.0


 24%|██▍       | 121802/500000 [4:46:09<16:48:32,  6.25it/s]

Episode:121800 | Reward:100.0


 24%|██▍       | 122002/500000 [4:46:38<16:54:11,  6.21it/s]

Episode:122000 | Reward:100.0


 24%|██▍       | 122202/500000 [4:47:06<16:17:38,  6.44it/s]

Episode:122200 | Reward:-100.0


 24%|██▍       | 122402/500000 [4:47:34<16:39:29,  6.30it/s]

Episode:122400 | Reward:-100.0


 25%|██▍       | 122602/500000 [4:48:02<16:10:20,  6.48it/s]

Episode:122600 | Reward:100.0


 25%|██▍       | 122802/500000 [4:48:31<16:20:00,  6.41it/s]

Episode:122800 | Reward:-100.0


 25%|██▍       | 123002/500000 [4:49:00<16:02:11,  6.53it/s]

Episode:123000 | Reward:-100.0


 25%|██▍       | 123202/500000 [4:49:27<17:21:20,  6.03it/s]

Episode:123200 | Reward:100.0


 25%|██▍       | 123402/500000 [4:49:56<16:32:02,  6.33it/s]

Episode:123400 | Reward:-100.0


 25%|██▍       | 123602/500000 [4:50:25<16:36:57,  6.29it/s]

Episode:123600 | Reward:100.0


 25%|██▍       | 123802/500000 [4:50:53<16:26:03,  6.36it/s]

Episode:123800 | Reward:-100.0


 25%|██▍       | 124002/500000 [4:51:22<16:02:07,  6.51it/s]

Episode:124000 | Reward:-100.0


 25%|██▍       | 124202/500000 [4:51:50<15:52:07,  6.58it/s]

Episode:124200 | Reward:-100.0


 25%|██▍       | 124402/500000 [4:52:19<15:59:03,  6.53it/s]

Episode:124400 | Reward:-100.0


 25%|██▍       | 124602/500000 [4:52:47<16:07:02,  6.47it/s]

Episode:124600 | Reward:-100.0


 25%|██▍       | 124802/500000 [4:53:15<16:44:29,  6.23it/s]

Episode:124800 | Reward:-100.0


 25%|██▌       | 125002/500000 [4:53:44<16:19:02,  6.38it/s]

Episode:125000 | Reward:-100.0


 25%|██▌       | 125202/500000 [4:54:12<13:51:47,  7.51it/s]

Episode:125200 | Reward:100.0


 25%|██▌       | 125402/500000 [4:54:38<16:06:10,  6.46it/s]

Episode:125400 | Reward:100.0


 25%|██▌       | 125602/500000 [4:55:06<15:41:02,  6.63it/s]

Episode:125600 | Reward:100.0


 25%|██▌       | 125802/500000 [4:55:33<16:06:36,  6.45it/s]

Episode:125800 | Reward:100.0


 25%|██▌       | 126002/500000 [4:56:01<16:14:58,  6.39it/s]

Episode:126000 | Reward:-100.0


 25%|██▌       | 126202/500000 [4:56:30<16:33:04,  6.27it/s]

Episode:126200 | Reward:0.0


 25%|██▌       | 126402/500000 [4:56:59<16:35:40,  6.25it/s]

Episode:126400 | Reward:-100.0


 25%|██▌       | 126602/500000 [4:57:27<16:05:05,  6.45it/s]

Episode:126600 | Reward:100.0


 25%|██▌       | 126802/500000 [4:57:55<16:03:51,  6.45it/s]

Episode:126800 | Reward:-100.0


 25%|██▌       | 127002/500000 [4:58:24<16:11:40,  6.40it/s]

Episode:127000 | Reward:-100.0


 25%|██▌       | 127202/500000 [4:58:52<16:22:21,  6.32it/s]

Episode:127200 | Reward:-100.0


 25%|██▌       | 127402/500000 [4:59:19<15:44:20,  6.58it/s]

Episode:127400 | Reward:100.0


 26%|██▌       | 127602/500000 [4:59:47<13:21:07,  7.75it/s]

Episode:127600 | Reward:100.0


 26%|██▌       | 127801/500000 [5:00:13<24:48:36,  4.17it/s]

Episode:127800 | Reward:100.0


 26%|██▌       | 128002/500000 [5:00:42<16:09:05,  6.40it/s]

Episode:128000 | Reward:100.0


 26%|██▌       | 128202/500000 [5:01:08<15:08:35,  6.82it/s]

Episode:128200 | Reward:100.0


 26%|██▌       | 128402/500000 [5:01:36<16:11:12,  6.38it/s]

Episode:128400 | Reward:-100.0


 26%|██▌       | 128602/500000 [5:02:04<16:10:28,  6.38it/s]

Episode:128600 | Reward:100.0


 26%|██▌       | 128802/500000 [5:02:32<16:01:28,  6.43it/s]

Episode:128800 | Reward:100.0


 26%|██▌       | 129002/500000 [5:02:59<15:51:41,  6.50it/s]

Episode:129000 | Reward:100.0


 26%|██▌       | 129202/500000 [5:03:27<16:07:07,  6.39it/s]

Episode:129200 | Reward:100.0


 26%|██▌       | 129402/500000 [5:03:55<16:06:41,  6.39it/s]

Episode:129400 | Reward:-100.0


 26%|██▌       | 129602/500000 [5:04:25<16:07:40,  6.38it/s]

Episode:129600 | Reward:-100.0


 26%|██▌       | 129802/500000 [5:04:53<15:47:02,  6.51it/s]

Episode:129800 | Reward:100.0


 26%|██▌       | 130002/500000 [5:05:22<15:50:31,  6.49it/s]

Episode:130000 | Reward:-100.0


 26%|██▌       | 130202/500000 [5:05:50<16:12:21,  6.34it/s]

Episode:130200 | Reward:-100.0


 26%|██▌       | 130402/500000 [5:06:19<17:36:21,  5.83it/s]

Episode:130400 | Reward:100.0


 26%|██▌       | 130602/500000 [5:06:47<15:58:00,  6.43it/s]

Episode:130600 | Reward:100.0


 26%|██▌       | 130802/500000 [5:07:15<15:46:56,  6.50it/s]

Episode:130800 | Reward:100.0


 26%|██▌       | 131002/500000 [5:07:44<15:23:10,  6.66it/s]

Episode:131000 | Reward:100.0


 26%|██▌       | 131202/500000 [5:08:12<17:02:40,  6.01it/s]

Episode:131200 | Reward:100.0


 26%|██▋       | 131402/500000 [5:08:41<16:02:39,  6.38it/s]

Episode:131400 | Reward:100.0


 26%|██▋       | 131602/500000 [5:09:10<16:38:52,  6.15it/s]

Episode:131600 | Reward:-100.0


 26%|██▋       | 131802/500000 [5:09:39<16:39:38,  6.14it/s]

Episode:131800 | Reward:100.0


 26%|██▋       | 132002/500000 [5:10:07<15:47:14,  6.47it/s]

Episode:132000 | Reward:100.0


 26%|██▋       | 132202/500000 [5:10:36<15:43:47,  6.50it/s]

Episode:132200 | Reward:100.0


 26%|██▋       | 132402/500000 [5:11:04<16:07:23,  6.33it/s]

Episode:132400 | Reward:100.0


 27%|██▋       | 132602/500000 [5:11:32<15:51:26,  6.44it/s]

Episode:132600 | Reward:-100.0


 27%|██▋       | 132802/500000 [5:12:01<15:33:14,  6.56it/s]

Episode:132800 | Reward:100.0


 27%|██▋       | 133002/500000 [5:12:30<16:05:16,  6.34it/s]

Episode:133000 | Reward:100.0


 27%|██▋       | 133202/500000 [5:12:58<15:44:12,  6.47it/s]

Episode:133200 | Reward:100.0


 27%|██▋       | 133402/500000 [5:13:26<15:53:11,  6.41it/s]

Episode:133400 | Reward:-100.0


 27%|██▋       | 133602/500000 [5:13:54<15:55:11,  6.39it/s]

Episode:133600 | Reward:100.0


 27%|██▋       | 133802/500000 [5:14:23<15:21:51,  6.62it/s]

Episode:133800 | Reward:100.0


 27%|██▋       | 134002/500000 [5:14:50<12:44:12,  7.98it/s]

Episode:134000 | Reward:100.0


 27%|██▋       | 134202/500000 [5:15:18<15:35:39,  6.52it/s]

Episode:134200 | Reward:100.0


 27%|██▋       | 134402/500000 [5:15:45<15:43:19,  6.46it/s]

Episode:134400 | Reward:-100.0


 27%|██▋       | 134602/500000 [5:16:11<13:14:30,  7.67it/s]

Episode:134600 | Reward:-100.0


 27%|██▋       | 134802/500000 [5:16:39<15:48:11,  6.42it/s]

Episode:134800 | Reward:100.0


 27%|██▋       | 135002/500000 [5:17:07<13:19:19,  7.61it/s]

Episode:135000 | Reward:100.0


 27%|██▋       | 135202/500000 [5:17:33<13:08:37,  7.71it/s]

Episode:135200 | Reward:-100.0


 27%|██▋       | 135402/500000 [5:18:00<15:39:29,  6.47it/s]

Episode:135400 | Reward:100.0


 27%|██▋       | 135602/500000 [5:18:29<15:51:49,  6.38it/s]

Episode:135600 | Reward:-100.0


 27%|██▋       | 135802/500000 [5:18:57<15:46:01,  6.42it/s]

Episode:135800 | Reward:100.0


 27%|██▋       | 136002/500000 [5:19:22<13:27:31,  7.51it/s]

Episode:136000 | Reward:100.0


 27%|██▋       | 136202/500000 [5:19:50<15:33:47,  6.49it/s]

Episode:136200 | Reward:-100.0


 27%|██▋       | 136402/500000 [5:20:18<15:35:21,  6.48it/s]

Episode:136400 | Reward:100.0


 27%|██▋       | 136602/500000 [5:20:47<15:12:36,  6.64it/s]

Episode:136600 | Reward:100.0


 27%|██▋       | 136802/500000 [5:21:16<15:48:35,  6.38it/s]

Episode:136800 | Reward:-100.0


 27%|██▋       | 137002/500000 [5:21:44<14:46:24,  6.83it/s]

Episode:137000 | Reward:-100.0


 27%|██▋       | 137202/500000 [5:22:12<15:36:23,  6.46it/s]

Episode:137200 | Reward:100.0


 27%|██▋       | 137402/500000 [5:22:41<15:38:50,  6.44it/s]

Episode:137400 | Reward:-100.0


 28%|██▊       | 137602/500000 [5:23:10<15:39:54,  6.43it/s]

Episode:137600 | Reward:-100.0


 28%|██▊       | 137802/500000 [5:23:38<15:32:36,  6.47it/s]

Episode:137800 | Reward:100.0


 28%|██▊       | 138002/500000 [5:24:06<15:59:30,  6.29it/s]

Episode:138000 | Reward:0.0


 28%|██▊       | 138202/500000 [5:24:35<15:44:01,  6.39it/s]

Episode:138200 | Reward:100.0


 28%|██▊       | 138402/500000 [5:25:02<15:34:16,  6.45it/s]

Episode:138400 | Reward:-100.0


 28%|██▊       | 138602/500000 [5:25:30<15:34:21,  6.45it/s]

Episode:138600 | Reward:100.0


 28%|██▊       | 138802/500000 [5:25:59<15:26:08,  6.50it/s]

Episode:138800 | Reward:100.0


 28%|██▊       | 139002/500000 [5:26:26<16:00:02,  6.27it/s]

Episode:139000 | Reward:100.0


 28%|██▊       | 139202/500000 [5:26:56<15:28:06,  6.48it/s]

Episode:139200 | Reward:100.0


 28%|██▊       | 139402/500000 [5:27:24<15:44:10,  6.37it/s]

Episode:139400 | Reward:100.0


 28%|██▊       | 139602/500000 [5:27:53<15:32:20,  6.44it/s]

Episode:139600 | Reward:0.0


 28%|██▊       | 139802/500000 [5:28:21<16:04:26,  6.22it/s]

Episode:139800 | Reward:100.0


 28%|██▊       | 140002/500000 [5:28:50<15:21:19,  6.51it/s]

Episode:140000 | Reward:100.0


 28%|██▊       | 140202/500000 [5:29:19<15:12:32,  6.57it/s]

Episode:140200 | Reward:100.0


 28%|██▊       | 140402/500000 [5:29:47<16:00:23,  6.24it/s]

Episode:140400 | Reward:100.0


 28%|██▊       | 140602/500000 [5:30:16<15:12:12,  6.57it/s]

Episode:140600 | Reward:100.0


 28%|██▊       | 140802/500000 [5:30:45<15:18:10,  6.52it/s]

Episode:140800 | Reward:100.0


 28%|██▊       | 141002/500000 [5:31:14<15:30:49,  6.43it/s]

Episode:141000 | Reward:-100.0


 28%|██▊       | 141202/500000 [5:31:42<15:30:57,  6.42it/s]

Episode:141200 | Reward:-100.0


 28%|██▊       | 141402/500000 [5:32:11<15:55:14,  6.26it/s]

Episode:141400 | Reward:-100.0


 28%|██▊       | 141602/500000 [5:32:40<22:39:18,  4.39it/s]

Episode:141600 | Reward:-100.0


 28%|██▊       | 141802/500000 [5:33:09<15:31:54,  6.41it/s]

Episode:141800 | Reward:100.0


 28%|██▊       | 142002/500000 [5:33:37<15:12:50,  6.54it/s]

Episode:142000 | Reward:-100.0


 28%|██▊       | 142202/500000 [5:34:05<15:12:46,  6.53it/s]

Episode:142200 | Reward:100.0


 28%|██▊       | 142402/500000 [5:34:33<14:26:30,  6.88it/s]

Episode:142400 | Reward:100.0


 29%|██▊       | 142602/500000 [5:35:02<15:11:06,  6.54it/s]

Episode:142600 | Reward:100.0


 29%|██▊       | 142802/500000 [5:35:30<15:18:31,  6.48it/s]

Episode:142800 | Reward:-100.0


 29%|██▊       | 143002/500000 [5:35:58<15:15:10,  6.50it/s]

Episode:143000 | Reward:100.0


 29%|██▊       | 143202/500000 [5:36:27<15:28:55,  6.40it/s]

Episode:143200 | Reward:100.0


 29%|██▊       | 143402/500000 [5:36:55<15:23:45,  6.43it/s]

Episode:143400 | Reward:-100.0


 29%|██▊       | 143602/500000 [5:37:24<15:56:20,  6.21it/s]

Episode:143600 | Reward:-100.0


 29%|██▉       | 143802/500000 [5:37:52<14:33:44,  6.79it/s]

Episode:143800 | Reward:-100.0


 29%|██▉       | 144002/500000 [5:38:21<15:23:40,  6.42it/s]

Episode:144000 | Reward:-100.0


 29%|██▉       | 144202/500000 [5:38:50<15:22:27,  6.43it/s]

Episode:144200 | Reward:-100.0


 29%|██▉       | 144402/500000 [5:39:18<15:15:45,  6.47it/s]

Episode:144400 | Reward:-100.0


 29%|██▉       | 144602/500000 [5:39:46<15:42:10,  6.29it/s]

Episode:144600 | Reward:100.0


 29%|██▉       | 144802/500000 [5:40:15<15:19:14,  6.44it/s]

Episode:144800 | Reward:100.0


 29%|██▉       | 145002/500000 [5:40:43<15:26:55,  6.38it/s]

Episode:145000 | Reward:-100.0


 29%|██▉       | 145202/500000 [5:41:13<15:43:56,  6.26it/s]

Episode:145200 | Reward:-100.0


 29%|██▉       | 145402/500000 [5:41:40<15:21:45,  6.41it/s]

Episode:145400 | Reward:-100.0


 29%|██▉       | 145602/500000 [5:42:09<15:41:12,  6.28it/s]

Episode:145600 | Reward:0.0


 29%|██▉       | 145802/500000 [5:42:37<15:02:09,  6.54it/s]

Episode:145800 | Reward:100.0


 29%|██▉       | 146002/500000 [5:43:07<15:44:29,  6.25it/s]

Episode:146000 | Reward:-100.0


 29%|██▉       | 146202/500000 [5:43:34<13:03:03,  7.53it/s]

Episode:146200 | Reward:-100.0


 29%|██▉       | 146402/500000 [5:44:02<14:56:29,  6.57it/s]

Episode:146400 | Reward:-100.0


 29%|██▉       | 146602/500000 [5:44:29<15:10:25,  6.47it/s]

Episode:146600 | Reward:100.0


 29%|██▉       | 146802/500000 [5:44:57<15:05:55,  6.50it/s]

Episode:146800 | Reward:100.0


 29%|██▉       | 147002/500000 [5:45:25<14:51:06,  6.60it/s]

Episode:147000 | Reward:100.0


 29%|██▉       | 147202/500000 [5:45:53<14:46:39,  6.63it/s]

Episode:147200 | Reward:-100.0


 29%|██▉       | 147402/500000 [5:46:21<14:45:02,  6.64it/s]

Episode:147400 | Reward:100.0


 30%|██▉       | 147602/500000 [5:46:50<14:47:56,  6.61it/s]

Episode:147600 | Reward:-100.0


 30%|██▉       | 147802/500000 [5:47:19<15:05:10,  6.48it/s]

Episode:147800 | Reward:-100.0


 30%|██▉       | 148002/500000 [5:47:48<15:08:36,  6.46it/s]

Episode:148000 | Reward:100.0


 30%|██▉       | 148202/500000 [5:48:15<13:22:55,  7.30it/s]

Episode:148200 | Reward:-100.0


 30%|██▉       | 148402/500000 [5:48:41<14:53:48,  6.56it/s]

Episode:148400 | Reward:-100.0


 30%|██▉       | 148602/500000 [5:49:10<15:24:14,  6.34it/s]

Episode:148600 | Reward:100.0


 30%|██▉       | 148802/500000 [5:49:39<15:25:49,  6.32it/s]

Episode:148800 | Reward:-100.0


 30%|██▉       | 149002/500000 [5:50:07<15:01:44,  6.49it/s]

Episode:149000 | Reward:100.0


 30%|██▉       | 149202/500000 [5:50:35<15:43:00,  6.20it/s]

Episode:149200 | Reward:100.0


 30%|██▉       | 149402/500000 [5:51:04<15:03:42,  6.47it/s]

Episode:149400 | Reward:100.0


 30%|██▉       | 149602/500000 [5:51:33<14:57:25,  6.51it/s]

Episode:149600 | Reward:-100.0


 30%|██▉       | 149802/500000 [5:51:59<12:36:33,  7.71it/s]

Episode:149800 | Reward:100.0


 30%|███       | 150002/500000 [5:52:26<14:54:14,  6.52it/s]

Episode:150000 | Reward:-100.0


 30%|███       | 150202/500000 [5:52:55<14:53:33,  6.52it/s]

Episode:150200 | Reward:100.0


 30%|███       | 150402/500000 [5:53:24<15:26:03,  6.29it/s]

Episode:150400 | Reward:100.0


 30%|███       | 150602/500000 [5:53:53<14:21:45,  6.76it/s]

Episode:150600 | Reward:-100.0


 30%|███       | 150802/500000 [5:54:21<15:03:14,  6.44it/s]

Episode:150800 | Reward:-100.0


 30%|███       | 151002/500000 [5:54:49<15:15:18,  6.35it/s]

Episode:151000 | Reward:-100.0


 30%|███       | 151202/500000 [5:55:19<21:20:12,  4.54it/s]

Episode:151200 | Reward:100.0


 30%|███       | 151402/500000 [5:55:48<14:55:39,  6.49it/s]

Episode:151400 | Reward:100.0


 30%|███       | 151602/500000 [5:56:13<15:32:11,  6.23it/s]

Episode:151600 | Reward:-100.0


 30%|███       | 151802/500000 [5:56:42<15:17:06,  6.33it/s]

Episode:151800 | Reward:100.0


 30%|███       | 152002/500000 [5:57:11<14:25:49,  6.70it/s]

Episode:152000 | Reward:-100.0


 30%|███       | 152202/500000 [5:57:39<14:50:04,  6.51it/s]

Episode:152200 | Reward:100.0


 30%|███       | 152402/500000 [5:58:08<14:54:45,  6.47it/s]

Episode:152400 | Reward:100.0


 31%|███       | 152602/500000 [5:58:34<15:03:19,  6.41it/s]

Episode:152600 | Reward:-100.0


 31%|███       | 152801/500000 [5:59:03<26:42:34,  3.61it/s]

Episode:152800 | Reward:-100.0


 31%|███       | 153002/500000 [5:59:32<16:17:51,  5.91it/s]

Episode:153000 | Reward:100.0


 31%|███       | 153202/500000 [5:59:59<14:51:22,  6.48it/s]

Episode:153200 | Reward:100.0


 31%|███       | 153402/500000 [6:00:28<15:01:16,  6.41it/s]

Episode:153400 | Reward:100.0


 31%|███       | 153602/500000 [6:00:56<14:43:01,  6.54it/s]

Episode:153600 | Reward:100.0


 31%|███       | 153802/500000 [6:01:24<14:14:10,  6.75it/s]

Episode:153800 | Reward:-100.0


 31%|███       | 154002/500000 [6:01:52<14:32:43,  6.61it/s]

Episode:154000 | Reward:100.0


 31%|███       | 154202/500000 [6:02:21<14:48:42,  6.49it/s]

Episode:154200 | Reward:100.0


 31%|███       | 154402/500000 [6:02:49<14:34:39,  6.59it/s]

Episode:154400 | Reward:100.0


 31%|███       | 154602/500000 [6:03:19<15:14:22,  6.30it/s]

Episode:154600 | Reward:-100.0


 31%|███       | 154802/500000 [6:03:47<14:43:26,  6.51it/s]

Episode:154800 | Reward:100.0


 31%|███       | 155002/500000 [6:04:14<12:35:53,  7.61it/s]

Episode:155000 | Reward:-100.0


 31%|███       | 155202/500000 [6:04:40<14:24:19,  6.65it/s]

Episode:155200 | Reward:-100.0


 31%|███       | 155402/500000 [6:05:09<28:02:00,  3.41it/s]

Episode:155400 | Reward:100.0


 31%|███       | 155602/500000 [6:05:37<14:21:48,  6.66it/s]

Episode:155600 | Reward:100.0


 31%|███       | 155802/500000 [6:06:06<14:27:24,  6.61it/s]

Episode:155800 | Reward:-100.0


 31%|███       | 156002/500000 [6:06:34<14:24:19,  6.63it/s]

Episode:156000 | Reward:100.0


 31%|███       | 156202/500000 [6:07:03<15:00:36,  6.36it/s]

Episode:156200 | Reward:100.0


 31%|███▏      | 156402/500000 [6:07:32<14:47:34,  6.45it/s]

Episode:156400 | Reward:-100.0


 31%|███▏      | 156602/500000 [6:08:01<14:45:06,  6.47it/s]

Episode:156600 | Reward:-100.0


 31%|███▏      | 156802/500000 [6:08:28<15:03:16,  6.33it/s]

Episode:156800 | Reward:0.0


 31%|███▏      | 157002/500000 [6:08:56<14:24:45,  6.61it/s]

Episode:157000 | Reward:-100.0


 31%|███▏      | 157202/500000 [6:09:25<12:42:04,  7.50it/s]

Episode:157200 | Reward:100.0


 31%|███▏      | 157402/500000 [6:09:53<15:18:03,  6.22it/s]

Episode:157400 | Reward:-100.0


 32%|███▏      | 157602/500000 [6:10:22<14:48:46,  6.42it/s]

Episode:157600 | Reward:-100.0


 32%|███▏      | 157802/500000 [6:10:50<14:31:27,  6.54it/s]

Episode:157800 | Reward:100.0


 32%|███▏      | 158002/500000 [6:11:19<15:21:40,  6.18it/s]

Episode:158000 | Reward:100.0


 32%|███▏      | 158202/500000 [6:11:48<15:02:14,  6.31it/s]

Episode:158200 | Reward:100.0


 32%|███▏      | 158402/500000 [6:12:16<14:24:22,  6.59it/s]

Episode:158400 | Reward:100.0


 32%|███▏      | 158602/500000 [6:12:44<14:30:41,  6.54it/s]

Episode:158600 | Reward:-100.0


 32%|███▏      | 158802/500000 [6:13:13<14:32:07,  6.52it/s]

Episode:158800 | Reward:100.0


 32%|███▏      | 159002/500000 [6:13:43<14:46:27,  6.41it/s]

Episode:159000 | Reward:100.0


 32%|███▏      | 159202/500000 [6:14:11<14:47:54,  6.40it/s]

Episode:159200 | Reward:100.0


 32%|███▏      | 159402/500000 [6:14:39<14:42:28,  6.43it/s]

Episode:159400 | Reward:100.0


 32%|███▏      | 159602/500000 [6:15:08<14:33:29,  6.49it/s]

Episode:159600 | Reward:100.0


 32%|███▏      | 159802/500000 [6:15:37<14:48:40,  6.38it/s]

Episode:159800 | Reward:100.0


 32%|███▏      | 160002/500000 [6:16:06<14:42:37,  6.42it/s]

Episode:160000 | Reward:-100.0


 32%|███▏      | 160202/500000 [6:16:35<14:33:49,  6.48it/s]

Episode:160200 | Reward:-100.0


 32%|███▏      | 160402/500000 [6:17:03<14:44:52,  6.40it/s]

Episode:160400 | Reward:-100.0


 32%|███▏      | 160602/500000 [6:17:33<14:47:54,  6.37it/s]

Episode:160600 | Reward:-100.0


 32%|███▏      | 160802/500000 [6:18:01<14:43:10,  6.40it/s]

Episode:160800 | Reward:100.0


 32%|███▏      | 161002/500000 [6:18:30<14:34:08,  6.46it/s]

Episode:161000 | Reward:100.0


 32%|███▏      | 161202/500000 [6:18:58<15:00:24,  6.27it/s]

Episode:161200 | Reward:100.0


 32%|███▏      | 161402/500000 [6:19:28<15:24:19,  6.11it/s]

Episode:161400 | Reward:-100.0


 32%|███▏      | 161602/500000 [6:19:56<14:30:37,  6.48it/s]

Episode:161600 | Reward:100.0


 32%|███▏      | 161802/500000 [6:20:24<14:32:17,  6.46it/s]

Episode:161800 | Reward:-100.0


 32%|███▏      | 162002/500000 [6:20:51<12:30:49,  7.50it/s]

Episode:162000 | Reward:100.0


 32%|███▏      | 162202/500000 [6:21:17<14:41:41,  6.39it/s]

Episode:162200 | Reward:100.0


 32%|███▏      | 162402/500000 [6:21:46<14:43:47,  6.37it/s]

Episode:162400 | Reward:-100.0


 33%|███▎      | 162602/500000 [6:22:15<14:33:54,  6.43it/s]

Episode:162600 | Reward:-100.0


 33%|███▎      | 162802/500000 [6:22:43<14:34:02,  6.43it/s]

Episode:162800 | Reward:100.0


 33%|███▎      | 163002/500000 [6:23:12<15:47:05,  5.93it/s]

Episode:163000 | Reward:100.0


 33%|███▎      | 163202/500000 [6:23:40<12:15:06,  7.64it/s]

Episode:163200 | Reward:100.0


 33%|███▎      | 163402/500000 [6:24:07<14:04:28,  6.64it/s]

Episode:163400 | Reward:-100.0


 33%|███▎      | 163602/500000 [6:24:35<14:27:27,  6.46it/s]

Episode:163600 | Reward:100.0


 33%|███▎      | 163802/500000 [6:25:04<13:37:39,  6.85it/s]

Episode:163800 | Reward:-100.0


 33%|███▎      | 164002/500000 [6:25:31<14:13:42,  6.56it/s]

Episode:164000 | Reward:-100.0


 33%|███▎      | 164202/500000 [6:25:58<14:08:31,  6.60it/s]

Episode:164200 | Reward:100.0


 33%|███▎      | 164402/500000 [6:26:27<14:58:43,  6.22it/s]

Episode:164400 | Reward:0.0


 33%|███▎      | 164602/500000 [6:26:54<14:55:39,  6.24it/s]

Episode:164600 | Reward:100.0


 33%|███▎      | 164802/500000 [6:27:22<14:08:59,  6.58it/s]

Episode:164800 | Reward:-100.0


 33%|███▎      | 165002/500000 [6:27:51<14:21:20,  6.48it/s]

Episode:165000 | Reward:-100.0


 33%|███▎      | 165202/500000 [6:28:18<14:07:59,  6.58it/s]

Episode:165200 | Reward:100.0


 33%|███▎      | 165402/500000 [6:28:47<14:34:09,  6.38it/s]

Episode:165400 | Reward:100.0


 33%|███▎      | 165602/500000 [6:29:15<13:15:36,  7.01it/s]

Episode:165600 | Reward:-100.0


 33%|███▎      | 165802/500000 [6:29:43<14:49:36,  6.26it/s]

Episode:165800 | Reward:100.0


 33%|███▎      | 166002/500000 [6:30:11<13:57:25,  6.65it/s]

Episode:166000 | Reward:100.0


 33%|███▎      | 166202/500000 [6:30:40<14:05:18,  6.58it/s]

Episode:166200 | Reward:100.0


 33%|███▎      | 166402/500000 [6:31:08<14:26:51,  6.41it/s]

Episode:166400 | Reward:100.0


 33%|███▎      | 166602/500000 [6:31:38<14:35:14,  6.35it/s]

Episode:166600 | Reward:100.0


 33%|███▎      | 166802/500000 [6:32:06<14:19:42,  6.46it/s]

Episode:166800 | Reward:-100.0


 33%|███▎      | 167002/500000 [6:32:35<14:30:53,  6.37it/s]

Episode:167000 | Reward:-100.0


 33%|███▎      | 167202/500000 [6:33:02<12:33:50,  7.36it/s]

Episode:167200 | Reward:-100.0


 33%|███▎      | 167402/500000 [6:33:30<14:23:54,  6.42it/s]

Episode:167400 | Reward:-100.0


 34%|███▎      | 167602/500000 [6:33:58<14:39:18,  6.30it/s]

Episode:167600 | Reward:100.0


 34%|███▎      | 167802/500000 [6:34:27<14:27:21,  6.38it/s]

Episode:167800 | Reward:100.0


 34%|███▎      | 168002/500000 [6:34:55<14:17:05,  6.46it/s]

Episode:168000 | Reward:100.0


 34%|███▎      | 168202/500000 [6:35:23<14:37:02,  6.31it/s]

Episode:168200 | Reward:100.0


 34%|███▎      | 168402/500000 [6:35:53<14:14:01,  6.47it/s]

Episode:168400 | Reward:100.0


 34%|███▎      | 168602/500000 [6:36:21<14:14:42,  6.46it/s]

Episode:168600 | Reward:-100.0


 34%|███▍      | 168802/500000 [6:36:50<13:47:41,  6.67it/s]

Episode:168800 | Reward:-100.0


 34%|███▍      | 169002/500000 [6:37:18<14:30:41,  6.34it/s]

Episode:169000 | Reward:-100.0


 34%|███▍      | 169202/500000 [6:37:47<14:12:10,  6.47it/s]

Episode:169200 | Reward:-100.0


 34%|███▍      | 169402/500000 [6:38:16<14:07:15,  6.50it/s]

Episode:169400 | Reward:100.0


 34%|███▍      | 169602/500000 [6:38:44<14:17:00,  6.43it/s]

Episode:169600 | Reward:-100.0


 34%|███▍      | 169802/500000 [6:39:12<14:36:54,  6.28it/s]

Episode:169800 | Reward:-100.0


 34%|███▍      | 170002/500000 [6:39:42<15:03:52,  6.08it/s]

Episode:170000 | Reward:-100.0


 34%|███▍      | 170202/500000 [6:40:10<13:48:11,  6.64it/s]

Episode:170200 | Reward:100.0


 34%|███▍      | 170402/500000 [6:40:39<14:12:56,  6.44it/s]

Episode:170400 | Reward:-100.0


 34%|███▍      | 170602/500000 [6:41:08<14:20:55,  6.38it/s]

Episode:170600 | Reward:100.0


 34%|███▍      | 170802/500000 [6:41:36<14:05:52,  6.49it/s]

Episode:170800 | Reward:100.0


 34%|███▍      | 171002/500000 [6:42:05<14:03:08,  6.50it/s]

Episode:171000 | Reward:-100.0


 34%|███▍      | 171202/500000 [6:42:34<14:21:02,  6.36it/s]

Episode:171200 | Reward:100.0


 34%|███▍      | 171402/500000 [6:43:01<13:48:36,  6.61it/s]

Episode:171400 | Reward:100.0


 34%|███▍      | 171602/500000 [6:43:29<14:49:20,  6.15it/s]

Episode:171600 | Reward:-100.0


 34%|███▍      | 171802/500000 [6:43:57<13:55:57,  6.54it/s]

Episode:171800 | Reward:-100.0


 34%|███▍      | 172002/500000 [6:44:25<13:50:57,  6.58it/s]

Episode:172000 | Reward:100.0


 34%|███▍      | 172202/500000 [6:44:54<14:03:40,  6.48it/s]

Episode:172200 | Reward:100.0


 34%|███▍      | 172402/500000 [6:45:22<14:24:33,  6.32it/s]

Episode:172400 | Reward:100.0


 35%|███▍      | 172602/500000 [6:45:52<13:28:46,  6.75it/s]

Episode:172600 | Reward:100.0


 35%|███▍      | 172802/500000 [6:46:20<14:19:37,  6.34it/s]

Episode:172800 | Reward:100.0


 35%|███▍      | 173002/500000 [6:46:47<14:24:25,  6.30it/s]

Episode:173000 | Reward:-100.0


 35%|███▍      | 173202/500000 [6:47:16<14:12:28,  6.39it/s]

Episode:173200 | Reward:-100.0


 35%|███▍      | 173402/500000 [6:47:44<13:57:37,  6.50it/s]

Episode:173400 | Reward:100.0


 35%|███▍      | 173602/500000 [6:48:14<14:02:24,  6.46it/s]

Episode:173600 | Reward:-100.0


 35%|███▍      | 173802/500000 [6:48:42<14:33:01,  6.23it/s]

Episode:173800 | Reward:-100.0


 35%|███▍      | 174002/500000 [6:49:11<12:25:06,  7.29it/s]

Episode:174000 | Reward:-100.0


 35%|███▍      | 174202/500000 [6:49:39<13:58:06,  6.48it/s]

Episode:174200 | Reward:100.0


 35%|███▍      | 174402/500000 [6:50:08<14:00:09,  6.46it/s]

Episode:174400 | Reward:-100.0


 35%|███▍      | 174602/500000 [6:50:37<14:18:47,  6.32it/s]

Episode:174600 | Reward:-100.0


 35%|███▍      | 174802/500000 [6:51:05<14:16:02,  6.33it/s]

Episode:174800 | Reward:100.0


 35%|███▌      | 175002/500000 [6:51:33<14:38:35,  6.17it/s]

Episode:175000 | Reward:100.0


 35%|███▌      | 175202/500000 [6:52:03<13:18:45,  6.78it/s]

Episode:175200 | Reward:-100.0


 35%|███▌      | 175402/500000 [6:52:31<14:14:35,  6.33it/s]

Episode:175400 | Reward:100.0


 35%|███▌      | 175602/500000 [6:53:00<13:52:25,  6.50it/s]

Episode:175600 | Reward:-100.0


 35%|███▌      | 175802/500000 [6:53:28<13:50:18,  6.51it/s]

Episode:175800 | Reward:100.0


 35%|███▌      | 176002/500000 [6:53:58<14:24:31,  6.25it/s]

Episode:176000 | Reward:100.0


 35%|███▌      | 176202/500000 [6:54:26<14:10:24,  6.35it/s]

Episode:176200 | Reward:100.0


 35%|███▌      | 176402/500000 [6:54:55<13:45:46,  6.53it/s]

Episode:176400 | Reward:-100.0


 35%|███▌      | 176602/500000 [6:55:23<13:49:01,  6.50it/s]

Episode:176600 | Reward:100.0


 35%|███▌      | 176802/500000 [6:55:51<13:44:48,  6.53it/s]

Episode:176800 | Reward:-100.0


 35%|███▌      | 177002/500000 [6:56:20<13:44:13,  6.53it/s]

Episode:177000 | Reward:100.0


 35%|███▌      | 177202/500000 [6:56:49<14:29:31,  6.19it/s]

Episode:177200 | Reward:100.0


 35%|███▌      | 177402/500000 [6:57:17<13:58:41,  6.41it/s]

Episode:177400 | Reward:-100.0


 36%|███▌      | 177602/500000 [6:57:45<14:09:21,  6.33it/s]

Episode:177600 | Reward:-100.0


 36%|███▌      | 177802/500000 [6:58:14<13:54:56,  6.43it/s]

Episode:177800 | Reward:100.0


 36%|███▌      | 178002/500000 [6:58:43<14:01:40,  6.38it/s]

Episode:178000 | Reward:-100.0


 36%|███▌      | 178202/500000 [6:59:11<14:27:58,  6.18it/s]

Episode:178200 | Reward:-100.0


 36%|███▌      | 178402/500000 [6:59:40<13:47:46,  6.48it/s]

Episode:178400 | Reward:100.0


 36%|███▌      | 178602/500000 [7:00:09<14:55:20,  5.98it/s]

Episode:178600 | Reward:100.0


 36%|███▌      | 178802/500000 [7:00:37<13:46:14,  6.48it/s]

Episode:178800 | Reward:100.0


 36%|███▌      | 179002/500000 [7:01:05<14:22:38,  6.20it/s]

Episode:179000 | Reward:100.0


 36%|███▌      | 179202/500000 [7:01:34<14:49:02,  6.01it/s]

Episode:179200 | Reward:100.0


 36%|███▌      | 179402/500000 [7:01:58<12:04:56,  7.37it/s]

Episode:179400 | Reward:100.0


 36%|███▌      | 179602/500000 [7:02:23<11:42:32,  7.60it/s]

Episode:179600 | Reward:100.0


 36%|███▌      | 179802/500000 [7:02:51<13:59:42,  6.36it/s]

Episode:179800 | Reward:-100.0


 36%|███▌      | 180002/500000 [7:03:20<13:46:00,  6.46it/s]

Episode:180000 | Reward:100.0


 36%|███▌      | 180202/500000 [7:03:48<13:55:12,  6.38it/s]

Episode:180200 | Reward:-100.0


 36%|███▌      | 180402/500000 [7:04:18<13:38:30,  6.51it/s]

Episode:180400 | Reward:100.0


 36%|███▌      | 180602/500000 [7:04:46<13:29:53,  6.57it/s]

Episode:180600 | Reward:100.0


 36%|███▌      | 180802/500000 [7:05:13<11:40:12,  7.60it/s]

Episode:180800 | Reward:-100.0


 36%|███▌      | 181002/500000 [7:05:41<12:58:51,  6.83it/s]

Episode:181000 | Reward:-100.0


 36%|███▌      | 181202/500000 [7:06:10<14:36:45,  6.06it/s]

Episode:181200 | Reward:-100.0


 36%|███▋      | 181402/500000 [7:06:39<13:36:10,  6.51it/s]

Episode:181400 | Reward:-100.0


 36%|███▋      | 181602/500000 [7:07:07<13:47:10,  6.42it/s]

Episode:181600 | Reward:-100.0


 36%|███▋      | 181802/500000 [7:07:36<13:27:42,  6.57it/s]

Episode:181800 | Reward:-100.0


 36%|███▋      | 182002/500000 [7:08:04<13:14:14,  6.67it/s]

Episode:182000 | Reward:100.0


 36%|███▋      | 182202/500000 [7:08:34<13:32:37,  6.52it/s]

Episode:182200 | Reward:-100.0


 36%|███▋      | 182402/500000 [7:09:01<12:33:27,  7.03it/s]

Episode:182400 | Reward:100.0


 37%|███▋      | 182602/500000 [7:09:29<13:07:10,  6.72it/s]

Episode:182600 | Reward:-100.0


 37%|███▋      | 182802/500000 [7:09:56<11:22:44,  7.74it/s]

Episode:182800 | Reward:100.0


 37%|███▋      | 183002/500000 [7:10:25<13:54:48,  6.33it/s]

Episode:183000 | Reward:-100.0


 37%|███▋      | 183202/500000 [7:10:53<13:47:25,  6.38it/s]

Episode:183200 | Reward:100.0


 37%|███▋      | 183402/500000 [7:11:21<13:18:39,  6.61it/s]

Episode:183400 | Reward:-100.0


 37%|███▋      | 183602/500000 [7:11:50<13:44:36,  6.39it/s]

Episode:183600 | Reward:100.0


 37%|███▋      | 183802/500000 [7:12:19<13:31:21,  6.50it/s]

Episode:183800 | Reward:-100.0


 37%|███▋      | 184002/500000 [7:12:48<13:19:54,  6.58it/s]

Episode:184000 | Reward:-100.0


 37%|███▋      | 184202/500000 [7:13:15<11:35:44,  7.57it/s]

Episode:184200 | Reward:100.0


 37%|███▋      | 184402/500000 [7:13:43<14:02:43,  6.24it/s]

Episode:184400 | Reward:100.0


 37%|███▋      | 184602/500000 [7:14:12<13:44:51,  6.37it/s]

Episode:184600 | Reward:-100.0


 37%|███▋      | 184802/500000 [7:14:41<13:40:38,  6.40it/s]

Episode:184800 | Reward:-100.0


 37%|███▋      | 185002/500000 [7:15:09<11:27:31,  7.64it/s]

Episode:185000 | Reward:100.0


 37%|███▋      | 185202/500000 [7:15:36<13:43:43,  6.37it/s]

Episode:185200 | Reward:-100.0


 37%|███▋      | 185402/500000 [7:16:05<14:24:01,  6.07it/s]

Episode:185400 | Reward:-100.0


 37%|███▋      | 185602/500000 [7:16:34<13:26:43,  6.50it/s]

Episode:185600 | Reward:-100.0


 37%|███▋      | 185802/500000 [7:17:02<13:19:32,  6.55it/s]

Episode:185800 | Reward:100.0


 37%|███▋      | 186002/500000 [7:17:31<13:34:54,  6.42it/s]

Episode:186000 | Reward:100.0


 37%|███▋      | 186202/500000 [7:17:59<13:08:56,  6.63it/s]

Episode:186200 | Reward:100.0


 37%|███▋      | 186402/500000 [7:18:27<13:55:49,  6.25it/s]

Episode:186400 | Reward:100.0


 37%|███▋      | 186602/500000 [7:18:55<13:13:23,  6.58it/s]

Episode:186600 | Reward:-100.0


 37%|███▋      | 186802/500000 [7:19:23<13:39:37,  6.37it/s]

Episode:186800 | Reward:-100.0


 37%|███▋      | 187002/500000 [7:19:52<13:58:02,  6.22it/s]

Episode:187000 | Reward:-100.0


 37%|███▋      | 187202/500000 [7:20:20<13:12:46,  6.58it/s]

Episode:187200 | Reward:100.0


 37%|███▋      | 187402/500000 [7:20:50<13:41:13,  6.34it/s]

Episode:187400 | Reward:-100.0


 38%|███▊      | 187602/500000 [7:21:18<13:38:59,  6.36it/s]

Episode:187600 | Reward:-100.0


 38%|███▊      | 187802/500000 [7:21:46<13:46:40,  6.29it/s]

Episode:187800 | Reward:100.0


 38%|███▊      | 188002/500000 [7:22:14<13:41:12,  6.33it/s]

Episode:188000 | Reward:100.0


 38%|███▊      | 188202/500000 [7:22:44<13:50:15,  6.26it/s]

Episode:188200 | Reward:100.0


 38%|███▊      | 188402/500000 [7:23:12<13:26:15,  6.44it/s]

Episode:188400 | Reward:100.0


 38%|███▊      | 188602/500000 [7:23:41<11:58:45,  7.22it/s]

Episode:188600 | Reward:-100.0


 38%|███▊      | 188802/500000 [7:24:09<13:30:25,  6.40it/s]

Episode:188800 | Reward:100.0


 38%|███▊      | 189002/500000 [7:24:39<13:04:09,  6.61it/s]

Episode:189000 | Reward:100.0


 38%|███▊      | 189202/500000 [7:25:07<13:36:21,  6.35it/s]

Episode:189200 | Reward:100.0


 38%|███▊      | 189402/500000 [7:25:36<13:39:18,  6.32it/s]

Episode:189400 | Reward:100.0


 38%|███▊      | 189602/500000 [7:26:04<13:32:16,  6.37it/s]

Episode:189600 | Reward:-100.0


 38%|███▊      | 189802/500000 [7:26:34<11:25:19,  7.54it/s]

Episode:189800 | Reward:100.0


 38%|███▊      | 190002/500000 [7:27:00<13:08:08,  6.56it/s]

Episode:190000 | Reward:100.0


 38%|███▊      | 190202/500000 [7:27:28<13:00:59,  6.61it/s]

Episode:190200 | Reward:100.0


 38%|███▊      | 190402/500000 [7:27:55<11:11:37,  7.68it/s]

Episode:190400 | Reward:100.0


 38%|███▊      | 190602/500000 [7:28:20<11:13:50,  7.65it/s]

Episode:190600 | Reward:100.0


 38%|███▊      | 190802/500000 [7:28:50<13:07:23,  6.54it/s]

Episode:190800 | Reward:100.0


 38%|███▊      | 191002/500000 [7:29:18<13:50:25,  6.20it/s]

Episode:191000 | Reward:0.0


 38%|███▊      | 191202/500000 [7:29:46<13:23:20,  6.41it/s]

Episode:191200 | Reward:100.0


 38%|███▊      | 191402/500000 [7:30:14<13:58:52,  6.13it/s]

Episode:191400 | Reward:-100.0


 38%|███▊      | 191602/500000 [7:30:44<13:06:51,  6.53it/s]

Episode:191600 | Reward:100.0


 38%|███▊      | 191802/500000 [7:31:12<13:03:29,  6.56it/s]

Episode:191800 | Reward:100.0


 38%|███▊      | 192002/500000 [7:31:40<13:20:25,  6.41it/s]

Episode:192000 | Reward:0.0


 38%|███▊      | 192202/500000 [7:32:08<13:05:39,  6.53it/s]

Episode:192200 | Reward:-100.0


 38%|███▊      | 192402/500000 [7:32:34<13:45:16,  6.21it/s]

Episode:192400 | Reward:-100.0


 39%|███▊      | 192602/500000 [7:33:04<13:22:36,  6.38it/s]

Episode:192600 | Reward:-100.0


 39%|███▊      | 192802/500000 [7:33:32<13:10:51,  6.47it/s]

Episode:192800 | Reward:-100.0


 39%|███▊      | 193002/500000 [7:34:00<13:29:13,  6.32it/s]

Episode:193000 | Reward:100.0


 39%|███▊      | 193202/500000 [7:34:28<13:22:48,  6.37it/s]

Episode:193200 | Reward:100.0


 39%|███▊      | 193402/500000 [7:34:58<13:09:40,  6.47it/s]

Episode:193400 | Reward:100.0


 39%|███▊      | 193602/500000 [7:35:26<13:22:26,  6.36it/s]

Episode:193600 | Reward:-100.0


 39%|███▉      | 193802/500000 [7:35:54<13:15:09,  6.42it/s]

Episode:193800 | Reward:-100.0


 39%|███▉      | 194002/500000 [7:36:23<13:02:22,  6.52it/s]

Episode:194000 | Reward:100.0


 39%|███▉      | 194202/500000 [7:36:53<13:11:28,  6.44it/s]

Episode:194200 | Reward:-100.0


 39%|███▉      | 194402/500000 [7:37:20<13:08:08,  6.46it/s]

Episode:194400 | Reward:100.0


 39%|███▉      | 194602/500000 [7:37:49<12:56:24,  6.56it/s]

Episode:194600 | Reward:-100.0


 39%|███▉      | 194802/500000 [7:38:17<12:53:32,  6.58it/s]

Episode:194800 | Reward:-100.0


 39%|███▉      | 195002/500000 [7:38:47<13:13:45,  6.40it/s]

Episode:195000 | Reward:100.0


 39%|███▉      | 195202/500000 [7:39:14<13:19:29,  6.35it/s]

Episode:195200 | Reward:100.0


 39%|███▉      | 195402/500000 [7:39:42<13:10:17,  6.42it/s]

Episode:195400 | Reward:100.0


 39%|███▉      | 195602/500000 [7:40:10<13:20:59,  6.33it/s]

Episode:195600 | Reward:-100.0


 39%|███▉      | 195802/500000 [7:40:37<13:33:02,  6.24it/s]

Episode:195800 | Reward:100.0


 39%|███▉      | 196002/500000 [7:41:06<12:49:36,  6.58it/s]

Episode:196000 | Reward:-100.0


 39%|███▉      | 196202/500000 [7:41:32<13:12:48,  6.39it/s]

Episode:196200 | Reward:-100.0


 39%|███▉      | 196402/500000 [7:42:01<13:00:52,  6.48it/s]

Episode:196400 | Reward:-100.0


 39%|███▉      | 196602/500000 [7:42:29<13:17:37,  6.34it/s]

Episode:196600 | Reward:100.0


 39%|███▉      | 196802/500000 [7:42:56<13:00:06,  6.48it/s]

Episode:196800 | Reward:-100.0


 39%|███▉      | 197002/500000 [7:43:24<13:12:28,  6.37it/s]

Episode:197000 | Reward:100.0


 39%|███▉      | 197202/500000 [7:43:53<13:25:41,  6.26it/s]

Episode:197200 | Reward:100.0


 39%|███▉      | 197402/500000 [7:44:21<13:04:35,  6.43it/s]

Episode:197400 | Reward:-100.0


 40%|███▉      | 197602/500000 [7:44:51<20:34:05,  4.08it/s]

Episode:197600 | Reward:100.0


 40%|███▉      | 197802/500000 [7:45:20<13:02:57,  6.43it/s]

Episode:197800 | Reward:100.0


 40%|███▉      | 198002/500000 [7:45:48<12:55:45,  6.49it/s]

Episode:198000 | Reward:-100.0


 40%|███▉      | 198202/500000 [7:46:16<13:08:27,  6.38it/s]

Episode:198200 | Reward:-100.0


 40%|███▉      | 198402/500000 [7:46:45<13:05:45,  6.40it/s]

Episode:198400 | Reward:-100.0


 40%|███▉      | 198602/500000 [7:47:15<13:35:42,  6.16it/s]

Episode:198600 | Reward:-100.0


 40%|███▉      | 198802/500000 [7:47:43<12:48:51,  6.53it/s]

Episode:198800 | Reward:-100.0


 40%|███▉      | 199002/500000 [7:48:11<12:53:30,  6.49it/s]

Episode:199000 | Reward:100.0


 40%|███▉      | 199202/500000 [7:48:39<12:49:56,  6.51it/s]

Episode:199200 | Reward:100.0


 40%|███▉      | 199402/500000 [7:49:09<12:42:36,  6.57it/s]

Episode:199400 | Reward:-100.0


 40%|███▉      | 199602/500000 [7:49:37<13:06:07,  6.37it/s]

Episode:199600 | Reward:-100.0


 40%|███▉      | 199802/500000 [7:50:06<12:37:04,  6.61it/s]

Episode:199800 | Reward:-100.0


 40%|████      | 200002/500000 [7:50:34<12:37:56,  6.60it/s]

Episode:200000 | Reward:-100.0


 40%|████      | 200202/500000 [7:51:03<12:59:36,  6.41it/s]

Episode:200200 | Reward:100.0


 40%|████      | 200402/500000 [7:51:32<14:02:56,  5.92it/s]

Episode:200400 | Reward:100.0


 40%|████      | 200602/500000 [7:52:00<12:46:30,  6.51it/s]

Episode:200600 | Reward:100.0


 40%|████      | 200802/500000 [7:52:28<13:30:25,  6.15it/s]

Episode:200800 | Reward:-100.0


 40%|████      | 201002/500000 [7:52:57<12:51:40,  6.46it/s]

Episode:201000 | Reward:100.0


 40%|████      | 201202/500000 [7:53:27<12:42:29,  6.53it/s]

Episode:201200 | Reward:100.0


 40%|████      | 201402/500000 [7:53:55<12:51:18,  6.45it/s]

Episode:201400 | Reward:100.0


 40%|████      | 201602/500000 [7:54:23<13:20:20,  6.21it/s]

Episode:201600 | Reward:100.0


 40%|████      | 201802/500000 [7:54:52<13:24:41,  6.18it/s]

Episode:201800 | Reward:100.0


 40%|████      | 202002/500000 [7:55:22<13:30:54,  6.12it/s]

Episode:202000 | Reward:100.0


 40%|████      | 202202/500000 [7:55:50<12:36:18,  6.56it/s]

Episode:202200 | Reward:100.0


 40%|████      | 202402/500000 [7:56:18<12:40:45,  6.52it/s]

Episode:202400 | Reward:100.0


 41%|████      | 202602/500000 [7:56:47<12:47:58,  6.45it/s]

Episode:202600 | Reward:100.0


 41%|████      | 202802/500000 [7:57:16<12:48:06,  6.45it/s]

Episode:202800 | Reward:-100.0


 41%|████      | 203002/500000 [7:57:44<12:59:57,  6.35it/s]

Episode:203000 | Reward:-100.0


 41%|████      | 203202/500000 [7:58:12<12:43:53,  6.48it/s]

Episode:203200 | Reward:-100.0


 41%|████      | 203402/500000 [7:58:41<12:49:00,  6.43it/s]

Episode:203400 | Reward:-100.0


 41%|████      | 203602/500000 [7:59:09<12:52:06,  6.40it/s]

Episode:203600 | Reward:-100.0


 41%|████      | 203802/500000 [7:59:37<12:49:03,  6.42it/s]

Episode:203800 | Reward:100.0


 41%|████      | 204002/500000 [8:00:05<13:14:24,  6.21it/s]

Episode:204000 | Reward:100.0


 41%|████      | 204202/500000 [8:00:33<12:17:13,  6.69it/s]

Episode:204200 | Reward:-100.0


 41%|████      | 204402/500000 [8:01:02<12:53:55,  6.37it/s]

Episode:204400 | Reward:100.0


 41%|████      | 204602/500000 [8:01:31<12:38:34,  6.49it/s]

Episode:204600 | Reward:100.0


 41%|████      | 204802/500000 [8:02:00<12:48:26,  6.40it/s]

Episode:204800 | Reward:100.0


 41%|████      | 205002/500000 [8:02:28<12:21:12,  6.63it/s]

Episode:205000 | Reward:-100.0


 41%|████      | 205202/500000 [8:02:56<12:00:43,  6.82it/s]

Episode:205200 | Reward:100.0


 41%|████      | 205402/500000 [8:03:25<10:36:31,  7.71it/s]

Episode:205400 | Reward:100.0


 41%|████      | 205602/500000 [8:03:53<13:01:26,  6.28it/s]

Episode:205600 | Reward:100.0


 41%|████      | 205802/500000 [8:04:21<12:35:01,  6.49it/s]

Episode:205800 | Reward:100.0


 41%|████      | 206002/500000 [8:04:50<12:41:09,  6.44it/s]

Episode:206000 | Reward:-100.0


 41%|████      | 206202/500000 [8:05:19<12:44:15,  6.41it/s]

Episode:206200 | Reward:100.0


 41%|████▏     | 206402/500000 [8:05:47<12:30:52,  6.52it/s]

Episode:206400 | Reward:100.0


 41%|████▏     | 206602/500000 [8:06:16<12:23:45,  6.57it/s]

Episode:206600 | Reward:100.0


 41%|████▏     | 206802/500000 [8:06:44<12:34:27,  6.48it/s]

Episode:206800 | Reward:100.0


 41%|████▏     | 207002/500000 [8:07:13<13:05:05,  6.22it/s]

Episode:207000 | Reward:-100.0


 41%|████▏     | 207202/500000 [8:07:42<12:45:46,  6.37it/s]

Episode:207200 | Reward:-100.0


 41%|████▏     | 207402/500000 [8:08:11<12:28:35,  6.51it/s]

Episode:207400 | Reward:100.0


 42%|████▏     | 207602/500000 [8:08:39<12:30:55,  6.49it/s]

Episode:207600 | Reward:100.0


 42%|████▏     | 207802/500000 [8:09:07<12:19:29,  6.59it/s]

Episode:207800 | Reward:100.0


 42%|████▏     | 208002/500000 [8:09:37<12:52:39,  6.30it/s]

Episode:208000 | Reward:-100.0


 42%|████▏     | 208202/500000 [8:10:04<12:26:33,  6.51it/s]

Episode:208200 | Reward:100.0


 42%|████▏     | 208402/500000 [8:10:33<12:37:12,  6.42it/s]

Episode:208400 | Reward:-100.0


 42%|████▏     | 208602/500000 [8:11:01<12:14:41,  6.61it/s]

Episode:208600 | Reward:-100.0


 42%|████▏     | 208802/500000 [8:11:31<12:29:07,  6.48it/s]

Episode:208800 | Reward:0.0


 42%|████▏     | 209002/500000 [8:11:59<12:21:23,  6.54it/s]

Episode:209000 | Reward:100.0


 42%|████▏     | 209202/500000 [8:12:27<12:27:20,  6.49it/s]

Episode:209200 | Reward:-100.0


 42%|████▏     | 209402/500000 [8:12:55<12:23:29,  6.51it/s]

Episode:209400 | Reward:100.0


 42%|████▏     | 209602/500000 [8:13:26<12:39:06,  6.38it/s]

Episode:209600 | Reward:-100.0


 42%|████▏     | 209802/500000 [8:13:54<12:10:25,  6.62it/s]

Episode:209800 | Reward:-100.0


 42%|████▏     | 210002/500000 [8:14:22<12:06:44,  6.65it/s]

Episode:210000 | Reward:100.0


 42%|████▏     | 210202/500000 [8:14:51<12:18:57,  6.54it/s]

Episode:210200 | Reward:100.0


 42%|████▏     | 210402/500000 [8:15:19<13:05:21,  6.15it/s]

Episode:210400 | Reward:100.0


 42%|████▏     | 210602/500000 [8:15:48<10:44:17,  7.49it/s]

Episode:210600 | Reward:100.0


 42%|████▏     | 210802/500000 [8:16:16<12:21:50,  6.50it/s]

Episode:210800 | Reward:-100.0


 42%|████▏     | 211002/500000 [8:16:45<12:41:35,  6.32it/s]

Episode:211000 | Reward:-100.0


 42%|████▏     | 211202/500000 [8:17:13<12:27:36,  6.44it/s]

Episode:211200 | Reward:100.0


 42%|████▏     | 211402/500000 [8:17:42<13:02:24,  6.15it/s]

Episode:211400 | Reward:-100.0


 42%|████▏     | 211602/500000 [8:18:10<12:15:37,  6.53it/s]

Episode:211600 | Reward:100.0


 42%|████▏     | 211802/500000 [8:18:38<12:22:41,  6.47it/s]

Episode:211800 | Reward:-100.0


 42%|████▏     | 212002/500000 [8:19:07<12:13:39,  6.54it/s]

Episode:212000 | Reward:100.0


 42%|████▏     | 212202/500000 [8:19:36<12:08:39,  6.58it/s]

Episode:212200 | Reward:-100.0


 42%|████▏     | 212402/500000 [8:20:05<12:05:41,  6.61it/s]

Episode:212400 | Reward:100.0


 43%|████▎     | 212602/500000 [8:20:33<12:30:43,  6.38it/s]

Episode:212600 | Reward:100.0


 43%|████▎     | 212802/500000 [8:20:58<12:50:19,  6.21it/s]

Episode:212800 | Reward:100.0


 43%|████▎     | 213002/500000 [8:21:27<12:40:34,  6.29it/s]

Episode:213000 | Reward:-100.0


 43%|████▎     | 213202/500000 [8:21:56<12:31:09,  6.36it/s]

Episode:213200 | Reward:-100.0


 43%|████▎     | 213402/500000 [8:22:22<10:37:37,  7.49it/s]

Episode:213400 | Reward:-100.0


 43%|████▎     | 213602/500000 [8:22:47<12:32:51,  6.34it/s]

Episode:213600 | Reward:-100.0


 43%|████▎     | 213802/500000 [8:23:15<12:17:40,  6.47it/s]

Episode:213800 | Reward:-100.0


 43%|████▎     | 214002/500000 [8:23:45<11:53:50,  6.68it/s]

Episode:214000 | Reward:100.0


 43%|████▎     | 214202/500000 [8:24:13<12:08:38,  6.54it/s]

Episode:214200 | Reward:100.0


 43%|████▎     | 214402/500000 [8:24:41<12:13:03,  6.49it/s]

Episode:214400 | Reward:-100.0


 43%|████▎     | 214602/500000 [8:25:10<12:31:49,  6.33it/s]

Episode:214600 | Reward:100.0


 43%|████▎     | 214802/500000 [8:25:39<13:55:37,  5.69it/s]

Episode:214800 | Reward:100.0


 43%|████▎     | 215002/500000 [8:26:08<12:02:36,  6.57it/s]

Episode:215000 | Reward:-100.0


 43%|████▎     | 215202/500000 [8:26:36<12:04:16,  6.55it/s]

Episode:215200 | Reward:-100.0


 43%|████▎     | 215402/500000 [8:27:04<12:25:46,  6.36it/s]

Episode:215400 | Reward:100.0


 43%|████▎     | 215602/500000 [8:27:32<12:08:19,  6.51it/s]

Episode:215600 | Reward:-100.0


 43%|████▎     | 215802/500000 [8:28:02<12:21:42,  6.39it/s]

Episode:215800 | Reward:-100.0


 43%|████▎     | 216002/500000 [8:28:30<12:22:45,  6.37it/s]

Episode:216000 | Reward:100.0


 43%|████▎     | 216202/500000 [8:28:58<12:22:34,  6.37it/s]

Episode:216200 | Reward:100.0


 43%|████▎     | 216402/500000 [8:29:26<12:10:06,  6.47it/s]

Episode:216400 | Reward:100.0


 43%|████▎     | 216602/500000 [8:29:56<11:59:41,  6.56it/s]

Episode:216600 | Reward:-100.0


 43%|████▎     | 216802/500000 [8:30:23<10:02:39,  7.83it/s]

Episode:216800 | Reward:-100.0


 43%|████▎     | 217002/500000 [8:30:51<12:21:35,  6.36it/s]

Episode:217000 | Reward:100.0


 43%|████▎     | 217202/500000 [8:31:20<12:09:28,  6.46it/s]

Episode:217200 | Reward:-100.0


 43%|████▎     | 217402/500000 [8:31:49<12:00:00,  6.54it/s]

Episode:217400 | Reward:100.0


 44%|████▎     | 217602/500000 [8:32:18<12:01:55,  6.52it/s]

Episode:217600 | Reward:-100.0


 44%|████▎     | 217802/500000 [8:32:47<12:01:53,  6.52it/s]

Episode:217800 | Reward:100.0


 44%|████▎     | 218002/500000 [8:33:15<12:01:16,  6.52it/s]

Episode:218000 | Reward:100.0


 44%|████▎     | 218202/500000 [8:33:43<12:07:31,  6.46it/s]

Episode:218200 | Reward:-100.0


 44%|████▎     | 218402/500000 [8:34:13<11:53:44,  6.58it/s]

Episode:218400 | Reward:100.0


 44%|████▎     | 218602/500000 [8:34:42<12:16:42,  6.37it/s]

Episode:218600 | Reward:100.0


 44%|████▍     | 218802/500000 [8:35:10<11:59:44,  6.51it/s]

Episode:218800 | Reward:100.0


 44%|████▍     | 219002/500000 [8:35:38<10:26:26,  7.48it/s]

Episode:219000 | Reward:-100.0


 44%|████▍     | 219202/500000 [8:36:06<12:23:52,  6.29it/s]

Episode:219200 | Reward:100.0


 44%|████▍     | 219402/500000 [8:36:34<12:07:29,  6.43it/s]

Episode:219400 | Reward:100.0


 44%|████▍     | 219602/500000 [8:37:01<12:12:33,  6.38it/s]

Episode:219600 | Reward:-100.0


 44%|████▍     | 219802/500000 [8:37:30<12:04:02,  6.45it/s]

Episode:219800 | Reward:100.0


 44%|████▍     | 220002/500000 [8:37:59<11:55:58,  6.52it/s]

Episode:220000 | Reward:-100.0


 44%|████▍     | 220202/500000 [8:38:27<11:55:51,  6.51it/s]

Episode:220200 | Reward:100.0


 44%|████▍     | 220402/500000 [8:38:55<11:51:17,  6.55it/s]

Episode:220400 | Reward:100.0


 44%|████▍     | 220602/500000 [8:39:24<12:09:38,  6.38it/s]

Episode:220600 | Reward:100.0


 44%|████▍     | 220802/500000 [8:39:52<12:03:49,  6.43it/s]

Episode:220800 | Reward:100.0


 44%|████▍     | 221002/500000 [8:40:21<11:56:51,  6.49it/s]

Episode:221000 | Reward:-100.0


 44%|████▍     | 221202/500000 [8:40:50<11:52:50,  6.52it/s]

Episode:221200 | Reward:-100.0


 44%|████▍     | 221402/500000 [8:41:18<12:09:57,  6.36it/s]

Episode:221400 | Reward:100.0


 44%|████▍     | 221602/500000 [8:41:46<11:42:50,  6.60it/s]

Episode:221600 | Reward:-100.0


 44%|████▍     | 221802/500000 [8:42:16<12:08:44,  6.36it/s]

Episode:221800 | Reward:0.0


 44%|████▍     | 222002/500000 [8:42:42<11:09:07,  6.92it/s]

Episode:222000 | Reward:-100.0


 44%|████▍     | 222105/500000 [8:42:56<11:17:47,  6.83it/s]