In [1]:
import os
import sys
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions.normal import Normal
from tqdm import tqdm
from collections import deque

import time
import psutil
import datetime
import subprocess
# import torch
import torchvision
from tensorboard import program
import webbrowser
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter

# ANSI escape sequences for coloured terminal output (RESET restores the
# default colour). None of them is referenced in the visible part of the notebook.
RED = "\033[31m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
BLUE = "\033[34m"
MAGENTA = "\033[35m"
CYAN = "\033[36m"
RESET = "\033[0m"

# Preferred torch device. NOTE(review): the visible code never moves models or
# tensors to DEVICE, so everything shown here runs on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Timestamp of this run; used to build a unique output directory.
current_time = datetime.datetime.now().strftime("%Y%m%d_%H-%M-%S")
# Output directory for this run (checkpoints, normalizer parameters, rewards).
log_dir = f"../runs/{current_time}/"
# TensorBoard writer bound to log_dir. NOTE(review): nothing is written to it
# in the visible code.
writer = SummaryWriter(log_dir)

# Disabled: launch a TensorBoard server and open it in the browser.
# NOTE(review): the logdir below ("../runs/franka_cabinet/...") differs from log_dir.
# tb = program.TensorBoard()
# tb.configure(argv=[None, '--logdir', f"../runs/franka_cabinet/{current_time}", '--port', '6300'])
# url = tb.launch()
# webbrowser.open_new(url)

# Learning rate intended for the actor (policy) network.
# NOTE(review): not read by the visible code; PPO.__init__ uses its own value.
lr_actor = 0.0003
# Learning rate intended for the critic (state-value) network.
# NOTE(review): not read by the visible code; PPO.__init__ uses its own value.
lr_critic = 0.0003
# Number of training iterations (one rollout collection + one PPO update each).
Iter = 100000
# Maximum number of environment steps in a single rollout episode.
MAX_STEP = 1000
# Discount factor applied to future rewards (read by PPO.get_gae).
gamma =0.98
# GAE lambda: decay applied to accumulated TD errors in PPO.get_gae.
lambd = 0.95
# Minibatch size used for the PPO update (read by PPO.train).
batch_size = 64
# PPO clipping range: the probability ratio is clamped to [1 - epsilon, 1 + epsilon].
epsilon = 0.2
# Weight-decay coefficient intended for the critic's Adam optimizer.
# NOTE(review): not read by the visible code; PPO.__init__ uses its own value.
l2_rate = 0.001

# Save a checkpoint and run an evaluation every `save_freq` iterations.
save_freq = 100

# Scratch flag toggled by the training loop around each checkpoint save.
save_flag = False

In [2]:
# Actor class: Used to choose actions of a continuous action space.

class Actor(nn.Module):
    """Gaussian policy network for a continuous action space.

    A two-hidden-layer tanh MLP with two linear heads: one producing the mean
    and one producing the log standard deviation of an independent normal
    distribution per action dimension.
    """

    def __init__(self, N_S, N_A):
        """Create the layers.

        Args:
            N_S: Size of the state (input) vector.
            N_A: Number of action dimensions (output size of each head).
        """
        super().__init__()
        # Layer creation order is fc1, fc2, sigma, mu.
        self.fc1 = nn.Linear(N_S, 64)
        self.fc2 = nn.Linear(64, 64)
        self.sigma = nn.Linear(64, N_A)
        self.mu = nn.Linear(64, N_A)
        # Scale the mean head's initial weights by 0.1 and multiply its bias by 0.
        with torch.no_grad():
            self.mu.weight.mul_(0.1)
            self.mu.bias.mul_(0.0)
        # Distribution class used to turn (mu, sigma) into a policy.
        self.distribution = torch.distributions.Normal

    def set_init(self, layers):
        """Re-initialise each given layer: weights ~ N(0, 0.1), biases = 0."""
        for module in layers:
            nn.init.normal_(module.weight, mean=0., std=0.1)
            nn.init.constant_(module.bias, 0.)

    def forward(self, s):
        """Return ``(mu, sigma)`` of the action distribution for state(s) ``s``."""
        hidden = torch.tanh(self.fc2(torch.tanh(self.fc1(s))))
        mean = self.mu(hidden)
        # The sigma head outputs log(sigma); exponentiate to get a positive std.
        std = torch.exp(self.sigma(hidden))
        return mean, std

    def choose_action(self, s):
        """Sample an action from the policy for ``s`` and return it as a NumPy array."""
        policy = self.distribution(*self.forward(s))
        return policy.sample().numpy()

In [3]:
# Critic class : Used to estimate V(state) the state value function through a NN.
class Critic(nn.Module):
    """State-value network: estimates V(s) with a two-hidden-layer tanh MLP."""

    def __init__(self, N_S):
        """Create the layers.

        Args:
            N_S: Size of the state (input) vector.
        """
        super().__init__()
        self.fc1 = nn.Linear(N_S, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 1)
        # Scale the output layer's initial weights by 0.1 and multiply its bias by 0.
        with torch.no_grad():
            self.fc3.weight.mul_(0.1)
            self.fc3.bias.mul_(0.0)

    def set_init(self, layers):
        """Re-initialise each given layer: weights ~ N(0, 0.1), biases = 0."""
        for module in layers:
            nn.init.normal_(module.weight, mean=0., std=0.1)
            nn.init.constant_(module.bias, 0.)

    def forward(self, s):
        """Return the estimated state value(s) for ``s`` with a trailing dimension of 1."""
        hidden = torch.tanh(self.fc1(s))
        hidden = torch.tanh(self.fc2(hidden))
        return self.fc3(hidden)

In [4]:
class PPO:
    """Proximal Policy Optimization agent using the clipped surrogate objective.

    Owns an actor (Gaussian policy) and a critic (state-value estimator), each
    with its own Adam optimizer. ``train`` and ``get_gae`` read the
    module-level hyperparameters ``gamma``, ``lambd``, ``batch_size`` and
    ``epsilon`` at call time.
    """

    def __init__(self, N_S, N_A, actor_lr=1e-4, critic_lr=1e-3, critic_weight_decay=1e-3):
        """Create the networks, optimizers and critic loss.

        Args:
            N_S: Size of the state vector.
            N_A: Number of action dimensions.
            actor_lr: Adam learning rate for the actor.
            critic_lr: Adam learning rate for the critic.
            critic_weight_decay: Adam weight decay (L2) for the critic.

        The optimizer defaults equal the values that used to be hard-coded
        here, so ``PPO(N_S, N_A)`` behaves as before.
        """
        self.actor_net = Actor(N_S, N_A)
        self.critic_net = Critic(N_S)
        self.actor_optim = optim.Adam(self.actor_net.parameters(), lr=actor_lr)
        self.critic_optim = optim.Adam(
            self.critic_net.parameters(), lr=critic_lr, weight_decay=critic_weight_decay
        )
        self.critic_loss_func = torch.nn.MSELoss()

    def train(self, memory):
        """Run one PPO update over a batch of collected transitions.

        Args:
            memory: Sequence of ``[state, action, reward, mask]`` entries in
                collection order, where ``mask`` is 0 if the episode ended at
                that step and 1 otherwise.

        Transitions are shuffled and consumed in minibatches of ``batch_size``;
        a trailing incomplete minibatch is dropped. Does nothing when
        ``memory`` is empty.
        """
        if len(memory) == 0:
            # Nothing was collected: skip rather than fail on empty tensors.
            return

        states = torch.tensor(np.array([m[0] for m in memory]), dtype=torch.float32)
        actions = torch.tensor(np.array([m[1] for m in memory]), dtype=torch.float32)
        rewards = torch.tensor(np.array([m[2] for m in memory]), dtype=torch.float32)
        masks = torch.tensor(np.array([m[3] for m in memory]), dtype=torch.float32)

        # Targets and old-policy log-probabilities are constants of the update,
        # so compute them without building an autograd graph.
        with torch.no_grad():
            values = self.critic_net(states)
            returns, advants = self.get_gae(rewards, masks, values)
            old_mu, old_std = self.actor_net(states)
            old_pi = self.actor_net.distribution(old_mu, old_std)
            old_log_prob = old_pi.log_prob(actions).sum(1, keepdim=True)

        n = len(states)
        arr = np.arange(n)
        for epoch in range(1):
            np.random.shuffle(arr)
            for i in range(n // batch_size):
                b_index = arr[batch_size * i:batch_size * (i + 1)]
                b_states = states[b_index]
                b_advants = advants[b_index].unsqueeze(1)
                b_actions = actions[b_index]
                b_returns = returns[b_index].unsqueeze(1)

                # Log-probability of the stored actions under the current policy.
                mu, std = self.actor_net(b_states)
                pi = self.actor_net.distribution(mu, std)
                new_prob = pi.log_prob(b_actions).sum(1, keepdim=True)
                old_prob = old_log_prob[b_index]
                # Probability ratio pi_new(a|s) / pi_old(a|s).
                ratio = torch.exp(new_prob - old_prob)

                # Critic update: regress V(s) onto the discounted returns.
                values = self.critic_net(b_states)
                critic_loss = self.critic_loss_func(values, b_returns)
                self.critic_optim.zero_grad()
                critic_loss.backward()
                self.critic_optim.step()

                # Actor update: pessimistic minimum of the unclipped and
                # clipped surrogate objectives.
                surrogate_loss = ratio * b_advants
                clipped_ratio = torch.clamp(ratio, 1.0 - epsilon, 1.0 + epsilon)
                clipped_loss = clipped_ratio * b_advants
                actor_loss = -torch.min(surrogate_loss, clipped_loss).mean()
                self.actor_optim.zero_grad()
                actor_loss.backward()
                self.actor_optim.step()

    def kl_divergence(self, old_mu, old_sigma, mu, sigma):
        """Return KL(old || new) between diagonal Gaussian policies.

        Args:
            old_mu, old_sigma: Mean and std of the old policy (treated as constants).
            mu, sigma: Mean and std of the new policy.

        Returns:
            Tensor of shape ``(batch, 1)``: the per-dimension KL summed over
            dimension 1.
        """
        old_mu = old_mu.detach()
        old_sigma = old_sigma.detach()

        # KL(N(m0, s0) || N(m1, s1)) = log(s1 / s0) + (s0^2 + (m0 - m1)^2) / (2 s1^2) - 1/2
        kl = torch.log(sigma) - torch.log(old_sigma) + (old_sigma.pow(2) + (old_mu - mu).pow(2)) / \
             (2.0 * sigma.pow(2)) - 0.5
        return kl.sum(1, keepdim=True)

    def get_gae(self, rewards, masks, values):
        """Compute discounted returns and normalized GAE advantages.

        Args:
            rewards: Per-step rewards, shape ``(T,)``.
            masks: Per-step continuation flags (0 at episode end, else 1), shape ``(T,)``.
            values: Critic estimates for the same steps, ``(T,)`` or ``(T, 1)``.

        Returns:
            ``(returns, advants)``, both of shape ``(T,)``. ``advants`` is
            standardized to zero mean and, when it has spread, unit std.
        """
        rewards = torch.as_tensor(rewards, dtype=torch.float32)
        masks = torch.as_tensor(masks, dtype=torch.float32)
        # Flatten so every per-step quantity below is a scalar tensor.
        values = values.detach().reshape(-1)

        returns = torch.zeros_like(rewards)
        advants = torch.zeros_like(rewards)

        running_returns = 0.0
        previous_value = 0.0
        running_advants = 0.0
        # Walk backwards so each step can reuse the quantities of its successor;
        # masks[t] == 0 cuts the recursion at episode boundaries.
        for t in reversed(range(len(rewards))):
            running_returns = rewards[t] + gamma * running_returns * masks[t]
            # One-step TD error: r_t + gamma * V(s_{t+1}) - V(s_t).
            running_tderror = rewards[t] + gamma * previous_value * masks[t] - values[t]
            running_advants = running_tderror + gamma * lambd * running_advants * masks[t]

            returns[t] = running_returns
            previous_value = values[t]
            advants[t] = running_advants

        # Standardize the advantages. The epsilon and the single-sample guard
        # keep the result finite when the advantages have no spread.
        centered = advants - advants.mean()
        if advants.numel() > 1:
            advants = centered / (advants.std() + 1e-8)
        else:
            advants = centered
        return returns, advants

    def save(self, filename):
        """Save actor, critic and both optimizer state dicts using ``filename`` as prefix."""
        filename = str(filename)
        torch.save(self.actor_net.state_dict(), filename + "_actor")
        torch.save(self.critic_net.state_dict(), filename + "_critic")
        torch.save(self.actor_optim.state_dict(), filename + "_actor_optimizer")
        torch.save(self.critic_optim.state_dict(), filename + "_critic_optimizer")

    def load(self, filename):
        """Restore the state dicts written by ``save`` with the same ``filename`` prefix."""
        filename = str(filename)
        self.actor_net.load_state_dict(torch.load(filename + "_actor"))
        self.critic_net.load_state_dict(torch.load(filename + "_critic"))
        self.actor_optim.load_state_dict(torch.load(filename + "_actor_optimizer"))
        self.critic_optim.load_state_dict(torch.load(filename + "_critic_optimizer"))
        

In [5]:
# Creation of a class to normalize the states
class Normalize:
    """Running mean/std normalizer for state vectors (Welford's algorithm).

    Attributes:
        mean: Running mean of the samples seen so far.
        std: Running sample standard deviation (zero until two samples are seen).
        stdd: Running sum of squared deviations from the mean.
        n: Number of samples folded into the statistics.
    """

    def __init__(self, N_S):
        """Start with empty statistics for vectors of length ``N_S``."""
        self.mean = np.zeros((N_S,))
        self.std = np.zeros((N_S, ))
        self.stdd = np.zeros((N_S, ))
        self.n = 0

    def __call__(self, x, update=True):
        """Normalize ``x`` and, by default, fold it into the running statistics.

        Args:
            x: State vector (array-like).
            update: When True (default) the running statistics are updated with
                ``x`` before normalizing. Pass False to normalize with frozen
                statistics, e.g. after ``load_params``.

        Returns:
            ``(x - mean) / (std + 1e-8)`` clipped to ``[-5, 5]``.
        """
        x = np.asarray(x)
        if update:
            self.n += 1
            if self.n == 1:
                # Copy so later in-place changes to the caller's array cannot
                # corrupt the stored mean.
                self.mean = x.copy()
                # No spread can be estimated from a single sample.
                self.std = np.zeros_like(self.mean)
            else:
                old_mean = self.mean.copy()
                self.mean = old_mean + (x - old_mean) / self.n
                self.stdd = self.stdd + (x - old_mean) * (x - self.mean)
                self.std = np.sqrt(self.stdd / (self.n - 1))

        x = x - self.mean
        x = x / (self.std + 1e-8)
        x = np.clip(x, -5, +5)
        return x

    def update(self, x):
        """Replace the statistics with those of a batch of states.

        Args:
            x: Batch of states; statistics are taken over axis 0
                (assumes shape ``(batch, N_S)`` - TODO confirm with callers).
        """
        batch = np.asarray(x)
        self.mean = np.mean(batch, axis=0)
        self.std = np.std(batch, axis=0) + 1e-8
        # Keep the running accumulators in sync so a following __call__
        # continues from the batch statistics instead of discarding them.
        self.n = batch.shape[0]
        self.stdd = np.sum((batch - self.mean) ** 2, axis=0)

    def save_params(self, path):
        """Save the full running state (mean, std, stdd, n) to ``path`` with ``np.save``."""
        np.save(path, {'mean': self.mean, 'std': self.std, 'stdd': self.stdd, 'n': self.n})

    def load_params(self, path):
        """Load statistics written by ``save_params``.

        Files written by the older format contain only ``mean`` and ``std``;
        for those, ``n`` and ``stdd`` are left untouched, so use
        ``update=False`` when normalizing to avoid resetting the statistics.
        """
        # NOTE: allow_pickle=True can execute arbitrary code; only load trusted files.
        params = np.load(path, allow_pickle=True).item()
        self.mean = params['mean']
        self.std = params['std']
        if 'stdd' in params and 'n' in params:
            self.stdd = params['stdd']
            self.n = int(params['n'])

In [None]:
env = gym.make('Walker2d-v4', render_mode='rgb_array')

# Sizes of the observation and action vectors.
N_S = env.observation_space.shape[0]
N_A = env.action_space.shape[0]

# Random seed initialization (disabled, so runs are not reproducible).
# env.seed(500)
# torch.manual_seed(500)
# np.random.seed(500)

frames = []
ppo = PPO(N_S,N_A)
# Running observation normalizer shared by rollouts and evaluation.
normalize = Normalize(N_S)
episodes = 0
eva_episodes = 0
episode_data = []


def policy_action(obs):
    """Sample one action from the current policy for a single normalized observation."""
    obs_batch = torch.from_numpy(np.array(obs).astype(np.float32)).unsqueeze(0)
    return ppo.actor_net.choose_action(obs_batch)[0]


# NOTE(review): the loop variable shadows the builtin `iter`; the name is kept
# in case later cells read it.
for iter in tqdm(range(Iter)):
    memory = deque()
    scores = []
    steps = 0
    # Collect whole episodes until at least 2048 transitions are gathered.
    while steps < 2048:  # Horizon
        episodes += 1
        state, _ = env.reset()
        s = normalize(state)
        score = 0
        for _ in range(MAX_STEP):
            steps += 1
            a = policy_action(s)
            # Gymnasium returns (obs, reward, terminated, truncated, info).
            s_, r, terminated, truncated, info = env.step(a)
            done = terminated or truncated
            s_ = normalize(s_)

            # mask == 0 marks the last transition of an episode.
            mask = 0 if done else 1
            memory.append([s, a, r, mask])

            score += r
            s = s_

            if done:
                break
        scores.append(score)
    score_avg = np.mean(scores)
    print('{} episode score is {:.2f}'.format(episodes, score_avg))
    episode_data.append([iter + 1, score_avg])

    if (iter + 1) % save_freq == 0:
        # Checkpoint the agent, the normalizer statistics and the score history.
        path = log_dir + str((iter + 1)) + "/ppo/"
        os.makedirs(path, exist_ok=True)
        ppo.save(path)
        normalize.save_params(log_dir + "normalize_params.npy")
        np.save(log_dir + "reward.npy", episode_data)

        # Evaluate the current policy. Actions are still sampled, and the
        # evaluation observations also update the running normalizer.
        test_total_reward = 0
        test_episodes = 10  # Number of episodes to test
        for _ in range(test_episodes):
            state, _ = env.reset()
            state = normalize(state)
            done = False
            episode_reward = 0
            while not done:
                action = policy_action(state)
                next_state, reward, terminated, truncated, info = env.step(action)
                episode_reward += reward
                state = normalize(next_state)
                done = terminated or truncated
            test_total_reward += episode_reward
        average_test_reward = test_total_reward / test_episodes
        print('Iteration {}: Average test reward: {:.2f}'.format(iter + 1, average_test_reward))

    ppo.train(memory)

  0%|          | 1/100000 [00:01<31:46:39,  1.14s/it]

108 episode score is 0.53


  0%|          | 2/100000 [00:02<31:12:37,  1.12s/it]

219 episode score is 1.21


  0%|          | 3/100000 [00:03<31:13:13,  1.12s/it]

338 episode score is 0.85


  0%|          | 4/100000 [00:04<30:59:45,  1.12s/it]

439 episode score is 2.52


  0%|          | 5/100000 [00:05<30:57:38,  1.11s/it]

537 episode score is 2.66


  0%|          | 6/100000 [00:06<31:22:45,  1.13s/it]

637 episode score is 2.90


  0%|          | 7/100000 [00:07<31:16:23,  1.13s/it]

726 episode score is 5.05


  0%|          | 8/100000 [00:08<31:04:20,  1.12s/it]

810 episode score is 4.00


  0%|          | 9/100000 [00:10<31:00:35,  1.12s/it]

892 episode score is 5.89


  0%|          | 10/100000 [00:11<31:02:32,  1.12s/it]

967 episode score is 6.94


  0%|          | 11/100000 [00:12<31:08:13,  1.12s/it]

1046 episode score is 7.28


  0%|          | 12/100000 [00:13<31:04:01,  1.12s/it]

1114 episode score is 10.41


  0%|          | 13/100000 [00:14<30:59:06,  1.12s/it]

1181 episode score is 10.08


  0%|          | 14/100000 [00:15<30:55:16,  1.11s/it]

1244 episode score is 13.73


  0%|          | 15/100000 [00:16<30:54:17,  1.11s/it]

1305 episode score is 13.84


  0%|          | 16/100000 [00:17<31:03:41,  1.12s/it]

1356 episode score is 18.82


  0%|          | 17/100000 [00:19<30:59:34,  1.12s/it]

1396 episode score is 29.41


  0%|          | 18/100000 [00:20<30:58:45,  1.12s/it]

1426 episode score is 53.35


  0%|          | 19/100000 [00:21<30:53:24,  1.11s/it]

1456 episode score is 43.69


  0%|          | 20/100000 [00:22<30:53:29,  1.11s/it]

1483 episode score is 77.73


  0%|          | 21/100000 [00:23<31:02:19,  1.12s/it]

1509 episode score is 63.51


  0%|          | 22/100000 [00:24<31:16:04,  1.13s/it]

1529 episode score is 111.97


  0%|          | 23/100000 [00:25<31:18:06,  1.13s/it]

1544 episode score is 155.71


  0%|          | 24/100000 [00:26<31:06:42,  1.12s/it]

1558 episode score is 174.45


  0%|          | 25/100000 [00:28<31:39:17,  1.14s/it]

1571 episode score is 207.60


  0%|          | 26/100000 [00:29<31:24:35,  1.13s/it]

1587 episode score is 159.32


  0%|          | 27/100000 [00:30<31:26:51,  1.13s/it]

1603 episode score is 167.92


  0%|          | 28/100000 [00:31<32:06:13,  1.16s/it]

1618 episode score is 196.95


  0%|          | 29/100000 [00:32<31:44:52,  1.14s/it]

1632 episode score is 207.49


  0%|          | 30/100000 [00:33<32:06:03,  1.16s/it]

1647 episode score is 213.63


  0%|          | 31/100000 [00:34<31:50:46,  1.15s/it]

1659 episode score is 262.77


  0%|          | 32/100000 [00:36<31:39:28,  1.14s/it]

1672 episode score is 242.00


  0%|          | 33/100000 [00:37<32:27:16,  1.17s/it]

1686 episode score is 249.97


  0%|          | 34/100000 [00:38<32:00:45,  1.15s/it]

1698 episode score is 221.48


  0%|          | 35/100000 [00:39<31:43:56,  1.14s/it]

1712 episode score is 212.20


  0%|          | 36/100000 [00:40<31:57:12,  1.15s/it]

1724 episode score is 193.06


  0%|          | 37/100000 [00:41<31:42:48,  1.14s/it]

1735 episode score is 275.65


  0%|          | 38/100000 [00:42<31:39:31,  1.14s/it]

1752 episode score is 169.96


  0%|          | 39/100000 [00:44<31:57:21,  1.15s/it]

1766 episode score is 211.96


  0%|          | 40/100000 [00:45<32:12:59,  1.16s/it]

1778 episode score is 202.15


  0%|          | 41/100000 [00:46<32:02:31,  1.15s/it]

1789 episode score is 249.38


  0%|          | 42/100000 [00:47<32:00:27,  1.15s/it]

1799 episode score is 263.47


  0%|          | 43/100000 [00:49<34:22:08,  1.24s/it]

1810 episode score is 290.72


  0%|          | 44/100000 [00:50<34:06:46,  1.23s/it]

1822 episode score is 208.56


  0%|          | 45/100000 [00:51<33:50:38,  1.22s/it]

1834 episode score is 298.00


  0%|          | 46/100000 [00:52<33:55:12,  1.22s/it]

1845 episode score is 317.72


  0%|          | 47/100000 [00:53<33:43:03,  1.21s/it]

1859 episode score is 214.68


  0%|          | 48/100000 [00:55<33:47:52,  1.22s/it]

1871 episode score is 244.64


  0%|          | 49/100000 [00:56<34:10:11,  1.23s/it]

1882 episode score is 303.31


  0%|          | 50/100000 [00:57<33:39:20,  1.21s/it]

1893 episode score is 247.43


  0%|          | 51/100000 [00:58<33:15:55,  1.20s/it]

1908 episode score is 187.39


  0%|          | 52/100000 [00:59<33:10:52,  1.20s/it]

1919 episode score is 300.26


  0%|          | 53/100000 [01:01<32:53:57,  1.19s/it]

1930 episode score is 267.10


  0%|          | 54/100000 [01:02<32:41:08,  1.18s/it]

1941 episode score is 287.45


  0%|          | 55/100000 [01:03<32:38:44,  1.18s/it]

1951 episode score is 323.25


  0%|          | 56/100000 [01:04<32:57:07,  1.19s/it]

1964 episode score is 252.31


  0%|          | 57/100000 [01:05<32:58:29,  1.19s/it]

1977 episode score is 259.64


  0%|          | 58/100000 [01:06<32:42:20,  1.18s/it]

1989 episode score is 252.31


  0%|          | 59/100000 [01:08<33:48:08,  1.22s/it]

2001 episode score is 291.33


  0%|          | 60/100000 [01:09<33:10:01,  1.19s/it]

2012 episode score is 248.68


  0%|          | 61/100000 [01:10<32:39:41,  1.18s/it]

2024 episode score is 253.79


  0%|          | 62/100000 [01:11<32:34:30,  1.17s/it]

2036 episode score is 230.41


  0%|          | 63/100000 [01:12<32:28:09,  1.17s/it]

2048 episode score is 265.62


  0%|          | 64/100000 [01:13<32:11:36,  1.16s/it]

2060 episode score is 246.22


  0%|          | 65/100000 [01:15<31:54:07,  1.15s/it]

2073 episode score is 237.53


  0%|          | 66/100000 [01:16<32:00:14,  1.15s/it]

2084 episode score is 247.86


  0%|          | 67/100000 [01:17<32:08:36,  1.16s/it]

2097 episode score is 232.62


  0%|          | 68/100000 [01:18<32:20:15,  1.16s/it]

2110 episode score is 231.42


  0%|          | 69/100000 [01:19<32:02:57,  1.15s/it]

2122 episode score is 271.49


  0%|          | 70/100000 [01:20<32:18:05,  1.16s/it]

2135 episode score is 273.01


  0%|          | 71/100000 [01:22<33:05:34,  1.19s/it]

2147 episode score is 306.63


  0%|          | 72/100000 [01:23<32:36:58,  1.18s/it]

2160 episode score is 260.13


  0%|          | 73/100000 [01:24<32:53:56,  1.19s/it]

2174 episode score is 229.89


  0%|          | 74/100000 [01:25<32:27:19,  1.17s/it]

2186 episode score is 256.88


  0%|          | 75/100000 [01:26<32:30:53,  1.17s/it]

2199 episode score is 278.01


  0%|          | 76/100000 [01:28<32:33:20,  1.17s/it]

2210 episode score is 274.46


  0%|          | 77/100000 [01:29<32:38:45,  1.18s/it]

2222 episode score is 282.76


  0%|          | 78/100000 [01:30<32:36:18,  1.17s/it]

2234 episode score is 274.90


  0%|          | 79/100000 [01:31<32:32:03,  1.17s/it]

2245 episode score is 307.87


  0%|          | 80/100000 [01:32<32:55:56,  1.19s/it]

2258 episode score is 222.53


  0%|          | 81/100000 [01:33<32:29:10,  1.17s/it]

2270 episode score is 260.29


  0%|          | 82/100000 [01:35<32:19:18,  1.16s/it]

2281 episode score is 300.47


  0%|          | 83/100000 [01:36<32:43:12,  1.18s/it]

2293 episode score is 291.89


  0%|          | 84/100000 [01:37<33:21:10,  1.20s/it]

2304 episode score is 273.47


  0%|          | 85/100000 [01:38<33:03:03,  1.19s/it]

2316 episode score is 289.87


  0%|          | 86/100000 [01:39<33:09:36,  1.19s/it]

2330 episode score is 241.77


  0%|          | 87/100000 [01:41<33:03:38,  1.19s/it]

2342 episode score is 247.04


  0%|          | 88/100000 [01:42<33:17:55,  1.20s/it]

2355 episode score is 268.99


  0%|          | 89/100000 [01:43<33:19:02,  1.20s/it]

2368 episode score is 279.95


  0%|          | 90/100000 [01:44<33:10:26,  1.20s/it]

2381 episode score is 242.54


  0%|          | 91/100000 [01:45<33:05:03,  1.19s/it]

2394 episode score is 278.10


  0%|          | 92/100000 [01:46<32:42:08,  1.18s/it]

2406 episode score is 275.55


  0%|          | 93/100000 [01:48<32:48:15,  1.18s/it]

2418 episode score is 268.62


  0%|          | 94/100000 [01:49<32:46:01,  1.18s/it]

2430 episode score is 270.81


  0%|          | 95/100000 [01:50<32:51:13,  1.18s/it]

2442 episode score is 276.00


  0%|          | 96/100000 [01:51<32:52:03,  1.18s/it]

2454 episode score is 267.33


  0%|          | 97/100000 [01:52<32:30:59,  1.17s/it]

2466 episode score is 271.88


  0%|          | 98/100000 [01:54<32:34:47,  1.17s/it]

2478 episode score is 290.55


  0%|          | 99/100000 [01:55<32:22:01,  1.17s/it]

2489 episode score is 275.95
2502 episode score is 274.44


  0%|          | 100/100000 [01:57<39:29:01,  1.42s/it]

Iteration 100: Average test reward: 280.84


  0%|          | 101/100000 [01:58<37:19:42,  1.35s/it]

2514 episode score is 286.35


  0%|          | 102/100000 [01:59<35:41:44,  1.29s/it]

2525 episode score is 283.47


  0%|          | 103/100000 [02:00<34:36:11,  1.25s/it]

2538 episode score is 253.65


  0%|          | 104/100000 [02:01<34:43:29,  1.25s/it]

2551 episode score is 275.05


  0%|          | 105/100000 [02:03<33:55:54,  1.22s/it]

2562 episode score is 299.12


  0%|          | 106/100000 [02:04<33:06:47,  1.19s/it]

2574 episode score is 285.19


  0%|          | 107/100000 [02:05<33:18:53,  1.20s/it]

2588 episode score is 267.67


  0%|          | 108/100000 [02:06<32:44:59,  1.18s/it]

2601 episode score is 266.07


  0%|          | 109/100000 [02:07<32:56:47,  1.19s/it]

2616 episode score is 254.19


  0%|          | 110/100000 [02:08<32:41:12,  1.18s/it]

2630 episode score is 261.31


  0%|          | 111/100000 [02:10<32:20:11,  1.17s/it]

2642 episode score is 261.42


  0%|          | 112/100000 [02:11<32:50:57,  1.18s/it]

2655 episode score is 267.77


  0%|          | 113/100000 [02:12<32:43:35,  1.18s/it]

2666 episode score is 283.00


  0%|          | 114/100000 [02:13<32:40:54,  1.18s/it]

2680 episode score is 240.77


  0%|          | 115/100000 [02:14<32:30:28,  1.17s/it]

2692 episode score is 286.56


  0%|          | 116/100000 [02:15<32:18:30,  1.16s/it]

2705 episode score is 269.82


  0%|          | 117/100000 [02:17<32:03:02,  1.16s/it]

2719 episode score is 229.81


  0%|          | 118/100000 [02:18<32:57:40,  1.19s/it]

2732 episode score is 286.40


  0%|          | 119/100000 [02:19<32:46:11,  1.18s/it]

2746 episode score is 265.39


  0%|          | 120/100000 [02:20<32:46:02,  1.18s/it]

2760 episode score is 259.71


  0%|          | 121/100000 [02:21<33:06:54,  1.19s/it]

2774 episode score is 255.21


  0%|          | 122/100000 [02:23<32:54:59,  1.19s/it]

2788 episode score is 261.00


  0%|          | 123/100000 [02:24<32:31:55,  1.17s/it]

2801 episode score is 273.54


  0%|          | 124/100000 [02:25<32:31:06,  1.17s/it]

2815 episode score is 264.11


  0%|          | 125/100000 [02:26<32:25:27,  1.17s/it]

2829 episode score is 243.35


  0%|          | 126/100000 [02:27<32:37:54,  1.18s/it]

2843 episode score is 265.82


  0%|          | 127/100000 [02:28<32:36:50,  1.18s/it]

2856 episode score is 275.76


  0%|          | 128/100000 [02:30<32:40:48,  1.18s/it]

2870 episode score is 268.35


  0%|          | 129/100000 [02:31<32:36:51,  1.18s/it]

2883 episode score is 262.50


  0%|          | 130/100000 [02:32<32:33:05,  1.17s/it]

2897 episode score is 260.71


  0%|          | 131/100000 [02:33<32:38:34,  1.18s/it]

2911 episode score is 263.41


  0%|          | 132/100000 [02:34<32:44:56,  1.18s/it]

2924 episode score is 280.64


  0%|          | 133/100000 [02:36<32:52:21,  1.18s/it]

2938 episode score is 267.39


  0%|          | 134/100000 [02:37<32:36:16,  1.18s/it]

2952 episode score is 256.76


  0%|          | 135/100000 [02:38<32:24:49,  1.17s/it]

2965 episode score is 250.80


  0%|          | 136/100000 [02:39<32:05:47,  1.16s/it]

2978 episode score is 260.79


  0%|          | 137/100000 [02:40<32:13:31,  1.16s/it]

2992 episode score is 267.74


  0%|          | 138/100000 [02:41<32:06:10,  1.16s/it]

3006 episode score is 259.30
3019 episode score is 249.54


  0%|          | 140/100000 [02:44<33:15:31,  1.20s/it]

3032 episode score is 278.93


  0%|          | 141/100000 [02:45<32:48:16,  1.18s/it]

3045 episode score is 267.15


  0%|          | 142/100000 [02:46<32:31:02,  1.17s/it]

3058 episode score is 253.95


  0%|          | 143/100000 [02:47<32:31:23,  1.17s/it]

3069 episode score is 297.44


  0%|          | 144/100000 [02:48<32:46:21,  1.18s/it]

3081 episode score is 263.37


  0%|          | 145/100000 [02:50<32:28:07,  1.17s/it]

3093 episode score is 283.51


  0%|          | 146/100000 [02:51<32:31:16,  1.17s/it]

3106 episode score is 270.97


  0%|          | 147/100000 [02:52<32:34:00,  1.17s/it]

3119 episode score is 274.41


  0%|          | 148/100000 [02:53<32:46:20,  1.18s/it]

3132 episode score is 281.98


  0%|          | 149/100000 [02:54<32:22:48,  1.17s/it]

3146 episode score is 253.51


  0%|          | 150/100000 [02:55<32:26:47,  1.17s/it]

3158 episode score is 267.55


  0%|          | 151/100000 [02:57<32:03:12,  1.16s/it]

3170 episode score is 262.41


  0%|          | 152/100000 [02:58<32:26:47,  1.17s/it]

3185 episode score is 258.99


  0%|          | 153/100000 [02:59<32:44:48,  1.18s/it]

3199 episode score is 269.39


  0%|          | 154/100000 [03:00<32:27:03,  1.17s/it]

3212 episode score is 270.04


  0%|          | 155/100000 [03:01<32:27:48,  1.17s/it]

3225 episode score is 279.05


  0%|          | 156/100000 [03:03<32:54:57,  1.19s/it]

3239 episode score is 268.68


  0%|          | 157/100000 [03:04<32:29:33,  1.17s/it]

3253 episode score is 260.50


  0%|          | 158/100000 [03:05<32:41:48,  1.18s/it]

3267 episode score is 245.58


  0%|          | 159/100000 [03:06<33:03:19,  1.19s/it]

3282 episode score is 257.39


  0%|          | 160/100000 [03:07<32:47:42,  1.18s/it]

3295 episode score is 273.50


  0%|          | 161/100000 [03:08<33:00:59,  1.19s/it]

3310 episode score is 257.67


  0%|          | 162/100000 [03:10<33:01:02,  1.19s/it]

3324 episode score is 268.73


  0%|          | 163/100000 [03:11<33:12:52,  1.20s/it]

3339 episode score is 255.56


  0%|          | 164/100000 [03:12<33:05:33,  1.19s/it]

3354 episode score is 253.46


  0%|          | 165/100000 [03:13<32:37:18,  1.18s/it]

3367 episode score is 273.49


  0%|          | 166/100000 [03:14<32:51:06,  1.18s/it]

3381 episode score is 276.09


  0%|          | 167/100000 [03:16<33:29:59,  1.21s/it]

3395 episode score is 267.39


  0%|          | 168/100000 [03:17<32:54:25,  1.19s/it]

3409 episode score is 261.03


  0%|          | 169/100000 [03:18<32:57:24,  1.19s/it]

3424 episode score is 252.93


  0%|          | 170/100000 [03:19<32:37:25,  1.18s/it]

3438 episode score is 260.02


  0%|          | 171/100000 [03:20<32:38:08,  1.18s/it]

3453 episode score is 256.40


  0%|          | 172/100000 [03:21<32:40:13,  1.18s/it]

3468 episode score is 259.46


  0%|          | 173/100000 [03:23<32:26:23,  1.17s/it]

3481 episode score is 267.71


  0%|          | 174/100000 [03:24<32:17:21,  1.16s/it]

3495 episode score is 260.37


  0%|          | 175/100000 [03:25<32:20:17,  1.17s/it]

3509 episode score is 272.41


  0%|          | 176/100000 [03:26<32:03:40,  1.16s/it]

3523 episode score is 263.56


  0%|          | 177/100000 [03:27<32:12:19,  1.16s/it]

3538 episode score is 227.14


  0%|          | 178/100000 [03:28<32:10:40,  1.16s/it]

3553 episode score is 253.06
3566 episode score is 277.23


  0%|          | 180/100000 [03:31<32:29:11,  1.17s/it]

3580 episode score is 264.78


  0%|          | 181/100000 [03:32<32:28:53,  1.17s/it]

3595 episode score is 254.22


  0%|          | 182/100000 [03:33<32:04:17,  1.16s/it]

3607 episode score is 269.51


  0%|          | 183/100000 [03:34<32:05:58,  1.16s/it]

3621 episode score is 274.23


  0%|          | 184/100000 [03:36<33:59:20,  1.23s/it]

3635 episode score is 272.16


  0%|          | 185/100000 [03:37<33:38:23,  1.21s/it]

3649 episode score is 269.54


  0%|          | 186/100000 [03:38<32:58:16,  1.19s/it]

3663 episode score is 260.84


  0%|          | 187/100000 [03:39<32:26:29,  1.17s/it]

3675 episode score is 275.05


  0%|          | 188/100000 [03:40<32:09:53,  1.16s/it]

3689 episode score is 270.66


  0%|          | 189/100000 [03:41<31:48:13,  1.15s/it]

3703 episode score is 269.23


  0%|          | 190/100000 [03:43<31:59:21,  1.15s/it]

3717 episode score is 275.62


  0%|          | 191/100000 [03:44<32:14:08,  1.16s/it]

3731 episode score is 278.61


  0%|          | 192/100000 [03:45<32:21:18,  1.17s/it]

3745 episode score is 270.31


  0%|          | 193/100000 [03:46<31:59:32,  1.15s/it]

3759 episode score is 270.69


  0%|          | 194/100000 [03:47<32:02:33,  1.16s/it]

3773 episode score is 271.83


  0%|          | 195/100000 [03:48<32:17:06,  1.16s/it]

3788 episode score is 262.08


  0%|          | 196/100000 [03:49<31:57:28,  1.15s/it]

3800 episode score is 298.71


  0%|          | 197/100000 [03:51<32:04:58,  1.16s/it]

3813 episode score is 289.74


  0%|          | 198/100000 [03:52<32:27:58,  1.17s/it]

3828 episode score is 282.59


  0%|          | 199/100000 [03:53<32:36:21,  1.18s/it]

3843 episode score is 267.85
3856 episode score is 288.39


  0%|          | 200/100000 [03:55<38:55:01,  1.40s/it]

Iteration 200: Average test reward: 271.65


  0%|          | 201/100000 [03:56<37:13:25,  1.34s/it]

3870 episode score is 291.54


  0%|          | 202/100000 [03:57<36:07:49,  1.30s/it]

3884 episode score is 293.65


  0%|          | 203/100000 [03:59<34:45:20,  1.25s/it]

3898 episode score is 282.77


  0%|          | 204/100000 [04:00<34:30:26,  1.24s/it]

3912 episode score is 299.71


  0%|          | 205/100000 [04:01<34:08:54,  1.23s/it]

3925 episode score is 315.02


  0%|          | 206/100000 [04:02<33:46:19,  1.22s/it]

3938 episode score is 309.90


  0%|          | 207/100000 [04:03<33:25:03,  1.21s/it]

3951 episode score is 307.04


  0%|          | 208/100000 [04:04<33:09:11,  1.20s/it]

3963 episode score is 331.57


  0%|          | 209/100000 [04:06<32:53:21,  1.19s/it]

3976 episode score is 310.84


  0%|          | 210/100000 [04:07<32:40:37,  1.18s/it]

3989 episode score is 307.70


  0%|          | 211/100000 [04:08<32:27:36,  1.17s/it]

4001 episode score is 325.74


  0%|          | 212/100000 [04:09<32:35:12,  1.18s/it]

4014 episode score is 316.22


  0%|          | 213/100000 [04:10<32:31:05,  1.17s/it]

4027 episode score is 309.40


  0%|          | 214/100000 [04:11<32:23:22,  1.17s/it]

4039 episode score is 344.71


  0%|          | 215/100000 [04:13<32:03:17,  1.16s/it]

4052 episode score is 316.88


  0%|          | 216/100000 [04:14<32:59:17,  1.19s/it]

4064 episode score is 342.77


  0%|          | 217/100000 [04:15<32:21:40,  1.17s/it]

4076 episode score is 323.42


  0%|          | 218/100000 [04:16<32:46:34,  1.18s/it]

4089 episode score is 331.42


  0%|          | 219/100000 [04:17<32:38:38,  1.18s/it]

4101 episode score is 339.51


  0%|          | 220/100000 [04:18<32:20:49,  1.17s/it]

4112 episode score is 372.17


  0%|          | 221/100000 [04:20<32:27:29,  1.17s/it]

4124 episode score is 345.07


  0%|          | 222/100000 [04:21<32:17:11,  1.16s/it]

4135 episode score is 364.52


  0%|          | 223/100000 [04:22<32:09:50,  1.16s/it]

4147 episode score is 336.61


  0%|          | 224/100000 [04:23<32:22:32,  1.17s/it]

4159 episode score is 332.29


  0%|          | 225/100000 [04:24<32:16:36,  1.16s/it]

4169 episode score is 403.20


  0%|          | 226/100000 [04:25<32:04:53,  1.16s/it]

4180 episode score is 369.27


  0%|          | 227/100000 [04:27<32:12:03,  1.16s/it]

4191 episode score is 358.18


  0%|          | 228/100000 [04:28<32:25:02,  1.17s/it]

4203 episode score is 352.37


  0%|          | 229/100000 [04:29<32:58:07,  1.19s/it]

4215 episode score is 363.73


  0%|          | 230/100000 [04:30<32:41:13,  1.18s/it]

4226 episode score is 355.21


  0%|          | 231/100000 [04:31<32:23:24,  1.17s/it]

4237 episode score is 365.37


  0%|          | 232/100000 [04:33<32:19:19,  1.17s/it]

4248 episode score is 377.16


  0%|          | 233/100000 [04:34<32:48:01,  1.18s/it]

4260 episode score is 366.57


  0%|          | 234/100000 [04:35<32:28:46,  1.17s/it]

4271 episode score is 366.58


  0%|          | 235/100000 [04:36<32:32:22,  1.17s/it]

4283 episode score is 347.49


  0%|          | 236/100000 [04:37<32:50:37,  1.19s/it]

4295 episode score is 364.18


  0%|          | 237/100000 [04:38<32:55:09,  1.19s/it]

4307 episode score is 365.55


  0%|          | 238/100000 [04:40<33:13:19,  1.20s/it]

4320 episode score is 341.61


  0%|          | 239/100000 [04:41<32:48:52,  1.18s/it]

4331 episode score is 369.09


  0%|          | 240/100000 [04:42<32:25:05,  1.17s/it]

4342 episode score is 379.53


  0%|          | 241/100000 [04:43<32:14:07,  1.16s/it]

4353 episode score is 378.70


  0%|          | 242/100000 [04:44<31:59:58,  1.15s/it]

4364 episode score is 382.24


  0%|          | 243/100000 [04:45<31:58:25,  1.15s/it]

4375 episode score is 389.41


  0%|          | 244/100000 [04:47<31:59:12,  1.15s/it]

4386 episode score is 379.44


  0%|          | 245/100000 [04:48<31:56:42,  1.15s/it]

4397 episode score is 372.80


  0%|          | 246/100000 [04:49<31:50:06,  1.15s/it]

4408 episode score is 381.22


  0%|          | 247/100000 [04:50<32:02:34,  1.16s/it]

4419 episode score is 383.44


  0%|          | 248/100000 [04:51<32:25:13,  1.17s/it]

4430 episode score is 389.32


  0%|          | 249/100000 [04:52<32:37:21,  1.18s/it]

4441 episode score is 385.81


  0%|          | 250/100000 [04:54<32:32:13,  1.17s/it]

4452 episode score is 383.76


  0%|          | 251/100000 [04:55<32:25:40,  1.17s/it]

4463 episode score is 374.67


  0%|          | 252/100000 [04:56<32:43:19,  1.18s/it]

4475 episode score is 357.25


  0%|          | 253/100000 [04:57<32:10:46,  1.16s/it]

4486 episode score is 369.80


  0%|          | 254/100000 [04:58<33:32:51,  1.21s/it]

4497 episode score is 383.60


  0%|          | 255/100000 [05:00<33:19:02,  1.20s/it]

4508 episode score is 382.14


  0%|          | 256/100000 [05:01<33:20:49,  1.20s/it]

4519 episode score is 398.73


  0%|          | 257/100000 [05:02<32:52:32,  1.19s/it]

4530 episode score is 384.75


  0%|          | 258/100000 [05:03<33:55:08,  1.22s/it]

4541 episode score is 410.34


  0%|          | 259/100000 [05:04<33:30:27,  1.21s/it]

4552 episode score is 372.13


  0%|          | 260/100000 [05:06<32:38:45,  1.18s/it]

4563 episode score is 369.62


  0%|          | 261/100000 [05:07<32:38:14,  1.18s/it]

4575 episode score is 361.45


  0%|          | 262/100000 [05:08<32:04:14,  1.16s/it]

4586 episode score is 386.19


  0%|          | 263/100000 [05:09<31:39:44,  1.14s/it]

4596 episode score is 415.78


  0%|          | 264/100000 [05:10<31:19:27,  1.13s/it]

4608 episode score is 345.38


  0%|          | 265/100000 [05:11<31:13:27,  1.13s/it]

4618 episode score is 354.48


  0%|          | 266/100000 [05:12<31:21:00,  1.13s/it]

4629 episode score is 391.36


  0%|          | 267/100000 [05:13<31:45:40,  1.15s/it]

4640 episode score is 400.84


  0%|          | 268/100000 [05:15<31:36:49,  1.14s/it]

4650 episode score is 416.60


  0%|          | 269/100000 [05:16<31:56:08,  1.15s/it]

4661 episode score is 387.12


  0%|          | 270/100000 [05:17<32:01:19,  1.16s/it]

4672 episode score is 385.93


  0%|          | 271/100000 [05:18<31:50:18,  1.15s/it]

4683 episode score is 398.07


  0%|          | 272/100000 [05:19<31:48:27,  1.15s/it]

4693 episode score is 353.79


  0%|          | 273/100000 [05:20<32:07:54,  1.16s/it]

4704 episode score is 412.69


  0%|          | 274/100000 [05:22<32:25:45,  1.17s/it]

4714 episode score is 409.64


  0%|          | 275/100000 [05:23<32:42:12,  1.18s/it]

4725 episode score is 407.16


  0%|          | 276/100000 [05:24<32:29:28,  1.17s/it]

4736 episode score is 394.09


  0%|          | 277/100000 [05:25<33:01:33,  1.19s/it]

4747 episode score is 413.77


  0%|          | 278/100000 [05:26<33:21:41,  1.20s/it]

4758 episode score is 411.64


  0%|          | 279/100000 [05:28<33:08:38,  1.20s/it]

4769 episode score is 393.98


  0%|          | 280/100000 [05:29<32:38:35,  1.18s/it]

4779 episode score is 366.59


  0%|          | 281/100000 [05:30<32:15:58,  1.16s/it]

4789 episode score is 435.78


  0%|          | 282/100000 [05:31<32:18:50,  1.17s/it]

4800 episode score is 374.82


  0%|          | 283/100000 [05:32<32:16:27,  1.17s/it]

4810 episode score is 430.34


  0%|          | 284/100000 [05:33<32:36:35,  1.18s/it]

4821 episode score is 367.01


  0%|          | 285/100000 [05:35<32:40:29,  1.18s/it]

4830 episode score is 419.13


  0%|          | 286/100000 [05:36<32:57:06,  1.19s/it]

4841 episode score is 401.89


  0%|          | 287/100000 [05:37<32:27:55,  1.17s/it]

4851 episode score is 372.56


  0%|          | 288/100000 [05:38<32:59:27,  1.19s/it]

4861 episode score is 377.83


  0%|          | 289/100000 [05:39<32:51:45,  1.19s/it]

4872 episode score is 399.19


  0%|          | 290/100000 [05:40<32:26:49,  1.17s/it]

4882 episode score is 407.00


  0%|          | 291/100000 [05:42<32:11:22,  1.16s/it]

4892 episode score is 428.59


  0%|          | 292/100000 [05:43<32:15:51,  1.16s/it]

4902 episode score is 446.30


  0%|          | 293/100000 [05:44<31:53:52,  1.15s/it]

4912 episode score is 382.01


  0%|          | 294/100000 [05:45<31:42:32,  1.14s/it]

4922 episode score is 417.61


  0%|          | 295/100000 [05:46<31:41:01,  1.14s/it]

4932 episode score is 444.15


  0%|          | 296/100000 [05:47<31:53:46,  1.15s/it]

4943 episode score is 401.98


  0%|          | 297/100000 [05:49<31:49:48,  1.15s/it]

4953 episode score is 431.84


  0%|          | 298/100000 [05:50<32:03:35,  1.16s/it]

4963 episode score is 450.39


  0%|          | 299/100000 [05:51<32:05:47,  1.16s/it]

4973 episode score is 436.70
4982 episode score is 426.54


  0%|          | 300/100000 [05:53<40:26:15,  1.46s/it]

Iteration 300: Average test reward: 457.75


  0%|          | 301/100000 [05:54<38:10:01,  1.38s/it]

4992 episode score is 410.49


  0%|          | 302/100000 [05:55<36:37:02,  1.32s/it]

5003 episode score is 408.46


  0%|          | 303/100000 [05:57<35:33:35,  1.28s/it]

5013 episode score is 392.82


  0%|          | 304/100000 [05:58<34:21:00,  1.24s/it]

5022 episode score is 437.18


  0%|          | 305/100000 [05:59<33:53:25,  1.22s/it]

5032 episode score is 452.66


  0%|          | 306/100000 [06:00<33:11:56,  1.20s/it]

5042 episode score is 436.42


  0%|          | 307/100000 [06:01<32:34:43,  1.18s/it]

5052 episode score is 436.37


  0%|          | 308/100000 [06:02<32:21:59,  1.17s/it]

5062 episode score is 388.59


  0%|          | 309/100000 [06:04<32:55:34,  1.19s/it]

5073 episode score is 398.42


  0%|          | 310/100000 [06:05<32:41:40,  1.18s/it]

5083 episode score is 448.03


  0%|          | 311/100000 [06:06<32:58:18,  1.19s/it]

5093 episode score is 457.22


  0%|          | 312/100000 [06:07<32:45:08,  1.18s/it]

5102 episode score is 480.33


  0%|          | 313/100000 [06:08<33:03:00,  1.19s/it]

5113 episode score is 441.07


  0%|          | 314/100000 [06:09<32:33:26,  1.18s/it]

5123 episode score is 440.56


  0%|          | 315/100000 [06:11<32:26:59,  1.17s/it]

5133 episode score is 428.88


  0%|          | 316/100000 [06:12<32:02:47,  1.16s/it]

5142 episode score is 473.01


  0%|          | 317/100000 [06:13<31:51:49,  1.15s/it]

5152 episode score is 432.71


  0%|          | 318/100000 [06:14<32:30:56,  1.17s/it]

5162 episode score is 463.84


  0%|          | 319/100000 [06:15<32:41:47,  1.18s/it]

5172 episode score is 459.26


  0%|          | 320/100000 [06:16<32:22:46,  1.17s/it]

5182 episode score is 446.15


  0%|          | 321/100000 [06:18<33:09:15,  1.20s/it]

5192 episode score is 457.44


  0%|          | 322/100000 [06:19<32:47:40,  1.18s/it]

5202 episode score is 432.40


  0%|          | 323/100000 [06:20<33:02:07,  1.19s/it]

5212 episode score is 447.66


  0%|          | 324/100000 [06:21<32:56:52,  1.19s/it]

5222 episode score is 475.94


  0%|          | 325/100000 [06:22<32:49:36,  1.19s/it]

5232 episode score is 465.09


  0%|          | 326/100000 [06:24<33:15:52,  1.20s/it]

5242 episode score is 471.50


  0%|          | 327/100000 [06:25<32:56:48,  1.19s/it]

5252 episode score is 448.39


  0%|          | 328/100000 [06:26<32:57:16,  1.19s/it]

5262 episode score is 458.24


  0%|          | 329/100000 [06:27<32:46:22,  1.18s/it]

5271 episode score is 541.85


  0%|          | 330/100000 [06:28<32:55:57,  1.19s/it]

5281 episode score is 464.95


  0%|          | 331/100000 [06:30<32:40:27,  1.18s/it]

5290 episode score is 485.44


  0%|          | 332/100000 [06:31<32:37:43,  1.18s/it]

5300 episode score is 456.50


  0%|          | 333/100000 [06:32<32:17:33,  1.17s/it]

5309 episode score is 501.73


  0%|          | 334/100000 [06:33<32:01:28,  1.16s/it]

5318 episode score is 516.31


  0%|          | 335/100000 [06:34<32:37:20,  1.18s/it]

5327 episode score is 480.26


  0%|          | 336/100000 [06:35<32:14:02,  1.16s/it]

5336 episode score is 489.22


  0%|          | 337/100000 [06:36<31:58:36,  1.16s/it]

5345 episode score is 507.43


  0%|          | 338/100000 [06:38<32:29:08,  1.17s/it]

5355 episode score is 470.34


  0%|          | 339/100000 [06:39<32:05:42,  1.16s/it]

5365 episode score is 419.29


  0%|          | 340/100000 [06:40<32:30:42,  1.17s/it]

5375 episode score is 475.24


  0%|          | 341/100000 [06:41<32:25:45,  1.17s/it]

5385 episode score is 410.31


  0%|          | 342/100000 [06:42<31:59:51,  1.16s/it]

5395 episode score is 445.16


  0%|          | 343/100000 [06:44<32:42:59,  1.18s/it]

5405 episode score is 497.53


  0%|          | 344/100000 [06:45<32:08:38,  1.16s/it]

5414 episode score is 505.88


  0%|          | 345/100000 [06:46<31:57:20,  1.15s/it]

5423 episode score is 538.36


  0%|          | 346/100000 [06:47<32:48:17,  1.19s/it]

5433 episode score is 497.37


  0%|          | 347/100000 [06:48<32:22:02,  1.17s/it]

5442 episode score is 516.75


  0%|          | 348/100000 [06:49<32:16:25,  1.17s/it]

5451 episode score is 518.74


  0%|          | 349/100000 [06:51<32:20:52,  1.17s/it]

5460 episode score is 549.74


  0%|          | 350/100000 [06:52<32:36:56,  1.18s/it]

5469 episode score is 545.95


  0%|          | 351/100000 [06:53<32:44:32,  1.18s/it]

5479 episode score is 507.71


  0%|          | 352/100000 [06:54<32:34:39,  1.18s/it]

5487 episode score is 565.32


  0%|          | 353/100000 [06:55<32:25:51,  1.17s/it]

5496 episode score is 509.22


  0%|          | 354/100000 [06:56<32:12:04,  1.16s/it]

5505 episode score is 512.97


  0%|          | 355/100000 [06:58<32:50:38,  1.19s/it]

5515 episode score is 511.06


  0%|          | 356/100000 [06:59<32:56:36,  1.19s/it]

5525 episode score is 489.52


  0%|          | 357/100000 [07:00<32:33:37,  1.18s/it]

5534 episode score is 516.89


  0%|          | 358/100000 [07:01<32:43:42,  1.18s/it]

5544 episode score is 497.46


  0%|          | 359/100000 [07:02<32:53:43,  1.19s/it]

5553 episode score is 570.10


  0%|          | 360/100000 [07:04<33:02:10,  1.19s/it]

5562 episode score is 527.88


  0%|          | 361/100000 [07:05<32:42:59,  1.18s/it]

5571 episode score is 518.48


  0%|          | 362/100000 [07:06<32:13:29,  1.16s/it]

5579 episode score is 600.08


  0%|          | 363/100000 [07:07<32:17:43,  1.17s/it]

5588 episode score is 564.04


  0%|          | 364/100000 [07:08<32:05:38,  1.16s/it]

5596 episode score is 586.02


  0%|          | 365/100000 [07:09<32:34:35,  1.18s/it]

5605 episode score is 537.69


  0%|          | 366/100000 [07:11<32:17:08,  1.17s/it]

5613 episode score is 554.46


  0%|          | 367/100000 [07:12<31:59:14,  1.16s/it]

5621 episode score is 590.71


  0%|          | 368/100000 [07:13<32:25:49,  1.17s/it]

5630 episode score is 587.73


  0%|          | 369/100000 [07:14<32:08:50,  1.16s/it]

5639 episode score is 551.93


  0%|          | 370/100000 [07:15<32:41:25,  1.18s/it]

5648 episode score is 553.90


  0%|          | 371/100000 [07:16<32:59:16,  1.19s/it]

5657 episode score is 565.96


  0%|          | 372/100000 [07:18<33:09:45,  1.20s/it]

5666 episode score is 524.24


  0%|          | 373/100000 [07:19<33:15:41,  1.20s/it]

5674 episode score is 655.69


  0%|          | 374/100000 [07:20<33:07:21,  1.20s/it]

5682 episode score is 613.36


  0%|          | 375/100000 [07:21<32:54:20,  1.19s/it]

5691 episode score is 538.98


  0%|          | 376/100000 [07:23<33:26:42,  1.21s/it]

5699 episode score is 677.31


  0%|          | 377/100000 [07:24<33:03:37,  1.19s/it]

5707 episode score is 628.22


  0%|          | 378/100000 [07:25<32:35:33,  1.18s/it]

5715 episode score is 608.63


  0%|          | 379/100000 [07:26<32:28:34,  1.17s/it]

5723 episode score is 626.50


  0%|          | 380/100000 [07:27<32:51:22,  1.19s/it]

5733 episode score is 554.86


  0%|          | 381/100000 [07:28<32:26:08,  1.17s/it]

5742 episode score is 548.15


  0%|          | 382/100000 [07:30<32:52:06,  1.19s/it]

5751 episode score is 614.13


  0%|          | 383/100000 [07:31<32:31:33,  1.18s/it]

5759 episode score is 659.65


  0%|          | 384/100000 [07:32<32:18:14,  1.17s/it]

5768 episode score is 540.70


  0%|          | 385/100000 [07:33<33:01:52,  1.19s/it]

5778 episode score is 529.89


  0%|          | 386/100000 [07:34<33:13:51,  1.20s/it]

5786 episode score is 682.43


  0%|          | 387/100000 [07:36<33:41:02,  1.22s/it]

5794 episode score is 667.52


  0%|          | 388/100000 [07:37<33:10:32,  1.20s/it]

5802 episode score is 662.82


  0%|          | 389/100000 [07:38<33:24:03,  1.21s/it]

5810 episode score is 664.24


  0%|          | 390/100000 [07:39<32:40:38,  1.18s/it]

5818 episode score is 635.10


  0%|          | 391/100000 [07:40<33:21:36,  1.21s/it]

5827 episode score is 599.96


  0%|          | 392/100000 [07:42<33:04:57,  1.20s/it]

5835 episode score is 669.20


  0%|          | 393/100000 [07:43<32:49:31,  1.19s/it]

5844 episode score is 581.65


  0%|          | 394/100000 [07:44<32:50:24,  1.19s/it]

5853 episode score is 537.89


  0%|          | 395/100000 [07:45<32:22:52,  1.17s/it]

5861 episode score is 637.93


  0%|          | 396/100000 [07:46<32:04:22,  1.16s/it]

5869 episode score is 618.65


  0%|          | 397/100000 [07:47<31:59:38,  1.16s/it]

5877 episode score is 568.46


  0%|          | 398/100000 [07:49<32:32:17,  1.18s/it]

5885 episode score is 637.44


  0%|          | 399/100000 [07:50<33:16:11,  1.20s/it]

5893 episode score is 649.15
5901 episode score is 627.47


  0%|          | 400/100000 [07:52<44:22:05,  1.60s/it]

Iteration 400: Average test reward: 689.58


  0%|          | 401/100000 [07:53<40:44:12,  1.47s/it]

5909 episode score is 607.90


  0%|          | 402/100000 [07:55<37:55:30,  1.37s/it]

5918 episode score is 555.43


  0%|          | 403/100000 [07:56<35:57:44,  1.30s/it]

5926 episode score is 561.43


  0%|          | 404/100000 [07:57<35:22:23,  1.28s/it]

5935 episode score is 610.36


  0%|          | 405/100000 [07:58<34:11:52,  1.24s/it]

5942 episode score is 694.41


  0%|          | 406/100000 [07:59<33:52:20,  1.22s/it]

5950 episode score is 694.86


  0%|          | 407/100000 [08:01<33:59:15,  1.23s/it]

5959 episode score is 641.05


  0%|          | 408/100000 [08:02<33:19:15,  1.20s/it]

5967 episode score is 659.30


  0%|          | 409/100000 [08:03<33:43:13,  1.22s/it]

5975 episode score is 681.59


  0%|          | 410/100000 [08:04<33:14:40,  1.20s/it]

5983 episode score is 681.96


  0%|          | 411/100000 [08:05<33:10:22,  1.20s/it]

5991 episode score is 654.45


  0%|          | 412/100000 [08:06<33:10:30,  1.20s/it]

6000 episode score is 595.32


  0%|          | 413/100000 [08:08<33:52:06,  1.22s/it]

6008 episode score is 697.01


  0%|          | 414/100000 [08:09<33:41:47,  1.22s/it]

6017 episode score is 626.33


  0%|          | 415/100000 [08:10<33:38:20,  1.22s/it]

6025 episode score is 681.80


  0%|          | 416/100000 [08:11<33:15:57,  1.20s/it]

6034 episode score is 592.09


  0%|          | 417/100000 [08:13<33:01:40,  1.19s/it]

6043 episode score is 512.18


  0%|          | 418/100000 [08:14<32:30:50,  1.18s/it]

6051 episode score is 557.66


  0%|          | 419/100000 [08:15<33:26:28,  1.21s/it]

6059 episode score is 734.40


  0%|          | 420/100000 [08:16<32:58:32,  1.19s/it]

6067 episode score is 652.81


  0%|          | 421/100000 [08:17<32:58:12,  1.19s/it]

6075 episode score is 650.04


  0%|          | 422/100000 [08:19<33:01:07,  1.19s/it]

6083 episode score is 701.87


  0%|          | 423/100000 [08:20<33:01:49,  1.19s/it]

6091 episode score is 693.29


  0%|          | 424/100000 [08:21<33:11:59,  1.20s/it]

6100 episode score is 623.09


  0%|          | 425/100000 [08:22<32:33:41,  1.18s/it]

6107 episode score is 736.85


  0%|          | 426/100000 [08:23<32:21:25,  1.17s/it]

6115 episode score is 667.07


  0%|          | 427/100000 [08:24<32:43:53,  1.18s/it]

6123 episode score is 693.18


  0%|          | 428/100000 [08:26<33:27:46,  1.21s/it]

6132 episode score is 642.75


  0%|          | 429/100000 [08:27<32:54:09,  1.19s/it]

6139 episode score is 739.87


  0%|          | 430/100000 [08:28<32:31:17,  1.18s/it]

6147 episode score is 647.28


  0%|          | 431/100000 [08:29<32:44:07,  1.18s/it]

6155 episode score is 652.68


  0%|          | 432/100000 [08:30<32:49:18,  1.19s/it]

6163 episode score is 679.10


  0%|          | 433/100000 [08:31<32:13:47,  1.17s/it]

6171 episode score is 678.03


  0%|          | 434/100000 [08:33<32:32:17,  1.18s/it]

6179 episode score is 716.11


  0%|          | 435/100000 [08:34<32:17:05,  1.17s/it]

6186 episode score is 752.08


  0%|          | 436/100000 [08:35<31:55:24,  1.15s/it]

6193 episode score is 727.54


  0%|          | 437/100000 [08:36<32:48:57,  1.19s/it]

6201 episode score is 698.44


  0%|          | 438/100000 [08:37<33:10:00,  1.20s/it]

6209 episode score is 733.29


  0%|          | 439/100000 [08:39<33:08:30,  1.20s/it]

6217 episode score is 714.37


  0%|          | 440/100000 [08:40<33:17:21,  1.20s/it]

6225 episode score is 684.67


  0%|          | 441/100000 [08:41<32:55:44,  1.19s/it]

6233 episode score is 529.43


  0%|          | 442/100000 [08:42<32:30:53,  1.18s/it]

6241 episode score is 651.72


  0%|          | 443/100000 [08:43<32:01:29,  1.16s/it]

6249 episode score is 649.21


  0%|          | 444/100000 [08:44<31:51:05,  1.15s/it]

6257 episode score is 672.60


  0%|          | 445/100000 [08:46<31:36:27,  1.14s/it]

6264 episode score is 738.34


  0%|          | 446/100000 [08:47<31:29:55,  1.14s/it]

6272 episode score is 656.01


  0%|          | 447/100000 [08:48<32:04:47,  1.16s/it]

6280 episode score is 731.97


  0%|          | 448/100000 [08:49<32:31:20,  1.18s/it]

6288 episode score is 703.55


  0%|          | 449/100000 [08:50<32:36:01,  1.18s/it]

6296 episode score is 696.77


  0%|          | 450/100000 [08:51<32:38:45,  1.18s/it]

6304 episode score is 697.09


  0%|          | 451/100000 [08:53<32:56:33,  1.19s/it]

6313 episode score is 647.80


  0%|          | 452/100000 [08:54<33:17:00,  1.20s/it]

6321 episode score is 689.78


  0%|          | 453/100000 [08:55<33:15:27,  1.20s/it]

6329 episode score is 687.39


  0%|          | 454/100000 [08:56<33:13:51,  1.20s/it]

6337 episode score is 597.78


  0%|          | 455/100000 [08:57<33:07:14,  1.20s/it]

6346 episode score is 555.52


  0%|          | 456/100000 [08:59<32:32:16,  1.18s/it]

6354 episode score is 671.10


  0%|          | 457/100000 [09:00<34:08:10,  1.23s/it]

6363 episode score is 654.34


  0%|          | 458/100000 [09:01<33:27:44,  1.21s/it]

6371 episode score is 673.13


  0%|          | 459/100000 [09:02<32:44:34,  1.18s/it]

6379 episode score is 680.65


  0%|          | 460/100000 [09:03<32:25:07,  1.17s/it]

6387 episode score is 677.35


  0%|          | 461/100000 [09:05<32:14:16,  1.17s/it]

6394 episode score is 747.62


  0%|          | 462/100000 [09:06<31:49:26,  1.15s/it]

6402 episode score is 654.12


  0%|          | 463/100000 [09:07<32:15:19,  1.17s/it]

6410 episode score is 712.56


  0%|          | 464/100000 [09:08<32:25:11,  1.17s/it]

6418 episode score is 687.92


  0%|          | 465/100000 [09:09<32:50:05,  1.19s/it]

6427 episode score is 665.70


  0%|          | 466/100000 [09:10<32:54:45,  1.19s/it]

6435 episode score is 699.11


  0%|          | 467/100000 [09:12<33:20:03,  1.21s/it]

6443 episode score is 721.73


  0%|          | 468/100000 [09:13<33:17:48,  1.20s/it]

6451 episode score is 711.53


  0%|          | 469/100000 [09:14<32:43:00,  1.18s/it]

6459 episode score is 666.79


  0%|          | 470/100000 [09:15<32:14:59,  1.17s/it]

6467 episode score is 673.77


  0%|          | 471/100000 [09:16<32:51:57,  1.19s/it]

6476 episode score is 675.91


  0%|          | 472/100000 [09:18<33:28:54,  1.21s/it]

6484 episode score is 731.78


  0%|          | 473/100000 [09:19<33:11:42,  1.20s/it]

6492 episode score is 666.71


  0%|          | 474/100000 [09:20<33:12:12,  1.20s/it]

6501 episode score is 637.37


  0%|          | 475/100000 [09:21<32:43:59,  1.18s/it]

6509 episode score is 691.21


  0%|          | 476/100000 [09:22<32:46:51,  1.19s/it]

6518 episode score is 626.39


  0%|          | 477/100000 [09:24<33:19:34,  1.21s/it]

6526 episode score is 707.98


  0%|          | 478/100000 [09:25<32:58:17,  1.19s/it]

6534 episode score is 679.98


  0%|          | 479/100000 [09:26<32:35:58,  1.18s/it]

6542 episode score is 700.36


  0%|          | 480/100000 [09:27<32:54:56,  1.19s/it]

6551 episode score is 674.61


  0%|          | 481/100000 [09:28<32:31:02,  1.18s/it]

6559 episode score is 684.90


  0%|          | 482/100000 [09:30<32:47:58,  1.19s/it]

6568 episode score is 632.49


  0%|          | 483/100000 [09:31<32:58:58,  1.19s/it]

6577 episode score is 626.52


  0%|          | 484/100000 [09:32<33:23:51,  1.21s/it]

6586 episode score is 656.73


  0%|          | 485/100000 [09:33<33:21:35,  1.21s/it]

6595 episode score is 565.51


  0%|          | 486/100000 [09:34<33:19:06,  1.21s/it]

6604 episode score is 633.99


  0%|          | 487/100000 [09:36<33:25:30,  1.21s/it]

6613 episode score is 650.84


  0%|          | 488/100000 [09:37<33:53:23,  1.23s/it]

6623 episode score is 557.86


  0%|          | 489/100000 [09:38<33:07:52,  1.20s/it]

6631 episode score is 656.56


  0%|          | 490/100000 [09:39<32:56:17,  1.19s/it]

6640 episode score is 659.23


  0%|          | 491/100000 [09:40<32:28:56,  1.18s/it]

6648 episode score is 669.34


  0%|          | 492/100000 [09:41<32:05:29,  1.16s/it]

6656 episode score is 667.35


  0%|          | 493/100000 [09:43<32:41:23,  1.18s/it]

6665 episode score is 669.60


  0%|          | 494/100000 [09:44<33:16:42,  1.20s/it]

6674 episode score is 671.53


  0%|          | 495/100000 [09:45<32:57:57,  1.19s/it]

6682 episode score is 710.74


  0%|          | 496/100000 [09:46<32:40:10,  1.18s/it]

6690 episode score is 687.79


  0%|          | 497/100000 [09:47<32:53:13,  1.19s/it]

6698 episode score is 705.29


  0%|          | 498/100000 [09:49<32:58:58,  1.19s/it]

6706 episode score is 745.36


  0%|          | 499/100000 [09:50<33:08:39,  1.20s/it]

6714 episode score is 748.05
6722 episode score is 762.43


  0%|          | 500/100000 [09:52<43:57:56,  1.59s/it]

Iteration 500: Average test reward: 703.75


  1%|          | 501/100000 [09:54<41:02:25,  1.48s/it]

6730 episode score is 732.59


  1%|          | 502/100000 [09:55<38:45:25,  1.40s/it]

6738 episode score is 743.70


  1%|          | 503/100000 [09:56<36:29:52,  1.32s/it]

6746 episode score is 680.32


  1%|          | 504/100000 [09:57<35:12:02,  1.27s/it]

6754 episode score is 681.32


  1%|          | 505/100000 [09:58<34:45:22,  1.26s/it]

6763 episode score is 602.17


  1%|          | 506/100000 [09:59<33:36:40,  1.22s/it]

6771 episode score is 653.07


  1%|          | 507/100000 [10:01<33:41:37,  1.22s/it]

6780 episode score is 608.25


  1%|          | 508/100000 [10:02<33:35:22,  1.22s/it]

6789 episode score is 660.41


  1%|          | 509/100000 [10:03<33:01:02,  1.19s/it]

6797 episode score is 698.62


  1%|          | 510/100000 [10:04<32:35:35,  1.18s/it]

6805 episode score is 701.68


  1%|          | 511/100000 [10:05<32:25:31,  1.17s/it]

6813 episode score is 702.75


  1%|          | 512/100000 [10:06<32:10:58,  1.16s/it]

6821 episode score is 710.38


  1%|          | 513/100000 [10:08<32:43:08,  1.18s/it]

6830 episode score is 654.41


  1%|          | 514/100000 [10:09<32:22:36,  1.17s/it]

6838 episode score is 691.96


  1%|          | 515/100000 [10:10<32:29:28,  1.18s/it]

6847 episode score is 627.94


  1%|          | 516/100000 [10:11<32:26:22,  1.17s/it]

6856 episode score is 627.44


  1%|          | 517/100000 [10:12<32:01:12,  1.16s/it]

6864 episode score is 706.57


  1%|          | 518/100000 [10:14<32:12:27,  1.17s/it]

6873 episode score is 645.79


  1%|          | 519/100000 [10:15<31:53:12,  1.15s/it]

6881 episode score is 680.44


  1%|          | 520/100000 [10:16<32:43:15,  1.18s/it]

6890 episode score is 654.28


  1%|          | 521/100000 [10:17<33:39:57,  1.22s/it]

6899 episode score is 648.92


  1%|          | 522/100000 [10:18<33:48:32,  1.22s/it]

6908 episode score is 662.93


  1%|          | 523/100000 [10:20<33:13:16,  1.20s/it]

6916 episode score is 707.69


  1%|          | 524/100000 [10:21<32:44:28,  1.18s/it]

6924 episode score is 699.48


  1%|          | 525/100000 [10:22<33:01:23,  1.20s/it]

6933 episode score is 686.26


  1%|          | 526/100000 [10:23<33:11:32,  1.20s/it]

6942 episode score is 670.65


  1%|          | 527/100000 [10:24<33:24:12,  1.21s/it]

6951 episode score is 636.45


  1%|          | 528/100000 [10:26<33:34:57,  1.22s/it]

6959 episode score is 722.39


  1%|          | 529/100000 [10:27<33:13:35,  1.20s/it]

6967 episode score is 707.03


  1%|          | 530/100000 [10:28<32:58:51,  1.19s/it]

6976 episode score is 630.77


  1%|          | 531/100000 [10:29<32:36:56,  1.18s/it]

6985 episode score is 625.86


  1%|          | 532/100000 [10:30<33:10:49,  1.20s/it]

6994 episode score is 684.56


  1%|          | 533/100000 [10:32<33:00:03,  1.19s/it]

7003 episode score is 636.23


  1%|          | 534/100000 [10:33<33:13:03,  1.20s/it]

7012 episode score is 597.73


  1%|          | 535/100000 [10:34<33:04:29,  1.20s/it]

7021 episode score is 632.85


  1%|          | 536/100000 [10:35<32:28:32,  1.18s/it]

7029 episode score is 670.09


  1%|          | 537/100000 [10:36<32:50:21,  1.19s/it]

7038 episode score is 655.70


  1%|          | 538/100000 [10:37<32:20:27,  1.17s/it]

7047 episode score is 607.12


  1%|          | 539/100000 [10:39<32:24:44,  1.17s/it]

7055 episode score is 693.06


  1%|          | 540/100000 [10:40<32:44:41,  1.19s/it]

7064 episode score is 650.24


  1%|          | 541/100000 [10:41<32:12:58,  1.17s/it]

7072 episode score is 674.47


  1%|          | 542/100000 [10:42<32:26:08,  1.17s/it]

7080 episode score is 738.30


  1%|          | 543/100000 [10:43<32:27:39,  1.17s/it]

7089 episode score is 653.54


  1%|          | 544/100000 [10:45<33:00:43,  1.19s/it]

7098 episode score is 663.27


  1%|          | 545/100000 [10:46<33:10:52,  1.20s/it]

7107 episode score is 593.28


  1%|          | 546/100000 [10:47<33:18:18,  1.21s/it]

7116 episode score is 682.50


  1%|          | 547/100000 [10:48<33:35:24,  1.22s/it]

7125 episode score is 677.81


  1%|          | 548/100000 [10:49<32:57:50,  1.19s/it]

7133 episode score is 684.54


  1%|          | 549/100000 [10:50<32:27:09,  1.17s/it]

7141 episode score is 663.23


  1%|          | 550/100000 [10:52<32:01:33,  1.16s/it]

7149 episode score is 687.97


  1%|          | 551/100000 [10:53<32:24:49,  1.17s/it]

7158 episode score is 665.34


  1%|          | 552/100000 [10:54<32:34:15,  1.18s/it]

7166 episode score is 743.61


  1%|          | 553/100000 [10:55<32:20:17,  1.17s/it]

7174 episode score is 698.02


  1%|          | 554/100000 [10:56<31:54:52,  1.16s/it]

7182 episode score is 680.86


  1%|          | 555/100000 [10:57<32:15:13,  1.17s/it]

7191 episode score is 681.94


  1%|          | 556/100000 [10:59<32:57:38,  1.19s/it]

7200 episode score is 693.83


  1%|          | 557/100000 [11:00<32:36:33,  1.18s/it]

7208 episode score is 729.34


  1%|          | 558/100000 [11:01<32:16:50,  1.17s/it]

7216 episode score is 695.42


  1%|          | 559/100000 [11:02<32:20:59,  1.17s/it]

7224 episode score is 740.51


  1%|          | 560/100000 [11:03<32:34:50,  1.18s/it]

7232 episode score is 768.55


  1%|          | 561/100000 [11:05<32:51:20,  1.19s/it]

7240 episode score is 749.62


  1%|          | 562/100000 [11:06<32:48:22,  1.19s/it]

7248 episode score is 773.55


  1%|          | 563/100000 [11:07<32:26:02,  1.17s/it]

7256 episode score is 740.00


  1%|          | 564/100000 [11:08<32:25:43,  1.17s/it]

7264 episode score is 756.35


  1%|          | 565/100000 [11:09<31:58:13,  1.16s/it]

7272 episode score is 711.09


  1%|          | 566/100000 [11:10<32:15:59,  1.17s/it]

7280 episode score is 721.52


  1%|          | 567/100000 [11:12<32:30:27,  1.18s/it]

7288 episode score is 761.86


  1%|          | 568/100000 [11:13<32:31:30,  1.18s/it]

7296 episode score is 762.83


  1%|          | 569/100000 [11:14<32:41:15,  1.18s/it]

7304 episode score is 758.06


  1%|          | 570/100000 [11:15<32:38:55,  1.18s/it]

7312 episode score is 740.68


  1%|          | 571/100000 [11:16<32:27:02,  1.17s/it]

7320 episode score is 738.58


  1%|          | 572/100000 [11:18<33:13:08,  1.20s/it]

7329 episode score is 696.72


  1%|          | 573/100000 [11:19<32:46:45,  1.19s/it]

7337 episode score is 731.58


  1%|          | 574/100000 [11:20<32:09:54,  1.16s/it]

7345 episode score is 712.33


  1%|          | 575/100000 [11:21<31:59:35,  1.16s/it]

7353 episode score is 752.61


  1%|          | 576/100000 [11:22<32:29:38,  1.18s/it]

7361 episode score is 730.65


  1%|          | 577/100000 [11:23<31:52:04,  1.15s/it]

7369 episode score is 704.89


  1%|          | 578/100000 [11:25<32:45:32,  1.19s/it]

7378 episode score is 714.71


  1%|          | 579/100000 [11:26<32:07:45,  1.16s/it]

7386 episode score is 711.72


  1%|          | 580/100000 [11:27<32:22:40,  1.17s/it]

7395 episode score is 659.20


  1%|          | 581/100000 [11:28<33:24:43,  1.21s/it]

7404 episode score is 675.69


  1%|          | 582/100000 [11:29<32:51:35,  1.19s/it]

7412 episode score is 717.67


  1%|          | 583/100000 [11:31<32:49:49,  1.19s/it]

7421 episode score is 667.14


  1%|          | 584/100000 [11:32<32:56:43,  1.19s/it]

7430 episode score is 626.90


  1%|          | 585/100000 [11:33<32:46:37,  1.19s/it]

7438 episode score is 696.89


  1%|          | 586/100000 [11:34<33:07:57,  1.20s/it]

7447 episode score is 649.94


  1%|          | 587/100000 [11:35<33:09:07,  1.20s/it]

7456 episode score is 691.84


  1%|          | 588/100000 [11:37<33:03:06,  1.20s/it]

7465 episode score is 676.51


  1%|          | 589/100000 [11:38<33:02:12,  1.20s/it]

7474 episode score is 662.63


  1%|          | 590/100000 [11:39<33:15:08,  1.20s/it]

7483 episode score is 703.04


  1%|          | 591/100000 [11:40<32:39:55,  1.18s/it]

7491 episode score is 694.85


  1%|          | 592/100000 [11:41<32:06:48,  1.16s/it]

7499 episode score is 675.68


  1%|          | 593/100000 [11:42<32:02:16,  1.16s/it]

7507 episode score is 711.79


  1%|          | 594/100000 [11:44<32:08:12,  1.16s/it]

7515 episode score is 724.23


  1%|          | 595/100000 [11:45<31:48:39,  1.15s/it]

7523 episode score is 715.85


  1%|          | 596/100000 [11:46<31:55:20,  1.16s/it]

7531 episode score is 720.77


  1%|          | 597/100000 [11:47<32:36:49,  1.18s/it]

7539 episode score is 761.82


  1%|          | 598/100000 [11:48<32:20:21,  1.17s/it]

7547 episode score is 731.26


  1%|          | 599/100000 [11:49<32:30:29,  1.18s/it]

7555 episode score is 708.92
7563 episode score is 718.94


  1%|          | 600/100000 [11:52<42:57:55,  1.56s/it]

Iteration 600: Average test reward: 736.99


  1%|          | 601/100000 [11:53<40:20:03,  1.46s/it]

7572 episode score is 702.86


  1%|          | 602/100000 [11:54<38:29:14,  1.39s/it]

7581 episode score is 713.09


  1%|          | 603/100000 [11:55<36:13:15,  1.31s/it]

7589 episode score is 714.77


  1%|          | 604/100000 [11:57<35:20:39,  1.28s/it]

7597 episode score is 660.04


  1%|          | 605/100000 [11:58<34:27:49,  1.25s/it]

7605 episode score is 725.87


  1%|          | 606/100000 [11:59<34:05:46,  1.23s/it]

7613 episode score is 747.53


  1%|          | 607/100000 [12:00<33:47:45,  1.22s/it]

7622 episode score is 701.96


  1%|          | 608/100000 [12:01<33:51:17,  1.23s/it]

7631 episode score is 717.29


  1%|          | 609/100000 [12:03<33:19:20,  1.21s/it]

7639 episode score is 727.46


  1%|          | 610/100000 [12:04<33:03:45,  1.20s/it]

7647 episode score is 770.22


  1%|          | 611/100000 [12:05<33:27:12,  1.21s/it]

7656 episode score is 698.57


  1%|          | 612/100000 [12:06<32:46:55,  1.19s/it]

7664 episode score is 714.63


  1%|          | 613/100000 [12:07<32:19:38,  1.17s/it]

7672 episode score is 716.77


  1%|          | 614/100000 [12:09<32:46:04,  1.19s/it]

7680 episode score is 717.71


  1%|          | 615/100000 [12:10<32:39:45,  1.18s/it]

7688 episode score is 722.52


  1%|          | 616/100000 [12:11<32:16:59,  1.17s/it]

7697 episode score is 647.38


  1%|          | 617/100000 [12:12<32:00:10,  1.16s/it]

7705 episode score is 665.67


  1%|          | 618/100000 [12:13<31:51:36,  1.15s/it]

7713 episode score is 723.22


  1%|          | 619/100000 [12:14<31:59:14,  1.16s/it]

7721 episode score is 749.39


  1%|          | 620/100000 [12:15<32:14:23,  1.17s/it]

7730 episode score is 683.87


  1%|          | 621/100000 [12:17<32:15:12,  1.17s/it]

7739 episode score is 640.06


  1%|          | 622/100000 [12:18<32:22:35,  1.17s/it]

7748 episode score is 633.57


  1%|          | 623/100000 [12:19<32:26:30,  1.18s/it]

7756 episode score is 773.10


  1%|          | 624/100000 [12:20<32:41:11,  1.18s/it]

7765 episode score is 685.81


  1%|          | 625/100000 [12:21<33:23:09,  1.21s/it]

7774 episode score is 686.78


  1%|          | 626/100000 [12:23<32:55:26,  1.19s/it]

7782 episode score is 712.67


  1%|          | 627/100000 [12:24<33:00:50,  1.20s/it]

7790 episode score is 740.95


  1%|          | 628/100000 [12:25<33:06:51,  1.20s/it]

7799 episode score is 680.60


  1%|          | 629/100000 [12:26<33:15:45,  1.21s/it]

7808 episode score is 685.99


  1%|          | 630/100000 [12:28<33:41:46,  1.22s/it]

7817 episode score is 670.45


  1%|          | 631/100000 [12:29<33:39:46,  1.22s/it]

7825 episode score is 747.76


  1%|          | 632/100000 [12:30<32:47:58,  1.19s/it]

7833 episode score is 621.47


  1%|          | 633/100000 [12:31<32:29:24,  1.18s/it]

7841 episode score is 739.26


  1%|          | 634/100000 [12:32<32:31:27,  1.18s/it]

7849 episode score is 741.72


  1%|          | 635/100000 [12:33<32:28:46,  1.18s/it]

7857 episode score is 754.44


  1%|          | 636/100000 [12:35<33:04:48,  1.20s/it]

7866 episode score is 678.27


  1%|          | 637/100000 [12:36<33:09:46,  1.20s/it]

7874 episode score is 758.91


  1%|          | 638/100000 [12:37<33:25:27,  1.21s/it]

7882 episode score is 735.75


  1%|          | 639/100000 [12:38<33:49:58,  1.23s/it]

7891 episode score is 662.29


  1%|          | 640/100000 [12:39<33:22:33,  1.21s/it]

7899 episode score is 742.47


  1%|          | 641/100000 [12:41<33:41:42,  1.22s/it]

7907 episode score is 797.76


  1%|          | 642/100000 [12:42<33:14:22,  1.20s/it]

7915 episode score is 741.51


  1%|          | 643/100000 [12:43<33:33:05,  1.22s/it]

7923 episode score is 788.26


  1%|          | 644/100000 [12:44<33:12:48,  1.20s/it]

7931 episode score is 784.37


  1%|          | 645/100000 [12:45<32:43:39,  1.19s/it]

7939 episode score is 741.82


  1%|          | 646/100000 [12:47<34:32:00,  1.25s/it]

7948 episode score is 737.61


  1%|          | 647/100000 [12:48<34:02:46,  1.23s/it]

7956 episode score is 760.68


  1%|          | 648/100000 [12:49<33:50:34,  1.23s/it]

7964 episode score is 772.29


  1%|          | 649/100000 [12:50<33:45:07,  1.22s/it]

7973 episode score is 668.82


  1%|          | 650/100000 [12:52<33:18:41,  1.21s/it]

7981 episode score is 761.13


  1%|          | 651/100000 [12:53<33:42:42,  1.22s/it]

7990 episode score is 706.02


  1%|          | 652/100000 [12:54<32:48:55,  1.19s/it]

7998 episode score is 665.77


  1%|          | 653/100000 [12:55<32:18:44,  1.17s/it]

8006 episode score is 712.56


  1%|          | 654/100000 [12:56<32:23:17,  1.17s/it]

8014 episode score is 753.61


  1%|          | 655/100000 [12:57<32:22:05,  1.17s/it]

8023 episode score is 655.39


  1%|          | 656/100000 [12:59<32:38:16,  1.18s/it]

8032 episode score is 690.73


  1%|          | 657/100000 [13:00<32:47:57,  1.19s/it]

8040 episode score is 779.82


  1%|          | 658/100000 [13:01<32:09:00,  1.17s/it]

8048 episode score is 714.07


  1%|          | 659/100000 [13:02<31:53:25,  1.16s/it]

8056 episode score is 705.28


  1%|          | 660/100000 [13:03<31:45:11,  1.15s/it]

8064 episode score is 724.17


  1%|          | 661/100000 [13:04<31:56:11,  1.16s/it]

8072 episode score is 742.97


  1%|          | 662/100000 [13:06<32:18:01,  1.17s/it]

8081 episode score is 710.71


  1%|          | 663/100000 [13:07<32:03:30,  1.16s/it]

8089 episode score is 728.31


  1%|          | 664/100000 [13:08<32:05:32,  1.16s/it]

8097 episode score is 732.79


  1%|          | 665/100000 [13:09<32:32:35,  1.18s/it]

8106 episode score is 680.26


  1%|          | 666/100000 [13:10<32:35:55,  1.18s/it]

8114 episode score is 736.21


  1%|          | 667/100000 [13:11<32:04:28,  1.16s/it]

8122 episode score is 707.56


  1%|          | 668/100000 [13:13<32:18:04,  1.17s/it]

8130 episode score is 750.96


  1%|          | 669/100000 [13:14<32:37:26,  1.18s/it]

8138 episode score is 744.10


  1%|          | 670/100000 [13:15<32:41:56,  1.19s/it]

8147 episode score is 672.23


  1%|          | 671/100000 [13:16<32:50:07,  1.19s/it]

8156 episode score is 665.98


  1%|          | 672/100000 [13:17<33:00:26,  1.20s/it]

8164 episode score is 747.53


  1%|          | 673/100000 [13:19<32:26:26,  1.18s/it]

8172 episode score is 734.36


  1%|          | 674/100000 [13:20<32:26:56,  1.18s/it]

8180 episode score is 747.51


  1%|          | 675/100000 [13:21<32:39:38,  1.18s/it]

8188 episode score is 772.39


  1%|          | 676/100000 [13:22<33:03:03,  1.20s/it]

8197 episode score is 695.62


  1%|          | 677/100000 [13:23<33:07:52,  1.20s/it]

8205 episode score is 791.85


  1%|          | 678/100000 [13:25<33:34:08,  1.22s/it]

8214 episode score is 734.01


  1%|          | 679/100000 [13:26<33:16:40,  1.21s/it]

8222 episode score is 736.90


  1%|          | 680/100000 [13:27<32:57:50,  1.19s/it]

8230 episode score is 735.61


  1%|          | 681/100000 [13:28<32:25:04,  1.18s/it]

8238 episode score is 731.36


  1%|          | 682/100000 [13:29<32:56:55,  1.19s/it]

8246 episode score is 688.11


  1%|          | 683/100000 [13:31<33:22:25,  1.21s/it]

8255 episode score is 723.58


  1%|          | 684/100000 [13:32<32:56:50,  1.19s/it]

8263 episode score is 759.64


  1%|          | 685/100000 [13:33<32:58:48,  1.20s/it]

8271 episode score is 785.82


  1%|          | 686/100000 [13:34<33:19:24,  1.21s/it]

8280 episode score is 706.84


  1%|          | 687/100000 [13:35<33:00:56,  1.20s/it]

8288 episode score is 744.33


  1%|          | 688/100000 [13:37<33:04:02,  1.20s/it]

8296 episode score is 759.05


  1%|          | 689/100000 [13:38<32:34:42,  1.18s/it]

8304 episode score is 746.34


  1%|          | 690/100000 [13:39<32:28:22,  1.18s/it]

8312 episode score is 754.40


  1%|          | 691/100000 [13:40<32:59:22,  1.20s/it]

8321 episode score is 693.19


  1%|          | 692/100000 [13:41<32:35:51,  1.18s/it]

8329 episode score is 749.04


  1%|          | 693/100000 [13:42<32:13:45,  1.17s/it]

8337 episode score is 736.02


  1%|          | 694/100000 [13:44<33:02:15,  1.20s/it]

8345 episode score is 742.98


  1%|          | 695/100000 [13:45<33:00:09,  1.20s/it]

8354 episode score is 675.68


  1%|          | 696/100000 [13:46<32:58:30,  1.20s/it]

8362 episode score is 777.13


  1%|          | 697/100000 [13:47<32:42:11,  1.19s/it]

8370 episode score is 736.77


  1%|          | 698/100000 [13:48<33:08:09,  1.20s/it]

8379 episode score is 702.35


  1%|          | 699/100000 [13:50<32:29:30,  1.18s/it]

8387 episode score is 693.43
8396 episode score is 709.56


  1%|          | 700/100000 [13:52<42:40:19,  1.55s/it]

Iteration 700: Average test reward: 691.90


  1%|          | 701/100000 [13:53<39:12:49,  1.42s/it]

8404 episode score is 756.87


  1%|          | 702/100000 [13:54<36:56:09,  1.34s/it]

8412 episode score is 729.45


  1%|          | 703/100000 [13:56<36:06:04,  1.31s/it]

8421 episode score is 714.81


  1%|          | 704/100000 [13:57<35:26:32,  1.28s/it]

8430 episode score is 718.41


  1%|          | 705/100000 [13:58<35:13:49,  1.28s/it]

8439 episode score is 739.05


  1%|          | 706/100000 [13:59<34:43:29,  1.26s/it]

8448 episode score is 716.52


  1%|          | 707/100000 [14:00<34:26:43,  1.25s/it]

8457 episode score is 721.80


  1%|          | 708/100000 [14:02<34:27:52,  1.25s/it]

8465 episode score is 778.19


  1%|          | 709/100000 [14:03<34:35:19,  1.25s/it]

8474 episode score is 711.58


  1%|          | 710/100000 [14:04<33:30:03,  1.21s/it]

8482 episode score is 722.47


  1%|          | 711/100000 [14:05<32:40:13,  1.18s/it]

8490 episode score is 725.71


  1%|          | 712/100000 [14:06<32:06:45,  1.16s/it]

8498 episode score is 732.55


  1%|          | 713/100000 [14:08<32:50:57,  1.19s/it]

8507 episode score is 702.38


  1%|          | 714/100000 [14:09<32:37:01,  1.18s/it]

8516 episode score is 668.08


  1%|          | 715/100000 [14:10<32:42:04,  1.19s/it]

8525 episode score is 695.61


  1%|          | 716/100000 [14:11<32:21:57,  1.17s/it]

8533 episode score is 732.82


  1%|          | 717/100000 [14:12<31:57:35,  1.16s/it]

8541 episode score is 736.43


  1%|          | 718/100000 [14:13<32:28:39,  1.18s/it]

8550 episode score is 698.19


  1%|          | 719/100000 [14:15<31:57:47,  1.16s/it]

8558 episode score is 699.16


  1%|          | 720/100000 [14:16<32:42:24,  1.19s/it]

8567 episode score is 714.21


  1%|          | 721/100000 [14:17<32:32:36,  1.18s/it]

8576 episode score is 653.02


  1%|          | 722/100000 [14:18<32:46:08,  1.19s/it]

8584 episode score is 718.10


  1%|          | 723/100000 [14:19<32:39:41,  1.18s/it]

8593 episode score is 659.56


  1%|          | 724/100000 [14:21<32:22:30,  1.17s/it]

8602 episode score is 634.44


  1%|          | 725/100000 [14:22<32:48:09,  1.19s/it]

8611 episode score is 675.79


  1%|          | 726/100000 [14:23<32:55:49,  1.19s/it]

8620 episode score is 672.90


  1%|          | 727/100000 [14:24<32:20:26,  1.17s/it]

8628 episode score is 743.62


  1%|          | 728/100000 [14:25<32:17:13,  1.17s/it]

8637 episode score is 686.56


  1%|          | 729/100000 [14:26<32:46:20,  1.19s/it]

8646 episode score is 692.36


  1%|          | 730/100000 [14:28<33:12:43,  1.20s/it]

8655 episode score is 709.16


  1%|          | 731/100000 [14:29<32:32:20,  1.18s/it]

8663 episode score is 714.45


  1%|          | 732/100000 [14:30<32:22:25,  1.17s/it]

8671 episode score is 749.76


  1%|          | 733/100000 [14:31<32:29:33,  1.18s/it]

8680 episode score is 593.34


  1%|          | 734/100000 [14:32<32:30:35,  1.18s/it]

8689 episode score is 676.07


  1%|          | 735/100000 [14:34<32:26:39,  1.18s/it]

8697 episode score is 763.61


  1%|          | 736/100000 [14:35<32:40:27,  1.18s/it]

8705 episode score is 740.33


  1%|          | 737/100000 [14:36<33:04:24,  1.20s/it]

8714 episode score is 705.41


  1%|          | 738/100000 [14:37<33:10:40,  1.20s/it]

8723 episode score is 673.92


  1%|          | 739/100000 [14:38<32:44:30,  1.19s/it]

8731 episode score is 743.05


  1%|          | 740/100000 [14:39<32:39:36,  1.18s/it]

8740 episode score is 665.77


  1%|          | 741/100000 [14:41<32:18:32,  1.17s/it]

8748 episode score is 743.27


  1%|          | 742/100000 [14:42<32:02:43,  1.16s/it]

8756 episode score is 727.33


  1%|          | 743/100000 [14:43<31:54:56,  1.16s/it]

8764 episode score is 715.62


  1%|          | 744/100000 [14:44<32:32:36,  1.18s/it]

8773 episode score is 674.43


  1%|          | 745/100000 [14:45<32:12:31,  1.17s/it]

8781 episode score is 784.31


  1%|          | 746/100000 [14:47<32:39:13,  1.18s/it]

8790 episode score is 675.74


  1%|          | 747/100000 [14:48<33:18:43,  1.21s/it]

8799 episode score is 742.60


  1%|          | 748/100000 [14:49<33:10:23,  1.20s/it]

8808 episode score is 679.01


  1%|          | 749/100000 [14:50<32:50:36,  1.19s/it]

8816 episode score is 795.88


  1%|          | 750/100000 [14:51<32:33:00,  1.18s/it]

8824 episode score is 777.65


  1%|          | 751/100000 [14:52<32:05:32,  1.16s/it]

8832 episode score is 762.13


  1%|          | 752/100000 [14:54<31:55:17,  1.16s/it]

8840 episode score is 756.37


  1%|          | 753/100000 [14:55<31:33:30,  1.14s/it]

8849 episode score is 652.08


  1%|          | 754/100000 [14:56<33:10:03,  1.20s/it]

8857 episode score is 799.72


  1%|          | 755/100000 [14:57<32:31:40,  1.18s/it]

8865 episode score is 743.95


  1%|          | 756/100000 [14:58<32:28:05,  1.18s/it]

8874 episode score is 645.19


  1%|          | 757/100000 [15:00<32:49:49,  1.19s/it]

8883 episode score is 687.31


  1%|          | 758/100000 [15:01<32:18:11,  1.17s/it]

8892 episode score is 626.84


  1%|          | 759/100000 [15:02<32:47:18,  1.19s/it]

8901 episode score is 704.56


  1%|          | 760/100000 [15:03<32:54:03,  1.19s/it]

8910 episode score is 714.16


  1%|          | 761/100000 [15:04<32:16:32,  1.17s/it]

8918 episode score is 702.66


  1%|          | 762/100000 [15:05<32:49:51,  1.19s/it]

8927 episode score is 727.79


  1%|          | 763/100000 [15:07<32:51:13,  1.19s/it]

8935 episode score is 778.83


  1%|          | 764/100000 [15:08<32:24:01,  1.18s/it]

8943 episode score is 770.21


  1%|          | 765/100000 [15:09<32:08:21,  1.17s/it]

8951 episode score is 762.32


  1%|          | 766/100000 [15:10<32:36:15,  1.18s/it]

8960 episode score is 747.57


  1%|          | 767/100000 [15:11<32:18:18,  1.17s/it]

8968 episode score is 759.32


  1%|          | 768/100000 [15:13<32:49:32,  1.19s/it]

8976 episode score is 802.23


  1%|          | 769/100000 [15:14<33:25:03,  1.21s/it]

8985 episode score is 763.67


  1%|          | 770/100000 [15:15<33:41:14,  1.22s/it]

8993 episode score is 814.63


  1%|          | 771/100000 [15:16<33:22:39,  1.21s/it]

9001 episode score is 811.57


  1%|          | 772/100000 [15:17<32:54:53,  1.19s/it]

9009 episode score is 753.96


  1%|          | 773/100000 [15:19<32:59:28,  1.20s/it]

9017 episode score is 815.22


  1%|          | 774/100000 [15:20<33:06:39,  1.20s/it]

9025 episode score is 804.35


  1%|          | 775/100000 [15:21<33:17:49,  1.21s/it]

9033 episode score is 824.94


  1%|          | 776/100000 [15:22<33:11:22,  1.20s/it]

9041 episode score is 799.83


  1%|          | 777/100000 [15:23<32:35:17,  1.18s/it]

9049 episode score is 771.24


  1%|          | 778/100000 [15:25<33:00:04,  1.20s/it]

9057 episode score is 805.96


  1%|          | 779/100000 [15:26<32:17:59,  1.17s/it]

9065 episode score is 710.41


  1%|          | 780/100000 [15:27<32:18:28,  1.17s/it]

9073 episode score is 780.42


  1%|          | 781/100000 [15:28<32:27:18,  1.18s/it]

9081 episode score is 773.59


  1%|          | 782/100000 [15:29<32:42:55,  1.19s/it]

9089 episode score is 784.25


  1%|          | 783/100000 [15:31<33:20:35,  1.21s/it]

9098 episode score is 746.90


  1%|          | 784/100000 [15:32<32:53:48,  1.19s/it]

9106 episode score is 732.68


  1%|          | 785/100000 [15:33<32:44:15,  1.19s/it]

9114 episode score is 752.50


  1%|          | 786/100000 [15:34<32:49:18,  1.19s/it]

9122 episode score is 795.21


  1%|          | 787/100000 [15:35<32:21:44,  1.17s/it]

9130 episode score is 731.49


  1%|          | 788/100000 [15:36<32:43:27,  1.19s/it]

9138 episode score is 806.98


  1%|          | 789/100000 [15:38<32:32:05,  1.18s/it]

9146 episode score is 779.61


  1%|          | 790/100000 [15:39<32:25:00,  1.18s/it]

9154 episode score is 773.83


  1%|          | 791/100000 [15:40<32:21:14,  1.17s/it]

9162 episode score is 774.65


  1%|          | 792/100000 [15:41<31:54:31,  1.16s/it]

9170 episode score is 738.18


  1%|          | 793/100000 [15:42<32:10:24,  1.17s/it]

9178 episode score is 784.82


  1%|          | 794/100000 [15:43<32:18:52,  1.17s/it]

9186 episode score is 794.91


  1%|          | 795/100000 [15:45<32:18:00,  1.17s/it]

9195 episode score is 694.62


  1%|          | 796/100000 [15:46<32:20:54,  1.17s/it]

9203 episode score is 732.41


  1%|          | 797/100000 [15:47<32:03:58,  1.16s/it]

9211 episode score is 762.60


  1%|          | 798/100000 [15:48<31:50:22,  1.16s/it]

9219 episode score is 739.04


  1%|          | 799/100000 [15:49<32:15:47,  1.17s/it]

9228 episode score is 697.15
9236 episode score is 758.78


  1%|          | 800/100000 [15:52<42:06:38,  1.53s/it]

Iteration 800: Average test reward: 753.80


  1%|          | 801/100000 [15:53<38:48:18,  1.41s/it]

9244 episode score is 733.56


  1%|          | 802/100000 [15:54<37:18:09,  1.35s/it]

9253 episode score is 737.07


  1%|          | 803/100000 [15:55<36:13:22,  1.31s/it]

9262 episode score is 737.14


  1%|          | 804/100000 [15:56<35:07:33,  1.27s/it]

9270 episode score is 776.60


  1%|          | 805/100000 [15:58<35:31:47,  1.29s/it]

9279 episode score is 716.77


  1%|          | 806/100000 [15:59<34:13:13,  1.24s/it]

9287 episode score is 746.89


  1%|          | 807/100000 [16:00<33:12:17,  1.21s/it]

9295 episode score is 767.20


  1%|          | 808/100000 [16:01<33:29:01,  1.22s/it]

9304 episode score is 742.37


  1%|          | 809/100000 [16:02<33:29:17,  1.22s/it]

9313 episode score is 698.34


  1%|          | 810/100000 [16:04<32:58:06,  1.20s/it]

9322 episode score is 641.05


  1%|          | 811/100000 [16:05<32:59:53,  1.20s/it]

9331 episode score is 724.38


  1%|          | 812/100000 [16:06<32:58:16,  1.20s/it]

9340 episode score is 708.66


  1%|          | 813/100000 [16:07<32:23:22,  1.18s/it]

9348 episode score is 736.29


  1%|          | 814/100000 [16:08<32:54:22,  1.19s/it]

9357 episode score is 730.36


  1%|          | 815/100000 [16:09<32:38:46,  1.18s/it]

9366 episode score is 672.96


  1%|          | 816/100000 [16:11<32:37:16,  1.18s/it]

9375 episode score is 698.97


  1%|          | 817/100000 [16:12<33:04:55,  1.20s/it]

9384 episode score is 723.80


  1%|          | 818/100000 [16:13<32:22:30,  1.18s/it]

9392 episode score is 711.00


  1%|          | 819/100000 [16:14<32:28:20,  1.18s/it]

9401 episode score is 686.62


  1%|          | 820/100000 [16:15<32:00:49,  1.16s/it]

9409 episode score is 727.33


  1%|          | 821/100000 [16:16<31:41:28,  1.15s/it]

9417 episode score is 754.29


  1%|          | 822/100000 [16:18<31:46:08,  1.15s/it]

9425 episode score is 775.33


  1%|          | 823/100000 [16:19<31:32:45,  1.15s/it]

9433 episode score is 753.81


  1%|          | 824/100000 [16:20<31:27:20,  1.14s/it]

9442 episode score is 670.25


  1%|          | 825/100000 [16:21<31:37:19,  1.15s/it]

9450 episode score is 785.59


  1%|          | 826/100000 [16:22<31:26:37,  1.14s/it]

9458 episode score is 754.79


  1%|          | 827/100000 [16:23<31:32:28,  1.14s/it]

9466 episode score is 752.28


  1%|          | 828/100000 [16:24<31:19:51,  1.14s/it]

9474 episode score is 770.81


  1%|          | 829/100000 [16:26<32:06:16,  1.17s/it]

9482 episode score is 835.94


  1%|          | 830/100000 [16:27<31:51:15,  1.16s/it]

9490 episode score is 739.46


  1%|          | 831/100000 [16:28<32:11:53,  1.17s/it]

9499 episode score is 670.96


  1%|          | 832/100000 [16:29<32:39:04,  1.19s/it]

9508 episode score is 700.12


  1%|          | 833/100000 [16:30<33:08:21,  1.20s/it]

9516 episode score is 802.24


  1%|          | 834/100000 [16:32<32:35:25,  1.18s/it]

9524 episode score is 749.54


  1%|          | 835/100000 [16:33<32:22:01,  1.18s/it]

9532 episode score is 766.73


  1%|          | 836/100000 [16:34<33:05:03,  1.20s/it]

9541 episode score is 750.05


  1%|          | 837/100000 [16:35<32:39:33,  1.19s/it]

9549 episode score is 792.87


  1%|          | 838/100000 [16:36<32:16:52,  1.17s/it]

9557 episode score is 733.73


  1%|          | 839/100000 [16:37<32:25:39,  1.18s/it]

9566 episode score is 708.70


  1%|          | 840/100000 [16:39<33:24:36,  1.21s/it]

9575 episode score is 731.13


  1%|          | 841/100000 [16:40<32:58:59,  1.20s/it]

9583 episode score is 783.47


  1%|          | 842/100000 [16:41<32:43:07,  1.19s/it]

9591 episode score is 780.00


  1%|          | 843/100000 [16:42<33:09:11,  1.20s/it]

9599 episode score is 844.10


  1%|          | 844/100000 [16:44<33:19:41,  1.21s/it]

9608 episode score is 723.69


  1%|          | 845/100000 [16:45<33:41:11,  1.22s/it]

9617 episode score is 749.21


  1%|          | 846/100000 [16:46<33:20:38,  1.21s/it]

9626 episode score is 705.33


  1%|          | 847/100000 [16:47<33:31:26,  1.22s/it]

9634 episode score is 809.62


  1%|          | 848/100000 [16:48<33:20:23,  1.21s/it]

9642 episode score is 833.22


  1%|          | 849/100000 [16:50<32:38:02,  1.18s/it]

9650 episode score is 772.83


  1%|          | 850/100000 [16:51<32:33:06,  1.18s/it]

9658 episode score is 810.83


  1%|          | 851/100000 [16:52<32:00:33,  1.16s/it]

9666 episode score is 750.68


  1%|          | 852/100000 [16:53<31:44:25,  1.15s/it]

9674 episode score is 764.19


  1%|          | 853/100000 [16:54<31:47:58,  1.15s/it]

9682 episode score is 792.52


  1%|          | 854/100000 [16:55<32:12:51,  1.17s/it]

9690 episode score is 798.99


  1%|          | 855/100000 [16:57<32:20:04,  1.17s/it]

9698 episode score is 797.31


  1%|          | 856/100000 [16:58<33:03:48,  1.20s/it]

9707 episode score is 759.26
9715 episode score is 798.18


  1%|          | 857/100000 [16:59<35:00:50,  1.27s/it]

9724 episode score is 766.12


  1%|          | 858/100000 [17:01<36:32:29,  1.33s/it]

9733 episode score is 747.66


  1%|          | 859/100000 [17:02<37:44:40,  1.37s/it]

9742 episode score is 723.76


  1%|          | 860/100000 [17:04<38:16:14,  1.39s/it]

9750 episode score is 782.18


  1%|          | 861/100000 [17:05<37:57:41,  1.38s/it]

9758 episode score is 788.98


  1%|          | 862/100000 [17:06<37:32:09,  1.36s/it]

9766 episode score is 745.79


  1%|          | 863/100000 [17:08<37:23:27,  1.36s/it]

9774 episode score is 793.52


  1%|          | 864/100000 [17:09<37:43:45,  1.37s/it]

9783 episode score is 752.67


  1%|          | 865/100000 [17:10<38:39:57,  1.40s/it]

9791 episode score is 804.67


  1%|          | 866/100000 [17:12<39:22:18,  1.43s/it]

9799 episode score is 810.33


  1%|          | 867/100000 [17:13<39:27:20,  1.43s/it]

9807 episode score is 758.57


  1%|          | 868/100000 [17:15<38:49:00,  1.41s/it]

9815 episode score is 747.28


  1%|          | 869/100000 [17:16<38:09:42,  1.39s/it]

9824 episode score is 729.48


  1%|          | 870/100000 [17:18<38:51:35,  1.41s/it]

9832 episode score is 804.01


  1%|          | 871/100000 [17:19<38:52:45,  1.41s/it]

9840 episode score is 824.13


  1%|          | 872/100000 [17:20<38:45:25,  1.41s/it]

9848 episode score is 808.00


  1%|          | 873/100000 [17:22<39:17:08,  1.43s/it]

9856 episode score is 837.04


  1%|          | 874/100000 [17:23<39:16:58,  1.43s/it]

9864 episode score is 820.19


  1%|          | 875/100000 [17:25<39:11:25,  1.42s/it]

9872 episode score is 863.89


  1%|          | 876/100000 [17:26<39:27:56,  1.43s/it]

9880 episode score is 834.00


  1%|          | 877/100000 [17:28<39:33:05,  1.44s/it]

9889 episode score is 722.90


  1%|          | 878/100000 [17:29<39:38:11,  1.44s/it]

9897 episode score is 806.62


  1%|          | 879/100000 [17:30<39:17:41,  1.43s/it]

9905 episode score is 817.66


  1%|          | 880/100000 [17:32<39:17:59,  1.43s/it]

9912 episode score is 929.04


  1%|          | 881/100000 [17:33<39:01:05,  1.42s/it]

9920 episode score is 857.08


  1%|          | 882/100000 [17:35<39:31:03,  1.44s/it]

9928 episode score is 832.19


  1%|          | 883/100000 [17:36<39:25:47,  1.43s/it]

9936 episode score is 804.92


  1%|          | 884/100000 [17:38<39:12:18,  1.42s/it]

9943 episode score is 892.20


  1%|          | 885/100000 [17:39<38:43:52,  1.41s/it]

9951 episode score is 844.87


  1%|          | 886/100000 [17:40<39:06:23,  1.42s/it]

9958 episode score is 880.84


  1%|          | 887/100000 [17:42<38:15:52,  1.39s/it]

9966 episode score is 815.98


  1%|          | 888/100000 [17:43<38:33:43,  1.40s/it]

9974 episode score is 855.39


  1%|          | 889/100000 [17:45<39:00:29,  1.42s/it]

9982 episode score is 900.38


  1%|          | 890/100000 [17:46<40:01:36,  1.45s/it]

9990 episode score is 773.75


  1%|          | 891/100000 [17:48<39:23:31,  1.43s/it]

9998 episode score is 799.48


  1%|          | 892/100000 [17:49<39:06:48,  1.42s/it]

10006 episode score is 838.28


  1%|          | 893/100000 [17:50<39:15:37,  1.43s/it]

10014 episode score is 797.91


  1%|          | 894/100000 [17:52<38:39:46,  1.40s/it]

10022 episode score is 746.71


  1%|          | 895/100000 [17:53<37:57:16,  1.38s/it]

10030 episode score is 811.44


  1%|          | 896/100000 [17:54<37:45:16,  1.37s/it]

10038 episode score is 842.32


  1%|          | 897/100000 [17:56<38:15:48,  1.39s/it]

10046 episode score is 818.30


  1%|          | 898/100000 [17:57<38:16:00,  1.39s/it]

10053 episode score is 868.58


  1%|          | 899/100000 [17:59<37:36:16,  1.37s/it]

10061 episode score is 813.41
Iteration 900: Average test reward: 820.42


  1%|          | 900/100000 [18:01<50:03:17,  1.82s/it]

10069 episode score is 780.98


  1%|          | 901/100000 [18:03<46:21:11,  1.68s/it]

10078 episode score is 733.48


  1%|          | 902/100000 [18:04<44:21:54,  1.61s/it]

10086 episode score is 794.21


  1%|          | 903/100000 [18:06<42:22:01,  1.54s/it]

10094 episode score is 814.05


  1%|          | 904/100000 [18:07<41:01:33,  1.49s/it]

10102 episode score is 789.09


  1%|          | 905/100000 [18:08<39:54:57,  1.45s/it]

10110 episode score is 785.24


  1%|          | 906/100000 [18:10<39:09:41,  1.42s/it]

10118 episode score is 730.60


  1%|          | 907/100000 [18:11<38:27:22,  1.40s/it]

10126 episode score is 799.79


  1%|          | 908/100000 [18:12<38:25:04,  1.40s/it]

10134 episode score is 812.41


  1%|          | 909/100000 [18:14<38:31:11,  1.40s/it]

10142 episode score is 784.90


  1%|          | 910/100000 [18:15<38:00:15,  1.38s/it]

10150 episode score is 799.53


  1%|          | 911/100000 [18:17<38:13:00,  1.39s/it]

10159 episode score is 713.24


  1%|          | 912/100000 [18:18<38:18:33,  1.39s/it]

10167 episode score is 782.09


  1%|          | 913/100000 [18:19<38:15:58,  1.39s/it]

10176 episode score is 689.73


  1%|          | 914/100000 [18:21<38:06:11,  1.38s/it]

10185 episode score is 685.56


  1%|          | 915/100000 [18:22<37:48:41,  1.37s/it]

10193 episode score is 787.81


  1%|          | 916/100000 [18:23<37:43:59,  1.37s/it]

10202 episode score is 769.51


  1%|          | 917/100000 [18:25<38:30:45,  1.40s/it]

10211 episode score is 767.11


  1%|          | 918/100000 [18:26<39:00:53,  1.42s/it]

10219 episode score is 807.58


  1%|          | 919/100000 [18:28<38:50:52,  1.41s/it]

10228 episode score is 703.17


  1%|          | 920/100000 [18:29<39:48:24,  1.45s/it]

10236 episode score is 749.70


  1%|          | 921/100000 [18:31<38:46:44,  1.41s/it]

10244 episode score is 736.42


  1%|          | 922/100000 [18:32<38:04:14,  1.38s/it]

10253 episode score is 723.31


  1%|          | 923/100000 [18:33<38:04:33,  1.38s/it]

10261 episode score is 769.67


  1%|          | 924/100000 [18:35<37:28:07,  1.36s/it]

10269 episode score is 804.87


  1%|          | 925/100000 [18:36<37:47:47,  1.37s/it]

10277 episode score is 779.68


  1%|          | 926/100000 [18:37<37:23:20,  1.36s/it]

10285 episode score is 751.08


  1%|          | 927/100000 [18:39<37:06:34,  1.35s/it]

10293 episode score is 796.33


  1%|          | 928/100000 [18:40<37:04:48,  1.35s/it]

10301 episode score is 788.97


  1%|          | 929/100000 [18:41<37:08:13,  1.35s/it]

10309 episode score is 737.86


  1%|          | 930/100000 [18:43<36:59:18,  1.34s/it]

10317 episode score is 797.96


  1%|          | 931/100000 [18:44<37:07:58,  1.35s/it]

10326 episode score is 739.95


  1%|          | 932/100000 [18:45<37:41:43,  1.37s/it]

10334 episode score is 738.11


  1%|          | 933/100000 [18:47<37:18:16,  1.36s/it]

10342 episode score is 831.35


  1%|          | 934/100000 [18:48<37:36:30,  1.37s/it]

10350 episode score is 815.69


  1%|          | 935/100000 [18:50<37:51:33,  1.38s/it]

10358 episode score is 764.49


  1%|          | 936/100000 [18:51<37:20:12,  1.36s/it]

10367 episode score is 766.02


  1%|          | 937/100000 [18:52<38:21:28,  1.39s/it]

10375 episode score is 790.69


  1%|          | 938/100000 [18:54<37:47:01,  1.37s/it]

10383 episode score is 772.69


  1%|          | 939/100000 [18:55<37:44:50,  1.37s/it]

10391 episode score is 855.30


  1%|          | 940/100000 [18:57<38:41:54,  1.41s/it]

10399 episode score is 799.69


  1%|          | 941/100000 [18:58<38:33:03,  1.40s/it]

10407 episode score is 766.77


  1%|          | 942/100000 [18:59<38:35:42,  1.40s/it]

10415 episode score is 783.17


  1%|          | 943/100000 [19:01<38:18:05,  1.39s/it]

10423 episode score is 785.50


  1%|          | 944/100000 [19:02<37:49:45,  1.37s/it]

10431 episode score is 758.84


  1%|          | 945/100000 [19:03<37:28:22,  1.36s/it]

10439 episode score is 806.01


  1%|          | 946/100000 [19:05<37:27:04,  1.36s/it]

10447 episode score is 868.70


  1%|          | 947/100000 [19:06<38:10:59,  1.39s/it]

10455 episode score is 785.24


  1%|          | 948/100000 [19:08<38:15:51,  1.39s/it]

10464 episode score is 739.04


  1%|          | 949/100000 [19:09<38:50:17,  1.41s/it]

10472 episode score is 797.77


  1%|          | 950/100000 [19:10<38:34:55,  1.40s/it]

10479 episode score is 857.84


  1%|          | 951/100000 [19:12<37:51:37,  1.38s/it]

10487 episode score is 815.72


  1%|          | 952/100000 [19:13<38:17:03,  1.39s/it]

10495 episode score is 808.88


  1%|          | 953/100000 [19:15<38:07:08,  1.39s/it]

10503 episode score is 851.77


  1%|          | 954/100000 [19:16<38:42:43,  1.41s/it]

10511 episode score is 858.41


  1%|          | 955/100000 [19:17<39:00:05,  1.42s/it]

10519 episode score is 824.41


  1%|          | 956/100000 [19:19<38:53:21,  1.41s/it]

10527 episode score is 792.39


  1%|          | 957/100000 [19:20<38:12:12,  1.39s/it]

10535 episode score is 808.20


  1%|          | 958/100000 [19:22<37:59:27,  1.38s/it]

10543 episode score is 780.54


  1%|          | 959/100000 [19:23<37:44:31,  1.37s/it]

10551 episode score is 756.72


  1%|          | 960/100000 [19:24<37:15:39,  1.35s/it]

10560 episode score is 721.70


  1%|          | 961/100000 [19:26<37:57:28,  1.38s/it]

10568 episode score is 798.05


  1%|          | 962/100000 [19:27<37:51:42,  1.38s/it]

10576 episode score is 882.41


  1%|          | 963/100000 [19:28<38:38:15,  1.40s/it]

10583 episode score is 915.51


  1%|          | 964/100000 [19:30<38:11:44,  1.39s/it]

10591 episode score is 773.23


  1%|          | 965/100000 [19:31<37:55:39,  1.38s/it]

10599 episode score is 828.29


  1%|          | 966/100000 [19:33<38:13:54,  1.39s/it]

10607 episode score is 858.77


  1%|          | 967/100000 [19:34<38:50:30,  1.41s/it]

10615 episode score is 851.16


  1%|          | 968/100000 [19:36<39:31:20,  1.44s/it]

10623 episode score is 809.35


  1%|          | 969/100000 [19:37<38:48:03,  1.41s/it]

10631 episode score is 806.41


  1%|          | 970/100000 [19:38<38:26:11,  1.40s/it]

10639 episode score is 791.66


  1%|          | 971/100000 [19:40<37:54:50,  1.38s/it]

10647 episode score is 763.25


  1%|          | 972/100000 [19:41<37:37:16,  1.37s/it]

10655 episode score is 834.66


  1%|          | 973/100000 [19:42<38:11:10,  1.39s/it]

10663 episode score is 852.16


  1%|          | 974/100000 [19:44<38:51:22,  1.41s/it]

10671 episode score is 847.26


  1%|          | 975/100000 [19:45<39:12:07,  1.43s/it]

10679 episode score is 824.40


  1%|          | 976/100000 [19:47<39:08:18,  1.42s/it]

10687 episode score is 814.08


  1%|          | 977/100000 [19:48<39:10:29,  1.42s/it]

10695 episode score is 837.37


  1%|          | 978/100000 [19:50<39:11:54,  1.43s/it]

10703 episode score is 828.42


  1%|          | 979/100000 [19:51<39:16:34,  1.43s/it]

10710 episode score is 932.24


  1%|          | 980/100000 [19:52<38:41:57,  1.41s/it]

10718 episode score is 782.18


  1%|          | 981/100000 [19:54<39:01:37,  1.42s/it]

10727 episode score is 734.75


  1%|          | 982/100000 [19:55<39:06:01,  1.42s/it]

10735 episode score is 871.75


  1%|          | 983/100000 [19:57<39:11:54,  1.43s/it]

10743 episode score is 867.77


  1%|          | 984/100000 [19:58<39:19:43,  1.43s/it]

10751 episode score is 843.65


  1%|          | 985/100000 [20:00<39:23:16,  1.43s/it]

10759 episode score is 808.84


  1%|          | 986/100000 [20:01<38:55:21,  1.42s/it]

10767 episode score is 848.10


  1%|          | 987/100000 [20:02<38:38:44,  1.41s/it]

10776 episode score is 747.11


  1%|          | 988/100000 [20:04<39:00:24,  1.42s/it]

10785 episode score is 762.55


  1%|          | 989/100000 [20:05<39:18:44,  1.43s/it]

10794 episode score is 743.53


  1%|          | 990/100000 [20:07<39:32:31,  1.44s/it]

10802 episode score is 779.24


  1%|          | 991/100000 [20:08<38:37:50,  1.40s/it]

10810 episode score is 786.16


  1%|          | 992/100000 [20:09<38:01:54,  1.38s/it]

10819 episode score is 749.29


  1%|          | 993/100000 [20:11<38:32:47,  1.40s/it]

10828 episode score is 732.93


  1%|          | 994/100000 [20:12<38:41:18,  1.41s/it]

10836 episode score is 754.99


  1%|          | 995/100000 [20:14<38:15:51,  1.39s/it]

10845 episode score is 705.90


  1%|          | 996/100000 [20:15<38:16:13,  1.39s/it]

10854 episode score is 717.44


  1%|          | 997/100000 [20:16<38:07:04,  1.39s/it]

10863 episode score is 698.20


  1%|          | 998/100000 [20:18<38:02:02,  1.38s/it]

10872 episode score is 733.56


  1%|          | 999/100000 [20:19<38:09:34,  1.39s/it]

10881 episode score is 686.47
Iteration 1000: Average test reward: 676.99


  1%|          | 1000/100000 [20:22<48:14:38,  1.75s/it]

10890 episode score is 685.06


  1%|          | 1001/100000 [20:23<45:06:07,  1.64s/it]

10899 episode score is 744.74


  1%|          | 1002/100000 [20:25<43:21:41,  1.58s/it]

10908 episode score is 729.78


  1%|          | 1004/100000 [20:27<39:46:54,  1.45s/it]

10917 episode score is 671.62


  1%|          | 1005/100000 [20:28<37:34:42,  1.37s/it]

10926 episode score is 705.96


  1%|          | 1006/100000 [20:30<35:52:22,  1.30s/it]

10935 episode score is 731.33


  1%|          | 1007/100000 [20:31<35:19:04,  1.28s/it]

10944 episode score is 728.14


  1%|          | 1008/100000 [20:32<34:05:40,  1.24s/it]

10953 episode score is 692.50


  1%|          | 1009/100000 [20:33<33:12:41,  1.21s/it]

10962 episode score is 683.16


  1%|          | 1010/100000 [20:34<32:35:57,  1.19s/it]

10971 episode score is 700.57


  1%|          | 1011/100000 [20:35<32:08:31,  1.17s/it]

10980 episode score is 693.23


  1%|          | 1012/100000 [20:36<31:57:17,  1.16s/it]

10989 episode score is 703.64


  1%|          | 1013/100000 [20:38<31:49:29,  1.16s/it]

10998 episode score is 701.63


  1%|          | 1014/100000 [20:39<32:10:21,  1.17s/it]

11008 episode score is 665.82


  1%|          | 1015/100000 [20:40<32:32:36,  1.18s/it]

11018 episode score is 668.32


  1%|          | 1016/100000 [20:41<32:09:13,  1.17s/it]

11027 episode score is 694.68


  1%|          | 1017/100000 [20:42<32:01:46,  1.16s/it]

11036 episode score is 713.08


  1%|          | 1018/100000 [20:43<31:47:34,  1.16s/it]

11045 episode score is 705.59


  1%|          | 1019/100000 [20:45<32:23:23,  1.18s/it]

11055 episode score is 680.12


  1%|          | 1020/100000 [20:46<32:31:40,  1.18s/it]

11065 episode score is 665.46


  1%|          | 1021/100000 [20:47<31:52:25,  1.16s/it]

11074 episode score is 667.93


  1%|          | 1022/100000 [20:48<32:08:15,  1.17s/it]

11083 episode score is 727.64


  1%|          | 1023/100000 [20:49<32:08:12,  1.17s/it]

11092 episode score is 723.45


  1%|          | 1024/100000 [20:50<31:37:04,  1.15s/it]

11101 episode score is 690.11


  1%|          | 1025/100000 [20:52<31:45:27,  1.16s/it]

11110 episode score is 719.22


  1%|          | 1026/100000 [20:53<31:46:07,  1.16s/it]

11119 episode score is 733.38


  1%|          | 1027/100000 [20:54<32:06:39,  1.17s/it]

11129 episode score is 667.68


  1%|          | 1028/100000 [20:55<32:15:00,  1.17s/it]

11139 episode score is 652.70


  1%|          | 1029/100000 [20:56<32:33:49,  1.18s/it]

11149 episode score is 665.50


  1%|          | 1030/100000 [20:58<32:39:05,  1.19s/it]

11159 episode score is 643.27


  1%|          | 1031/100000 [20:59<32:10:10,  1.17s/it]

11168 episode score is 694.09


  1%|          | 1032/100000 [21:00<31:49:03,  1.16s/it]

11177 episode score is 696.18


  1%|          | 1033/100000 [21:01<31:28:45,  1.15s/it]

11186 episode score is 685.98


  1%|          | 1034/100000 [21:02<32:08:24,  1.17s/it]

11196 episode score is 660.26


  1%|          | 1035/100000 [21:03<32:24:07,  1.18s/it]

11206 episode score is 669.27


  1%|          | 1036/100000 [21:04<31:38:52,  1.15s/it]

11215 episode score is 674.35


  1%|          | 1037/100000 [21:06<31:42:25,  1.15s/it]

11224 episode score is 705.19


  1%|          | 1038/100000 [21:07<31:45:28,  1.16s/it]

11233 episode score is 732.44


  1%|          | 1039/100000 [21:08<31:59:13,  1.16s/it]

11242 episode score is 729.91


  1%|          | 1040/100000 [21:09<33:02:43,  1.20s/it]

11251 episode score is 748.60


  1%|          | 1041/100000 [21:10<32:11:34,  1.17s/it]

11260 episode score is 688.21


  1%|          | 1042/100000 [21:11<31:44:57,  1.16s/it]

11269 episode score is 701.05


  1%|          | 1043/100000 [21:13<31:52:41,  1.16s/it]

11279 episode score is 640.30


  1%|          | 1044/100000 [21:14<31:52:46,  1.16s/it]

11288 episode score is 737.22


  1%|          | 1045/100000 [21:15<31:18:17,  1.14s/it]

11297 episode score is 678.28


  1%|          | 1046/100000 [21:16<31:40:06,  1.15s/it]

11306 episode score is 723.73


  1%|          | 1047/100000 [21:17<31:42:43,  1.15s/it]

11315 episode score is 728.31


  1%|          | 1048/100000 [21:18<31:37:21,  1.15s/it]

11324 episode score is 701.08


  1%|          | 1049/100000 [21:19<31:48:55,  1.16s/it]

11333 episode score is 720.80


  1%|          | 1050/100000 [21:21<31:29:21,  1.15s/it]

11342 episode score is 710.86


  1%|          | 1051/100000 [21:22<31:58:11,  1.16s/it]

11352 episode score is 671.14


  1%|          | 1052/100000 [21:23<31:46:02,  1.16s/it]

11361 episode score is 705.67


  1%|          | 1053/100000 [21:24<31:18:54,  1.14s/it]

11369 episode score is 790.27


  1%|          | 1054/100000 [21:25<31:14:05,  1.14s/it]

11378 episode score is 718.41


  1%|          | 1055/100000 [21:26<31:36:37,  1.15s/it]

11387 episode score is 757.48


  1%|          | 1056/100000 [21:27<31:11:45,  1.14s/it]

11395 episode score is 791.30


  1%|          | 1057/100000 [21:29<31:59:46,  1.16s/it]

11404 episode score is 785.04


  1%|          | 1058/100000 [21:30<32:17:03,  1.17s/it]

11413 episode score is 755.28


  1%|          | 1059/100000 [21:31<31:55:21,  1.16s/it]

11422 episode score is 689.90


  1%|          | 1060/100000 [21:32<32:09:24,  1.17s/it]

11431 episode score is 749.47


  1%|          | 1061/100000 [21:33<32:46:18,  1.19s/it]

11440 episode score is 782.10


  1%|          | 1062/100000 [21:35<32:36:46,  1.19s/it]

11449 episode score is 737.90


  1%|          | 1063/100000 [21:36<32:29:44,  1.18s/it]

11458 episode score is 743.65


  1%|          | 1064/100000 [21:37<32:17:18,  1.17s/it]

11467 episode score is 731.51


  1%|          | 1065/100000 [21:38<32:28:35,  1.18s/it]

11476 episode score is 743.37


  1%|          | 1066/100000 [21:39<32:35:38,  1.19s/it]

11485 episode score is 736.28


  1%|          | 1067/100000 [21:41<32:37:06,  1.19s/it]

11494 episode score is 738.76


  1%|          | 1068/100000 [21:42<32:41:07,  1.19s/it]

11503 episode score is 766.09


  1%|          | 1069/100000 [21:43<32:59:30,  1.20s/it]

11512 episode score is 754.81


  1%|          | 1070/100000 [21:44<32:10:37,  1.17s/it]

11520 episode score is 768.16


  1%|          | 1071/100000 [21:45<31:58:41,  1.16s/it]

11528 episode score is 831.63


  1%|          | 1072/100000 [21:46<31:30:00,  1.15s/it]

11536 episode score is 772.03


  1%|          | 1073/100000 [21:48<32:12:30,  1.17s/it]

11545 episode score is 770.77


  1%|          | 1074/100000 [21:49<32:36:04,  1.19s/it]

11554 episode score is 743.78


  1%|          | 1075/100000 [21:50<32:32:56,  1.18s/it]

11563 episode score is 733.55


  1%|          | 1076/100000 [21:51<32:36:48,  1.19s/it]

11572 episode score is 750.94


  1%|          | 1077/100000 [21:52<32:18:53,  1.18s/it]

11581 episode score is 724.94


  1%|          | 1078/100000 [21:53<32:02:50,  1.17s/it]

11589 episode score is 802.24


  1%|          | 1079/100000 [21:55<32:00:48,  1.17s/it]

11598 episode score is 731.58


  1%|          | 1080/100000 [21:56<32:24:07,  1.18s/it]

11607 episode score is 750.44


  1%|          | 1081/100000 [21:57<32:22:06,  1.18s/it]

11616 episode score is 740.18


  1%|          | 1082/100000 [21:58<32:22:39,  1.18s/it]

11625 episode score is 731.45


  1%|          | 1083/100000 [21:59<32:42:56,  1.19s/it]

11635 episode score is 689.24


  1%|          | 1084/100000 [22:01<32:59:14,  1.20s/it]

11644 episode score is 741.53


  1%|          | 1085/100000 [22:02<33:13:09,  1.21s/it]

11654 episode score is 701.15


  1%|          | 1086/100000 [22:03<32:36:09,  1.19s/it]

11663 episode score is 694.22


  1%|          | 1087/100000 [22:04<32:24:58,  1.18s/it]

11672 episode score is 742.21


  1%|          | 1088/100000 [22:05<32:28:40,  1.18s/it]

11681 episode score is 730.40


  1%|          | 1089/100000 [22:06<31:59:23,  1.16s/it]

11690 episode score is 694.92


  1%|          | 1090/100000 [22:08<31:31:31,  1.15s/it]

11699 episode score is 681.98


  1%|          | 1091/100000 [22:09<31:24:09,  1.14s/it]

11708 episode score is 694.80


  1%|          | 1092/100000 [22:10<32:12:11,  1.17s/it]

11717 episode score is 716.75


  1%|          | 1093/100000 [22:11<31:46:40,  1.16s/it]

11726 episode score is 679.26


  1%|          | 1094/100000 [22:12<31:33:12,  1.15s/it]

11735 episode score is 704.27


  1%|          | 1095/100000 [22:13<31:25:30,  1.14s/it]

11744 episode score is 687.71


  1%|          | 1096/100000 [22:14<31:13:29,  1.14s/it]

11753 episode score is 697.79


  1%|          | 1097/100000 [22:16<31:48:38,  1.16s/it]

11762 episode score is 746.39


  1%|          | 1098/100000 [22:17<31:40:59,  1.15s/it]

11771 episode score is 716.96


  1%|          | 1099/100000 [22:18<31:37:54,  1.15s/it]

11780 episode score is 710.31
11789 episode score is 718.07


  1%|          | 1100/100000 [22:20<41:09:50,  1.50s/it]

Iteration 1100: Average test reward: 740.97


  1%|          | 1101/100000 [22:21<38:15:30,  1.39s/it]

11798 episode score is 712.84


  1%|          | 1102/100000 [22:23<36:08:00,  1.32s/it]

11807 episode score is 701.80


  1%|          | 1103/100000 [22:24<35:14:39,  1.28s/it]

11817 episode score is 650.09


  1%|          | 1104/100000 [22:25<33:48:00,  1.23s/it]

11826 episode score is 669.51


  1%|          | 1105/100000 [22:26<32:44:42,  1.19s/it]

11835 episode score is 682.42


  1%|          | 1106/100000 [22:27<32:06:50,  1.17s/it]

11844 episode score is 690.71


  1%|          | 1107/100000 [22:28<32:16:49,  1.18s/it]

11853 episode score is 743.70


  1%|          | 1108/100000 [22:29<32:26:56,  1.18s/it]

11862 episode score is 746.40


  1%|          | 1109/100000 [22:31<31:45:17,  1.16s/it]

11871 episode score is 707.82


  1%|          | 1110/100000 [22:32<31:27:03,  1.14s/it]

11880 episode score is 678.15


  1%|          | 1111/100000 [22:33<31:16:24,  1.14s/it]

11889 episode score is 689.40


  1%|          | 1112/100000 [22:34<31:06:29,  1.13s/it]

11898 episode score is 687.30


  1%|          | 1113/100000 [22:35<30:51:32,  1.12s/it]

11907 episode score is 674.27


  1%|          | 1114/100000 [22:36<31:00:24,  1.13s/it]

11916 episode score is 710.05


  1%|          | 1115/100000 [22:37<31:11:42,  1.14s/it]

11925 episode score is 720.60


  1%|          | 1116/100000 [22:38<31:09:29,  1.13s/it]

11934 episode score is 708.55


  1%|          | 1117/100000 [22:40<31:26:43,  1.14s/it]

11943 episode score is 735.67


  1%|          | 1118/100000 [22:41<31:58:48,  1.16s/it]

11952 episode score is 746.90


  1%|          | 1119/100000 [22:42<32:17:03,  1.18s/it]

11961 episode score is 766.75


  1%|          | 1120/100000 [22:43<32:19:25,  1.18s/it]

11970 episode score is 739.45


  1%|          | 1121/100000 [22:44<31:42:04,  1.15s/it]

11978 episode score is 805.80


  1%|          | 1122/100000 [22:46<32:16:59,  1.18s/it]

11987 episode score is 781.31


  1%|          | 1123/100000 [22:47<32:25:52,  1.18s/it]

11996 episode score is 761.87


  1%|          | 1124/100000 [22:48<31:56:48,  1.16s/it]

12005 episode score is 673.08


  1%|          | 1125/100000 [22:49<32:15:34,  1.17s/it]

12014 episode score is 770.26


  1%|          | 1126/100000 [22:50<32:19:00,  1.18s/it]

12023 episode score is 741.01


  1%|          | 1127/100000 [22:51<32:27:48,  1.18s/it]

12032 episode score is 749.53


  1%|          | 1128/100000 [22:53<32:22:44,  1.18s/it]

12041 episode score is 744.80


  1%|          | 1129/100000 [22:54<31:47:54,  1.16s/it]

12050 episode score is 702.78


  1%|          | 1130/100000 [22:55<31:51:09,  1.16s/it]

12059 episode score is 719.78


  1%|          | 1131/100000 [22:56<31:43:50,  1.16s/it]

12068 episode score is 702.03


  1%|          | 1132/100000 [22:57<31:47:20,  1.16s/it]

12077 episode score is 751.52


  1%|          | 1133/100000 [22:58<32:00:10,  1.17s/it]

12086 episode score is 692.81


  1%|          | 1134/100000 [22:59<31:54:11,  1.16s/it]

12095 episode score is 710.85


  1%|          | 1135/100000 [23:01<31:39:52,  1.15s/it]

12104 episode score is 703.59


  1%|          | 1136/100000 [23:02<31:49:25,  1.16s/it]

12113 episode score is 724.39


  1%|          | 1137/100000 [23:03<31:24:55,  1.14s/it]

12122 episode score is 703.16


  1%|          | 1138/100000 [23:04<31:24:26,  1.14s/it]

12131 episode score is 715.43


  1%|          | 1139/100000 [23:05<31:10:32,  1.14s/it]

12140 episode score is 716.12


  1%|          | 1140/100000 [23:06<31:32:08,  1.15s/it]

12149 episode score is 732.65


  1%|          | 1141/100000 [23:08<31:38:08,  1.15s/it]

12158 episode score is 728.56


  1%|          | 1142/100000 [23:09<31:23:15,  1.14s/it]

12166 episode score is 741.19


  1%|          | 1143/100000 [23:10<31:24:56,  1.14s/it]

12175 episode score is 722.07


  1%|          | 1144/100000 [23:11<31:46:15,  1.16s/it]

12184 episode score is 758.87


  1%|          | 1145/100000 [23:12<31:57:00,  1.16s/it]

12193 episode score is 728.30


  1%|          | 1146/100000 [23:13<32:02:57,  1.17s/it]

12202 episode score is 716.73


  1%|          | 1147/100000 [23:14<31:56:31,  1.16s/it]

12211 episode score is 723.98


  1%|          | 1148/100000 [23:16<32:10:30,  1.17s/it]

12220 episode score is 740.81


  1%|          | 1149/100000 [23:17<32:17:24,  1.18s/it]

12229 episode score is 728.22


  1%|          | 1150/100000 [23:18<32:05:22,  1.17s/it]

12238 episode score is 712.42


  1%|          | 1151/100000 [23:19<32:16:41,  1.18s/it]

12247 episode score is 733.13


  1%|          | 1152/100000 [23:20<32:15:18,  1.17s/it]

12256 episode score is 742.27


  1%|          | 1153/100000 [23:22<31:59:45,  1.17s/it]

12265 episode score is 705.52


  1%|          | 1154/100000 [23:23<31:53:22,  1.16s/it]

12274 episode score is 717.19


  1%|          | 1155/100000 [23:24<31:45:24,  1.16s/it]

12283 episode score is 722.81


  1%|          | 1156/100000 [23:25<31:44:08,  1.16s/it]

12292 episode score is 722.78


  1%|          | 1157/100000 [23:26<31:38:25,  1.15s/it]

12301 episode score is 730.95


  1%|          | 1158/100000 [23:27<32:03:02,  1.17s/it]

12309 episode score is 789.99


  1%|          | 1159/100000 [23:29<32:21:36,  1.18s/it]

12318 episode score is 764.91


  1%|          | 1160/100000 [23:30<32:38:22,  1.19s/it]

12327 episode score is 787.47


  1%|          | 1161/100000 [23:31<31:54:09,  1.16s/it]

12335 episode score is 791.66


  1%|          | 1162/100000 [23:32<31:51:06,  1.16s/it]

12343 episode score is 838.45


  1%|          | 1163/100000 [23:33<31:35:26,  1.15s/it]

12351 episode score is 817.54


  1%|          | 1164/100000 [23:34<32:04:34,  1.17s/it]

12360 episode score is 759.90


  1%|          | 1165/100000 [23:35<31:49:35,  1.16s/it]

12368 episode score is 785.85


  1%|          | 1166/100000 [23:37<31:35:59,  1.15s/it]

12377 episode score is 709.39


  1%|          | 1167/100000 [23:38<31:29:01,  1.15s/it]

12386 episode score is 715.68


  1%|          | 1168/100000 [23:39<31:52:57,  1.16s/it]

12395 episode score is 758.38


  1%|          | 1169/100000 [23:40<32:04:44,  1.17s/it]

12404 episode score is 745.76


  1%|          | 1170/100000 [23:41<32:03:17,  1.17s/it]

12413 episode score is 727.56


  1%|          | 1171/100000 [23:42<31:51:32,  1.16s/it]

12422 episode score is 715.56


  1%|          | 1172/100000 [23:44<32:16:05,  1.18s/it]

12431 episode score is 758.89


  1%|          | 1173/100000 [23:45<32:02:09,  1.17s/it]

12440 episode score is 711.85


  1%|          | 1174/100000 [23:46<32:10:48,  1.17s/it]

12449 episode score is 742.92


  1%|          | 1175/100000 [23:47<32:37:59,  1.19s/it]

12458 episode score is 781.21


  1%|          | 1176/100000 [23:48<32:49:37,  1.20s/it]

12467 episode score is 767.29


  1%|          | 1177/100000 [23:50<32:28:35,  1.18s/it]

12476 episode score is 733.15


  1%|          | 1178/100000 [23:51<32:00:03,  1.17s/it]

12485 episode score is 713.84


  1%|          | 1179/100000 [23:52<32:20:11,  1.18s/it]

12494 episode score is 745.03


  1%|          | 1180/100000 [23:53<32:06:37,  1.17s/it]

12503 episode score is 732.11


  1%|          | 1181/100000 [23:54<31:37:51,  1.15s/it]

12511 episode score is 780.86


  1%|          | 1182/100000 [23:55<32:04:27,  1.17s/it]

12520 episode score is 763.69


  1%|          | 1183/100000 [23:56<31:37:21,  1.15s/it]

12528 episode score is 803.62


  1%|          | 1184/100000 [23:58<31:21:06,  1.14s/it]

12536 episode score is 791.85


  1%|          | 1185/100000 [23:59<32:11:52,  1.17s/it]

12545 episode score is 787.56


  1%|          | 1186/100000 [24:00<31:49:59,  1.16s/it]

12553 episode score is 830.26


  1%|          | 1187/100000 [24:01<32:17:16,  1.18s/it]

12562 episode score is 806.04


  1%|          | 1188/100000 [24:02<32:44:52,  1.19s/it]

12571 episode score is 789.90


  1%|          | 1189/100000 [24:04<33:01:29,  1.20s/it]

12580 episode score is 793.21


  1%|          | 1190/100000 [24:05<33:03:05,  1.20s/it]

12589 episode score is 751.76


  1%|          | 1191/100000 [24:06<32:57:01,  1.20s/it]

12598 episode score is 738.71


  1%|          | 1192/100000 [24:07<33:17:47,  1.21s/it]

12607 episode score is 756.40


  1%|          | 1193/100000 [24:08<32:51:45,  1.20s/it]

12616 episode score is 770.46


  1%|          | 1194/100000 [24:10<32:19:39,  1.18s/it]

12624 episode score is 775.89


  1%|          | 1195/100000 [24:11<33:19:33,  1.21s/it]

12633 episode score is 759.80


  1%|          | 1196/100000 [24:12<33:17:36,  1.21s/it]

12642 episode score is 756.60


  1%|          | 1197/100000 [24:13<32:51:33,  1.20s/it]

12651 episode score is 716.34


  1%|          | 1198/100000 [24:14<32:30:25,  1.18s/it]

12660 episode score is 741.35


  1%|          | 1199/100000 [24:16<32:22:17,  1.18s/it]

12669 episode score is 719.62
12678 episode score is 775.50


  1%|          | 1200/100000 [24:18<42:09:16,  1.54s/it]

Iteration 1200: Average test reward: 734.35


  1%|          | 1201/100000 [24:19<38:59:37,  1.42s/it]

12687 episode score is 703.29


  1%|          | 1202/100000 [24:20<37:01:32,  1.35s/it]

12696 episode score is 734.64


  1%|          | 1203/100000 [24:21<35:26:00,  1.29s/it]

12705 episode score is 748.70


  1%|          | 1204/100000 [24:23<33:57:05,  1.24s/it]

12713 episode score is 767.39


  1%|          | 1205/100000 [24:24<33:26:43,  1.22s/it]

12722 episode score is 767.18


  1%|          | 1206/100000 [24:25<32:43:32,  1.19s/it]

12731 episode score is 716.49


  1%|          | 1207/100000 [24:26<32:51:04,  1.20s/it]

12740 episode score is 738.71


  1%|          | 1208/100000 [24:27<32:59:07,  1.20s/it]

12749 episode score is 716.73


  1%|          | 1209/100000 [24:28<32:40:14,  1.19s/it]

12758 episode score is 683.50


  1%|          | 1210/100000 [24:30<32:28:50,  1.18s/it]

12767 episode score is 721.32


  1%|          | 1211/100000 [24:31<32:16:41,  1.18s/it]

12776 episode score is 746.56


  1%|          | 1212/100000 [24:32<32:36:46,  1.19s/it]

12785 episode score is 756.66


  1%|          | 1213/100000 [24:33<32:41:42,  1.19s/it]

12794 episode score is 761.49


  1%|          | 1214/100000 [24:34<32:37:07,  1.19s/it]

12803 episode score is 773.32


  1%|          | 1215/100000 [24:36<32:42:43,  1.19s/it]

12812 episode score is 750.35


  1%|          | 1216/100000 [24:37<32:42:21,  1.19s/it]

12821 episode score is 761.02


  1%|          | 1217/100000 [24:38<32:14:25,  1.17s/it]

12830 episode score is 651.24


  1%|          | 1218/100000 [24:39<32:26:33,  1.18s/it]

12839 episode score is 775.19


  1%|          | 1219/100000 [24:40<32:35:36,  1.19s/it]

12848 episode score is 788.73


  1%|          | 1220/100000 [24:41<32:50:25,  1.20s/it]

12857 episode score is 777.43


  1%|          | 1221/100000 [24:43<32:44:54,  1.19s/it]

12866 episode score is 714.27


  1%|          | 1222/100000 [24:44<32:03:23,  1.17s/it]

12875 episode score is 674.93


  1%|          | 1223/100000 [24:45<31:38:35,  1.15s/it]

12884 episode score is 717.32


  1%|          | 1224/100000 [24:46<31:15:21,  1.14s/it]

12893 episode score is 685.45


  1%|          | 1225/100000 [24:47<31:43:02,  1.16s/it]

12902 episode score is 738.85


  1%|          | 1226/100000 [24:48<31:27:06,  1.15s/it]

12911 episode score is 699.48


  1%|          | 1227/100000 [24:50<31:43:05,  1.16s/it]

12920 episode score is 729.71


  1%|          | 1228/100000 [24:51<31:42:08,  1.16s/it]

12929 episode score is 731.12


  1%|          | 1229/100000 [24:52<31:46:10,  1.16s/it]

12938 episode score is 735.45


  1%|          | 1230/100000 [24:53<31:25:19,  1.15s/it]

12946 episode score is 798.20


  1%|          | 1231/100000 [24:54<31:53:04,  1.16s/it]

12955 episode score is 757.99


  1%|          | 1232/100000 [24:55<32:05:33,  1.17s/it]

12964 episode score is 765.36


  1%|          | 1233/100000 [24:57<32:19:02,  1.18s/it]

12973 episode score is 751.16


  1%|          | 1234/100000 [24:58<32:38:33,  1.19s/it]

12982 episode score is 777.72


  1%|          | 1235/100000 [24:59<32:28:53,  1.18s/it]

12991 episode score is 766.60


  1%|          | 1236/100000 [25:00<32:47:03,  1.20s/it]

13000 episode score is 797.74


  1%|          | 1237/100000 [25:01<32:37:44,  1.19s/it]

13009 episode score is 748.69


  1%|          | 1238/100000 [25:02<32:25:24,  1.18s/it]

13018 episode score is 716.10


  1%|          | 1239/100000 [25:04<32:36:21,  1.19s/it]

13028 episode score is 693.73


  1%|          | 1240/100000 [25:05<31:53:20,  1.16s/it]

13037 episode score is 668.51


  1%|          | 1241/100000 [25:06<31:28:20,  1.15s/it]

13046 episode score is 717.77


  1%|          | 1242/100000 [25:07<31:04:28,  1.13s/it]

13055 episode score is 676.92


  1%|          | 1243/100000 [25:08<31:27:05,  1.15s/it]

13065 episode score is 672.01


  1%|          | 1244/100000 [25:09<31:07:04,  1.13s/it]

13074 episode score is 722.28


  1%|          | 1245/100000 [25:10<31:45:20,  1.16s/it]

13084 episode score is 677.94


  1%|          | 1246/100000 [25:12<31:25:04,  1.15s/it]

13093 episode score is 716.33


  1%|          | 1247/100000 [25:13<32:06:57,  1.17s/it]

13102 episode score is 766.42


  1%|          | 1248/100000 [25:14<32:15:55,  1.18s/it]

13112 episode score is 671.61


  1%|          | 1249/100000 [25:15<32:22:21,  1.18s/it]

13122 episode score is 677.71


  1%|▏         | 1250/100000 [25:16<31:45:43,  1.16s/it]

13131 episode score is 710.59


  1%|▏         | 1251/100000 [25:17<31:48:00,  1.16s/it]

13140 episode score is 713.63


  1%|▏         | 1252/100000 [25:19<31:41:11,  1.16s/it]

13149 episode score is 735.04


  1%|▏         | 1253/100000 [25:20<31:21:21,  1.14s/it]

13158 episode score is 714.47


  1%|▏         | 1254/100000 [25:21<31:51:36,  1.16s/it]

13167 episode score is 787.74


  1%|▏         | 1255/100000 [25:22<31:54:42,  1.16s/it]

13176 episode score is 763.72


  1%|▏         | 1256/100000 [25:23<32:07:45,  1.17s/it]

13185 episode score is 764.36


  1%|▏         | 1257/100000 [25:24<31:45:03,  1.16s/it]

13194 episode score is 733.21


  1%|▏         | 1258/100000 [25:26<32:36:51,  1.19s/it]

13203 episode score is 753.49


  1%|▏         | 1259/100000 [25:27<32:04:36,  1.17s/it]

13212 episode score is 727.52


  1%|▏         | 1260/100000 [25:28<31:48:49,  1.16s/it]

13221 episode score is 735.70


  1%|▏         | 1261/100000 [25:29<31:40:53,  1.16s/it]

13230 episode score is 745.19


  1%|▏         | 1262/100000 [25:30<31:45:09,  1.16s/it]

13239 episode score is 767.31


  1%|▏         | 1263/100000 [25:31<32:04:49,  1.17s/it]

13248 episode score is 783.33


  1%|▏         | 1264/100000 [25:33<32:19:05,  1.18s/it]

13257 episode score is 765.12


  1%|▏         | 1265/100000 [25:34<31:41:39,  1.16s/it]

13265 episode score is 835.30


  1%|▏         | 1266/100000 [25:35<31:43:19,  1.16s/it]

13274 episode score is 761.09


  1%|▏         | 1267/100000 [25:36<32:16:41,  1.18s/it]

13283 episode score is 784.34


  1%|▏         | 1268/100000 [25:37<32:24:11,  1.18s/it]

13292 episode score is 784.45


  1%|▏         | 1269/100000 [25:38<31:49:04,  1.16s/it]

13300 episode score is 823.98


  1%|▏         | 1270/100000 [25:40<32:14:53,  1.18s/it]

13309 episode score is 807.85


  1%|▏         | 1271/100000 [25:41<32:35:54,  1.19s/it]

13318 episode score is 804.81


  1%|▏         | 1272/100000 [25:42<32:02:40,  1.17s/it]

13326 episode score is 833.23


  1%|▏         | 1273/100000 [25:43<32:23:42,  1.18s/it]

13335 episode score is 795.21


  1%|▏         | 1274/100000 [25:44<31:48:26,  1.16s/it]

13343 episode score is 836.46


  1%|▏         | 1275/100000 [25:45<31:18:46,  1.14s/it]

13351 episode score is 816.21


  1%|▏         | 1276/100000 [25:47<31:47:21,  1.16s/it]

13360 episode score is 788.38


  1%|▏         | 1277/100000 [25:48<31:39:51,  1.15s/it]

13368 episode score is 816.87


  1%|▏         | 1278/100000 [25:49<32:23:18,  1.18s/it]

13377 episode score is 812.48


  1%|▏         | 1279/100000 [25:50<32:26:14,  1.18s/it]

13386 episode score is 759.00


  1%|▏         | 1280/100000 [25:51<32:32:54,  1.19s/it]

13395 episode score is 785.14


  1%|▏         | 1281/100000 [25:53<32:18:16,  1.18s/it]

13404 episode score is 751.80


  1%|▏         | 1282/100000 [25:54<32:26:23,  1.18s/it]

13413 episode score is 777.84


  1%|▏         | 1283/100000 [25:55<32:43:50,  1.19s/it]

13422 episode score is 789.84


  1%|▏         | 1284/100000 [25:56<32:58:04,  1.20s/it]

13431 episode score is 799.89


  1%|▏         | 1285/100000 [25:57<32:30:59,  1.19s/it]

13440 episode score is 724.36


  1%|▏         | 1286/100000 [25:58<32:13:43,  1.18s/it]

13449 episode score is 746.88


  1%|▏         | 1287/100000 [26:00<32:09:03,  1.17s/it]

13458 episode score is 745.44


  1%|▏         | 1288/100000 [26:01<32:24:34,  1.18s/it]

13467 episode score is 790.08


  1%|▏         | 1289/100000 [26:02<32:47:33,  1.20s/it]

13476 episode score is 780.94


  1%|▏         | 1290/100000 [26:03<32:25:50,  1.18s/it]

13485 episode score is 738.66


  1%|▏         | 1291/100000 [26:04<32:16:14,  1.18s/it]

13494 episode score is 758.10


  1%|▏         | 1292/100000 [26:06<32:00:58,  1.17s/it]

13503 episode score is 753.09


  1%|▏         | 1293/100000 [26:07<32:03:11,  1.17s/it]

13512 episode score is 763.44


  1%|▏         | 1294/100000 [26:08<32:29:26,  1.18s/it]

13521 episode score is 821.58


  1%|▏         | 1295/100000 [26:09<32:25:18,  1.18s/it]

13530 episode score is 779.80


  1%|▏         | 1296/100000 [26:10<32:28:46,  1.18s/it]

13539 episode score is 782.90


  1%|▏         | 1297/100000 [26:11<32:14:11,  1.18s/it]

13548 episode score is 748.33


  1%|▏         | 1298/100000 [26:13<32:44:20,  1.19s/it]

13557 episode score is 794.68


  1%|▏         | 1299/100000 [26:14<32:56:09,  1.20s/it]

13566 episode score is 803.92
13575 episode score is 769.73


  1%|▏         | 1300/100000 [26:16<43:21:01,  1.58s/it]

Iteration 1300: Average test reward: 790.08


  1%|▏         | 1301/100000 [26:18<39:59:42,  1.46s/it]

13584 episode score is 764.28


  1%|▏         | 1302/100000 [26:19<37:56:44,  1.38s/it]

13593 episode score is 778.30


  1%|▏         | 1303/100000 [26:20<36:32:12,  1.33s/it]

13602 episode score is 775.61


  1%|▏         | 1304/100000 [26:21<35:24:11,  1.29s/it]

13611 episode score is 783.89


  1%|▏         | 1305/100000 [26:22<34:18:00,  1.25s/it]

13620 episode score is 761.46


  1%|▏         | 1306/100000 [26:24<33:46:30,  1.23s/it]

13629 episode score is 749.55


  1%|▏         | 1307/100000 [26:25<33:07:42,  1.21s/it]

13638 episode score is 746.46


  1%|▏         | 1308/100000 [26:26<32:46:59,  1.20s/it]

13647 episode score is 763.54


  1%|▏         | 1309/100000 [26:27<32:53:13,  1.20s/it]

13656 episode score is 798.48


  1%|▏         | 1310/100000 [26:28<33:06:08,  1.21s/it]

13665 episode score is 764.15


  1%|▏         | 1311/100000 [26:29<32:44:48,  1.19s/it]

13674 episode score is 750.43


  1%|▏         | 1312/100000 [26:31<32:37:42,  1.19s/it]

13683 episode score is 771.13


  1%|▏         | 1313/100000 [26:32<32:33:36,  1.19s/it]

13692 episode score is 765.02


  1%|▏         | 1314/100000 [26:33<32:09:30,  1.17s/it]

13701 episode score is 729.79


  1%|▏         | 1315/100000 [26:34<32:04:34,  1.17s/it]

13710 episode score is 762.42


  1%|▏         | 1316/100000 [26:35<32:01:15,  1.17s/it]

13719 episode score is 760.80


  1%|▏         | 1317/100000 [26:36<31:54:31,  1.16s/it]

13728 episode score is 753.88


  1%|▏         | 1318/100000 [26:38<31:53:25,  1.16s/it]

13737 episode score is 764.94


  1%|▏         | 1319/100000 [26:39<31:49:14,  1.16s/it]

13746 episode score is 744.70


  1%|▏         | 1320/100000 [26:40<31:37:22,  1.15s/it]

13755 episode score is 746.30


  1%|▏         | 1321/100000 [26:41<31:49:54,  1.16s/it]

13764 episode score is 772.31


  1%|▏         | 1322/100000 [26:42<31:58:25,  1.17s/it]

13773 episode score is 773.99


  1%|▏         | 1323/100000 [26:43<32:17:04,  1.18s/it]

13782 episode score is 803.14


  1%|▏         | 1324/100000 [26:45<32:22:29,  1.18s/it]

13791 episode score is 792.28


  1%|▏         | 1325/100000 [26:46<32:44:43,  1.19s/it]

13800 episode score is 800.43


  1%|▏         | 1326/100000 [26:47<32:57:39,  1.20s/it]

13809 episode score is 783.08


  1%|▏         | 1327/100000 [26:48<32:40:00,  1.19s/it]

13818 episode score is 743.39


  1%|▏         | 1328/100000 [26:49<32:58:05,  1.20s/it]

13827 episode score is 800.54


  1%|▏         | 1329/100000 [26:51<33:06:45,  1.21s/it]

13836 episode score is 813.52


  1%|▏         | 1330/100000 [26:52<33:09:22,  1.21s/it]

13845 episode score is 782.33


  1%|▏         | 1331/100000 [26:53<32:20:23,  1.18s/it]

13853 episode score is 815.19


  1%|▏         | 1332/100000 [26:54<32:15:57,  1.18s/it]

13862 episode score is 755.72


  1%|▏         | 1333/100000 [26:55<32:19:27,  1.18s/it]

13871 episode score is 763.76


  1%|▏         | 1334/100000 [26:57<32:34:59,  1.19s/it]

13880 episode score is 810.16


  1%|▏         | 1335/100000 [26:58<33:05:25,  1.21s/it]

13889 episode score is 724.64


  1%|▏         | 1336/100000 [26:59<32:20:12,  1.18s/it]

13897 episode score is 806.69


  1%|▏         | 1337/100000 [27:00<32:03:31,  1.17s/it]

13906 episode score is 739.67


  1%|▏         | 1338/100000 [27:01<32:06:24,  1.17s/it]

13915 episode score is 740.08


  1%|▏         | 1339/100000 [27:02<32:03:35,  1.17s/it]

13924 episode score is 753.86


  1%|▏         | 1340/100000 [27:04<32:45:08,  1.20s/it]

13933 episode score is 802.85


  1%|▏         | 1341/100000 [27:05<32:52:06,  1.20s/it]

13942 episode score is 770.16


  1%|▏         | 1342/100000 [27:06<32:07:02,  1.17s/it]

13951 episode score is 706.30


  1%|▏         | 1343/100000 [27:07<32:29:38,  1.19s/it]

13960 episode score is 798.85


  1%|▏         | 1344/100000 [27:08<32:26:03,  1.18s/it]

13969 episode score is 764.89


  1%|▏         | 1345/100000 [27:10<32:32:49,  1.19s/it]

13978 episode score is 787.51


  1%|▏         | 1346/100000 [27:11<32:45:49,  1.20s/it]

13987 episode score is 798.03


  1%|▏         | 1347/100000 [27:12<32:22:01,  1.18s/it]

13996 episode score is 729.84


  1%|▏         | 1348/100000 [27:13<32:06:14,  1.17s/it]

14005 episode score is 750.19


  1%|▏         | 1349/100000 [27:14<32:03:37,  1.17s/it]

14014 episode score is 767.00


  1%|▏         | 1350/100000 [27:15<32:15:06,  1.18s/it]

14023 episode score is 784.79


  1%|▏         | 1351/100000 [27:17<32:32:21,  1.19s/it]

14032 episode score is 769.53


  1%|▏         | 1352/100000 [27:18<32:38:17,  1.19s/it]

14041 episode score is 754.47


  1%|▏         | 1353/100000 [27:19<32:38:49,  1.19s/it]

14050 episode score is 777.19


  1%|▏         | 1354/100000 [27:20<32:39:26,  1.19s/it]

14059 episode score is 759.27


  1%|▏         | 1355/100000 [27:21<32:31:18,  1.19s/it]

14068 episode score is 762.56


  1%|▏         | 1356/100000 [27:23<32:19:11,  1.18s/it]

14077 episode score is 665.31


  1%|▏         | 1357/100000 [27:24<32:21:32,  1.18s/it]

14086 episode score is 734.54


  1%|▏         | 1358/100000 [27:25<32:02:21,  1.17s/it]

14094 episode score is 677.69


  1%|▏         | 1359/100000 [27:26<32:29:30,  1.19s/it]

14103 episode score is 769.72


  1%|▏         | 1360/100000 [27:27<31:54:40,  1.16s/it]

14111 episode score is 608.03


  1%|▏         | 1361/100000 [27:28<31:33:59,  1.15s/it]

14119 episode score is 707.29


  1%|▏         | 1362/100000 [27:30<31:33:34,  1.15s/it]

14127 episode score is 683.12


  1%|▏         | 1363/100000 [27:31<32:07:54,  1.17s/it]

14136 episode score is 763.76


  1%|▏         | 1364/100000 [27:32<32:39:55,  1.19s/it]

14145 episode score is 757.02


  1%|▏         | 1365/100000 [27:33<32:13:55,  1.18s/it]

14153 episode score is 689.17


  1%|▏         | 1366/100000 [27:34<32:02:21,  1.17s/it]

14160 episode score is 847.94


  1%|▏         | 1367/100000 [27:36<32:34:14,  1.19s/it]

14169 episode score is 780.42


  1%|▏         | 1368/100000 [27:37<33:04:01,  1.21s/it]

14178 episode score is 718.70


  1%|▏         | 1369/100000 [27:38<33:04:01,  1.21s/it]

14187 episode score is 741.12


  1%|▏         | 1370/100000 [27:39<32:49:41,  1.20s/it]

14195 episode score is 778.66


  1%|▏         | 1371/100000 [27:40<33:35:05,  1.23s/it]

14204 episode score is 726.06


  1%|▏         | 1372/100000 [27:42<33:13:45,  1.21s/it]

14213 episode score is 734.63


  1%|▏         | 1373/100000 [27:43<32:38:31,  1.19s/it]

14221 episode score is 787.80


  1%|▏         | 1374/100000 [27:44<32:34:13,  1.19s/it]

14230 episode score is 739.69


  1%|▏         | 1375/100000 [27:45<32:38:47,  1.19s/it]

14239 episode score is 774.37


  1%|▏         | 1376/100000 [27:46<32:22:44,  1.18s/it]

14247 episode score is 834.07


  1%|▏         | 1377/100000 [27:48<32:44:13,  1.19s/it]

14256 episode score is 790.45


  1%|▏         | 1378/100000 [27:49<33:04:26,  1.21s/it]

14265 episode score is 791.41


  1%|▏         | 1379/100000 [27:50<32:59:08,  1.20s/it]

14274 episode score is 762.88


  1%|▏         | 1380/100000 [27:51<33:14:23,  1.21s/it]

14283 episode score is 645.32


  1%|▏         | 1381/100000 [27:52<33:04:38,  1.21s/it]

14292 episode score is 780.61


  1%|▏         | 1382/100000 [27:54<32:53:04,  1.20s/it]

14301 episode score is 774.63


  1%|▏         | 1383/100000 [27:55<32:33:52,  1.19s/it]

14310 episode score is 772.65


  1%|▏         | 1384/100000 [27:56<32:26:39,  1.18s/it]

14319 episode score is 744.10


  1%|▏         | 1385/100000 [27:57<32:00:27,  1.17s/it]

14328 episode score is 728.75


  1%|▏         | 1386/100000 [27:58<32:29:11,  1.19s/it]

14337 episode score is 787.76


  1%|▏         | 1387/100000 [27:59<32:47:30,  1.20s/it]

14346 episode score is 791.88


  1%|▏         | 1388/100000 [28:01<32:08:59,  1.17s/it]

14354 episode score is 826.18


  1%|▏         | 1389/100000 [28:02<31:56:29,  1.17s/it]

14362 episode score is 864.09


  1%|▏         | 1390/100000 [28:03<31:31:22,  1.15s/it]

14370 episode score is 808.20


  1%|▏         | 1391/100000 [28:04<31:28:48,  1.15s/it]

14378 episode score is 824.82


  1%|▏         | 1392/100000 [28:05<31:49:44,  1.16s/it]

14387 episode score is 783.46


  1%|▏         | 1393/100000 [28:06<32:24:31,  1.18s/it]

14396 episode score is 785.55


  1%|▏         | 1394/100000 [28:08<32:24:29,  1.18s/it]

14405 episode score is 778.84


  1%|▏         | 1395/100000 [28:09<32:14:23,  1.18s/it]

14414 episode score is 769.97


  1%|▏         | 1396/100000 [28:10<32:28:21,  1.19s/it]

14423 episode score is 756.87


  1%|▏         | 1397/100000 [28:11<32:25:35,  1.18s/it]

14432 episode score is 767.23


  1%|▏         | 1398/100000 [28:12<32:00:20,  1.17s/it]

14441 episode score is 741.91


  1%|▏         | 1399/100000 [28:13<31:52:02,  1.16s/it]

14450 episode score is 745.67
14459 episode score is 689.68


  1%|▏         | 1400/100000 [28:16<41:22:55,  1.51s/it]

Iteration 1400: Average test reward: 663.37


  1%|▏         | 1401/100000 [28:17<38:28:13,  1.40s/it]

14468 episode score is 764.17


  1%|▏         | 1402/100000 [28:18<36:45:25,  1.34s/it]

14477 episode score is 760.19


  1%|▏         | 1403/100000 [28:19<34:53:18,  1.27s/it]

14486 episode score is 718.31


  1%|▏         | 1404/100000 [28:20<34:05:29,  1.24s/it]

14495 episode score is 762.81


  1%|▏         | 1405/100000 [28:22<33:17:31,  1.22s/it]

14504 episode score is 749.42


  1%|▏         | 1406/100000 [28:23<33:24:40,  1.22s/it]

14513 episode score is 794.72


  1%|▏         | 1407/100000 [28:24<33:21:57,  1.22s/it]

14522 episode score is 792.69


  1%|▏         | 1408/100000 [28:25<32:57:20,  1.20s/it]

14531 episode score is 761.45


  1%|▏         | 1409/100000 [28:26<32:32:00,  1.19s/it]

14540 episode score is 751.74


  1%|▏         | 1410/100000 [28:27<32:13:40,  1.18s/it]

14549 episode score is 742.97


  1%|▏         | 1411/100000 [28:29<32:03:12,  1.17s/it]

14558 episode score is 735.48


  1%|▏         | 1412/100000 [28:30<32:04:30,  1.17s/it]

14567 episode score is 749.54


  1%|▏         | 1413/100000 [28:31<32:36:52,  1.19s/it]

14576 episode score is 803.47


  1%|▏         | 1414/100000 [28:32<32:42:32,  1.19s/it]

14585 episode score is 746.25


  1%|▏         | 1415/100000 [28:33<32:25:28,  1.18s/it]

14594 episode score is 731.65


  1%|▏         | 1416/100000 [28:35<31:53:06,  1.16s/it]

14603 episode score is 688.25


  1%|▏         | 1417/100000 [28:36<31:35:52,  1.15s/it]

14612 episode score is 739.55


  1%|▏         | 1418/100000 [28:37<31:23:15,  1.15s/it]

14621 episode score is 734.49


  1%|▏         | 1419/100000 [28:38<31:50:57,  1.16s/it]

14631 episode score is 690.12


  1%|▏         | 1420/100000 [28:39<31:50:12,  1.16s/it]

14640 episode score is 748.03


  1%|▏         | 1421/100000 [28:40<31:33:13,  1.15s/it]

14649 episode score is 717.62


  1%|▏         | 1422/100000 [28:41<31:25:37,  1.15s/it]

14658 episode score is 738.77


  1%|▏         | 1423/100000 [28:43<31:05:27,  1.14s/it]

14667 episode score is 716.61


  1%|▏         | 1424/100000 [28:44<31:49:48,  1.16s/it]

14677 episode score is 704.86


  1%|▏         | 1425/100000 [28:45<31:59:50,  1.17s/it]

14686 episode score is 754.50


  1%|▏         | 1426/100000 [28:46<31:46:58,  1.16s/it]

14695 episode score is 752.80


  1%|▏         | 1427/100000 [28:47<31:48:17,  1.16s/it]

14704 episode score is 755.88


  1%|▏         | 1428/100000 [28:48<31:43:25,  1.16s/it]

14713 episode score is 727.03


  1%|▏         | 1429/100000 [28:50<32:00:54,  1.17s/it]

14722 episode score is 754.09


  1%|▏         | 1430/100000 [28:51<31:56:28,  1.17s/it]

14731 episode score is 747.04


  1%|▏         | 1431/100000 [28:52<31:53:42,  1.16s/it]

14740 episode score is 753.43


  1%|▏         | 1432/100000 [28:53<31:45:40,  1.16s/it]

14749 episode score is 733.71


  1%|▏         | 1433/100000 [28:54<31:30:31,  1.15s/it]

14758 episode score is 740.10


  1%|▏         | 1434/100000 [28:55<31:39:38,  1.16s/it]

14767 episode score is 758.95


  1%|▏         | 1435/100000 [28:56<31:25:24,  1.15s/it]

14776 episode score is 724.89


  1%|▏         | 1436/100000 [28:58<32:08:18,  1.17s/it]

14785 episode score is 748.25


  1%|▏         | 1437/100000 [28:59<32:04:53,  1.17s/it]

14794 episode score is 732.59


  1%|▏         | 1438/100000 [29:00<32:04:26,  1.17s/it]

14803 episode score is 723.90


  1%|▏         | 1439/100000 [29:01<31:36:57,  1.15s/it]

14812 episode score is 719.39


  1%|▏         | 1440/100000 [29:02<31:44:45,  1.16s/it]

14821 episode score is 734.51


  1%|▏         | 1441/100000 [29:04<31:45:46,  1.16s/it]

14830 episode score is 759.64


  1%|▏         | 1442/100000 [29:05<31:29:50,  1.15s/it]

14838 episode score is 772.54


  1%|▏         | 1443/100000 [29:06<31:20:52,  1.15s/it]

14847 episode score is 735.26


  1%|▏         | 1444/100000 [29:07<31:55:50,  1.17s/it]

14856 episode score is 747.35


  1%|▏         | 1445/100000 [29:08<32:04:06,  1.17s/it]

14865 episode score is 760.45


  1%|▏         | 1446/100000 [29:09<31:57:21,  1.17s/it]

14874 episode score is 733.20


  1%|▏         | 1447/100000 [29:10<31:43:24,  1.16s/it]

14883 episode score is 709.90


  1%|▏         | 1448/100000 [29:12<32:01:29,  1.17s/it]

14892 episode score is 714.02


  1%|▏         | 1449/100000 [29:13<32:27:05,  1.19s/it]

14902 episode score is 697.66


  1%|▏         | 1450/100000 [29:14<31:49:15,  1.16s/it]

14911 episode score is 701.03


  1%|▏         | 1451/100000 [29:15<31:25:24,  1.15s/it]

14920 episode score is 656.15


  1%|▏         | 1452/100000 [29:16<31:13:50,  1.14s/it]

14929 episode score is 728.39


  1%|▏         | 1453/100000 [29:17<31:21:08,  1.15s/it]

14938 episode score is 742.50


  1%|▏         | 1454/100000 [29:19<31:14:28,  1.14s/it]

14947 episode score is 741.51


  1%|▏         | 1455/100000 [29:20<31:17:34,  1.14s/it]

14956 episode score is 748.41


  1%|▏         | 1456/100000 [29:21<31:15:38,  1.14s/it]

14965 episode score is 760.64


  1%|▏         | 1457/100000 [29:22<31:00:27,  1.13s/it]

14974 episode score is 734.57


  1%|▏         | 1458/100000 [29:23<30:58:35,  1.13s/it]

14983 episode score is 758.55


  1%|▏         | 1459/100000 [29:24<31:22:34,  1.15s/it]

14992 episode score is 766.97


  1%|▏         | 1460/100000 [29:25<31:09:38,  1.14s/it]

15001 episode score is 752.53


  1%|▏         | 1461/100000 [29:26<31:00:31,  1.13s/it]

15010 episode score is 729.50


  1%|▏         | 1462/100000 [29:28<31:31:17,  1.15s/it]

15019 episode score is 751.43


  1%|▏         | 1463/100000 [29:29<32:06:18,  1.17s/it]

15029 episode score is 721.73


  1%|▏         | 1464/100000 [29:30<32:14:37,  1.18s/it]

15038 episode score is 773.44


  1%|▏         | 1465/100000 [29:31<31:59:03,  1.17s/it]

15047 episode score is 735.57


  1%|▏         | 1466/100000 [29:32<32:12:08,  1.18s/it]

15056 episode score is 788.26


  1%|▏         | 1467/100000 [29:34<32:27:18,  1.19s/it]

15065 episode score is 794.97


  1%|▏         | 1468/100000 [29:35<32:43:49,  1.20s/it]

15075 episode score is 688.08


  1%|▏         | 1469/100000 [29:36<32:12:43,  1.18s/it]

15083 episode score is 848.49


  1%|▏         | 1470/100000 [29:37<31:44:55,  1.16s/it]

15091 episode score is 828.97


  1%|▏         | 1471/100000 [29:38<31:29:27,  1.15s/it]

15099 episode score is 852.67


  1%|▏         | 1472/100000 [29:39<31:19:45,  1.14s/it]

15107 episode score is 833.03


  1%|▏         | 1473/100000 [29:41<31:35:14,  1.15s/it]

15115 episode score is 896.89


  1%|▏         | 1474/100000 [29:42<31:46:21,  1.16s/it]

15123 episode score is 868.09


  1%|▏         | 1475/100000 [29:43<31:54:34,  1.17s/it]

15132 episode score is 793.02


  1%|▏         | 1476/100000 [29:44<32:16:54,  1.18s/it]

15141 episode score is 797.02


  1%|▏         | 1477/100000 [29:45<32:44:02,  1.20s/it]

15150 episode score is 823.94


  1%|▏         | 1478/100000 [29:47<32:29:46,  1.19s/it]

15159 episode score is 758.98


  1%|▏         | 1479/100000 [29:48<33:06:09,  1.21s/it]

15168 episode score is 831.26


  1%|▏         | 1480/100000 [29:49<32:24:03,  1.18s/it]

15176 episode score is 811.23


  1%|▏         | 1481/100000 [29:50<31:49:03,  1.16s/it]

15185 episode score is 719.34


  1%|▏         | 1482/100000 [29:51<31:31:33,  1.15s/it]

15193 episode score is 844.99


  1%|▏         | 1483/100000 [29:52<32:53:31,  1.20s/it]

15202 episode score is 807.15


  1%|▏         | 1484/100000 [29:54<33:03:44,  1.21s/it]

15211 episode score is 731.07


  1%|▏         | 1485/100000 [29:55<32:32:25,  1.19s/it]

15219 episode score is 858.18


  1%|▏         | 1486/100000 [29:56<32:02:05,  1.17s/it]

15227 episode score is 829.10


  1%|▏         | 1487/100000 [29:57<31:46:03,  1.16s/it]

15235 episode score is 854.39


  1%|▏         | 1488/100000 [29:58<32:18:55,  1.18s/it]

15244 episode score is 829.61


  1%|▏         | 1489/100000 [30:00<32:43:17,  1.20s/it]

15253 episode score is 826.30


  1%|▏         | 1490/100000 [30:01<33:03:27,  1.21s/it]

15262 episode score is 821.17


  1%|▏         | 1491/100000 [30:02<33:18:25,  1.22s/it]

15271 episode score is 823.44


  1%|▏         | 1492/100000 [30:03<33:18:45,  1.22s/it]

15280 episode score is 810.48


  1%|▏         | 1493/100000 [30:04<32:27:19,  1.19s/it]

15288 episode score is 833.77


  1%|▏         | 1494/100000 [30:06<33:03:31,  1.21s/it]

15297 episode score is 823.91


  1%|▏         | 1495/100000 [30:07<32:15:59,  1.18s/it]

15305 episode score is 820.08


  1%|▏         | 1496/100000 [30:08<32:18:48,  1.18s/it]

15314 episode score is 780.44


  1%|▏         | 1497/100000 [30:09<32:37:02,  1.19s/it]

15323 episode score is 810.77


  1%|▏         | 1498/100000 [30:10<32:26:46,  1.19s/it]

15332 episode score is 753.58


  1%|▏         | 1499/100000 [30:11<32:13:40,  1.18s/it]

15341 episode score is 773.39
15350 episode score is 748.47


  2%|▏         | 1500/100000 [30:14<41:04:53,  1.50s/it]

Iteration 1500: Average test reward: 754.70


  2%|▏         | 1501/100000 [30:15<38:18:21,  1.40s/it]

15359 episode score is 760.91


  2%|▏         | 1502/100000 [30:16<36:19:55,  1.33s/it]

15368 episode score is 760.03


  2%|▏         | 1503/100000 [30:17<35:12:51,  1.29s/it]

15377 episode score is 754.72


  2%|▏         | 1504/100000 [30:18<34:10:00,  1.25s/it]

15386 episode score is 745.33


  2%|▏         | 1505/100000 [30:20<33:15:50,  1.22s/it]

15395 episode score is 735.64


  2%|▏         | 1506/100000 [30:21<32:46:31,  1.20s/it]

15404 episode score is 747.21


  2%|▏         | 1507/100000 [30:22<32:16:47,  1.18s/it]

15413 episode score is 746.18


  2%|▏         | 1508/100000 [30:23<31:47:06,  1.16s/it]

15422 episode score is 720.48


  2%|▏         | 1509/100000 [30:24<31:44:57,  1.16s/it]

15431 episode score is 744.60


  2%|▏         | 1510/100000 [30:25<32:12:22,  1.18s/it]

15441 episode score is 685.77


  2%|▏         | 1511/100000 [30:26<32:05:07,  1.17s/it]

15450 episode score is 765.07


  2%|▏         | 1512/100000 [30:28<31:54:59,  1.17s/it]

15459 episode score is 751.90


  2%|▏         | 1513/100000 [30:29<31:50:48,  1.16s/it]

15468 episode score is 751.23


  2%|▏         | 1514/100000 [30:30<31:59:02,  1.17s/it]

15477 episode score is 783.67


  2%|▏         | 1515/100000 [30:31<32:10:00,  1.18s/it]

15486 episode score is 778.76


  2%|▏         | 1516/100000 [30:32<32:03:55,  1.17s/it]

15495 episode score is 762.81


  2%|▏         | 1517/100000 [30:34<32:14:13,  1.18s/it]

15504 episode score is 796.27


  2%|▏         | 1518/100000 [30:35<32:00:42,  1.17s/it]

15513 episode score is 748.67


  2%|▏         | 1519/100000 [30:36<31:49:12,  1.16s/it]

15522 episode score is 744.55


  2%|▏         | 1520/100000 [30:37<31:34:49,  1.15s/it]

15531 episode score is 735.19


  2%|▏         | 1521/100000 [30:38<31:10:45,  1.14s/it]

15540 episode score is 730.52


  2%|▏         | 1522/100000 [30:39<30:56:29,  1.13s/it]

15549 episode score is 733.65


  2%|▏         | 1523/100000 [30:40<30:47:21,  1.13s/it]

15558 episode score is 739.11


  2%|▏         | 1524/100000 [30:41<30:37:51,  1.12s/it]

15567 episode score is 716.69


  2%|▏         | 1525/100000 [30:43<30:44:48,  1.12s/it]

15576 episode score is 742.36


  2%|▏         | 1526/100000 [30:44<30:35:22,  1.12s/it]

15585 episode score is 709.86


  2%|▏         | 1527/100000 [30:45<31:18:14,  1.14s/it]

15595 episode score is 711.07


  2%|▏         | 1528/100000 [30:46<31:10:50,  1.14s/it]

15604 episode score is 718.61


  2%|▏         | 1529/100000 [30:47<31:04:41,  1.14s/it]

15613 episode score is 729.00


  2%|▏         | 1530/100000 [30:48<31:00:52,  1.13s/it]

15622 episode score is 741.46


  2%|▏         | 1531/100000 [30:49<30:58:50,  1.13s/it]

15631 episode score is 732.55


  2%|▏         | 1532/100000 [30:50<31:03:41,  1.14s/it]

15640 episode score is 769.27


  2%|▏         | 1533/100000 [30:52<31:04:47,  1.14s/it]

15649 episode score is 749.10


  2%|▏         | 1534/100000 [30:53<31:11:39,  1.14s/it]

15658 episode score is 758.12


  2%|▏         | 1535/100000 [30:54<31:15:40,  1.14s/it]

15667 episode score is 748.21


  2%|▏         | 1536/100000 [30:55<31:11:49,  1.14s/it]

15676 episode score is 748.59


  2%|▏         | 1537/100000 [30:56<31:16:14,  1.14s/it]

15685 episode score is 753.45


  2%|▏         | 1538/100000 [30:57<31:20:13,  1.15s/it]

15694 episode score is 771.09


  2%|▏         | 1539/100000 [30:59<31:29:38,  1.15s/it]

15703 episode score is 773.64


  2%|▏         | 1540/100000 [31:00<31:44:10,  1.16s/it]

15712 episode score is 785.64


  2%|▏         | 1541/100000 [31:01<31:41:09,  1.16s/it]

15721 episode score is 760.43


  2%|▏         | 1542/100000 [31:02<31:59:23,  1.17s/it]

15730 episode score is 792.41


  2%|▏         | 1543/100000 [31:03<31:38:29,  1.16s/it]

15739 episode score is 751.47


  2%|▏         | 1544/100000 [31:04<31:49:59,  1.16s/it]

15748 episode score is 794.72


  2%|▏         | 1545/100000 [31:06<32:10:59,  1.18s/it]

15757 episode score is 784.08


  2%|▏         | 1546/100000 [31:07<32:14:21,  1.18s/it]

15766 episode score is 776.34


  2%|▏         | 1547/100000 [31:08<32:24:30,  1.19s/it]

15775 episode score is 787.52


  2%|▏         | 1548/100000 [31:09<32:36:07,  1.19s/it]

15784 episode score is 790.83


  2%|▏         | 1549/100000 [31:10<31:57:01,  1.17s/it]

15792 episode score is 822.55


  2%|▏         | 1550/100000 [31:11<32:21:26,  1.18s/it]

15801 episode score is 801.46


  2%|▏         | 1551/100000 [31:13<32:41:04,  1.20s/it]

15810 episode score is 781.52


  2%|▏         | 1552/100000 [31:14<32:47:38,  1.20s/it]

15819 episode score is 805.75


  2%|▏         | 1553/100000 [31:15<32:36:32,  1.19s/it]

15828 episode score is 767.78


  2%|▏         | 1554/100000 [31:16<31:56:28,  1.17s/it]

15836 episode score is 804.60


  2%|▏         | 1555/100000 [31:17<31:59:08,  1.17s/it]

15845 episode score is 761.93


  2%|▏         | 1556/100000 [31:19<31:52:28,  1.17s/it]

15854 episode score is 750.86


  2%|▏         | 1557/100000 [31:20<32:05:56,  1.17s/it]

15863 episode score is 790.25


  2%|▏         | 1558/100000 [31:21<32:22:16,  1.18s/it]

15872 episode score is 804.20


  2%|▏         | 1559/100000 [31:22<32:31:53,  1.19s/it]

15880 episode score is 808.22


  2%|▏         | 1560/100000 [31:23<32:29:07,  1.19s/it]

15889 episode score is 799.81


  2%|▏         | 1561/100000 [31:25<32:28:04,  1.19s/it]

15898 episode score is 798.77


  2%|▏         | 1562/100000 [31:26<32:43:18,  1.20s/it]

15907 episode score is 801.71


  2%|▏         | 1563/100000 [31:27<32:34:40,  1.19s/it]

15916 episode score is 774.22


  2%|▏         | 1564/100000 [31:28<31:56:31,  1.17s/it]

15925 episode score is 731.30


  2%|▏         | 1565/100000 [31:29<31:55:07,  1.17s/it]

15934 episode score is 765.06


  2%|▏         | 1566/100000 [31:30<32:00:21,  1.17s/it]

15943 episode score is 774.17


  2%|▏         | 1567/100000 [31:31<31:40:16,  1.16s/it]

15952 episode score is 744.91


  2%|▏         | 1568/100000 [31:33<31:28:23,  1.15s/it]

15961 episode score is 750.47


  2%|▏         | 1569/100000 [31:34<31:55:59,  1.17s/it]

15970 episode score is 690.03


  2%|▏         | 1570/100000 [31:35<31:41:13,  1.16s/it]

15979 episode score is 741.39


  2%|▏         | 1571/100000 [31:36<31:15:58,  1.14s/it]

15988 episode score is 729.42


  2%|▏         | 1572/100000 [31:37<31:22:01,  1.15s/it]

15997 episode score is 748.60


  2%|▏         | 1573/100000 [31:38<31:19:29,  1.15s/it]

16006 episode score is 755.50


  2%|▏         | 1574/100000 [31:40<31:20:55,  1.15s/it]

16015 episode score is 746.57


  2%|▏         | 1575/100000 [31:41<31:17:46,  1.14s/it]

16024 episode score is 751.84


  2%|▏         | 1576/100000 [31:42<31:14:17,  1.14s/it]

16033 episode score is 748.33


  2%|▏         | 1577/100000 [31:43<31:16:36,  1.14s/it]

16042 episode score is 742.37


  2%|▏         | 1578/100000 [31:44<31:51:25,  1.17s/it]

16051 episode score is 761.99


  2%|▏         | 1579/100000 [31:45<31:28:29,  1.15s/it]

16060 episode score is 729.00


  2%|▏         | 1580/100000 [31:46<31:15:05,  1.14s/it]

16069 episode score is 726.16


  2%|▏         | 1581/100000 [31:48<31:14:24,  1.14s/it]

16078 episode score is 735.33


  2%|▏         | 1582/100000 [31:49<31:29:44,  1.15s/it]

16087 episode score is 753.66


  2%|▏         | 1583/100000 [31:50<31:23:04,  1.15s/it]

16096 episode score is 733.16


  2%|▏         | 1584/100000 [31:51<31:03:00,  1.14s/it]

16105 episode score is 701.87


  2%|▏         | 1585/100000 [31:52<31:42:24,  1.16s/it]

16115 episode score is 703.91


  2%|▏         | 1586/100000 [31:53<31:58:10,  1.17s/it]

16125 episode score is 700.46


  2%|▏         | 1587/100000 [31:54<31:28:34,  1.15s/it]

16134 episode score is 693.29


  2%|▏         | 1588/100000 [31:56<31:11:15,  1.14s/it]

16143 episode score is 717.97


  2%|▏         | 1589/100000 [31:57<31:12:45,  1.14s/it]

16152 episode score is 732.25


  2%|▏         | 1590/100000 [31:58<31:23:08,  1.15s/it]

16161 episode score is 746.76


  2%|▏         | 1591/100000 [31:59<31:15:00,  1.14s/it]

16170 episode score is 741.69


  2%|▏         | 1592/100000 [32:00<31:09:40,  1.14s/it]

16179 episode score is 739.04


  2%|▏         | 1593/100000 [32:01<31:15:20,  1.14s/it]

16188 episode score is 755.70


  2%|▏         | 1594/100000 [32:02<31:25:23,  1.15s/it]

16197 episode score is 759.42


  2%|▏         | 1595/100000 [32:04<31:27:17,  1.15s/it]

16206 episode score is 770.36


  2%|▏         | 1596/100000 [32:05<31:29:12,  1.15s/it]

16215 episode score is 772.09


  2%|▏         | 1597/100000 [32:06<31:35:28,  1.16s/it]

16224 episode score is 771.37


  2%|▏         | 1598/100000 [32:07<31:28:51,  1.15s/it]

16233 episode score is 754.53


  2%|▏         | 1599/100000 [32:08<31:08:41,  1.14s/it]

16242 episode score is 729.68
16251 episode score is 748.60


  2%|▏         | 1600/100000 [32:10<40:30:54,  1.48s/it]

Iteration 1600: Average test reward: 746.68


  2%|▏         | 1601/100000 [32:12<37:51:48,  1.39s/it]

16260 episode score is 775.85


  2%|▏         | 1602/100000 [32:13<36:00:20,  1.32s/it]

16269 episode score is 753.76


  2%|▏         | 1603/100000 [32:14<34:34:32,  1.27s/it]

16278 episode score is 738.80


  2%|▏         | 1604/100000 [32:15<34:13:11,  1.25s/it]

16288 episode score is 712.10


  2%|▏         | 1605/100000 [32:16<33:10:35,  1.21s/it]

16297 episode score is 722.67


  2%|▏         | 1606/100000 [32:17<32:29:28,  1.19s/it]

16306 episode score is 728.62


  2%|▏         | 1607/100000 [32:19<31:51:45,  1.17s/it]

16315 episode score is 733.21


  2%|▏         | 1608/100000 [32:20<31:58:12,  1.17s/it]

16324 episode score is 765.57


  2%|▏         | 1609/100000 [32:21<31:40:26,  1.16s/it]

16333 episode score is 748.40


  2%|▏         | 1610/100000 [32:22<31:35:42,  1.16s/it]

16342 episode score is 752.62


  2%|▏         | 1611/100000 [32:23<31:31:44,  1.15s/it]

16351 episode score is 753.89


  2%|▏         | 1612/100000 [32:24<31:27:04,  1.15s/it]

16360 episode score is 758.56


  2%|▏         | 1613/100000 [32:25<31:31:25,  1.15s/it]

16369 episode score is 756.10


  2%|▏         | 1614/100000 [32:27<31:18:02,  1.15s/it]

16378 episode score is 732.84


  2%|▏         | 1615/100000 [32:28<32:00:46,  1.17s/it]

16387 episode score is 760.07


  2%|▏         | 1616/100000 [32:29<31:51:19,  1.17s/it]

16396 episode score is 753.34


  2%|▏         | 1617/100000 [32:30<31:27:01,  1.15s/it]

16405 episode score is 743.58


  2%|▏         | 1618/100000 [32:31<31:38:24,  1.16s/it]

16414 episode score is 758.06


  2%|▏         | 1619/100000 [32:32<31:40:51,  1.16s/it]

16423 episode score is 762.90


  2%|▏         | 1620/100000 [32:34<31:37:38,  1.16s/it]

16432 episode score is 754.37


  2%|▏         | 1621/100000 [32:35<31:37:35,  1.16s/it]

16441 episode score is 769.95


  2%|▏         | 1622/100000 [32:36<32:01:05,  1.17s/it]

16450 episode score is 806.92


  2%|▏         | 1623/100000 [32:37<32:05:33,  1.17s/it]

16459 episode score is 797.65


  2%|▏         | 1624/100000 [32:38<32:40:55,  1.20s/it]

16468 episode score is 812.69


  2%|▏         | 1625/100000 [32:40<32:44:31,  1.20s/it]

16477 episode score is 803.53


  2%|▏         | 1626/100000 [32:41<32:49:17,  1.20s/it]

16486 episode score is 803.72


  2%|▏         | 1627/100000 [32:42<32:46:53,  1.20s/it]

16495 episode score is 796.67


  2%|▏         | 1628/100000 [32:43<32:53:28,  1.20s/it]

16504 episode score is 806.71


  2%|▏         | 1629/100000 [32:44<32:42:31,  1.20s/it]

16513 episode score is 773.14


  2%|▏         | 1630/100000 [32:46<32:26:02,  1.19s/it]

16522 episode score is 785.54


  2%|▏         | 1631/100000 [32:47<32:40:38,  1.20s/it]

16531 episode score is 811.62


  2%|▏         | 1632/100000 [32:48<32:40:19,  1.20s/it]

16540 episode score is 794.37


  2%|▏         | 1633/100000 [32:49<32:36:32,  1.19s/it]

16549 episode score is 804.62


  2%|▏         | 1634/100000 [32:50<32:26:45,  1.19s/it]

16558 episode score is 781.28


  2%|▏         | 1635/100000 [32:52<32:46:34,  1.20s/it]

16567 episode score is 816.37


  2%|▏         | 1636/100000 [32:53<32:51:32,  1.20s/it]

16576 episode score is 783.11


  2%|▏         | 1637/100000 [32:54<32:45:33,  1.20s/it]

16585 episode score is 785.71


  2%|▏         | 1638/100000 [32:55<32:45:01,  1.20s/it]

16594 episode score is 799.96


  2%|▏         | 1639/100000 [32:56<32:02:13,  1.17s/it]

16602 episode score is 831.13


  2%|▏         | 1640/100000 [32:57<32:21:15,  1.18s/it]

16611 episode score is 804.00


  2%|▏         | 1641/100000 [32:59<32:31:31,  1.19s/it]

16620 episode score is 796.17


  2%|▏         | 1642/100000 [33:00<32:38:22,  1.19s/it]

16629 episode score is 787.93


  2%|▏         | 1643/100000 [33:01<32:08:48,  1.18s/it]

16637 episode score is 852.54


  2%|▏         | 1644/100000 [33:02<32:12:38,  1.18s/it]

16646 episode score is 761.49


  2%|▏         | 1645/100000 [33:03<31:35:23,  1.16s/it]

16654 episode score is 837.95


  2%|▏         | 1646/100000 [33:04<31:45:15,  1.16s/it]

16663 episode score is 777.00


  2%|▏         | 1647/100000 [33:06<32:08:44,  1.18s/it]

16672 episode score is 796.19


  2%|▏         | 1648/100000 [33:07<32:06:38,  1.18s/it]

16681 episode score is 772.12


  2%|▏         | 1649/100000 [33:08<32:01:02,  1.17s/it]

16690 episode score is 769.08


  2%|▏         | 1650/100000 [33:09<31:49:07,  1.16s/it]

16699 episode score is 766.61


  2%|▏         | 1651/100000 [33:10<31:30:11,  1.15s/it]

16708 episode score is 741.71


  2%|▏         | 1652/100000 [33:11<31:05:58,  1.14s/it]

16717 episode score is 707.29


  2%|▏         | 1653/100000 [33:13<31:43:59,  1.16s/it]

16727 episode score is 714.24


  2%|▏         | 1654/100000 [33:14<31:20:48,  1.15s/it]

16736 episode score is 727.64


  2%|▏         | 1655/100000 [33:15<31:23:32,  1.15s/it]

16745 episode score is 745.42


  2%|▏         | 1656/100000 [33:16<31:32:16,  1.15s/it]

16754 episode score is 760.34


  2%|▏         | 1657/100000 [33:17<31:15:03,  1.14s/it]

16763 episode score is 716.30


  2%|▏         | 1658/100000 [33:18<31:02:14,  1.14s/it]

16772 episode score is 729.39


  2%|▏         | 1659/100000 [33:19<31:43:15,  1.16s/it]

16782 episode score is 721.04


  2%|▏         | 1660/100000 [33:21<31:12:44,  1.14s/it]

16791 episode score is 731.73


  2%|▏         | 1661/100000 [33:22<31:50:31,  1.17s/it]

16801 episode score is 728.44


  2%|▏         | 1662/100000 [33:23<31:24:17,  1.15s/it]

16810 episode score is 742.29


  2%|▏         | 1663/100000 [33:24<31:55:46,  1.17s/it]

16820 episode score is 706.85


  2%|▏         | 1664/100000 [33:25<32:18:25,  1.18s/it]

16830 episode score is 726.38


  2%|▏         | 1665/100000 [33:27<32:24:19,  1.19s/it]

16839 episode score is 725.56


  2%|▏         | 1666/100000 [33:28<31:43:54,  1.16s/it]

16848 episode score is 729.83


  2%|▏         | 1667/100000 [33:29<31:20:57,  1.15s/it]

16857 episode score is 735.11


  2%|▏         | 1668/100000 [33:30<30:57:53,  1.13s/it]

16866 episode score is 740.00


  2%|▏         | 1669/100000 [33:31<30:47:33,  1.13s/it]

16875 episode score is 741.86


  2%|▏         | 1670/100000 [33:32<31:43:35,  1.16s/it]

16885 episode score is 722.08


  2%|▏         | 1671/100000 [33:33<31:31:59,  1.15s/it]

16894 episode score is 762.87


  2%|▏         | 1672/100000 [33:34<31:04:59,  1.14s/it]

16903 episode score is 725.11


  2%|▏         | 1673/100000 [33:36<31:18:34,  1.15s/it]

16912 episode score is 765.23


  2%|▏         | 1674/100000 [33:37<31:43:02,  1.16s/it]

16921 episode score is 780.58


  2%|▏         | 1675/100000 [33:38<31:45:07,  1.16s/it]

16930 episode score is 777.33


  2%|▏         | 1676/100000 [33:39<31:33:12,  1.16s/it]

16939 episode score is 744.54


  2%|▏         | 1677/100000 [33:40<31:42:02,  1.16s/it]

16948 episode score is 770.62


  2%|▏         | 1678/100000 [33:41<31:41:07,  1.16s/it]

16957 episode score is 753.60


  2%|▏         | 1679/100000 [33:43<31:42:13,  1.16s/it]

16966 episode score is 774.75


  2%|▏         | 1680/100000 [33:44<31:40:36,  1.16s/it]

16975 episode score is 763.95


  2%|▏         | 1681/100000 [33:45<31:48:54,  1.16s/it]

16984 episode score is 780.80


  2%|▏         | 1682/100000 [33:46<31:31:28,  1.15s/it]

16993 episode score is 746.01


  2%|▏         | 1683/100000 [33:47<32:24:24,  1.19s/it]

17003 episode score is 724.04


  2%|▏         | 1684/100000 [33:48<32:01:49,  1.17s/it]

17012 episode score is 758.64


  2%|▏         | 1685/100000 [33:50<31:37:09,  1.16s/it]

17021 episode score is 737.97


  2%|▏         | 1686/100000 [33:51<31:30:54,  1.15s/it]

17030 episode score is 765.25


  2%|▏         | 1687/100000 [33:52<31:56:34,  1.17s/it]

17040 episode score is 711.65


  2%|▏         | 1688/100000 [33:53<32:09:35,  1.18s/it]

17050 episode score is 704.19


  2%|▏         | 1689/100000 [33:54<32:10:54,  1.18s/it]

17060 episode score is 705.44


  2%|▏         | 1690/100000 [33:56<32:30:22,  1.19s/it]

17070 episode score is 725.11


  2%|▏         | 1691/100000 [33:57<31:49:49,  1.17s/it]

17079 episode score is 737.07


  2%|▏         | 1692/100000 [33:58<31:19:27,  1.15s/it]

17088 episode score is 735.06


  2%|▏         | 1693/100000 [33:59<31:02:05,  1.14s/it]

17097 episode score is 733.20


  2%|▏         | 1694/100000 [34:00<30:56:43,  1.13s/it]

17106 episode score is 738.39


  2%|▏         | 1695/100000 [34:01<30:40:58,  1.12s/it]

17115 episode score is 721.69


  2%|▏         | 1696/100000 [34:02<31:41:49,  1.16s/it]

17125 episode score is 730.32


  2%|▏         | 1697/100000 [34:03<31:24:40,  1.15s/it]

17134 episode score is 742.99


  2%|▏         | 1698/100000 [34:05<31:05:38,  1.14s/it]

17143 episode score is 732.27


  2%|▏         | 1699/100000 [34:06<31:15:01,  1.14s/it]

17152 episode score is 750.21
17161 episode score is 747.44


  2%|▏         | 1700/100000 [34:08<40:15:57,  1.47s/it]

Iteration 1700: Average test reward: 733.63


  2%|▏         | 1701/100000 [34:09<37:32:25,  1.37s/it]

17170 episode score is 745.23


  2%|▏         | 1702/100000 [34:10<35:44:08,  1.31s/it]

17179 episode score is 765.03


  2%|▏         | 1703/100000 [34:11<34:22:30,  1.26s/it]

17188 episode score is 749.23


  2%|▏         | 1704/100000 [34:13<33:29:34,  1.23s/it]

17197 episode score is 753.71


  2%|▏         | 1705/100000 [34:14<32:30:49,  1.19s/it]

17206 episode score is 739.09


  2%|▏         | 1706/100000 [34:15<32:23:09,  1.19s/it]

17215 episode score is 769.00


  2%|▏         | 1707/100000 [34:16<31:58:10,  1.17s/it]

17224 episode score is 750.07


  2%|▏         | 1708/100000 [34:17<32:13:12,  1.18s/it]

17233 episode score is 779.50


  2%|▏         | 1709/100000 [34:18<32:12:58,  1.18s/it]

17242 episode score is 776.59


  2%|▏         | 1710/100000 [34:20<32:31:02,  1.19s/it]

17251 episode score is 800.63


  2%|▏         | 1711/100000 [34:21<32:45:01,  1.20s/it]

17260 episode score is 807.25


  2%|▏         | 1712/100000 [34:22<32:08:35,  1.18s/it]

17268 episode score is 839.78


  2%|▏         | 1713/100000 [34:23<32:21:06,  1.18s/it]

17277 episode score is 794.33


  2%|▏         | 1714/100000 [34:24<31:52:25,  1.17s/it]

17285 episode score is 851.58


  2%|▏         | 1715/100000 [34:25<32:17:10,  1.18s/it]

17294 episode score is 821.49


  2%|▏         | 1716/100000 [34:27<32:29:23,  1.19s/it]

17303 episode score is 812.98


  2%|▏         | 1717/100000 [34:28<31:56:17,  1.17s/it]

17311 episode score is 847.38


  2%|▏         | 1718/100000 [34:29<33:15:28,  1.22s/it]

17320 episode score is 823.10


  2%|▏         | 1719/100000 [34:30<33:12:16,  1.22s/it]

17329 episode score is 813.51


  2%|▏         | 1720/100000 [34:32<33:10:32,  1.22s/it]

17338 episode score is 808.88


  2%|▏         | 1721/100000 [34:33<33:17:48,  1.22s/it]

17347 episode score is 808.31


  2%|▏         | 1722/100000 [34:34<33:06:50,  1.21s/it]

17356 episode score is 798.73


  2%|▏         | 1723/100000 [34:35<32:44:12,  1.20s/it]

17365 episode score is 771.00


  2%|▏         | 1724/100000 [34:36<32:27:40,  1.19s/it]

17374 episode score is 777.56


  2%|▏         | 1725/100000 [34:38<32:30:52,  1.19s/it]

17383 episode score is 794.28


  2%|▏         | 1726/100000 [34:39<32:01:21,  1.17s/it]

17392 episode score is 746.00


  2%|▏         | 1727/100000 [34:40<31:55:12,  1.17s/it]

17401 episode score is 768.65


  2%|▏         | 1728/100000 [34:41<32:16:01,  1.18s/it]

17410 episode score is 814.86


  2%|▏         | 1729/100000 [34:42<32:23:20,  1.19s/it]

17419 episode score is 807.89


  2%|▏         | 1730/100000 [34:43<32:12:22,  1.18s/it]

17428 episode score is 783.22


  2%|▏         | 1731/100000 [34:45<32:23:51,  1.19s/it]

17437 episode score is 818.86


  2%|▏         | 1732/100000 [34:46<32:38:00,  1.20s/it]

17446 episode score is 821.43


  2%|▏         | 1733/100000 [34:47<32:45:27,  1.20s/it]

17455 episode score is 811.18


  2%|▏         | 1734/100000 [34:48<32:40:00,  1.20s/it]

17464 episode score is 783.09


  2%|▏         | 1735/100000 [34:49<32:52:33,  1.20s/it]

17473 episode score is 803.10


  2%|▏         | 1736/100000 [34:51<32:35:39,  1.19s/it]

17482 episode score is 779.04


  2%|▏         | 1737/100000 [34:52<32:22:42,  1.19s/it]

17491 episode score is 777.57


  2%|▏         | 1738/100000 [34:53<32:18:59,  1.18s/it]

17500 episode score is 790.96


  2%|▏         | 1739/100000 [34:54<32:22:45,  1.19s/it]

17509 episode score is 798.87


  2%|▏         | 1740/100000 [34:55<32:29:51,  1.19s/it]

17518 episode score is 812.59


  2%|▏         | 1741/100000 [34:57<32:13:40,  1.18s/it]

17527 episode score is 779.24


  2%|▏         | 1742/100000 [34:58<31:43:09,  1.16s/it]

17535 episode score is 860.47


  2%|▏         | 1743/100000 [34:59<32:05:52,  1.18s/it]

17544 episode score is 808.72


  2%|▏         | 1744/100000 [35:00<32:17:43,  1.18s/it]

17553 episode score is 800.45


  2%|▏         | 1745/100000 [35:01<32:44:51,  1.20s/it]

17562 episode score is 830.92


  2%|▏         | 1746/100000 [35:02<32:31:10,  1.19s/it]

17571 episode score is 773.19


  2%|▏         | 1747/100000 [35:04<32:39:10,  1.20s/it]

17580 episode score is 826.51


  2%|▏         | 1748/100000 [35:05<32:00:52,  1.17s/it]

17588 episode score is 859.07


  2%|▏         | 1749/100000 [35:06<31:22:54,  1.15s/it]

17596 episode score is 834.02


  2%|▏         | 1750/100000 [35:07<30:58:15,  1.13s/it]

17604 episode score is 833.76


  2%|▏         | 1751/100000 [35:08<31:43:14,  1.16s/it]

17613 episode score is 814.87


  2%|▏         | 1752/100000 [35:09<32:10:18,  1.18s/it]

17622 episode score is 822.64


  2%|▏         | 1753/100000 [35:11<32:10:14,  1.18s/it]

17631 episode score is 800.58


  2%|▏         | 1754/100000 [35:12<32:18:13,  1.18s/it]

17640 episode score is 806.12


  2%|▏         | 1755/100000 [35:13<31:58:56,  1.17s/it]

17649 episode score is 748.81


  2%|▏         | 1756/100000 [35:14<31:37:39,  1.16s/it]

17658 episode score is 745.13


  2%|▏         | 1757/100000 [35:15<31:26:01,  1.15s/it]

17667 episode score is 761.07


  2%|▏         | 1758/100000 [35:16<31:10:52,  1.14s/it]

17676 episode score is 748.79


  2%|▏         | 1759/100000 [35:18<32:15:55,  1.18s/it]

17686 episode score is 730.87


  2%|▏         | 1760/100000 [35:19<32:33:26,  1.19s/it]

17696 episode score is 706.86


  2%|▏         | 1761/100000 [35:20<32:06:41,  1.18s/it]

17706 episode score is 666.01


  2%|▏         | 1762/100000 [35:21<31:33:21,  1.16s/it]

17715 episode score is 722.64


  2%|▏         | 1763/100000 [35:22<31:17:13,  1.15s/it]

17724 episode score is 745.64


  2%|▏         | 1764/100000 [35:23<31:19:27,  1.15s/it]

17733 episode score is 772.25


  2%|▏         | 1765/100000 [35:24<31:08:36,  1.14s/it]

17742 episode score is 749.25


  2%|▏         | 1766/100000 [35:26<30:57:57,  1.13s/it]

17751 episode score is 747.43


  2%|▏         | 1767/100000 [35:27<30:50:51,  1.13s/it]

17760 episode score is 742.33


  2%|▏         | 1768/100000 [35:28<31:05:16,  1.14s/it]

17769 episode score is 764.94


  2%|▏         | 1769/100000 [35:29<31:47:16,  1.16s/it]

17779 episode score is 717.62


  2%|▏         | 1770/100000 [35:30<31:33:18,  1.16s/it]

17788 episode score is 742.78


  2%|▏         | 1771/100000 [35:31<31:29:46,  1.15s/it]

17797 episode score is 750.70


  2%|▏         | 1772/100000 [35:33<31:24:30,  1.15s/it]

17806 episode score is 752.22


  2%|▏         | 1773/100000 [35:34<32:06:04,  1.18s/it]

17815 episode score is 764.80


  2%|▏         | 1774/100000 [35:35<31:47:01,  1.16s/it]

17824 episode score is 738.99


  2%|▏         | 1775/100000 [35:36<31:54:24,  1.17s/it]

17833 episode score is 777.47


  2%|▏         | 1776/100000 [35:37<31:46:50,  1.16s/it]

17842 episode score is 773.95


  2%|▏         | 1777/100000 [35:38<31:55:39,  1.17s/it]

17852 episode score is 703.16


  2%|▏         | 1778/100000 [35:40<31:39:54,  1.16s/it]

17861 episode score is 749.44


  2%|▏         | 1779/100000 [35:41<31:56:33,  1.17s/it]

17871 episode score is 704.95


  2%|▏         | 1780/100000 [35:42<31:19:17,  1.15s/it]

17880 episode score is 731.89


  2%|▏         | 1781/100000 [35:43<31:57:11,  1.17s/it]

17889 episode score is 811.08


  2%|▏         | 1782/100000 [35:44<32:08:48,  1.18s/it]

17898 episode score is 814.25


  2%|▏         | 1783/100000 [35:45<32:15:18,  1.18s/it]

17907 episode score is 813.08


  2%|▏         | 1784/100000 [35:47<31:37:55,  1.16s/it]

17916 episode score is 721.61


  2%|▏         | 1785/100000 [35:48<31:31:26,  1.16s/it]

17925 episode score is 727.50


  2%|▏         | 1786/100000 [35:49<31:16:34,  1.15s/it]

17934 episode score is 733.05


  2%|▏         | 1787/100000 [35:50<32:01:25,  1.17s/it]

17943 episode score is 828.68


  2%|▏         | 1788/100000 [35:51<31:43:55,  1.16s/it]

17952 episode score is 753.57


  2%|▏         | 1789/100000 [35:52<31:27:47,  1.15s/it]

17960 episode score is 849.06


  2%|▏         | 1790/100000 [35:54<31:55:28,  1.17s/it]

17969 episode score is 830.90


  2%|▏         | 1791/100000 [35:55<31:40:07,  1.16s/it]

17977 episode score is 870.83


  2%|▏         | 1792/100000 [35:56<31:29:10,  1.15s/it]

17985 episode score is 873.52


  2%|▏         | 1793/100000 [35:57<31:18:21,  1.15s/it]

17993 episode score is 866.28


  2%|▏         | 1794/100000 [35:58<31:49:09,  1.17s/it]

18002 episode score is 826.85


  2%|▏         | 1795/100000 [35:59<32:25:47,  1.19s/it]

18011 episode score is 830.06


  2%|▏         | 1796/100000 [36:01<32:35:07,  1.19s/it]

18020 episode score is 807.73


  2%|▏         | 1797/100000 [36:02<32:27:39,  1.19s/it]

18029 episode score is 802.86


  2%|▏         | 1798/100000 [36:03<32:38:45,  1.20s/it]

18038 episode score is 803.87


  2%|▏         | 1799/100000 [36:04<32:12:36,  1.18s/it]

18046 episode score is 879.35
18055 episode score is 827.73


  2%|▏         | 1800/100000 [36:07<42:23:15,  1.55s/it]

Iteration 1800: Average test reward: 823.93


  2%|▏         | 1801/100000 [36:08<39:01:05,  1.43s/it]

18063 episode score is 871.35


  2%|▏         | 1802/100000 [36:09<37:04:40,  1.36s/it]

18072 episode score is 810.41


  2%|▏         | 1803/100000 [36:10<35:18:49,  1.29s/it]

18081 episode score is 758.85


  2%|▏         | 1804/100000 [36:11<34:19:18,  1.26s/it]

18090 episode score is 778.49


  2%|▏         | 1805/100000 [36:12<34:00:13,  1.25s/it]

18099 episode score is 827.90


  2%|▏         | 1806/100000 [36:14<33:26:10,  1.23s/it]

18108 episode score is 795.84


  2%|▏         | 1807/100000 [36:15<32:35:54,  1.20s/it]

18117 episode score is 742.94


  2%|▏         | 1808/100000 [36:16<32:44:32,  1.20s/it]

18126 episode score is 819.74


  2%|▏         | 1809/100000 [36:17<32:15:35,  1.18s/it]

18134 episode score is 849.18


  2%|▏         | 1810/100000 [36:18<31:42:26,  1.16s/it]

18142 episode score is 828.68


  2%|▏         | 1811/100000 [36:19<31:50:30,  1.17s/it]

18152 episode score is 684.31


  2%|▏         | 1812/100000 [36:21<31:40:11,  1.16s/it]

18161 episode score is 766.51


  2%|▏         | 1813/100000 [36:22<31:36:18,  1.16s/it]

18170 episode score is 775.34


  2%|▏         | 1814/100000 [36:23<31:37:46,  1.16s/it]

18179 episode score is 767.69


  2%|▏         | 1815/100000 [36:24<31:29:46,  1.15s/it]

18188 episode score is 767.52


  2%|▏         | 1816/100000 [36:25<31:27:18,  1.15s/it]

18197 episode score is 760.28


  2%|▏         | 1817/100000 [36:26<31:43:59,  1.16s/it]

18206 episode score is 788.41


  2%|▏         | 1818/100000 [36:27<31:47:19,  1.17s/it]

18215 episode score is 777.59


  2%|▏         | 1819/100000 [36:29<31:44:51,  1.16s/it]

18224 episode score is 769.96


  2%|▏         | 1820/100000 [36:30<31:31:13,  1.16s/it]

18233 episode score is 745.90


  2%|▏         | 1821/100000 [36:31<31:03:40,  1.14s/it]

18242 episode score is 727.51


  2%|▏         | 1822/100000 [36:32<31:37:16,  1.16s/it]

18251 episode score is 796.46


  2%|▏         | 1823/100000 [36:33<32:05:45,  1.18s/it]

18260 episode score is 818.68


  2%|▏         | 1824/100000 [36:34<31:55:52,  1.17s/it]

18269 episode score is 777.85


  2%|▏         | 1825/100000 [36:36<31:49:43,  1.17s/it]

18278 episode score is 778.37


  2%|▏         | 1826/100000 [36:37<32:12:04,  1.18s/it]

18287 episode score is 798.07


  2%|▏         | 1827/100000 [36:38<32:02:58,  1.18s/it]

18296 episode score is 782.92


  2%|▏         | 1828/100000 [36:39<32:42:08,  1.20s/it]

18305 episode score is 770.34


  2%|▏         | 1829/100000 [36:40<32:21:32,  1.19s/it]

18314 episode score is 771.03


  2%|▏         | 1830/100000 [36:42<32:11:42,  1.18s/it]

18323 episode score is 780.53


  2%|▏         | 1831/100000 [36:43<32:03:01,  1.18s/it]

18332 episode score is 766.91


  2%|▏         | 1832/100000 [36:44<31:29:10,  1.15s/it]

18341 episode score is 724.22


  2%|▏         | 1833/100000 [36:45<31:30:11,  1.16s/it]

18350 episode score is 780.96


  2%|▏         | 1834/100000 [36:46<32:00:52,  1.17s/it]

18359 episode score is 816.57


  2%|▏         | 1835/100000 [36:47<32:14:10,  1.18s/it]

18368 episode score is 784.00


  2%|▏         | 1836/100000 [36:49<32:06:46,  1.18s/it]

18377 episode score is 762.04


  2%|▏         | 1837/100000 [36:50<32:18:01,  1.18s/it]

18386 episode score is 779.01


  2%|▏         | 1838/100000 [36:51<32:20:24,  1.19s/it]

18396 episode score is 687.40


  2%|▏         | 1839/100000 [36:52<32:40:09,  1.20s/it]

18406 episode score is 730.41


  2%|▏         | 1840/100000 [36:53<32:43:18,  1.20s/it]

18416 episode score is 707.56


  2%|▏         | 1841/100000 [36:55<32:27:30,  1.19s/it]

18426 episode score is 677.70


  2%|▏         | 1842/100000 [36:56<32:00:05,  1.17s/it]

18436 episode score is 637.78


  2%|▏         | 1843/100000 [36:57<31:35:13,  1.16s/it]

18446 episode score is 646.79


  2%|▏         | 1844/100000 [36:58<31:11:47,  1.14s/it]

18455 episode score is 724.58


  2%|▏         | 1845/100000 [36:59<31:48:58,  1.17s/it]

18465 episode score is 717.59


  2%|▏         | 1846/100000 [37:00<31:59:25,  1.17s/it]

18475 episode score is 696.89


  2%|▏         | 1847/100000 [37:02<32:02:33,  1.18s/it]

18485 episode score is 689.85


  2%|▏         | 1848/100000 [37:03<32:27:44,  1.19s/it]

18495 episode score is 711.12


  2%|▏         | 1849/100000 [37:04<32:28:37,  1.19s/it]

18505 episode score is 681.75


  2%|▏         | 1850/100000 [37:05<31:59:16,  1.17s/it]

18514 episode score is 750.76


  2%|▏         | 1851/100000 [37:06<31:44:48,  1.16s/it]

18524 episode score is 637.58


  2%|▏         | 1852/100000 [37:07<31:56:58,  1.17s/it]

18533 episode score is 788.55


  2%|▏         | 1853/100000 [37:09<31:35:58,  1.16s/it]

18542 episode score is 727.16


  2%|▏         | 1854/100000 [37:10<31:11:49,  1.14s/it]

18551 episode score is 727.38


  2%|▏         | 1855/100000 [37:11<31:16:36,  1.15s/it]

18560 episode score is 761.52


  2%|▏         | 1856/100000 [37:12<31:26:52,  1.15s/it]

18570 episode score is 667.65


  2%|▏         | 1857/100000 [37:13<31:35:38,  1.16s/it]

18580 episode score is 668.20


  2%|▏         | 1858/100000 [37:14<31:08:00,  1.14s/it]

18589 episode score is 720.35


  2%|▏         | 1859/100000 [37:15<31:28:31,  1.15s/it]

18599 episode score is 686.91


  2%|▏         | 1860/100000 [37:17<31:44:52,  1.16s/it]

18609 episode score is 676.38


  2%|▏         | 1861/100000 [37:18<32:07:08,  1.18s/it]

18619 episode score is 704.09


  2%|▏         | 1862/100000 [37:19<32:24:01,  1.19s/it]

18629 episode score is 700.05


  2%|▏         | 1863/100000 [37:20<32:12:04,  1.18s/it]

18639 episode score is 663.70


  2%|▏         | 1864/100000 [37:21<32:46:54,  1.20s/it]

18649 episode score is 738.03


  2%|▏         | 1865/100000 [37:23<33:00:37,  1.21s/it]

18659 episode score is 720.77


  2%|▏         | 1866/100000 [37:24<33:06:21,  1.21s/it]

18669 episode score is 709.49


  2%|▏         | 1867/100000 [37:25<32:23:04,  1.19s/it]

18678 episode score is 736.25


  2%|▏         | 1868/100000 [37:26<32:12:38,  1.18s/it]

18687 episode score is 770.10


  2%|▏         | 1869/100000 [37:27<32:32:15,  1.19s/it]

18697 episode score is 713.06


  2%|▏         | 1870/100000 [37:29<32:18:12,  1.19s/it]

18706 episode score is 759.75


  2%|▏         | 1871/100000 [37:30<32:07:52,  1.18s/it]

18715 episode score is 777.09


  2%|▏         | 1872/100000 [37:31<31:36:39,  1.16s/it]

18724 episode score is 729.96


  2%|▏         | 1873/100000 [37:32<31:40:34,  1.16s/it]

18733 episode score is 754.52


  2%|▏         | 1874/100000 [37:33<31:28:33,  1.15s/it]

18742 episode score is 740.70


  2%|▏         | 1875/100000 [37:34<31:11:33,  1.14s/it]

18751 episode score is 733.11


  2%|▏         | 1876/100000 [37:35<31:09:46,  1.14s/it]

18760 episode score is 737.97


  2%|▏         | 1877/100000 [37:37<31:06:56,  1.14s/it]

18769 episode score is 745.05


  2%|▏         | 1878/100000 [37:38<31:00:31,  1.14s/it]

18778 episode score is 747.70


  2%|▏         | 1879/100000 [37:39<30:54:04,  1.13s/it]

18787 episode score is 748.97


  2%|▏         | 1880/100000 [37:40<30:49:09,  1.13s/it]

18796 episode score is 731.71


  2%|▏         | 1881/100000 [37:41<30:39:08,  1.12s/it]

18805 episode score is 732.36


  2%|▏         | 1882/100000 [37:42<30:38:38,  1.12s/it]

18814 episode score is 743.04


  2%|▏         | 1883/100000 [37:43<30:48:44,  1.13s/it]

18823 episode score is 753.23


  2%|▏         | 1884/100000 [37:44<30:38:18,  1.12s/it]

18832 episode score is 737.06


  2%|▏         | 1885/100000 [37:46<31:17:59,  1.15s/it]

18842 episode score is 704.04


  2%|▏         | 1886/100000 [37:47<31:36:29,  1.16s/it]

18852 episode score is 680.68


  2%|▏         | 1887/100000 [37:48<31:38:14,  1.16s/it]

18861 episode score is 749.30


  2%|▏         | 1888/100000 [37:49<32:21:33,  1.19s/it]

18870 episode score is 757.08


  2%|▏         | 1889/100000 [37:50<32:01:52,  1.18s/it]

18879 episode score is 750.34


  2%|▏         | 1890/100000 [37:52<31:32:12,  1.16s/it]

18888 episode score is 721.24


  2%|▏         | 1891/100000 [37:53<31:23:11,  1.15s/it]

18897 episode score is 749.83


  2%|▏         | 1892/100000 [37:54<31:02:31,  1.14s/it]

18906 episode score is 725.37


  2%|▏         | 1893/100000 [37:55<31:21:49,  1.15s/it]

18915 episode score is 756.23


  2%|▏         | 1894/100000 [37:56<31:55:43,  1.17s/it]

18924 episode score is 783.01


  2%|▏         | 1895/100000 [37:57<32:10:08,  1.18s/it]

18933 episode score is 790.97


  2%|▏         | 1896/100000 [37:59<32:36:06,  1.20s/it]

18942 episode score is 790.67


  2%|▏         | 1897/100000 [38:00<32:02:55,  1.18s/it]

18950 episode score is 832.06


  2%|▏         | 1898/100000 [38:01<31:41:28,  1.16s/it]

18958 episode score is 835.42


  2%|▏         | 1899/100000 [38:02<32:14:09,  1.18s/it]

18967 episode score is 803.06
18975 episode score is 829.96


  2%|▏         | 1900/100000 [38:04<41:01:41,  1.51s/it]

Iteration 1900: Average test reward: 785.98


  2%|▏         | 1901/100000 [38:05<37:44:09,  1.38s/it]

18983 episode score is 823.17


  2%|▏         | 1902/100000 [38:07<36:14:19,  1.33s/it]

18992 episode score is 793.71


  2%|▏         | 1903/100000 [38:08<35:14:04,  1.29s/it]

19001 episode score is 800.00


  2%|▏         | 1904/100000 [38:09<34:19:53,  1.26s/it]

19010 episode score is 785.86


  2%|▏         | 1905/100000 [38:10<33:43:47,  1.24s/it]

19019 episode score is 795.67


  2%|▏         | 1906/100000 [38:11<33:38:16,  1.23s/it]

19028 episode score is 809.49


  2%|▏         | 1907/100000 [38:13<33:36:00,  1.23s/it]

19037 episode score is 824.45


  2%|▏         | 1908/100000 [38:14<32:45:52,  1.20s/it]

19045 episode score is 858.70


  2%|▏         | 1909/100000 [38:15<31:58:01,  1.17s/it]

19053 episode score is 824.68


  2%|▏         | 1910/100000 [38:16<32:36:58,  1.20s/it]

19062 episode score is 839.24


  2%|▏         | 1911/100000 [38:17<32:41:58,  1.20s/it]

19071 episode score is 770.67


  2%|▏         | 1912/100000 [38:19<32:38:05,  1.20s/it]

19080 episode score is 765.36


  2%|▏         | 1913/100000 [38:20<32:38:43,  1.20s/it]

19089 episode score is 771.93


  2%|▏         | 1914/100000 [38:21<32:12:45,  1.18s/it]

19098 episode score is 736.18


  2%|▏         | 1915/100000 [38:22<32:21:10,  1.19s/it]

19107 episode score is 782.21


  2%|▏         | 1916/100000 [38:23<32:41:31,  1.20s/it]

19116 episode score is 806.72


  2%|▏         | 1917/100000 [38:24<31:56:23,  1.17s/it]

19124 episode score is 827.40


  2%|▏         | 1918/100000 [38:26<31:26:15,  1.15s/it]

19132 episode score is 825.39


  2%|▏         | 1919/100000 [38:27<32:03:21,  1.18s/it]

19141 episode score is 819.07


  2%|▏         | 1920/100000 [38:28<31:37:38,  1.16s/it]

19149 episode score is 861.39


  2%|▏         | 1921/100000 [38:29<31:26:36,  1.15s/it]

19157 episode score is 850.71


  2%|▏         | 1922/100000 [38:30<31:15:09,  1.15s/it]

19165 episode score is 832.63


  2%|▏         | 1923/100000 [38:31<32:06:02,  1.18s/it]

19173 episode score is 927.36


  2%|▏         | 1924/100000 [38:33<32:05:49,  1.18s/it]

19181 episode score is 869.55


  2%|▏         | 1925/100000 [38:34<31:58:55,  1.17s/it]

19189 episode score is 881.24


  2%|▏         | 1926/100000 [38:35<32:12:19,  1.18s/it]

19197 episode score is 917.09


  2%|▏         | 1927/100000 [38:36<32:20:15,  1.19s/it]

19205 episode score is 902.95


  2%|▏         | 1928/100000 [38:37<32:22:45,  1.19s/it]

19213 episode score is 902.93


  2%|▏         | 1929/100000 [38:39<32:48:14,  1.20s/it]

19221 episode score is 941.40


  2%|▏         | 1930/100000 [38:40<33:21:36,  1.22s/it]

19229 episode score is 936.06


  2%|▏         | 1931/100000 [38:41<33:21:37,  1.22s/it]

19237 episode score is 915.08


  2%|▏         | 1932/100000 [38:42<33:12:05,  1.22s/it]

19245 episode score is 895.33


  2%|▏         | 1933/100000 [38:43<32:34:00,  1.20s/it]

19253 episode score is 861.09


  2%|▏         | 1934/100000 [38:45<32:07:00,  1.18s/it]

19261 episode score is 848.26


  2%|▏         | 1935/100000 [38:46<32:33:18,  1.20s/it]

19269 episode score is 948.91


  2%|▏         | 1936/100000 [38:47<32:10:58,  1.18s/it]

19277 episode score is 868.30


  2%|▏         | 1937/100000 [38:48<32:01:59,  1.18s/it]

19285 episode score is 861.58


  2%|▏         | 1938/100000 [38:49<32:16:05,  1.18s/it]

19293 episode score is 887.93


  2%|▏         | 1939/100000 [38:51<32:14:10,  1.18s/it]

19301 episode score is 887.55


  2%|▏         | 1940/100000 [38:52<32:19:41,  1.19s/it]

19309 episode score is 905.44


  2%|▏         | 1941/100000 [38:53<32:54:26,  1.21s/it]

19318 episode score is 834.79


  2%|▏         | 1942/100000 [38:54<32:59:13,  1.21s/it]

19326 episode score is 850.65


  2%|▏         | 1943/100000 [38:55<32:27:43,  1.19s/it]

19334 episode score is 848.27


  2%|▏         | 1944/100000 [38:57<32:58:13,  1.21s/it]

19343 episode score is 821.12


  2%|▏         | 1945/100000 [38:58<32:36:48,  1.20s/it]

19351 episode score is 886.69


  2%|▏         | 1946/100000 [38:59<32:15:24,  1.18s/it]

19359 episode score is 859.49


  2%|▏         | 1947/100000 [39:00<31:45:58,  1.17s/it]

19367 episode score is 842.59


  2%|▏         | 1948/100000 [39:01<32:05:55,  1.18s/it]

19376 episode score is 806.72


  2%|▏         | 1949/100000 [39:02<32:31:52,  1.19s/it]

19385 episode score is 804.17


  2%|▏         | 1950/100000 [39:04<32:40:37,  1.20s/it]

19394 episode score is 807.86


  2%|▏         | 1951/100000 [39:05<33:07:22,  1.22s/it]

19403 episode score is 844.31


  2%|▏         | 1952/100000 [39:06<33:09:28,  1.22s/it]

19412 episode score is 813.79


  2%|▏         | 1953/100000 [39:07<32:36:28,  1.20s/it]

19420 episode score is 879.09


  2%|▏         | 1954/100000 [39:09<32:29:05,  1.19s/it]

19428 episode score is 903.78


  2%|▏         | 1955/100000 [39:10<32:19:59,  1.19s/it]

19437 episode score is 764.57


  2%|▏         | 1956/100000 [39:11<32:40:45,  1.20s/it]

19446 episode score is 838.07


  2%|▏         | 1957/100000 [39:12<32:37:47,  1.20s/it]

19455 episode score is 800.12


  2%|▏         | 1958/100000 [39:13<32:41:39,  1.20s/it]

19464 episode score is 791.22


  2%|▏         | 1959/100000 [39:15<32:46:23,  1.20s/it]

19472 episode score is 935.17


  2%|▏         | 1960/100000 [39:16<32:33:49,  1.20s/it]

19481 episode score is 777.01


  2%|▏         | 1961/100000 [39:17<32:36:33,  1.20s/it]

19489 episode score is 906.08


  2%|▏         | 1962/100000 [39:18<32:55:19,  1.21s/it]

19498 episode score is 816.45


  2%|▏         | 1963/100000 [39:19<33:15:21,  1.22s/it]

19507 episode score is 826.35


  2%|▏         | 1964/100000 [39:21<33:15:00,  1.22s/it]

19516 episode score is 816.49


  2%|▏         | 1965/100000 [39:22<32:51:21,  1.21s/it]

19525 episode score is 767.10


  2%|▏         | 1966/100000 [39:23<32:47:41,  1.20s/it]

19534 episode score is 797.17


  2%|▏         | 1967/100000 [39:24<32:08:17,  1.18s/it]

19543 episode score is 736.11


  2%|▏         | 1968/100000 [39:25<32:10:41,  1.18s/it]

19552 episode score is 764.31


  2%|▏         | 1969/100000 [39:26<31:54:37,  1.17s/it]

19561 episode score is 741.04


  2%|▏         | 1970/100000 [39:28<31:52:21,  1.17s/it]

19570 episode score is 776.65


  2%|▏         | 1971/100000 [39:29<32:04:08,  1.18s/it]

19579 episode score is 799.07


  2%|▏         | 1972/100000 [39:30<31:41:40,  1.16s/it]

19588 episode score is 738.61


  2%|▏         | 1973/100000 [39:31<31:24:29,  1.15s/it]

19597 episode score is 723.42


  2%|▏         | 1974/100000 [39:32<31:27:18,  1.16s/it]

19607 episode score is 617.32


  2%|▏         | 1975/100000 [39:33<31:24:25,  1.15s/it]

19616 episode score is 733.99


  2%|▏         | 1976/100000 [39:35<31:26:09,  1.15s/it]

19625 episode score is 735.67


  2%|▏         | 1977/100000 [39:36<31:37:40,  1.16s/it]

19634 episode score is 776.33


  2%|▏         | 1978/100000 [39:37<31:34:52,  1.16s/it]

19643 episode score is 736.68


  2%|▏         | 1979/100000 [39:38<32:10:35,  1.18s/it]

19652 episode score is 833.84


  2%|▏         | 1980/100000 [39:39<32:17:35,  1.19s/it]

19661 episode score is 800.40


  2%|▏         | 1981/100000 [39:40<32:03:05,  1.18s/it]

19670 episode score is 754.21


  2%|▏         | 1982/100000 [39:42<31:30:26,  1.16s/it]

19679 episode score is 717.24


  2%|▏         | 1983/100000 [39:43<31:13:46,  1.15s/it]

19687 episode score is 841.31


  2%|▏         | 1984/100000 [39:44<31:34:51,  1.16s/it]

19696 episode score is 789.29


  2%|▏         | 1985/100000 [39:45<32:28:21,  1.19s/it]

19705 episode score is 770.33


  2%|▏         | 1986/100000 [39:46<32:30:12,  1.19s/it]

19714 episode score is 790.69


  2%|▏         | 1987/100000 [39:48<33:49:08,  1.24s/it]

19722 episode score is 845.66


  2%|▏         | 1988/100000 [39:49<32:42:06,  1.20s/it]

19730 episode score is 848.87


  2%|▏         | 1989/100000 [39:50<32:26:21,  1.19s/it]

19739 episode score is 776.20


  2%|▏         | 1990/100000 [39:51<31:40:16,  1.16s/it]

19747 episode score is 873.15


  2%|▏         | 1991/100000 [39:52<31:26:55,  1.16s/it]

19756 episode score is 727.79


  2%|▏         | 1992/100000 [39:53<32:00:40,  1.18s/it]

19765 episode score is 830.93


  2%|▏         | 1993/100000 [39:55<31:34:33,  1.16s/it]

19773 episode score is 881.00


  2%|▏         | 1994/100000 [39:56<31:45:24,  1.17s/it]

19782 episode score is 793.19


  2%|▏         | 1995/100000 [39:57<31:32:42,  1.16s/it]

19791 episode score is 770.74


  2%|▏         | 1996/100000 [39:58<31:21:29,  1.15s/it]

19800 episode score is 751.69


  2%|▏         | 1997/100000 [39:59<31:49:23,  1.17s/it]

19809 episode score is 806.91


  2%|▏         | 1998/100000 [40:00<31:36:32,  1.16s/it]

19818 episode score is 765.43


  2%|▏         | 1999/100000 [40:02<31:31:06,  1.16s/it]

19827 episode score is 773.96
19837 episode score is 680.50


  2%|▏         | 2000/100000 [40:04<40:39:16,  1.49s/it]

Iteration 2000: Average test reward: 721.42


  2%|▏         | 2001/100000 [40:05<37:37:16,  1.38s/it]

19846 episode score is 732.44


  2%|▏         | 2002/100000 [40:06<36:09:18,  1.33s/it]

19856 episode score is 717.91


  2%|▏         | 2003/100000 [40:07<34:38:17,  1.27s/it]

19865 episode score is 761.53


  2%|▏         | 2004/100000 [40:08<33:35:46,  1.23s/it]

19874 episode score is 761.93


  2%|▏         | 2005/100000 [40:10<33:02:22,  1.21s/it]

19884 episode score is 683.64


  2%|▏         | 2006/100000 [40:11<32:23:40,  1.19s/it]

19893 episode score is 755.22


  2%|▏         | 2007/100000 [40:12<32:08:01,  1.18s/it]

19902 episode score is 760.42


  2%|▏         | 2008/100000 [40:13<32:16:31,  1.19s/it]

19912 episode score is 704.45


  2%|▏         | 2009/100000 [40:14<31:51:25,  1.17s/it]

19922 episode score is 662.40


  2%|▏         | 2010/100000 [40:15<31:43:44,  1.17s/it]

19931 episode score is 752.12


  2%|▏         | 2011/100000 [40:17<31:52:19,  1.17s/it]

19941 episode score is 707.22


  2%|▏         | 2012/100000 [40:18<31:55:12,  1.17s/it]

19951 episode score is 670.06


  2%|▏         | 2013/100000 [40:19<31:19:54,  1.15s/it]

19960 episode score is 731.72


  2%|▏         | 2014/100000 [40:20<31:41:33,  1.16s/it]

19970 episode score is 697.57


  2%|▏         | 2015/100000 [40:21<31:24:30,  1.15s/it]

19979 episode score is 764.69


  2%|▏         | 2016/100000 [40:22<31:18:13,  1.15s/it]

19988 episode score is 762.17


  2%|▏         | 2017/100000 [40:23<31:09:11,  1.14s/it]

19997 episode score is 758.15


  2%|▏         | 2018/100000 [40:25<31:36:28,  1.16s/it]

20007 episode score is 707.26


  2%|▏         | 2019/100000 [40:26<31:35:17,  1.16s/it]

20017 episode score is 696.26


  2%|▏         | 2020/100000 [40:27<31:00:52,  1.14s/it]

20026 episode score is 727.55


  2%|▏         | 2021/100000 [40:28<30:50:59,  1.13s/it]

20035 episode score is 744.70


  2%|▏         | 2022/100000 [40:29<30:44:24,  1.13s/it]

20044 episode score is 758.04


  2%|▏         | 2023/100000 [40:30<30:37:38,  1.13s/it]

20053 episode score is 742.30


  2%|▏         | 2024/100000 [40:31<30:47:15,  1.13s/it]

20062 episode score is 753.42


  2%|▏         | 2025/100000 [40:32<30:41:13,  1.13s/it]

20071 episode score is 717.11


  2%|▏         | 2026/100000 [40:34<30:51:36,  1.13s/it]

20081 episode score is 667.08


  2%|▏         | 2027/100000 [40:35<31:19:09,  1.15s/it]

20091 episode score is 705.30


  2%|▏         | 2028/100000 [40:36<31:11:53,  1.15s/it]

20101 episode score is 661.94


  2%|▏         | 2029/100000 [40:37<31:46:54,  1.17s/it]

20111 episode score is 718.41


  2%|▏         | 2030/100000 [40:38<32:23:30,  1.19s/it]

20121 episode score is 673.44


  2%|▏         | 2031/100000 [40:39<31:39:23,  1.16s/it]

20130 episode score is 721.61


  2%|▏         | 2032/100000 [40:41<31:51:55,  1.17s/it]

20140 episode score is 693.73


  2%|▏         | 2033/100000 [40:42<31:40:44,  1.16s/it]

20149 episode score is 771.66


  2%|▏         | 2034/100000 [40:43<31:53:05,  1.17s/it]

20158 episode score is 773.55


  2%|▏         | 2035/100000 [40:44<31:35:34,  1.16s/it]

20167 episode score is 763.04


  2%|▏         | 2036/100000 [40:45<31:15:10,  1.15s/it]

20176 episode score is 745.24


  2%|▏         | 2037/100000 [40:46<30:42:15,  1.13s/it]

20185 episode score is 720.96


  2%|▏         | 2038/100000 [40:47<30:43:12,  1.13s/it]

20194 episode score is 741.46


  2%|▏         | 2039/100000 [40:49<30:54:28,  1.14s/it]

20203 episode score is 758.11


  2%|▏         | 2040/100000 [40:50<31:01:13,  1.14s/it]

20212 episode score is 761.26


  2%|▏         | 2041/100000 [40:51<30:45:23,  1.13s/it]

20221 episode score is 726.97


  2%|▏         | 2042/100000 [40:52<31:27:15,  1.16s/it]

20231 episode score is 709.95


  2%|▏         | 2043/100000 [40:53<30:59:24,  1.14s/it]

20240 episode score is 740.31


  2%|▏         | 2044/100000 [40:54<30:43:43,  1.13s/it]

20249 episode score is 715.37


  2%|▏         | 2045/100000 [40:55<30:45:33,  1.13s/it]

20258 episode score is 745.42


  2%|▏         | 2046/100000 [40:57<30:29:58,  1.12s/it]

20267 episode score is 729.18


  2%|▏         | 2047/100000 [40:58<30:36:13,  1.12s/it]

20276 episode score is 752.34


  2%|▏         | 2048/100000 [40:59<30:41:09,  1.13s/it]

20285 episode score is 747.94


  2%|▏         | 2049/100000 [41:00<30:26:17,  1.12s/it]

20294 episode score is 746.42


  2%|▏         | 2050/100000 [41:01<31:13:33,  1.15s/it]

20304 episode score is 724.92


  2%|▏         | 2051/100000 [41:02<31:05:45,  1.14s/it]

20313 episode score is 739.47


  2%|▏         | 2052/100000 [41:03<31:08:11,  1.14s/it]

20323 episode score is 683.31


  2%|▏         | 2053/100000 [41:05<30:59:54,  1.14s/it]

20332 episode score is 772.32


  2%|▏         | 2054/100000 [41:06<31:36:40,  1.16s/it]

20342 episode score is 728.85


  2%|▏         | 2055/100000 [41:07<31:51:02,  1.17s/it]

20351 episode score is 760.19


  2%|▏         | 2056/100000 [41:08<31:46:16,  1.17s/it]

20360 episode score is 770.69


  2%|▏         | 2057/100000 [41:09<31:53:57,  1.17s/it]

20369 episode score is 775.71


  2%|▏         | 2058/100000 [41:10<31:27:48,  1.16s/it]

20378 episode score is 716.87


  2%|▏         | 2059/100000 [41:12<31:29:29,  1.16s/it]

20387 episode score is 772.53


  2%|▏         | 2060/100000 [41:13<31:48:42,  1.17s/it]

20396 episode score is 799.86


  2%|▏         | 2061/100000 [41:14<31:11:40,  1.15s/it]

20405 episode score is 725.33


  2%|▏         | 2062/100000 [41:15<31:35:00,  1.16s/it]

20415 episode score is 727.31


  2%|▏         | 2063/100000 [41:16<31:35:04,  1.16s/it]

20424 episode score is 755.80


  2%|▏         | 2064/100000 [41:17<31:09:50,  1.15s/it]

20433 episode score is 720.90


  2%|▏         | 2065/100000 [41:19<31:35:51,  1.16s/it]

20442 episode score is 757.17


  2%|▏         | 2066/100000 [41:20<31:38:23,  1.16s/it]

20451 episode score is 751.27


  2%|▏         | 2067/100000 [41:21<31:58:16,  1.18s/it]

20460 episode score is 789.55


  2%|▏         | 2068/100000 [41:22<31:46:19,  1.17s/it]

20469 episode score is 768.00


  2%|▏         | 2069/100000 [41:23<31:29:51,  1.16s/it]

20478 episode score is 746.42


  2%|▏         | 2070/100000 [41:24<31:53:32,  1.17s/it]

20487 episode score is 822.34


  2%|▏         | 2071/100000 [41:26<32:03:00,  1.18s/it]

20496 episode score is 787.98


  2%|▏         | 2072/100000 [41:27<31:43:50,  1.17s/it]

20505 episode score is 732.96


  2%|▏         | 2073/100000 [41:28<31:38:58,  1.16s/it]

20514 episode score is 765.37


  2%|▏         | 2074/100000 [41:29<32:40:08,  1.20s/it]

20523 episode score is 775.93


  2%|▏         | 2075/100000 [41:30<32:37:21,  1.20s/it]

20532 episode score is 757.84


  2%|▏         | 2076/100000 [41:32<32:43:58,  1.20s/it]

20541 episode score is 784.28


  2%|▏         | 2077/100000 [41:33<32:22:06,  1.19s/it]

20550 episode score is 735.69


  2%|▏         | 2078/100000 [41:34<32:21:14,  1.19s/it]

20559 episode score is 784.27


  2%|▏         | 2079/100000 [41:35<32:29:15,  1.19s/it]

20568 episode score is 777.02


  2%|▏         | 2080/100000 [41:36<32:25:36,  1.19s/it]

20577 episode score is 811.42


  2%|▏         | 2081/100000 [41:37<31:41:18,  1.17s/it]

20585 episode score is 830.80


  2%|▏         | 2082/100000 [41:39<32:17:07,  1.19s/it]

20594 episode score is 815.86


  2%|▏         | 2083/100000 [41:40<32:20:29,  1.19s/it]

20603 episode score is 798.18


  2%|▏         | 2084/100000 [41:41<32:27:12,  1.19s/it]

20612 episode score is 801.49


  2%|▏         | 2085/100000 [41:42<32:24:44,  1.19s/it]

20621 episode score is 786.33


  2%|▏         | 2086/100000 [41:43<32:31:51,  1.20s/it]

20630 episode score is 801.14


  2%|▏         | 2087/100000 [41:45<32:51:15,  1.21s/it]

20639 episode score is 826.76


  2%|▏         | 2088/100000 [41:46<32:40:18,  1.20s/it]

20648 episode score is 800.53


  2%|▏         | 2089/100000 [41:47<32:01:39,  1.18s/it]

20656 episode score is 855.93


  2%|▏         | 2090/100000 [41:48<32:07:09,  1.18s/it]

20665 episode score is 814.44


  2%|▏         | 2091/100000 [41:49<31:42:26,  1.17s/it]

20673 episode score is 853.95


  2%|▏         | 2092/100000 [41:50<31:10:46,  1.15s/it]

20681 episode score is 849.52


  2%|▏         | 2093/100000 [41:52<31:13:44,  1.15s/it]

20690 episode score is 755.88


  2%|▏         | 2094/100000 [41:53<30:55:50,  1.14s/it]

20699 episode score is 720.68


  2%|▏         | 2095/100000 [41:54<31:36:21,  1.16s/it]

20708 episode score is 836.01


  2%|▏         | 2096/100000 [41:55<31:13:18,  1.15s/it]

20716 episode score is 848.10


  2%|▏         | 2097/100000 [41:56<31:28:40,  1.16s/it]

20725 episode score is 804.31


  2%|▏         | 2098/100000 [41:57<31:42:29,  1.17s/it]

20734 episode score is 790.63


  2%|▏         | 2099/100000 [41:59<32:12:06,  1.18s/it]

20743 episode score is 836.66
20752 episode score is 816.20


  2%|▏         | 2100/100000 [42:01<42:26:16,  1.56s/it]

Iteration 2100: Average test reward: 802.40


  2%|▏         | 2101/100000 [42:02<39:32:17,  1.45s/it]

20761 episode score is 797.02


  2%|▏         | 2102/100000 [42:03<37:15:43,  1.37s/it]

20770 episode score is 792.82


  2%|▏         | 2103/100000 [42:05<35:49:06,  1.32s/it]

20779 episode score is 806.20


  2%|▏         | 2104/100000 [42:06<34:12:53,  1.26s/it]

20787 episode score is 855.34


  2%|▏         | 2105/100000 [42:07<33:32:53,  1.23s/it]

20796 episode score is 784.47


  2%|▏         | 2106/100000 [42:08<33:21:13,  1.23s/it]

20805 episode score is 814.26


  2%|▏         | 2107/100000 [42:09<32:51:43,  1.21s/it]

20814 episode score is 762.65


  2%|▏         | 2108/100000 [42:10<32:50:24,  1.21s/it]

20823 episode score is 813.22


  2%|▏         | 2109/100000 [42:12<32:16:18,  1.19s/it]

20832 episode score is 743.67


  2%|▏         | 2110/100000 [42:13<31:47:09,  1.17s/it]

20841 episode score is 739.99


  2%|▏         | 2111/100000 [42:14<31:39:06,  1.16s/it]

20850 episode score is 764.45


  2%|▏         | 2112/100000 [42:15<31:27:34,  1.16s/it]

20859 episode score is 759.71


  2%|▏         | 2113/100000 [42:16<31:43:09,  1.17s/it]

20868 episode score is 779.75


  2%|▏         | 2114/100000 [42:17<31:39:56,  1.16s/it]

20877 episode score is 762.32


  2%|▏         | 2115/100000 [42:19<31:33:35,  1.16s/it]

20886 episode score is 764.28


  2%|▏         | 2116/100000 [42:20<31:27:50,  1.16s/it]

20895 episode score is 765.81


  2%|▏         | 2117/100000 [42:21<31:34:55,  1.16s/it]

20904 episode score is 779.52


  2%|▏         | 2118/100000 [42:22<31:56:11,  1.17s/it]

20913 episode score is 814.25


  2%|▏         | 2119/100000 [42:23<31:53:01,  1.17s/it]

20922 episode score is 767.46


  2%|▏         | 2120/100000 [42:24<31:52:47,  1.17s/it]

20931 episode score is 773.30


  2%|▏         | 2121/100000 [42:26<32:39:29,  1.20s/it]

20940 episode score is 784.60


  2%|▏         | 2122/100000 [42:27<32:17:41,  1.19s/it]

20949 episode score is 758.72


  2%|▏         | 2123/100000 [42:28<32:26:25,  1.19s/it]

20958 episode score is 786.88


  2%|▏         | 2124/100000 [42:29<31:53:14,  1.17s/it]

20967 episode score is 738.95


  2%|▏         | 2125/100000 [42:30<31:24:51,  1.16s/it]

20976 episode score is 746.52


  2%|▏         | 2126/100000 [42:31<31:06:00,  1.14s/it]

20985 episode score is 723.88


  2%|▏         | 2127/100000 [42:33<31:05:49,  1.14s/it]

20994 episode score is 737.52


  2%|▏         | 2128/100000 [42:34<31:40:31,  1.17s/it]

21004 episode score is 705.60


  2%|▏         | 2129/100000 [42:35<31:45:28,  1.17s/it]

21014 episode score is 671.46


  2%|▏         | 2130/100000 [42:36<32:09:56,  1.18s/it]

21024 episode score is 720.92


  2%|▏         | 2131/100000 [42:37<32:09:59,  1.18s/it]

21034 episode score is 698.19


  2%|▏         | 2132/100000 [42:38<31:35:57,  1.16s/it]

21043 episode score is 720.27


  2%|▏         | 2133/100000 [42:40<32:14:55,  1.19s/it]

21053 episode score is 721.37


  2%|▏         | 2134/100000 [42:41<32:24:44,  1.19s/it]

21063 episode score is 704.05


  2%|▏         | 2135/100000 [42:42<32:00:45,  1.18s/it]

21072 episode score is 729.66


  2%|▏         | 2136/100000 [42:43<31:54:51,  1.17s/it]

21081 episode score is 756.87


  2%|▏         | 2137/100000 [42:44<31:48:14,  1.17s/it]

21090 episode score is 760.48


  2%|▏         | 2138/100000 [42:46<31:47:00,  1.17s/it]

21099 episode score is 770.53


  2%|▏         | 2139/100000 [42:47<31:40:47,  1.17s/it]

21108 episode score is 746.66


  2%|▏         | 2140/100000 [42:48<32:06:42,  1.18s/it]

21117 episode score is 787.59


  2%|▏         | 2141/100000 [42:49<32:15:30,  1.19s/it]

21126 episode score is 794.76


  2%|▏         | 2142/100000 [42:50<31:42:47,  1.17s/it]

21134 episode score is 844.04


  2%|▏         | 2143/100000 [42:51<31:14:44,  1.15s/it]

21142 episode score is 832.02


  2%|▏         | 2144/100000 [42:53<31:53:19,  1.17s/it]

21151 episode score is 842.23


  2%|▏         | 2145/100000 [42:54<31:59:55,  1.18s/it]

21160 episode score is 796.93


  2%|▏         | 2146/100000 [42:55<31:51:47,  1.17s/it]

21169 episode score is 762.81


  2%|▏         | 2147/100000 [42:56<32:00:30,  1.18s/it]

21178 episode score is 798.93


  2%|▏         | 2148/100000 [42:57<31:23:09,  1.15s/it]

21186 episode score is 844.26


  2%|▏         | 2149/100000 [42:58<31:33:15,  1.16s/it]

21195 episode score is 782.09


  2%|▏         | 2150/100000 [43:00<31:46:51,  1.17s/it]

21204 episode score is 795.39


  2%|▏         | 2151/100000 [43:01<31:34:26,  1.16s/it]

21213 episode score is 769.08


  2%|▏         | 2152/100000 [43:02<31:36:33,  1.16s/it]

21222 episode score is 761.17


  2%|▏         | 2153/100000 [43:03<31:38:45,  1.16s/it]

21231 episode score is 774.88


  2%|▏         | 2154/100000 [43:04<31:34:01,  1.16s/it]

21240 episode score is 777.19


  2%|▏         | 2155/100000 [43:05<31:29:22,  1.16s/it]

21249 episode score is 774.51


  2%|▏         | 2156/100000 [43:06<31:14:26,  1.15s/it]

21258 episode score is 755.91


  2%|▏         | 2157/100000 [43:08<31:18:30,  1.15s/it]

21267 episode score is 784.23


  2%|▏         | 2158/100000 [43:09<31:21:27,  1.15s/it]

21276 episode score is 777.50


  2%|▏         | 2159/100000 [43:10<31:34:47,  1.16s/it]

21285 episode score is 790.15


  2%|▏         | 2160/100000 [43:11<31:29:09,  1.16s/it]

21294 episode score is 766.20


  2%|▏         | 2161/100000 [43:12<32:01:32,  1.18s/it]

21304 episode score is 711.92


  2%|▏         | 2162/100000 [43:14<32:02:32,  1.18s/it]

21313 episode score is 794.25


  2%|▏         | 2163/100000 [43:15<31:29:10,  1.16s/it]

21321 episode score is 853.00


  2%|▏         | 2164/100000 [43:16<31:53:38,  1.17s/it]

21330 episode score is 814.61


  2%|▏         | 2165/100000 [43:17<32:06:17,  1.18s/it]

21339 episode score is 796.91


  2%|▏         | 2166/100000 [43:18<32:01:49,  1.18s/it]

21348 episode score is 773.32


  2%|▏         | 2167/100000 [43:19<31:41:43,  1.17s/it]

21357 episode score is 750.09


  2%|▏         | 2168/100000 [43:21<31:40:43,  1.17s/it]

21366 episode score is 781.34


  2%|▏         | 2169/100000 [43:22<31:18:16,  1.15s/it]

21374 episode score is 842.03


  2%|▏         | 2170/100000 [43:23<31:36:54,  1.16s/it]

21383 episode score is 794.60


  2%|▏         | 2171/100000 [43:24<31:42:04,  1.17s/it]

21392 episode score is 783.01


  2%|▏         | 2172/100000 [43:25<32:42:35,  1.20s/it]

21401 episode score is 798.61


  2%|▏         | 2173/100000 [43:27<32:45:56,  1.21s/it]

21410 episode score is 810.27


  2%|▏         | 2174/100000 [43:28<31:59:22,  1.18s/it]

21418 episode score is 840.81


  2%|▏         | 2175/100000 [43:29<31:54:22,  1.17s/it]

21427 episode score is 773.14


  2%|▏         | 2176/100000 [43:30<31:51:00,  1.17s/it]

21436 episode score is 773.43


  2%|▏         | 2177/100000 [43:31<32:11:52,  1.18s/it]

21446 episode score is 726.79


  2%|▏         | 2178/100000 [43:32<32:37:57,  1.20s/it]

21456 episode score is 719.54


  2%|▏         | 2179/100000 [43:34<32:01:51,  1.18s/it]

21465 episode score is 738.35


  2%|▏         | 2180/100000 [43:35<32:06:30,  1.18s/it]

21475 episode score is 703.64


  2%|▏         | 2181/100000 [43:36<31:30:13,  1.16s/it]

21484 episode score is 727.66


  2%|▏         | 2182/100000 [43:37<31:13:33,  1.15s/it]

21493 episode score is 720.31


  2%|▏         | 2183/100000 [43:38<30:48:22,  1.13s/it]

21502 episode score is 727.34


  2%|▏         | 2184/100000 [43:39<30:40:33,  1.13s/it]

21511 episode score is 729.73


  2%|▏         | 2185/100000 [43:40<30:56:56,  1.14s/it]

21520 episode score is 760.68


  2%|▏         | 2186/100000 [43:41<30:50:13,  1.13s/it]

21529 episode score is 741.04


  2%|▏         | 2187/100000 [43:43<30:45:58,  1.13s/it]

21538 episode score is 742.30


  2%|▏         | 2188/100000 [43:44<31:01:23,  1.14s/it]

21547 episode score is 767.18


  2%|▏         | 2189/100000 [43:45<30:54:17,  1.14s/it]

21556 episode score is 738.62


  2%|▏         | 2190/100000 [43:46<30:52:25,  1.14s/it]

21565 episode score is 764.08


  2%|▏         | 2191/100000 [43:47<30:55:15,  1.14s/it]

21574 episode score is 752.45


  2%|▏         | 2192/100000 [43:48<31:18:38,  1.15s/it]

21584 episode score is 696.69


  2%|▏         | 2193/100000 [43:50<31:40:25,  1.17s/it]

21594 episode score is 711.75


  2%|▏         | 2194/100000 [43:51<31:10:55,  1.15s/it]

21603 episode score is 725.47


  2%|▏         | 2195/100000 [43:52<31:22:01,  1.15s/it]

21613 episode score is 673.17


  2%|▏         | 2196/100000 [43:53<31:34:19,  1.16s/it]

21622 episode score is 805.17


  2%|▏         | 2197/100000 [43:54<31:06:37,  1.15s/it]

21631 episode score is 740.02


  2%|▏         | 2198/100000 [43:55<30:46:05,  1.13s/it]

21640 episode score is 720.11


  2%|▏         | 2199/100000 [43:56<30:30:21,  1.12s/it]

21649 episode score is 731.13
21659 episode score is 713.20


  2%|▏         | 2200/100000 [43:59<39:41:26,  1.46s/it]

Iteration 2200: Average test reward: 706.00


  2%|▏         | 2201/100000 [44:00<37:12:51,  1.37s/it]

21669 episode score is 663.66


  2%|▏         | 2202/100000 [44:01<35:10:01,  1.29s/it]

21678 episode score is 743.01


  2%|▏         | 2203/100000 [44:02<34:03:17,  1.25s/it]

21687 episode score is 752.56


  2%|▏         | 2204/100000 [44:03<32:57:25,  1.21s/it]

21696 episode score is 739.15


  2%|▏         | 2205/100000 [44:04<32:40:53,  1.20s/it]

21706 episode score is 705.68


  2%|▏         | 2206/100000 [44:05<32:14:53,  1.19s/it]

21715 episode score is 770.91


  2%|▏         | 2207/100000 [44:07<31:54:40,  1.17s/it]

21724 episode score is 739.61


  2%|▏         | 2208/100000 [44:08<31:57:42,  1.18s/it]

21733 episode score is 794.01


  2%|▏         | 2209/100000 [44:09<32:16:33,  1.19s/it]

21742 episode score is 804.77


  2%|▏         | 2210/100000 [44:10<32:06:13,  1.18s/it]

21751 episode score is 790.11


  2%|▏         | 2211/100000 [44:11<32:13:14,  1.19s/it]

21760 episode score is 796.75


  2%|▏         | 2212/100000 [44:13<32:07:53,  1.18s/it]

21769 episode score is 774.73


  2%|▏         | 2213/100000 [44:14<32:07:02,  1.18s/it]

21778 episode score is 786.23


  2%|▏         | 2214/100000 [44:15<32:22:53,  1.19s/it]

21787 episode score is 819.19


  2%|▏         | 2215/100000 [44:16<32:38:42,  1.20s/it]

21796 episode score is 810.19


  2%|▏         | 2216/100000 [44:17<31:56:49,  1.18s/it]

21804 episode score is 834.89


  2%|▏         | 2217/100000 [44:18<31:33:03,  1.16s/it]

21812 episode score is 842.53


  2%|▏         | 2218/100000 [44:20<32:00:12,  1.18s/it]

21821 episode score is 829.21


  2%|▏         | 2219/100000 [44:21<32:28:23,  1.20s/it]

21830 episode score is 830.49


  2%|▏         | 2220/100000 [44:22<32:17:41,  1.19s/it]

21839 episode score is 769.11


  2%|▏         | 2221/100000 [44:23<32:32:34,  1.20s/it]

21848 episode score is 805.53


  2%|▏         | 2222/100000 [44:24<31:55:01,  1.18s/it]

21857 episode score is 739.83


  2%|▏         | 2223/100000 [44:26<32:08:02,  1.18s/it]

21866 episode score is 805.42


  2%|▏         | 2224/100000 [44:27<31:55:53,  1.18s/it]

21875 episode score is 786.05


  2%|▏         | 2225/100000 [44:28<32:14:19,  1.19s/it]

21885 episode score is 720.91


  2%|▏         | 2226/100000 [44:29<32:20:55,  1.19s/it]

21894 episode score is 810.34


  2%|▏         | 2227/100000 [44:30<32:28:28,  1.20s/it]

21903 episode score is 817.64


  2%|▏         | 2228/100000 [44:32<32:35:52,  1.20s/it]

21912 episode score is 807.65


  2%|▏         | 2229/100000 [44:33<32:03:13,  1.18s/it]

21921 episode score is 738.74


  2%|▏         | 2230/100000 [44:34<32:37:04,  1.20s/it]

21930 episode score is 783.59


  2%|▏         | 2231/100000 [44:35<32:07:20,  1.18s/it]

21939 episode score is 752.30


  2%|▏         | 2232/100000 [44:36<31:56:59,  1.18s/it]

21948 episode score is 787.94


  2%|▏         | 2233/100000 [44:37<32:07:31,  1.18s/it]

21957 episode score is 820.38


  2%|▏         | 2234/100000 [44:39<32:17:08,  1.19s/it]

21966 episode score is 836.03


  2%|▏         | 2235/100000 [44:40<31:45:20,  1.17s/it]

21975 episode score is 754.40


  2%|▏         | 2236/100000 [44:41<31:47:00,  1.17s/it]

21984 episode score is 791.26
21993 episode score is 765.30


  2%|▏         | 2237/100000 [44:42<32:01:19,  1.18s/it]

22002 episode score is 748.88


  2%|▏         | 2238/100000 [44:43<33:20:02,  1.23s/it]

22011 episode score is 752.46


  2%|▏         | 2240/100000 [44:46<33:17:53,  1.23s/it]

22020 episode score is 742.49


  2%|▏         | 2241/100000 [44:47<32:54:16,  1.21s/it]

22030 episode score is 676.32


  2%|▏         | 2242/100000 [44:48<33:27:12,  1.23s/it]

22039 episode score is 851.56


  2%|▏         | 2243/100000 [44:50<33:07:08,  1.22s/it]

22048 episode score is 789.14


  2%|▏         | 2244/100000 [44:51<32:29:19,  1.20s/it]

22057 episode score is 759.74


  2%|▏         | 2245/100000 [44:52<32:00:18,  1.18s/it]

22066 episode score is 767.49


  2%|▏         | 2246/100000 [44:53<31:28:15,  1.16s/it]

22074 episode score is 840.96


  2%|▏         | 2247/100000 [44:54<30:58:08,  1.14s/it]

22083 episode score is 717.67


  2%|▏         | 2248/100000 [44:55<31:05:22,  1.14s/it]

22091 episode score is 865.84


  2%|▏         | 2249/100000 [44:56<31:37:01,  1.16s/it]

22101 episode score is 716.17


  2%|▏         | 2250/100000 [44:58<31:24:26,  1.16s/it]

22110 episode score is 756.86


  2%|▏         | 2251/100000 [44:59<31:45:28,  1.17s/it]

22119 episode score is 817.01


  2%|▏         | 2252/100000 [45:00<32:04:25,  1.18s/it]

22128 episode score is 805.15


  2%|▏         | 2253/100000 [45:01<31:42:40,  1.17s/it]

22137 episode score is 763.31


  2%|▏         | 2254/100000 [45:02<32:08:08,  1.18s/it]

22146 episode score is 821.37


  2%|▏         | 2255/100000 [45:03<31:39:08,  1.17s/it]

22154 episode score is 855.34


  2%|▏         | 2256/100000 [45:05<32:11:59,  1.19s/it]

22163 episode score is 822.48


  2%|▏         | 2257/100000 [45:06<32:06:22,  1.18s/it]

22172 episode score is 785.17


  2%|▏         | 2258/100000 [45:07<32:04:21,  1.18s/it]

22181 episode score is 787.05


  2%|▏         | 2259/100000 [45:08<32:09:04,  1.18s/it]

22191 episode score is 713.74


  2%|▏         | 2260/100000 [45:09<31:55:57,  1.18s/it]

22201 episode score is 682.07


  2%|▏         | 2261/100000 [45:11<31:21:09,  1.15s/it]

22210 episode score is 728.76


  2%|▏         | 2262/100000 [45:12<31:36:32,  1.16s/it]

22220 episode score is 688.75


  2%|▏         | 2263/100000 [45:13<31:25:32,  1.16s/it]

22229 episode score is 748.06


  2%|▏         | 2264/100000 [45:14<31:37:46,  1.17s/it]

22239 episode score is 681.22


  2%|▏         | 2265/100000 [45:15<31:13:33,  1.15s/it]

22248 episode score is 735.86


  2%|▏         | 2266/100000 [45:16<31:05:41,  1.15s/it]

22257 episode score is 734.03


  2%|▏         | 2267/100000 [45:17<31:08:59,  1.15s/it]

22266 episode score is 759.18


  2%|▏         | 2268/100000 [45:19<31:14:12,  1.15s/it]

22275 episode score is 737.78


  2%|▏         | 2269/100000 [45:20<31:42:56,  1.17s/it]

22285 episode score is 705.10


  2%|▏         | 2270/100000 [45:21<31:52:15,  1.17s/it]

22295 episode score is 709.39


  2%|▏         | 2271/100000 [45:22<32:17:26,  1.19s/it]

22305 episode score is 713.08


  2%|▏         | 2272/100000 [45:23<32:16:50,  1.19s/it]

22315 episode score is 682.55


  2%|▏         | 2273/100000 [45:25<32:14:03,  1.19s/it]

22325 episode score is 687.90


  2%|▏         | 2274/100000 [45:26<32:20:59,  1.19s/it]

22335 episode score is 703.96


  2%|▏         | 2275/100000 [45:27<32:31:32,  1.20s/it]

22345 episode score is 726.04


  2%|▏         | 2276/100000 [45:28<32:08:25,  1.18s/it]

22354 episode score is 744.17


  2%|▏         | 2277/100000 [45:29<31:42:04,  1.17s/it]

22363 episode score is 734.65


  2%|▏         | 2278/100000 [45:30<31:17:22,  1.15s/it]

22372 episode score is 726.09


  2%|▏         | 2279/100000 [45:32<31:39:15,  1.17s/it]

22382 episode score is 714.13


  2%|▏         | 2280/100000 [45:33<32:33:29,  1.20s/it]

22392 episode score is 700.73


  2%|▏         | 2281/100000 [45:34<32:36:04,  1.20s/it]

22402 episode score is 703.92


  2%|▏         | 2282/100000 [45:35<32:23:45,  1.19s/it]

22412 episode score is 694.64


  2%|▏         | 2283/100000 [45:36<32:30:43,  1.20s/it]

22422 episode score is 705.67


  2%|▏         | 2284/100000 [45:38<32:23:40,  1.19s/it]

22432 episode score is 696.13


  2%|▏         | 2285/100000 [45:39<31:47:51,  1.17s/it]

22441 episode score is 727.52


  2%|▏         | 2286/100000 [45:40<31:54:15,  1.18s/it]

22451 episode score is 686.51


  2%|▏         | 2287/100000 [45:41<32:02:06,  1.18s/it]

22461 episode score is 694.48


  2%|▏         | 2288/100000 [45:42<31:30:23,  1.16s/it]

22471 episode score is 637.57


  2%|▏         | 2289/100000 [45:43<31:41:25,  1.17s/it]

22481 episode score is 682.77


  2%|▏         | 2290/100000 [45:45<31:48:11,  1.17s/it]

22491 episode score is 689.73


  2%|▏         | 2291/100000 [45:46<31:46:16,  1.17s/it]

22501 episode score is 694.32


  2%|▏         | 2292/100000 [45:47<32:07:27,  1.18s/it]

22511 episode score is 704.31


  2%|▏         | 2293/100000 [45:48<31:52:12,  1.17s/it]

22521 episode score is 676.79


  2%|▏         | 2294/100000 [45:49<32:17:39,  1.19s/it]

22531 episode score is 715.87


  2%|▏         | 2295/100000 [45:51<32:08:02,  1.18s/it]

22541 episode score is 674.40


  2%|▏         | 2296/100000 [45:52<32:06:47,  1.18s/it]

22551 episode score is 702.51


  2%|▏         | 2297/100000 [45:53<32:13:42,  1.19s/it]

22561 episode score is 714.18


  2%|▏         | 2298/100000 [45:54<31:45:06,  1.17s/it]

22570 episode score is 753.71


  2%|▏         | 2299/100000 [45:55<32:11:18,  1.19s/it]

22580 episode score is 715.81
22589 episode score is 733.39


  2%|▏         | 2300/100000 [45:57<40:21:54,  1.49s/it]

Iteration 2300: Average test reward: 695.93


  2%|▏         | 2301/100000 [45:59<37:26:44,  1.38s/it]

22598 episode score is 739.23


  2%|▏         | 2302/100000 [46:00<36:20:00,  1.34s/it]

22608 episode score is 740.17


  2%|▏         | 2303/100000 [46:01<35:16:51,  1.30s/it]

22618 episode score is 723.90


  2%|▏         | 2304/100000 [46:02<34:04:39,  1.26s/it]

22628 episode score is 665.31


  2%|▏         | 2305/100000 [46:03<33:47:38,  1.25s/it]

22638 episode score is 732.64


  2%|▏         | 2306/100000 [46:05<33:45:40,  1.24s/it]

22648 episode score is 731.44


  2%|▏         | 2307/100000 [46:06<33:23:33,  1.23s/it]

22658 episode score is 714.70


  2%|▏         | 2308/100000 [46:07<32:37:56,  1.20s/it]

22667 episode score is 749.09


  2%|▏         | 2309/100000 [46:08<32:35:06,  1.20s/it]

22677 episode score is 708.42


  2%|▏         | 2310/100000 [46:09<32:23:51,  1.19s/it]

22686 episode score is 781.25


  2%|▏         | 2311/100000 [46:11<31:51:09,  1.17s/it]

22695 episode score is 742.76


  2%|▏         | 2312/100000 [46:12<31:20:52,  1.16s/it]

22704 episode score is 737.13


  2%|▏         | 2313/100000 [46:13<31:23:48,  1.16s/it]

22713 episode score is 756.93


  2%|▏         | 2314/100000 [46:14<31:41:55,  1.17s/it]

22722 episode score is 793.18


  2%|▏         | 2315/100000 [46:15<31:30:22,  1.16s/it]

22731 episode score is 769.72


  2%|▏         | 2316/100000 [46:16<31:35:44,  1.16s/it]

22740 episode score is 770.45


  2%|▏         | 2317/100000 [46:17<31:26:07,  1.16s/it]

22749 episode score is 758.58


  2%|▏         | 2318/100000 [46:19<31:12:29,  1.15s/it]

22758 episode score is 741.73


  2%|▏         | 2319/100000 [46:20<31:48:00,  1.17s/it]

22768 episode score is 729.10


  2%|▏         | 2320/100000 [46:21<31:51:47,  1.17s/it]

22777 episode score is 785.31


  2%|▏         | 2321/100000 [46:22<31:43:50,  1.17s/it]

22786 episode score is 771.22


  2%|▏         | 2322/100000 [46:23<31:52:56,  1.18s/it]

22795 episode score is 780.19


  2%|▏         | 2323/100000 [46:25<32:09:24,  1.19s/it]

22804 episode score is 813.62


  2%|▏         | 2324/100000 [46:26<32:21:31,  1.19s/it]

22813 episode score is 812.29


  2%|▏         | 2325/100000 [46:27<32:23:30,  1.19s/it]

22822 episode score is 797.63


  2%|▏         | 2326/100000 [46:28<32:25:01,  1.19s/it]

22831 episode score is 802.66


  2%|▏         | 2327/100000 [46:29<31:52:31,  1.17s/it]

22840 episode score is 731.61


  2%|▏         | 2328/100000 [46:30<31:22:05,  1.16s/it]

22849 episode score is 731.01


  2%|▏         | 2329/100000 [46:32<31:38:12,  1.17s/it]

22858 episode score is 789.63


  2%|▏         | 2330/100000 [46:33<31:23:47,  1.16s/it]

22867 episode score is 756.46


  2%|▏         | 2331/100000 [46:34<32:18:23,  1.19s/it]

22876 episode score is 773.24


  2%|▏         | 2332/100000 [46:35<32:21:37,  1.19s/it]

22886 episode score is 699.86


  2%|▏         | 2333/100000 [46:36<31:44:11,  1.17s/it]

22895 episode score is 729.34


  2%|▏         | 2334/100000 [46:37<31:21:03,  1.16s/it]

22904 episode score is 749.54


  2%|▏         | 2335/100000 [46:39<31:40:13,  1.17s/it]

22914 episode score is 705.44


  2%|▏         | 2336/100000 [46:40<31:32:58,  1.16s/it]

22924 episode score is 686.24


  2%|▏         | 2337/100000 [46:41<31:19:48,  1.15s/it]

22933 episode score is 761.80


  2%|▏         | 2338/100000 [46:42<31:38:28,  1.17s/it]

22943 episode score is 698.32


  2%|▏         | 2339/100000 [46:43<31:52:13,  1.17s/it]

22953 episode score is 699.29


  2%|▏         | 2340/100000 [46:44<31:56:03,  1.18s/it]

22963 episode score is 702.94


  2%|▏         | 2341/100000 [46:46<31:22:36,  1.16s/it]

22972 episode score is 741.87


  2%|▏         | 2342/100000 [46:47<31:01:05,  1.14s/it]

22981 episode score is 738.46


  2%|▏         | 2343/100000 [46:48<31:13:14,  1.15s/it]

22990 episode score is 757.54


  2%|▏         | 2344/100000 [46:49<31:02:05,  1.14s/it]

22999 episode score is 727.70


  2%|▏         | 2345/100000 [46:50<30:56:41,  1.14s/it]

23008 episode score is 747.60


  2%|▏         | 2346/100000 [46:51<31:30:39,  1.16s/it]

23018 episode score is 714.65


  2%|▏         | 2347/100000 [46:52<31:38:50,  1.17s/it]

23027 episode score is 795.24


  2%|▏         | 2348/100000 [46:54<31:14:13,  1.15s/it]

23035 episode score is 840.38


  2%|▏         | 2349/100000 [46:55<31:34:32,  1.16s/it]

23044 episode score is 804.75


  2%|▏         | 2350/100000 [46:56<31:51:39,  1.17s/it]

23053 episode score is 810.01


  2%|▏         | 2351/100000 [46:57<32:17:57,  1.19s/it]

23062 episode score is 839.19


  2%|▏         | 2352/100000 [46:58<32:03:51,  1.18s/it]

23071 episode score is 802.80


  2%|▏         | 2353/100000 [47:00<31:33:05,  1.16s/it]

23080 episode score is 748.33


  2%|▏         | 2354/100000 [47:01<31:21:58,  1.16s/it]

23088 episode score is 832.19


  2%|▏         | 2355/100000 [47:02<30:59:35,  1.14s/it]

23096 episode score is 859.60


  2%|▏         | 2356/100000 [47:03<31:10:26,  1.15s/it]

23105 episode score is 789.27


  2%|▏         | 2357/100000 [47:04<31:02:41,  1.14s/it]

23114 episode score is 766.19


  2%|▏         | 2358/100000 [47:05<30:48:18,  1.14s/it]

23123 episode score is 736.24


  2%|▏         | 2359/100000 [47:06<31:26:18,  1.16s/it]

23132 episode score is 787.45


  2%|▏         | 2360/100000 [47:08<31:37:54,  1.17s/it]

23141 episode score is 788.31


  2%|▏         | 2361/100000 [47:09<31:15:47,  1.15s/it]

23149 episode score is 854.86


  2%|▏         | 2362/100000 [47:10<31:34:40,  1.16s/it]

23158 episode score is 809.71


  2%|▏         | 2363/100000 [47:11<32:04:32,  1.18s/it]

23167 episode score is 823.16


  2%|▏         | 2364/100000 [47:12<31:42:37,  1.17s/it]

23175 episode score is 849.32


  2%|▏         | 2365/100000 [47:13<32:00:39,  1.18s/it]

23184 episode score is 810.72


  2%|▏         | 2366/100000 [47:15<32:22:45,  1.19s/it]

23193 episode score is 820.85


  2%|▏         | 2367/100000 [47:16<31:52:43,  1.18s/it]

23201 episode score is 836.51


  2%|▏         | 2368/100000 [47:17<32:25:03,  1.20s/it]

23210 episode score is 839.49


  2%|▏         | 2369/100000 [47:18<31:55:26,  1.18s/it]

23218 episode score is 880.36


  2%|▏         | 2370/100000 [47:19<32:18:36,  1.19s/it]

23227 episode score is 828.97


  2%|▏         | 2371/100000 [47:21<32:39:43,  1.20s/it]

23236 episode score is 840.29


  2%|▏         | 2372/100000 [47:22<32:27:39,  1.20s/it]

23244 episode score is 895.97


  2%|▏         | 2373/100000 [47:23<32:38:26,  1.20s/it]

23253 episode score is 822.73


  2%|▏         | 2374/100000 [47:24<32:01:27,  1.18s/it]

23261 episode score is 858.07


  2%|▏         | 2375/100000 [47:25<31:55:02,  1.18s/it]

23269 episode score is 894.57


  2%|▏         | 2376/100000 [47:27<32:05:11,  1.18s/it]

23278 episode score is 796.77


  2%|▏         | 2377/100000 [47:28<32:11:34,  1.19s/it]

23287 episode score is 816.35


  2%|▏         | 2378/100000 [47:29<31:33:51,  1.16s/it]

23295 episode score is 857.91


  2%|▏         | 2379/100000 [47:30<31:22:01,  1.16s/it]

23303 episode score is 870.90


  2%|▏         | 2380/100000 [47:31<31:19:17,  1.16s/it]

23311 episode score is 894.38


  2%|▏         | 2381/100000 [47:32<31:41:12,  1.17s/it]

23320 episode score is 802.86


  2%|▏         | 2382/100000 [47:34<32:11:50,  1.19s/it]

23329 episode score is 824.29


  2%|▏         | 2383/100000 [47:35<32:34:39,  1.20s/it]

23338 episode score is 845.09


  2%|▏         | 2384/100000 [47:36<32:21:46,  1.19s/it]

23347 episode score is 788.54


  2%|▏         | 2385/100000 [47:37<31:43:16,  1.17s/it]

23355 episode score is 829.51


  2%|▏         | 2386/100000 [47:38<31:33:55,  1.16s/it]

23364 episode score is 780.49


  2%|▏         | 2387/100000 [47:39<31:59:48,  1.18s/it]

23373 episode score is 822.57


  2%|▏         | 2388/100000 [47:41<31:51:30,  1.17s/it]

23382 episode score is 770.20


  2%|▏         | 2389/100000 [47:42<31:45:05,  1.17s/it]

23391 episode score is 768.74


  2%|▏         | 2390/100000 [47:43<32:17:33,  1.19s/it]

23400 episode score is 836.84


  2%|▏         | 2391/100000 [47:44<32:24:27,  1.20s/it]

23409 episode score is 797.69


  2%|▏         | 2392/100000 [47:45<32:33:59,  1.20s/it]

23418 episode score is 808.77


  2%|▏         | 2393/100000 [47:47<32:56:22,  1.21s/it]

23427 episode score is 846.42


  2%|▏         | 2394/100000 [47:48<32:22:28,  1.19s/it]

23436 episode score is 727.05


  2%|▏         | 2395/100000 [47:49<33:31:21,  1.24s/it]

23445 episode score is 837.51


  2%|▏         | 2396/100000 [47:50<32:41:35,  1.21s/it]

23453 episode score is 832.15


  2%|▏         | 2397/100000 [47:51<32:05:43,  1.18s/it]

23461 episode score is 875.15


  2%|▏         | 2398/100000 [47:53<32:33:14,  1.20s/it]

23470 episode score is 842.10


  2%|▏         | 2399/100000 [47:54<32:57:02,  1.22s/it]

23479 episode score is 838.78
23488 episode score is 800.93


  2%|▏         | 2400/100000 [47:56<42:06:29,  1.55s/it]

Iteration 2400: Average test reward: 808.53


  2%|▏         | 2401/100000 [47:57<39:13:22,  1.45s/it]

23497 episode score is 814.02


  2%|▏         | 2402/100000 [47:59<37:16:44,  1.38s/it]

23506 episode score is 808.79


  2%|▏         | 2403/100000 [48:00<35:37:49,  1.31s/it]

23514 episode score is 900.02


  2%|▏         | 2404/100000 [48:01<34:24:08,  1.27s/it]

23522 episode score is 907.60


  2%|▏         | 2405/100000 [48:02<33:33:23,  1.24s/it]

23531 episode score is 774.60


  2%|▏         | 2406/100000 [48:03<33:40:53,  1.24s/it]

23540 episode score is 848.63


  2%|▏         | 2407/100000 [48:05<33:47:08,  1.25s/it]

23549 episode score is 859.66


  2%|▏         | 2408/100000 [48:06<33:21:34,  1.23s/it]

23558 episode score is 794.68


  2%|▏         | 2409/100000 [48:07<32:29:18,  1.20s/it]

23566 episode score is 860.52


  2%|▏         | 2410/100000 [48:08<32:42:10,  1.21s/it]

23575 episode score is 833.89


  2%|▏         | 2411/100000 [48:09<32:52:20,  1.21s/it]

23584 episode score is 827.56


  2%|▏         | 2412/100000 [48:11<32:36:26,  1.20s/it]

23593 episode score is 812.46


  2%|▏         | 2413/100000 [48:12<31:58:07,  1.18s/it]

23602 episode score is 752.55


  2%|▏         | 2414/100000 [48:13<31:57:55,  1.18s/it]

23611 episode score is 780.62


  2%|▏         | 2415/100000 [48:14<31:29:00,  1.16s/it]

23620 episode score is 744.44


  2%|▏         | 2416/100000 [48:15<31:50:53,  1.17s/it]

23629 episode score is 792.59


  2%|▏         | 2417/100000 [48:16<31:17:42,  1.15s/it]

23637 episode score is 857.56


  2%|▏         | 2418/100000 [48:18<31:26:28,  1.16s/it]

23646 episode score is 789.42


  2%|▏         | 2419/100000 [48:19<31:42:46,  1.17s/it]

23655 episode score is 811.48


  2%|▏         | 2420/100000 [48:20<31:23:16,  1.16s/it]

23663 episode score is 844.94


  2%|▏         | 2421/100000 [48:21<31:46:52,  1.17s/it]

23672 episode score is 810.71


  2%|▏         | 2422/100000 [48:22<32:26:37,  1.20s/it]

23681 episode score is 838.45


  2%|▏         | 2423/100000 [48:24<32:40:25,  1.21s/it]

23690 episode score is 818.00


  2%|▏         | 2424/100000 [48:25<32:30:11,  1.20s/it]

23698 episode score is 912.00


  2%|▏         | 2425/100000 [48:26<32:10:58,  1.19s/it]

23707 episode score is 771.87


  2%|▏         | 2426/100000 [48:27<32:10:07,  1.19s/it]

23716 episode score is 789.65


  2%|▏         | 2427/100000 [48:28<31:55:30,  1.18s/it]

23725 episode score is 759.83


  2%|▏         | 2428/100000 [48:29<31:46:17,  1.17s/it]

23734 episode score is 767.98


  2%|▏         | 2429/100000 [48:31<31:33:59,  1.16s/it]

23743 episode score is 770.05


  2%|▏         | 2430/100000 [48:32<31:33:10,  1.16s/it]

23752 episode score is 776.57


  2%|▏         | 2431/100000 [48:33<31:56:36,  1.18s/it]

23762 episode score is 697.03


  2%|▏         | 2432/100000 [48:34<31:56:59,  1.18s/it]

23771 episode score is 792.11


  2%|▏         | 2433/100000 [48:35<31:55:52,  1.18s/it]

23780 episode score is 789.23


  2%|▏         | 2434/100000 [48:36<31:51:42,  1.18s/it]

23789 episode score is 775.88


  2%|▏         | 2435/100000 [48:38<32:12:22,  1.19s/it]

23798 episode score is 812.02


  2%|▏         | 2436/100000 [48:39<32:03:46,  1.18s/it]

23807 episode score is 782.12


  2%|▏         | 2437/100000 [48:40<31:57:18,  1.18s/it]

23816 episode score is 789.01
23825 episode score is 752.40


  2%|▏         | 2438/100000 [48:41<33:26:36,  1.23s/it]

23834 episode score is 822.56


  2%|▏         | 2439/100000 [48:43<35:15:33,  1.30s/it]

23843 episode score is 801.07


  2%|▏         | 2440/100000 [48:44<36:04:31,  1.33s/it]

23852 episode score is 824.43


  2%|▏         | 2441/100000 [48:46<37:00:17,  1.37s/it]

23861 episode score is 791.03


  2%|▏         | 2442/100000 [48:47<38:08:28,  1.41s/it]

23870 episode score is 787.73


  2%|▏         | 2443/100000 [48:49<38:02:23,  1.40s/it]

23879 episode score is 796.87


  2%|▏         | 2444/100000 [48:50<38:03:20,  1.40s/it]

23888 episode score is 783.43


  2%|▏         | 2445/100000 [48:51<37:48:18,  1.40s/it]

23897 episode score is 747.91


  2%|▏         | 2446/100000 [48:53<37:26:34,  1.38s/it]

23906 episode score is 739.01


  2%|▏         | 2447/100000 [48:54<37:03:15,  1.37s/it]

23915 episode score is 745.43


  2%|▏         | 2448/100000 [48:55<36:51:28,  1.36s/it]

23924 episode score is 738.86


  2%|▏         | 2449/100000 [48:57<36:25:11,  1.34s/it]

23934 episode score is 677.62


  2%|▏         | 2450/100000 [48:58<36:35:24,  1.35s/it]

23943 episode score is 739.73


  2%|▏         | 2451/100000 [48:59<36:25:38,  1.34s/it]

23952 episode score is 793.11


  2%|▏         | 2452/100000 [49:01<36:53:24,  1.36s/it]

23961 episode score is 805.81


  2%|▏         | 2453/100000 [49:02<37:24:26,  1.38s/it]

23970 episode score is 748.35


  2%|▏         | 2454/100000 [49:04<37:17:15,  1.38s/it]

23979 episode score is 782.09


  2%|▏         | 2455/100000 [49:05<37:17:45,  1.38s/it]

23988 episode score is 777.22


  2%|▏         | 2456/100000 [49:06<37:18:50,  1.38s/it]

23997 episode score is 830.00


  2%|▏         | 2457/100000 [49:08<37:46:55,  1.39s/it]

24006 episode score is 763.80


  2%|▏         | 2458/100000 [49:09<37:33:07,  1.39s/it]

24014 episode score is 878.53


  2%|▏         | 2459/100000 [49:10<37:19:03,  1.38s/it]

24023 episode score is 802.37


  2%|▏         | 2460/100000 [49:12<37:41:52,  1.39s/it]

24031 episode score is 856.18


  2%|▏         | 2461/100000 [49:13<37:10:23,  1.37s/it]

24040 episode score is 808.38


  2%|▏         | 2462/100000 [49:15<37:43:31,  1.39s/it]

24048 episode score is 831.46


  2%|▏         | 2463/100000 [49:16<37:04:49,  1.37s/it]

24057 episode score is 811.47


  2%|▏         | 2464/100000 [49:17<37:38:40,  1.39s/it]

24065 episode score is 851.82


  2%|▏         | 2465/100000 [49:19<37:06:10,  1.37s/it]

24074 episode score is 803.66


  2%|▏         | 2466/100000 [49:20<37:27:03,  1.38s/it]

24083 episode score is 845.31


  2%|▏         | 2467/100000 [49:22<38:01:08,  1.40s/it]

24092 episode score is 748.40


  2%|▏         | 2468/100000 [49:23<37:30:47,  1.38s/it]

24100 episode score is 969.83


  2%|▏         | 2469/100000 [49:24<38:18:16,  1.41s/it]

24108 episode score is 901.56


  2%|▏         | 2470/100000 [49:26<38:22:37,  1.42s/it]

24116 episode score is 960.24


  2%|▏         | 2471/100000 [49:27<38:45:53,  1.43s/it]

24124 episode score is 947.44


  2%|▏         | 2472/100000 [49:29<38:53:50,  1.44s/it]

24132 episode score is 956.44


  2%|▏         | 2473/100000 [49:30<39:02:23,  1.44s/it]

24140 episode score is 934.01


  2%|▏         | 2474/100000 [49:32<39:06:48,  1.44s/it]

24148 episode score is 889.18


  2%|▏         | 2475/100000 [49:33<38:36:31,  1.43s/it]

24156 episode score is 872.05


  2%|▏         | 2476/100000 [49:34<37:55:54,  1.40s/it]

24164 episode score is 926.44


  2%|▏         | 2477/100000 [49:36<37:59:50,  1.40s/it]

24172 episode score is 928.53


  2%|▏         | 2478/100000 [49:37<38:11:07,  1.41s/it]

24180 episode score is 912.43


  2%|▏         | 2479/100000 [49:39<37:51:00,  1.40s/it]

24188 episode score is 853.29


  2%|▏         | 2480/100000 [49:40<37:09:01,  1.37s/it]

24196 episode score is 913.06


  2%|▏         | 2481/100000 [49:41<37:37:36,  1.39s/it]

24204 episode score is 877.86


  2%|▏         | 2482/100000 [49:43<37:29:52,  1.38s/it]

24212 episode score is 908.96


  2%|▏         | 2483/100000 [49:44<37:37:09,  1.39s/it]

24220 episode score is 849.38


  2%|▏         | 2484/100000 [49:45<37:08:42,  1.37s/it]

24228 episode score is 892.21


  2%|▏         | 2485/100000 [49:47<36:55:43,  1.36s/it]

24236 episode score is 921.29


  2%|▏         | 2486/100000 [49:48<37:30:46,  1.38s/it]

24244 episode score is 927.50


  2%|▏         | 2487/100000 [49:50<37:53:46,  1.40s/it]

24252 episode score is 908.30


  2%|▏         | 2488/100000 [49:51<37:55:56,  1.40s/it]

24260 episode score is 882.66


  2%|▏         | 2489/100000 [49:52<37:40:27,  1.39s/it]

24268 episode score is 919.45


  2%|▏         | 2490/100000 [49:54<37:47:01,  1.39s/it]

24276 episode score is 966.03


  2%|▏         | 2491/100000 [49:55<38:35:09,  1.42s/it]

24284 episode score is 960.30


  2%|▏         | 2492/100000 [49:57<38:55:58,  1.44s/it]

24292 episode score is 862.91


  2%|▏         | 2493/100000 [49:58<38:05:37,  1.41s/it]

24301 episode score is 841.64


  2%|▏         | 2494/100000 [50:00<38:31:41,  1.42s/it]

24310 episode score is 834.99


  2%|▏         | 2495/100000 [50:01<38:38:13,  1.43s/it]

24318 episode score is 867.88


  2%|▏         | 2496/100000 [50:02<37:55:24,  1.40s/it]

24327 episode score is 847.33


  2%|▏         | 2497/100000 [50:04<38:37:15,  1.43s/it]

24335 episode score is 839.57


  2%|▏         | 2498/100000 [50:05<38:30:40,  1.42s/it]

24343 episode score is 883.92


  2%|▏         | 2499/100000 [50:07<37:45:10,  1.39s/it]

24352 episode score is 819.93


  2%|▎         | 2500/100000 [50:09<46:47:22,  1.73s/it]

Iteration 2500: Average test reward: 835.12


  3%|▎         | 2501/100000 [50:10<41:59:07,  1.55s/it]

24360 episode score is 889.91


  3%|▎         | 2502/100000 [50:11<38:47:40,  1.43s/it]

24368 episode score is 905.09


  3%|▎         | 2503/100000 [50:13<37:07:13,  1.37s/it]

24376 episode score is 957.39


  3%|▎         | 2504/100000 [50:14<35:09:43,  1.30s/it]

24383 episode score is 1014.70


  3%|▎         | 2505/100000 [50:15<33:37:26,  1.24s/it]

24390 episode score is 1012.54


  3%|▎         | 2506/100000 [50:16<33:12:53,  1.23s/it]

24399 episode score is 836.06


  3%|▎         | 2507/100000 [50:17<32:51:28,  1.21s/it]

24408 episode score is 806.98


  3%|▎         | 2508/100000 [50:18<32:19:22,  1.19s/it]

24416 episode score is 902.43


  3%|▎         | 2509/100000 [50:20<32:22:38,  1.20s/it]

24425 episode score is 822.27


  3%|▎         | 2510/100000 [50:21<31:48:56,  1.17s/it]

24433 episode score is 898.48


  3%|▎         | 2511/100000 [50:22<31:08:44,  1.15s/it]

24441 episode score is 855.31


  3%|▎         | 2512/100000 [50:23<31:39:54,  1.17s/it]

24450 episode score is 834.81


  3%|▎         | 2513/100000 [50:24<31:33:55,  1.17s/it]

24458 episode score is 912.32


  3%|▎         | 2514/100000 [50:25<31:18:59,  1.16s/it]

24466 episode score is 886.41


  3%|▎         | 2515/100000 [50:26<31:01:17,  1.15s/it]

24474 episode score is 859.54


  3%|▎         | 2516/100000 [50:28<31:11:35,  1.15s/it]

24483 episode score is 805.85


  3%|▎         | 2517/100000 [50:29<31:42:07,  1.17s/it]

24492 episode score is 824.15


  3%|▎         | 2518/100000 [50:30<31:14:33,  1.15s/it]

24501 episode score is 740.01


  3%|▎         | 2519/100000 [50:31<31:59:46,  1.18s/it]

24510 episode score is 849.90


  3%|▎         | 2520/100000 [50:32<32:26:16,  1.20s/it]

24519 episode score is 831.62


  3%|▎         | 2521/100000 [50:34<32:20:56,  1.19s/it]

24528 episode score is 805.64


  3%|▎         | 2522/100000 [50:35<31:56:50,  1.18s/it]

24536 episode score is 892.21


  3%|▎         | 2523/100000 [50:36<31:51:23,  1.18s/it]

24545 episode score is 781.86


  3%|▎         | 2524/100000 [50:37<32:15:32,  1.19s/it]

24554 episode score is 834.44


  3%|▎         | 2525/100000 [50:38<32:12:37,  1.19s/it]

24564 episode score is 696.98


  3%|▎         | 2526/100000 [50:40<32:17:36,  1.19s/it]

24574 episode score is 713.44


  3%|▎         | 2527/100000 [50:41<31:38:26,  1.17s/it]

24583 episode score is 754.91


  3%|▎         | 2528/100000 [50:42<31:58:34,  1.18s/it]

24592 episode score is 822.39


  3%|▎         | 2529/100000 [50:43<31:39:42,  1.17s/it]

24601 episode score is 780.83


  3%|▎         | 2530/100000 [50:44<31:17:47,  1.16s/it]

24610 episode score is 761.82


  3%|▎         | 2531/100000 [50:45<31:45:54,  1.17s/it]

24619 episode score is 827.29


  3%|▎         | 2532/100000 [50:47<32:12:14,  1.19s/it]

24628 episode score is 841.51


  3%|▎         | 2533/100000 [50:48<32:00:17,  1.18s/it]

24636 episode score is 865.58


  3%|▎         | 2534/100000 [50:49<31:54:41,  1.18s/it]

24645 episode score is 769.21


  3%|▎         | 2535/100000 [50:50<31:41:44,  1.17s/it]

24654 episode score is 785.83


  3%|▎         | 2536/100000 [50:51<31:56:18,  1.18s/it]

24663 episode score is 823.25


  3%|▎         | 2537/100000 [50:52<31:55:01,  1.18s/it]

24672 episode score is 805.95


  3%|▎         | 2538/100000 [50:54<31:35:21,  1.17s/it]

24681 episode score is 765.54


  3%|▎         | 2539/100000 [50:55<31:11:36,  1.15s/it]

24690 episode score is 744.38


  3%|▎         | 2540/100000 [50:56<31:09:59,  1.15s/it]

24699 episode score is 759.32


  3%|▎         | 2541/100000 [50:57<31:26:29,  1.16s/it]

24708 episode score is 805.40


  3%|▎         | 2542/100000 [50:58<31:06:01,  1.15s/it]

24716 episode score is 860.36
24725 episode score is 843.57


  3%|▎         | 2544/100000 [51:01<31:35:19,  1.17s/it]

24732 episode score is 1033.63


  3%|▎         | 2545/100000 [51:02<30:53:51,  1.14s/it]

24740 episode score is 849.68


  3%|▎         | 2546/100000 [51:03<31:03:04,  1.15s/it]

24749 episode score is 789.86


  3%|▎         | 2547/100000 [51:04<30:30:15,  1.13s/it]

24757 episode score is 849.37


  3%|▎         | 2548/100000 [51:05<30:30:08,  1.13s/it]

24766 episode score is 778.88


  3%|▎         | 2549/100000 [51:06<30:51:33,  1.14s/it]

24775 episode score is 813.72


  3%|▎         | 2550/100000 [51:07<30:46:11,  1.14s/it]

24784 episode score is 775.38


  3%|▎         | 2551/100000 [51:08<30:29:16,  1.13s/it]

24793 episode score is 758.74


  3%|▎         | 2552/100000 [51:10<30:37:43,  1.13s/it]

24802 episode score is 790.76


  3%|▎         | 2553/100000 [51:11<30:59:12,  1.14s/it]

24812 episode score is 728.05


  3%|▎         | 2554/100000 [51:12<31:24:46,  1.16s/it]

24822 episode score is 727.47


  3%|▎         | 2555/100000 [51:13<30:48:53,  1.14s/it]

24831 episode score is 725.80


  3%|▎         | 2556/100000 [51:14<30:19:33,  1.12s/it]

24840 episode score is 737.53


  3%|▎         | 2557/100000 [51:15<29:59:57,  1.11s/it]

24849 episode score is 747.00


  3%|▎         | 2558/100000 [51:16<30:32:39,  1.13s/it]

24858 episode score is 734.19


  3%|▎         | 2559/100000 [51:17<30:58:46,  1.14s/it]

24868 episode score is 715.75


  3%|▎         | 2560/100000 [51:19<30:26:12,  1.12s/it]

24877 episode score is 733.95


  3%|▎         | 2561/100000 [51:20<31:06:24,  1.15s/it]

24887 episode score is 734.77


  3%|▎         | 2562/100000 [51:21<31:18:25,  1.16s/it]

24897 episode score is 722.26


  3%|▎         | 2563/100000 [51:22<30:40:37,  1.13s/it]

24906 episode score is 736.89


  3%|▎         | 2564/100000 [51:23<30:53:14,  1.14s/it]

24916 episode score is 718.15


  3%|▎         | 2565/100000 [51:24<31:14:38,  1.15s/it]

24926 episode score is 733.40


  3%|▎         | 2566/100000 [51:26<31:15:41,  1.16s/it]

24936 episode score is 708.76


  3%|▎         | 2567/100000 [51:27<30:41:25,  1.13s/it]

24945 episode score is 733.49


  3%|▎         | 2568/100000 [51:28<31:00:37,  1.15s/it]

24955 episode score is 711.49


  3%|▎         | 2569/100000 [51:29<31:11:48,  1.15s/it]

24965 episode score is 715.94


  3%|▎         | 2570/100000 [51:30<31:20:40,  1.16s/it]

24975 episode score is 715.41


  3%|▎         | 2571/100000 [51:31<31:29:35,  1.16s/it]

24984 episode score is 806.82


  3%|▎         | 2572/100000 [51:32<31:05:48,  1.15s/it]

24993 episode score is 751.34


  3%|▎         | 2573/100000 [51:34<30:42:40,  1.13s/it]

25002 episode score is 754.79


  3%|▎         | 2574/100000 [51:35<30:16:01,  1.12s/it]

25011 episode score is 747.64


  3%|▎         | 2575/100000 [51:36<30:28:01,  1.13s/it]

25020 episode score is 776.18


  3%|▎         | 2576/100000 [51:37<30:08:39,  1.11s/it]

25029 episode score is 747.55


  3%|▎         | 2577/100000 [51:38<29:54:08,  1.10s/it]

25038 episode score is 756.81


  3%|▎         | 2578/100000 [51:39<30:18:18,  1.12s/it]

25048 episode score is 694.15


  3%|▎         | 2579/100000 [51:40<29:55:57,  1.11s/it]

25057 episode score is 726.13


  3%|▎         | 2580/100000 [51:41<29:36:05,  1.09s/it]

25065 episode score is 824.22


  3%|▎         | 2581/100000 [51:42<29:33:27,  1.09s/it]

25074 episode score is 736.92


  3%|▎         | 2582/100000 [51:43<30:17:22,  1.12s/it]

25084 episode score is 719.17


  3%|▎         | 2583/100000 [51:45<30:28:08,  1.13s/it]

25094 episode score is 714.84


  3%|▎         | 2584/100000 [51:46<30:14:37,  1.12s/it]

25103 episode score is 741.89


  3%|▎         | 2585/100000 [51:47<30:07:19,  1.11s/it]

25112 episode score is 754.42


  3%|▎         | 2586/100000 [51:48<30:50:57,  1.14s/it]

25122 episode score is 733.11


  3%|▎         | 2587/100000 [51:49<31:28:00,  1.16s/it]

25132 episode score is 751.61


  3%|▎         | 2588/100000 [51:50<31:41:24,  1.17s/it]

25142 episode score is 731.93


  3%|▎         | 2589/100000 [51:52<31:34:44,  1.17s/it]

25151 episode score is 801.40


  3%|▎         | 2590/100000 [51:53<31:21:07,  1.16s/it]

25160 episode score is 806.44


  3%|▎         | 2591/100000 [51:54<31:23:24,  1.16s/it]

25170 episode score is 733.85


  3%|▎         | 2592/100000 [51:55<31:01:21,  1.15s/it]

25179 episode score is 774.72


  3%|▎         | 2593/100000 [51:56<30:45:13,  1.14s/it]

25188 episode score is 785.72


  3%|▎         | 2594/100000 [51:57<31:08:38,  1.15s/it]

25197 episode score is 833.40


  3%|▎         | 2595/100000 [51:58<31:09:48,  1.15s/it]

25207 episode score is 715.47


  3%|▎         | 2596/100000 [52:00<30:57:01,  1.14s/it]

25216 episode score is 767.35


  3%|▎         | 2597/100000 [52:01<31:16:29,  1.16s/it]

25226 episode score is 707.95


  3%|▎         | 2598/100000 [52:02<30:50:23,  1.14s/it]

25235 episode score is 756.18


  3%|▎         | 2599/100000 [52:03<30:26:45,  1.13s/it]

25243 episode score is 845.68
25252 episode score is 746.43


  3%|▎         | 2600/100000 [52:05<38:18:49,  1.42s/it]

Iteration 2600: Average test reward: 693.06


  3%|▎         | 2601/100000 [52:06<35:48:47,  1.32s/it]

25261 episode score is 754.91


  3%|▎         | 2602/100000 [52:07<34:41:40,  1.28s/it]

25271 episode score is 728.63


  3%|▎         | 2603/100000 [52:08<33:25:08,  1.24s/it]

25281 episode score is 693.47


  3%|▎         | 2604/100000 [52:10<32:12:54,  1.19s/it]

25290 episode score is 755.56


  3%|▎         | 2605/100000 [52:11<31:16:10,  1.16s/it]

25299 episode score is 734.34


  3%|▎         | 2606/100000 [52:12<31:22:44,  1.16s/it]

25309 episode score is 721.46


  3%|▎         | 2607/100000 [52:13<31:16:03,  1.16s/it]

25319 episode score is 697.40


  3%|▎         | 2608/100000 [52:14<31:07:43,  1.15s/it]

25328 episode score is 775.57


  3%|▎         | 2609/100000 [52:15<31:17:05,  1.16s/it]

25338 episode score is 727.45


  3%|▎         | 2610/100000 [52:16<31:19:23,  1.16s/it]

25348 episode score is 705.79


  3%|▎         | 2611/100000 [52:18<31:32:18,  1.17s/it]

25358 episode score is 721.22


  3%|▎         | 2612/100000 [52:19<31:09:37,  1.15s/it]

25368 episode score is 678.51


  3%|▎         | 2613/100000 [52:20<31:04:26,  1.15s/it]

25378 episode score is 681.25


  3%|▎         | 2614/100000 [52:21<31:05:04,  1.15s/it]

25388 episode score is 696.84


  3%|▎         | 2615/100000 [52:22<31:14:02,  1.15s/it]

25398 episode score is 709.92


  3%|▎         | 2616/100000 [52:23<30:53:29,  1.14s/it]

25407 episode score is 756.30


  3%|▎         | 2617/100000 [52:24<30:45:16,  1.14s/it]

25417 episode score is 694.49


  3%|▎         | 2618/100000 [52:26<31:15:38,  1.16s/it]

25427 episode score is 675.32


  3%|▎         | 2619/100000 [52:27<31:19:43,  1.16s/it]

25437 episode score is 704.22


  3%|▎         | 2620/100000 [52:28<31:16:25,  1.16s/it]

25447 episode score is 708.74


  3%|▎         | 2621/100000 [52:29<31:29:09,  1.16s/it]

25457 episode score is 726.50


  3%|▎         | 2622/100000 [52:30<31:07:18,  1.15s/it]

25467 episode score is 694.19


  3%|▎         | 2623/100000 [52:31<31:04:03,  1.15s/it]

25477 episode score is 700.73


  3%|▎         | 2624/100000 [52:33<31:26:21,  1.16s/it]

25487 episode score is 727.38


  3%|▎         | 2625/100000 [52:34<31:11:31,  1.15s/it]

25497 episode score is 691.59


  3%|▎         | 2626/100000 [52:35<31:06:40,  1.15s/it]

25507 episode score is 711.96


  3%|▎         | 2627/100000 [52:36<31:29:00,  1.16s/it]

25517 episode score is 733.27


  3%|▎         | 2628/100000 [52:37<31:27:45,  1.16s/it]

25527 episode score is 719.86


  3%|▎         | 2629/100000 [52:38<31:27:43,  1.16s/it]

25537 episode score is 710.52


  3%|▎         | 2630/100000 [52:40<31:50:50,  1.18s/it]

25547 episode score is 726.86


  3%|▎         | 2631/100000 [52:41<31:32:09,  1.17s/it]

25557 episode score is 692.05


  3%|▎         | 2632/100000 [52:42<31:34:57,  1.17s/it]

25567 episode score is 716.45


  3%|▎         | 2633/100000 [52:43<31:27:29,  1.16s/it]

25577 episode score is 702.64


  3%|▎         | 2634/100000 [52:44<31:23:19,  1.16s/it]

25587 episode score is 704.32


  3%|▎         | 2635/100000 [52:45<31:21:31,  1.16s/it]

25597 episode score is 713.43


  3%|▎         | 2636/100000 [52:46<31:01:28,  1.15s/it]

25607 episode score is 672.11


  3%|▎         | 2637/100000 [52:48<31:25:01,  1.16s/it]

25617 episode score is 727.99


  3%|▎         | 2638/100000 [52:49<31:21:58,  1.16s/it]

25627 episode score is 686.02


  3%|▎         | 2639/100000 [52:50<31:03:15,  1.15s/it]

25637 episode score is 691.17


  3%|▎         | 2640/100000 [52:51<30:56:00,  1.14s/it]

25647 episode score is 682.78


  3%|▎         | 2641/100000 [52:52<30:53:24,  1.14s/it]

25657 episode score is 685.99


  3%|▎         | 2642/100000 [52:53<30:40:22,  1.13s/it]

25667 episode score is 673.13


  3%|▎         | 2643/100000 [52:54<30:42:31,  1.14s/it]

25677 episode score is 687.86


  3%|▎         | 2644/100000 [52:56<30:51:50,  1.14s/it]

25687 episode score is 699.10


  3%|▎         | 2645/100000 [52:57<31:21:27,  1.16s/it]

25697 episode score is 727.75


  3%|▎         | 2646/100000 [52:58<30:52:07,  1.14s/it]

25706 episode score is 739.38


  3%|▎         | 2647/100000 [52:59<30:53:25,  1.14s/it]

25716 episode score is 693.42


  3%|▎         | 2648/100000 [53:00<30:24:35,  1.12s/it]

25726 episode score is 647.03


  3%|▎         | 2649/100000 [53:01<30:41:44,  1.14s/it]

25736 episode score is 703.26


  3%|▎         | 2650/100000 [53:02<31:01:45,  1.15s/it]

25746 episode score is 701.11


  3%|▎         | 2651/100000 [53:04<31:11:27,  1.15s/it]

25756 episode score is 691.82


  3%|▎         | 2652/100000 [53:05<30:56:52,  1.14s/it]

25766 episode score is 676.79


  3%|▎         | 2653/100000 [53:06<30:56:46,  1.14s/it]

25776 episode score is 704.13


  3%|▎         | 2654/100000 [53:07<30:53:06,  1.14s/it]

25786 episode score is 682.78


  3%|▎         | 2655/100000 [53:08<30:31:04,  1.13s/it]

25796 episode score is 660.09


  3%|▎         | 2656/100000 [53:09<30:38:52,  1.13s/it]

25806 episode score is 685.09


  3%|▎         | 2657/100000 [53:10<30:40:52,  1.13s/it]

25816 episode score is 676.86


  3%|▎         | 2658/100000 [53:12<31:07:29,  1.15s/it]

25826 episode score is 713.58


  3%|▎         | 2659/100000 [53:13<31:45:56,  1.17s/it]

25836 episode score is 741.78


  3%|▎         | 2660/100000 [53:14<31:27:20,  1.16s/it]

25846 episode score is 684.43


  3%|▎         | 2661/100000 [53:15<31:58:13,  1.18s/it]

25856 episode score is 701.59


  3%|▎         | 2662/100000 [53:16<31:43:14,  1.17s/it]

25866 episode score is 688.30


  3%|▎         | 2663/100000 [53:18<31:54:11,  1.18s/it]

25876 episode score is 716.81


  3%|▎         | 2664/100000 [53:19<31:39:32,  1.17s/it]

25886 episode score is 695.72


  3%|▎         | 2665/100000 [53:20<31:37:11,  1.17s/it]

25896 episode score is 703.09


  3%|▎         | 2666/100000 [53:21<31:40:09,  1.17s/it]

25906 episode score is 723.81


  3%|▎         | 2667/100000 [53:22<31:25:02,  1.16s/it]

25916 episode score is 695.10


  3%|▎         | 2668/100000 [53:23<31:18:29,  1.16s/it]

25926 episode score is 692.26


  3%|▎         | 2669/100000 [53:24<30:47:38,  1.14s/it]

25936 episode score is 675.17


  3%|▎         | 2670/100000 [53:26<30:50:31,  1.14s/it]

25946 episode score is 690.60


  3%|▎         | 2671/100000 [53:27<30:56:55,  1.14s/it]

25956 episode score is 675.83


  3%|▎         | 2672/100000 [53:28<30:45:30,  1.14s/it]

25966 episode score is 663.77


  3%|▎         | 2673/100000 [53:29<30:49:16,  1.14s/it]

25976 episode score is 690.70


  3%|▎         | 2674/100000 [53:30<30:36:14,  1.13s/it]

25985 episode score is 747.78


  3%|▎         | 2675/100000 [53:31<30:50:41,  1.14s/it]

25995 episode score is 702.90


  3%|▎         | 2676/100000 [53:32<31:19:03,  1.16s/it]

26005 episode score is 717.04


  3%|▎         | 2677/100000 [53:34<31:34:07,  1.17s/it]

26015 episode score is 720.83


  3%|▎         | 2678/100000 [53:35<31:16:23,  1.16s/it]

26025 episode score is 669.14


  3%|▎         | 2679/100000 [53:36<31:23:35,  1.16s/it]

26035 episode score is 700.93


  3%|▎         | 2680/100000 [53:37<31:21:31,  1.16s/it]

26045 episode score is 689.84


  3%|▎         | 2681/100000 [53:38<31:11:14,  1.15s/it]

26055 episode score is 682.25


  3%|▎         | 2682/100000 [53:39<31:23:01,  1.16s/it]

26065 episode score is 713.01


  3%|▎         | 2683/100000 [53:41<31:40:31,  1.17s/it]

26075 episode score is 731.29


  3%|▎         | 2684/100000 [53:42<31:47:16,  1.18s/it]

26085 episode score is 724.85


  3%|▎         | 2685/100000 [53:43<31:07:58,  1.15s/it]

26094 episode score is 741.72


  3%|▎         | 2686/100000 [53:44<31:43:24,  1.17s/it]

26104 episode score is 742.79


  3%|▎         | 2687/100000 [53:45<31:37:42,  1.17s/it]

26114 episode score is 712.18


  3%|▎         | 2688/100000 [53:46<31:39:03,  1.17s/it]

26124 episode score is 703.25


  3%|▎         | 2689/100000 [53:48<31:27:12,  1.16s/it]

26133 episode score is 748.05


  3%|▎         | 2690/100000 [53:49<31:11:40,  1.15s/it]

26143 episode score is 673.44


  3%|▎         | 2691/100000 [53:50<31:13:27,  1.16s/it]

26153 episode score is 703.94


  3%|▎         | 2692/100000 [53:51<31:31:27,  1.17s/it]

26163 episode score is 700.39


  3%|▎         | 2693/100000 [53:52<31:15:21,  1.16s/it]

26172 episode score is 766.52


  3%|▎         | 2694/100000 [53:53<31:25:36,  1.16s/it]

26182 episode score is 715.33


  3%|▎         | 2695/100000 [53:55<31:42:51,  1.17s/it]

26192 episode score is 731.56


  3%|▎         | 2696/100000 [53:56<31:58:44,  1.18s/it]

26202 episode score is 733.65


  3%|▎         | 2697/100000 [53:57<31:39:03,  1.17s/it]

26211 episode score is 770.28


  3%|▎         | 2698/100000 [53:58<31:03:07,  1.15s/it]

26220 episode score is 739.61


  3%|▎         | 2699/100000 [53:59<31:06:47,  1.15s/it]

26229 episode score is 793.47
26238 episode score is 773.15


  3%|▎         | 2700/100000 [54:01<40:02:54,  1.48s/it]

Iteration 2700: Average test reward: 769.66


  3%|▎         | 2701/100000 [54:03<37:06:09,  1.37s/it]

26247 episode score is 758.23


  3%|▎         | 2702/100000 [54:04<35:12:45,  1.30s/it]

26256 episode score is 769.97


  3%|▎         | 2703/100000 [54:05<34:21:57,  1.27s/it]

26266 episode score is 736.27


  3%|▎         | 2704/100000 [54:06<33:20:33,  1.23s/it]

26275 episode score is 765.97


  3%|▎         | 2705/100000 [54:07<32:12:53,  1.19s/it]

26284 episode score is 749.97


  3%|▎         | 2706/100000 [54:08<31:43:55,  1.17s/it]

26293 episode score is 773.22


  3%|▎         | 2707/100000 [54:09<31:25:37,  1.16s/it]

26302 episode score is 770.17


  3%|▎         | 2708/100000 [54:11<31:27:37,  1.16s/it]

26311 episode score is 799.81


  3%|▎         | 2709/100000 [54:12<31:08:09,  1.15s/it]

26320 episode score is 769.03


  3%|▎         | 2710/100000 [54:13<31:10:06,  1.15s/it]

26329 episode score is 781.72


  3%|▎         | 2711/100000 [54:14<31:31:29,  1.17s/it]

26339 episode score is 728.10


  3%|▎         | 2712/100000 [54:15<32:26:08,  1.20s/it]

26348 episode score is 815.89


  3%|▎         | 2713/100000 [54:17<31:59:29,  1.18s/it]

26357 episode score is 781.24


  3%|▎         | 2714/100000 [54:18<32:12:25,  1.19s/it]

26366 episode score is 809.80


  3%|▎         | 2715/100000 [54:19<32:09:56,  1.19s/it]

26376 episode score is 709.98


  3%|▎         | 2716/100000 [54:20<31:44:56,  1.17s/it]

26385 episode score is 775.03


  3%|▎         | 2717/100000 [54:21<31:10:50,  1.15s/it]

26394 episode score is 744.66


  3%|▎         | 2718/100000 [54:22<30:56:23,  1.14s/it]

26404 episode score is 659.61


  3%|▎         | 2719/100000 [54:23<30:32:12,  1.13s/it]

26413 episode score is 744.71


  3%|▎         | 2720/100000 [54:25<30:30:54,  1.13s/it]

26422 episode score is 769.41


  3%|▎         | 2721/100000 [54:26<31:05:30,  1.15s/it]

26432 episode score is 727.97


  3%|▎         | 2722/100000 [54:27<31:04:43,  1.15s/it]

26442 episode score is 690.38


  3%|▎         | 2723/100000 [54:28<30:39:40,  1.13s/it]

26451 episode score is 733.21


  3%|▎         | 2724/100000 [54:29<30:21:56,  1.12s/it]

26460 episode score is 738.30


  3%|▎         | 2725/100000 [54:30<30:17:00,  1.12s/it]

26469 episode score is 751.19


  3%|▎         | 2726/100000 [54:31<30:46:09,  1.14s/it]

26479 episode score is 721.69


  3%|▎         | 2727/100000 [54:33<31:11:38,  1.15s/it]

26489 episode score is 709.04


  3%|▎         | 2728/100000 [54:34<30:55:42,  1.14s/it]

26498 episode score is 762.06


  3%|▎         | 2729/100000 [54:35<30:31:03,  1.13s/it]

26507 episode score is 737.24


  3%|▎         | 2730/100000 [54:36<30:49:06,  1.14s/it]

26516 episode score is 789.69


  3%|▎         | 2731/100000 [54:37<30:43:24,  1.14s/it]

26525 episode score is 763.11


  3%|▎         | 2732/100000 [54:38<31:21:24,  1.16s/it]

26535 episode score is 729.69


  3%|▎         | 2733/100000 [54:39<30:57:32,  1.15s/it]

26544 episode score is 752.39


  3%|▎         | 2734/100000 [54:40<30:34:18,  1.13s/it]

26553 episode score is 750.90


  3%|▎         | 2735/100000 [54:42<30:56:47,  1.15s/it]

26563 episode score is 717.25


  3%|▎         | 2736/100000 [54:43<30:32:33,  1.13s/it]

26572 episode score is 740.43


  3%|▎         | 2737/100000 [54:44<30:52:32,  1.14s/it]

26581 episode score is 800.47


  3%|▎         | 2738/100000 [54:45<31:06:08,  1.15s/it]

26590 episode score is 801.68


  3%|▎         | 2739/100000 [54:46<31:08:29,  1.15s/it]

26599 episode score is 774.11


  3%|▎         | 2740/100000 [54:47<31:14:27,  1.16s/it]

26608 episode score is 775.29


  3%|▎         | 2741/100000 [54:49<31:15:29,  1.16s/it]

26617 episode score is 762.75


  3%|▎         | 2742/100000 [54:50<31:11:50,  1.15s/it]

26626 episode score is 750.49


  3%|▎         | 2743/100000 [54:51<31:05:30,  1.15s/it]

26635 episode score is 751.88


  3%|▎         | 2744/100000 [54:52<30:46:05,  1.14s/it]

26643 episode score is 851.39


  3%|▎         | 2745/100000 [54:53<31:16:46,  1.16s/it]

26652 episode score is 820.15


  3%|▎         | 2746/100000 [54:54<31:20:44,  1.16s/it]

26661 episode score is 793.19


  3%|▎         | 2747/100000 [54:55<31:00:39,  1.15s/it]

26669 episode score is 868.46


  3%|▎         | 2748/100000 [54:57<31:12:13,  1.16s/it]

26678 episode score is 798.74


  3%|▎         | 2749/100000 [54:58<31:30:05,  1.17s/it]

26687 episode score is 790.13


  3%|▎         | 2750/100000 [54:59<31:23:08,  1.16s/it]

26696 episode score is 775.44


  3%|▎         | 2751/100000 [55:00<30:53:13,  1.14s/it]

26705 episode score is 744.14


  3%|▎         | 2752/100000 [55:01<30:52:43,  1.14s/it]

26714 episode score is 769.88


  3%|▎         | 2753/100000 [55:02<31:20:23,  1.16s/it]

26724 episode score is 709.03


  3%|▎         | 2754/100000 [55:04<31:46:48,  1.18s/it]

26734 episode score is 737.08


  3%|▎         | 2755/100000 [55:05<31:08:57,  1.15s/it]

26743 episode score is 742.81


  3%|▎         | 2756/100000 [55:06<31:23:19,  1.16s/it]

26752 episode score is 803.09


  3%|▎         | 2757/100000 [55:07<31:42:58,  1.17s/it]

26762 episode score is 730.83


  3%|▎         | 2758/100000 [55:08<31:45:51,  1.18s/it]

26772 episode score is 708.38


  3%|▎         | 2759/100000 [55:09<31:08:55,  1.15s/it]

26781 episode score is 727.35


  3%|▎         | 2760/100000 [55:11<30:57:39,  1.15s/it]

26790 episode score is 775.02


  3%|▎         | 2761/100000 [55:12<31:33:57,  1.17s/it]

26800 episode score is 738.46


  3%|▎         | 2762/100000 [55:13<31:10:57,  1.15s/it]

26809 episode score is 762.74


  3%|▎         | 2763/100000 [55:14<30:42:27,  1.14s/it]

26818 episode score is 741.10


  3%|▎         | 2764/100000 [55:15<30:29:52,  1.13s/it]

26827 episode score is 753.62


  3%|▎         | 2765/100000 [55:16<30:49:25,  1.14s/it]

26837 episode score is 722.79


  3%|▎         | 2766/100000 [55:17<31:14:07,  1.16s/it]

26847 episode score is 711.17


  3%|▎         | 2767/100000 [55:19<30:52:24,  1.14s/it]

26857 episode score is 661.99


  3%|▎         | 2768/100000 [55:20<31:16:22,  1.16s/it]

26867 episode score is 717.95


  3%|▎         | 2769/100000 [55:21<31:09:46,  1.15s/it]

26877 episode score is 698.01


  3%|▎         | 2770/100000 [55:22<30:58:37,  1.15s/it]

26887 episode score is 678.82


  3%|▎         | 2771/100000 [55:23<30:57:03,  1.15s/it]

26897 episode score is 693.11


  3%|▎         | 2772/100000 [55:24<31:38:04,  1.17s/it]

26907 episode score is 691.97


  3%|▎         | 2773/100000 [55:26<31:13:48,  1.16s/it]

26917 episode score is 673.77


  3%|▎         | 2774/100000 [55:27<31:11:33,  1.15s/it]

26927 episode score is 704.74


  3%|▎         | 2775/100000 [55:28<31:20:00,  1.16s/it]

26937 episode score is 705.38


  3%|▎         | 2776/100000 [55:29<31:09:55,  1.15s/it]

26947 episode score is 691.83


  3%|▎         | 2777/100000 [55:30<30:40:46,  1.14s/it]

26956 episode score is 737.85


  3%|▎         | 2778/100000 [55:31<31:02:26,  1.15s/it]

26966 episode score is 723.93


  3%|▎         | 2779/100000 [55:32<31:22:33,  1.16s/it]

26976 episode score is 722.30


  3%|▎         | 2780/100000 [55:34<30:53:32,  1.14s/it]

26985 episode score is 748.88


  3%|▎         | 2781/100000 [55:35<31:00:24,  1.15s/it]

26995 episode score is 713.25


  3%|▎         | 2782/100000 [55:36<31:13:11,  1.16s/it]

27004 episode score is 792.48


  3%|▎         | 2783/100000 [55:37<31:00:11,  1.15s/it]

27013 episode score is 768.25


  3%|▎         | 2784/100000 [55:38<30:49:58,  1.14s/it]

27022 episode score is 754.20


  3%|▎         | 2785/100000 [55:39<31:08:21,  1.15s/it]

27032 episode score is 726.54


  3%|▎         | 2786/100000 [55:40<30:42:24,  1.14s/it]

27041 episode score is 740.36


  3%|▎         | 2787/100000 [55:42<30:33:54,  1.13s/it]

27050 episode score is 768.92


  3%|▎         | 2788/100000 [55:43<30:48:19,  1.14s/it]

27059 episode score is 791.88


  3%|▎         | 2789/100000 [55:44<31:14:27,  1.16s/it]

27069 episode score is 735.71


  3%|▎         | 2790/100000 [55:45<31:05:11,  1.15s/it]

27079 episode score is 673.22


  3%|▎         | 2791/100000 [55:46<31:08:26,  1.15s/it]

27089 episode score is 705.97


  3%|▎         | 2792/100000 [55:47<31:09:19,  1.15s/it]

27098 episode score is 779.48


  3%|▎         | 2793/100000 [55:49<31:24:21,  1.16s/it]

27108 episode score is 717.69


  3%|▎         | 2794/100000 [55:50<30:59:42,  1.15s/it]

27117 episode score is 773.48


  3%|▎         | 2795/100000 [55:51<31:09:10,  1.15s/it]

27127 episode score is 715.59


  3%|▎         | 2796/100000 [55:52<30:45:24,  1.14s/it]

27136 episode score is 749.01


  3%|▎         | 2797/100000 [55:53<30:59:23,  1.15s/it]

27146 episode score is 714.64


  3%|▎         | 2798/100000 [55:54<31:05:39,  1.15s/it]

27156 episode score is 713.97


  3%|▎         | 2799/100000 [55:55<30:48:11,  1.14s/it]

27165 episode score is 783.39
27175 episode score is 743.85


  3%|▎         | 2800/100000 [55:58<39:56:51,  1.48s/it]

Iteration 2800: Average test reward: 732.33


  3%|▎         | 2801/100000 [55:59<37:50:17,  1.40s/it]

27185 episode score is 759.42


  3%|▎         | 2802/100000 [56:00<35:22:24,  1.31s/it]

27194 episode score is 750.91


  3%|▎         | 2803/100000 [56:01<33:34:56,  1.24s/it]

27203 episode score is 726.50


  3%|▎         | 2804/100000 [56:02<32:46:08,  1.21s/it]

27212 episode score is 770.80


  3%|▎         | 2805/100000 [56:03<31:48:45,  1.18s/it]

27221 episode score is 741.05


  3%|▎         | 2806/100000 [56:04<31:04:40,  1.15s/it]

27229 episode score is 829.16


  3%|▎         | 2807/100000 [56:06<31:14:48,  1.16s/it]

27238 episode score is 813.95


  3%|▎         | 2808/100000 [56:07<30:48:29,  1.14s/it]

27247 episode score is 749.14


  3%|▎         | 2809/100000 [56:08<31:12:51,  1.16s/it]

27256 episode score is 807.31


  3%|▎         | 2810/100000 [56:09<31:25:53,  1.16s/it]

27266 episode score is 713.24


  3%|▎         | 2811/100000 [56:10<31:09:04,  1.15s/it]

27275 episode score is 770.64


  3%|▎         | 2812/100000 [56:11<30:58:07,  1.15s/it]

27284 episode score is 770.02


  3%|▎         | 2813/100000 [56:12<31:26:09,  1.16s/it]

27293 episode score is 814.95


  3%|▎         | 2814/100000 [56:14<31:23:47,  1.16s/it]

27302 episode score is 786.67


  3%|▎         | 2815/100000 [56:15<31:03:41,  1.15s/it]

27311 episode score is 756.54


  3%|▎         | 2816/100000 [56:16<30:51:28,  1.14s/it]

27320 episode score is 763.11


  3%|▎         | 2817/100000 [56:17<30:41:32,  1.14s/it]

27329 episode score is 767.36


  3%|▎         | 2818/100000 [56:18<30:23:52,  1.13s/it]

27338 episode score is 735.38


  3%|▎         | 2819/100000 [56:19<30:23:13,  1.13s/it]

27347 episode score is 762.17


  3%|▎         | 2820/100000 [56:20<30:24:02,  1.13s/it]

27356 episode score is 772.21


  3%|▎         | 2821/100000 [56:21<30:25:05,  1.13s/it]

27365 episode score is 780.43


  3%|▎         | 2822/100000 [56:23<30:40:08,  1.14s/it]

27374 episode score is 794.89


  3%|▎         | 2823/100000 [56:24<30:42:36,  1.14s/it]

27383 episode score is 785.74


  3%|▎         | 2824/100000 [56:25<30:42:04,  1.14s/it]

27393 episode score is 690.60


  3%|▎         | 2825/100000 [56:26<30:50:45,  1.14s/it]

27403 episode score is 706.19


  3%|▎         | 2826/100000 [56:27<30:58:22,  1.15s/it]

27413 episode score is 709.49


  3%|▎         | 2827/100000 [56:28<30:58:19,  1.15s/it]

27423 episode score is 675.11


  3%|▎         | 2828/100000 [56:30<31:22:07,  1.16s/it]

27433 episode score is 738.45


  3%|▎         | 2829/100000 [56:31<31:36:51,  1.17s/it]

27443 episode score is 744.16


  3%|▎         | 2830/100000 [56:32<31:06:08,  1.15s/it]

27452 episode score is 753.59


  3%|▎         | 2831/100000 [56:33<31:07:56,  1.15s/it]

27462 episode score is 700.20


  3%|▎         | 2832/100000 [56:34<30:58:23,  1.15s/it]

27472 episode score is 695.64


  3%|▎         | 2833/100000 [56:35<31:26:59,  1.17s/it]

27482 episode score is 739.44


  3%|▎         | 2834/100000 [56:37<31:36:34,  1.17s/it]

27492 episode score is 671.15


  3%|▎         | 2835/100000 [56:38<31:23:39,  1.16s/it]

27502 episode score is 687.65


  3%|▎         | 2836/100000 [56:39<31:39:02,  1.17s/it]

27512 episode score is 719.60


  3%|▎         | 2837/100000 [56:40<31:32:05,  1.17s/it]

27522 episode score is 696.63


  3%|▎         | 2838/100000 [56:41<31:06:24,  1.15s/it]

27532 episode score is 681.09


  3%|▎         | 2839/100000 [56:42<30:59:52,  1.15s/it]

27542 episode score is 693.01


  3%|▎         | 2840/100000 [56:43<30:49:04,  1.14s/it]

27552 episode score is 680.61


  3%|▎         | 2841/100000 [56:45<30:22:55,  1.13s/it]

27562 episode score is 654.93


  3%|▎         | 2842/100000 [56:46<30:46:51,  1.14s/it]

27572 episode score is 706.07


  3%|▎         | 2843/100000 [56:47<31:04:32,  1.15s/it]

27582 episode score is 709.74


  3%|▎         | 2844/100000 [56:48<30:53:57,  1.14s/it]

27592 episode score is 648.68


  3%|▎         | 2845/100000 [56:49<31:15:13,  1.16s/it]

27603 episode score is 647.34


  3%|▎         | 2846/100000 [56:50<31:16:40,  1.16s/it]

27613 episode score is 722.02


  3%|▎         | 2847/100000 [56:51<30:44:37,  1.14s/it]

27623 episode score is 653.15


  3%|▎         | 2848/100000 [56:53<30:28:26,  1.13s/it]

27632 episode score is 744.36


  3%|▎         | 2849/100000 [56:54<30:32:01,  1.13s/it]

27642 episode score is 692.10


  3%|▎         | 2850/100000 [56:55<30:40:19,  1.14s/it]

27651 episode score is 785.95


  3%|▎         | 2851/100000 [56:56<30:32:47,  1.13s/it]

27660 episode score is 764.40


  3%|▎         | 2852/100000 [56:57<30:47:12,  1.14s/it]

27669 episode score is 817.67


  3%|▎         | 2853/100000 [56:58<31:26:05,  1.16s/it]

27679 episode score is 754.62


  3%|▎         | 2854/100000 [57:00<31:46:37,  1.18s/it]

27689 episode score is 753.20


  3%|▎         | 2855/100000 [57:01<31:41:51,  1.17s/it]

27699 episode score is 716.60


  3%|▎         | 2856/100000 [57:02<30:59:58,  1.15s/it]

27708 episode score is 751.07


  3%|▎         | 2857/100000 [57:03<31:32:19,  1.17s/it]

27718 episode score is 749.53


  3%|▎         | 2858/100000 [57:04<30:50:29,  1.14s/it]

27727 episode score is 755.23


  3%|▎         | 2859/100000 [57:05<30:38:04,  1.14s/it]

27736 episode score is 773.63


  3%|▎         | 2860/100000 [57:06<30:55:52,  1.15s/it]

27746 episode score is 723.63


  3%|▎         | 2861/100000 [57:08<31:16:16,  1.16s/it]

27756 episode score is 739.15


  3%|▎         | 2862/100000 [57:09<31:07:55,  1.15s/it]

27765 episode score is 782.56


  3%|▎         | 2863/100000 [57:10<30:58:26,  1.15s/it]

27774 episode score is 777.64


  3%|▎         | 2864/100000 [57:11<31:09:43,  1.15s/it]

27783 episode score is 799.25


  3%|▎         | 2865/100000 [57:12<30:58:54,  1.15s/it]

27792 episode score is 766.94


  3%|▎         | 2866/100000 [57:13<30:49:18,  1.14s/it]

27801 episode score is 774.65


  3%|▎         | 2867/100000 [57:14<31:16:32,  1.16s/it]

27811 episode score is 741.45


  3%|▎         | 2868/100000 [57:16<31:39:13,  1.17s/it]

27821 episode score is 729.43


  3%|▎         | 2869/100000 [57:17<31:57:53,  1.18s/it]

27831 episode score is 747.32


  3%|▎         | 2870/100000 [57:18<31:26:04,  1.17s/it]

27840 episode score is 771.32


  3%|▎         | 2871/100000 [57:19<31:07:38,  1.15s/it]

27849 episode score is 767.53


  3%|▎         | 2872/100000 [57:20<31:19:12,  1.16s/it]

27859 episode score is 725.75


  3%|▎         | 2873/100000 [57:22<31:30:43,  1.17s/it]

27869 episode score is 734.54


  3%|▎         | 2874/100000 [57:23<31:00:26,  1.15s/it]

27878 episode score is 748.50


  3%|▎         | 2875/100000 [57:24<31:07:11,  1.15s/it]

27888 episode score is 706.77


  3%|▎         | 2876/100000 [57:25<31:17:18,  1.16s/it]

27898 episode score is 719.87


  3%|▎         | 2877/100000 [57:26<31:34:43,  1.17s/it]

27908 episode score is 737.19


  3%|▎         | 2878/100000 [57:27<31:35:07,  1.17s/it]

27918 episode score is 698.41


  3%|▎         | 2879/100000 [57:28<31:31:57,  1.17s/it]

27928 episode score is 709.15


  3%|▎         | 2880/100000 [57:30<31:30:38,  1.17s/it]

27938 episode score is 721.25


  3%|▎         | 2881/100000 [57:31<31:17:25,  1.16s/it]

27948 episode score is 704.84


  3%|▎         | 2882/100000 [57:32<31:00:09,  1.15s/it]

27958 episode score is 691.44


  3%|▎         | 2883/100000 [57:33<30:56:33,  1.15s/it]

27968 episode score is 697.38


  3%|▎         | 2884/100000 [57:34<30:49:54,  1.14s/it]

27978 episode score is 685.36


  3%|▎         | 2885/100000 [57:35<30:43:40,  1.14s/it]

27988 episode score is 684.09


  3%|▎         | 2886/100000 [57:36<30:56:02,  1.15s/it]

27999 episode score is 628.87


  3%|▎         | 2887/100000 [57:38<31:09:26,  1.16s/it]

28010 episode score is 649.37


  3%|▎         | 2888/100000 [57:39<30:53:31,  1.15s/it]

28020 episode score is 664.28


  3%|▎         | 2889/100000 [57:40<31:16:14,  1.16s/it]

28031 episode score is 638.96


  3%|▎         | 2890/100000 [57:41<30:54:40,  1.15s/it]

28041 episode score is 669.55


  3%|▎         | 2891/100000 [57:42<30:32:49,  1.13s/it]

28051 episode score is 666.83


  3%|▎         | 2892/100000 [57:43<30:28:08,  1.13s/it]

28061 episode score is 683.14


  3%|▎         | 2893/100000 [57:44<30:32:16,  1.13s/it]

28071 episode score is 690.88


  3%|▎         | 2894/100000 [57:46<30:12:46,  1.12s/it]

28081 episode score is 668.61


  3%|▎         | 2895/100000 [57:47<29:55:59,  1.11s/it]

28091 episode score is 653.31


  3%|▎         | 2896/100000 [57:48<31:10:14,  1.16s/it]

28102 episode score is 633.93


  3%|▎         | 2897/100000 [57:49<30:51:30,  1.14s/it]

28112 episode score is 665.84


  3%|▎         | 2898/100000 [57:50<30:35:43,  1.13s/it]

28122 episode score is 666.33


  3%|▎         | 2899/100000 [57:51<30:32:54,  1.13s/it]

28132 episode score is 693.32
28142 episode score is 674.65


  3%|▎         | 2900/100000 [57:53<38:19:23,  1.42s/it]

Iteration 2900: Average test reward: 666.59


  3%|▎         | 2901/100000 [57:54<36:00:53,  1.34s/it]

28152 episode score is 692.82


  3%|▎         | 2902/100000 [57:56<34:45:29,  1.29s/it]

28162 episode score is 718.28


  3%|▎         | 2903/100000 [57:57<33:28:55,  1.24s/it]

28172 episode score is 693.24


  3%|▎         | 2904/100000 [57:58<32:37:44,  1.21s/it]

28182 episode score is 696.84


  3%|▎         | 2905/100000 [57:59<32:09:24,  1.19s/it]

28192 episode score is 687.47


  3%|▎         | 2906/100000 [58:00<31:36:12,  1.17s/it]

28202 episode score is 666.42


  3%|▎         | 2907/100000 [58:01<31:29:01,  1.17s/it]

28212 episode score is 702.10


  3%|▎         | 2908/100000 [58:03<31:25:30,  1.17s/it]

28222 episode score is 703.70


  3%|▎         | 2909/100000 [58:04<31:21:30,  1.16s/it]

28232 episode score is 707.33


  3%|▎         | 2910/100000 [58:05<31:15:52,  1.16s/it]

28242 episode score is 697.97


  3%|▎         | 2911/100000 [58:06<31:37:00,  1.17s/it]

28252 episode score is 735.19


  3%|▎         | 2912/100000 [58:07<31:39:09,  1.17s/it]

28262 episode score is 732.27


  3%|▎         | 2913/100000 [58:08<31:36:37,  1.17s/it]

28272 episode score is 719.86


  3%|▎         | 2914/100000 [58:09<30:56:52,  1.15s/it]

28281 episode score is 733.48


  3%|▎         | 2915/100000 [58:11<30:26:56,  1.13s/it]

28290 episode score is 750.95


  3%|▎         | 2916/100000 [58:12<30:42:00,  1.14s/it]

28300 episode score is 703.09


  3%|▎         | 2917/100000 [58:13<31:01:46,  1.15s/it]

28310 episode score is 720.87


  3%|▎         | 2918/100000 [58:14<30:53:29,  1.15s/it]

28320 episode score is 684.89


  3%|▎         | 2919/100000 [58:15<30:44:50,  1.14s/it]

28330 episode score is 686.23


  3%|▎         | 2920/100000 [58:16<30:45:12,  1.14s/it]

28340 episode score is 689.88


  3%|▎         | 2921/100000 [58:17<30:51:58,  1.14s/it]

28350 episode score is 693.30


  3%|▎         | 2922/100000 [58:19<31:25:35,  1.17s/it]

28360 episode score is 720.33


  3%|▎         | 2923/100000 [58:20<31:14:44,  1.16s/it]

28370 episode score is 694.25


  3%|▎         | 2924/100000 [58:21<31:00:21,  1.15s/it]

28380 episode score is 685.82


  3%|▎         | 2925/100000 [58:22<30:45:56,  1.14s/it]

28390 episode score is 686.27


  3%|▎         | 2926/100000 [58:23<30:39:14,  1.14s/it]

28400 episode score is 689.97


  3%|▎         | 2927/100000 [58:24<30:35:23,  1.13s/it]

28410 episode score is 689.68


  3%|▎         | 2928/100000 [58:25<30:20:57,  1.13s/it]

28420 episode score is 660.17


  3%|▎         | 2929/100000 [58:27<30:26:57,  1.13s/it]

28430 episode score is 694.62


  3%|▎         | 2930/100000 [58:28<30:49:25,  1.14s/it]

28440 episode score is 717.95


  3%|▎         | 2931/100000 [58:29<30:28:02,  1.13s/it]

28450 episode score is 654.24


  3%|▎         | 2932/100000 [58:30<31:03:45,  1.15s/it]

28460 episode score is 719.20


  3%|▎         | 2933/100000 [58:31<31:01:00,  1.15s/it]

28470 episode score is 680.66


  3%|▎         | 2934/100000 [58:32<30:44:26,  1.14s/it]

28480 episode score is 677.38


  3%|▎         | 2935/100000 [58:33<30:46:41,  1.14s/it]

28491 episode score is 604.99


  3%|▎         | 2936/100000 [58:35<30:24:57,  1.13s/it]

28501 episode score is 663.29


  3%|▎         | 2937/100000 [58:36<30:22:23,  1.13s/it]

28511 episode score is 676.41


  3%|▎         | 2938/100000 [58:37<30:23:14,  1.13s/it]

28521 episode score is 680.96


  3%|▎         | 2939/100000 [58:38<30:54:39,  1.15s/it]

28532 episode score is 647.07


  3%|▎         | 2940/100000 [58:39<31:18:31,  1.16s/it]

28543 episode score is 649.62


  3%|▎         | 2941/100000 [58:40<30:52:12,  1.15s/it]

28553 episode score is 656.87


  3%|▎         | 2942/100000 [58:41<30:48:31,  1.14s/it]

28563 episode score is 683.96


  3%|▎         | 2943/100000 [58:43<30:41:56,  1.14s/it]

28573 episode score is 688.02


  3%|▎         | 2944/100000 [58:44<30:45:53,  1.14s/it]

28583 episode score is 688.00


  3%|▎         | 2945/100000 [58:45<31:09:05,  1.16s/it]

28594 episode score is 649.23


  3%|▎         | 2946/100000 [58:46<30:37:30,  1.14s/it]

28604 episode score is 649.83


  3%|▎         | 2947/100000 [58:47<30:42:15,  1.14s/it]

28614 episode score is 690.22


  3%|▎         | 2948/100000 [58:48<30:47:36,  1.14s/it]

28624 episode score is 676.74


  3%|▎         | 2949/100000 [58:49<30:41:58,  1.14s/it]

28634 episode score is 665.04


  3%|▎         | 2950/100000 [58:51<31:06:58,  1.15s/it]

28644 episode score is 713.09


  3%|▎         | 2951/100000 [58:52<31:14:00,  1.16s/it]

28654 episode score is 708.70


  3%|▎         | 2952/100000 [58:53<31:14:15,  1.16s/it]

28664 episode score is 681.08


  3%|▎         | 2953/100000 [58:54<31:12:49,  1.16s/it]

28674 episode score is 699.84


  3%|▎         | 2954/100000 [58:55<30:50:09,  1.14s/it]

28684 episode score is 663.54


  3%|▎         | 2955/100000 [58:56<31:01:06,  1.15s/it]

28694 episode score is 698.09


  3%|▎         | 2956/100000 [58:58<31:05:09,  1.15s/it]

28704 episode score is 686.45


  3%|▎         | 2957/100000 [58:59<31:46:57,  1.18s/it]

28714 episode score is 696.99


  3%|▎         | 2958/100000 [59:00<31:56:23,  1.18s/it]

28725 episode score is 642.29


  3%|▎         | 2959/100000 [59:01<31:55:54,  1.18s/it]

28735 episode score is 706.93


  3%|▎         | 2960/100000 [59:02<32:05:52,  1.19s/it]

28745 episode score is 710.25


  3%|▎         | 2961/100000 [59:04<32:01:02,  1.19s/it]

28755 episode score is 714.31


  3%|▎         | 2962/100000 [59:05<31:58:26,  1.19s/it]

28765 episode score is 715.74


  3%|▎         | 2963/100000 [59:06<31:57:46,  1.19s/it]

28775 episode score is 711.37


  3%|▎         | 2964/100000 [59:07<31:24:00,  1.16s/it]

28784 episode score is 751.26


  3%|▎         | 2965/100000 [59:08<31:48:17,  1.18s/it]

28794 episode score is 728.27


  3%|▎         | 2966/100000 [59:09<31:46:12,  1.18s/it]

28804 episode score is 698.41


  3%|▎         | 2967/100000 [59:11<31:36:18,  1.17s/it]

28814 episode score is 692.87


  3%|▎         | 2968/100000 [59:12<31:06:32,  1.15s/it]

28823 episode score is 748.55


  3%|▎         | 2969/100000 [59:13<31:06:12,  1.15s/it]

28832 episode score is 752.98


  3%|▎         | 2970/100000 [59:14<31:34:00,  1.17s/it]

28841 episode score is 806.09


  3%|▎         | 2971/100000 [59:15<31:19:11,  1.16s/it]

28850 episode score is 754.75


  3%|▎         | 2972/100000 [59:16<31:09:43,  1.16s/it]

28859 episode score is 759.32


  3%|▎         | 2973/100000 [59:18<31:56:45,  1.19s/it]

28869 episode score is 733.09


  3%|▎         | 2974/100000 [59:19<31:36:09,  1.17s/it]

28878 episode score is 754.10


  3%|▎         | 2975/100000 [59:20<31:29:18,  1.17s/it]

28887 episode score is 767.96


  3%|▎         | 2976/100000 [59:21<31:04:52,  1.15s/it]

28896 episode score is 736.45


  3%|▎         | 2977/100000 [59:22<30:50:56,  1.14s/it]

28905 episode score is 734.70


  3%|▎         | 2978/100000 [59:23<30:48:44,  1.14s/it]

28914 episode score is 764.04


  3%|▎         | 2979/100000 [59:24<30:50:01,  1.14s/it]

28923 episode score is 766.30


  3%|▎         | 2980/100000 [59:26<30:37:04,  1.14s/it]

28932 episode score is 753.74


  3%|▎         | 2981/100000 [59:27<30:46:24,  1.14s/it]

28942 episode score is 700.83


  3%|▎         | 2982/100000 [59:28<31:21:22,  1.16s/it]

28952 episode score is 725.48


  3%|▎         | 2983/100000 [59:29<31:11:33,  1.16s/it]

28961 episode score is 770.14


  3%|▎         | 2984/100000 [59:30<31:17:19,  1.16s/it]

28971 episode score is 695.43


  3%|▎         | 2985/100000 [59:31<31:19:37,  1.16s/it]

28981 episode score is 701.64


  3%|▎         | 2986/100000 [59:33<31:35:21,  1.17s/it]

28991 episode score is 708.00


  3%|▎         | 2987/100000 [59:34<31:05:37,  1.15s/it]

29000 episode score is 752.15


  3%|▎         | 2988/100000 [59:35<30:43:48,  1.14s/it]

29009 episode score is 731.45


  3%|▎         | 2989/100000 [59:36<31:22:40,  1.16s/it]

29019 episode score is 738.27


  3%|▎         | 2990/100000 [59:37<31:31:32,  1.17s/it]

29029 episode score is 716.41


  3%|▎         | 2991/100000 [59:38<31:04:46,  1.15s/it]

29038 episode score is 752.64


  3%|▎         | 2992/100000 [59:39<30:57:44,  1.15s/it]

29047 episode score is 761.41


  3%|▎         | 2993/100000 [59:41<31:26:02,  1.17s/it]

29056 episode score is 794.11


  3%|▎         | 2994/100000 [59:42<31:23:20,  1.16s/it]

29065 episode score is 775.72


  3%|▎         | 2995/100000 [59:43<31:10:09,  1.16s/it]

29074 episode score is 771.71


  3%|▎         | 2996/100000 [59:44<30:55:53,  1.15s/it]

29083 episode score is 759.28


  3%|▎         | 2997/100000 [59:45<30:57:52,  1.15s/it]

29092 episode score is 768.14


  3%|▎         | 2998/100000 [59:46<30:42:32,  1.14s/it]

29101 episode score is 761.74


  3%|▎         | 2999/100000 [59:47<30:44:24,  1.14s/it]

29110 episode score is 752.02
29119 episode score is 820.26


  3%|▎         | 3000/100000 [59:50<41:09:49,  1.53s/it]

Iteration 3000: Average test reward: 832.47


  3%|▎         | 3001/100000 [59:51<38:44:18,  1.44s/it]

29127 episode score is 947.33


  3%|▎         | 3002/100000 [59:52<36:47:09,  1.37s/it]

29136 episode score is 820.93


  3%|▎         | 3003/100000 [59:54<35:32:12,  1.32s/it]

29145 episode score is 799.42


  3%|▎         | 3004/100000 [59:55<34:50:54,  1.29s/it]

29154 episode score is 840.82


  3%|▎         | 3005/100000 [59:56<34:28:49,  1.28s/it]

29163 episode score is 839.16


  3%|▎         | 3006/100000 [59:57<33:49:24,  1.26s/it]

29172 episode score is 825.74


  3%|▎         | 3007/100000 [59:58<33:51:45,  1.26s/it]

29181 episode score is 866.51


  3%|▎         | 3008/100000 [1:00:00<33:04:55,  1.23s/it]

29189 episode score is 887.40


  3%|▎         | 3009/100000 [1:00:01<33:01:09,  1.23s/it]

29198 episode score is 833.74


  3%|▎         | 3010/100000 [1:00:02<33:13:57,  1.23s/it]

29207 episode score is 836.09


  3%|▎         | 3011/100000 [1:00:03<33:10:03,  1.23s/it]

29216 episode score is 840.90


  3%|▎         | 3012/100000 [1:00:05<32:49:18,  1.22s/it]

29225 episode score is 803.71


  3%|▎         | 3013/100000 [1:00:06<32:45:07,  1.22s/it]

29234 episode score is 826.16


  3%|▎         | 3014/100000 [1:00:07<32:34:02,  1.21s/it]

29243 episode score is 833.55


  3%|▎         | 3015/100000 [1:00:08<32:31:47,  1.21s/it]

29252 episode score is 815.09


  3%|▎         | 3016/100000 [1:00:09<31:53:00,  1.18s/it]

29260 episode score is 874.14


  3%|▎         | 3017/100000 [1:00:10<32:12:51,  1.20s/it]

29269 episode score is 859.27


  3%|▎         | 3018/100000 [1:00:12<32:26:51,  1.20s/it]

29278 episode score is 842.16


  3%|▎         | 3019/100000 [1:00:13<33:33:51,  1.25s/it]

29287 episode score is 824.57


  3%|▎         | 3020/100000 [1:00:14<33:15:39,  1.23s/it]

29296 episode score is 821.83


  3%|▎         | 3021/100000 [1:00:15<32:56:07,  1.22s/it]

29305 episode score is 821.74


  3%|▎         | 3022/100000 [1:00:17<32:18:46,  1.20s/it]

29313 episode score is 886.36


  3%|▎         | 3023/100000 [1:00:18<32:43:57,  1.22s/it]

29322 episode score is 836.99


  3%|▎         | 3024/100000 [1:00:19<32:48:34,  1.22s/it]

29331 episode score is 830.93


  3%|▎         | 3025/100000 [1:00:20<32:10:46,  1.19s/it]

29339 episode score is 865.57


  3%|▎         | 3026/100000 [1:00:21<32:13:49,  1.20s/it]

29347 episode score is 931.43


  3%|▎         | 3027/100000 [1:00:23<31:40:25,  1.18s/it]

29355 episode score is 880.33


  3%|▎         | 3028/100000 [1:00:24<32:11:48,  1.20s/it]

29364 episode score is 849.72


  3%|▎         | 3029/100000 [1:00:25<32:07:15,  1.19s/it]

29373 episode score is 818.16


  3%|▎         | 3030/100000 [1:00:26<31:55:18,  1.19s/it]

29382 episode score is 806.95


  3%|▎         | 3031/100000 [1:00:27<32:09:12,  1.19s/it]

29391 episode score is 839.39


  3%|▎         | 3032/100000 [1:00:29<32:09:46,  1.19s/it]

29400 episode score is 815.87


  3%|▎         | 3033/100000 [1:00:30<31:30:04,  1.17s/it]

29408 episode score is 840.18


  3%|▎         | 3034/100000 [1:00:31<31:57:08,  1.19s/it]

29417 episode score is 831.50


  3%|▎         | 3035/100000 [1:00:32<31:29:26,  1.17s/it]

29425 episode score is 850.91


  3%|▎         | 3036/100000 [1:00:33<31:43:24,  1.18s/it]

29434 episode score is 827.38


  3%|▎         | 3037/100000 [1:00:34<31:48:22,  1.18s/it]

29444 episode score is 722.57


  3%|▎         | 3038/100000 [1:00:36<32:09:34,  1.19s/it]

29453 episode score is 835.85


  3%|▎         | 3039/100000 [1:00:37<32:13:17,  1.20s/it]

29462 episode score is 818.49


  3%|▎         | 3040/100000 [1:00:38<31:30:33,  1.17s/it]

29471 episode score is 759.30


  3%|▎         | 3041/100000 [1:00:39<31:52:57,  1.18s/it]

29480 episode score is 824.82


  3%|▎         | 3042/100000 [1:00:40<31:43:35,  1.18s/it]

29490 episode score is 698.88


  3%|▎         | 3043/100000 [1:00:41<31:17:21,  1.16s/it]

29499 episode score is 752.45


  3%|▎         | 3044/100000 [1:00:43<31:44:27,  1.18s/it]

29509 episode score is 730.32


  3%|▎         | 3045/100000 [1:00:44<31:42:50,  1.18s/it]

29518 episode score is 769.36


  3%|▎         | 3046/100000 [1:00:45<31:16:57,  1.16s/it]

29527 episode score is 752.07


  3%|▎         | 3047/100000 [1:00:46<31:47:29,  1.18s/it]

29537 episode score is 736.08


  3%|▎         | 3048/100000 [1:00:47<32:14:52,  1.20s/it]

29546 episode score is 775.35


  3%|▎         | 3049/100000 [1:00:49<31:54:29,  1.18s/it]

29555 episode score is 745.86


  3%|▎         | 3050/100000 [1:00:50<31:25:56,  1.17s/it]

29564 episode score is 722.13


  3%|▎         | 3051/100000 [1:00:51<31:06:32,  1.16s/it]

29573 episode score is 736.80


  3%|▎         | 3052/100000 [1:00:52<31:16:53,  1.16s/it]

29582 episode score is 759.80


  3%|▎         | 3053/100000 [1:00:53<31:25:39,  1.17s/it]

29591 episode score is 767.74


  3%|▎         | 3054/100000 [1:00:54<31:25:44,  1.17s/it]

29600 episode score is 765.78


  3%|▎         | 3055/100000 [1:00:56<31:29:59,  1.17s/it]

29609 episode score is 775.82


  3%|▎         | 3056/100000 [1:00:57<31:20:34,  1.16s/it]

29618 episode score is 756.81


  3%|▎         | 3057/100000 [1:00:58<31:14:06,  1.16s/it]

29627 episode score is 774.83


  3%|▎         | 3058/100000 [1:00:59<31:12:04,  1.16s/it]

29636 episode score is 764.09


  3%|▎         | 3059/100000 [1:01:00<31:12:46,  1.16s/it]

29645 episode score is 757.35


  3%|▎         | 3060/100000 [1:01:01<31:19:34,  1.16s/it]

29654 episode score is 773.81


  3%|▎         | 3061/100000 [1:01:03<31:30:21,  1.17s/it]

29663 episode score is 769.46


  3%|▎         | 3062/100000 [1:01:04<31:21:24,  1.16s/it]

29672 episode score is 768.74


  3%|▎         | 3063/100000 [1:01:05<31:12:32,  1.16s/it]

29681 episode score is 752.68


  3%|▎         | 3064/100000 [1:01:06<31:37:19,  1.17s/it]

29690 episode score is 796.35


  3%|▎         | 3065/100000 [1:01:07<32:37:33,  1.21s/it]

29699 episode score is 789.69


  3%|▎         | 3066/100000 [1:01:08<32:14:01,  1.20s/it]

29708 episode score is 765.63


  3%|▎         | 3067/100000 [1:01:10<32:09:32,  1.19s/it]

29717 episode score is 767.13


  3%|▎         | 3068/100000 [1:01:11<31:59:26,  1.19s/it]

29726 episode score is 786.07


  3%|▎         | 3069/100000 [1:01:12<32:05:56,  1.19s/it]

29735 episode score is 791.19


  3%|▎         | 3070/100000 [1:01:13<32:02:52,  1.19s/it]

29744 episode score is 788.35


  3%|▎         | 3071/100000 [1:01:14<31:51:00,  1.18s/it]

29753 episode score is 749.91


  3%|▎         | 3072/100000 [1:01:16<31:59:47,  1.19s/it]

29762 episode score is 786.92


  3%|▎         | 3073/100000 [1:01:17<32:03:10,  1.19s/it]

29771 episode score is 794.50


  3%|▎         | 3074/100000 [1:01:18<31:54:18,  1.19s/it]

29780 episode score is 770.30


  3%|▎         | 3075/100000 [1:01:19<31:49:28,  1.18s/it]

29789 episode score is 780.51


  3%|▎         | 3076/100000 [1:01:20<31:35:11,  1.17s/it]

29798 episode score is 753.38


  3%|▎         | 3077/100000 [1:01:21<31:29:48,  1.17s/it]

29807 episode score is 764.53


  3%|▎         | 3078/100000 [1:01:23<31:39:38,  1.18s/it]

29816 episode score is 773.06


  3%|▎         | 3079/100000 [1:01:24<31:29:05,  1.17s/it]

29825 episode score is 753.14


  3%|▎         | 3080/100000 [1:01:25<31:32:12,  1.17s/it]

29834 episode score is 777.38


  3%|▎         | 3081/100000 [1:01:26<31:17:31,  1.16s/it]

29843 episode score is 757.85


  3%|▎         | 3082/100000 [1:01:27<31:20:43,  1.16s/it]

29852 episode score is 784.27


  3%|▎         | 3083/100000 [1:01:28<31:25:35,  1.17s/it]

29861 episode score is 772.64


  3%|▎         | 3084/100000 [1:01:30<31:24:09,  1.17s/it]

29870 episode score is 765.52


  3%|▎         | 3085/100000 [1:01:31<31:13:11,  1.16s/it]

29879 episode score is 756.61


  3%|▎         | 3086/100000 [1:01:32<31:10:48,  1.16s/it]

29888 episode score is 731.26


  3%|▎         | 3087/100000 [1:01:33<31:45:27,  1.18s/it]

29897 episode score is 779.23


  3%|▎         | 3088/100000 [1:01:34<31:24:58,  1.17s/it]

29906 episode score is 761.31


  3%|▎         | 3089/100000 [1:01:35<31:10:57,  1.16s/it]

29915 episode score is 759.87


  3%|▎         | 3090/100000 [1:01:37<31:04:02,  1.15s/it]

29924 episode score is 781.04


  3%|▎         | 3091/100000 [1:01:38<31:03:28,  1.15s/it]

29933 episode score is 774.49


  3%|▎         | 3092/100000 [1:01:39<31:03:58,  1.15s/it]

29942 episode score is 773.88


  3%|▎         | 3093/100000 [1:01:40<31:33:14,  1.17s/it]

29951 episode score is 824.79


  3%|▎         | 3094/100000 [1:01:41<31:21:26,  1.16s/it]

29960 episode score is 770.74


  3%|▎         | 3095/100000 [1:01:42<31:08:46,  1.16s/it]

29969 episode score is 765.72


  3%|▎         | 3096/100000 [1:01:44<31:44:04,  1.18s/it]

29978 episode score is 812.35


  3%|▎         | 3097/100000 [1:01:45<31:54:14,  1.19s/it]

29987 episode score is 819.59


  3%|▎         | 3098/100000 [1:01:46<31:39:21,  1.18s/it]

29996 episode score is 780.26


  3%|▎         | 3099/100000 [1:01:47<31:35:44,  1.17s/it]

30005 episode score is 789.36
30014 episode score is 761.44


  3%|▎         | 3100/100000 [1:01:49<40:34:39,  1.51s/it]

Iteration 3100: Average test reward: 758.11


  3%|▎         | 3101/100000 [1:01:51<38:51:39,  1.44s/it]

30024 episode score is 771.22


  3%|▎         | 3102/100000 [1:01:52<36:41:38,  1.36s/it]

30033 episode score is 776.24


  3%|▎         | 3103/100000 [1:01:53<34:57:58,  1.30s/it]

30042 episode score is 772.89


  3%|▎         | 3104/100000 [1:01:54<34:01:31,  1.26s/it]

30051 episode score is 803.48


  3%|▎         | 3105/100000 [1:01:55<33:43:24,  1.25s/it]

30061 episode score is 746.41


  3%|▎         | 3106/100000 [1:01:57<33:00:06,  1.23s/it]

30071 episode score is 706.64


  3%|▎         | 3107/100000 [1:01:58<32:54:50,  1.22s/it]

30081 episode score is 747.12


  3%|▎         | 3108/100000 [1:01:59<32:05:44,  1.19s/it]

30090 episode score is 737.43


  3%|▎         | 3109/100000 [1:02:00<32:16:07,  1.20s/it]

30100 episode score is 740.52


  3%|▎         | 3110/100000 [1:02:01<31:38:20,  1.18s/it]

30109 episode score is 739.19


  3%|▎         | 3111/100000 [1:02:03<32:00:16,  1.19s/it]

30119 episode score is 728.73


  3%|▎         | 3112/100000 [1:02:04<31:59:14,  1.19s/it]

30128 episode score is 798.65


  3%|▎         | 3113/100000 [1:02:05<31:25:09,  1.17s/it]

30138 episode score is 668.13


  3%|▎         | 3114/100000 [1:02:06<31:01:13,  1.15s/it]

30147 episode score is 739.03


  3%|▎         | 3115/100000 [1:02:07<30:54:47,  1.15s/it]

30156 episode score is 754.60


  3%|▎         | 3116/100000 [1:02:08<31:19:57,  1.16s/it]

30166 episode score is 724.71


  3%|▎         | 3117/100000 [1:02:09<31:46:38,  1.18s/it]

30175 episode score is 815.35


  3%|▎         | 3118/100000 [1:02:11<31:45:53,  1.18s/it]

30184 episode score is 794.36


  3%|▎         | 3119/100000 [1:02:12<32:18:48,  1.20s/it]

30194 episode score is 742.86


  3%|▎         | 3120/100000 [1:02:13<31:49:06,  1.18s/it]

30203 episode score is 768.47


  3%|▎         | 3121/100000 [1:02:14<31:22:31,  1.17s/it]

30212 episode score is 758.11


  3%|▎         | 3122/100000 [1:02:15<31:19:50,  1.16s/it]

30221 episode score is 783.68


  3%|▎         | 3123/100000 [1:02:17<31:27:10,  1.17s/it]

30231 episode score is 704.39


  3%|▎         | 3124/100000 [1:02:18<31:05:31,  1.16s/it]

30239 episode score is 844.77


  3%|▎         | 3125/100000 [1:02:19<31:28:07,  1.17s/it]

30248 episode score is 813.97


  3%|▎         | 3126/100000 [1:02:20<31:41:29,  1.18s/it]

30258 episode score is 726.24


  3%|▎         | 3127/100000 [1:02:21<32:04:38,  1.19s/it]

30268 episode score is 732.31


  3%|▎         | 3128/100000 [1:02:22<31:42:55,  1.18s/it]

30278 episode score is 679.54


  3%|▎         | 3129/100000 [1:02:24<31:32:27,  1.17s/it]

30287 episode score is 807.39


  3%|▎         | 3130/100000 [1:02:25<31:03:24,  1.15s/it]

30296 episode score is 755.23


  3%|▎         | 3131/100000 [1:02:26<31:22:02,  1.17s/it]

30305 episode score is 802.50


  3%|▎         | 3132/100000 [1:02:27<31:50:07,  1.18s/it]

30315 episode score is 738.10


  3%|▎         | 3133/100000 [1:02:28<31:49:23,  1.18s/it]

30325 episode score is 714.52


  3%|▎         | 3134/100000 [1:02:30<32:48:56,  1.22s/it]

30334 episode score is 881.30


  3%|▎         | 3135/100000 [1:02:31<31:59:30,  1.19s/it]

30343 episode score is 752.75


  3%|▎         | 3136/100000 [1:02:32<32:30:57,  1.21s/it]

30353 episode score is 729.75


  3%|▎         | 3137/100000 [1:02:33<31:55:53,  1.19s/it]

30362 episode score is 760.08


  3%|▎         | 3138/100000 [1:02:34<32:13:52,  1.20s/it]

30372 episode score is 706.96


  3%|▎         | 3139/100000 [1:02:36<32:10:45,  1.20s/it]

30382 episode score is 723.29


  3%|▎         | 3140/100000 [1:02:37<31:53:43,  1.19s/it]

30392 episode score is 704.13


  3%|▎         | 3141/100000 [1:02:38<31:38:28,  1.18s/it]

30402 episode score is 695.25


  3%|▎         | 3142/100000 [1:02:39<32:23:44,  1.20s/it]

30412 episode score is 699.33


  3%|▎         | 3143/100000 [1:02:40<31:56:54,  1.19s/it]

30422 episode score is 694.63


  3%|▎         | 3144/100000 [1:02:41<31:21:18,  1.17s/it]

30432 episode score is 664.57


  3%|▎         | 3145/100000 [1:02:43<31:12:02,  1.16s/it]

30442 episode score is 688.20


  3%|▎         | 3146/100000 [1:02:44<30:53:41,  1.15s/it]

30452 episode score is 663.13


  3%|▎         | 3147/100000 [1:02:45<31:18:44,  1.16s/it]

30462 episode score is 729.47


  3%|▎         | 3148/100000 [1:02:46<31:04:16,  1.15s/it]

30472 episode score is 671.70


  3%|▎         | 3149/100000 [1:02:47<31:08:00,  1.16s/it]

30482 episode score is 680.42


  3%|▎         | 3150/100000 [1:02:48<31:35:17,  1.17s/it]

30492 episode score is 734.96


  3%|▎         | 3151/100000 [1:02:50<31:48:27,  1.18s/it]

30502 episode score is 734.89


  3%|▎         | 3152/100000 [1:02:51<31:25:44,  1.17s/it]

30512 episode score is 692.58


  3%|▎         | 3153/100000 [1:02:52<31:32:01,  1.17s/it]

30522 episode score is 721.54


  3%|▎         | 3154/100000 [1:02:53<31:05:41,  1.16s/it]

30532 episode score is 674.04


  3%|▎         | 3155/100000 [1:02:54<31:01:21,  1.15s/it]

30542 episode score is 688.37


  3%|▎         | 3156/100000 [1:02:55<31:25:34,  1.17s/it]

30553 episode score is 651.23


  3%|▎         | 3157/100000 [1:02:57<31:46:32,  1.18s/it]

30563 episode score is 762.37


  3%|▎         | 3158/100000 [1:02:58<31:01:20,  1.15s/it]

30573 episode score is 669.33


  3%|▎         | 3159/100000 [1:02:59<30:34:33,  1.14s/it]

30582 episode score is 742.08


  3%|▎         | 3160/100000 [1:03:00<30:45:21,  1.14s/it]

30592 episode score is 690.43


  3%|▎         | 3161/100000 [1:03:01<30:23:35,  1.13s/it]

30601 episode score is 732.09


  3%|▎         | 3162/100000 [1:03:02<31:02:15,  1.15s/it]

30611 episode score is 731.45


  3%|▎         | 3163/100000 [1:03:03<30:42:23,  1.14s/it]

30620 episode score is 753.54


  3%|▎         | 3164/100000 [1:03:04<31:03:09,  1.15s/it]

30630 episode score is 729.66


  3%|▎         | 3165/100000 [1:03:06<31:27:00,  1.17s/it]

30640 episode score is 735.66


  3%|▎         | 3166/100000 [1:03:07<31:04:13,  1.16s/it]

30650 episode score is 682.94


  3%|▎         | 3167/100000 [1:03:08<31:37:44,  1.18s/it]

30660 episode score is 757.43


  3%|▎         | 3168/100000 [1:03:09<31:05:20,  1.16s/it]

30669 episode score is 758.77


  3%|▎         | 3169/100000 [1:03:10<31:28:37,  1.17s/it]

30679 episode score is 734.15


  3%|▎         | 3170/100000 [1:03:11<31:07:13,  1.16s/it]

30688 episode score is 778.10


  3%|▎         | 3171/100000 [1:03:13<30:44:33,  1.14s/it]

30697 episode score is 749.53


  3%|▎         | 3172/100000 [1:03:14<30:21:32,  1.13s/it]

30706 episode score is 738.52


  3%|▎         | 3173/100000 [1:03:15<30:30:50,  1.13s/it]

30715 episode score is 780.16


  3%|▎         | 3174/100000 [1:03:16<30:59:32,  1.15s/it]

30725 episode score is 731.48


  3%|▎         | 3175/100000 [1:03:17<30:56:02,  1.15s/it]

30735 episode score is 690.11


  3%|▎         | 3176/100000 [1:03:18<31:03:53,  1.16s/it]

30745 episode score is 708.01


  3%|▎         | 3177/100000 [1:03:19<30:54:43,  1.15s/it]

30754 episode score is 751.61


  3%|▎         | 3178/100000 [1:03:21<31:07:50,  1.16s/it]

30764 episode score is 722.92


  3%|▎         | 3179/100000 [1:03:22<31:23:28,  1.17s/it]

30774 episode score is 723.57


  3%|▎         | 3180/100000 [1:03:23<30:53:47,  1.15s/it]

30783 episode score is 735.73


  3%|▎         | 3181/100000 [1:03:24<31:34:38,  1.17s/it]

30793 episode score is 747.55


  3%|▎         | 3182/100000 [1:03:25<31:12:53,  1.16s/it]

30802 episode score is 760.14


  3%|▎         | 3183/100000 [1:03:26<31:24:24,  1.17s/it]

30812 episode score is 725.51


  3%|▎         | 3184/100000 [1:03:28<31:46:58,  1.18s/it]

30823 episode score is 667.80


  3%|▎         | 3185/100000 [1:03:29<31:56:39,  1.19s/it]

30833 episode score is 725.36


  3%|▎         | 3186/100000 [1:03:30<32:03:52,  1.19s/it]

30843 episode score is 736.01


  3%|▎         | 3187/100000 [1:03:31<32:01:39,  1.19s/it]

30853 episode score is 726.56


  3%|▎         | 3188/100000 [1:03:33<32:25:48,  1.21s/it]

30863 episode score is 734.32


  3%|▎         | 3189/100000 [1:03:34<32:08:45,  1.20s/it]

30873 episode score is 705.93


  3%|▎         | 3190/100000 [1:03:35<31:28:05,  1.17s/it]

30882 episode score is 756.14


  3%|▎         | 3191/100000 [1:03:36<31:24:59,  1.17s/it]

30892 episode score is 711.64


  3%|▎         | 3192/100000 [1:03:37<31:13:11,  1.16s/it]

30901 episode score is 759.52


  3%|▎         | 3193/100000 [1:03:38<31:35:56,  1.18s/it]

30911 episode score is 732.40


  3%|▎         | 3194/100000 [1:03:39<31:01:41,  1.15s/it]

30920 episode score is 743.97


  3%|▎         | 3195/100000 [1:03:41<30:37:37,  1.14s/it]

30929 episode score is 730.14


  3%|▎         | 3196/100000 [1:03:42<31:14:28,  1.16s/it]

30939 episode score is 702.02


  3%|▎         | 3197/100000 [1:03:43<31:25:24,  1.17s/it]

30948 episode score is 772.08


  3%|▎         | 3198/100000 [1:03:44<31:28:11,  1.17s/it]

30957 episode score is 776.60


  3%|▎         | 3199/100000 [1:03:45<31:29:22,  1.17s/it]

30966 episode score is 779.40
30975 episode score is 758.90


  3%|▎         | 3200/100000 [1:03:48<41:23:17,  1.54s/it]

Iteration 3200: Average test reward: 769.61


  3%|▎         | 3201/100000 [1:03:49<38:16:09,  1.42s/it]

30984 episode score is 754.87


  3%|▎         | 3202/100000 [1:03:50<36:25:20,  1.35s/it]

30993 episode score is 794.21


  3%|▎         | 3203/100000 [1:03:51<34:24:19,  1.28s/it]

31002 episode score is 725.59


  3%|▎         | 3204/100000 [1:03:52<33:05:25,  1.23s/it]

31011 episode score is 720.52


  3%|▎         | 3205/100000 [1:03:53<32:32:36,  1.21s/it]

31020 episode score is 775.97


  3%|▎         | 3206/100000 [1:03:55<31:39:13,  1.18s/it]

31029 episode score is 735.14


  3%|▎         | 3207/100000 [1:03:56<31:35:06,  1.17s/it]

31038 episode score is 779.88


  3%|▎         | 3208/100000 [1:03:57<31:24:14,  1.17s/it]

31047 episode score is 771.90


  3%|▎         | 3209/100000 [1:03:58<31:15:53,  1.16s/it]

31056 episode score is 767.28


  3%|▎         | 3210/100000 [1:03:59<31:13:27,  1.16s/it]

31065 episode score is 785.91


  3%|▎         | 3211/100000 [1:04:00<31:40:49,  1.18s/it]

31074 episode score is 815.90


  3%|▎         | 3212/100000 [1:04:02<31:59:26,  1.19s/it]

31083 episode score is 799.39


  3%|▎         | 3213/100000 [1:04:03<31:50:57,  1.18s/it]

31092 episode score is 762.95


  3%|▎         | 3214/100000 [1:04:04<31:17:41,  1.16s/it]

31101 episode score is 736.45


  3%|▎         | 3215/100000 [1:04:05<31:10:11,  1.16s/it]

31110 episode score is 764.74


  3%|▎         | 3216/100000 [1:04:06<31:17:31,  1.16s/it]

31119 episode score is 772.15


  3%|▎         | 3217/100000 [1:04:07<30:48:53,  1.15s/it]

31128 episode score is 714.09


  3%|▎         | 3218/100000 [1:04:08<31:03:56,  1.16s/it]

31137 episode score is 769.02


  3%|▎         | 3219/100000 [1:04:10<30:54:13,  1.15s/it]

31146 episode score is 743.65


  3%|▎         | 3220/100000 [1:04:11<30:48:03,  1.15s/it]

31155 episode score is 739.97


  3%|▎         | 3221/100000 [1:04:12<30:44:58,  1.14s/it]

31164 episode score is 739.07


  3%|▎         | 3222/100000 [1:04:13<30:46:27,  1.14s/it]

31173 episode score is 735.45


  3%|▎         | 3223/100000 [1:04:14<31:14:49,  1.16s/it]

31183 episode score is 689.44


  3%|▎         | 3224/100000 [1:04:15<31:33:47,  1.17s/it]

31193 episode score is 701.27


  3%|▎         | 3225/100000 [1:04:17<31:29:03,  1.17s/it]

31203 episode score is 664.20


  3%|▎         | 3226/100000 [1:04:18<31:29:00,  1.17s/it]

31212 episode score is 730.39


  3%|▎         | 3227/100000 [1:04:19<31:07:04,  1.16s/it]

31221 episode score is 724.02


  3%|▎         | 3228/100000 [1:04:20<31:19:34,  1.17s/it]

31231 episode score is 687.41


  3%|▎         | 3229/100000 [1:04:21<31:06:01,  1.16s/it]

31240 episode score is 758.57


  3%|▎         | 3230/100000 [1:04:22<30:53:46,  1.15s/it]

31249 episode score is 734.39


  3%|▎         | 3231/100000 [1:04:24<30:52:09,  1.15s/it]

31258 episode score is 764.03


  3%|▎         | 3232/100000 [1:04:25<30:51:10,  1.15s/it]

31267 episode score is 751.93


  3%|▎         | 3233/100000 [1:04:26<30:51:27,  1.15s/it]

31276 episode score is 772.09


  3%|▎         | 3234/100000 [1:04:27<30:30:58,  1.14s/it]

31285 episode score is 725.69


  3%|▎         | 3235/100000 [1:04:28<30:36:57,  1.14s/it]

31294 episode score is 771.80


  3%|▎         | 3236/100000 [1:04:29<30:25:32,  1.13s/it]

31303 episode score is 744.01


  3%|▎         | 3237/100000 [1:04:30<30:31:23,  1.14s/it]

31312 episode score is 752.81


  3%|▎         | 3238/100000 [1:04:31<30:40:39,  1.14s/it]

31321 episode score is 769.72


  3%|▎         | 3239/100000 [1:04:33<31:18:09,  1.16s/it]

31330 episode score is 801.26


  3%|▎         | 3240/100000 [1:04:34<31:02:44,  1.16s/it]

31339 episode score is 748.58


  3%|▎         | 3241/100000 [1:04:35<30:52:58,  1.15s/it]

31348 episode score is 753.74


  3%|▎         | 3242/100000 [1:04:36<30:50:04,  1.15s/it]

31357 episode score is 750.85


  3%|▎         | 3243/100000 [1:04:37<31:11:41,  1.16s/it]

31366 episode score is 792.38


  3%|▎         | 3244/100000 [1:04:38<30:52:23,  1.15s/it]

31374 episode score is 858.00


  3%|▎         | 3245/100000 [1:04:40<30:39:11,  1.14s/it]

31383 episode score is 731.56


  3%|▎         | 3246/100000 [1:04:41<30:39:40,  1.14s/it]

31391 episode score is 859.61


  3%|▎         | 3247/100000 [1:04:42<31:14:36,  1.16s/it]

31400 episode score is 805.81


  3%|▎         | 3248/100000 [1:04:43<31:33:05,  1.17s/it]

31409 episode score is 785.72


  3%|▎         | 3249/100000 [1:04:44<32:13:01,  1.20s/it]

31418 episode score is 838.74


  3%|▎         | 3250/100000 [1:04:45<31:49:08,  1.18s/it]

31426 episode score is 853.30


  3%|▎         | 3251/100000 [1:04:47<32:06:40,  1.19s/it]

31436 episode score is 721.61


  3%|▎         | 3252/100000 [1:04:48<32:03:01,  1.19s/it]

31445 episode score is 778.53


  3%|▎         | 3253/100000 [1:04:49<31:49:12,  1.18s/it]

31455 episode score is 674.78


  3%|▎         | 3254/100000 [1:04:50<31:58:22,  1.19s/it]

31465 episode score is 706.39


  3%|▎         | 3255/100000 [1:04:51<31:19:05,  1.17s/it]

31474 episode score is 744.50


  3%|▎         | 3256/100000 [1:04:52<30:55:23,  1.15s/it]

31483 episode score is 723.89


  3%|▎         | 3257/100000 [1:04:54<30:26:11,  1.13s/it]

31492 episode score is 721.44


  3%|▎         | 3258/100000 [1:04:55<31:07:08,  1.16s/it]

31501 episode score is 746.03


  3%|▎         | 3259/100000 [1:04:56<31:00:52,  1.15s/it]

31511 episode score is 672.03


  3%|▎         | 3260/100000 [1:04:57<31:00:36,  1.15s/it]

31521 episode score is 699.39


  3%|▎         | 3261/100000 [1:04:58<31:08:26,  1.16s/it]

31531 episode score is 714.77


  3%|▎         | 3262/100000 [1:04:59<31:22:48,  1.17s/it]

31541 episode score is 724.43


  3%|▎         | 3263/100000 [1:05:01<31:17:18,  1.16s/it]

31550 episode score is 785.92


  3%|▎         | 3264/100000 [1:05:02<31:22:18,  1.17s/it]

31559 episode score is 810.12


  3%|▎         | 3265/100000 [1:05:03<31:37:49,  1.18s/it]

31569 episode score is 710.63


  3%|▎         | 3266/100000 [1:05:04<31:34:18,  1.17s/it]

31579 episode score is 701.21


  3%|▎         | 3267/100000 [1:05:05<31:06:53,  1.16s/it]

31588 episode score is 753.60


  3%|▎         | 3268/100000 [1:05:06<31:18:06,  1.16s/it]

31598 episode score is 705.84


  3%|▎         | 3269/100000 [1:05:08<31:09:14,  1.16s/it]

31608 episode score is 699.27


  3%|▎         | 3270/100000 [1:05:09<30:56:38,  1.15s/it]

31618 episode score is 678.43


  3%|▎         | 3271/100000 [1:05:10<31:00:56,  1.15s/it]

31628 episode score is 713.86


  3%|▎         | 3272/100000 [1:05:11<30:33:13,  1.14s/it]

31638 episode score is 659.68


  3%|▎         | 3273/100000 [1:05:12<30:32:26,  1.14s/it]

31648 episode score is 695.38


  3%|▎         | 3274/100000 [1:05:13<31:05:46,  1.16s/it]

31658 episode score is 730.97


  3%|▎         | 3275/100000 [1:05:14<30:35:18,  1.14s/it]

31668 episode score is 659.13


  3%|▎         | 3276/100000 [1:05:16<30:09:57,  1.12s/it]

31677 episode score is 737.84


  3%|▎         | 3277/100000 [1:05:17<30:03:21,  1.12s/it]

31686 episode score is 759.62


  3%|▎         | 3278/100000 [1:05:18<30:39:52,  1.14s/it]

31696 episode score is 721.67


  3%|▎         | 3279/100000 [1:05:19<30:47:46,  1.15s/it]

31706 episode score is 702.90


  3%|▎         | 3280/100000 [1:05:20<31:10:33,  1.16s/it]

31716 episode score is 727.42


  3%|▎         | 3281/100000 [1:05:21<31:10:46,  1.16s/it]

31726 episode score is 708.19


  3%|▎         | 3282/100000 [1:05:23<31:23:45,  1.17s/it]

31735 episode score is 803.96


  3%|▎         | 3283/100000 [1:05:24<31:31:17,  1.17s/it]

31745 episode score is 722.27


  3%|▎         | 3284/100000 [1:05:25<31:07:11,  1.16s/it]

31754 episode score is 761.13


  3%|▎         | 3285/100000 [1:05:26<30:51:34,  1.15s/it]

31763 episode score is 760.51


  3%|▎         | 3286/100000 [1:05:27<30:48:48,  1.15s/it]

31772 episode score is 780.04


  3%|▎         | 3287/100000 [1:05:28<31:02:29,  1.16s/it]

31782 episode score is 707.21


  3%|▎         | 3288/100000 [1:05:29<31:14:14,  1.16s/it]

31792 episode score is 694.67


  3%|▎         | 3289/100000 [1:05:31<31:33:25,  1.17s/it]

31802 episode score is 730.98


  3%|▎         | 3290/100000 [1:05:32<31:53:40,  1.19s/it]

31812 episode score is 719.91


  3%|▎         | 3291/100000 [1:05:33<31:31:26,  1.17s/it]

31821 episode score is 777.92


  3%|▎         | 3292/100000 [1:05:34<31:03:07,  1.16s/it]

31830 episode score is 753.70


  3%|▎         | 3293/100000 [1:05:35<31:08:41,  1.16s/it]

31840 episode score is 698.91


  3%|▎         | 3294/100000 [1:05:37<31:32:16,  1.17s/it]

31850 episode score is 727.50


  3%|▎         | 3295/100000 [1:05:38<31:48:56,  1.18s/it]

31860 episode score is 735.14


  3%|▎         | 3296/100000 [1:05:39<31:44:54,  1.18s/it]

31870 episode score is 700.42


  3%|▎         | 3297/100000 [1:05:40<31:25:33,  1.17s/it]

31879 episode score is 781.26


  3%|▎         | 3298/100000 [1:05:41<31:02:40,  1.16s/it]

31889 episode score is 681.10


  3%|▎         | 3299/100000 [1:05:42<31:25:59,  1.17s/it]

31899 episode score is 729.83
31909 episode score is 694.92


  3%|▎         | 3300/100000 [1:05:45<39:25:06,  1.47s/it]

Iteration 3300: Average test reward: 708.94


  3%|▎         | 3301/100000 [1:05:46<36:28:29,  1.36s/it]

31918 episode score is 764.73


  3%|▎         | 3302/100000 [1:05:47<34:25:41,  1.28s/it]

31927 episode score is 744.22


  3%|▎         | 3303/100000 [1:05:48<33:51:05,  1.26s/it]

31937 episode score is 712.06


  3%|▎         | 3304/100000 [1:05:49<32:34:10,  1.21s/it]

31946 episode score is 741.06


  3%|▎         | 3305/100000 [1:05:50<31:39:17,  1.18s/it]

31955 episode score is 735.38


  3%|▎         | 3306/100000 [1:05:51<31:22:08,  1.17s/it]

31965 episode score is 686.01


  3%|▎         | 3307/100000 [1:05:52<30:59:40,  1.15s/it]

31975 episode score is 671.52


  3%|▎         | 3308/100000 [1:05:54<30:48:56,  1.15s/it]

31985 episode score is 680.22


  3%|▎         | 3309/100000 [1:05:55<30:37:47,  1.14s/it]

31995 episode score is 687.15


  3%|▎         | 3310/100000 [1:05:56<30:42:02,  1.14s/it]

32005 episode score is 697.66


  3%|▎         | 3311/100000 [1:05:57<31:16:58,  1.16s/it]

32015 episode score is 740.05


  3%|▎         | 3312/100000 [1:05:58<31:23:29,  1.17s/it]

32025 episode score is 723.26


  3%|▎         | 3313/100000 [1:05:59<31:30:32,  1.17s/it]

32034 episode score is 802.67


  3%|▎         | 3314/100000 [1:06:01<31:16:23,  1.16s/it]

32044 episode score is 688.40


  3%|▎         | 3315/100000 [1:06:02<31:15:18,  1.16s/it]

32054 episode score is 697.15


  3%|▎         | 3316/100000 [1:06:03<31:24:33,  1.17s/it]

32064 episode score is 707.92


  3%|▎         | 3317/100000 [1:06:04<30:51:19,  1.15s/it]

32074 episode score is 655.81


  3%|▎         | 3318/100000 [1:06:05<31:03:02,  1.16s/it]

32084 episode score is 707.80


  3%|▎         | 3319/100000 [1:06:06<31:13:20,  1.16s/it]

32094 episode score is 702.73


  3%|▎         | 3320/100000 [1:06:07<31:20:04,  1.17s/it]

32103 episode score is 790.08


  3%|▎         | 3321/100000 [1:06:09<31:09:20,  1.16s/it]

32113 episode score is 683.28


  3%|▎         | 3322/100000 [1:06:10<31:15:52,  1.16s/it]

32123 episode score is 714.23


  3%|▎         | 3323/100000 [1:06:11<30:51:28,  1.15s/it]

32133 episode score is 664.55


  3%|▎         | 3324/100000 [1:06:12<31:06:23,  1.16s/it]

32143 episode score is 698.71


  3%|▎         | 3325/100000 [1:06:13<32:21:22,  1.20s/it]

32153 episode score is 746.53


  3%|▎         | 3326/100000 [1:06:15<31:45:41,  1.18s/it]

32163 episode score is 693.36


  3%|▎         | 3327/100000 [1:06:16<31:01:39,  1.16s/it]

32172 episode score is 746.58


  3%|▎         | 3328/100000 [1:06:17<31:22:41,  1.17s/it]

32182 episode score is 733.39


  3%|▎         | 3329/100000 [1:06:18<31:36:31,  1.18s/it]

32191 episode score is 803.69


  3%|▎         | 3330/100000 [1:06:19<31:03:24,  1.16s/it]

32200 episode score is 743.38


  3%|▎         | 3331/100000 [1:06:20<31:29:46,  1.17s/it]

32210 episode score is 729.67


  3%|▎         | 3332/100000 [1:06:21<31:12:08,  1.16s/it]

32220 episode score is 696.99


  3%|▎         | 3333/100000 [1:06:23<31:24:57,  1.17s/it]

32230 episode score is 714.25


  3%|▎         | 3334/100000 [1:06:24<30:58:00,  1.15s/it]

32240 episode score is 673.77


  3%|▎         | 3335/100000 [1:06:25<30:33:03,  1.14s/it]

32249 episode score is 741.43


  3%|▎         | 3336/100000 [1:06:26<31:04:32,  1.16s/it]

32260 episode score is 662.27


  3%|▎         | 3337/100000 [1:06:27<31:17:52,  1.17s/it]

32270 episode score is 712.48


  3%|▎         | 3338/100000 [1:06:28<31:17:50,  1.17s/it]

32280 episode score is 700.01


  3%|▎         | 3339/100000 [1:06:30<31:30:55,  1.17s/it]

32290 episode score is 731.59


  3%|▎         | 3340/100000 [1:06:31<31:10:00,  1.16s/it]

32300 episode score is 691.08


  3%|▎         | 3341/100000 [1:06:32<30:54:23,  1.15s/it]

32309 episode score is 772.52


  3%|▎         | 3342/100000 [1:06:33<30:58:27,  1.15s/it]

32319 episode score is 701.13


  3%|▎         | 3343/100000 [1:06:34<30:27:18,  1.13s/it]

32328 episode score is 751.82


  3%|▎         | 3344/100000 [1:06:35<30:48:06,  1.15s/it]

32338 episode score is 723.96


  3%|▎         | 3345/100000 [1:06:36<30:25:10,  1.13s/it]

32347 episode score is 760.65


  3%|▎         | 3346/100000 [1:06:38<30:14:24,  1.13s/it]

32356 episode score is 756.28


  3%|▎         | 3347/100000 [1:06:39<30:11:41,  1.12s/it]

32365 episode score is 758.68


  3%|▎         | 3348/100000 [1:06:40<30:55:48,  1.15s/it]

32375 episode score is 751.47


  3%|▎         | 3349/100000 [1:06:41<31:06:20,  1.16s/it]

32385 episode score is 717.05


  3%|▎         | 3350/100000 [1:06:42<31:00:59,  1.16s/it]

32395 episode score is 690.83


  3%|▎         | 3351/100000 [1:06:43<31:57:10,  1.19s/it]

32405 episode score is 769.51


  3%|▎         | 3352/100000 [1:06:45<31:24:16,  1.17s/it]

32414 episode score is 755.56


  3%|▎         | 3353/100000 [1:06:46<31:27:51,  1.17s/it]

32424 episode score is 707.72


  3%|▎         | 3354/100000 [1:06:47<31:34:21,  1.18s/it]

32434 episode score is 713.59


  3%|▎         | 3355/100000 [1:06:48<31:28:54,  1.17s/it]

32444 episode score is 695.37


  3%|▎         | 3356/100000 [1:06:49<31:12:05,  1.16s/it]

32453 episode score is 764.37


  3%|▎         | 3357/100000 [1:06:50<30:38:36,  1.14s/it]

32462 episode score is 743.20


  3%|▎         | 3358/100000 [1:06:51<30:18:31,  1.13s/it]

32471 episode score is 759.91


  3%|▎         | 3359/100000 [1:06:53<30:30:07,  1.14s/it]

32480 episode score is 777.52


  3%|▎         | 3360/100000 [1:06:54<30:35:43,  1.14s/it]

32489 episode score is 768.78


  3%|▎         | 3361/100000 [1:06:55<30:26:34,  1.13s/it]

32498 episode score is 763.70


  3%|▎         | 3362/100000 [1:06:56<30:53:12,  1.15s/it]

32508 episode score is 729.68


  3%|▎         | 3363/100000 [1:06:57<30:34:26,  1.14s/it]

32517 episode score is 757.39


  3%|▎         | 3364/100000 [1:06:58<30:27:01,  1.13s/it]

32526 episode score is 755.27


  3%|▎         | 3365/100000 [1:07:00<31:14:34,  1.16s/it]

32535 episode score is 813.00


  3%|▎         | 3366/100000 [1:07:01<31:32:28,  1.18s/it]

32544 episode score is 819.56


  3%|▎         | 3367/100000 [1:07:02<31:51:35,  1.19s/it]

32553 episode score is 811.57


  3%|▎         | 3368/100000 [1:07:03<31:57:52,  1.19s/it]

32562 episode score is 810.72


  3%|▎         | 3369/100000 [1:07:04<31:22:33,  1.17s/it]

32571 episode score is 747.64


  3%|▎         | 3370/100000 [1:07:05<30:46:00,  1.15s/it]

32579 episode score is 861.10


  3%|▎         | 3371/100000 [1:07:06<30:25:39,  1.13s/it]

32588 episode score is 745.62


  3%|▎         | 3372/100000 [1:07:08<30:40:06,  1.14s/it]

32597 episode score is 802.27


  3%|▎         | 3373/100000 [1:07:09<30:39:01,  1.14s/it]

32606 episode score is 766.61


  3%|▎         | 3374/100000 [1:07:10<30:41:52,  1.14s/it]

32616 episode score is 661.36


  3%|▎         | 3375/100000 [1:07:11<30:54:27,  1.15s/it]

32625 episode score is 787.44


  3%|▎         | 3376/100000 [1:07:12<30:33:02,  1.14s/it]

32634 episode score is 743.46


  3%|▎         | 3377/100000 [1:07:13<30:29:33,  1.14s/it]

32643 episode score is 766.64


  3%|▎         | 3378/100000 [1:07:14<30:25:26,  1.13s/it]

32652 episode score is 751.54


  3%|▎         | 3379/100000 [1:07:16<30:06:52,  1.12s/it]

32661 episode score is 743.27


  3%|▎         | 3380/100000 [1:07:17<30:08:51,  1.12s/it]

32670 episode score is 765.37


  3%|▎         | 3381/100000 [1:07:18<30:30:18,  1.14s/it]

32680 episode score is 681.95


  3%|▎         | 3382/100000 [1:07:19<31:14:57,  1.16s/it]

32689 episode score is 820.91


  3%|▎         | 3383/100000 [1:07:20<31:14:53,  1.16s/it]

32698 episode score is 785.82


  3%|▎         | 3384/100000 [1:07:21<31:10:34,  1.16s/it]

32707 episode score is 775.03


  3%|▎         | 3385/100000 [1:07:22<30:43:50,  1.15s/it]

32716 episode score is 740.06


  3%|▎         | 3386/100000 [1:07:24<30:44:47,  1.15s/it]

32725 episode score is 771.43


  3%|▎         | 3387/100000 [1:07:25<30:24:33,  1.13s/it]

32733 episode score is 838.40


  3%|▎         | 3388/100000 [1:07:26<30:22:42,  1.13s/it]

32742 episode score is 736.41


  3%|▎         | 3389/100000 [1:07:27<30:57:14,  1.15s/it]

32751 episode score is 822.83


  3%|▎         | 3390/100000 [1:07:28<30:33:58,  1.14s/it]

32759 episode score is 835.10


  3%|▎         | 3391/100000 [1:07:29<30:58:56,  1.15s/it]

32768 episode score is 822.05


  3%|▎         | 3392/100000 [1:07:31<31:46:37,  1.18s/it]

32777 episode score is 789.31


  3%|▎         | 3393/100000 [1:07:32<31:15:22,  1.16s/it]

32786 episode score is 746.24


  3%|▎         | 3394/100000 [1:07:33<31:02:38,  1.16s/it]

32794 episode score is 858.91


  3%|▎         | 3395/100000 [1:07:34<31:24:59,  1.17s/it]

32803 episode score is 820.24


  3%|▎         | 3396/100000 [1:07:35<30:51:13,  1.15s/it]

32811 episode score is 859.05


  3%|▎         | 3397/100000 [1:07:36<30:51:31,  1.15s/it]

32820 episode score is 775.10


  3%|▎         | 3398/100000 [1:07:37<30:48:51,  1.15s/it]

32829 episode score is 796.95


  3%|▎         | 3399/100000 [1:07:39<30:56:28,  1.15s/it]

32838 episode score is 799.57
32847 episode score is 806.80


  3%|▎         | 3400/100000 [1:07:41<40:18:08,  1.50s/it]

Iteration 3400: Average test reward: 789.07


  3%|▎         | 3401/100000 [1:07:42<37:15:15,  1.39s/it]

32856 episode score is 765.75


  3%|▎         | 3402/100000 [1:07:43<35:27:22,  1.32s/it]

32865 episode score is 774.74


  3%|▎         | 3403/100000 [1:07:44<34:15:08,  1.28s/it]

32874 episode score is 790.05


  3%|▎         | 3404/100000 [1:07:46<33:02:21,  1.23s/it]

32883 episode score is 752.49


  3%|▎         | 3405/100000 [1:07:47<32:02:32,  1.19s/it]

32892 episode score is 760.74


  3%|▎         | 3406/100000 [1:07:48<31:35:48,  1.18s/it]

32901 episode score is 784.93


  3%|▎         | 3407/100000 [1:07:49<31:22:05,  1.17s/it]

32910 episode score is 776.10


  3%|▎         | 3408/100000 [1:07:50<31:14:34,  1.16s/it]

32919 episode score is 786.58


  3%|▎         | 3409/100000 [1:07:51<31:05:13,  1.16s/it]

32928 episode score is 781.29


  3%|▎         | 3410/100000 [1:07:52<30:40:09,  1.14s/it]

32937 episode score is 745.38


  3%|▎         | 3411/100000 [1:07:53<30:40:46,  1.14s/it]

32946 episode score is 759.04


  3%|▎         | 3412/100000 [1:07:55<30:34:35,  1.14s/it]

32955 episode score is 771.25


  3%|▎         | 3413/100000 [1:07:56<30:29:34,  1.14s/it]

32964 episode score is 766.89


  3%|▎         | 3414/100000 [1:07:57<30:41:00,  1.14s/it]

32974 episode score is 703.87


  3%|▎         | 3415/100000 [1:07:58<31:04:53,  1.16s/it]

32984 episode score is 722.62


  3%|▎         | 3416/100000 [1:07:59<30:50:14,  1.15s/it]

32993 episode score is 759.04


  3%|▎         | 3417/100000 [1:08:00<30:48:02,  1.15s/it]

33002 episode score is 789.34


  3%|▎         | 3418/100000 [1:08:02<30:57:44,  1.15s/it]

33012 episode score is 717.97


  3%|▎         | 3419/100000 [1:08:03<31:16:40,  1.17s/it]

33022 episode score is 724.87


  3%|▎         | 3420/100000 [1:08:04<31:31:45,  1.18s/it]

33032 episode score is 725.45


  3%|▎         | 3421/100000 [1:08:05<30:52:23,  1.15s/it]

33041 episode score is 741.20


  3%|▎         | 3422/100000 [1:08:06<30:19:51,  1.13s/it]

33050 episode score is 735.78


  3%|▎         | 3423/100000 [1:08:07<30:12:30,  1.13s/it]

33059 episode score is 742.50


  3%|▎         | 3424/100000 [1:08:08<30:52:17,  1.15s/it]

33068 episode score is 821.52


  3%|▎         | 3425/100000 [1:08:10<30:29:28,  1.14s/it]

33076 episode score is 828.31


  3%|▎         | 3426/100000 [1:08:11<30:33:10,  1.14s/it]

33085 episode score is 778.23


  3%|▎         | 3427/100000 [1:08:12<31:00:12,  1.16s/it]

33094 episode score is 797.44


  3%|▎         | 3428/100000 [1:08:13<30:38:58,  1.14s/it]

33103 episode score is 757.41


  3%|▎         | 3429/100000 [1:08:14<30:48:09,  1.15s/it]

33112 episode score is 795.85


  3%|▎         | 3430/100000 [1:08:15<30:34:03,  1.14s/it]

33121 episode score is 767.01


  3%|▎         | 3431/100000 [1:08:16<30:30:44,  1.14s/it]

33130 episode score is 776.93


  3%|▎         | 3432/100000 [1:08:18<31:00:56,  1.16s/it]

33140 episode score is 730.51


  3%|▎         | 3433/100000 [1:08:19<31:16:59,  1.17s/it]

33149 episode score is 802.61


  3%|▎         | 3434/100000 [1:08:20<31:36:08,  1.18s/it]

33159 episode score is 727.54


  3%|▎         | 3435/100000 [1:08:21<31:33:37,  1.18s/it]

33169 episode score is 719.74


  3%|▎         | 3436/100000 [1:08:22<31:17:24,  1.17s/it]

33178 episode score is 776.08


  3%|▎         | 3437/100000 [1:08:23<30:40:25,  1.14s/it]

33187 episode score is 747.11


  3%|▎         | 3438/100000 [1:08:25<31:13:46,  1.16s/it]

33197 episode score is 748.05


  3%|▎         | 3439/100000 [1:08:26<31:33:57,  1.18s/it]

33207 episode score is 743.43


  3%|▎         | 3440/100000 [1:08:27<31:11:26,  1.16s/it]

33216 episode score is 767.06


  3%|▎         | 3441/100000 [1:08:28<30:40:14,  1.14s/it]

33225 episode score is 751.42


  3%|▎         | 3442/100000 [1:08:29<31:05:04,  1.16s/it]

33235 episode score is 731.14


  3%|▎         | 3443/100000 [1:08:30<30:53:56,  1.15s/it]

33244 episode score is 781.34


  3%|▎         | 3444/100000 [1:08:32<30:39:27,  1.14s/it]

33253 episode score is 764.35


  3%|▎         | 3445/100000 [1:08:33<31:16:54,  1.17s/it]

33263 episode score is 747.69


  3%|▎         | 3446/100000 [1:08:34<30:41:40,  1.14s/it]

33272 episode score is 753.70


  3%|▎         | 3447/100000 [1:08:35<30:30:10,  1.14s/it]

33281 episode score is 757.90


  3%|▎         | 3448/100000 [1:08:36<30:58:25,  1.15s/it]

33291 episode score is 745.61


  3%|▎         | 3449/100000 [1:08:37<30:34:58,  1.14s/it]

33300 episode score is 753.67


  3%|▎         | 3450/100000 [1:08:38<30:57:13,  1.15s/it]

33310 episode score is 743.06


  3%|▎         | 3451/100000 [1:08:40<31:09:18,  1.16s/it]

33319 episode score is 795.60


  3%|▎         | 3452/100000 [1:08:41<30:43:45,  1.15s/it]

33328 episode score is 752.46


  3%|▎         | 3453/100000 [1:08:42<31:06:12,  1.16s/it]

33337 episode score is 751.34


  3%|▎         | 3454/100000 [1:08:43<30:52:28,  1.15s/it]

33346 episode score is 787.86


  3%|▎         | 3455/100000 [1:08:44<30:38:33,  1.14s/it]

33354 episode score is 863.17


  3%|▎         | 3456/100000 [1:08:45<30:51:12,  1.15s/it]

33363 episode score is 784.10


  3%|▎         | 3457/100000 [1:08:47<31:09:07,  1.16s/it]

33372 episode score is 816.81


  3%|▎         | 3458/100000 [1:08:48<30:59:33,  1.16s/it]

33381 episode score is 778.45


  3%|▎         | 3459/100000 [1:08:49<30:55:50,  1.15s/it]

33390 episode score is 785.55


  3%|▎         | 3460/100000 [1:08:50<30:30:56,  1.14s/it]

33399 episode score is 763.18


  3%|▎         | 3461/100000 [1:08:51<30:18:25,  1.13s/it]

33408 episode score is 776.34


  3%|▎         | 3462/100000 [1:08:52<30:52:07,  1.15s/it]

33418 episode score is 741.83


  3%|▎         | 3463/100000 [1:08:53<30:38:12,  1.14s/it]

33427 episode score is 756.30


  3%|▎         | 3464/100000 [1:08:55<31:04:42,  1.16s/it]

33437 episode score is 733.75


  3%|▎         | 3465/100000 [1:08:56<31:26:05,  1.17s/it]

33447 episode score is 738.80


  3%|▎         | 3466/100000 [1:08:57<31:00:09,  1.16s/it]

33456 episode score is 770.36


  3%|▎         | 3467/100000 [1:08:58<31:23:42,  1.17s/it]

33466 episode score is 739.83


  3%|▎         | 3468/100000 [1:08:59<31:38:51,  1.18s/it]

33476 episode score is 734.19


  3%|▎         | 3469/100000 [1:09:00<31:28:10,  1.17s/it]

33486 episode score is 695.76


  3%|▎         | 3470/100000 [1:09:02<31:16:52,  1.17s/it]

33496 episode score is 706.89


  3%|▎         | 3471/100000 [1:09:03<31:05:18,  1.16s/it]

33506 episode score is 700.64


  3%|▎         | 3472/100000 [1:09:04<31:15:09,  1.17s/it]

33516 episode score is 710.33


  3%|▎         | 3473/100000 [1:09:05<31:24:50,  1.17s/it]

33526 episode score is 732.06


  3%|▎         | 3474/100000 [1:09:06<31:02:00,  1.16s/it]

33536 episode score is 681.76


  3%|▎         | 3475/100000 [1:09:07<31:22:01,  1.17s/it]

33547 episode score is 663.91


  3%|▎         | 3476/100000 [1:09:09<30:45:25,  1.15s/it]

33557 episode score is 667.23


  3%|▎         | 3477/100000 [1:09:10<31:07:27,  1.16s/it]

33568 episode score is 653.14


  3%|▎         | 3478/100000 [1:09:11<30:46:54,  1.15s/it]

33578 episode score is 686.44


  3%|▎         | 3479/100000 [1:09:12<30:42:40,  1.15s/it]

33588 episode score is 700.76


  3%|▎         | 3480/100000 [1:09:13<30:53:21,  1.15s/it]

33599 episode score is 644.79


  3%|▎         | 3481/100000 [1:09:14<31:03:14,  1.16s/it]

33610 episode score is 641.45


  3%|▎         | 3482/100000 [1:09:15<31:14:26,  1.17s/it]

33621 episode score is 652.62


  3%|▎         | 3483/100000 [1:09:17<30:36:03,  1.14s/it]

33631 episode score is 667.29


  3%|▎         | 3484/100000 [1:09:18<30:46:12,  1.15s/it]

33641 episode score is 699.64


  3%|▎         | 3485/100000 [1:09:19<30:18:56,  1.13s/it]

33651 episode score is 664.95


  3%|▎         | 3486/100000 [1:09:20<30:06:27,  1.12s/it]

33661 episode score is 660.25


  3%|▎         | 3487/100000 [1:09:21<30:15:23,  1.13s/it]

33671 episode score is 689.98


  3%|▎         | 3488/100000 [1:09:22<29:55:31,  1.12s/it]

33681 episode score is 662.21


  3%|▎         | 3489/100000 [1:09:23<30:07:22,  1.12s/it]

33691 episode score is 704.76


  3%|▎         | 3490/100000 [1:09:24<30:19:07,  1.13s/it]

33701 episode score is 697.16


  3%|▎         | 3491/100000 [1:09:26<30:39:00,  1.14s/it]

33711 episode score is 714.42


  3%|▎         | 3492/100000 [1:09:27<30:29:49,  1.14s/it]

33721 episode score is 689.00


  3%|▎         | 3493/100000 [1:09:28<30:45:35,  1.15s/it]

33731 episode score is 711.92


  3%|▎         | 3494/100000 [1:09:29<31:14:19,  1.17s/it]

33742 episode score is 659.62


  3%|▎         | 3495/100000 [1:09:30<31:04:14,  1.16s/it]

33752 episode score is 691.98


  3%|▎         | 3496/100000 [1:09:31<30:55:58,  1.15s/it]

33762 episode score is 694.50


  3%|▎         | 3497/100000 [1:09:33<31:01:36,  1.16s/it]

33772 episode score is 717.02


  3%|▎         | 3498/100000 [1:09:34<30:50:15,  1.15s/it]

33782 episode score is 686.33


  3%|▎         | 3499/100000 [1:09:35<30:51:18,  1.15s/it]

33792 episode score is 711.19
33802 episode score is 709.98


  4%|▎         | 3500/100000 [1:09:37<39:05:32,  1.46s/it]

Iteration 3500: Average test reward: 706.82


  4%|▎         | 3501/100000 [1:09:38<36:42:25,  1.37s/it]

33812 episode score is 713.90


  4%|▎         | 3502/100000 [1:09:39<34:34:49,  1.29s/it]

33822 episode score is 665.75


  4%|▎         | 3503/100000 [1:09:40<33:22:58,  1.25s/it]

33832 episode score is 706.19


  4%|▎         | 3504/100000 [1:09:42<32:26:48,  1.21s/it]

33842 episode score is 685.65


  4%|▎         | 3505/100000 [1:09:43<31:50:08,  1.19s/it]

33852 episode score is 695.87


  4%|▎         | 3506/100000 [1:09:44<31:44:18,  1.18s/it]

33862 episode score is 719.38


  4%|▎         | 3507/100000 [1:09:45<31:21:48,  1.17s/it]

33872 episode score is 694.69


  4%|▎         | 3508/100000 [1:09:46<31:24:15,  1.17s/it]

33882 episode score is 727.14


  4%|▎         | 3509/100000 [1:09:47<31:14:30,  1.17s/it]

33892 episode score is 698.02


  4%|▎         | 3510/100000 [1:09:49<31:11:15,  1.16s/it]

33903 episode score is 628.80


  4%|▎         | 3511/100000 [1:09:50<30:47:16,  1.15s/it]

33913 episode score is 673.34


  4%|▎         | 3512/100000 [1:09:51<30:47:41,  1.15s/it]

33923 episode score is 696.47


  4%|▎         | 3513/100000 [1:09:52<30:15:41,  1.13s/it]

33932 episode score is 744.07


  4%|▎         | 3514/100000 [1:09:53<30:29:41,  1.14s/it]

33942 episode score is 705.64


  4%|▎         | 3515/100000 [1:09:54<30:41:36,  1.15s/it]

33952 episode score is 714.24


  4%|▎         | 3516/100000 [1:09:55<30:28:33,  1.14s/it]

33961 episode score is 742.46


  4%|▎         | 3517/100000 [1:09:56<30:54:32,  1.15s/it]

33971 episode score is 730.80


  4%|▎         | 3518/100000 [1:09:58<31:04:24,  1.16s/it]

33981 episode score is 724.88


  4%|▎         | 3519/100000 [1:09:59<31:23:26,  1.17s/it]

33991 episode score is 727.61


  4%|▎         | 3520/100000 [1:10:00<31:31:16,  1.18s/it]

34001 episode score is 730.58


  4%|▎         | 3521/100000 [1:10:01<31:02:53,  1.16s/it]

34010 episode score is 753.82


  4%|▎         | 3522/100000 [1:10:02<31:29:47,  1.18s/it]

34020 episode score is 745.99


  4%|▎         | 3523/100000 [1:10:03<30:47:13,  1.15s/it]

34029 episode score is 740.59


  4%|▎         | 3524/100000 [1:10:05<30:32:40,  1.14s/it]

34038 episode score is 766.79


  4%|▎         | 3525/100000 [1:10:06<30:37:35,  1.14s/it]

34047 episode score is 768.63


  4%|▎         | 3526/100000 [1:10:07<30:18:40,  1.13s/it]

34056 episode score is 755.20


  4%|▎         | 3527/100000 [1:10:08<30:49:06,  1.15s/it]

34065 episode score is 802.51


  4%|▎         | 3528/100000 [1:10:09<31:11:52,  1.16s/it]

34075 episode score is 725.26


  4%|▎         | 3529/100000 [1:10:10<31:05:03,  1.16s/it]

34084 episode score is 789.96


  4%|▎         | 3530/100000 [1:10:12<31:21:12,  1.17s/it]

34093 episode score is 757.13


  4%|▎         | 3531/100000 [1:10:13<30:41:41,  1.15s/it]

34102 episode score is 737.61


  4%|▎         | 3532/100000 [1:10:14<30:47:38,  1.15s/it]

34112 episode score is 713.60


  4%|▎         | 3533/100000 [1:10:15<30:50:36,  1.15s/it]

34122 episode score is 701.99


  4%|▎         | 3534/100000 [1:10:16<30:48:46,  1.15s/it]

34132 episode score is 695.89


  4%|▎         | 3535/100000 [1:10:17<30:39:08,  1.14s/it]

34142 episode score is 678.01


  4%|▎         | 3536/100000 [1:10:18<30:38:15,  1.14s/it]

34152 episode score is 699.22


  4%|▎         | 3537/100000 [1:10:20<30:55:28,  1.15s/it]

34162 episode score is 724.55


  4%|▎         | 3538/100000 [1:10:21<30:50:56,  1.15s/it]

34172 episode score is 694.92


  4%|▎         | 3539/100000 [1:10:22<30:53:04,  1.15s/it]

34182 episode score is 708.37


  4%|▎         | 3540/100000 [1:10:23<30:52:48,  1.15s/it]

34192 episode score is 694.34


  4%|▎         | 3541/100000 [1:10:24<31:03:53,  1.16s/it]

34202 episode score is 724.26


  4%|▎         | 3542/100000 [1:10:25<30:37:05,  1.14s/it]

34212 episode score is 672.15


  4%|▎         | 3543/100000 [1:10:26<30:47:12,  1.15s/it]

34222 episode score is 705.70


  4%|▎         | 3544/100000 [1:10:28<30:46:39,  1.15s/it]

34232 episode score is 703.42


  4%|▎         | 3545/100000 [1:10:29<30:47:47,  1.15s/it]

34242 episode score is 700.77


  4%|▎         | 3546/100000 [1:10:30<30:29:10,  1.14s/it]

34252 episode score is 674.46


  4%|▎         | 3547/100000 [1:10:31<30:40:13,  1.14s/it]

34262 episode score is 711.60


  4%|▎         | 3548/100000 [1:10:32<30:40:05,  1.14s/it]

34272 episode score is 693.73


  4%|▎         | 3549/100000 [1:10:33<30:24:58,  1.14s/it]

34282 episode score is 679.30


  4%|▎         | 3550/100000 [1:10:34<30:23:32,  1.13s/it]

34292 episode score is 699.54


  4%|▎         | 3551/100000 [1:10:36<30:22:15,  1.13s/it]

34302 episode score is 679.90


  4%|▎         | 3552/100000 [1:10:37<30:04:48,  1.12s/it]

34312 episode score is 668.79


  4%|▎         | 3553/100000 [1:10:38<29:55:41,  1.12s/it]

34322 episode score is 672.09


  4%|▎         | 3554/100000 [1:10:39<30:15:32,  1.13s/it]

34332 episode score is 707.19


  4%|▎         | 3555/100000 [1:10:40<30:07:40,  1.12s/it]

34342 episode score is 675.34


  4%|▎         | 3556/100000 [1:10:41<29:50:35,  1.11s/it]

34352 episode score is 665.26


  4%|▎         | 3557/100000 [1:10:42<30:20:29,  1.13s/it]

34363 episode score is 655.71


  4%|▎         | 3558/100000 [1:10:43<30:17:32,  1.13s/it]

34373 episode score is 688.82


  4%|▎         | 3559/100000 [1:10:45<30:12:21,  1.13s/it]

34383 episode score is 682.38


  4%|▎         | 3560/100000 [1:10:46<30:19:01,  1.13s/it]

34393 episode score is 693.36


  4%|▎         | 3561/100000 [1:10:47<30:27:29,  1.14s/it]

34403 episode score is 693.96


  4%|▎         | 3562/100000 [1:10:48<30:18:27,  1.13s/it]

34413 episode score is 681.11


  4%|▎         | 3563/100000 [1:10:49<30:26:39,  1.14s/it]

34423 episode score is 703.04


  4%|▎         | 3564/100000 [1:10:50<30:15:51,  1.13s/it]

34433 episode score is 677.27


  4%|▎         | 3565/100000 [1:10:51<30:39:24,  1.14s/it]

34443 episode score is 713.87


  4%|▎         | 3566/100000 [1:10:53<30:37:31,  1.14s/it]

34453 episode score is 697.03


  4%|▎         | 3567/100000 [1:10:54<30:16:02,  1.13s/it]

34463 episode score is 665.18


  4%|▎         | 3568/100000 [1:10:55<30:13:03,  1.13s/it]

34473 episode score is 689.98


  4%|▎         | 3569/100000 [1:10:56<30:11:20,  1.13s/it]

34483 episode score is 689.83


  4%|▎         | 3570/100000 [1:10:57<29:59:40,  1.12s/it]

34493 episode score is 679.13


  4%|▎         | 3571/100000 [1:10:58<30:00:14,  1.12s/it]

34503 episode score is 691.22


  4%|▎         | 3572/100000 [1:10:59<30:17:28,  1.13s/it]

34514 episode score is 624.62


  4%|▎         | 3573/100000 [1:11:00<30:22:13,  1.13s/it]

34524 episode score is 699.46


  4%|▎         | 3574/100000 [1:11:01<30:03:29,  1.12s/it]

34534 episode score is 675.37


  4%|▎         | 3575/100000 [1:11:03<30:19:48,  1.13s/it]

34544 episode score is 699.67


  4%|▎         | 3576/100000 [1:11:04<30:17:46,  1.13s/it]

34554 episode score is 685.09


  4%|▎         | 3577/100000 [1:11:05<30:16:26,  1.13s/it]

34564 episode score is 686.31


  4%|▎         | 3578/100000 [1:11:06<30:13:39,  1.13s/it]

34574 episode score is 686.63


  4%|▎         | 3579/100000 [1:11:07<30:11:43,  1.13s/it]

34584 episode score is 690.38


  4%|▎         | 3580/100000 [1:11:08<30:29:19,  1.14s/it]

34594 episode score is 713.28


  4%|▎         | 3581/100000 [1:11:09<30:24:33,  1.14s/it]

34604 episode score is 688.32


  4%|▎         | 3582/100000 [1:11:11<30:33:09,  1.14s/it]

34614 episode score is 709.21


  4%|▎         | 3583/100000 [1:11:12<30:59:16,  1.16s/it]

34624 episode score is 729.72


  4%|▎         | 3584/100000 [1:11:13<31:04:24,  1.16s/it]

34634 episode score is 720.53


  4%|▎         | 3585/100000 [1:11:14<30:53:19,  1.15s/it]

34644 episode score is 701.28


  4%|▎         | 3586/100000 [1:11:15<30:25:51,  1.14s/it]

34653 episode score is 754.08


  4%|▎         | 3587/100000 [1:11:16<30:41:25,  1.15s/it]

34663 episode score is 722.34


  4%|▎         | 3588/100000 [1:11:18<30:49:52,  1.15s/it]

34673 episode score is 707.66


  4%|▎         | 3589/100000 [1:11:19<30:57:03,  1.16s/it]

34683 episode score is 712.84


  4%|▎         | 3590/100000 [1:11:20<30:50:37,  1.15s/it]

34693 episode score is 711.32


  4%|▎         | 3591/100000 [1:11:21<30:53:19,  1.15s/it]

34703 episode score is 723.17


  4%|▎         | 3592/100000 [1:11:22<30:47:39,  1.15s/it]

34713 episode score is 707.34


  4%|▎         | 3593/100000 [1:11:23<31:18:33,  1.17s/it]

34723 episode score is 685.81


  4%|▎         | 3594/100000 [1:11:24<31:05:38,  1.16s/it]

34733 episode score is 708.28


  4%|▎         | 3595/100000 [1:11:26<30:45:03,  1.15s/it]

34743 episode score is 686.41


  4%|▎         | 3596/100000 [1:11:27<30:24:46,  1.14s/it]

34753 episode score is 682.94


  4%|▎         | 3597/100000 [1:11:28<30:09:22,  1.13s/it]

34763 episode score is 678.51


  4%|▎         | 3598/100000 [1:11:29<30:22:14,  1.13s/it]

34773 episode score is 693.31


  4%|▎         | 3599/100000 [1:11:30<30:21:54,  1.13s/it]

34783 episode score is 675.41
34793 episode score is 698.18


  4%|▎         | 3600/100000 [1:11:32<39:09:18,  1.46s/it]

Iteration 3600: Average test reward: 682.73


  4%|▎         | 3601/100000 [1:11:34<37:03:42,  1.38s/it]

34802 episode score is 740.02


  4%|▎         | 3602/100000 [1:11:35<35:07:35,  1.31s/it]

34811 episode score is 730.16


  4%|▎         | 3603/100000 [1:11:36<33:59:59,  1.27s/it]

34820 episode score is 733.45


  4%|▎         | 3604/100000 [1:11:37<32:34:40,  1.22s/it]

34829 episode score is 714.81


  4%|▎         | 3605/100000 [1:11:38<31:40:45,  1.18s/it]

34838 episode score is 720.32


  4%|▎         | 3606/100000 [1:11:39<31:26:18,  1.17s/it]

34847 episode score is 728.42


  4%|▎         | 3607/100000 [1:11:40<31:07:17,  1.16s/it]

34856 episode score is 725.68


  4%|▎         | 3608/100000 [1:11:42<31:09:27,  1.16s/it]

34865 episode score is 738.36


  4%|▎         | 3609/100000 [1:11:43<30:45:41,  1.15s/it]

34874 episode score is 715.21


  4%|▎         | 3610/100000 [1:11:44<30:27:38,  1.14s/it]

34883 episode score is 724.59


  4%|▎         | 3611/100000 [1:11:45<30:20:17,  1.13s/it]

34892 episode score is 723.05


  4%|▎         | 3612/100000 [1:11:46<30:28:53,  1.14s/it]

34901 episode score is 735.15


  4%|▎         | 3613/100000 [1:11:47<31:02:38,  1.16s/it]

34911 episode score is 703.13


  4%|▎         | 3614/100000 [1:11:48<31:24:47,  1.17s/it]

34921 episode score is 714.78


  4%|▎         | 3615/100000 [1:11:50<31:32:14,  1.18s/it]

34931 episode score is 686.13


  4%|▎         | 3616/100000 [1:11:51<30:51:53,  1.15s/it]

34940 episode score is 700.61


  4%|▎         | 3617/100000 [1:11:52<30:40:37,  1.15s/it]

34949 episode score is 704.12


  4%|▎         | 3618/100000 [1:11:53<31:22:05,  1.17s/it]

34959 episode score is 693.36


  4%|▎         | 3619/100000 [1:11:54<31:15:40,  1.17s/it]

34968 episode score is 722.84


  4%|▎         | 3620/100000 [1:11:55<31:07:17,  1.16s/it]

34977 episode score is 717.24


  4%|▎         | 3621/100000 [1:11:57<31:29:50,  1.18s/it]

34987 episode score is 699.79


  4%|▎         | 3622/100000 [1:11:58<31:57:03,  1.19s/it]

34997 episode score is 707.60


  4%|▎         | 3623/100000 [1:11:59<32:07:20,  1.20s/it]

35007 episode score is 707.46


  4%|▎         | 3624/100000 [1:12:00<32:17:49,  1.21s/it]

35017 episode score is 711.25


  4%|▎         | 3625/100000 [1:12:01<32:16:43,  1.21s/it]

35027 episode score is 709.43


  4%|▎         | 3626/100000 [1:12:03<32:07:04,  1.20s/it]

35037 episode score is 697.22


  4%|▎         | 3627/100000 [1:12:04<31:26:26,  1.17s/it]

35046 episode score is 708.80


  4%|▎         | 3628/100000 [1:12:05<31:45:14,  1.19s/it]

35056 episode score is 709.19


  4%|▎         | 3629/100000 [1:12:06<31:58:58,  1.19s/it]

35066 episode score is 719.74


  4%|▎         | 3630/100000 [1:12:07<32:14:40,  1.20s/it]

35076 episode score is 705.59


  4%|▎         | 3631/100000 [1:12:09<31:29:46,  1.18s/it]

35085 episode score is 719.49


  4%|▎         | 3632/100000 [1:12:10<31:47:36,  1.19s/it]

35095 episode score is 717.68


  4%|▎         | 3633/100000 [1:12:11<31:13:24,  1.17s/it]

35104 episode score is 720.61


  4%|▎         | 3634/100000 [1:12:12<31:39:35,  1.18s/it]

35114 episode score is 715.71


  4%|▎         | 3635/100000 [1:12:13<31:53:03,  1.19s/it]

35124 episode score is 722.10


  4%|▎         | 3636/100000 [1:12:14<32:02:25,  1.20s/it]

35134 episode score is 728.03


  4%|▎         | 3637/100000 [1:12:16<32:10:12,  1.20s/it]

35144 episode score is 725.42


  4%|▎         | 3638/100000 [1:12:17<32:11:57,  1.20s/it]

35154 episode score is 709.53


  4%|▎         | 3639/100000 [1:12:18<32:25:47,  1.21s/it]

35164 episode score is 724.63


  4%|▎         | 3640/100000 [1:12:19<32:42:14,  1.22s/it]

35174 episode score is 732.60


  4%|▎         | 3641/100000 [1:12:21<31:54:44,  1.19s/it]

35183 episode score is 726.11


  4%|▎         | 3642/100000 [1:12:22<32:08:36,  1.20s/it]

35193 episode score is 706.08


  4%|▎         | 3643/100000 [1:12:23<33:00:23,  1.23s/it]

35203 episode score is 708.05


  4%|▎         | 3644/100000 [1:12:24<32:27:00,  1.21s/it]

35212 episode score is 727.75


  4%|▎         | 3645/100000 [1:12:25<32:36:27,  1.22s/it]

35222 episode score is 712.09


  4%|▎         | 3646/100000 [1:12:27<31:51:15,  1.19s/it]

35231 episode score is 720.40


  4%|▎         | 3647/100000 [1:12:28<32:00:23,  1.20s/it]

35241 episode score is 709.00


  4%|▎         | 3648/100000 [1:12:29<32:10:58,  1.20s/it]

35251 episode score is 713.02


  4%|▎         | 3649/100000 [1:12:30<31:35:54,  1.18s/it]

35260 episode score is 737.72


  4%|▎         | 3650/100000 [1:12:31<31:08:46,  1.16s/it]

35269 episode score is 734.54


  4%|▎         | 3651/100000 [1:12:32<30:47:27,  1.15s/it]

35278 episode score is 723.61


  4%|▎         | 3652/100000 [1:12:34<30:42:48,  1.15s/it]

35287 episode score is 733.52


  4%|▎         | 3653/100000 [1:12:35<31:22:19,  1.17s/it]

35297 episode score is 731.55


  4%|▎         | 3654/100000 [1:12:36<31:00:41,  1.16s/it]

35306 episode score is 732.07


  4%|▎         | 3655/100000 [1:12:37<31:34:52,  1.18s/it]

35316 episode score is 723.60


  4%|▎         | 3656/100000 [1:12:38<31:03:59,  1.16s/it]

35325 episode score is 735.42


  4%|▎         | 3657/100000 [1:12:39<31:31:28,  1.18s/it]

35335 episode score is 725.37


  4%|▎         | 3658/100000 [1:12:41<31:52:31,  1.19s/it]

35345 episode score is 725.11


  4%|▎         | 3659/100000 [1:12:42<32:11:48,  1.20s/it]

35355 episode score is 726.90


  4%|▎         | 3660/100000 [1:12:43<32:25:15,  1.21s/it]

35365 episode score is 724.67


  4%|▎         | 3661/100000 [1:12:44<31:47:08,  1.19s/it]

35374 episode score is 725.03


  4%|▎         | 3662/100000 [1:12:45<32:07:18,  1.20s/it]

35384 episode score is 730.05


  4%|▎         | 3663/100000 [1:12:47<31:27:42,  1.18s/it]

35393 episode score is 727.82


  4%|▎         | 3664/100000 [1:12:48<31:04:23,  1.16s/it]

35402 episode score is 731.17


  4%|▎         | 3665/100000 [1:12:49<30:59:01,  1.16s/it]

35411 episode score is 747.33


  4%|▎         | 3666/100000 [1:12:50<30:36:29,  1.14s/it]

35420 episode score is 729.72


  4%|▎         | 3667/100000 [1:12:51<31:12:13,  1.17s/it]

35430 episode score is 732.31


  4%|▎         | 3668/100000 [1:12:52<31:30:06,  1.18s/it]

35440 episode score is 720.50


  4%|▎         | 3669/100000 [1:12:54<31:49:21,  1.19s/it]

35450 episode score is 727.89


  4%|▎         | 3670/100000 [1:12:55<32:02:20,  1.20s/it]

35460 episode score is 724.37


  4%|▎         | 3671/100000 [1:12:56<32:00:15,  1.20s/it]

35470 episode score is 710.68


  4%|▎         | 3672/100000 [1:12:57<32:13:48,  1.20s/it]

35480 episode score is 721.83


  4%|▎         | 3673/100000 [1:12:58<31:37:17,  1.18s/it]

35489 episode score is 728.03


  4%|▎         | 3674/100000 [1:13:00<32:03:00,  1.20s/it]

35499 episode score is 706.65


  4%|▎         | 3675/100000 [1:13:01<31:26:13,  1.17s/it]

35508 episode score is 726.64


  4%|▎         | 3676/100000 [1:13:02<31:41:42,  1.18s/it]

35518 episode score is 718.44


  4%|▎         | 3677/100000 [1:13:03<31:53:53,  1.19s/it]

35528 episode score is 715.74


  4%|▎         | 3678/100000 [1:13:04<31:44:41,  1.19s/it]

35538 episode score is 702.86


  4%|▎         | 3679/100000 [1:13:06<31:53:56,  1.19s/it]

35548 episode score is 709.81


  4%|▎         | 3680/100000 [1:13:07<32:11:34,  1.20s/it]

35558 episode score is 725.40


  4%|▎         | 3681/100000 [1:13:08<32:28:12,  1.21s/it]

35568 episode score is 724.55


  4%|▎         | 3682/100000 [1:13:09<32:31:02,  1.22s/it]

35578 episode score is 724.38


  4%|▎         | 3683/100000 [1:13:10<32:23:40,  1.21s/it]

35588 episode score is 722.92


  4%|▎         | 3684/100000 [1:13:12<31:39:41,  1.18s/it]

35597 episode score is 741.73


  4%|▎         | 3685/100000 [1:13:13<31:22:56,  1.17s/it]

35606 episode score is 750.70


  4%|▎         | 3686/100000 [1:13:14<31:46:19,  1.19s/it]

35616 episode score is 730.36


  4%|▎         | 3687/100000 [1:13:15<31:49:08,  1.19s/it]

35626 episode score is 721.65


  4%|▎         | 3688/100000 [1:13:16<32:09:53,  1.20s/it]

35636 episode score is 727.38


  4%|▎         | 3689/100000 [1:13:18<32:18:10,  1.21s/it]

35646 episode score is 726.54


  4%|▎         | 3690/100000 [1:13:19<32:27:10,  1.21s/it]

35656 episode score is 729.89


  4%|▎         | 3691/100000 [1:13:20<32:24:24,  1.21s/it]

35666 episode score is 724.48


  4%|▎         | 3692/100000 [1:13:21<32:30:13,  1.21s/it]

35676 episode score is 730.40


  4%|▎         | 3693/100000 [1:13:22<32:20:14,  1.21s/it]

35686 episode score is 720.63


  4%|▎         | 3694/100000 [1:13:24<31:33:59,  1.18s/it]

35695 episode score is 740.96


  4%|▎         | 3695/100000 [1:13:25<31:48:23,  1.19s/it]

35705 episode score is 726.69


  4%|▎         | 3696/100000 [1:13:26<31:51:32,  1.19s/it]

35715 episode score is 722.29


  4%|▎         | 3697/100000 [1:13:27<31:48:05,  1.19s/it]

35725 episode score is 713.99


  4%|▎         | 3698/100000 [1:13:28<32:00:45,  1.20s/it]

35735 episode score is 726.75


  4%|▎         | 3699/100000 [1:13:30<32:04:08,  1.20s/it]

35745 episode score is 721.34
35755 episode score is 741.80


  4%|▎         | 3700/100000 [1:13:32<41:29:55,  1.55s/it]

Iteration 3700: Average test reward: 748.69


  4%|▎         | 3701/100000 [1:13:33<38:47:48,  1.45s/it]

35765 episode score is 729.14


  4%|▎         | 3702/100000 [1:13:34<36:49:02,  1.38s/it]

35775 episode score is 733.11


  4%|▎         | 3703/100000 [1:13:35<34:51:08,  1.30s/it]

35784 episode score is 757.35


  4%|▎         | 3704/100000 [1:13:37<34:17:04,  1.28s/it]

35794 episode score is 737.97


  4%|▎         | 3705/100000 [1:13:38<34:42:52,  1.30s/it]

35804 episode score is 734.00


  4%|▎         | 3706/100000 [1:13:39<33:26:16,  1.25s/it]

35813 episode score is 733.32


  4%|▎         | 3707/100000 [1:13:40<32:22:42,  1.21s/it]

35822 episode score is 745.01


  4%|▎         | 3708/100000 [1:13:41<31:41:56,  1.19s/it]

35831 episode score is 742.75


  4%|▎         | 3709/100000 [1:13:43<31:15:29,  1.17s/it]

35840 episode score is 746.50


  4%|▎         | 3710/100000 [1:13:44<31:11:31,  1.17s/it]

35849 episode score is 751.97


  4%|▎         | 3711/100000 [1:13:45<30:52:19,  1.15s/it]

35858 episode score is 742.60


  4%|▎         | 3712/100000 [1:13:46<31:01:17,  1.16s/it]

35867 episode score is 751.54


  4%|▎         | 3713/100000 [1:13:47<31:14:08,  1.17s/it]

35876 episode score is 752.56


  4%|▎         | 3714/100000 [1:13:48<31:33:59,  1.18s/it]

35886 episode score is 698.13


  4%|▎         | 3715/100000 [1:13:50<32:02:53,  1.20s/it]

35896 episode score is 704.67


  4%|▎         | 3716/100000 [1:13:51<32:14:25,  1.21s/it]

35906 episode score is 715.11


  4%|▎         | 3717/100000 [1:13:52<31:32:19,  1.18s/it]

35915 episode score is 723.31


  4%|▎         | 3718/100000 [1:13:53<31:36:11,  1.18s/it]

35925 episode score is 700.32


  4%|▎         | 3719/100000 [1:13:54<31:54:23,  1.19s/it]

35935 episode score is 711.20


  4%|▎         | 3720/100000 [1:13:56<32:21:39,  1.21s/it]

35945 episode score is 717.12


  4%|▎         | 3721/100000 [1:13:57<32:09:47,  1.20s/it]

35955 episode score is 701.28


  4%|▎         | 3722/100000 [1:13:58<32:12:33,  1.20s/it]

35965 episode score is 706.88


  4%|▎         | 3723/100000 [1:13:59<32:05:20,  1.20s/it]

35975 episode score is 702.33


  4%|▎         | 3724/100000 [1:14:00<31:27:09,  1.18s/it]

35984 episode score is 731.83


  4%|▎         | 3725/100000 [1:14:02<31:31:18,  1.18s/it]

35994 episode score is 704.49


  4%|▎         | 3726/100000 [1:14:03<31:40:23,  1.18s/it]

36004 episode score is 708.67


  4%|▎         | 3727/100000 [1:14:04<31:40:08,  1.18s/it]

36014 episode score is 718.74


  4%|▎         | 3728/100000 [1:14:05<31:57:37,  1.20s/it]

36024 episode score is 727.14


  4%|▎         | 3729/100000 [1:14:06<32:07:20,  1.20s/it]

36034 episode score is 730.66


  4%|▎         | 3730/100000 [1:14:08<32:33:43,  1.22s/it]

36044 episode score is 737.57


  4%|▎         | 3731/100000 [1:14:09<32:24:12,  1.21s/it]

36054 episode score is 715.38


  4%|▎         | 3732/100000 [1:14:10<31:35:43,  1.18s/it]

36063 episode score is 736.32


  4%|▎         | 3733/100000 [1:14:11<31:37:10,  1.18s/it]

36073 episode score is 713.17


  4%|▎         | 3734/100000 [1:14:12<31:29:11,  1.18s/it]

36083 episode score is 702.86


  4%|▎         | 3735/100000 [1:14:13<31:36:37,  1.18s/it]

36093 episode score is 720.07


  4%|▎         | 3736/100000 [1:14:15<31:52:31,  1.19s/it]

36103 episode score is 734.33


  4%|▎         | 3737/100000 [1:14:16<31:56:46,  1.19s/it]

36113 episode score is 727.91


  4%|▎         | 3738/100000 [1:14:17<31:57:40,  1.20s/it]

36123 episode score is 713.27


  4%|▎         | 3739/100000 [1:14:18<31:57:06,  1.19s/it]

36133 episode score is 710.16


  4%|▎         | 3740/100000 [1:14:19<31:58:42,  1.20s/it]

36143 episode score is 723.16


  4%|▎         | 3741/100000 [1:14:21<31:54:35,  1.19s/it]

36153 episode score is 723.06


  4%|▎         | 3742/100000 [1:14:22<32:06:45,  1.20s/it]

36163 episode score is 735.46


  4%|▎         | 3743/100000 [1:14:23<32:01:17,  1.20s/it]

36173 episode score is 719.55


  4%|▎         | 3744/100000 [1:14:24<32:01:56,  1.20s/it]

36183 episode score is 727.46


  4%|▎         | 3745/100000 [1:14:25<31:19:20,  1.17s/it]

36192 episode score is 745.26


  4%|▎         | 3746/100000 [1:14:26<30:55:12,  1.16s/it]

36201 episode score is 745.14


  4%|▎         | 3747/100000 [1:14:28<31:00:56,  1.16s/it]

36210 episode score is 755.93


  4%|▎         | 3748/100000 [1:14:29<30:55:37,  1.16s/it]

36219 episode score is 750.07


  4%|▎         | 3749/100000 [1:14:30<30:44:45,  1.15s/it]

36228 episode score is 747.48


  4%|▍         | 3750/100000 [1:14:31<31:24:13,  1.17s/it]

36238 episode score is 743.61


  4%|▍         | 3751/100000 [1:14:32<31:01:36,  1.16s/it]

36247 episode score is 745.45


  4%|▍         | 3752/100000 [1:14:33<30:48:29,  1.15s/it]

36256 episode score is 750.48


  4%|▍         | 3753/100000 [1:14:35<30:30:12,  1.14s/it]

36265 episode score is 739.69


  4%|▍         | 3754/100000 [1:14:36<31:14:06,  1.17s/it]

36275 episode score is 730.23


  4%|▍         | 3755/100000 [1:14:37<31:38:46,  1.18s/it]

36285 episode score is 726.79


  4%|▍         | 3756/100000 [1:14:38<31:47:31,  1.19s/it]

36295 episode score is 721.26


  4%|▍         | 3757/100000 [1:14:39<31:20:22,  1.17s/it]

36304 episode score is 742.43


  4%|▍         | 3758/100000 [1:14:40<31:05:53,  1.16s/it]

36313 episode score is 740.00


  4%|▍         | 3759/100000 [1:14:42<31:03:38,  1.16s/it]

36322 episode score is 738.94


  4%|▍         | 3760/100000 [1:14:43<31:05:14,  1.16s/it]

36331 episode score is 740.23


  4%|▍         | 3761/100000 [1:14:44<31:14:02,  1.17s/it]

36340 episode score is 738.18


  4%|▍         | 3762/100000 [1:14:45<31:06:00,  1.16s/it]

36349 episode score is 735.81


  4%|▍         | 3763/100000 [1:14:46<31:03:20,  1.16s/it]

36358 episode score is 724.73


  4%|▍         | 3764/100000 [1:14:47<31:31:25,  1.18s/it]

36367 episode score is 747.90


  4%|▍         | 3765/100000 [1:14:49<31:32:51,  1.18s/it]

36376 episode score is 740.72


  4%|▍         | 3766/100000 [1:14:50<31:30:44,  1.18s/it]

36385 episode score is 732.37


  4%|▍         | 3767/100000 [1:14:51<31:17:25,  1.17s/it]

36394 episode score is 732.00


  4%|▍         | 3768/100000 [1:14:52<31:11:36,  1.17s/it]

36403 episode score is 739.89


  4%|▍         | 3769/100000 [1:14:53<31:09:42,  1.17s/it]

36412 episode score is 742.09


  4%|▍         | 3770/100000 [1:14:55<31:35:42,  1.18s/it]

36422 episode score is 728.01


  4%|▍         | 3771/100000 [1:14:56<31:23:26,  1.17s/it]

36431 episode score is 753.44


  4%|▍         | 3772/100000 [1:14:57<30:57:01,  1.16s/it]

36440 episode score is 749.41


  4%|▍         | 3773/100000 [1:14:58<31:37:11,  1.18s/it]

36449 episode score is 746.25


  4%|▍         | 3774/100000 [1:14:59<31:23:54,  1.17s/it]

36458 episode score is 763.80


  4%|▍         | 3775/100000 [1:15:00<30:59:38,  1.16s/it]

36467 episode score is 752.12


  4%|▍         | 3776/100000 [1:15:02<31:00:40,  1.16s/it]

36476 episode score is 765.07


  4%|▍         | 3777/100000 [1:15:03<31:09:58,  1.17s/it]

36485 episode score is 765.44


  4%|▍         | 3778/100000 [1:15:04<30:50:38,  1.15s/it]

36494 episode score is 743.99


  4%|▍         | 3779/100000 [1:15:05<30:50:37,  1.15s/it]

36503 episode score is 753.84


  4%|▍         | 3780/100000 [1:15:06<31:12:03,  1.17s/it]

36512 episode score is 774.28


  4%|▍         | 3781/100000 [1:15:07<31:10:50,  1.17s/it]

36521 episode score is 766.62


  4%|▍         | 3782/100000 [1:15:08<31:07:02,  1.16s/it]

36530 episode score is 755.92


  4%|▍         | 3783/100000 [1:15:10<30:45:31,  1.15s/it]

36539 episode score is 736.77


  4%|▍         | 3784/100000 [1:15:11<30:48:02,  1.15s/it]

36548 episode score is 755.82


  4%|▍         | 3785/100000 [1:15:12<30:45:36,  1.15s/it]

36557 episode score is 755.28


  4%|▍         | 3786/100000 [1:15:13<30:47:07,  1.15s/it]

36566 episode score is 747.67


  4%|▍         | 3787/100000 [1:15:14<30:59:23,  1.16s/it]

36575 episode score is 759.51


  4%|▍         | 3788/100000 [1:15:15<31:03:08,  1.16s/it]

36584 episode score is 759.47


  4%|▍         | 3789/100000 [1:15:17<30:53:41,  1.16s/it]

36593 episode score is 759.47


  4%|▍         | 3790/100000 [1:15:18<30:54:27,  1.16s/it]

36602 episode score is 749.66


  4%|▍         | 3791/100000 [1:15:19<30:50:20,  1.15s/it]

36611 episode score is 751.22


  4%|▍         | 3792/100000 [1:15:20<30:53:16,  1.16s/it]

36620 episode score is 758.49


  4%|▍         | 3793/100000 [1:15:21<30:48:30,  1.15s/it]

36629 episode score is 752.76


  4%|▍         | 3794/100000 [1:15:22<30:50:17,  1.15s/it]

36638 episode score is 756.20


  4%|▍         | 3795/100000 [1:15:23<30:57:08,  1.16s/it]

36647 episode score is 758.87


  4%|▍         | 3796/100000 [1:15:25<30:40:19,  1.15s/it]

36656 episode score is 737.05


  4%|▍         | 3797/100000 [1:15:26<30:44:38,  1.15s/it]

36665 episode score is 758.78


  4%|▍         | 3798/100000 [1:15:27<30:42:00,  1.15s/it]

36674 episode score is 746.34


  4%|▍         | 3799/100000 [1:15:28<30:37:44,  1.15s/it]

36683 episode score is 740.21
36692 episode score is 730.52


  4%|▍         | 3800/100000 [1:15:30<39:04:08,  1.46s/it]

Iteration 3800: Average test reward: 718.03


  4%|▍         | 3801/100000 [1:15:31<36:49:39,  1.38s/it]

36701 episode score is 744.84


  4%|▍         | 3802/100000 [1:15:33<35:13:15,  1.32s/it]

36710 episode score is 749.25


  4%|▍         | 3803/100000 [1:15:34<33:46:00,  1.26s/it]

36719 episode score is 740.36


  4%|▍         | 3804/100000 [1:15:35<32:54:22,  1.23s/it]

36728 episode score is 754.45


  4%|▍         | 3805/100000 [1:15:36<32:04:04,  1.20s/it]

36737 episode score is 733.07


  4%|▍         | 3806/100000 [1:15:37<31:31:20,  1.18s/it]

36746 episode score is 740.80


  4%|▍         | 3807/100000 [1:15:38<31:21:30,  1.17s/it]

36755 episode score is 734.26


  4%|▍         | 3808/100000 [1:15:39<31:00:34,  1.16s/it]

36764 episode score is 727.60


  4%|▍         | 3809/100000 [1:15:41<30:37:32,  1.15s/it]

36773 episode score is 729.76


  4%|▍         | 3810/100000 [1:15:42<30:20:48,  1.14s/it]

36782 episode score is 733.04


  4%|▍         | 3811/100000 [1:15:43<30:23:24,  1.14s/it]

36791 episode score is 746.84


  4%|▍         | 3812/100000 [1:15:44<30:19:43,  1.14s/it]

36800 episode score is 741.61


  4%|▍         | 3813/100000 [1:15:45<30:17:50,  1.13s/it]

36809 episode score is 740.66


  4%|▍         | 3814/100000 [1:15:46<30:12:48,  1.13s/it]

36818 episode score is 742.98


  4%|▍         | 3815/100000 [1:15:47<30:25:00,  1.14s/it]

36827 episode score is 746.35


  4%|▍         | 3816/100000 [1:15:49<30:26:55,  1.14s/it]

36836 episode score is 743.63


  4%|▍         | 3817/100000 [1:15:50<30:28:08,  1.14s/it]

36845 episode score is 743.80


  4%|▍         | 3818/100000 [1:15:51<31:11:49,  1.17s/it]

36855 episode score is 733.73


  4%|▍         | 3819/100000 [1:15:52<31:04:43,  1.16s/it]

36864 episode score is 754.74


  4%|▍         | 3820/100000 [1:15:53<31:34:40,  1.18s/it]

36874 episode score is 731.36


  4%|▍         | 3821/100000 [1:15:54<31:15:56,  1.17s/it]

36883 episode score is 757.32


  4%|▍         | 3822/100000 [1:15:56<30:54:22,  1.16s/it]

36892 episode score is 748.93


  4%|▍         | 3823/100000 [1:15:57<31:30:53,  1.18s/it]

36901 episode score is 766.84


  4%|▍         | 3824/100000 [1:15:58<31:16:09,  1.17s/it]

36910 episode score is 763.53


  4%|▍         | 3825/100000 [1:15:59<31:04:30,  1.16s/it]

36919 episode score is 765.13


  4%|▍         | 3826/100000 [1:16:00<30:45:18,  1.15s/it]

36928 episode score is 756.43


  4%|▍         | 3827/100000 [1:16:01<31:23:04,  1.17s/it]

36938 episode score is 742.66


  4%|▍         | 3828/100000 [1:16:03<31:00:44,  1.16s/it]

36947 episode score is 756.38


  4%|▍         | 3829/100000 [1:16:04<30:37:44,  1.15s/it]

36956 episode score is 751.46


  4%|▍         | 3830/100000 [1:16:05<30:28:53,  1.14s/it]

36965 episode score is 761.55


  4%|▍         | 3831/100000 [1:16:06<30:28:07,  1.14s/it]

36974 episode score is 767.94


  4%|▍         | 3832/100000 [1:16:07<30:13:58,  1.13s/it]

36983 episode score is 745.73


  4%|▍         | 3833/100000 [1:16:08<31:05:15,  1.16s/it]

36993 episode score is 738.59


  4%|▍         | 3834/100000 [1:16:09<31:35:29,  1.18s/it]

37003 episode score is 744.74


  4%|▍         | 3835/100000 [1:16:11<31:06:36,  1.16s/it]

37012 episode score is 758.32


  4%|▍         | 3836/100000 [1:16:12<31:33:12,  1.18s/it]

37022 episode score is 743.89


  4%|▍         | 3837/100000 [1:16:13<31:53:05,  1.19s/it]

37032 episode score is 739.19


  4%|▍         | 3838/100000 [1:16:14<31:21:12,  1.17s/it]

37041 episode score is 764.28


  4%|▍         | 3839/100000 [1:16:15<31:34:18,  1.18s/it]

37051 episode score is 729.68


  4%|▍         | 3840/100000 [1:16:17<31:57:08,  1.20s/it]

37061 episode score is 744.62


  4%|▍         | 3841/100000 [1:16:18<32:17:56,  1.21s/it]

37071 episode score is 738.57


  4%|▍         | 3842/100000 [1:16:19<31:44:01,  1.19s/it]

37080 episode score is 755.37


  4%|▍         | 3843/100000 [1:16:20<31:06:46,  1.16s/it]

37089 episode score is 749.67


  4%|▍         | 3844/100000 [1:16:21<30:39:52,  1.15s/it]

37098 episode score is 748.09


  4%|▍         | 3845/100000 [1:16:22<30:40:06,  1.15s/it]

37107 episode score is 775.58


  4%|▍         | 3846/100000 [1:16:23<30:33:17,  1.14s/it]

37116 episode score is 757.91


  4%|▍         | 3847/100000 [1:16:25<30:25:06,  1.14s/it]

37125 episode score is 768.38


  4%|▍         | 3848/100000 [1:16:26<31:06:47,  1.16s/it]

37135 episode score is 747.94


  4%|▍         | 3849/100000 [1:16:27<30:56:54,  1.16s/it]

37144 episode score is 770.86


  4%|▍         | 3850/100000 [1:16:28<31:30:35,  1.18s/it]

37154 episode score is 743.10


  4%|▍         | 3851/100000 [1:16:29<30:59:24,  1.16s/it]

37163 episode score is 753.83


  4%|▍         | 3852/100000 [1:16:31<31:30:49,  1.18s/it]

37173 episode score is 746.33


  4%|▍         | 3853/100000 [1:16:32<31:46:53,  1.19s/it]

37183 episode score is 735.24


  4%|▍         | 3854/100000 [1:16:33<31:18:51,  1.17s/it]

37192 episode score is 750.43


  4%|▍         | 3855/100000 [1:16:34<31:40:00,  1.19s/it]

37202 episode score is 741.64


  4%|▍         | 3856/100000 [1:16:35<32:03:45,  1.20s/it]

37212 episode score is 742.46


  4%|▍         | 3857/100000 [1:16:36<31:19:25,  1.17s/it]

37221 episode score is 753.61


  4%|▍         | 3858/100000 [1:16:38<30:48:40,  1.15s/it]

37230 episode score is 750.99


  4%|▍         | 3859/100000 [1:16:39<30:28:58,  1.14s/it]

37239 episode score is 752.63


  4%|▍         | 3860/100000 [1:16:40<30:53:26,  1.16s/it]

37249 episode score is 729.53


  4%|▍         | 3861/100000 [1:16:41<31:09:58,  1.17s/it]

37259 episode score is 727.59


  4%|▍         | 3862/100000 [1:16:42<31:53:47,  1.19s/it]

37269 episode score is 771.67


  4%|▍         | 3863/100000 [1:16:44<31:49:21,  1.19s/it]

37278 episode score is 799.08


  4%|▍         | 3864/100000 [1:16:45<31:14:57,  1.17s/it]

37286 episode score is 839.45


  4%|▍         | 3865/100000 [1:16:46<30:48:30,  1.15s/it]

37294 episode score is 852.32


  4%|▍         | 3866/100000 [1:16:47<30:53:32,  1.16s/it]

37303 episode score is 761.52


  4%|▍         | 3867/100000 [1:16:48<31:39:25,  1.19s/it]

37312 episode score is 835.31


  4%|▍         | 3868/100000 [1:16:49<31:09:10,  1.17s/it]

37321 episode score is 751.15


  4%|▍         | 3869/100000 [1:16:51<31:36:47,  1.18s/it]

37331 episode score is 740.65


  4%|▍         | 3870/100000 [1:16:52<31:06:28,  1.16s/it]

37340 episode score is 756.91


  4%|▍         | 3871/100000 [1:16:53<31:23:43,  1.18s/it]

37350 episode score is 723.69


  4%|▍         | 3872/100000 [1:16:54<31:08:57,  1.17s/it]

37359 episode score is 777.52


  4%|▍         | 3873/100000 [1:16:55<31:02:16,  1.16s/it]

37368 episode score is 782.31


  4%|▍         | 3874/100000 [1:16:56<30:58:21,  1.16s/it]

37378 episode score is 681.46


  4%|▍         | 3875/100000 [1:16:57<30:51:25,  1.16s/it]

37386 episode score is 875.19


  4%|▍         | 3876/100000 [1:16:59<30:39:02,  1.15s/it]

37395 episode score is 756.02


  4%|▍         | 3877/100000 [1:17:00<30:30:21,  1.14s/it]

37404 episode score is 756.75


  4%|▍         | 3878/100000 [1:17:01<31:04:57,  1.16s/it]

37414 episode score is 739.11


  4%|▍         | 3879/100000 [1:17:02<31:25:20,  1.18s/it]

37424 episode score is 733.85


  4%|▍         | 3880/100000 [1:17:03<30:56:36,  1.16s/it]

37433 episode score is 751.53


  4%|▍         | 3881/100000 [1:17:04<31:11:26,  1.17s/it]

37443 episode score is 724.99


  4%|▍         | 3882/100000 [1:17:06<30:48:38,  1.15s/it]

37451 episode score is 856.28


  4%|▍         | 3883/100000 [1:17:07<30:55:00,  1.16s/it]

37459 episode score is 888.51


  4%|▍         | 3884/100000 [1:17:08<31:22:15,  1.17s/it]

37467 episode score is 852.31


  4%|▍         | 3885/100000 [1:17:09<31:35:49,  1.18s/it]

37474 episode score is 1041.81


  4%|▍         | 3886/100000 [1:17:10<31:11:54,  1.17s/it]

37483 episode score is 777.85


  4%|▍         | 3887/100000 [1:17:11<30:53:59,  1.16s/it]

37492 episode score is 768.10


  4%|▍         | 3888/100000 [1:17:13<30:44:56,  1.15s/it]

37501 episode score is 769.71


  4%|▍         | 3889/100000 [1:17:14<30:53:41,  1.16s/it]

37510 episode score is 803.13


  4%|▍         | 3890/100000 [1:17:15<30:45:49,  1.15s/it]

37519 episode score is 782.71


  4%|▍         | 3891/100000 [1:17:16<30:34:14,  1.15s/it]

37528 episode score is 770.23


  4%|▍         | 3892/100000 [1:17:17<31:19:38,  1.17s/it]

37538 episode score is 758.20


  4%|▍         | 3893/100000 [1:17:18<31:00:34,  1.16s/it]

37547 episode score is 770.13


  4%|▍         | 3894/100000 [1:17:19<30:42:27,  1.15s/it]

37556 episode score is 761.03


  4%|▍         | 3895/100000 [1:17:21<30:22:29,  1.14s/it]

37565 episode score is 761.62


  4%|▍         | 3896/100000 [1:17:22<30:16:29,  1.13s/it]

37574 episode score is 769.29


  4%|▍         | 3897/100000 [1:17:23<30:22:55,  1.14s/it]

37583 episode score is 785.39


  4%|▍         | 3898/100000 [1:17:24<30:33:01,  1.14s/it]

37592 episode score is 793.66


  4%|▍         | 3899/100000 [1:17:25<30:39:55,  1.15s/it]

37601 episode score is 792.86
37610 episode score is 805.58


  4%|▍         | 3900/100000 [1:17:28<40:22:59,  1.51s/it]

Iteration 3900: Average test reward: 804.50


  4%|▍         | 3901/100000 [1:17:29<37:45:02,  1.41s/it]

37618 episode score is 901.89


  4%|▍         | 3902/100000 [1:17:30<35:31:14,  1.33s/it]

37625 episode score is 995.60


  4%|▍         | 3903/100000 [1:17:31<34:29:18,  1.29s/it]

37635 episode score is 727.52


  4%|▍         | 3904/100000 [1:17:32<33:22:23,  1.25s/it]

37643 episode score is 860.22


  4%|▍         | 3905/100000 [1:17:33<33:08:46,  1.24s/it]

37652 episode score is 823.98


  4%|▍         | 3906/100000 [1:17:35<33:00:34,  1.24s/it]

37662 episode score is 736.55


  4%|▍         | 3907/100000 [1:17:36<32:18:56,  1.21s/it]

37672 episode score is 691.66


  4%|▍         | 3908/100000 [1:17:37<32:16:56,  1.21s/it]

37682 episode score is 740.57


  4%|▍         | 3909/100000 [1:17:38<31:58:05,  1.20s/it]

37692 episode score is 721.29


  4%|▍         | 3910/100000 [1:17:39<31:30:44,  1.18s/it]

37701 episode score is 770.06


  4%|▍         | 3911/100000 [1:17:40<31:18:44,  1.17s/it]

37711 episode score is 710.57


  4%|▍         | 3912/100000 [1:17:42<30:49:07,  1.15s/it]

37720 episode score is 757.18


  4%|▍         | 3913/100000 [1:17:43<31:06:58,  1.17s/it]

37730 episode score is 728.85


  4%|▍         | 3914/100000 [1:17:44<31:28:09,  1.18s/it]

37740 episode score is 755.58


  4%|▍         | 3915/100000 [1:17:45<31:04:54,  1.16s/it]

37749 episode score is 776.34


  4%|▍         | 3916/100000 [1:17:46<31:20:21,  1.17s/it]

37759 episode score is 736.65


  4%|▍         | 3917/100000 [1:17:48<31:50:55,  1.19s/it]

37769 episode score is 758.02


  4%|▍         | 3918/100000 [1:17:49<31:21:29,  1.17s/it]

37778 episode score is 760.65


  4%|▍         | 3919/100000 [1:17:50<30:49:23,  1.15s/it]

37787 episode score is 767.57


  4%|▍         | 3920/100000 [1:17:51<31:13:27,  1.17s/it]

37796 episode score is 832.86


  4%|▍         | 3921/100000 [1:17:52<31:33:29,  1.18s/it]

37804 episode score is 915.65


  4%|▍         | 3922/100000 [1:17:53<31:42:46,  1.19s/it]

37813 episode score is 831.07


  4%|▍         | 3923/100000 [1:17:55<31:51:25,  1.19s/it]

37823 episode score is 756.63


  4%|▍         | 3924/100000 [1:17:56<32:04:14,  1.20s/it]

37832 episode score is 835.40


  4%|▍         | 3925/100000 [1:17:57<32:09:15,  1.20s/it]

37841 episode score is 832.27


  4%|▍         | 3926/100000 [1:17:58<31:35:49,  1.18s/it]

37850 episode score is 794.52


  4%|▍         | 3927/100000 [1:17:59<31:05:51,  1.17s/it]

37858 episode score is 880.43


  4%|▍         | 3928/100000 [1:18:00<31:14:59,  1.17s/it]

37868 episode score is 737.32


  4%|▍         | 3929/100000 [1:18:02<30:59:34,  1.16s/it]

37875 episode score is 1016.55


  4%|▍         | 3930/100000 [1:18:03<31:06:24,  1.17s/it]

37884 episode score is 813.91


  4%|▍         | 3931/100000 [1:18:04<30:33:12,  1.14s/it]

37893 episode score is 760.25


  4%|▍         | 3932/100000 [1:18:05<30:13:06,  1.13s/it]

37902 episode score is 766.64


  4%|▍         | 3933/100000 [1:18:06<30:26:45,  1.14s/it]

37911 episode score is 807.93


  4%|▍         | 3934/100000 [1:18:07<31:38:27,  1.19s/it]

37921 episode score is 744.63


  4%|▍         | 3935/100000 [1:18:09<31:51:23,  1.19s/it]

37931 episode score is 755.68


  4%|▍         | 3936/100000 [1:18:10<31:23:43,  1.18s/it]

37940 episode score is 787.02


  4%|▍         | 3937/100000 [1:18:11<30:59:27,  1.16s/it]

37949 episode score is 769.38


  4%|▍         | 3938/100000 [1:18:12<31:23:51,  1.18s/it]

37958 episode score is 838.69


  4%|▍         | 3939/100000 [1:18:13<31:19:49,  1.17s/it]

37967 episode score is 805.71


  4%|▍         | 3940/100000 [1:18:14<30:50:02,  1.16s/it]

37976 episode score is 777.55


  4%|▍         | 3941/100000 [1:18:16<31:11:03,  1.17s/it]

37986 episode score is 740.35


  4%|▍         | 3942/100000 [1:18:17<31:15:59,  1.17s/it]

37995 episode score is 817.25


  4%|▍         | 3943/100000 [1:18:18<31:07:55,  1.17s/it]

38004 episode score is 792.40


  4%|▍         | 3944/100000 [1:18:19<30:44:33,  1.15s/it]

38012 episode score is 863.79


  4%|▍         | 3945/100000 [1:18:20<31:09:17,  1.17s/it]

38022 episode score is 739.73


  4%|▍         | 3946/100000 [1:18:21<30:56:05,  1.16s/it]

38031 episode score is 792.25


  4%|▍         | 3947/100000 [1:18:23<31:08:52,  1.17s/it]

38040 episode score is 823.15


  4%|▍         | 3948/100000 [1:18:24<31:32:34,  1.18s/it]

38050 episode score is 761.19


  4%|▍         | 3949/100000 [1:18:25<31:48:14,  1.19s/it]

38060 episode score is 759.47


  4%|▍         | 3950/100000 [1:18:26<31:19:01,  1.17s/it]

38069 episode score is 800.50


  4%|▍         | 3951/100000 [1:18:27<31:23:11,  1.18s/it]

38076 episode score is 1017.02


  4%|▍         | 3952/100000 [1:18:28<31:09:53,  1.17s/it]

38085 episode score is 788.85


  4%|▍         | 3953/100000 [1:18:30<32:23:11,  1.21s/it]

38095 episode score is 808.47


  4%|▍         | 3954/100000 [1:18:31<31:31:20,  1.18s/it]

38104 episode score is 764.39


  4%|▍         | 3955/100000 [1:18:32<31:30:45,  1.18s/it]

38113 episode score is 813.14


  4%|▍         | 3956/100000 [1:18:33<31:43:16,  1.19s/it]

38123 episode score is 754.52


  4%|▍         | 3957/100000 [1:18:34<31:05:58,  1.17s/it]

38133 episode score is 686.56


  4%|▍         | 3958/100000 [1:18:36<31:19:44,  1.17s/it]

38143 episode score is 745.55


  4%|▍         | 3959/100000 [1:18:37<31:22:15,  1.18s/it]

38153 episode score is 732.18


  4%|▍         | 3960/100000 [1:18:38<31:25:43,  1.18s/it]

38162 episode score is 824.88


  4%|▍         | 3961/100000 [1:18:39<31:26:48,  1.18s/it]

38168 episode score is 1189.54


  4%|▍         | 3962/100000 [1:18:40<31:24:13,  1.18s/it]

38176 episode score is 880.01


  4%|▍         | 3963/100000 [1:18:42<31:28:44,  1.18s/it]

38185 episode score is 829.13


  4%|▍         | 3964/100000 [1:18:43<31:16:18,  1.17s/it]

38194 episode score is 798.77


  4%|▍         | 3965/100000 [1:18:44<30:42:06,  1.15s/it]

38203 episode score is 770.50


  4%|▍         | 3966/100000 [1:18:45<31:18:38,  1.17s/it]

38212 episode score is 842.27


  4%|▍         | 3967/100000 [1:18:46<31:04:07,  1.16s/it]

38221 episode score is 788.95


  4%|▍         | 3968/100000 [1:18:47<31:30:48,  1.18s/it]

38231 episode score is 743.02


  4%|▍         | 3969/100000 [1:18:49<31:54:04,  1.20s/it]

38241 episode score is 753.92


  4%|▍         | 3970/100000 [1:18:50<31:52:13,  1.19s/it]

38251 episode score is 742.06


  4%|▍         | 3971/100000 [1:18:51<32:01:12,  1.20s/it]

38261 episode score is 756.32


  4%|▍         | 3972/100000 [1:18:52<32:01:20,  1.20s/it]

38271 episode score is 744.80


  4%|▍         | 3973/100000 [1:18:53<32:03:28,  1.20s/it]

38281 episode score is 744.39


  4%|▍         | 3974/100000 [1:18:55<31:56:22,  1.20s/it]

38291 episode score is 742.02


  4%|▍         | 3975/100000 [1:18:56<31:11:19,  1.17s/it]

38300 episode score is 762.93


  4%|▍         | 3976/100000 [1:18:57<31:23:08,  1.18s/it]

38310 episode score is 748.92


  4%|▍         | 3977/100000 [1:18:58<31:11:10,  1.17s/it]

38319 episode score is 796.45


  4%|▍         | 3978/100000 [1:18:59<31:07:15,  1.17s/it]

38329 episode score is 712.92


  4%|▍         | 3979/100000 [1:19:00<30:53:29,  1.16s/it]

38339 episode score is 707.62


  4%|▍         | 3980/100000 [1:19:02<31:06:42,  1.17s/it]

38349 episode score is 730.39


  4%|▍         | 3981/100000 [1:19:03<31:38:06,  1.19s/it]

38359 episode score is 758.19


  4%|▍         | 3982/100000 [1:19:04<31:48:35,  1.19s/it]

38369 episode score is 742.50


  4%|▍         | 3983/100000 [1:19:05<31:15:16,  1.17s/it]

38378 episode score is 767.29


  4%|▍         | 3984/100000 [1:19:06<31:06:38,  1.17s/it]

38388 episode score is 713.21


  4%|▍         | 3985/100000 [1:19:07<30:53:27,  1.16s/it]

38398 episode score is 697.38


  4%|▍         | 3986/100000 [1:19:09<31:00:01,  1.16s/it]

38408 episode score is 725.30


  4%|▍         | 3987/100000 [1:19:10<30:55:42,  1.16s/it]

38418 episode score is 705.93


  4%|▍         | 3988/100000 [1:19:11<30:46:03,  1.15s/it]

38428 episode score is 703.12


  4%|▍         | 3989/100000 [1:19:12<30:41:57,  1.15s/it]

38437 episode score is 787.62


  4%|▍         | 3990/100000 [1:19:13<30:42:51,  1.15s/it]

38447 episode score is 720.44


  4%|▍         | 3991/100000 [1:19:14<30:46:01,  1.15s/it]

38457 episode score is 721.78


  4%|▍         | 3992/100000 [1:19:16<31:10:48,  1.17s/it]

38467 episode score is 763.63


  4%|▍         | 3993/100000 [1:19:17<31:14:28,  1.17s/it]

38477 episode score is 730.61
38485 episode score is 1113.27


  4%|▍         | 3995/100000 [1:19:19<32:52:39,  1.23s/it]

38494 episode score is 776.43


  4%|▍         | 3996/100000 [1:19:21<32:36:46,  1.22s/it]

38504 episode score is 737.30


  4%|▍         | 3997/100000 [1:19:22<32:50:09,  1.23s/it]

38514 episode score is 711.34


  4%|▍         | 3998/100000 [1:19:23<32:33:32,  1.22s/it]

38524 episode score is 735.36


  4%|▍         | 3999/100000 [1:19:24<31:40:25,  1.19s/it]

38532 episode score is 867.70
38542 episode score is 741.55


  4%|▍         | 4000/100000 [1:19:26<40:23:58,  1.51s/it]

Iteration 4000: Average test reward: 742.61


  4%|▍         | 4001/100000 [1:19:28<37:23:56,  1.40s/it]

38552 episode score is 710.99


  4%|▍         | 4002/100000 [1:19:29<35:30:13,  1.33s/it]

38561 episode score is 811.31


  4%|▍         | 4003/100000 [1:19:30<33:58:07,  1.27s/it]

38570 episode score is 787.43


  4%|▍         | 4004/100000 [1:19:31<32:42:44,  1.23s/it]

38580 episode score is 698.57


  4%|▍         | 4005/100000 [1:19:32<32:05:56,  1.20s/it]

38590 episode score is 706.63


  4%|▍         | 4006/100000 [1:19:33<31:44:21,  1.19s/it]

38600 episode score is 705.78


  4%|▍         | 4007/100000 [1:19:34<31:10:56,  1.17s/it]

38610 episode score is 690.65


  4%|▍         | 4008/100000 [1:19:35<30:51:59,  1.16s/it]

38620 episode score is 695.53


  4%|▍         | 4009/100000 [1:19:37<30:44:08,  1.15s/it]

38630 episode score is 704.28


  4%|▍         | 4010/100000 [1:19:38<30:29:06,  1.14s/it]

38640 episode score is 686.94


  4%|▍         | 4011/100000 [1:19:39<31:02:02,  1.16s/it]

38649 episode score is 827.42


  4%|▍         | 4012/100000 [1:19:40<31:21:42,  1.18s/it]

38659 episode score is 740.67


  4%|▍         | 4013/100000 [1:19:41<31:03:13,  1.16s/it]

38669 episode score is 707.26


  4%|▍         | 4014/100000 [1:19:43<31:16:16,  1.17s/it]

38679 episode score is 731.57


  4%|▍         | 4015/100000 [1:19:44<31:06:52,  1.17s/it]

38689 episode score is 712.21


  4%|▍         | 4016/100000 [1:19:45<30:45:43,  1.15s/it]

38699 episode score is 693.57


  4%|▍         | 4017/100000 [1:19:46<30:42:21,  1.15s/it]

38709 episode score is 702.65


  4%|▍         | 4018/100000 [1:19:47<30:47:50,  1.16s/it]

38719 episode score is 711.70


  4%|▍         | 4019/100000 [1:19:48<30:45:47,  1.15s/it]

38729 episode score is 702.61


  4%|▍         | 4020/100000 [1:19:49<30:39:24,  1.15s/it]

38739 episode score is 692.94


  4%|▍         | 4021/100000 [1:19:51<30:36:04,  1.15s/it]

38749 episode score is 699.73


  4%|▍         | 4022/100000 [1:19:52<30:24:46,  1.14s/it]

38759 episode score is 690.84


  4%|▍         | 4023/100000 [1:19:53<30:27:51,  1.14s/it]

38769 episode score is 705.77


  4%|▍         | 4024/100000 [1:19:54<30:16:17,  1.14s/it]

38779 episode score is 690.87


  4%|▍         | 4025/100000 [1:19:55<30:27:35,  1.14s/it]

38789 episode score is 712.98


  4%|▍         | 4026/100000 [1:19:56<30:26:24,  1.14s/it]

38799 episode score is 706.13


  4%|▍         | 4027/100000 [1:19:57<30:38:00,  1.15s/it]

38809 episode score is 727.42


  4%|▍         | 4028/100000 [1:19:59<30:38:12,  1.15s/it]

38819 episode score is 713.58


  4%|▍         | 4029/100000 [1:20:00<30:58:01,  1.16s/it]

38829 episode score is 719.82


  4%|▍         | 4030/100000 [1:20:01<30:58:14,  1.16s/it]

38839 episode score is 723.24


  4%|▍         | 4031/100000 [1:20:02<31:14:39,  1.17s/it]

38849 episode score is 740.30


  4%|▍         | 4032/100000 [1:20:03<31:02:56,  1.16s/it]

38859 episode score is 708.88


  4%|▍         | 4033/100000 [1:20:04<30:55:49,  1.16s/it]

38869 episode score is 718.13


  4%|▍         | 4034/100000 [1:20:06<30:44:20,  1.15s/it]

38879 episode score is 702.39


  4%|▍         | 4035/100000 [1:20:07<30:20:15,  1.14s/it]

38888 episode score is 764.55


  4%|▍         | 4036/100000 [1:20:08<30:04:10,  1.13s/it]

38897 episode score is 770.79


  4%|▍         | 4037/100000 [1:20:09<30:39:12,  1.15s/it]

38907 episode score is 734.98


  4%|▍         | 4038/100000 [1:20:10<30:55:12,  1.16s/it]

38917 episode score is 737.60


  4%|▍         | 4039/100000 [1:20:11<30:58:54,  1.16s/it]

38927 episode score is 725.97


  4%|▍         | 4040/100000 [1:20:12<31:02:30,  1.16s/it]

38937 episode score is 728.85


  4%|▍         | 4041/100000 [1:20:14<31:14:24,  1.17s/it]

38947 episode score is 743.05


  4%|▍         | 4042/100000 [1:20:15<31:41:37,  1.19s/it]

38957 episode score is 764.39


  4%|▍         | 4043/100000 [1:20:16<31:21:49,  1.18s/it]

38966 episode score is 786.63


  4%|▍         | 4044/100000 [1:20:17<31:32:18,  1.18s/it]

38976 episode score is 746.01


  4%|▍         | 4045/100000 [1:20:18<31:22:16,  1.18s/it]

38986 episode score is 726.07


  4%|▍         | 4046/100000 [1:20:20<31:10:07,  1.17s/it]

38996 episode score is 717.26


  4%|▍         | 4047/100000 [1:20:21<30:50:05,  1.16s/it]

39006 episode score is 697.15


  4%|▍         | 4048/100000 [1:20:22<30:37:36,  1.15s/it]

39016 episode score is 681.05


  4%|▍         | 4049/100000 [1:20:23<30:50:59,  1.16s/it]

39026 episode score is 727.04


  4%|▍         | 4050/100000 [1:20:24<31:18:32,  1.17s/it]

39036 episode score is 750.80


  4%|▍         | 4051/100000 [1:20:25<31:30:25,  1.18s/it]

39046 episode score is 741.19


  4%|▍         | 4052/100000 [1:20:27<32:11:38,  1.21s/it]

39056 episode score is 779.50


  4%|▍         | 4053/100000 [1:20:28<31:50:30,  1.19s/it]

39066 episode score is 725.49


  4%|▍         | 4054/100000 [1:20:29<31:12:06,  1.17s/it]

39076 episode score is 692.61


  4%|▍         | 4055/100000 [1:20:30<31:07:08,  1.17s/it]

39086 episode score is 712.57


  4%|▍         | 4056/100000 [1:20:31<30:55:52,  1.16s/it]

39096 episode score is 709.58


  4%|▍         | 4057/100000 [1:20:32<31:02:20,  1.16s/it]

39106 episode score is 724.10


  4%|▍         | 4058/100000 [1:20:34<30:52:01,  1.16s/it]

39115 episode score is 786.24


  4%|▍         | 4059/100000 [1:20:35<30:52:51,  1.16s/it]

39125 episode score is 717.80


  4%|▍         | 4060/100000 [1:20:36<31:32:23,  1.18s/it]

39134 episode score is 790.73


  4%|▍         | 4061/100000 [1:20:37<31:36:40,  1.19s/it]

39145 episode score is 662.27


  4%|▍         | 4062/100000 [1:20:38<31:13:18,  1.17s/it]

39155 episode score is 701.78


  4%|▍         | 4063/100000 [1:20:39<31:12:09,  1.17s/it]

39166 episode score is 628.27


  4%|▍         | 4064/100000 [1:20:41<31:00:48,  1.16s/it]

39177 episode score is 618.19


  4%|▍         | 4065/100000 [1:20:42<30:58:58,  1.16s/it]

39188 episode score is 637.14


  4%|▍         | 4066/100000 [1:20:43<31:08:54,  1.17s/it]

39199 episode score is 643.87


  4%|▍         | 4067/100000 [1:20:44<31:09:02,  1.17s/it]

39210 episode score is 639.65


  4%|▍         | 4068/100000 [1:20:45<30:46:47,  1.16s/it]

39220 episode score is 677.36


  4%|▍         | 4069/100000 [1:20:46<30:58:39,  1.16s/it]

39231 episode score is 647.64


  4%|▍         | 4070/100000 [1:20:48<31:16:09,  1.17s/it]

39242 episode score is 657.79


  4%|▍         | 4071/100000 [1:20:49<30:46:20,  1.15s/it]

39252 episode score is 670.63


  4%|▍         | 4072/100000 [1:20:50<30:52:38,  1.16s/it]

39263 episode score is 634.75


  4%|▍         | 4073/100000 [1:20:51<30:53:11,  1.16s/it]

39274 episode score is 632.81


  4%|▍         | 4074/100000 [1:20:52<30:51:32,  1.16s/it]

39285 episode score is 624.04


  4%|▍         | 4075/100000 [1:20:53<30:56:55,  1.16s/it]

39296 episode score is 642.83


  4%|▍         | 4076/100000 [1:20:55<31:02:34,  1.17s/it]

39307 episode score is 648.35


  4%|▍         | 4077/100000 [1:20:56<30:56:57,  1.16s/it]

39318 episode score is 624.54


  4%|▍         | 4078/100000 [1:20:57<30:55:10,  1.16s/it]

39329 episode score is 633.90


  4%|▍         | 4079/100000 [1:20:58<31:02:12,  1.16s/it]

39340 episode score is 643.48


  4%|▍         | 4080/100000 [1:20:59<30:56:02,  1.16s/it]

39351 episode score is 624.36


  4%|▍         | 4081/100000 [1:21:00<30:49:39,  1.16s/it]

39362 episode score is 618.55


  4%|▍         | 4082/100000 [1:21:01<30:46:51,  1.16s/it]

39373 episode score is 618.28


  4%|▍         | 4083/100000 [1:21:03<30:45:29,  1.15s/it]

39384 episode score is 616.80


  4%|▍         | 4084/100000 [1:21:04<30:39:59,  1.15s/it]

39395 episode score is 614.28


  4%|▍         | 4085/100000 [1:21:05<30:55:01,  1.16s/it]

39406 episode score is 645.58


  4%|▍         | 4086/100000 [1:21:06<30:48:10,  1.16s/it]

39417 episode score is 625.44


  4%|▍         | 4087/100000 [1:21:07<30:45:55,  1.15s/it]

39428 episode score is 625.73


  4%|▍         | 4088/100000 [1:21:08<30:42:44,  1.15s/it]

39439 episode score is 625.43


  4%|▍         | 4089/100000 [1:21:10<30:54:28,  1.16s/it]

39450 episode score is 639.98


  4%|▍         | 4090/100000 [1:21:11<30:49:33,  1.16s/it]

39461 episode score is 623.81


  4%|▍         | 4091/100000 [1:21:12<30:59:20,  1.16s/it]

39472 episode score is 638.23


  4%|▍         | 4092/100000 [1:21:13<31:06:52,  1.17s/it]

39483 episode score is 643.30


  4%|▍         | 4093/100000 [1:21:14<31:04:42,  1.17s/it]

39494 episode score is 643.14


  4%|▍         | 4094/100000 [1:21:15<31:18:47,  1.18s/it]

39505 episode score is 652.27


  4%|▍         | 4095/100000 [1:21:17<30:47:25,  1.16s/it]

39515 episode score is 674.53


  4%|▍         | 4096/100000 [1:21:18<30:55:08,  1.16s/it]

39526 episode score is 632.75


  4%|▍         | 4097/100000 [1:21:19<31:12:28,  1.17s/it]

39537 episode score is 649.93


  4%|▍         | 4098/100000 [1:21:20<31:07:04,  1.17s/it]

39548 episode score is 636.75


  4%|▍         | 4099/100000 [1:21:21<30:56:42,  1.16s/it]

39559 episode score is 626.06
39570 episode score is 640.09


  4%|▍         | 4100/100000 [1:21:23<38:33:05,  1.45s/it]

Iteration 4100: Average test reward: 626.30


  4%|▍         | 4101/100000 [1:21:24<36:13:36,  1.36s/it]

39581 episode score is 629.90


  4%|▍         | 4102/100000 [1:21:26<34:52:40,  1.31s/it]

39592 episode score is 643.10


  4%|▍         | 4103/100000 [1:21:27<33:23:18,  1.25s/it]

39603 episode score is 607.12


  4%|▍         | 4104/100000 [1:21:28<32:35:13,  1.22s/it]

39614 episode score is 628.22


  4%|▍         | 4105/100000 [1:21:29<32:28:55,  1.22s/it]

39625 episode score is 666.94


  4%|▍         | 4106/100000 [1:21:30<32:12:04,  1.21s/it]

39636 episode score is 657.93


  4%|▍         | 4107/100000 [1:21:32<32:25:39,  1.22s/it]

39647 episode score is 669.97


  4%|▍         | 4108/100000 [1:21:33<32:25:03,  1.22s/it]

39658 episode score is 668.23


  4%|▍         | 4109/100000 [1:21:34<32:18:19,  1.21s/it]

39669 episode score is 643.47


  4%|▍         | 4110/100000 [1:21:35<32:13:01,  1.21s/it]

39680 episode score is 667.22


  4%|▍         | 4111/100000 [1:21:36<31:27:03,  1.18s/it]

39690 episode score is 674.74


  4%|▍         | 4112/100000 [1:21:37<31:01:09,  1.16s/it]

39700 episode score is 680.76


  4%|▍         | 4113/100000 [1:21:39<31:17:07,  1.17s/it]

39711 episode score is 658.89


  4%|▍         | 4114/100000 [1:21:40<30:48:41,  1.16s/it]

39721 episode score is 681.32


  4%|▍         | 4115/100000 [1:21:41<31:12:41,  1.17s/it]

39732 episode score is 661.30


  4%|▍         | 4116/100000 [1:21:42<31:19:44,  1.18s/it]

39743 episode score is 652.54


  4%|▍         | 4117/100000 [1:21:43<31:34:08,  1.19s/it]

39754 episode score is 660.40


  4%|▍         | 4118/100000 [1:21:45<31:34:21,  1.19s/it]

39765 episode score is 655.87


  4%|▍         | 4119/100000 [1:21:46<31:39:30,  1.19s/it]

39776 episode score is 659.89


  4%|▍         | 4120/100000 [1:21:47<31:49:58,  1.20s/it]

39787 episode score is 658.46


  4%|▍         | 4121/100000 [1:21:48<31:25:55,  1.18s/it]

39797 episode score is 689.62


  4%|▍         | 4122/100000 [1:21:49<30:52:39,  1.16s/it]

39807 episode score is 672.14


  4%|▍         | 4123/100000 [1:21:50<30:38:26,  1.15s/it]

39817 episode score is 692.80


  4%|▍         | 4124/100000 [1:21:52<31:00:24,  1.16s/it]

39828 episode score is 655.49


  4%|▍         | 4125/100000 [1:21:53<30:56:49,  1.16s/it]

39839 episode score is 628.98


  4%|▍         | 4126/100000 [1:21:54<32:02:58,  1.20s/it]

39850 episode score is 668.37


  4%|▍         | 4127/100000 [1:21:55<32:08:05,  1.21s/it]

39861 episode score is 667.73


  4%|▍         | 4128/100000 [1:21:56<32:08:13,  1.21s/it]

39872 episode score is 660.98


  4%|▍         | 4129/100000 [1:21:58<31:56:29,  1.20s/it]

39883 episode score is 641.00


  4%|▍         | 4130/100000 [1:21:59<31:48:01,  1.19s/it]

39894 episode score is 627.22


  4%|▍         | 4131/100000 [1:22:00<31:30:12,  1.18s/it]

39905 episode score is 622.10


  4%|▍         | 4132/100000 [1:22:01<31:06:06,  1.17s/it]

39916 episode score is 603.45


  4%|▍         | 4133/100000 [1:22:02<31:27:37,  1.18s/it]

39928 episode score is 568.61


  4%|▍         | 4134/100000 [1:22:03<31:26:53,  1.18s/it]

39939 episode score is 628.12


  4%|▍         | 4135/100000 [1:22:05<31:32:18,  1.18s/it]

39950 episode score is 632.43


  4%|▍         | 4136/100000 [1:22:06<31:37:33,  1.19s/it]

39961 episode score is 643.35


  4%|▍         | 4137/100000 [1:22:07<31:36:54,  1.19s/it]

39972 episode score is 644.45


  4%|▍         | 4138/100000 [1:22:08<31:17:24,  1.18s/it]

39983 episode score is 621.63


  4%|▍         | 4139/100000 [1:22:09<31:18:42,  1.18s/it]

39994 episode score is 637.27


  4%|▍         | 4140/100000 [1:22:11<31:13:03,  1.17s/it]

40005 episode score is 632.74


  4%|▍         | 4141/100000 [1:22:12<31:01:47,  1.17s/it]

40016 episode score is 613.99


  4%|▍         | 4142/100000 [1:22:13<30:54:49,  1.16s/it]

40027 episode score is 618.81


  4%|▍         | 4143/100000 [1:22:14<31:08:41,  1.17s/it]

40038 episode score is 651.51


  4%|▍         | 4144/100000 [1:22:15<30:43:02,  1.15s/it]

40048 episode score is 673.36


  4%|▍         | 4145/100000 [1:22:16<30:20:16,  1.14s/it]

40058 episode score is 663.96


  4%|▍         | 4146/100000 [1:22:17<31:01:56,  1.17s/it]

40069 episode score is 663.46


  4%|▍         | 4147/100000 [1:22:19<31:25:45,  1.18s/it]

40080 episode score is 661.55


  4%|▍         | 4148/100000 [1:22:20<31:05:54,  1.17s/it]

40091 episode score is 605.35


  4%|▍         | 4149/100000 [1:22:21<31:23:40,  1.18s/it]

40102 episode score is 665.37


  4%|▍         | 4150/100000 [1:22:22<31:35:03,  1.19s/it]

40113 episode score is 661.82


  4%|▍         | 4151/100000 [1:22:23<31:08:38,  1.17s/it]

40123 episode score is 690.47


  4%|▍         | 4152/100000 [1:22:24<30:40:35,  1.15s/it]

40133 episode score is 678.08


  4%|▍         | 4153/100000 [1:22:26<30:19:13,  1.14s/it]

40143 episode score is 670.38


  4%|▍         | 4154/100000 [1:22:27<30:35:01,  1.15s/it]

40154 episode score is 629.18


  4%|▍         | 4155/100000 [1:22:28<31:05:26,  1.17s/it]

40165 episode score is 662.76


  4%|▍         | 4156/100000 [1:22:29<31:16:15,  1.17s/it]

40176 episode score is 657.06


  4%|▍         | 4157/100000 [1:22:30<31:23:16,  1.18s/it]

40187 episode score is 651.14


  4%|▍         | 4158/100000 [1:22:32<31:42:34,  1.19s/it]

40198 episode score is 669.10


  4%|▍         | 4159/100000 [1:22:33<31:12:05,  1.17s/it]

40208 episode score is 686.81


  4%|▍         | 4160/100000 [1:22:34<30:47:59,  1.16s/it]

40218 episode score is 682.52


  4%|▍         | 4161/100000 [1:22:35<31:11:48,  1.17s/it]

40229 episode score is 676.70


  4%|▍         | 4162/100000 [1:22:36<31:27:09,  1.18s/it]

40240 episode score is 673.18


  4%|▍         | 4163/100000 [1:22:37<31:30:49,  1.18s/it]

40251 episode score is 665.54


  4%|▍         | 4164/100000 [1:22:39<31:30:34,  1.18s/it]

40262 episode score is 651.45


  4%|▍         | 4165/100000 [1:22:40<30:59:44,  1.16s/it]

40273 episode score is 591.20


  4%|▍         | 4166/100000 [1:22:41<30:50:56,  1.16s/it]

40284 episode score is 622.35


  4%|▍         | 4167/100000 [1:22:42<30:59:33,  1.16s/it]

40295 episode score is 641.01


  4%|▍         | 4168/100000 [1:22:43<30:38:06,  1.15s/it]

40305 episode score is 679.18


  4%|▍         | 4169/100000 [1:22:44<30:52:39,  1.16s/it]

40316 episode score is 636.16


  4%|▍         | 4170/100000 [1:22:46<30:58:43,  1.16s/it]

40327 episode score is 639.55


  4%|▍         | 4171/100000 [1:22:47<31:06:11,  1.17s/it]

40338 episode score is 636.24


  4%|▍         | 4172/100000 [1:22:48<30:48:54,  1.16s/it]

40348 episode score is 680.26


  4%|▍         | 4173/100000 [1:22:49<30:29:16,  1.15s/it]

40358 episode score is 680.56


  4%|▍         | 4174/100000 [1:22:50<30:22:56,  1.14s/it]

40368 episode score is 691.98


  4%|▍         | 4175/100000 [1:22:51<30:29:25,  1.15s/it]

40378 episode score is 694.26


  4%|▍         | 4176/100000 [1:22:52<30:28:06,  1.14s/it]

40388 episode score is 695.02


  4%|▍         | 4177/100000 [1:22:54<30:32:12,  1.15s/it]

40398 episode score is 706.79


  4%|▍         | 4178/100000 [1:22:55<30:52:06,  1.16s/it]

40408 episode score is 735.43


  4%|▍         | 4179/100000 [1:22:56<30:55:13,  1.16s/it]

40418 episode score is 720.32


  4%|▍         | 4180/100000 [1:22:57<30:51:34,  1.16s/it]

40428 episode score is 709.16


  4%|▍         | 4181/100000 [1:22:58<30:41:05,  1.15s/it]

40438 episode score is 696.89


  4%|▍         | 4182/100000 [1:22:59<30:42:19,  1.15s/it]

40448 episode score is 705.05


  4%|▍         | 4183/100000 [1:23:01<30:59:53,  1.16s/it]

40458 episode score is 723.66


  4%|▍         | 4184/100000 [1:23:02<31:04:47,  1.17s/it]

40468 episode score is 726.71


  4%|▍         | 4185/100000 [1:23:03<31:08:16,  1.17s/it]

40478 episode score is 716.52


  4%|▍         | 4186/100000 [1:23:04<31:00:07,  1.16s/it]

40488 episode score is 706.85


  4%|▍         | 4187/100000 [1:23:05<30:54:54,  1.16s/it]

40498 episode score is 711.80


  4%|▍         | 4188/100000 [1:23:06<30:43:00,  1.15s/it]

40508 episode score is 700.95


  4%|▍         | 4189/100000 [1:23:07<30:37:26,  1.15s/it]

40518 episode score is 707.05


  4%|▍         | 4190/100000 [1:23:09<30:40:35,  1.15s/it]

40528 episode score is 714.08


  4%|▍         | 4191/100000 [1:23:10<31:32:15,  1.19s/it]

40538 episode score is 725.38


  4%|▍         | 4192/100000 [1:23:11<31:10:23,  1.17s/it]

40548 episode score is 703.05


  4%|▍         | 4193/100000 [1:23:12<30:57:20,  1.16s/it]

40558 episode score is 702.24


  4%|▍         | 4194/100000 [1:23:13<31:05:54,  1.17s/it]

40568 episode score is 730.25


  4%|▍         | 4195/100000 [1:23:14<30:45:50,  1.16s/it]

40578 episode score is 697.86


  4%|▍         | 4196/100000 [1:23:16<30:32:24,  1.15s/it]

40588 episode score is 697.35


  4%|▍         | 4197/100000 [1:23:17<30:20:44,  1.14s/it]

40598 episode score is 678.82


  4%|▍         | 4198/100000 [1:23:18<30:12:28,  1.14s/it]

40608 episode score is 683.27


  4%|▍         | 4199/100000 [1:23:19<30:00:44,  1.13s/it]

40618 episode score is 682.83
40628 episode score is 696.97


  4%|▍         | 4200/100000 [1:23:21<37:49:35,  1.42s/it]

Iteration 4200: Average test reward: 684.11


  4%|▍         | 4201/100000 [1:23:22<36:15:20,  1.36s/it]

40639 episode score is 679.15


  4%|▍         | 4202/100000 [1:23:23<35:05:30,  1.32s/it]

40650 episode score is 687.77


  4%|▍         | 4203/100000 [1:23:25<33:22:51,  1.25s/it]

40660 episode score is 685.89


  4%|▍         | 4204/100000 [1:23:26<32:15:32,  1.21s/it]

40670 episode score is 691.44


  4%|▍         | 4205/100000 [1:23:27<31:29:09,  1.18s/it]

40680 episode score is 693.47


  4%|▍         | 4206/100000 [1:23:28<31:43:37,  1.19s/it]

40691 episode score is 682.29


  4%|▍         | 4207/100000 [1:23:29<31:53:19,  1.20s/it]

40702 episode score is 673.35


  4%|▍         | 4208/100000 [1:23:30<31:54:57,  1.20s/it]

40713 episode score is 669.09


  4%|▍         | 4209/100000 [1:23:32<31:48:20,  1.20s/it]

40724 episode score is 654.99


  4%|▍         | 4210/100000 [1:23:33<31:48:51,  1.20s/it]

40735 episode score is 662.41


  4%|▍         | 4211/100000 [1:23:34<31:49:48,  1.20s/it]

40746 episode score is 657.90


  4%|▍         | 4212/100000 [1:23:35<31:39:56,  1.19s/it]

40757 episode score is 648.86


  4%|▍         | 4213/100000 [1:23:36<31:25:32,  1.18s/it]

40768 episode score is 644.46


  4%|▍         | 4214/100000 [1:23:38<31:22:01,  1.18s/it]

40779 episode score is 654.09


  4%|▍         | 4215/100000 [1:23:39<30:56:43,  1.16s/it]

40790 episode score is 616.14


  4%|▍         | 4216/100000 [1:23:40<30:49:44,  1.16s/it]

40801 episode score is 633.17


  4%|▍         | 4217/100000 [1:23:41<30:53:39,  1.16s/it]

40812 episode score is 652.33


  4%|▍         | 4218/100000 [1:23:42<31:00:19,  1.17s/it]

40823 episode score is 655.53


  4%|▍         | 4219/100000 [1:23:43<30:51:03,  1.16s/it]

40834 episode score is 632.91


  4%|▍         | 4220/100000 [1:23:44<31:01:45,  1.17s/it]

40845 episode score is 656.44


  4%|▍         | 4221/100000 [1:23:46<31:15:35,  1.17s/it]

40856 episode score is 672.97


  4%|▍         | 4222/100000 [1:23:47<30:42:31,  1.15s/it]

40866 episode score is 685.00


  4%|▍         | 4223/100000 [1:23:48<31:08:46,  1.17s/it]

40877 episode score is 674.76


  4%|▍         | 4224/100000 [1:23:49<31:25:58,  1.18s/it]

40888 episode score is 669.07


  4%|▍         | 4225/100000 [1:23:50<31:24:44,  1.18s/it]

40899 episode score is 655.47


  4%|▍         | 4226/100000 [1:23:52<31:24:05,  1.18s/it]

40910 episode score is 652.89


  4%|▍         | 4227/100000 [1:23:53<31:34:40,  1.19s/it]

40921 episode score is 670.01


  4%|▍         | 4228/100000 [1:23:54<31:42:30,  1.19s/it]

40932 episode score is 676.98


  4%|▍         | 4229/100000 [1:23:55<31:42:46,  1.19s/it]

40943 episode score is 660.76


  4%|▍         | 4230/100000 [1:23:56<31:37:14,  1.19s/it]

40954 episode score is 655.54


  4%|▍         | 4231/100000 [1:23:57<30:56:44,  1.16s/it]

40964 episode score is 682.81


  4%|▍         | 4232/100000 [1:23:59<30:54:38,  1.16s/it]

40975 episode score is 643.78


  4%|▍         | 4233/100000 [1:24:00<30:54:51,  1.16s/it]

40986 episode score is 640.40


  4%|▍         | 4234/100000 [1:24:01<30:49:08,  1.16s/it]

40997 episode score is 638.84


  4%|▍         | 4235/100000 [1:24:02<30:59:57,  1.17s/it]

41008 episode score is 650.44


  4%|▍         | 4236/100000 [1:24:03<31:00:01,  1.17s/it]

41019 episode score is 649.31


  4%|▍         | 4237/100000 [1:24:04<31:02:42,  1.17s/it]

41030 episode score is 651.39


  4%|▍         | 4238/100000 [1:24:06<31:04:29,  1.17s/it]

41041 episode score is 642.56


  4%|▍         | 4239/100000 [1:24:07<30:33:08,  1.15s/it]

41051 episode score is 684.71


  4%|▍         | 4240/100000 [1:24:08<31:00:47,  1.17s/it]

41062 episode score is 674.44


  4%|▍         | 4241/100000 [1:24:09<30:37:11,  1.15s/it]

41072 episode score is 691.97


  4%|▍         | 4242/100000 [1:24:10<30:31:41,  1.15s/it]

41082 episode score is 710.01


  4%|▍         | 4243/100000 [1:24:11<30:56:34,  1.16s/it]

41093 episode score is 673.60


  4%|▍         | 4244/100000 [1:24:12<30:34:20,  1.15s/it]

41103 episode score is 684.04


  4%|▍         | 4245/100000 [1:24:14<30:24:34,  1.14s/it]

41113 episode score is 700.29


  4%|▍         | 4246/100000 [1:24:15<30:12:39,  1.14s/it]

41123 episode score is 694.50


  4%|▍         | 4247/100000 [1:24:16<29:57:47,  1.13s/it]

41133 episode score is 681.78


  4%|▍         | 4248/100000 [1:24:17<30:54:34,  1.16s/it]

41143 episode score is 702.04


  4%|▍         | 4249/100000 [1:24:18<31:22:54,  1.18s/it]

41154 episode score is 683.96


  4%|▍         | 4250/100000 [1:24:19<31:01:56,  1.17s/it]

41164 episode score is 698.34


  4%|▍         | 4251/100000 [1:24:21<31:01:35,  1.17s/it]

41174 episode score is 717.87


  4%|▍         | 4252/100000 [1:24:22<30:41:28,  1.15s/it]

41184 episode score is 687.07


  4%|▍         | 4253/100000 [1:24:23<30:51:11,  1.16s/it]

41195 episode score is 633.26


  4%|▍         | 4254/100000 [1:24:24<30:45:49,  1.16s/it]

41206 episode score is 616.15


  4%|▍         | 4255/100000 [1:24:25<30:58:21,  1.16s/it]

41218 episode score is 571.88


  4%|▍         | 4256/100000 [1:24:26<30:54:07,  1.16s/it]

41229 episode score is 621.76


  4%|▍         | 4257/100000 [1:24:28<31:00:58,  1.17s/it]

41241 episode score is 563.53


  4%|▍         | 4258/100000 [1:24:29<30:52:14,  1.16s/it]

41252 episode score is 616.52


  4%|▍         | 4259/100000 [1:24:30<30:52:38,  1.16s/it]

41263 episode score is 608.31


  4%|▍         | 4260/100000 [1:24:31<31:06:21,  1.17s/it]

41274 episode score is 641.83


  4%|▍         | 4261/100000 [1:24:32<30:58:11,  1.16s/it]

41285 episode score is 606.45


  4%|▍         | 4262/100000 [1:24:33<30:28:06,  1.15s/it]

41295 episode score is 669.94


  4%|▍         | 4263/100000 [1:24:34<30:14:46,  1.14s/it]

41305 episode score is 685.05


  4%|▍         | 4264/100000 [1:24:36<30:03:40,  1.13s/it]

41315 episode score is 679.77


  4%|▍         | 4265/100000 [1:24:37<29:59:35,  1.13s/it]

41325 episode score is 679.88


  4%|▍         | 4266/100000 [1:24:38<30:23:31,  1.14s/it]

41336 episode score is 635.58


  4%|▍         | 4267/100000 [1:24:39<30:48:27,  1.16s/it]

41346 episode score is 738.28


  4%|▍         | 4268/100000 [1:24:40<30:45:36,  1.16s/it]

41356 episode score is 713.61


  4%|▍         | 4269/100000 [1:24:41<31:02:33,  1.17s/it]

41367 episode score is 662.02


  4%|▍         | 4270/100000 [1:24:43<30:35:58,  1.15s/it]

41377 episode score is 678.27


  4%|▍         | 4271/100000 [1:24:44<30:22:10,  1.14s/it]

41387 episode score is 694.50


  4%|▍         | 4272/100000 [1:24:45<30:06:15,  1.13s/it]

41397 episode score is 686.68


  4%|▍         | 4273/100000 [1:24:46<30:43:17,  1.16s/it]

41408 episode score is 675.69


  4%|▍         | 4274/100000 [1:24:47<30:53:01,  1.16s/it]

41419 episode score is 650.30


  4%|▍         | 4275/100000 [1:24:48<31:16:50,  1.18s/it]

41430 episode score is 682.65


  4%|▍         | 4276/100000 [1:24:49<30:46:53,  1.16s/it]

41440 episode score is 690.80


  4%|▍         | 4277/100000 [1:24:51<31:00:59,  1.17s/it]

41451 episode score is 664.21


  4%|▍         | 4278/100000 [1:24:52<31:23:34,  1.18s/it]

41462 episode score is 677.89


  4%|▍         | 4279/100000 [1:24:53<31:14:13,  1.17s/it]

41473 episode score is 636.79


  4%|▍         | 4280/100000 [1:24:54<30:47:25,  1.16s/it]

41483 episode score is 694.03


  4%|▍         | 4281/100000 [1:24:55<30:34:41,  1.15s/it]

41493 episode score is 694.36


  4%|▍         | 4282/100000 [1:24:56<30:18:02,  1.14s/it]

41503 episode score is 683.42


  4%|▍         | 4283/100000 [1:24:58<30:43:20,  1.16s/it]

41514 episode score is 667.63


  4%|▍         | 4284/100000 [1:24:59<31:00:57,  1.17s/it]

41525 episode score is 669.49


  4%|▍         | 4285/100000 [1:25:00<31:00:39,  1.17s/it]

41536 episode score is 646.98


  4%|▍         | 4286/100000 [1:25:01<30:33:43,  1.15s/it]

41546 episode score is 686.87


  4%|▍         | 4287/100000 [1:25:02<30:59:48,  1.17s/it]

41557 episode score is 664.90


  4%|▍         | 4288/100000 [1:25:03<30:32:28,  1.15s/it]

41567 episode score is 690.51


  4%|▍         | 4289/100000 [1:25:05<30:34:28,  1.15s/it]

41578 episode score is 642.58


  4%|▍         | 4290/100000 [1:25:06<30:55:45,  1.16s/it]

41589 episode score is 658.80


  4%|▍         | 4291/100000 [1:25:07<31:11:56,  1.17s/it]

41600 episode score is 671.72


  4%|▍         | 4292/100000 [1:25:08<31:24:25,  1.18s/it]

41611 episode score is 668.95


  4%|▍         | 4293/100000 [1:25:09<31:24:48,  1.18s/it]

41622 episode score is 662.00


  4%|▍         | 4294/100000 [1:25:10<31:25:39,  1.18s/it]

41633 episode score is 664.31


  4%|▍         | 4295/100000 [1:25:12<31:19:15,  1.18s/it]

41644 episode score is 647.94


  4%|▍         | 4296/100000 [1:25:13<31:37:15,  1.19s/it]

41655 episode score is 676.24


  4%|▍         | 4297/100000 [1:25:14<31:39:37,  1.19s/it]

41666 episode score is 663.66


  4%|▍         | 4298/100000 [1:25:15<30:58:10,  1.16s/it]

41676 episode score is 681.36


  4%|▍         | 4299/100000 [1:25:16<30:53:35,  1.16s/it]

41687 episode score is 638.81
41698 episode score is 656.64


  4%|▍         | 4300/100000 [1:25:18<38:50:17,  1.46s/it]

Iteration 4300: Average test reward: 682.91


  4%|▍         | 4301/100000 [1:25:20<36:04:37,  1.36s/it]

41708 episode score is 686.01


  4%|▍         | 4302/100000 [1:25:21<34:07:39,  1.28s/it]

41718 episode score is 686.64


  4%|▍         | 4303/100000 [1:25:22<32:52:13,  1.24s/it]

41728 episode score is 696.86


  4%|▍         | 4304/100000 [1:25:23<32:34:54,  1.23s/it]

41739 episode score is 671.10


  4%|▍         | 4305/100000 [1:25:24<32:09:58,  1.21s/it]

41750 episode score is 653.80


  4%|▍         | 4306/100000 [1:25:25<32:07:09,  1.21s/it]

41761 episode score is 684.36


  4%|▍         | 4307/100000 [1:25:27<31:56:58,  1.20s/it]

41772 episode score is 663.15


  4%|▍         | 4308/100000 [1:25:28<31:57:50,  1.20s/it]

41783 episode score is 680.64


  4%|▍         | 4309/100000 [1:25:29<31:55:59,  1.20s/it]

41794 episode score is 672.42


  4%|▍         | 4310/100000 [1:25:30<31:35:47,  1.19s/it]

41805 episode score is 642.98


  4%|▍         | 4311/100000 [1:25:31<31:41:47,  1.19s/it]

41816 episode score is 676.36


  4%|▍         | 4312/100000 [1:25:33<31:43:44,  1.19s/it]

41827 episode score is 647.33


  4%|▍         | 4313/100000 [1:25:34<31:38:39,  1.19s/it]

41838 episode score is 657.23


  4%|▍         | 4314/100000 [1:25:35<31:35:11,  1.19s/it]

41849 episode score is 641.94


  4%|▍         | 4315/100000 [1:25:36<31:50:54,  1.20s/it]

41860 episode score is 607.43


  4%|▍         | 4316/100000 [1:25:37<31:26:53,  1.18s/it]

41871 episode score is 634.29


  4%|▍         | 4317/100000 [1:25:38<31:29:16,  1.18s/it]

41883 episode score is 594.26


  4%|▍         | 4318/100000 [1:25:40<30:49:50,  1.16s/it]

41894 episode score is 599.05


  4%|▍         | 4319/100000 [1:25:41<31:11:40,  1.17s/it]

41906 episode score is 602.83
41917 episode score is 608.31


  4%|▍         | 4320/100000 [1:25:42<32:11:46,  1.21s/it]

41928 episode score is 604.59


  4%|▍         | 4321/100000 [1:25:43<33:24:42,  1.26s/it]

41939 episode score is 618.63


  4%|▍         | 4322/100000 [1:25:45<34:03:16,  1.28s/it]

41950 episode score is 667.37


  4%|▍         | 4323/100000 [1:25:46<35:00:26,  1.32s/it]

41960 episode score is 717.97


  4%|▍         | 4324/100000 [1:25:48<35:34:51,  1.34s/it]

41971 episode score is 648.19


  4%|▍         | 4325/100000 [1:25:49<36:02:54,  1.36s/it]

41982 episode score is 670.28


  4%|▍         | 4326/100000 [1:25:50<36:29:22,  1.37s/it]

41992 episode score is 685.28


  4%|▍         | 4327/100000 [1:25:52<35:55:20,  1.35s/it]

42003 episode score is 671.56


  4%|▍         | 4328/100000 [1:25:53<36:24:05,  1.37s/it]

42014 episode score is 626.86


  4%|▍         | 4329/100000 [1:25:54<36:11:26,  1.36s/it]

42025 episode score is 649.26


  4%|▍         | 4330/100000 [1:25:56<36:16:50,  1.37s/it]

42036 episode score is 634.20


  4%|▍         | 4331/100000 [1:25:57<36:10:11,  1.36s/it]

42047 episode score is 643.49


  4%|▍         | 4332/100000 [1:25:59<36:14:02,  1.36s/it]

42058 episode score is 630.26


  4%|▍         | 4333/100000 [1:26:00<36:11:16,  1.36s/it]

42069 episode score is 640.98


  4%|▍         | 4334/100000 [1:26:01<36:09:57,  1.36s/it]

42080 episode score is 615.37


  4%|▍         | 4335/100000 [1:26:03<36:00:13,  1.35s/it]

42091 episode score is 624.80


  4%|▍         | 4336/100000 [1:26:04<35:54:19,  1.35s/it]

42102 episode score is 602.37


  4%|▍         | 4337/100000 [1:26:05<35:36:53,  1.34s/it]

42113 episode score is 603.55


  4%|▍         | 4338/100000 [1:26:07<35:24:50,  1.33s/it]

42124 episode score is 635.49


  4%|▍         | 4339/100000 [1:26:08<35:44:36,  1.35s/it]

42135 episode score is 628.47


  4%|▍         | 4340/100000 [1:26:09<35:49:18,  1.35s/it]

42146 episode score is 635.66


  4%|▍         | 4341/100000 [1:26:11<35:56:18,  1.35s/it]

42157 episode score is 634.72


  4%|▍         | 4342/100000 [1:26:12<36:07:43,  1.36s/it]

42168 episode score is 626.62


  4%|▍         | 4343/100000 [1:26:13<36:06:02,  1.36s/it]

42179 episode score is 649.70


  4%|▍         | 4344/100000 [1:26:15<36:14:13,  1.36s/it]

42191 episode score is 563.83


  4%|▍         | 4345/100000 [1:26:16<36:14:37,  1.36s/it]

42202 episode score is 607.22


  4%|▍         | 4346/100000 [1:26:17<35:59:03,  1.35s/it]

42213 episode score is 624.32


  4%|▍         | 4347/100000 [1:26:19<35:54:34,  1.35s/it]

42224 episode score is 610.93


  4%|▍         | 4348/100000 [1:26:20<35:40:04,  1.34s/it]

42235 episode score is 598.38


  4%|▍         | 4349/100000 [1:26:21<35:25:30,  1.33s/it]

42247 episode score is 590.89


  4%|▍         | 4350/100000 [1:26:23<36:06:53,  1.36s/it]

42258 episode score is 595.25


  4%|▍         | 4351/100000 [1:26:24<35:43:39,  1.34s/it]

42269 episode score is 621.64


  4%|▍         | 4352/100000 [1:26:26<35:44:12,  1.35s/it]

42281 episode score is 595.57


  4%|▍         | 4353/100000 [1:26:27<36:15:39,  1.36s/it]

42292 episode score is 601.67


  4%|▍         | 4354/100000 [1:26:28<35:45:46,  1.35s/it]

42303 episode score is 608.11


  4%|▍         | 4355/100000 [1:26:30<35:30:48,  1.34s/it]

42314 episode score is 597.25


  4%|▍         | 4356/100000 [1:26:31<35:20:30,  1.33s/it]

42325 episode score is 638.59


  4%|▍         | 4357/100000 [1:26:32<35:38:43,  1.34s/it]

42336 episode score is 634.98


  4%|▍         | 4358/100000 [1:26:34<35:44:58,  1.35s/it]

42347 episode score is 656.50


  4%|▍         | 4359/100000 [1:26:35<36:09:33,  1.36s/it]

42358 episode score is 663.30


  4%|▍         | 4360/100000 [1:26:36<36:30:28,  1.37s/it]

42369 episode score is 618.48


  4%|▍         | 4361/100000 [1:26:38<36:11:41,  1.36s/it]

42380 episode score is 668.01


  4%|▍         | 4362/100000 [1:26:39<36:28:55,  1.37s/it]

42391 episode score is 662.93


  4%|▍         | 4363/100000 [1:26:41<36:43:36,  1.38s/it]

42402 episode score is 668.97


  4%|▍         | 4364/100000 [1:26:42<36:55:35,  1.39s/it]

42413 episode score is 645.62


  4%|▍         | 4365/100000 [1:26:43<36:58:06,  1.39s/it]

42424 episode score is 658.36


  4%|▍         | 4366/100000 [1:26:45<36:55:12,  1.39s/it]

42434 episode score is 680.49


  4%|▍         | 4367/100000 [1:26:46<36:58:58,  1.39s/it]

42444 episode score is 682.71


  4%|▍         | 4368/100000 [1:26:47<36:23:48,  1.37s/it]

42455 episode score is 686.97


  4%|▍         | 4369/100000 [1:26:49<37:07:40,  1.40s/it]

42465 episode score is 696.14


  4%|▍         | 4370/100000 [1:26:50<36:38:23,  1.38s/it]

42475 episode score is 692.19


  4%|▍         | 4371/100000 [1:26:52<36:05:48,  1.36s/it]

42485 episode score is 684.14


  4%|▍         | 4372/100000 [1:26:53<35:45:56,  1.35s/it]

42495 episode score is 717.57


  4%|▍         | 4373/100000 [1:26:54<35:52:08,  1.35s/it]

42505 episode score is 702.06


  4%|▍         | 4374/100000 [1:26:56<35:47:05,  1.35s/it]

42515 episode score is 734.26


  4%|▍         | 4375/100000 [1:26:57<36:05:37,  1.36s/it]

42525 episode score is 710.18


  4%|▍         | 4376/100000 [1:26:58<36:03:23,  1.36s/it]

42535 episode score is 738.61


  4%|▍         | 4377/100000 [1:27:00<36:32:49,  1.38s/it]

42545 episode score is 729.61


  4%|▍         | 4378/100000 [1:27:01<36:43:28,  1.38s/it]

42555 episode score is 719.89


  4%|▍         | 4379/100000 [1:27:02<36:41:26,  1.38s/it]

42565 episode score is 695.03


  4%|▍         | 4380/100000 [1:27:04<36:22:04,  1.37s/it]

42575 episode score is 740.04


  4%|▍         | 4381/100000 [1:27:05<36:39:05,  1.38s/it]

42585 episode score is 697.39


  4%|▍         | 4382/100000 [1:27:07<36:16:28,  1.37s/it]

42595 episode score is 712.48


  4%|▍         | 4383/100000 [1:27:08<36:07:08,  1.36s/it]

42605 episode score is 704.25


  4%|▍         | 4384/100000 [1:27:09<36:00:08,  1.36s/it]

42615 episode score is 712.07


  4%|▍         | 4385/100000 [1:27:11<36:06:35,  1.36s/it]

42625 episode score is 724.80


  4%|▍         | 4386/100000 [1:27:12<36:12:09,  1.36s/it]

42635 episode score is 725.21


  4%|▍         | 4387/100000 [1:27:13<36:19:15,  1.37s/it]

42645 episode score is 697.48


  4%|▍         | 4388/100000 [1:27:15<36:00:46,  1.36s/it]

42655 episode score is 724.19


  4%|▍         | 4389/100000 [1:27:16<36:15:29,  1.37s/it]

42665 episode score is 724.87


  4%|▍         | 4390/100000 [1:27:17<36:22:07,  1.37s/it]

42675 episode score is 711.66


  4%|▍         | 4391/100000 [1:27:19<36:13:53,  1.36s/it]

42685 episode score is 711.70


  4%|▍         | 4392/100000 [1:27:20<36:08:50,  1.36s/it]

42695 episode score is 713.12


  4%|▍         | 4393/100000 [1:27:22<36:02:43,  1.36s/it]

42704 episode score is 768.30


  4%|▍         | 4394/100000 [1:27:23<35:37:28,  1.34s/it]

42714 episode score is 710.58


  4%|▍         | 4395/100000 [1:27:24<35:40:41,  1.34s/it]

42724 episode score is 732.71


  4%|▍         | 4396/100000 [1:27:26<36:10:43,  1.36s/it]

42734 episode score is 675.85


  4%|▍         | 4397/100000 [1:27:27<35:45:35,  1.35s/it]

42745 episode score is 660.01


  4%|▍         | 4398/100000 [1:27:28<36:19:07,  1.37s/it]

42755 episode score is 699.98


  4%|▍         | 4399/100000 [1:27:30<36:04:44,  1.36s/it]

42765 episode score is 680.21
Iteration 4400: Average test reward: 648.13


  4%|▍         | 4400/100000 [1:27:32<44:22:13,  1.67s/it]

42775 episode score is 672.35


  4%|▍         | 4401/100000 [1:27:33<41:28:29,  1.56s/it]

42785 episode score is 678.63


  4%|▍         | 4402/100000 [1:27:35<39:29:11,  1.49s/it]

42795 episode score is 718.92


  4%|▍         | 4403/100000 [1:27:36<38:42:20,  1.46s/it]

42805 episode score is 703.05


  4%|▍         | 4404/100000 [1:27:37<37:43:58,  1.42s/it]

42815 episode score is 700.00


  4%|▍         | 4405/100000 [1:27:39<37:07:02,  1.40s/it]

42825 episode score is 717.95


  4%|▍         | 4406/100000 [1:27:40<36:51:56,  1.39s/it]

42835 episode score is 709.25


  4%|▍         | 4407/100000 [1:27:41<36:41:29,  1.38s/it]

42845 episode score is 723.37


  4%|▍         | 4408/100000 [1:27:43<36:40:11,  1.38s/it]

42855 episode score is 706.23


  4%|▍         | 4409/100000 [1:27:44<36:28:06,  1.37s/it]

42865 episode score is 708.71


  4%|▍         | 4410/100000 [1:27:46<36:19:23,  1.37s/it]

42875 episode score is 732.90


  4%|▍         | 4411/100000 [1:27:47<36:27:43,  1.37s/it]

42885 episode score is 719.66


  4%|▍         | 4412/100000 [1:27:48<36:34:57,  1.38s/it]

42895 episode score is 690.37


  4%|▍         | 4413/100000 [1:27:50<36:18:03,  1.37s/it]

42905 episode score is 703.24


  4%|▍         | 4414/100000 [1:27:51<36:04:21,  1.36s/it]

42915 episode score is 715.83


  4%|▍         | 4415/100000 [1:27:52<36:04:16,  1.36s/it]

42925 episode score is 686.97


  4%|▍         | 4416/100000 [1:27:54<35:47:39,  1.35s/it]

42935 episode score is 678.56


  4%|▍         | 4417/100000 [1:27:55<35:32:48,  1.34s/it]

42945 episode score is 698.80


  4%|▍         | 4418/100000 [1:27:56<35:28:19,  1.34s/it]

42955 episode score is 689.35


  4%|▍         | 4419/100000 [1:27:58<35:20:03,  1.33s/it]

42965 episode score is 716.27


  4%|▍         | 4420/100000 [1:27:59<35:30:58,  1.34s/it]

42975 episode score is 706.38


  4%|▍         | 4421/100000 [1:28:00<35:34:37,  1.34s/it]

42985 episode score is 701.60


  4%|▍         | 4422/100000 [1:28:02<35:39:54,  1.34s/it]

42995 episode score is 716.84


  4%|▍         | 4423/100000 [1:28:03<35:56:09,  1.35s/it]

43005 episode score is 724.31


  4%|▍         | 4424/100000 [1:28:04<36:05:14,  1.36s/it]

43015 episode score is 695.47


  4%|▍         | 4425/100000 [1:28:06<35:51:50,  1.35s/it]

43025 episode score is 709.05


  4%|▍         | 4426/100000 [1:28:07<35:53:30,  1.35s/it]

43035 episode score is 696.54


  4%|▍         | 4427/100000 [1:28:08<35:41:37,  1.34s/it]

43045 episode score is 731.66


  4%|▍         | 4428/100000 [1:28:10<36:01:57,  1.36s/it]

43055 episode score is 685.44


  4%|▍         | 4429/100000 [1:28:11<35:39:37,  1.34s/it]

43065 episode score is 683.98


  4%|▍         | 4430/100000 [1:28:12<35:28:56,  1.34s/it]

43076 episode score is 689.60


  4%|▍         | 4431/100000 [1:28:14<36:35:05,  1.38s/it]

43086 episode score is 727.25


  4%|▍         | 4432/100000 [1:28:15<36:38:13,  1.38s/it]

43096 episode score is 697.26


  4%|▍         | 4433/100000 [1:28:17<36:18:01,  1.37s/it]

43106 episode score is 688.45


  4%|▍         | 4434/100000 [1:28:18<36:06:43,  1.36s/it]

43116 episode score is 709.35


  4%|▍         | 4435/100000 [1:28:19<36:13:20,  1.36s/it]

43126 episode score is 688.88


  4%|▍         | 4436/100000 [1:28:21<35:54:19,  1.35s/it]

43136 episode score is 698.37


  4%|▍         | 4437/100000 [1:28:22<35:47:56,  1.35s/it]

43146 episode score is 701.31


  4%|▍         | 4438/100000 [1:28:23<35:43:21,  1.35s/it]

43156 episode score is 707.43


  4%|▍         | 4439/100000 [1:28:25<35:50:37,  1.35s/it]

43166 episode score is 686.03


  4%|▍         | 4440/100000 [1:28:26<35:32:06,  1.34s/it]

43176 episode score is 714.90


  4%|▍         | 4441/100000 [1:28:28<36:24:16,  1.37s/it]

43186 episode score is 715.38


  4%|▍         | 4442/100000 [1:28:29<36:20:31,  1.37s/it]

43196 episode score is 701.98


  4%|▍         | 4443/100000 [1:28:30<36:03:29,  1.36s/it]

43206 episode score is 699.79


  4%|▍         | 4444/100000 [1:28:32<35:49:31,  1.35s/it]

43216 episode score is 716.89


  4%|▍         | 4445/100000 [1:28:33<36:05:47,  1.36s/it]

43226 episode score is 698.42


  4%|▍         | 4446/100000 [1:28:34<35:56:00,  1.35s/it]

43236 episode score is 711.96


  4%|▍         | 4447/100000 [1:28:36<35:55:27,  1.35s/it]

43246 episode score is 686.76


  4%|▍         | 4448/100000 [1:28:37<35:34:51,  1.34s/it]

43256 episode score is 688.12


  4%|▍         | 4449/100000 [1:28:38<35:20:04,  1.33s/it]

43266 episode score is 707.17


  4%|▍         | 4450/100000 [1:28:40<35:23:24,  1.33s/it]

43276 episode score is 715.76


  4%|▍         | 4451/100000 [1:28:41<35:30:09,  1.34s/it]

43286 episode score is 716.89


  4%|▍         | 4452/100000 [1:28:42<35:46:24,  1.35s/it]

43296 episode score is 712.95


  4%|▍         | 4453/100000 [1:28:44<35:53:06,  1.35s/it]

43306 episode score is 700.86


  4%|▍         | 4454/100000 [1:28:45<35:41:44,  1.34s/it]

43316 episode score is 690.72


  4%|▍         | 4455/100000 [1:28:46<35:34:52,  1.34s/it]

43326 episode score is 701.61


  4%|▍         | 4456/100000 [1:28:48<35:32:20,  1.34s/it]

43336 episode score is 724.22


  4%|▍         | 4457/100000 [1:28:49<35:49:52,  1.35s/it]

43346 episode score is 704.66


  4%|▍         | 4458/100000 [1:28:50<35:41:47,  1.35s/it]

43356 episode score is 720.67


  4%|▍         | 4459/100000 [1:28:52<35:50:00,  1.35s/it]

43366 episode score is 693.93


  4%|▍         | 4460/100000 [1:28:53<35:38:19,  1.34s/it]

43376 episode score is 705.19


  4%|▍         | 4461/100000 [1:28:54<35:38:51,  1.34s/it]

43386 episode score is 717.07


  4%|▍         | 4462/100000 [1:28:56<35:49:33,  1.35s/it]

43396 episode score is 713.90


  4%|▍         | 4463/100000 [1:28:57<35:49:42,  1.35s/it]

43406 episode score is 711.21


  4%|▍         | 4464/100000 [1:28:58<35:48:12,  1.35s/it]

43416 episode score is 701.50


  4%|▍         | 4465/100000 [1:29:00<35:47:55,  1.35s/it]

43426 episode score is 714.07


  4%|▍         | 4466/100000 [1:29:01<35:53:49,  1.35s/it]

43436 episode score is 727.95


  4%|▍         | 4467/100000 [1:29:03<36:12:16,  1.36s/it]

43446 episode score is 707.68


  4%|▍         | 4468/100000 [1:29:04<35:59:54,  1.36s/it]

43456 episode score is 703.20


  4%|▍         | 4469/100000 [1:29:05<35:51:35,  1.35s/it]

43466 episode score is 703.53


  4%|▍         | 4470/100000 [1:29:07<35:44:32,  1.35s/it]

43476 episode score is 729.43


  4%|▍         | 4471/100000 [1:29:08<35:59:45,  1.36s/it]

43486 episode score is 715.69


  4%|▍         | 4472/100000 [1:29:09<35:59:13,  1.36s/it]

43496 episode score is 725.34


  4%|▍         | 4473/100000 [1:29:11<36:04:05,  1.36s/it]

43506 episode score is 714.02


  4%|▍         | 4474/100000 [1:29:12<36:04:45,  1.36s/it]

43516 episode score is 726.74


  4%|▍         | 4475/100000 [1:29:13<36:08:14,  1.36s/it]

43526 episode score is 713.32


  4%|▍         | 4476/100000 [1:29:15<36:03:47,  1.36s/it]

43536 episode score is 713.18


  4%|▍         | 4477/100000 [1:29:16<35:58:47,  1.36s/it]

43546 episode score is 707.46


  4%|▍         | 4478/100000 [1:29:17<36:03:15,  1.36s/it]

43556 episode score is 699.03


  4%|▍         | 4479/100000 [1:29:19<35:50:17,  1.35s/it]

43566 episode score is 692.56


  4%|▍         | 4480/100000 [1:29:20<35:36:39,  1.34s/it]

43576 episode score is 675.16


  4%|▍         | 4481/100000 [1:29:21<35:22:14,  1.33s/it]

43586 episode score is 682.18


  4%|▍         | 4482/100000 [1:29:23<35:15:07,  1.33s/it]

43596 episode score is 685.68


  4%|▍         | 4483/100000 [1:29:24<35:06:58,  1.32s/it]

43606 episode score is 698.46


  4%|▍         | 4484/100000 [1:29:25<35:11:23,  1.33s/it]

43616 episode score is 692.46


  4%|▍         | 4485/100000 [1:29:27<35:08:26,  1.32s/it]

43626 episode score is 703.36


  4%|▍         | 4486/100000 [1:29:28<35:12:34,  1.33s/it]

43636 episode score is 712.88


  4%|▍         | 4487/100000 [1:29:29<35:27:20,  1.34s/it]

43646 episode score is 704.71


  4%|▍         | 4488/100000 [1:29:31<35:28:41,  1.34s/it]

43656 episode score is 700.28


  4%|▍         | 4489/100000 [1:29:32<35:31:25,  1.34s/it]

43666 episode score is 709.61


  4%|▍         | 4490/100000 [1:29:33<35:36:53,  1.34s/it]

43676 episode score is 724.35


  4%|▍         | 4491/100000 [1:29:35<35:53:12,  1.35s/it]

43686 episode score is 702.36


  4%|▍         | 4492/100000 [1:29:36<35:53:17,  1.35s/it]

43696 episode score is 714.05


  4%|▍         | 4493/100000 [1:29:38<35:56:32,  1.35s/it]

43706 episode score is 729.36


  4%|▍         | 4494/100000 [1:29:39<36:12:16,  1.36s/it]

43716 episode score is 718.77


  4%|▍         | 4495/100000 [1:29:40<36:10:58,  1.36s/it]

43726 episode score is 720.01


  4%|▍         | 4496/100000 [1:29:42<36:18:14,  1.37s/it]

43736 episode score is 735.38


  4%|▍         | 4497/100000 [1:29:43<36:29:16,  1.38s/it]

43746 episode score is 711.73


  4%|▍         | 4498/100000 [1:29:44<36:23:30,  1.37s/it]

43756 episode score is 737.72


  4%|▍         | 4499/100000 [1:29:46<37:29:19,  1.41s/it]

43766 episode score is 744.19
Iteration 4500: Average test reward: 767.91


  4%|▍         | 4500/100000 [1:29:49<47:35:39,  1.79s/it]

43776 episode score is 755.12


  5%|▍         | 4501/100000 [1:29:50<44:41:44,  1.68s/it]

43786 episode score is 739.98


  5%|▍         | 4502/100000 [1:29:51<42:27:46,  1.60s/it]

43796 episode score is 712.41


  5%|▍         | 4503/100000 [1:29:53<40:33:03,  1.53s/it]

43806 episode score is 750.99


  5%|▍         | 4504/100000 [1:29:54<39:38:59,  1.49s/it]

43816 episode score is 733.54


  5%|▍         | 4505/100000 [1:29:56<38:58:32,  1.47s/it]

43826 episode score is 731.19


  5%|▍         | 4506/100000 [1:29:57<38:20:24,  1.45s/it]

43836 episode score is 720.35


  5%|▍         | 4507/100000 [1:29:58<37:44:53,  1.42s/it]

43846 episode score is 734.18


  5%|▍         | 4508/100000 [1:30:00<37:31:15,  1.41s/it]

43856 episode score is 720.32


  5%|▍         | 4509/100000 [1:30:01<37:11:12,  1.40s/it]

43866 episode score is 719.98


  5%|▍         | 4510/100000 [1:30:03<37:04:29,  1.40s/it]

43876 episode score is 705.25


  5%|▍         | 4511/100000 [1:30:04<36:40:59,  1.38s/it]

43886 episode score is 719.82


  5%|▍         | 4512/100000 [1:30:05<36:35:55,  1.38s/it]

43896 episode score is 728.15


  5%|▍         | 4513/100000 [1:30:07<36:40:46,  1.38s/it]

43906 episode score is 717.31


  5%|▍         | 4514/100000 [1:30:08<36:32:27,  1.38s/it]

43916 episode score is 733.52


  5%|▍         | 4515/100000 [1:30:09<36:40:17,  1.38s/it]

43926 episode score is 734.17


  5%|▍         | 4516/100000 [1:30:11<36:43:51,  1.38s/it]

43936 episode score is 714.13


  5%|▍         | 4517/100000 [1:30:12<36:40:37,  1.38s/it]

43946 episode score is 739.36


  5%|▍         | 4518/100000 [1:30:14<36:49:32,  1.39s/it]

43956 episode score is 736.46


  5%|▍         | 4519/100000 [1:30:15<36:50:28,  1.39s/it]

43966 episode score is 724.49


  5%|▍         | 4520/100000 [1:30:16<36:38:13,  1.38s/it]

43976 episode score is 710.19


  5%|▍         | 4521/100000 [1:30:18<36:32:38,  1.38s/it]

43986 episode score is 707.92


  5%|▍         | 4522/100000 [1:30:19<36:28:59,  1.38s/it]

43996 episode score is 694.34


  5%|▍         | 4523/100000 [1:30:20<36:02:54,  1.36s/it]

44006 episode score is 695.78


  5%|▍         | 4524/100000 [1:30:22<35:46:09,  1.35s/it]

44016 episode score is 692.73


  5%|▍         | 4525/100000 [1:30:23<35:36:40,  1.34s/it]

44026 episode score is 721.97


  5%|▍         | 4526/100000 [1:30:24<35:46:24,  1.35s/it]

44036 episode score is 732.00


  5%|▍         | 4527/100000 [1:30:26<36:03:53,  1.36s/it]

44046 episode score is 718.98


  5%|▍         | 4528/100000 [1:30:27<36:07:47,  1.36s/it]

44056 episode score is 715.62


  5%|▍         | 4529/100000 [1:30:29<36:10:44,  1.36s/it]

44066 episode score is 682.74


  5%|▍         | 4530/100000 [1:30:30<35:49:57,  1.35s/it]

44076 episode score is 685.33


  5%|▍         | 4531/100000 [1:30:31<35:32:17,  1.34s/it]

44086 episode score is 701.89


  5%|▍         | 4532/100000 [1:30:33<35:40:27,  1.35s/it]

44096 episode score is 683.44


  5%|▍         | 4533/100000 [1:30:34<35:22:51,  1.33s/it]

44106 episode score is 694.52


  5%|▍         | 4534/100000 [1:30:35<35:32:40,  1.34s/it]

44116 episode score is 675.07


  5%|▍         | 4535/100000 [1:30:37<35:19:07,  1.33s/it]

44126 episode score is 706.48


  5%|▍         | 4536/100000 [1:30:38<35:22:40,  1.33s/it]

44136 episode score is 679.86


  5%|▍         | 4537/100000 [1:30:39<35:03:49,  1.32s/it]

44146 episode score is 694.90


  5%|▍         | 4538/100000 [1:30:40<34:58:43,  1.32s/it]

44157 episode score is 683.67


  5%|▍         | 4539/100000 [1:30:42<35:48:28,  1.35s/it]

44167 episode score is 697.90


  5%|▍         | 4540/100000 [1:30:43<35:35:03,  1.34s/it]

44178 episode score is 676.73


  5%|▍         | 4541/100000 [1:30:45<36:13:50,  1.37s/it]

44189 episode score is 667.60


  5%|▍         | 4542/100000 [1:30:46<36:28:50,  1.38s/it]

44200 episode score is 663.27


  5%|▍         | 4543/100000 [1:30:47<36:42:49,  1.38s/it]

44211 episode score is 676.37


  5%|▍         | 4544/100000 [1:30:49<37:10:10,  1.40s/it]

44222 episode score is 664.26


  5%|▍         | 4545/100000 [1:30:50<37:15:23,  1.41s/it]

44233 episode score is 673.65


  5%|▍         | 4546/100000 [1:30:52<37:21:42,  1.41s/it]

44244 episode score is 677.31


  5%|▍         | 4547/100000 [1:30:53<37:25:50,  1.41s/it]

44255 episode score is 674.61


  5%|▍         | 4548/100000 [1:30:55<37:26:10,  1.41s/it]

44266 episode score is 679.31


  5%|▍         | 4549/100000 [1:30:56<37:38:43,  1.42s/it]

44277 episode score is 672.63


  5%|▍         | 4550/100000 [1:30:57<37:38:22,  1.42s/it]

44288 episode score is 675.12


  5%|▍         | 4551/100000 [1:30:59<37:43:50,  1.42s/it]

44299 episode score is 674.08


  5%|▍         | 4552/100000 [1:31:00<37:48:01,  1.43s/it]

44309 episode score is 688.10


  5%|▍         | 4553/100000 [1:31:02<36:57:08,  1.39s/it]

44319 episode score is 689.84


  5%|▍         | 4554/100000 [1:31:03<36:26:24,  1.37s/it]

44330 episode score is 677.48


  5%|▍         | 4555/100000 [1:31:04<36:53:08,  1.39s/it]

44341 episode score is 673.74


  5%|▍         | 4556/100000 [1:31:06<37:10:18,  1.40s/it]

44351 episode score is 695.82


  5%|▍         | 4557/100000 [1:31:07<36:33:24,  1.38s/it]

44361 episode score is 697.37


  5%|▍         | 4558/100000 [1:31:08<36:07:32,  1.36s/it]

44372 episode score is 678.88


  5%|▍         | 4559/100000 [1:31:10<36:37:05,  1.38s/it]

44383 episode score is 680.18


  5%|▍         | 4560/100000 [1:31:11<36:57:30,  1.39s/it]

44393 episode score is 702.11


  5%|▍         | 4561/100000 [1:31:13<36:38:35,  1.38s/it]

44403 episode score is 681.87


  5%|▍         | 4562/100000 [1:31:14<36:03:11,  1.36s/it]

44414 episode score is 684.28


  5%|▍         | 4563/100000 [1:31:15<36:41:58,  1.38s/it]

44424 episode score is 687.70


  5%|▍         | 4564/100000 [1:31:17<36:12:35,  1.37s/it]

44435 episode score is 684.60


  5%|▍         | 4565/100000 [1:31:18<36:49:10,  1.39s/it]

44446 episode score is 679.94


  5%|▍         | 4566/100000 [1:31:20<37:11:36,  1.40s/it]

44457 episode score is 668.05


  5%|▍         | 4567/100000 [1:31:21<37:15:22,  1.41s/it]

44468 episode score is 678.04


  5%|▍         | 4568/100000 [1:31:22<37:24:39,  1.41s/it]

44478 episode score is 700.08


  5%|▍         | 4569/100000 [1:31:24<36:49:29,  1.39s/it]

44489 episode score is 677.84


  5%|▍         | 4570/100000 [1:31:25<37:04:04,  1.40s/it]

44500 episode score is 671.18


  5%|▍         | 4571/100000 [1:31:27<37:13:22,  1.40s/it]

44510 episode score is 692.68


  5%|▍         | 4572/100000 [1:31:28<36:36:36,  1.38s/it]

44520 episode score is 686.13


  5%|▍         | 4573/100000 [1:31:29<36:03:06,  1.36s/it]

44531 episode score is 644.45


  5%|▍         | 4574/100000 [1:31:31<36:06:09,  1.36s/it]

44541 episode score is 686.19


  5%|▍         | 4575/100000 [1:31:32<35:52:17,  1.35s/it]

44552 episode score is 675.11


  5%|▍         | 4576/100000 [1:31:33<36:27:44,  1.38s/it]

44563 episode score is 666.62


  5%|▍         | 4577/100000 [1:31:35<36:42:24,  1.38s/it]

44573 episode score is 712.67


  5%|▍         | 4578/100000 [1:31:36<36:34:22,  1.38s/it]

44583 episode score is 722.58


  5%|▍         | 4579/100000 [1:31:38<36:44:59,  1.39s/it]

44593 episode score is 729.88


  5%|▍         | 4580/100000 [1:31:39<36:48:28,  1.39s/it]

44603 episode score is 739.56


  5%|▍         | 4581/100000 [1:31:40<36:54:41,  1.39s/it]

44613 episode score is 722.83


  5%|▍         | 4582/100000 [1:31:42<36:45:14,  1.39s/it]

44623 episode score is 724.59


  5%|▍         | 4583/100000 [1:31:43<36:42:09,  1.38s/it]

44633 episode score is 734.00


  5%|▍         | 4584/100000 [1:31:44<36:48:08,  1.39s/it]

44643 episode score is 729.50


  5%|▍         | 4585/100000 [1:31:46<36:51:45,  1.39s/it]

44653 episode score is 723.87


  5%|▍         | 4586/100000 [1:31:47<36:51:16,  1.39s/it]

44663 episode score is 733.47


  5%|▍         | 4587/100000 [1:31:49<37:48:01,  1.43s/it]

44673 episode score is 732.66


  5%|▍         | 4588/100000 [1:31:50<37:36:58,  1.42s/it]

44683 episode score is 713.18


  5%|▍         | 4589/100000 [1:31:52<37:12:49,  1.40s/it]

44693 episode score is 727.15


  5%|▍         | 4590/100000 [1:31:53<37:06:32,  1.40s/it]

44703 episode score is 709.10


  5%|▍         | 4591/100000 [1:31:54<36:35:01,  1.38s/it]

44713 episode score is 729.41


  5%|▍         | 4592/100000 [1:31:56<36:35:57,  1.38s/it]

44723 episode score is 720.72


  5%|▍         | 4593/100000 [1:31:57<36:27:27,  1.38s/it]

44733 episode score is 713.52


  5%|▍         | 4594/100000 [1:31:58<36:17:04,  1.37s/it]

44743 episode score is 706.59


  5%|▍         | 4595/100000 [1:32:00<36:15:11,  1.37s/it]

44753 episode score is 730.29


  5%|▍         | 4596/100000 [1:32:01<36:26:40,  1.38s/it]

44763 episode score is 742.24


  5%|▍         | 4597/100000 [1:32:03<36:50:36,  1.39s/it]

44773 episode score is 727.41


  5%|▍         | 4598/100000 [1:32:04<36:51:34,  1.39s/it]

44783 episode score is 737.87


  5%|▍         | 4599/100000 [1:32:05<36:55:47,  1.39s/it]

44793 episode score is 720.56
Iteration 4600: Average test reward: 729.93


  5%|▍         | 4600/100000 [1:32:08<46:03:53,  1.74s/it]

44803 episode score is 738.16


  5%|▍         | 4601/100000 [1:32:09<43:21:39,  1.64s/it]

44813 episode score is 743.96


  5%|▍         | 4602/100000 [1:32:11<41:32:49,  1.57s/it]

44823 episode score is 736.37


  5%|▍         | 4603/100000 [1:32:12<40:17:28,  1.52s/it]

44833 episode score is 743.74


  5%|▍         | 4604/100000 [1:32:14<39:27:16,  1.49s/it]

44843 episode score is 735.76


  5%|▍         | 4605/100000 [1:32:15<38:40:40,  1.46s/it]

44853 episode score is 727.97


  5%|▍         | 4606/100000 [1:32:16<38:01:18,  1.43s/it]

44863 episode score is 739.46


  5%|▍         | 4607/100000 [1:32:18<37:51:51,  1.43s/it]

44873 episode score is 739.84


  5%|▍         | 4608/100000 [1:32:19<37:42:54,  1.42s/it]

44883 episode score is 731.81


  5%|▍         | 4609/100000 [1:32:20<37:21:03,  1.41s/it]

44893 episode score is 740.69


  5%|▍         | 4610/100000 [1:32:22<37:14:44,  1.41s/it]

44903 episode score is 730.13


  5%|▍         | 4611/100000 [1:32:23<36:57:39,  1.39s/it]

44913 episode score is 742.66


  5%|▍         | 4612/100000 [1:32:25<37:04:27,  1.40s/it]

44923 episode score is 719.08


  5%|▍         | 4613/100000 [1:32:26<36:47:54,  1.39s/it]

44933 episode score is 713.78


  5%|▍         | 4614/100000 [1:32:27<36:40:07,  1.38s/it]

44943 episode score is 712.00


  5%|▍         | 4615/100000 [1:32:29<36:26:46,  1.38s/it]

44953 episode score is 728.49


  5%|▍         | 4616/100000 [1:32:30<36:28:28,  1.38s/it]

44963 episode score is 713.47


  5%|▍         | 4617/100000 [1:32:31<36:18:41,  1.37s/it]

44973 episode score is 725.78


  5%|▍         | 4618/100000 [1:32:33<36:20:57,  1.37s/it]

44983 episode score is 717.20


  5%|▍         | 4619/100000 [1:32:34<36:18:16,  1.37s/it]

44993 episode score is 725.68


  5%|▍         | 4620/100000 [1:32:36<36:15:43,  1.37s/it]

45003 episode score is 725.51


  5%|▍         | 4621/100000 [1:32:37<36:17:24,  1.37s/it]

45013 episode score is 713.14


  5%|▍         | 4622/100000 [1:32:38<36:09:06,  1.36s/it]

45023 episode score is 721.87


  5%|▍         | 4623/100000 [1:32:40<36:21:11,  1.37s/it]

45032 episode score is 773.16


  5%|▍         | 4624/100000 [1:32:41<35:53:25,  1.35s/it]

45042 episode score is 719.25


  5%|▍         | 4625/100000 [1:32:42<36:02:30,  1.36s/it]

45051 episode score is 774.03


  5%|▍         | 4626/100000 [1:32:44<35:42:52,  1.35s/it]

45060 episode score is 766.66


  5%|▍         | 4627/100000 [1:32:45<35:27:13,  1.34s/it]

45070 episode score is 738.71


  5%|▍         | 4628/100000 [1:32:46<36:06:55,  1.36s/it]

45080 episode score is 741.92


  5%|▍         | 4629/100000 [1:32:48<36:36:28,  1.38s/it]

45089 episode score is 774.32


  5%|▍         | 4630/100000 [1:32:49<36:08:54,  1.36s/it]

45099 episode score is 738.32


  5%|▍         | 4631/100000 [1:32:51<36:32:36,  1.38s/it]

45109 episode score is 750.22


  5%|▍         | 4632/100000 [1:32:52<36:57:49,  1.40s/it]

45119 episode score is 737.33


  5%|▍         | 4633/100000 [1:32:53<37:01:10,  1.40s/it]

45129 episode score is 695.10


  5%|▍         | 4634/100000 [1:32:55<36:33:54,  1.38s/it]

45138 episode score is 774.59


  5%|▍         | 4635/100000 [1:32:56<36:15:29,  1.37s/it]

45148 episode score is 743.82


  5%|▍         | 4636/100000 [1:32:58<36:42:07,  1.39s/it]

45158 episode score is 750.95


  5%|▍         | 4637/100000 [1:32:59<37:03:11,  1.40s/it]

45167 episode score is 766.39


  5%|▍         | 4638/100000 [1:33:00<36:22:44,  1.37s/it]

45177 episode score is 719.41


  5%|▍         | 4639/100000 [1:33:02<36:23:16,  1.37s/it]

45187 episode score is 739.36


  5%|▍         | 4640/100000 [1:33:03<36:42:09,  1.39s/it]

45197 episode score is 715.84


  5%|▍         | 4641/100000 [1:33:04<36:32:09,  1.38s/it]

45207 episode score is 701.74


  5%|▍         | 4642/100000 [1:33:06<36:20:03,  1.37s/it]

45217 episode score is 707.37


  5%|▍         | 4643/100000 [1:33:07<36:13:49,  1.37s/it]

45227 episode score is 706.40


  5%|▍         | 4644/100000 [1:33:09<36:50:36,  1.39s/it]

45237 episode score is 681.34


  5%|▍         | 4645/100000 [1:33:10<36:11:57,  1.37s/it]

45247 episode score is 694.85


  5%|▍         | 4646/100000 [1:33:11<35:50:48,  1.35s/it]

45258 episode score is 670.00


  5%|▍         | 4647/100000 [1:33:13<36:19:40,  1.37s/it]

45269 episode score is 655.88


  5%|▍         | 4648/100000 [1:33:14<36:28:10,  1.38s/it]

45280 episode score is 660.46


  5%|▍         | 4649/100000 [1:33:15<36:34:31,  1.38s/it]

45290 episode score is 686.76


  5%|▍         | 4650/100000 [1:33:17<36:03:51,  1.36s/it]

45301 episode score is 677.44


  5%|▍         | 4651/100000 [1:33:18<36:40:22,  1.38s/it]

45312 episode score is 669.96


  5%|▍         | 4652/100000 [1:33:20<37:02:06,  1.40s/it]

45323 episode score is 665.61


  5%|▍         | 4653/100000 [1:33:21<37:07:06,  1.40s/it]

45334 episode score is 677.26


  5%|▍         | 4654/100000 [1:33:22<37:24:31,  1.41s/it]

45345 episode score is 680.15


  5%|▍         | 4655/100000 [1:33:24<37:30:49,  1.42s/it]

45356 episode score is 673.16


  5%|▍         | 4656/100000 [1:33:25<37:27:52,  1.41s/it]

45366 episode score is 684.36


  5%|▍         | 4657/100000 [1:33:27<36:38:15,  1.38s/it]

45376 episode score is 701.76


  5%|▍         | 4658/100000 [1:33:28<36:11:48,  1.37s/it]

45386 episode score is 711.98


  5%|▍         | 4659/100000 [1:33:29<36:02:04,  1.36s/it]

45396 episode score is 697.48


  5%|▍         | 4660/100000 [1:33:31<35:43:32,  1.35s/it]

45406 episode score is 707.21


  5%|▍         | 4661/100000 [1:33:32<35:41:11,  1.35s/it]

45417 episode score is 686.44


  5%|▍         | 4662/100000 [1:33:33<36:21:46,  1.37s/it]

45427 episode score is 694.22


  5%|▍         | 4663/100000 [1:33:35<35:57:29,  1.36s/it]

45437 episode score is 714.30


  5%|▍         | 4664/100000 [1:33:36<35:53:44,  1.36s/it]

45447 episode score is 702.58


  5%|▍         | 4665/100000 [1:33:37<35:54:56,  1.36s/it]

45457 episode score is 695.42


  5%|▍         | 4666/100000 [1:33:39<35:41:12,  1.35s/it]

45467 episode score is 696.54


  5%|▍         | 4667/100000 [1:33:40<35:31:14,  1.34s/it]

45477 episode score is 679.00


  5%|▍         | 4668/100000 [1:33:41<35:13:41,  1.33s/it]

45487 episode score is 699.02


  5%|▍         | 4669/100000 [1:33:43<35:12:16,  1.33s/it]

45497 episode score is 712.16


  5%|▍         | 4670/100000 [1:33:44<35:17:19,  1.33s/it]

45507 episode score is 710.96


  5%|▍         | 4671/100000 [1:33:45<35:21:48,  1.34s/it]

45517 episode score is 688.33


  5%|▍         | 4672/100000 [1:33:47<35:07:34,  1.33s/it]

45527 episode score is 714.28


  5%|▍         | 4673/100000 [1:33:48<35:26:08,  1.34s/it]

45537 episode score is 697.86


  5%|▍         | 4674/100000 [1:33:49<35:23:05,  1.34s/it]

45548 episode score is 677.76


  5%|▍         | 4675/100000 [1:33:51<36:06:14,  1.36s/it]

45558 episode score is 703.91


  5%|▍         | 4676/100000 [1:33:52<35:48:52,  1.35s/it]

45568 episode score is 699.43


  5%|▍         | 4677/100000 [1:33:53<35:34:17,  1.34s/it]

45578 episode score is 702.75


  5%|▍         | 4678/100000 [1:33:55<35:32:09,  1.34s/it]

45588 episode score is 731.69


  5%|▍         | 4679/100000 [1:33:56<35:50:41,  1.35s/it]

45598 episode score is 718.10


  5%|▍         | 4680/100000 [1:33:58<35:59:55,  1.36s/it]

45608 episode score is 716.57


  5%|▍         | 4681/100000 [1:33:59<35:59:25,  1.36s/it]

45618 episode score is 716.32


  5%|▍         | 4682/100000 [1:34:00<35:57:00,  1.36s/it]

45628 episode score is 728.45


  5%|▍         | 4683/100000 [1:34:02<36:07:52,  1.36s/it]

45638 episode score is 717.25


  5%|▍         | 4684/100000 [1:34:03<36:08:11,  1.36s/it]

45648 episode score is 738.61


  5%|▍         | 4685/100000 [1:34:04<36:21:22,  1.37s/it]

45658 episode score is 742.75


  5%|▍         | 4686/100000 [1:34:06<36:34:36,  1.38s/it]

45668 episode score is 741.25


  5%|▍         | 4687/100000 [1:34:07<36:49:06,  1.39s/it]

45678 episode score is 711.98


  5%|▍         | 4688/100000 [1:34:09<36:26:55,  1.38s/it]

45688 episode score is 730.93


  5%|▍         | 4689/100000 [1:34:10<36:28:03,  1.38s/it]

45698 episode score is 736.01


  5%|▍         | 4690/100000 [1:34:11<36:35:29,  1.38s/it]

45708 episode score is 735.56


  5%|▍         | 4691/100000 [1:34:13<36:45:23,  1.39s/it]

45718 episode score is 737.12


  5%|▍         | 4692/100000 [1:34:14<36:44:52,  1.39s/it]

45728 episode score is 747.66


  5%|▍         | 4693/100000 [1:34:16<37:06:04,  1.40s/it]

45738 episode score is 730.33


  5%|▍         | 4694/100000 [1:34:17<37:07:15,  1.40s/it]

45748 episode score is 723.02


  5%|▍         | 4695/100000 [1:34:18<36:59:03,  1.40s/it]

45758 episode score is 734.95


  5%|▍         | 4696/100000 [1:34:20<36:58:01,  1.40s/it]

45768 episode score is 733.29


  5%|▍         | 4697/100000 [1:34:21<37:37:37,  1.42s/it]

45778 episode score is 732.12


  5%|▍         | 4698/100000 [1:34:23<37:15:31,  1.41s/it]

45788 episode score is 735.05


  5%|▍         | 4699/100000 [1:34:24<37:06:08,  1.40s/it]

45798 episode score is 739.74
Iteration 4700: Average test reward: 743.00


  5%|▍         | 4700/100000 [1:34:27<46:39:21,  1.76s/it]

45808 episode score is 722.64


  5%|▍         | 4701/100000 [1:34:28<43:33:44,  1.65s/it]

45818 episode score is 738.17


  5%|▍         | 4702/100000 [1:34:29<41:37:45,  1.57s/it]

45828 episode score is 726.07


  5%|▍         | 4703/100000 [1:34:31<40:06:51,  1.52s/it]

45838 episode score is 754.52


  5%|▍         | 4704/100000 [1:34:32<39:44:28,  1.50s/it]

45848 episode score is 746.78


  5%|▍         | 4705/100000 [1:34:34<39:05:12,  1.48s/it]

45858 episode score is 704.67


  5%|▍         | 4706/100000 [1:34:35<38:07:43,  1.44s/it]

45868 episode score is 732.71


  5%|▍         | 4707/100000 [1:34:36<37:48:13,  1.43s/it]

45878 episode score is 724.42


  5%|▍         | 4708/100000 [1:34:38<37:21:09,  1.41s/it]

45888 episode score is 726.23


  5%|▍         | 4709/100000 [1:34:39<37:04:55,  1.40s/it]

45898 episode score is 709.28


  5%|▍         | 4710/100000 [1:34:40<36:39:08,  1.38s/it]

45908 episode score is 726.88


  5%|▍         | 4711/100000 [1:34:42<36:27:01,  1.38s/it]

45918 episode score is 731.10


  5%|▍         | 4712/100000 [1:34:43<36:28:40,  1.38s/it]

45928 episode score is 742.90


  5%|▍         | 4713/100000 [1:34:45<36:39:59,  1.39s/it]

45938 episode score is 727.94


  5%|▍         | 4714/100000 [1:34:46<36:34:34,  1.38s/it]

45948 episode score is 719.08


  5%|▍         | 4715/100000 [1:34:47<36:38:12,  1.38s/it]

45958 episode score is 712.28


  5%|▍         | 4716/100000 [1:34:49<36:26:26,  1.38s/it]

45968 episode score is 761.02


  5%|▍         | 4717/100000 [1:34:50<36:56:54,  1.40s/it]

45978 episode score is 749.62


  5%|▍         | 4718/100000 [1:34:52<37:07:46,  1.40s/it]

45987 episode score is 776.80


  5%|▍         | 4719/100000 [1:34:53<36:23:35,  1.38s/it]

45997 episode score is 764.15


  5%|▍         | 4720/100000 [1:34:54<36:48:57,  1.39s/it]

46007 episode score is 740.39


  5%|▍         | 4721/100000 [1:34:56<36:56:14,  1.40s/it]

46017 episode score is 748.42


  5%|▍         | 4722/100000 [1:34:57<37:06:23,  1.40s/it]

46027 episode score is 736.14


  5%|▍         | 4723/100000 [1:34:59<37:15:57,  1.41s/it]

46037 episode score is 739.01


  5%|▍         | 4724/100000 [1:35:00<37:11:46,  1.41s/it]

46047 episode score is 723.58


  5%|▍         | 4725/100000 [1:35:01<36:59:55,  1.40s/it]

46057 episode score is 743.77


  5%|▍         | 4726/100000 [1:35:03<37:18:03,  1.41s/it]

46067 episode score is 716.44


  5%|▍         | 4727/100000 [1:35:04<37:01:28,  1.40s/it]

46077 episode score is 751.08


  5%|▍         | 4728/100000 [1:35:06<37:15:26,  1.41s/it]

46086 episode score is 770.15


  5%|▍         | 4729/100000 [1:35:07<36:25:20,  1.38s/it]

46096 episode score is 696.48


  5%|▍         | 4730/100000 [1:35:08<36:10:37,  1.37s/it]

46106 episode score is 732.34


  5%|▍         | 4731/100000 [1:35:10<36:22:12,  1.37s/it]

46116 episode score is 691.44


  5%|▍         | 4732/100000 [1:35:11<36:04:18,  1.36s/it]

46126 episode score is 721.50


  5%|▍         | 4733/100000 [1:35:12<36:14:20,  1.37s/it]

46136 episode score is 709.88


  5%|▍         | 4734/100000 [1:35:14<36:19:05,  1.37s/it]

46146 episode score is 705.97


  5%|▍         | 4735/100000 [1:35:15<36:11:36,  1.37s/it]

46156 episode score is 742.33


  5%|▍         | 4736/100000 [1:35:17<36:32:15,  1.38s/it]

46166 episode score is 725.50


  5%|▍         | 4737/100000 [1:35:18<36:38:47,  1.38s/it]

46176 episode score is 688.88


  5%|▍         | 4738/100000 [1:35:19<36:14:08,  1.37s/it]

46186 episode score is 715.26


  5%|▍         | 4739/100000 [1:35:21<36:10:53,  1.37s/it]

46196 episode score is 703.11


  5%|▍         | 4740/100000 [1:35:22<36:04:26,  1.36s/it]

46206 episode score is 709.84


  5%|▍         | 4741/100000 [1:35:23<36:02:10,  1.36s/it]

46216 episode score is 734.05


  5%|▍         | 4742/100000 [1:35:25<36:16:25,  1.37s/it]

46226 episode score is 735.05


  5%|▍         | 4743/100000 [1:35:26<36:28:11,  1.38s/it]

46236 episode score is 721.01


  5%|▍         | 4744/100000 [1:35:27<36:29:36,  1.38s/it]

46246 episode score is 732.57


  5%|▍         | 4745/100000 [1:35:29<36:35:44,  1.38s/it]

46256 episode score is 706.53


  5%|▍         | 4746/100000 [1:35:30<36:21:12,  1.37s/it]

46266 episode score is 721.56


  5%|▍         | 4747/100000 [1:35:32<36:27:16,  1.38s/it]

46276 episode score is 704.53


  5%|▍         | 4748/100000 [1:35:33<36:15:12,  1.37s/it]

46286 episode score is 709.91


  5%|▍         | 4749/100000 [1:35:34<36:04:21,  1.36s/it]

46296 episode score is 731.48


  5%|▍         | 4750/100000 [1:35:36<36:13:34,  1.37s/it]

46306 episode score is 727.28


  5%|▍         | 4751/100000 [1:35:37<36:25:58,  1.38s/it]

46316 episode score is 749.27


  5%|▍         | 4752/100000 [1:35:39<36:53:18,  1.39s/it]

46326 episode score is 731.86


  5%|▍         | 4753/100000 [1:35:40<36:51:50,  1.39s/it]

46336 episode score is 727.93


  5%|▍         | 4754/100000 [1:35:41<36:48:57,  1.39s/it]

46346 episode score is 735.28


  5%|▍         | 4755/100000 [1:35:43<36:52:46,  1.39s/it]

46356 episode score is 755.01


  5%|▍         | 4756/100000 [1:35:44<37:03:40,  1.40s/it]

46366 episode score is 753.25


  5%|▍         | 4757/100000 [1:35:46<37:09:20,  1.40s/it]

46376 episode score is 725.17


  5%|▍         | 4758/100000 [1:35:47<37:55:30,  1.43s/it]

46386 episode score is 726.12


  5%|▍         | 4759/100000 [1:35:48<37:36:59,  1.42s/it]

46396 episode score is 751.79


  5%|▍         | 4760/100000 [1:35:50<37:36:22,  1.42s/it]

46406 episode score is 753.81


  5%|▍         | 4761/100000 [1:35:51<37:36:16,  1.42s/it]

46416 episode score is 728.43


  5%|▍         | 4762/100000 [1:35:53<37:18:43,  1.41s/it]

46426 episode score is 740.42


  5%|▍         | 4763/100000 [1:35:54<37:14:34,  1.41s/it]

46436 episode score is 740.39


  5%|▍         | 4764/100000 [1:35:55<37:18:26,  1.41s/it]

46446 episode score is 714.42


  5%|▍         | 4765/100000 [1:35:57<36:55:18,  1.40s/it]

46456 episode score is 723.22


  5%|▍         | 4766/100000 [1:35:58<36:44:33,  1.39s/it]

46466 episode score is 738.66


  5%|▍         | 4767/100000 [1:36:00<36:50:40,  1.39s/it]

46476 episode score is 734.87


  5%|▍         | 4768/100000 [1:36:01<36:50:41,  1.39s/it]

46486 episode score is 720.24


  5%|▍         | 4769/100000 [1:36:02<36:53:25,  1.39s/it]

46496 episode score is 689.33


  5%|▍         | 4770/100000 [1:36:04<36:15:40,  1.37s/it]

46506 episode score is 717.24


  5%|▍         | 4771/100000 [1:36:05<36:11:32,  1.37s/it]

46516 episode score is 701.08


  5%|▍         | 4772/100000 [1:36:06<35:51:46,  1.36s/it]

46526 episode score is 705.87


  5%|▍         | 4773/100000 [1:36:08<35:46:09,  1.35s/it]

46536 episode score is 726.39


  5%|▍         | 4774/100000 [1:36:09<35:59:02,  1.36s/it]

46546 episode score is 725.98


  5%|▍         | 4775/100000 [1:36:11<36:05:06,  1.36s/it]

46556 episode score is 719.29


  5%|▍         | 4776/100000 [1:36:12<36:05:42,  1.36s/it]

46566 episode score is 734.17


  5%|▍         | 4777/100000 [1:36:13<36:19:13,  1.37s/it]

46576 episode score is 732.00


  5%|▍         | 4778/100000 [1:36:15<36:20:32,  1.37s/it]

46586 episode score is 720.51


  5%|▍         | 4779/100000 [1:36:16<36:13:10,  1.37s/it]

46596 episode score is 728.53


  5%|▍         | 4780/100000 [1:36:17<36:20:31,  1.37s/it]

46606 episode score is 716.47


  5%|▍         | 4781/100000 [1:36:19<36:23:18,  1.38s/it]

46616 episode score is 711.53


  5%|▍         | 4782/100000 [1:36:20<36:12:31,  1.37s/it]

46626 episode score is 708.92


  5%|▍         | 4783/100000 [1:36:21<35:57:31,  1.36s/it]

46636 episode score is 716.15


  5%|▍         | 4784/100000 [1:36:23<35:57:41,  1.36s/it]

46646 episode score is 713.08


  5%|▍         | 4785/100000 [1:36:24<35:49:18,  1.35s/it]

46656 episode score is 711.34


  5%|▍         | 4786/100000 [1:36:26<35:48:20,  1.35s/it]

46666 episode score is 710.88


  5%|▍         | 4787/100000 [1:36:27<35:45:16,  1.35s/it]

46676 episode score is 729.08


  5%|▍         | 4788/100000 [1:36:28<35:55:55,  1.36s/it]

46686 episode score is 726.73


  5%|▍         | 4789/100000 [1:36:30<36:03:08,  1.36s/it]

46696 episode score is 731.42


  5%|▍         | 4790/100000 [1:36:31<36:11:16,  1.37s/it]

46706 episode score is 728.54


  5%|▍         | 4791/100000 [1:36:32<36:20:59,  1.37s/it]

46716 episode score is 689.06


  5%|▍         | 4792/100000 [1:36:34<35:56:34,  1.36s/it]

46726 episode score is 686.19


  5%|▍         | 4793/100000 [1:36:35<35:39:15,  1.35s/it]

46736 episode score is 719.55


  5%|▍         | 4794/100000 [1:36:36<35:51:11,  1.36s/it]

46747 episode score is 670.77


  5%|▍         | 4795/100000 [1:36:38<36:28:27,  1.38s/it]

46758 episode score is 673.93


  5%|▍         | 4796/100000 [1:36:39<36:53:31,  1.40s/it]

46768 episode score is 719.54


  5%|▍         | 4797/100000 [1:36:41<36:40:14,  1.39s/it]

46778 episode score is 722.84


  5%|▍         | 4798/100000 [1:36:42<36:40:37,  1.39s/it]

46788 episode score is 695.68


  5%|▍         | 4799/100000 [1:36:43<36:15:59,  1.37s/it]

46798 episode score is 710.86
Iteration 4800: Average test reward: 655.21


  5%|▍         | 4800/100000 [1:36:46<44:31:04,  1.68s/it]

46809 episode score is 673.75


  5%|▍         | 4801/100000 [1:36:47<42:31:42,  1.61s/it]

46819 episode score is 697.87


  5%|▍         | 4802/100000 [1:36:49<40:21:59,  1.53s/it]

46830 episode score is 673.17


  5%|▍         | 4803/100000 [1:36:50<39:34:48,  1.50s/it]

46841 episode score is 671.42


  5%|▍         | 4804/100000 [1:36:51<38:56:47,  1.47s/it]

46852 episode score is 655.28


  5%|▍         | 4805/100000 [1:36:53<38:17:17,  1.45s/it]

46863 episode score is 669.53


  5%|▍         | 4806/100000 [1:36:54<38:04:04,  1.44s/it]

46873 episode score is 696.62


  5%|▍         | 4807/100000 [1:36:56<37:14:15,  1.41s/it]

46883 episode score is 685.46


  5%|▍         | 4808/100000 [1:36:57<36:31:59,  1.38s/it]

46894 episode score is 664.65


  5%|▍         | 4809/100000 [1:36:58<36:53:02,  1.39s/it]

46904 episode score is 707.42


  5%|▍         | 4810/100000 [1:37:00<37:22:17,  1.41s/it]

46914 episode score is 722.00


  5%|▍         | 4811/100000 [1:37:01<37:03:36,  1.40s/it]

46924 episode score is 765.77


  5%|▍         | 4812/100000 [1:37:03<37:23:55,  1.41s/it]

46934 episode score is 691.19


  5%|▍         | 4813/100000 [1:37:04<36:46:11,  1.39s/it]

46944 episode score is 688.70


  5%|▍         | 4814/100000 [1:37:05<36:20:32,  1.37s/it]

46955 episode score is 618.96


  5%|▍         | 4815/100000 [1:37:07<36:06:29,  1.37s/it]

46966 episode score is 662.99


  5%|▍         | 4816/100000 [1:37:08<36:25:58,  1.38s/it]

46977 episode score is 604.11


  5%|▍         | 4817/100000 [1:37:09<36:06:35,  1.37s/it]

46987 episode score is 724.15


  5%|▍         | 4818/100000 [1:37:11<36:07:02,  1.37s/it]

46998 episode score is 643.38


  5%|▍         | 4819/100000 [1:37:12<36:14:03,  1.37s/it]

47008 episode score is 696.79


  5%|▍         | 4820/100000 [1:37:13<35:59:12,  1.36s/it]

47018 episode score is 695.61


  5%|▍         | 4821/100000 [1:37:15<35:39:26,  1.35s/it]

47028 episode score is 705.11


  5%|▍         | 4822/100000 [1:37:16<35:42:10,  1.35s/it]

47038 episode score is 704.80


  5%|▍         | 4823/100000 [1:37:17<35:42:33,  1.35s/it]

47048 episode score is 704.19


  5%|▍         | 4824/100000 [1:37:19<35:39:19,  1.35s/it]

47058 episode score is 709.54


  5%|▍         | 4825/100000 [1:37:20<35:39:03,  1.35s/it]

47068 episode score is 699.80


  5%|▍         | 4826/100000 [1:37:21<35:31:48,  1.34s/it]

47078 episode score is 699.97


  5%|▍         | 4827/100000 [1:37:23<35:26:57,  1.34s/it]

47088 episode score is 703.88


  5%|▍         | 4828/100000 [1:37:24<35:25:08,  1.34s/it]

47098 episode score is 696.34


  5%|▍         | 4829/100000 [1:37:25<35:14:02,  1.33s/it]

47109 episode score is 662.54


  5%|▍         | 4830/100000 [1:37:27<35:50:26,  1.36s/it]

47119 episode score is 703.35


  5%|▍         | 4831/100000 [1:37:28<35:37:06,  1.35s/it]

47129 episode score is 687.86


  5%|▍         | 4832/100000 [1:37:29<35:23:08,  1.34s/it]

47139 episode score is 687.35


  5%|▍         | 4833/100000 [1:37:31<35:08:57,  1.33s/it]

47149 episode score is 696.97


  5%|▍         | 4834/100000 [1:37:32<35:16:20,  1.33s/it]

47159 episode score is 701.70


  5%|▍         | 4835/100000 [1:37:33<35:15:45,  1.33s/it]

47169 episode score is 686.13


  5%|▍         | 4836/100000 [1:37:35<35:08:02,  1.33s/it]

47179 episode score is 722.67


  5%|▍         | 4837/100000 [1:37:36<35:22:53,  1.34s/it]

47189 episode score is 696.59


  5%|▍         | 4838/100000 [1:37:37<35:19:47,  1.34s/it]

47199 episode score is 708.17


  5%|▍         | 4839/100000 [1:37:39<35:31:34,  1.34s/it]

47209 episode score is 696.21


  5%|▍         | 4840/100000 [1:37:40<35:24:59,  1.34s/it]

47219 episode score is 697.80


  5%|▍         | 4841/100000 [1:37:41<35:16:14,  1.33s/it]

47230 episode score is 685.20


  5%|▍         | 4842/100000 [1:37:43<36:08:03,  1.37s/it]

47240 episode score is 723.76


  5%|▍         | 4843/100000 [1:37:44<36:07:49,  1.37s/it]

47251 episode score is 669.32


  5%|▍         | 4844/100000 [1:37:46<36:29:19,  1.38s/it]

47261 episode score is 697.28


  5%|▍         | 4845/100000 [1:37:47<36:09:24,  1.37s/it]

47272 episode score is 675.57


  5%|▍         | 4846/100000 [1:37:48<36:41:09,  1.39s/it]

47282 episode score is 691.68


  5%|▍         | 4847/100000 [1:37:50<36:09:44,  1.37s/it]

47292 episode score is 691.07


  5%|▍         | 4848/100000 [1:37:51<35:45:15,  1.35s/it]

47303 episode score is 683.97


  5%|▍         | 4849/100000 [1:37:53<36:26:34,  1.38s/it]

47314 episode score is 676.44


  5%|▍         | 4850/100000 [1:37:54<36:47:39,  1.39s/it]

47324 episode score is 698.19


  5%|▍         | 4851/100000 [1:37:55<36:18:13,  1.37s/it]

47335 episode score is 674.95


  5%|▍         | 4852/100000 [1:37:57<36:38:38,  1.39s/it]

47345 episode score is 695.40


  5%|▍         | 4853/100000 [1:37:58<36:11:48,  1.37s/it]

47355 episode score is 686.73


  5%|▍         | 4854/100000 [1:37:59<35:49:26,  1.36s/it]

47365 episode score is 700.15


  5%|▍         | 4855/100000 [1:38:01<35:36:07,  1.35s/it]

47375 episode score is 699.60


  5%|▍         | 4856/100000 [1:38:02<35:36:06,  1.35s/it]

47385 episode score is 691.22


  5%|▍         | 4857/100000 [1:38:03<35:25:02,  1.34s/it]

47395 episode score is 696.81


  5%|▍         | 4858/100000 [1:38:05<35:20:56,  1.34s/it]

47405 episode score is 691.39


  5%|▍         | 4859/100000 [1:38:06<35:11:13,  1.33s/it]

47415 episode score is 698.41


  5%|▍         | 4860/100000 [1:38:07<35:10:00,  1.33s/it]

47425 episode score is 709.83


  5%|▍         | 4861/100000 [1:38:09<35:17:57,  1.34s/it]

47435 episode score is 717.01


  5%|▍         | 4862/100000 [1:38:10<35:23:56,  1.34s/it]

47445 episode score is 723.42


  5%|▍         | 4863/100000 [1:38:11<35:35:16,  1.35s/it]

47455 episode score is 712.04


  5%|▍         | 4864/100000 [1:38:13<35:37:58,  1.35s/it]

47465 episode score is 676.63


  5%|▍         | 4865/100000 [1:38:14<35:13:43,  1.33s/it]

47475 episode score is 692.19


  5%|▍         | 4866/100000 [1:38:15<35:10:20,  1.33s/it]

47485 episode score is 728.09


  5%|▍         | 4867/100000 [1:38:17<35:31:23,  1.34s/it]

47495 episode score is 692.45


  5%|▍         | 4868/100000 [1:38:18<35:24:36,  1.34s/it]

47505 episode score is 698.61


  5%|▍         | 4869/100000 [1:38:19<35:28:12,  1.34s/it]

47515 episode score is 720.67


  5%|▍         | 4870/100000 [1:38:21<35:37:42,  1.35s/it]

47526 episode score is 679.26


  5%|▍         | 4871/100000 [1:38:22<36:14:52,  1.37s/it]

47536 episode score is 697.36


  5%|▍         | 4872/100000 [1:38:24<35:53:12,  1.36s/it]

47546 episode score is 733.42


  5%|▍         | 4873/100000 [1:38:25<36:04:31,  1.37s/it]

47556 episode score is 739.46


  5%|▍         | 4874/100000 [1:38:26<36:19:26,  1.37s/it]

47566 episode score is 733.19


  5%|▍         | 4875/100000 [1:38:28<37:11:44,  1.41s/it]

47576 episode score is 732.18


  5%|▍         | 4876/100000 [1:38:29<37:03:39,  1.40s/it]

47586 episode score is 721.53


  5%|▍         | 4877/100000 [1:38:31<36:47:35,  1.39s/it]

47596 episode score is 685.60


  5%|▍         | 4878/100000 [1:38:32<36:20:09,  1.38s/it]

47606 episode score is 708.66


  5%|▍         | 4879/100000 [1:38:33<36:14:46,  1.37s/it]

47616 episode score is 705.46


  5%|▍         | 4880/100000 [1:38:35<36:03:23,  1.36s/it]

47626 episode score is 724.59


  5%|▍         | 4881/100000 [1:38:36<36:05:54,  1.37s/it]

47636 episode score is 697.16


  5%|▍         | 4882/100000 [1:38:37<35:44:14,  1.35s/it]

47646 episode score is 718.58


  5%|▍         | 4883/100000 [1:38:39<35:47:02,  1.35s/it]

47656 episode score is 730.87


  5%|▍         | 4884/100000 [1:38:40<36:01:46,  1.36s/it]

47666 episode score is 715.41


  5%|▍         | 4885/100000 [1:38:41<35:57:50,  1.36s/it]

47676 episode score is 724.19


  5%|▍         | 4886/100000 [1:38:43<36:01:18,  1.36s/it]

47686 episode score is 736.44


  5%|▍         | 4887/100000 [1:38:44<36:22:48,  1.38s/it]

47696 episode score is 722.84


  5%|▍         | 4888/100000 [1:38:46<36:23:52,  1.38s/it]

47706 episode score is 720.46


  5%|▍         | 4889/100000 [1:38:47<36:18:51,  1.37s/it]

47716 episode score is 731.00


  5%|▍         | 4890/100000 [1:38:48<36:24:48,  1.38s/it]

47726 episode score is 734.73


  5%|▍         | 4891/100000 [1:38:50<36:34:33,  1.38s/it]

47736 episode score is 700.13


  5%|▍         | 4892/100000 [1:38:51<36:10:50,  1.37s/it]

47746 episode score is 741.04


  5%|▍         | 4893/100000 [1:38:52<36:25:06,  1.38s/it]

47756 episode score is 710.35


  5%|▍         | 4894/100000 [1:38:54<36:11:58,  1.37s/it]

47766 episode score is 718.90


  5%|▍         | 4895/100000 [1:38:55<36:07:57,  1.37s/it]

47776 episode score is 712.60


  5%|▍         | 4896/100000 [1:38:57<36:01:42,  1.36s/it]

47786 episode score is 723.75


  5%|▍         | 4897/100000 [1:38:58<36:02:41,  1.36s/it]

47796 episode score is 730.36


  5%|▍         | 4898/100000 [1:38:59<36:13:48,  1.37s/it]

47806 episode score is 731.10


  5%|▍         | 4899/100000 [1:39:01<36:22:24,  1.38s/it]

47816 episode score is 702.94
Iteration 4900: Average test reward: 743.23


  5%|▍         | 4900/100000 [1:39:03<45:28:12,  1.72s/it]

47826 episode score is 732.38


  5%|▍         | 4901/100000 [1:39:05<42:48:26,  1.62s/it]

47836 episode score is 742.14


  5%|▍         | 4902/100000 [1:39:06<41:01:09,  1.55s/it]

47846 episode score is 726.56


  5%|▍         | 4903/100000 [1:39:07<39:40:39,  1.50s/it]

47856 episode score is 740.56


  5%|▍         | 4904/100000 [1:39:09<38:55:05,  1.47s/it]

47866 episode score is 747.92


  5%|▍         | 4905/100000 [1:39:10<38:26:21,  1.46s/it]

47876 episode score is 748.52


  5%|▍         | 4906/100000 [1:39:12<38:00:27,  1.44s/it]

47886 episode score is 706.23


  5%|▍         | 4907/100000 [1:39:13<37:17:32,  1.41s/it]

47896 episode score is 738.81


  5%|▍         | 4908/100000 [1:39:14<37:04:11,  1.40s/it]

47906 episode score is 746.15


  5%|▍         | 4909/100000 [1:39:16<36:58:10,  1.40s/it]

47916 episode score is 742.97


  5%|▍         | 4910/100000 [1:39:17<37:10:33,  1.41s/it]

47925 episode score is 775.19


  5%|▍         | 4911/100000 [1:39:18<36:21:31,  1.38s/it]

47935 episode score is 762.03


  5%|▍         | 4912/100000 [1:39:20<36:51:53,  1.40s/it]

47945 episode score is 755.37


  5%|▍         | 4913/100000 [1:39:21<37:04:52,  1.40s/it]

47955 episode score is 739.48


  5%|▍         | 4914/100000 [1:39:23<37:45:35,  1.43s/it]

47964 episode score is 778.67


  5%|▍         | 4915/100000 [1:39:24<36:49:56,  1.39s/it]

47974 episode score is 758.55


  5%|▍         | 4916/100000 [1:39:26<36:59:56,  1.40s/it]

47984 episode score is 736.70


  5%|▍         | 4917/100000 [1:39:27<36:51:16,  1.40s/it]

47994 episode score is 740.82


  5%|▍         | 4918/100000 [1:39:28<36:54:32,  1.40s/it]

48004 episode score is 724.76


  5%|▍         | 4919/100000 [1:39:30<36:41:45,  1.39s/it]

48014 episode score is 726.82


  5%|▍         | 4920/100000 [1:39:31<36:35:16,  1.39s/it]

48024 episode score is 754.81


  5%|▍         | 4921/100000 [1:39:32<36:56:42,  1.40s/it]

48034 episode score is 695.67


  5%|▍         | 4922/100000 [1:39:34<36:20:56,  1.38s/it]

48044 episode score is 729.96


  5%|▍         | 4923/100000 [1:39:35<36:22:50,  1.38s/it]

48054 episode score is 735.36


  5%|▍         | 4924/100000 [1:39:37<36:25:29,  1.38s/it]

48064 episode score is 719.57


  5%|▍         | 4925/100000 [1:39:38<36:17:18,  1.37s/it]

48074 episode score is 734.05


  5%|▍         | 4926/100000 [1:39:39<36:28:08,  1.38s/it]

48084 episode score is 732.46


  5%|▍         | 4927/100000 [1:39:41<36:33:09,  1.38s/it]

48094 episode score is 723.33


  5%|▍         | 4928/100000 [1:39:42<36:26:14,  1.38s/it]

48104 episode score is 735.34


  5%|▍         | 4929/100000 [1:39:43<36:30:42,  1.38s/it]

48114 episode score is 746.64


  5%|▍         | 4930/100000 [1:39:45<36:42:43,  1.39s/it]

48124 episode score is 729.07


  5%|▍         | 4931/100000 [1:39:46<36:29:27,  1.38s/it]

48134 episode score is 744.10


  5%|▍         | 4932/100000 [1:39:48<36:47:08,  1.39s/it]

48144 episode score is 749.23


  5%|▍         | 4933/100000 [1:39:49<37:01:41,  1.40s/it]

48154 episode score is 739.63


  5%|▍         | 4934/100000 [1:39:50<36:58:37,  1.40s/it]

48164 episode score is 746.51


  5%|▍         | 4935/100000 [1:39:52<37:01:21,  1.40s/it]

48174 episode score is 737.22


  5%|▍         | 4936/100000 [1:39:53<36:56:42,  1.40s/it]

48184 episode score is 730.27


  5%|▍         | 4937/100000 [1:39:55<36:47:26,  1.39s/it]

48194 episode score is 703.67


  5%|▍         | 4938/100000 [1:39:56<36:25:29,  1.38s/it]

48204 episode score is 710.84


  5%|▍         | 4939/100000 [1:39:57<36:09:19,  1.37s/it]

48214 episode score is 703.45


  5%|▍         | 4940/100000 [1:39:59<35:47:32,  1.36s/it]

48225 episode score is 668.42


  5%|▍         | 4941/100000 [1:40:00<36:24:39,  1.38s/it]

48235 episode score is 717.59


  5%|▍         | 4942/100000 [1:40:01<36:12:00,  1.37s/it]

48245 episode score is 716.84


  5%|▍         | 4943/100000 [1:40:03<36:10:49,  1.37s/it]

48255 episode score is 700.69


  5%|▍         | 4944/100000 [1:40:04<35:53:53,  1.36s/it]

48265 episode score is 735.89


  5%|▍         | 4945/100000 [1:40:06<36:06:44,  1.37s/it]

48275 episode score is 725.32


  5%|▍         | 4946/100000 [1:40:07<36:05:13,  1.37s/it]

48285 episode score is 711.16


  5%|▍         | 4947/100000 [1:40:08<35:56:03,  1.36s/it]

48295 episode score is 714.31


  5%|▍         | 4948/100000 [1:40:10<35:52:02,  1.36s/it]

48305 episode score is 721.22


  5%|▍         | 4949/100000 [1:40:11<35:54:47,  1.36s/it]

48315 episode score is 710.71


  5%|▍         | 4950/100000 [1:40:12<35:56:24,  1.36s/it]

48325 episode score is 715.87


  5%|▍         | 4951/100000 [1:40:14<35:58:32,  1.36s/it]

48335 episode score is 750.78


  5%|▍         | 4952/100000 [1:40:15<36:19:40,  1.38s/it]

48345 episode score is 721.30


  5%|▍         | 4953/100000 [1:40:16<36:09:10,  1.37s/it]

48355 episode score is 746.60


  5%|▍         | 4954/100000 [1:40:18<36:29:46,  1.38s/it]

48365 episode score is 724.59


  5%|▍         | 4955/100000 [1:40:19<36:23:14,  1.38s/it]

48375 episode score is 718.53


  5%|▍         | 4956/100000 [1:40:21<36:12:26,  1.37s/it]

48385 episode score is 722.72


  5%|▍         | 4957/100000 [1:40:22<36:10:23,  1.37s/it]

48395 episode score is 717.47


  5%|▍         | 4958/100000 [1:40:23<36:01:42,  1.36s/it]

48405 episode score is 730.12


  5%|▍         | 4959/100000 [1:40:25<36:01:19,  1.36s/it]

48415 episode score is 713.40


  5%|▍         | 4960/100000 [1:40:26<35:50:42,  1.36s/it]

48425 episode score is 704.61


  5%|▍         | 4961/100000 [1:40:27<35:37:05,  1.35s/it]

48435 episode score is 698.91


  5%|▍         | 4962/100000 [1:40:29<35:30:43,  1.35s/it]

48445 episode score is 721.29


  5%|▍         | 4963/100000 [1:40:30<35:39:31,  1.35s/it]

48455 episode score is 718.76


  5%|▍         | 4964/100000 [1:40:31<35:48:19,  1.36s/it]

48465 episode score is 709.80


  5%|▍         | 4965/100000 [1:40:33<35:44:31,  1.35s/it]

48475 episode score is 734.98


  5%|▍         | 4966/100000 [1:40:34<35:58:40,  1.36s/it]

48485 episode score is 745.09


  5%|▍         | 4967/100000 [1:40:36<36:16:54,  1.37s/it]

48495 episode score is 715.51


  5%|▍         | 4968/100000 [1:40:37<36:09:37,  1.37s/it]

48505 episode score is 727.94


  5%|▍         | 4969/100000 [1:40:38<36:10:16,  1.37s/it]

48515 episode score is 734.68


  5%|▍         | 4970/100000 [1:40:40<36:18:59,  1.38s/it]

48525 episode score is 751.16


  5%|▍         | 4971/100000 [1:40:41<36:39:37,  1.39s/it]

48535 episode score is 739.93


  5%|▍         | 4972/100000 [1:40:43<36:43:44,  1.39s/it]

48545 episode score is 721.32


  5%|▍         | 4973/100000 [1:40:44<36:28:36,  1.38s/it]

48555 episode score is 717.40


  5%|▍         | 4974/100000 [1:40:45<36:13:53,  1.37s/it]

48565 episode score is 722.73


  5%|▍         | 4975/100000 [1:40:47<36:09:39,  1.37s/it]

48572 episode score is 1108.87


  5%|▍         | 4976/100000 [1:40:48<37:17:06,  1.41s/it]

48579 episode score is 1072.07


  5%|▍         | 4977/100000 [1:40:50<37:30:01,  1.42s/it]

48586 episode score is 1054.17


  5%|▍         | 4978/100000 [1:40:51<38:05:00,  1.44s/it]

48593 episode score is 993.29


  5%|▍         | 4979/100000 [1:40:52<37:42:15,  1.43s/it]

48600 episode score is 1028.67


  5%|▍         | 4980/100000 [1:40:54<37:18:59,  1.41s/it]

48607 episode score is 1026.99


  5%|▍         | 4981/100000 [1:40:55<37:05:08,  1.41s/it]

48614 episode score is 1062.40


  5%|▍         | 4982/100000 [1:40:57<37:41:28,  1.43s/it]

48620 episode score is 1155.43


  5%|▍         | 4983/100000 [1:40:58<37:05:40,  1.41s/it]

48627 episode score is 1093.08


  5%|▍         | 4984/100000 [1:41:00<37:52:46,  1.44s/it]

48634 episode score is 1066.29


  5%|▍         | 4985/100000 [1:41:01<38:02:52,  1.44s/it]

48641 episode score is 1076.00


  5%|▍         | 4986/100000 [1:41:02<38:23:52,  1.45s/it]

48648 episode score is 1099.53


  5%|▍         | 4987/100000 [1:41:04<38:41:10,  1.47s/it]

48655 episode score is 1080.15


  5%|▍         | 4988/100000 [1:41:05<38:46:42,  1.47s/it]

48663 episode score is 966.01


  5%|▍         | 4989/100000 [1:41:07<38:47:57,  1.47s/it]

48670 episode score is 1066.07


  5%|▍         | 4990/100000 [1:41:08<38:22:21,  1.45s/it]

48677 episode score is 1107.96


  5%|▍         | 4991/100000 [1:41:10<38:21:23,  1.45s/it]

48685 episode score is 989.88


  5%|▍         | 4992/100000 [1:41:11<38:37:47,  1.46s/it]

48693 episode score is 913.54


  5%|▍         | 4993/100000 [1:41:13<37:49:57,  1.43s/it]

48702 episode score is 778.16


  5%|▍         | 4994/100000 [1:41:14<37:01:44,  1.40s/it]

48711 episode score is 882.53


  5%|▍         | 4995/100000 [1:41:15<37:43:27,  1.43s/it]

48719 episode score is 905.96


  5%|▍         | 4996/100000 [1:41:17<37:06:43,  1.41s/it]

48727 episode score is 930.10


  5%|▍         | 4997/100000 [1:41:18<36:59:32,  1.40s/it]

48736 episode score is 866.06


  5%|▍         | 4998/100000 [1:41:20<37:16:44,  1.41s/it]

48744 episode score is 879.30


  5%|▍         | 4999/100000 [1:41:21<36:32:49,  1.38s/it]

48753 episode score is 837.51
Iteration 5000: Average test reward: 793.45


  5%|▌         | 5000/100000 [1:41:24<46:48:23,  1.77s/it]

48761 episode score is 894.65


  5%|▌         | 5001/100000 [1:41:25<43:11:35,  1.64s/it]

48770 episode score is 855.91


  5%|▌         | 5002/100000 [1:41:26<42:23:42,  1.61s/it]

48779 episode score is 801.77


  5%|▌         | 5003/100000 [1:41:28<40:19:12,  1.53s/it]

48788 episode score is 792.72


  5%|▌         | 5004/100000 [1:41:29<38:46:48,  1.47s/it]

48797 episode score is 797.17


  5%|▌         | 5005/100000 [1:41:31<37:41:14,  1.43s/it]

48805 episode score is 890.42


  5%|▌         | 5006/100000 [1:41:32<36:51:57,  1.40s/it]

48814 episode score is 865.39


  5%|▌         | 5007/100000 [1:41:33<37:06:38,  1.41s/it]

48823 episode score is 825.78


  5%|▌         | 5008/100000 [1:41:35<36:51:54,  1.40s/it]

48832 episode score is 777.59


  5%|▌         | 5009/100000 [1:41:36<36:13:42,  1.37s/it]

48842 episode score is 767.57


  5%|▌         | 5010/100000 [1:41:37<36:48:33,  1.40s/it]

48852 episode score is 741.44


  5%|▌         | 5011/100000 [1:41:39<36:55:01,  1.40s/it]

48861 episode score is 791.75


  5%|▌         | 5012/100000 [1:41:40<36:31:18,  1.38s/it]

48870 episode score is 766.75


  5%|▌         | 5013/100000 [1:41:41<35:57:36,  1.36s/it]

48879 episode score is 790.22


  5%|▌         | 5014/100000 [1:41:43<35:46:52,  1.36s/it]

48888 episode score is 827.93


  5%|▌         | 5015/100000 [1:41:44<35:56:26,  1.36s/it]

48897 episode score is 819.53


  5%|▌         | 5016/100000 [1:41:46<35:54:23,  1.36s/it]

48906 episode score is 816.03


  5%|▌         | 5017/100000 [1:41:47<36:02:26,  1.37s/it]

48916 episode score is 765.36


  5%|▌         | 5018/100000 [1:41:48<36:41:15,  1.39s/it]

48925 episode score is 803.77


  5%|▌         | 5019/100000 [1:41:50<36:28:25,  1.38s/it]

48934 episode score is 844.85


  5%|▌         | 5020/100000 [1:41:51<36:37:17,  1.39s/it]

48943 episode score is 798.32


  5%|▌         | 5021/100000 [1:41:52<36:17:37,  1.38s/it]

48952 episode score is 773.08


  5%|▌         | 5022/100000 [1:41:54<35:46:54,  1.36s/it]

48961 episode score is 823.09


  5%|▌         | 5023/100000 [1:41:55<35:55:12,  1.36s/it]

48970 episode score is 810.83


  5%|▌         | 5024/100000 [1:41:57<35:51:06,  1.36s/it]

48979 episode score is 807.91


  5%|▌         | 5025/100000 [1:41:58<35:46:08,  1.36s/it]

48988 episode score is 810.10


  5%|▌         | 5026/100000 [1:41:59<35:52:51,  1.36s/it]

48997 episode score is 794.96


  5%|▌         | 5027/100000 [1:42:01<35:40:35,  1.35s/it]

49007 episode score is 762.81


  5%|▌         | 5028/100000 [1:42:02<36:19:59,  1.38s/it]

49016 episode score is 780.53


  5%|▌         | 5029/100000 [1:42:03<35:55:00,  1.36s/it]

49025 episode score is 795.65


  5%|▌         | 5030/100000 [1:42:05<35:48:02,  1.36s/it]

49034 episode score is 792.75


  5%|▌         | 5031/100000 [1:42:06<35:36:21,  1.35s/it]

49043 episode score is 791.40


  5%|▌         | 5032/100000 [1:42:07<35:25:32,  1.34s/it]

49052 episode score is 782.75


  5%|▌         | 5033/100000 [1:42:09<35:09:50,  1.33s/it]

49062 episode score is 763.93


  5%|▌         | 5034/100000 [1:42:10<35:54:22,  1.36s/it]

49071 episode score is 796.81


  5%|▌         | 5035/100000 [1:42:11<35:40:32,  1.35s/it]

49081 episode score is 748.31


  5%|▌         | 5036/100000 [1:42:13<36:08:53,  1.37s/it]

49091 episode score is 756.03


  5%|▌         | 5037/100000 [1:42:14<36:33:24,  1.39s/it]

49101 episode score is 732.08


  5%|▌         | 5038/100000 [1:42:16<36:47:22,  1.39s/it]

49111 episode score is 760.03


  5%|▌         | 5039/100000 [1:42:17<37:07:46,  1.41s/it]

49121 episode score is 746.28


  5%|▌         | 5040/100000 [1:42:18<37:09:21,  1.41s/it]

49131 episode score is 724.49


  5%|▌         | 5041/100000 [1:42:20<36:56:39,  1.40s/it]

49141 episode score is 704.19


  5%|▌         | 5042/100000 [1:42:21<36:33:44,  1.39s/it]

49151 episode score is 742.03


  5%|▌         | 5043/100000 [1:42:23<36:40:01,  1.39s/it]

49160 episode score is 765.79


  5%|▌         | 5044/100000 [1:42:24<36:01:29,  1.37s/it]

49170 episode score is 750.61


  5%|▌         | 5045/100000 [1:42:25<36:21:44,  1.38s/it]

49180 episode score is 737.57


  5%|▌         | 5046/100000 [1:42:27<36:26:27,  1.38s/it]

49190 episode score is 738.28


  5%|▌         | 5047/100000 [1:42:28<36:36:27,  1.39s/it]

49200 episode score is 726.83


  5%|▌         | 5048/100000 [1:42:30<36:30:46,  1.38s/it]

49210 episode score is 742.55


  5%|▌         | 5049/100000 [1:42:31<36:40:02,  1.39s/it]

49220 episode score is 721.53


  5%|▌         | 5050/100000 [1:42:32<36:39:57,  1.39s/it]

49230 episode score is 735.00


  5%|▌         | 5051/100000 [1:42:34<36:41:35,  1.39s/it]

49240 episode score is 682.35


  5%|▌         | 5052/100000 [1:42:35<36:51:33,  1.40s/it]

49250 episode score is 749.41


  5%|▌         | 5053/100000 [1:42:37<36:54:55,  1.40s/it]

49260 episode score is 706.13


  5%|▌         | 5054/100000 [1:42:38<36:26:14,  1.38s/it]

49270 episode score is 738.96


  5%|▌         | 5055/100000 [1:42:39<36:31:33,  1.38s/it]

49280 episode score is 728.26


  5%|▌         | 5056/100000 [1:42:41<36:24:59,  1.38s/it]

49290 episode score is 732.18


  5%|▌         | 5057/100000 [1:42:42<36:23:21,  1.38s/it]

49300 episode score is 723.61


  5%|▌         | 5058/100000 [1:42:43<36:17:50,  1.38s/it]

49310 episode score is 718.10


  5%|▌         | 5059/100000 [1:42:45<36:07:50,  1.37s/it]

49320 episode score is 701.89


  5%|▌         | 5060/100000 [1:42:46<35:45:17,  1.36s/it]

49330 episode score is 735.36


  5%|▌         | 5061/100000 [1:42:47<36:04:36,  1.37s/it]

49340 episode score is 727.57


  5%|▌         | 5062/100000 [1:42:49<36:13:04,  1.37s/it]

49350 episode score is 728.25


  5%|▌         | 5063/100000 [1:42:50<36:14:31,  1.37s/it]

49360 episode score is 719.66


  5%|▌         | 5064/100000 [1:42:52<36:08:28,  1.37s/it]

49370 episode score is 766.00


  5%|▌         | 5065/100000 [1:42:53<36:39:28,  1.39s/it]

49380 episode score is 763.98


  5%|▌         | 5066/100000 [1:42:54<36:56:24,  1.40s/it]

49390 episode score is 753.04


  5%|▌         | 5067/100000 [1:42:56<37:10:19,  1.41s/it]

49400 episode score is 746.34


  5%|▌         | 5068/100000 [1:42:57<37:09:30,  1.41s/it]

49410 episode score is 742.72


  5%|▌         | 5069/100000 [1:42:59<37:02:16,  1.40s/it]

49420 episode score is 746.54


  5%|▌         | 5070/100000 [1:43:00<37:01:39,  1.40s/it]

49430 episode score is 711.35


  5%|▌         | 5071/100000 [1:43:01<36:37:56,  1.39s/it]

49440 episode score is 715.13


  5%|▌         | 5072/100000 [1:43:03<36:26:41,  1.38s/it]

49450 episode score is 722.04


  5%|▌         | 5073/100000 [1:43:04<36:19:49,  1.38s/it]

49460 episode score is 751.01


  5%|▌         | 5074/100000 [1:43:06<36:31:28,  1.39s/it]

49470 episode score is 735.63


  5%|▌         | 5075/100000 [1:43:07<36:30:32,  1.38s/it]

49480 episode score is 737.75


  5%|▌         | 5076/100000 [1:43:08<36:37:15,  1.39s/it]

49490 episode score is 752.97


  5%|▌         | 5077/100000 [1:43:10<36:52:47,  1.40s/it]

49500 episode score is 749.54


  5%|▌         | 5078/100000 [1:43:11<37:04:42,  1.41s/it]

49510 episode score is 751.34


  5%|▌         | 5079/100000 [1:43:13<37:11:35,  1.41s/it]

49520 episode score is 745.49


  5%|▌         | 5080/100000 [1:43:14<37:11:29,  1.41s/it]

49530 episode score is 765.81


  5%|▌         | 5081/100000 [1:43:15<37:21:53,  1.42s/it]

49539 episode score is 801.52


  5%|▌         | 5082/100000 [1:43:17<36:45:07,  1.39s/it]

49548 episode score is 835.81


  5%|▌         | 5083/100000 [1:43:18<37:00:51,  1.40s/it]

49557 episode score is 806.40


  5%|▌         | 5084/100000 [1:43:20<36:42:09,  1.39s/it]

49566 episode score is 824.97


  5%|▌         | 5085/100000 [1:43:21<36:39:24,  1.39s/it]

49575 episode score is 815.38


  5%|▌         | 5086/100000 [1:43:22<36:36:43,  1.39s/it]

49584 episode score is 804.51


  5%|▌         | 5087/100000 [1:43:24<36:15:23,  1.38s/it]

49593 episode score is 798.68


  5%|▌         | 5088/100000 [1:43:25<35:57:06,  1.36s/it]

49602 episode score is 805.71


  5%|▌         | 5089/100000 [1:43:26<35:50:02,  1.36s/it]

49611 episode score is 832.44


  5%|▌         | 5090/100000 [1:43:28<35:59:44,  1.37s/it]

49620 episode score is 824.83


  5%|▌         | 5091/100000 [1:43:29<36:09:27,  1.37s/it]

49629 episode score is 792.53


  5%|▌         | 5092/100000 [1:43:30<35:57:31,  1.36s/it]

49638 episode score is 802.03


  5%|▌         | 5093/100000 [1:43:32<35:51:42,  1.36s/it]

49647 episode score is 784.61


  5%|▌         | 5094/100000 [1:43:33<35:39:34,  1.35s/it]

49656 episode score is 795.10


  5%|▌         | 5095/100000 [1:43:35<35:40:19,  1.35s/it]

49666 episode score is 768.73


  5%|▌         | 5096/100000 [1:43:36<36:24:23,  1.38s/it]

49675 episode score is 787.15


  5%|▌         | 5097/100000 [1:43:37<36:01:22,  1.37s/it]

49685 episode score is 760.36


  5%|▌         | 5098/100000 [1:43:39<36:34:28,  1.39s/it]

49695 episode score is 761.60


  5%|▌         | 5099/100000 [1:43:40<36:53:11,  1.40s/it]

49705 episode score is 750.85
Iteration 5100: Average test reward: 744.76


  5%|▌         | 5100/100000 [1:43:43<46:38:01,  1.77s/it]

49715 episode score is 744.34


  5%|▌         | 5101/100000 [1:43:44<43:42:20,  1.66s/it]

49725 episode score is 733.70


  5%|▌         | 5102/100000 [1:43:46<41:31:31,  1.58s/it]

49735 episode score is 757.38


  5%|▌         | 5103/100000 [1:43:47<40:31:15,  1.54s/it]

49745 episode score is 747.69


  5%|▌         | 5104/100000 [1:43:48<39:36:20,  1.50s/it]

49755 episode score is 758.85


  5%|▌         | 5105/100000 [1:43:50<39:01:19,  1.48s/it]

49764 episode score is 795.70


  5%|▌         | 5106/100000 [1:43:51<37:51:19,  1.44s/it]

49774 episode score is 767.96


  5%|▌         | 5107/100000 [1:43:53<38:00:45,  1.44s/it]

49784 episode score is 765.15


  5%|▌         | 5108/100000 [1:43:54<38:01:33,  1.44s/it]

49794 episode score is 724.62


  5%|▌         | 5109/100000 [1:43:56<37:33:03,  1.42s/it]

49804 episode score is 756.35


  5%|▌         | 5110/100000 [1:43:57<37:28:07,  1.42s/it]

49814 episode score is 754.63


  5%|▌         | 5111/100000 [1:43:58<37:30:20,  1.42s/it]

49823 episode score is 783.13


  5%|▌         | 5112/100000 [1:44:00<36:46:30,  1.40s/it]

49832 episode score is 825.06


  5%|▌         | 5113/100000 [1:44:01<36:41:29,  1.39s/it]

49841 episode score is 822.01


  5%|▌         | 5114/100000 [1:44:02<36:45:42,  1.39s/it]

49850 episode score is 852.27


  5%|▌         | 5115/100000 [1:44:04<37:03:49,  1.41s/it]

49859 episode score is 821.01


  5%|▌         | 5116/100000 [1:44:05<36:49:07,  1.40s/it]

49868 episode score is 776.75


  5%|▌         | 5117/100000 [1:44:07<36:07:48,  1.37s/it]

49877 episode score is 801.68


  5%|▌         | 5118/100000 [1:44:08<35:57:03,  1.36s/it]

49886 episode score is 776.26


  5%|▌         | 5119/100000 [1:44:09<35:31:18,  1.35s/it]

49896 episode score is 742.84


  5%|▌         | 5120/100000 [1:44:11<35:54:16,  1.36s/it]

49906 episode score is 735.56


  5%|▌         | 5121/100000 [1:44:12<36:13:45,  1.37s/it]

49915 episode score is 805.28


  5%|▌         | 5122/100000 [1:44:13<36:03:15,  1.37s/it]

49925 episode score is 764.52


  5%|▌         | 5123/100000 [1:44:15<36:37:00,  1.39s/it]

49935 episode score is 745.68


  5%|▌         | 5124/100000 [1:44:16<36:45:56,  1.40s/it]

49945 episode score is 745.89


  5%|▌         | 5125/100000 [1:44:18<37:08:02,  1.41s/it]

49955 episode score is 756.03


  5%|▌         | 5126/100000 [1:44:19<37:16:14,  1.41s/it]

49964 episode score is 773.96


  5%|▌         | 5127/100000 [1:44:20<36:31:35,  1.39s/it]

49973 episode score is 772.70


  5%|▌         | 5128/100000 [1:44:22<35:58:03,  1.36s/it]

49983 episode score is 752.94


  5%|▌         | 5129/100000 [1:44:23<36:29:28,  1.38s/it]

49993 episode score is 748.71


  5%|▌         | 5130/100000 [1:44:25<36:45:54,  1.40s/it]

50003 episode score is 745.78


  5%|▌         | 5131/100000 [1:44:26<36:52:47,  1.40s/it]

50013 episode score is 757.96


  5%|▌         | 5132/100000 [1:44:27<37:01:41,  1.41s/it]

50023 episode score is 746.65


  5%|▌         | 5133/100000 [1:44:29<37:05:31,  1.41s/it]

50033 episode score is 737.76


  5%|▌         | 5134/100000 [1:44:30<36:55:21,  1.40s/it]

50043 episode score is 738.80


  5%|▌         | 5135/100000 [1:44:32<36:50:39,  1.40s/it]

50053 episode score is 767.66


  5%|▌         | 5136/100000 [1:44:33<37:16:51,  1.41s/it]

50063 episode score is 765.60


  5%|▌         | 5137/100000 [1:44:34<37:24:11,  1.42s/it]

50073 episode score is 752.86


  5%|▌         | 5138/100000 [1:44:36<37:24:36,  1.42s/it]

50082 episode score is 781.05


  5%|▌         | 5139/100000 [1:44:37<36:30:16,  1.39s/it]

50092 episode score is 746.35


  5%|▌         | 5140/100000 [1:44:39<36:41:32,  1.39s/it]

50101 episode score is 794.43


  5%|▌         | 5141/100000 [1:44:40<36:14:22,  1.38s/it]

50111 episode score is 768.66


  5%|▌         | 5142/100000 [1:44:41<36:50:03,  1.40s/it]

50121 episode score is 765.95


  5%|▌         | 5143/100000 [1:44:43<37:14:54,  1.41s/it]

50131 episode score is 783.10


  5%|▌         | 5144/100000 [1:44:44<37:31:32,  1.42s/it]

50141 episode score is 759.09


  5%|▌         | 5145/100000 [1:44:46<38:19:37,  1.45s/it]

50150 episode score is 778.01


  5%|▌         | 5146/100000 [1:44:47<37:16:09,  1.41s/it]

50160 episode score is 782.21


  5%|▌         | 5147/100000 [1:44:49<37:35:52,  1.43s/it]

50170 episode score is 745.26


  5%|▌         | 5148/100000 [1:44:50<37:27:11,  1.42s/it]

50179 episode score is 798.73


  5%|▌         | 5149/100000 [1:44:51<36:44:16,  1.39s/it]

50188 episode score is 782.50


  5%|▌         | 5150/100000 [1:44:53<36:11:17,  1.37s/it]

50198 episode score is 749.78


  5%|▌         | 5151/100000 [1:44:54<36:31:53,  1.39s/it]

50208 episode score is 768.16


  5%|▌         | 5152/100000 [1:44:56<37:06:54,  1.41s/it]

50217 episode score is 781.49


  5%|▌         | 5153/100000 [1:44:57<36:23:19,  1.38s/it]

50227 episode score is 760.14


  5%|▌         | 5154/100000 [1:44:58<36:49:43,  1.40s/it]

50237 episode score is 773.86


  5%|▌         | 5155/100000 [1:45:00<37:15:59,  1.41s/it]

50247 episode score is 747.68


  5%|▌         | 5156/100000 [1:45:01<37:11:08,  1.41s/it]

50257 episode score is 739.19


  5%|▌         | 5157/100000 [1:45:03<37:16:16,  1.41s/it]

50267 episode score is 712.27


  5%|▌         | 5158/100000 [1:45:04<36:43:04,  1.39s/it]

50277 episode score is 724.33


  5%|▌         | 5159/100000 [1:45:05<36:29:48,  1.39s/it]

50287 episode score is 744.41


  5%|▌         | 5160/100000 [1:45:07<36:39:00,  1.39s/it]

50297 episode score is 726.05


  5%|▌         | 5161/100000 [1:45:08<36:32:50,  1.39s/it]

50307 episode score is 732.35


  5%|▌         | 5162/100000 [1:45:09<36:37:39,  1.39s/it]

50317 episode score is 732.43


  5%|▌         | 5163/100000 [1:45:11<36:34:52,  1.39s/it]

50327 episode score is 729.31


  5%|▌         | 5164/100000 [1:45:12<36:30:32,  1.39s/it]

50337 episode score is 743.25


  5%|▌         | 5165/100000 [1:45:14<36:37:56,  1.39s/it]

50347 episode score is 766.71


  5%|▌         | 5166/100000 [1:45:15<37:02:18,  1.41s/it]

50357 episode score is 731.97


  5%|▌         | 5167/100000 [1:45:16<36:46:03,  1.40s/it]

50367 episode score is 736.36


  5%|▌         | 5168/100000 [1:45:18<36:49:31,  1.40s/it]

50377 episode score is 760.36


  5%|▌         | 5169/100000 [1:45:19<37:05:24,  1.41s/it]

50387 episode score is 745.73


  5%|▌         | 5170/100000 [1:45:21<37:07:15,  1.41s/it]

50397 episode score is 745.00


  5%|▌         | 5171/100000 [1:45:22<37:07:18,  1.41s/it]

50407 episode score is 716.22


  5%|▌         | 5172/100000 [1:45:23<36:46:07,  1.40s/it]

50417 episode score is 713.66


  5%|▌         | 5173/100000 [1:45:25<36:28:38,  1.38s/it]

50427 episode score is 697.97


  5%|▌         | 5174/100000 [1:45:26<36:05:41,  1.37s/it]

50437 episode score is 721.02


  5%|▌         | 5175/100000 [1:45:28<36:00:42,  1.37s/it]

50447 episode score is 687.00


  5%|▌         | 5176/100000 [1:45:29<35:35:53,  1.35s/it]

50457 episode score is 706.43


  5%|▌         | 5177/100000 [1:45:30<35:29:48,  1.35s/it]

50467 episode score is 733.98


  5%|▌         | 5178/100000 [1:45:32<35:46:44,  1.36s/it]

50477 episode score is 715.84


  5%|▌         | 5179/100000 [1:45:33<35:47:42,  1.36s/it]

50487 episode score is 723.14


  5%|▌         | 5180/100000 [1:45:34<35:48:50,  1.36s/it]

50497 episode score is 694.16


  5%|▌         | 5181/100000 [1:45:36<35:38:29,  1.35s/it]

50507 episode score is 713.50


  5%|▌         | 5182/100000 [1:45:37<35:32:48,  1.35s/it]

50517 episode score is 713.86


  5%|▌         | 5183/100000 [1:45:38<35:29:26,  1.35s/it]

50527 episode score is 694.31


  5%|▌         | 5184/100000 [1:45:40<35:18:59,  1.34s/it]

50537 episode score is 683.63


  5%|▌         | 5185/100000 [1:45:41<34:59:27,  1.33s/it]

50547 episode score is 691.64


  5%|▌         | 5186/100000 [1:45:42<35:06:56,  1.33s/it]

50557 episode score is 704.53


  5%|▌         | 5187/100000 [1:45:44<35:16:25,  1.34s/it]

50567 episode score is 704.90


  5%|▌         | 5188/100000 [1:45:45<35:17:06,  1.34s/it]

50577 episode score is 711.93


  5%|▌         | 5189/100000 [1:45:46<35:19:58,  1.34s/it]

50587 episode score is 710.63


  5%|▌         | 5190/100000 [1:45:48<35:25:08,  1.34s/it]

50597 episode score is 709.98


  5%|▌         | 5191/100000 [1:45:49<35:25:25,  1.35s/it]

50607 episode score is 708.72


  5%|▌         | 5192/100000 [1:45:50<35:16:59,  1.34s/it]

50617 episode score is 700.86


  5%|▌         | 5193/100000 [1:45:52<35:07:14,  1.33s/it]

50627 episode score is 698.72


  5%|▌         | 5194/100000 [1:45:53<35:02:42,  1.33s/it]

50637 episode score is 705.18


  5%|▌         | 5195/100000 [1:45:54<34:57:28,  1.33s/it]

50647 episode score is 717.52


  5%|▌         | 5196/100000 [1:45:56<35:09:17,  1.33s/it]

50657 episode score is 686.77


  5%|▌         | 5197/100000 [1:45:57<34:52:01,  1.32s/it]

50667 episode score is 700.82


  5%|▌         | 5198/100000 [1:45:58<34:49:50,  1.32s/it]

50677 episode score is 689.21


  5%|▌         | 5199/100000 [1:46:00<34:51:45,  1.32s/it]

50688 episode score is 675.51
Iteration 5200: Average test reward: 661.49


  5%|▌         | 5200/100000 [1:46:02<44:07:27,  1.68s/it]

50699 episode score is 674.12


  5%|▌         | 5201/100000 [1:46:04<42:05:07,  1.60s/it]

50709 episode score is 686.24


  5%|▌         | 5202/100000 [1:46:05<39:45:53,  1.51s/it]

50720 episode score is 672.21


  5%|▌         | 5203/100000 [1:46:06<38:53:47,  1.48s/it]

50731 episode score is 673.04


  5%|▌         | 5204/100000 [1:46:08<39:05:05,  1.48s/it]

50742 episode score is 672.27


  5%|▌         | 5205/100000 [1:46:09<38:29:40,  1.46s/it]

50753 episode score is 670.39


  5%|▌         | 5206/100000 [1:46:11<38:02:57,  1.45s/it]

50763 episode score is 685.75


  5%|▌         | 5207/100000 [1:46:12<36:56:05,  1.40s/it]

50774 episode score is 680.56


  5%|▌         | 5208/100000 [1:46:13<37:05:26,  1.41s/it]

50785 episode score is 683.81


  5%|▌         | 5209/100000 [1:46:15<37:21:25,  1.42s/it]

50796 episode score is 660.59


  5%|▌         | 5210/100000 [1:46:16<37:08:18,  1.41s/it]

50807 episode score is 677.14


  5%|▌         | 5211/100000 [1:46:18<37:16:45,  1.42s/it]

50818 episode score is 643.74


  5%|▌         | 5212/100000 [1:46:19<36:55:16,  1.40s/it]

50829 episode score is 679.05


  5%|▌         | 5213/100000 [1:46:20<37:03:17,  1.41s/it]

50840 episode score is 651.46


  5%|▌         | 5214/100000 [1:46:22<36:49:02,  1.40s/it]

50851 episode score is 661.17


  5%|▌         | 5215/100000 [1:46:23<36:48:52,  1.40s/it]

50862 episode score is 662.59


  5%|▌         | 5216/100000 [1:46:24<36:45:31,  1.40s/it]

50873 episode score is 667.32


  5%|▌         | 5217/100000 [1:46:26<36:48:26,  1.40s/it]

50884 episode score is 637.36


  5%|▌         | 5218/100000 [1:46:27<36:32:36,  1.39s/it]

50895 episode score is 680.03


  5%|▌         | 5219/100000 [1:46:29<36:56:06,  1.40s/it]

50906 episode score is 682.90


  5%|▌         | 5220/100000 [1:46:30<37:13:13,  1.41s/it]

50916 episode score is 697.94


  5%|▌         | 5221/100000 [1:46:31<36:27:18,  1.38s/it]

50927 episode score is 679.82


  5%|▌         | 5222/100000 [1:46:33<36:51:04,  1.40s/it]

50938 episode score is 668.64


  5%|▌         | 5223/100000 [1:46:34<36:52:17,  1.40s/it]

50948 episode score is 693.50


  5%|▌         | 5224/100000 [1:46:36<36:11:07,  1.37s/it]

50958 episode score is 707.50


  5%|▌         | 5225/100000 [1:46:37<35:51:16,  1.36s/it]

50968 episode score is 690.31


  5%|▌         | 5226/100000 [1:46:38<35:24:54,  1.35s/it]

50979 episode score is 684.09


  5%|▌         | 5227/100000 [1:46:40<36:05:17,  1.37s/it]

50989 episode score is 683.93


  5%|▌         | 5228/100000 [1:46:41<35:32:26,  1.35s/it]

50999 episode score is 688.97


  5%|▌         | 5229/100000 [1:46:42<35:14:00,  1.34s/it]

51009 episode score is 685.70


  5%|▌         | 5230/100000 [1:46:44<35:02:40,  1.33s/it]

51019 episode score is 713.54


  5%|▌         | 5231/100000 [1:46:45<35:06:40,  1.33s/it]

51029 episode score is 688.65


  5%|▌         | 5232/100000 [1:46:46<34:50:36,  1.32s/it]

51039 episode score is 702.31


  5%|▌         | 5233/100000 [1:46:48<34:56:16,  1.33s/it]

51049 episode score is 702.33


  5%|▌         | 5234/100000 [1:46:49<35:05:43,  1.33s/it]

51059 episode score is 699.43


  5%|▌         | 5235/100000 [1:46:50<35:00:30,  1.33s/it]

51069 episode score is 705.13


  5%|▌         | 5236/100000 [1:46:52<34:59:10,  1.33s/it]

51079 episode score is 712.23


  5%|▌         | 5237/100000 [1:46:53<35:05:56,  1.33s/it]

51089 episode score is 710.31


  5%|▌         | 5238/100000 [1:46:54<35:11:21,  1.34s/it]

51100 episode score is 682.16


  5%|▌         | 5239/100000 [1:46:56<35:55:15,  1.36s/it]

51110 episode score is 706.80


  5%|▌         | 5240/100000 [1:46:57<35:39:36,  1.35s/it]

51120 episode score is 696.31


  5%|▌         | 5241/100000 [1:46:58<35:23:20,  1.34s/it]

51131 episode score is 650.36


  5%|▌         | 5242/100000 [1:47:00<35:37:24,  1.35s/it]

51141 episode score is 701.72


  5%|▌         | 5243/100000 [1:47:01<35:18:29,  1.34s/it]

51152 episode score is 681.13


  5%|▌         | 5244/100000 [1:47:02<36:02:04,  1.37s/it]

51162 episode score is 700.24


  5%|▌         | 5245/100000 [1:47:04<35:40:26,  1.36s/it]

51172 episode score is 699.13


  5%|▌         | 5246/100000 [1:47:05<35:23:08,  1.34s/it]

51182 episode score is 695.35


  5%|▌         | 5247/100000 [1:47:06<35:06:46,  1.33s/it]

51192 episode score is 701.84


  5%|▌         | 5248/100000 [1:47:08<35:03:02,  1.33s/it]

51202 episode score is 690.40


  5%|▌         | 5249/100000 [1:47:09<34:51:13,  1.32s/it]

51212 episode score is 701.89


  5%|▌         | 5250/100000 [1:47:10<34:53:14,  1.33s/it]

51222 episode score is 715.54


  5%|▌         | 5251/100000 [1:47:12<35:09:34,  1.34s/it]

51232 episode score is 706.42


  5%|▌         | 5252/100000 [1:47:13<35:07:04,  1.33s/it]

51242 episode score is 697.80


  5%|▌         | 5253/100000 [1:47:14<35:00:59,  1.33s/it]

51252 episode score is 703.36


  5%|▌         | 5254/100000 [1:47:16<35:00:40,  1.33s/it]

51262 episode score is 724.44


  5%|▌         | 5255/100000 [1:47:17<35:19:48,  1.34s/it]

51272 episode score is 712.06


  5%|▌         | 5256/100000 [1:47:18<35:24:03,  1.35s/it]

51282 episode score is 690.46


  5%|▌         | 5257/100000 [1:47:20<35:11:10,  1.34s/it]

51292 episode score is 720.66


  5%|▌         | 5258/100000 [1:47:21<35:16:56,  1.34s/it]

51302 episode score is 704.91


  5%|▌         | 5259/100000 [1:47:23<35:59:28,  1.37s/it]

51312 episode score is 683.65


  5%|▌         | 5260/100000 [1:47:24<35:30:40,  1.35s/it]

51322 episode score is 695.14


  5%|▌         | 5261/100000 [1:47:25<35:21:30,  1.34s/it]

51332 episode score is 708.76


  5%|▌         | 5262/100000 [1:47:27<35:17:47,  1.34s/it]

51342 episode score is 720.37


  5%|▌         | 5263/100000 [1:47:28<35:24:50,  1.35s/it]

51352 episode score is 703.22


  5%|▌         | 5264/100000 [1:47:29<35:18:30,  1.34s/it]

51362 episode score is 714.34


  5%|▌         | 5265/100000 [1:47:31<35:28:44,  1.35s/it]

51372 episode score is 741.91


  5%|▌         | 5266/100000 [1:47:32<36:02:12,  1.37s/it]

51382 episode score is 723.89


  5%|▌         | 5267/100000 [1:47:33<36:11:31,  1.38s/it]

51392 episode score is 736.52


  5%|▌         | 5268/100000 [1:47:35<36:21:43,  1.38s/it]

51403 episode score is 687.48


  5%|▌         | 5269/100000 [1:47:36<36:44:37,  1.40s/it]

51413 episode score is 718.10


  5%|▌         | 5270/100000 [1:47:38<36:22:37,  1.38s/it]

51423 episode score is 727.66


  5%|▌         | 5271/100000 [1:47:39<36:20:05,  1.38s/it]

51433 episode score is 722.77


  5%|▌         | 5272/100000 [1:47:40<36:12:43,  1.38s/it]

51443 episode score is 728.33


  5%|▌         | 5273/100000 [1:47:42<36:08:34,  1.37s/it]

51453 episode score is 708.35


  5%|▌         | 5274/100000 [1:47:43<36:03:51,  1.37s/it]

51464 episode score is 674.54


  5%|▌         | 5275/100000 [1:47:44<36:28:43,  1.39s/it]

51474 episode score is 712.10


  5%|▌         | 5276/100000 [1:47:46<36:06:57,  1.37s/it]

51484 episode score is 718.89


  5%|▌         | 5277/100000 [1:47:47<36:10:35,  1.37s/it]

51494 episode score is 720.12


  5%|▌         | 5278/100000 [1:47:49<36:02:10,  1.37s/it]

51504 episode score is 731.77


  5%|▌         | 5279/100000 [1:47:50<36:05:03,  1.37s/it]

51514 episode score is 695.79


  5%|▌         | 5280/100000 [1:47:51<35:35:31,  1.35s/it]

51524 episode score is 698.84


  5%|▌         | 5281/100000 [1:47:53<35:21:20,  1.34s/it]

51534 episode score is 686.63


  5%|▌         | 5282/100000 [1:47:54<35:01:16,  1.33s/it]

51544 episode score is 695.26


  5%|▌         | 5283/100000 [1:47:55<34:53:28,  1.33s/it]

51555 episode score is 678.17


  5%|▌         | 5285/100000 [1:47:58<33:39:28,  1.28s/it]

51565 episode score is 690.13


  5%|▌         | 5286/100000 [1:47:59<32:15:09,  1.23s/it]

51575 episode score is 692.33


  5%|▌         | 5287/100000 [1:48:00<31:20:38,  1.19s/it]

51585 episode score is 704.56


  5%|▌         | 5288/100000 [1:48:01<30:37:59,  1.16s/it]

51595 episode score is 698.61


  5%|▌         | 5289/100000 [1:48:02<31:00:42,  1.18s/it]

51606 episode score is 680.99


  5%|▌         | 5290/100000 [1:48:03<31:10:12,  1.18s/it]

51617 episode score is 685.52


  5%|▌         | 5291/100000 [1:48:05<30:44:08,  1.17s/it]

51627 episode score is 713.19


  5%|▌         | 5292/100000 [1:48:06<30:19:25,  1.15s/it]

51637 episode score is 704.12


  5%|▌         | 5293/100000 [1:48:07<30:00:24,  1.14s/it]

51647 episode score is 700.71


  5%|▌         | 5294/100000 [1:48:08<29:48:53,  1.13s/it]

51657 episode score is 703.77


  5%|▌         | 5295/100000 [1:48:09<29:34:50,  1.12s/it]

51667 episode score is 695.31


  5%|▌         | 5296/100000 [1:48:10<29:29:49,  1.12s/it]

51677 episode score is 703.48


  5%|▌         | 5297/100000 [1:48:11<29:34:07,  1.12s/it]

51687 episode score is 713.73


  5%|▌         | 5298/100000 [1:48:12<29:44:41,  1.13s/it]

51697 episode score is 723.58


  5%|▌         | 5299/100000 [1:48:13<29:37:09,  1.13s/it]

51707 episode score is 705.28
51717 episode score is 722.00


  5%|▌         | 5300/100000 [1:48:16<37:40:09,  1.43s/it]

Iteration 5300: Average test reward: 705.40


  5%|▌         | 5301/100000 [1:48:17<35:00:24,  1.33s/it]

51727 episode score is 689.13


  5%|▌         | 5302/100000 [1:48:18<33:17:05,  1.27s/it]

51737 episode score is 696.88


  5%|▌         | 5303/100000 [1:48:19<32:09:42,  1.22s/it]

51747 episode score is 707.03


  5%|▌         | 5304/100000 [1:48:20<31:32:05,  1.20s/it]

51757 episode score is 719.92


  5%|▌         | 5305/100000 [1:48:21<31:06:50,  1.18s/it]

51767 episode score is 733.60


  5%|▌         | 5306/100000 [1:48:22<30:47:46,  1.17s/it]

51777 episode score is 723.97


  5%|▌         | 5307/100000 [1:48:24<30:45:56,  1.17s/it]

51787 episode score is 739.90


  5%|▌         | 5308/100000 [1:48:25<30:27:02,  1.16s/it]

51797 episode score is 700.98


  5%|▌         | 5309/100000 [1:48:26<31:10:12,  1.19s/it]

51807 episode score is 726.51


  5%|▌         | 5310/100000 [1:48:27<30:33:11,  1.16s/it]

51817 episode score is 692.95


  5%|▌         | 5311/100000 [1:48:28<30:24:29,  1.16s/it]

51827 episode score is 714.15


  5%|▌         | 5312/100000 [1:48:29<30:17:20,  1.15s/it]

51837 episode score is 724.64


  5%|▌         | 5313/100000 [1:48:30<30:00:42,  1.14s/it]

51847 episode score is 710.20


  5%|▌         | 5314/100000 [1:48:32<29:49:03,  1.13s/it]

51857 episode score is 704.96


  5%|▌         | 5315/100000 [1:48:33<29:53:32,  1.14s/it]

51867 episode score is 718.67


  5%|▌         | 5316/100000 [1:48:34<29:45:26,  1.13s/it]

51877 episode score is 705.31


  5%|▌         | 5317/100000 [1:48:35<29:48:11,  1.13s/it]

51887 episode score is 710.04


  5%|▌         | 5318/100000 [1:48:36<30:00:33,  1.14s/it]

51897 episode score is 727.59


  5%|▌         | 5319/100000 [1:48:37<30:04:31,  1.14s/it]

51907 episode score is 726.79


  5%|▌         | 5320/100000 [1:48:38<30:01:24,  1.14s/it]

51917 episode score is 714.90


  5%|▌         | 5321/100000 [1:48:40<30:22:58,  1.16s/it]

51927 episode score is 748.47


  5%|▌         | 5322/100000 [1:48:41<30:22:52,  1.16s/it]

51937 episode score is 729.74


  5%|▌         | 5323/100000 [1:48:42<30:39:02,  1.17s/it]

51947 episode score is 748.18


  5%|▌         | 5324/100000 [1:48:43<30:51:37,  1.17s/it]

51957 episode score is 749.43


  5%|▌         | 5325/100000 [1:48:44<31:03:35,  1.18s/it]

51967 episode score is 747.67


  5%|▌         | 5326/100000 [1:48:45<30:54:36,  1.18s/it]

51977 episode score is 734.67


  5%|▌         | 5327/100000 [1:48:47<30:58:05,  1.18s/it]

51987 episode score is 750.56


  5%|▌         | 5328/100000 [1:48:48<31:05:42,  1.18s/it]

51997 episode score is 748.88


  5%|▌         | 5329/100000 [1:48:49<30:58:00,  1.18s/it]

52007 episode score is 735.16


  5%|▌         | 5330/100000 [1:48:50<30:48:35,  1.17s/it]

52017 episode score is 731.44


  5%|▌         | 5331/100000 [1:48:51<30:45:19,  1.17s/it]

52027 episode score is 735.19


  5%|▌         | 5332/100000 [1:48:53<30:47:21,  1.17s/it]

52037 episode score is 737.24


  5%|▌         | 5333/100000 [1:48:54<30:35:42,  1.16s/it]

52047 episode score is 728.16


  5%|▌         | 5334/100000 [1:48:55<30:30:12,  1.16s/it]

52057 episode score is 721.98


  5%|▌         | 5335/100000 [1:48:56<30:14:47,  1.15s/it]

52067 episode score is 714.45


  5%|▌         | 5336/100000 [1:48:57<30:25:59,  1.16s/it]

52077 episode score is 744.37


  5%|▌         | 5337/100000 [1:48:58<30:33:56,  1.16s/it]

52087 episode score is 738.17


  5%|▌         | 5338/100000 [1:48:59<30:39:55,  1.17s/it]

52097 episode score is 738.83


  5%|▌         | 5339/100000 [1:49:01<30:14:13,  1.15s/it]

52107 episode score is 703.79


  5%|▌         | 5340/100000 [1:49:02<30:01:54,  1.14s/it]

52117 episode score is 706.06


  5%|▌         | 5341/100000 [1:49:03<29:48:02,  1.13s/it]

52127 episode score is 700.62


  5%|▌         | 5342/100000 [1:49:04<29:45:31,  1.13s/it]

52137 episode score is 708.86


  5%|▌         | 5343/100000 [1:49:05<29:30:01,  1.12s/it]

52147 episode score is 689.59


  5%|▌         | 5344/100000 [1:49:06<29:21:51,  1.12s/it]

52157 episode score is 694.66


  5%|▌         | 5345/100000 [1:49:07<29:10:05,  1.11s/it]

52167 episode score is 684.42


  5%|▌         | 5346/100000 [1:49:08<29:41:30,  1.13s/it]

52177 episode score is 740.47


  5%|▌         | 5347/100000 [1:49:10<30:04:28,  1.14s/it]

52187 episode score is 750.46


  5%|▌         | 5348/100000 [1:49:11<29:50:19,  1.13s/it]

52197 episode score is 702.05


  5%|▌         | 5349/100000 [1:49:12<29:45:10,  1.13s/it]

52207 episode score is 708.07


  5%|▌         | 5350/100000 [1:49:13<29:41:18,  1.13s/it]

52217 episode score is 708.40


  5%|▌         | 5351/100000 [1:49:14<29:33:38,  1.12s/it]

52227 episode score is 700.99


  5%|▌         | 5352/100000 [1:49:15<29:54:40,  1.14s/it]

52238 episode score is 651.03


  5%|▌         | 5353/100000 [1:49:16<29:48:42,  1.13s/it]

52248 episode score is 707.88


  5%|▌         | 5354/100000 [1:49:18<30:28:39,  1.16s/it]

52258 episode score is 704.16


  5%|▌         | 5355/100000 [1:49:19<30:46:29,  1.17s/it]

52269 episode score is 682.25


  5%|▌         | 5356/100000 [1:49:20<30:15:05,  1.15s/it]

52279 episode score is 692.61


  5%|▌         | 5357/100000 [1:49:21<29:51:39,  1.14s/it]

52289 episode score is 703.55


  5%|▌         | 5358/100000 [1:49:22<30:19:35,  1.15s/it]

52300 episode score is 683.51


  5%|▌         | 5359/100000 [1:49:23<29:53:47,  1.14s/it]

52310 episode score is 694.84


  5%|▌         | 5360/100000 [1:49:24<29:34:59,  1.13s/it]

52320 episode score is 693.38


  5%|▌         | 5361/100000 [1:49:26<30:12:41,  1.15s/it]

52331 episode score is 690.68


  5%|▌         | 5362/100000 [1:49:27<30:33:38,  1.16s/it]

52342 episode score is 681.34


  5%|▌         | 5363/100000 [1:49:28<30:10:37,  1.15s/it]

52352 episode score is 704.41


  5%|▌         | 5364/100000 [1:49:29<29:54:49,  1.14s/it]

52362 episode score is 703.94


  5%|▌         | 5365/100000 [1:49:30<29:40:45,  1.13s/it]

52372 episode score is 692.28


  5%|▌         | 5366/100000 [1:49:31<29:21:14,  1.12s/it]

52382 episode score is 686.98


  5%|▌         | 5367/100000 [1:49:32<29:25:08,  1.12s/it]

52392 episode score is 694.28


  5%|▌         | 5368/100000 [1:49:33<29:13:19,  1.11s/it]

52402 episode score is 688.75


  5%|▌         | 5369/100000 [1:49:35<29:20:09,  1.12s/it]

52412 episode score is 711.08


  5%|▌         | 5370/100000 [1:49:36<29:54:43,  1.14s/it]

52423 episode score is 679.99


  5%|▌         | 5371/100000 [1:49:37<30:13:38,  1.15s/it]

52434 episode score is 670.44


  5%|▌         | 5372/100000 [1:49:38<29:59:37,  1.14s/it]

52444 episode score is 696.41


  5%|▌         | 5373/100000 [1:49:39<29:45:10,  1.13s/it]

52454 episode score is 697.26


  5%|▌         | 5374/100000 [1:49:40<29:34:11,  1.12s/it]

52464 episode score is 701.27


  5%|▌         | 5375/100000 [1:49:41<29:30:39,  1.12s/it]

52474 episode score is 702.27


  5%|▌         | 5376/100000 [1:49:42<29:27:33,  1.12s/it]

52484 episode score is 701.82


  5%|▌         | 5377/100000 [1:49:44<29:38:19,  1.13s/it]

52494 episode score is 712.91


  5%|▌         | 5378/100000 [1:49:45<29:44:17,  1.13s/it]

52504 episode score is 717.40


  5%|▌         | 5379/100000 [1:49:46<29:43:47,  1.13s/it]

52514 episode score is 712.16


  5%|▌         | 5380/100000 [1:49:47<29:50:50,  1.14s/it]

52524 episode score is 721.26


  5%|▌         | 5381/100000 [1:49:48<29:36:13,  1.13s/it]

52534 episode score is 687.34


  5%|▌         | 5382/100000 [1:49:49<29:31:48,  1.12s/it]

52544 episode score is 699.12


  5%|▌         | 5383/100000 [1:49:50<29:37:06,  1.13s/it]

52554 episode score is 720.23


  5%|▌         | 5384/100000 [1:49:52<29:40:06,  1.13s/it]

52564 episode score is 712.92


  5%|▌         | 5385/100000 [1:49:53<29:32:59,  1.12s/it]

52574 episode score is 697.04


  5%|▌         | 5386/100000 [1:49:54<29:42:12,  1.13s/it]

52584 episode score is 726.26


  5%|▌         | 5387/100000 [1:49:55<29:56:30,  1.14s/it]

52594 episode score is 728.88


  5%|▌         | 5388/100000 [1:49:56<29:55:01,  1.14s/it]

52604 episode score is 717.24


  5%|▌         | 5389/100000 [1:49:57<30:17:46,  1.15s/it]

52614 episode score is 740.44


  5%|▌         | 5390/100000 [1:49:58<30:17:22,  1.15s/it]

52624 episode score is 724.65


  5%|▌         | 5391/100000 [1:50:00<30:19:30,  1.15s/it]

52634 episode score is 730.89


  5%|▌         | 5392/100000 [1:50:01<30:09:49,  1.15s/it]

52644 episode score is 717.78


  5%|▌         | 5393/100000 [1:50:02<29:49:47,  1.14s/it]

52654 episode score is 688.23


  5%|▌         | 5394/100000 [1:50:03<29:42:37,  1.13s/it]

52664 episode score is 701.55


  5%|▌         | 5395/100000 [1:50:04<30:13:12,  1.15s/it]

52675 episode score is 675.73


  5%|▌         | 5396/100000 [1:50:05<29:52:36,  1.14s/it]

52685 episode score is 696.43


  5%|▌         | 5397/100000 [1:50:06<29:39:26,  1.13s/it]

52695 episode score is 697.90


  5%|▌         | 5398/100000 [1:50:08<30:18:33,  1.15s/it]

52706 episode score is 684.58


  5%|▌         | 5399/100000 [1:50:09<29:55:14,  1.14s/it]

52716 episode score is 687.58
52726 episode score is 714.67


  5%|▌         | 5400/100000 [1:50:11<37:39:52,  1.43s/it]

Iteration 5400: Average test reward: 694.88


  5%|▌         | 5401/100000 [1:50:12<35:08:57,  1.34s/it]

52736 episode score is 698.19


  5%|▌         | 5402/100000 [1:50:13<33:31:58,  1.28s/it]

52746 episode score is 704.89


  5%|▌         | 5403/100000 [1:50:14<32:15:14,  1.23s/it]

52756 episode score is 686.96


  5%|▌         | 5404/100000 [1:50:15<31:21:17,  1.19s/it]

52766 episode score is 699.39


  5%|▌         | 5405/100000 [1:50:16<30:43:32,  1.17s/it]

52776 episode score is 700.31


  5%|▌         | 5406/100000 [1:50:18<30:30:55,  1.16s/it]

52786 episode score is 709.68


  5%|▌         | 5407/100000 [1:50:19<30:04:19,  1.14s/it]

52796 episode score is 695.11


  5%|▌         | 5408/100000 [1:50:20<29:57:44,  1.14s/it]

52806 episode score is 708.22


  5%|▌         | 5409/100000 [1:50:21<29:44:05,  1.13s/it]

52816 episode score is 701.97


  5%|▌         | 5410/100000 [1:50:22<29:38:15,  1.13s/it]

52826 episode score is 705.87


  5%|▌         | 5411/100000 [1:50:23<29:40:45,  1.13s/it]

52836 episode score is 713.55


  5%|▌         | 5412/100000 [1:50:24<29:27:08,  1.12s/it]

52846 episode score is 684.85


  5%|▌         | 5413/100000 [1:50:25<29:24:52,  1.12s/it]

52856 episode score is 700.53


  5%|▌         | 5414/100000 [1:50:27<29:51:39,  1.14s/it]

52867 episode score is 660.89


  5%|▌         | 5415/100000 [1:50:28<29:33:06,  1.12s/it]

52877 episode score is 687.25


  5%|▌         | 5416/100000 [1:50:29<29:23:58,  1.12s/it]

52887 episode score is 693.40


  5%|▌         | 5417/100000 [1:50:30<29:19:49,  1.12s/it]

52897 episode score is 696.57


  5%|▌         | 5418/100000 [1:50:31<29:26:48,  1.12s/it]

52907 episode score is 704.17


  5%|▌         | 5419/100000 [1:50:32<29:19:29,  1.12s/it]

52917 episode score is 688.21


  5%|▌         | 5420/100000 [1:50:33<29:11:01,  1.11s/it]

52927 episode score is 692.63


  5%|▌         | 5421/100000 [1:50:34<29:15:57,  1.11s/it]

52937 episode score is 704.35


  5%|▌         | 5422/100000 [1:50:35<29:19:07,  1.12s/it]

52947 episode score is 704.36


  5%|▌         | 5423/100000 [1:50:37<29:38:58,  1.13s/it]

52958 episode score is 653.85


  5%|▌         | 5424/100000 [1:50:38<29:32:47,  1.12s/it]

52968 episode score is 705.77


  5%|▌         | 5425/100000 [1:50:39<30:15:03,  1.15s/it]

52978 episode score is 701.21


  5%|▌         | 5426/100000 [1:50:40<30:01:16,  1.14s/it]

52988 episode score is 699.66


  5%|▌         | 5427/100000 [1:50:41<29:49:56,  1.14s/it]

52998 episode score is 703.52


  5%|▌         | 5428/100000 [1:50:42<29:52:47,  1.14s/it]

53008 episode score is 719.54


  5%|▌         | 5429/100000 [1:50:43<29:58:46,  1.14s/it]

53018 episode score is 728.35


  5%|▌         | 5430/100000 [1:50:45<29:59:20,  1.14s/it]

53028 episode score is 726.47


  5%|▌         | 5431/100000 [1:50:46<29:51:04,  1.14s/it]

53038 episode score is 709.91


  5%|▌         | 5432/100000 [1:50:47<29:51:35,  1.14s/it]

53048 episode score is 714.36


  5%|▌         | 5433/100000 [1:50:48<29:57:42,  1.14s/it]

53058 episode score is 718.24


  5%|▌         | 5434/100000 [1:50:49<29:37:51,  1.13s/it]

53068 episode score is 686.95


  5%|▌         | 5435/100000 [1:50:50<29:20:06,  1.12s/it]

53078 episode score is 686.86


  5%|▌         | 5436/100000 [1:50:51<29:10:46,  1.11s/it]

53088 episode score is 689.58


  5%|▌         | 5437/100000 [1:50:52<29:17:08,  1.11s/it]

53098 episode score is 711.28


  5%|▌         | 5438/100000 [1:50:54<29:17:47,  1.12s/it]

53108 episode score is 702.33


  5%|▌         | 5439/100000 [1:50:55<29:29:16,  1.12s/it]

53119 episode score is 642.74


  5%|▌         | 5440/100000 [1:50:56<30:03:04,  1.14s/it]

53130 episode score is 670.52


  5%|▌         | 5441/100000 [1:50:57<29:47:36,  1.13s/it]

53140 episode score is 694.57


  5%|▌         | 5442/100000 [1:50:58<29:43:35,  1.13s/it]

53151 episode score is 628.30


  5%|▌         | 5443/100000 [1:50:59<29:34:19,  1.13s/it]

53161 episode score is 694.95


  5%|▌         | 5444/100000 [1:51:00<29:32:58,  1.13s/it]

53171 episode score is 711.16


  5%|▌         | 5445/100000 [1:51:01<29:25:09,  1.12s/it]

53181 episode score is 694.42


  5%|▌         | 5446/100000 [1:51:03<29:26:38,  1.12s/it]

53191 episode score is 686.68


  5%|▌         | 5447/100000 [1:51:04<29:25:59,  1.12s/it]

53201 episode score is 701.48


  5%|▌         | 5448/100000 [1:51:05<29:19:25,  1.12s/it]

53211 episode score is 699.33


  5%|▌         | 5449/100000 [1:51:06<29:25:34,  1.12s/it]

53221 episode score is 707.43


  5%|▌         | 5450/100000 [1:51:07<29:37:16,  1.13s/it]

53231 episode score is 720.56


  5%|▌         | 5451/100000 [1:51:08<29:52:19,  1.14s/it]

53241 episode score is 732.39


  5%|▌         | 5452/100000 [1:51:09<29:37:12,  1.13s/it]

53251 episode score is 694.78


  5%|▌         | 5453/100000 [1:51:11<30:11:40,  1.15s/it]

53262 episode score is 677.94


  5%|▌         | 5454/100000 [1:51:12<30:30:05,  1.16s/it]

53273 episode score is 672.55


  5%|▌         | 5455/100000 [1:51:13<30:30:00,  1.16s/it]

53283 episode score is 736.71


  5%|▌         | 5456/100000 [1:51:14<30:18:12,  1.15s/it]

53293 episode score is 721.39


  5%|▌         | 5457/100000 [1:51:15<30:06:22,  1.15s/it]

53303 episode score is 716.76


  5%|▌         | 5458/100000 [1:51:16<29:59:24,  1.14s/it]

53313 episode score is 715.15


  5%|▌         | 5459/100000 [1:51:17<29:55:43,  1.14s/it]

53323 episode score is 703.82


  5%|▌         | 5460/100000 [1:51:19<29:47:09,  1.13s/it]

53333 episode score is 703.02


  5%|▌         | 5461/100000 [1:51:20<29:41:04,  1.13s/it]

53343 episode score is 704.08


  5%|▌         | 5462/100000 [1:51:21<29:35:42,  1.13s/it]

53353 episode score is 701.34


  5%|▌         | 5463/100000 [1:51:22<29:38:01,  1.13s/it]

53363 episode score is 714.26


  5%|▌         | 5464/100000 [1:51:23<29:40:56,  1.13s/it]

53373 episode score is 714.56


  5%|▌         | 5465/100000 [1:51:24<29:46:36,  1.13s/it]

53383 episode score is 724.12


  5%|▌         | 5466/100000 [1:51:25<29:37:56,  1.13s/it]

53393 episode score is 709.01


  5%|▌         | 5467/100000 [1:51:26<29:22:22,  1.12s/it]

53403 episode score is 683.28


  5%|▌         | 5468/100000 [1:51:28<29:33:45,  1.13s/it]

53413 episode score is 726.72


  5%|▌         | 5469/100000 [1:51:29<29:48:29,  1.14s/it]

53423 episode score is 733.91


  5%|▌         | 5470/100000 [1:51:30<29:55:55,  1.14s/it]

53433 episode score is 732.08


  5%|▌         | 5471/100000 [1:51:31<29:51:06,  1.14s/it]

53443 episode score is 718.32


  5%|▌         | 5472/100000 [1:51:32<29:45:43,  1.13s/it]

53453 episode score is 707.59


  5%|▌         | 5473/100000 [1:51:33<29:49:29,  1.14s/it]

53463 episode score is 720.54


  5%|▌         | 5474/100000 [1:51:34<29:37:04,  1.13s/it]

53473 episode score is 701.83


  5%|▌         | 5475/100000 [1:51:35<29:50:12,  1.14s/it]

53483 episode score is 732.20


  5%|▌         | 5476/100000 [1:51:37<30:05:57,  1.15s/it]

53493 episode score is 729.25


  5%|▌         | 5477/100000 [1:51:38<30:02:38,  1.14s/it]

53503 episode score is 721.44


  5%|▌         | 5478/100000 [1:51:39<29:56:52,  1.14s/it]

53513 episode score is 702.85


  5%|▌         | 5479/100000 [1:51:40<29:47:04,  1.13s/it]

53523 episode score is 707.23


  5%|▌         | 5480/100000 [1:51:41<29:35:28,  1.13s/it]

53533 episode score is 695.54


  5%|▌         | 5481/100000 [1:51:42<29:41:58,  1.13s/it]

53543 episode score is 701.70


  5%|▌         | 5482/100000 [1:51:43<29:50:37,  1.14s/it]

53553 episode score is 713.61


  5%|▌         | 5483/100000 [1:51:45<29:50:49,  1.14s/it]

53563 episode score is 712.19


  5%|▌         | 5484/100000 [1:51:46<29:38:11,  1.13s/it]

53573 episode score is 697.03


  5%|▌         | 5485/100000 [1:51:47<29:24:16,  1.12s/it]

53583 episode score is 692.13


  5%|▌         | 5486/100000 [1:51:48<30:08:25,  1.15s/it]

53594 episode score is 682.80


  5%|▌         | 5487/100000 [1:51:49<30:04:50,  1.15s/it]

53604 episode score is 711.29


  5%|▌         | 5488/100000 [1:51:50<30:06:25,  1.15s/it]

53615 episode score is 636.90


  5%|▌         | 5489/100000 [1:51:51<29:51:33,  1.14s/it]

53625 episode score is 701.10


  5%|▌         | 5490/100000 [1:51:53<30:02:25,  1.14s/it]

53636 episode score is 636.76
53646 episode score is 697.36


  5%|▌         | 5492/100000 [1:51:55<30:52:16,  1.18s/it]

53657 episode score is 683.62


  5%|▌         | 5493/100000 [1:51:56<30:15:36,  1.15s/it]

53667 episode score is 679.92


  5%|▌         | 5494/100000 [1:51:57<29:58:30,  1.14s/it]

53677 episode score is 702.12


  5%|▌         | 5495/100000 [1:51:58<30:11:58,  1.15s/it]

53687 episode score is 732.27


  5%|▌         | 5496/100000 [1:52:00<30:26:27,  1.16s/it]

53697 episode score is 738.92


  5%|▌         | 5497/100000 [1:52:01<30:20:27,  1.16s/it]

53707 episode score is 720.09


  5%|▌         | 5498/100000 [1:52:02<30:32:34,  1.16s/it]

53717 episode score is 736.29


  5%|▌         | 5499/100000 [1:52:03<30:26:40,  1.16s/it]

53727 episode score is 722.00
53737 episode score is 718.33


  6%|▌         | 5500/100000 [1:52:05<38:10:08,  1.45s/it]

Iteration 5500: Average test reward: 703.18


  6%|▌         | 5501/100000 [1:52:06<35:34:42,  1.36s/it]

53747 episode score is 706.37


  6%|▌         | 5502/100000 [1:52:07<33:34:20,  1.28s/it]

53757 episode score is 693.69


  6%|▌         | 5503/100000 [1:52:09<32:17:26,  1.23s/it]

53767 episode score is 703.48


  6%|▌         | 5504/100000 [1:52:10<31:16:50,  1.19s/it]

53777 episode score is 691.13


  6%|▌         | 5505/100000 [1:52:11<30:31:27,  1.16s/it]

53787 episode score is 690.11


  6%|▌         | 5506/100000 [1:52:12<30:10:22,  1.15s/it]

53797 episode score is 713.93


  6%|▌         | 5507/100000 [1:52:13<30:06:37,  1.15s/it]

53807 episode score is 717.14


  6%|▌         | 5508/100000 [1:52:14<29:57:57,  1.14s/it]

53817 episode score is 707.38


  6%|▌         | 5509/100000 [1:52:15<29:52:45,  1.14s/it]

53827 episode score is 700.79


  6%|▌         | 5510/100000 [1:52:16<29:46:44,  1.13s/it]

53837 episode score is 698.81


  6%|▌         | 5511/100000 [1:52:17<29:37:50,  1.13s/it]

53847 episode score is 707.27


  6%|▌         | 5512/100000 [1:52:19<29:26:34,  1.12s/it]

53857 episode score is 704.80


  6%|▌         | 5513/100000 [1:52:20<29:42:44,  1.13s/it]

53868 episode score is 660.11


  6%|▌         | 5514/100000 [1:52:21<29:24:50,  1.12s/it]

53878 episode score is 693.97


  6%|▌         | 5515/100000 [1:52:22<29:09:30,  1.11s/it]

53889 episode score is 603.77


  6%|▌         | 5516/100000 [1:52:23<29:06:46,  1.11s/it]

53899 episode score is 707.63


  6%|▌         | 5517/100000 [1:52:24<29:02:36,  1.11s/it]

53909 episode score is 700.44


  6%|▌         | 5518/100000 [1:52:25<29:34:33,  1.13s/it]

53920 episode score is 673.09


  6%|▌         | 5519/100000 [1:52:26<29:53:47,  1.14s/it]

53931 episode score is 663.59


  6%|▌         | 5520/100000 [1:52:28<30:07:57,  1.15s/it]

53942 episode score is 665.90


  6%|▌         | 5521/100000 [1:52:29<29:37:01,  1.13s/it]

53952 episode score is 689.98


  6%|▌         | 5522/100000 [1:52:30<29:23:22,  1.12s/it]

53962 episode score is 696.03


  6%|▌         | 5523/100000 [1:52:31<29:50:02,  1.14s/it]

53973 episode score is 676.75


  6%|▌         | 5524/100000 [1:52:32<29:49:33,  1.14s/it]

53984 episode score is 634.14


  6%|▌         | 5525/100000 [1:52:33<29:56:01,  1.14s/it]

53995 episode score is 652.11


  6%|▌         | 5526/100000 [1:52:34<30:14:10,  1.15s/it]

54006 episode score is 672.70


  6%|▌         | 5527/100000 [1:52:36<30:17:13,  1.15s/it]

54017 episode score is 662.00


  6%|▌         | 5528/100000 [1:52:37<30:24:57,  1.16s/it]

54028 episode score is 674.02


  6%|▌         | 5529/100000 [1:52:38<30:33:26,  1.16s/it]

54039 episode score is 674.62


  6%|▌         | 5530/100000 [1:52:39<30:33:22,  1.16s/it]

54050 episode score is 660.66


  6%|▌         | 5531/100000 [1:52:40<30:34:08,  1.16s/it]

54061 episode score is 673.62


  6%|▌         | 5532/100000 [1:52:41<30:18:05,  1.15s/it]

54072 episode score is 642.69


  6%|▌         | 5533/100000 [1:52:43<30:19:29,  1.16s/it]

54083 episode score is 667.01


  6%|▌         | 5534/100000 [1:52:44<30:25:53,  1.16s/it]

54094 episode score is 669.01


  6%|▌         | 5535/100000 [1:52:45<30:24:50,  1.16s/it]

54105 episode score is 644.04


  6%|▌         | 5536/100000 [1:52:46<30:31:39,  1.16s/it]

54116 episode score is 658.99


  6%|▌         | 5537/100000 [1:52:47<30:39:40,  1.17s/it]

54127 episode score is 658.75


  6%|▌         | 5538/100000 [1:52:48<30:35:22,  1.17s/it]

54138 episode score is 654.25


  6%|▌         | 5539/100000 [1:52:50<30:38:56,  1.17s/it]

54149 episode score is 662.05


  6%|▌         | 5540/100000 [1:52:51<30:26:51,  1.16s/it]

54160 episode score is 651.79


  6%|▌         | 5541/100000 [1:52:52<30:31:47,  1.16s/it]

54171 episode score is 672.16


  6%|▌         | 5542/100000 [1:52:53<30:37:58,  1.17s/it]

54182 episode score is 674.19


  6%|▌         | 5543/100000 [1:52:54<30:00:11,  1.14s/it]

54192 episode score is 688.57


  6%|▌         | 5544/100000 [1:52:55<30:12:01,  1.15s/it]

54203 episode score is 665.27


  6%|▌         | 5545/100000 [1:52:57<30:23:52,  1.16s/it]

54214 episode score is 657.92


  6%|▌         | 5546/100000 [1:52:58<30:26:44,  1.16s/it]

54225 episode score is 669.30


  6%|▌         | 5547/100000 [1:52:59<30:03:01,  1.15s/it]

54235 episode score is 695.17


  6%|▌         | 5548/100000 [1:53:00<30:06:31,  1.15s/it]

54245 episode score is 707.88


  6%|▌         | 5549/100000 [1:53:01<29:41:17,  1.13s/it]

54255 episode score is 697.36


  6%|▌         | 5550/100000 [1:53:02<29:31:08,  1.13s/it]

54265 episode score is 693.36


  6%|▌         | 5551/100000 [1:53:03<29:16:47,  1.12s/it]

54275 episode score is 695.82


  6%|▌         | 5552/100000 [1:53:04<29:29:09,  1.12s/it]

54286 episode score is 651.71


  6%|▌         | 5553/100000 [1:53:05<29:14:04,  1.11s/it]

54296 episode score is 695.57


  6%|▌         | 5554/100000 [1:53:07<29:07:16,  1.11s/it]

54306 episode score is 694.03


  6%|▌         | 5555/100000 [1:53:08<29:01:55,  1.11s/it]

54316 episode score is 704.44


  6%|▌         | 5556/100000 [1:53:09<29:03:20,  1.11s/it]

54326 episode score is 702.06


  6%|▌         | 5557/100000 [1:53:10<29:05:15,  1.11s/it]

54336 episode score is 702.58


  6%|▌         | 5558/100000 [1:53:11<29:10:22,  1.11s/it]

54346 episode score is 709.97


  6%|▌         | 5559/100000 [1:53:12<29:11:12,  1.11s/it]

54356 episode score is 706.77


  6%|▌         | 5560/100000 [1:53:13<29:15:48,  1.12s/it]

54366 episode score is 716.78


  6%|▌         | 5561/100000 [1:53:14<29:05:11,  1.11s/it]

54376 episode score is 692.95


  6%|▌         | 5562/100000 [1:53:15<29:00:43,  1.11s/it]

54386 episode score is 700.45


  6%|▌         | 5563/100000 [1:53:17<29:30:18,  1.12s/it]

54397 episode score is 665.06


  6%|▌         | 5564/100000 [1:53:18<30:02:23,  1.15s/it]

54408 episode score is 679.27


  6%|▌         | 5565/100000 [1:53:19<29:44:01,  1.13s/it]

54418 episode score is 692.55


  6%|▌         | 5566/100000 [1:53:20<29:56:55,  1.14s/it]

54429 episode score is 664.76


  6%|▌         | 5567/100000 [1:53:21<29:37:34,  1.13s/it]

54440 episode score is 613.37


  6%|▌         | 5568/100000 [1:53:22<29:30:53,  1.13s/it]

54450 episode score is 711.43


  6%|▌         | 5569/100000 [1:53:23<29:14:24,  1.11s/it]

54460 episode score is 684.01


  6%|▌         | 5570/100000 [1:53:24<29:06:21,  1.11s/it]

54470 episode score is 691.58


  6%|▌         | 5571/100000 [1:53:26<29:33:19,  1.13s/it]

54481 episode score is 664.85


  6%|▌         | 5572/100000 [1:53:27<29:22:03,  1.12s/it]

54491 episode score is 699.71


  6%|▌         | 5573/100000 [1:53:28<30:00:07,  1.14s/it]

54501 episode score is 701.08


  6%|▌         | 5574/100000 [1:53:29<29:38:00,  1.13s/it]

54511 episode score is 694.11


  6%|▌         | 5575/100000 [1:53:30<29:42:34,  1.13s/it]

54521 episode score is 729.03


  6%|▌         | 5576/100000 [1:53:31<29:36:11,  1.13s/it]

54531 episode score is 713.27


  6%|▌         | 5577/100000 [1:53:32<29:40:36,  1.13s/it]

54541 episode score is 711.62


  6%|▌         | 5578/100000 [1:53:34<29:48:52,  1.14s/it]

54551 episode score is 732.25


  6%|▌         | 5579/100000 [1:53:35<29:53:39,  1.14s/it]

54561 episode score is 731.18


  6%|▌         | 5580/100000 [1:53:36<29:52:27,  1.14s/it]

54571 episode score is 724.72


  6%|▌         | 5581/100000 [1:53:37<30:16:56,  1.15s/it]

54581 episode score is 759.04


  6%|▌         | 5582/100000 [1:53:38<30:09:01,  1.15s/it]

54591 episode score is 729.36


  6%|▌         | 5583/100000 [1:53:39<30:17:57,  1.16s/it]

54601 episode score is 738.32


  6%|▌         | 5584/100000 [1:53:41<30:18:49,  1.16s/it]

54611 episode score is 737.09


  6%|▌         | 5585/100000 [1:53:42<30:25:23,  1.16s/it]

54621 episode score is 741.65


  6%|▌         | 5586/100000 [1:53:43<30:30:28,  1.16s/it]

54631 episode score is 748.38


  6%|▌         | 5587/100000 [1:53:44<30:44:33,  1.17s/it]

54641 episode score is 757.24


  6%|▌         | 5588/100000 [1:53:45<30:58:09,  1.18s/it]

54651 episode score is 734.12


  6%|▌         | 5589/100000 [1:53:46<30:44:50,  1.17s/it]

54661 episode score is 722.18


  6%|▌         | 5590/100000 [1:53:48<30:57:13,  1.18s/it]

54671 episode score is 745.23


  6%|▌         | 5591/100000 [1:53:49<30:40:07,  1.17s/it]

54681 episode score is 719.47


  6%|▌         | 5592/100000 [1:53:50<30:29:50,  1.16s/it]

54691 episode score is 730.63


  6%|▌         | 5593/100000 [1:53:51<30:19:53,  1.16s/it]

54701 episode score is 726.26


  6%|▌         | 5594/100000 [1:53:52<30:34:50,  1.17s/it]

54711 episode score is 755.84


  6%|▌         | 5595/100000 [1:53:53<30:31:16,  1.16s/it]

54721 episode score is 737.81


  6%|▌         | 5596/100000 [1:53:55<30:29:30,  1.16s/it]

54731 episode score is 742.00


  6%|▌         | 5597/100000 [1:53:56<30:27:39,  1.16s/it]

54741 episode score is 741.33


  6%|▌         | 5598/100000 [1:53:57<30:38:47,  1.17s/it]

54751 episode score is 749.37


  6%|▌         | 5599/100000 [1:53:58<30:34:19,  1.17s/it]

54761 episode score is 740.33
54771 episode score is 750.78


  6%|▌         | 5600/100000 [1:54:00<38:48:53,  1.48s/it]

Iteration 5600: Average test reward: 757.27


  6%|▌         | 5601/100000 [1:54:01<35:43:38,  1.36s/it]

54780 episode score is 771.42


  6%|▌         | 5602/100000 [1:54:03<34:24:56,  1.31s/it]

54790 episode score is 754.24


  6%|▌         | 5603/100000 [1:54:04<33:33:24,  1.28s/it]

54800 episode score is 754.12


  6%|▌         | 5604/100000 [1:54:05<33:06:01,  1.26s/it]

54810 episode score is 762.23


  6%|▌         | 5605/100000 [1:54:06<32:30:10,  1.24s/it]

54820 episode score is 748.96


  6%|▌         | 5606/100000 [1:54:07<31:57:12,  1.22s/it]

54830 episode score is 746.85


  6%|▌         | 5607/100000 [1:54:09<31:40:21,  1.21s/it]

54840 episode score is 750.81


  6%|▌         | 5608/100000 [1:54:10<31:25:35,  1.20s/it]

54850 episode score is 751.08


  6%|▌         | 5609/100000 [1:54:11<31:13:54,  1.19s/it]

54860 episode score is 753.49


  6%|▌         | 5610/100000 [1:54:12<31:05:17,  1.19s/it]

54870 episode score is 746.81


  6%|▌         | 5611/100000 [1:54:13<30:51:34,  1.18s/it]

54880 episode score is 738.51


  6%|▌         | 5612/100000 [1:54:14<31:00:05,  1.18s/it]

54890 episode score is 755.65


  6%|▌         | 5613/100000 [1:54:16<31:03:43,  1.18s/it]

54900 episode score is 751.50


  6%|▌         | 5614/100000 [1:54:17<31:01:23,  1.18s/it]

54910 episode score is 743.42


  6%|▌         | 5615/100000 [1:54:18<30:59:57,  1.18s/it]

54920 episode score is 745.06


  6%|▌         | 5616/100000 [1:54:19<30:55:49,  1.18s/it]

54930 episode score is 746.33


  6%|▌         | 5617/100000 [1:54:20<30:50:42,  1.18s/it]

54940 episode score is 745.35


  6%|▌         | 5618/100000 [1:54:21<30:50:47,  1.18s/it]

54950 episode score is 749.32


  6%|▌         | 5619/100000 [1:54:23<30:43:32,  1.17s/it]

54960 episode score is 740.30


  6%|▌         | 5620/100000 [1:54:24<30:47:06,  1.17s/it]

54970 episode score is 753.21


  6%|▌         | 5621/100000 [1:54:25<30:25:34,  1.16s/it]

54980 episode score is 717.36


  6%|▌         | 5622/100000 [1:54:26<30:21:17,  1.16s/it]

54990 episode score is 738.39


  6%|▌         | 5623/100000 [1:54:27<30:31:02,  1.16s/it]

55000 episode score is 751.08


  6%|▌         | 5624/100000 [1:54:28<30:38:41,  1.17s/it]

55010 episode score is 750.34


  6%|▌         | 5625/100000 [1:54:30<30:35:22,  1.17s/it]

55020 episode score is 738.21


  6%|▌         | 5626/100000 [1:54:31<30:27:51,  1.16s/it]

55030 episode score is 740.11


  6%|▌         | 5627/100000 [1:54:32<30:08:17,  1.15s/it]

55040 episode score is 710.19


  6%|▌         | 5628/100000 [1:54:33<29:43:29,  1.13s/it]

55050 episode score is 690.11


  6%|▌         | 5629/100000 [1:54:34<29:44:29,  1.13s/it]

55060 episode score is 725.69


  6%|▌         | 5630/100000 [1:54:35<30:00:15,  1.14s/it]

55070 episode score is 743.95


  6%|▌         | 5631/100000 [1:54:36<29:57:04,  1.14s/it]

55080 episode score is 729.03


  6%|▌         | 5632/100000 [1:54:38<30:02:35,  1.15s/it]

55090 episode score is 729.59


  6%|▌         | 5633/100000 [1:54:39<29:58:05,  1.14s/it]

55100 episode score is 719.92


  6%|▌         | 5634/100000 [1:54:40<30:20:33,  1.16s/it]

55110 episode score is 747.52


  6%|▌         | 5635/100000 [1:54:41<30:24:20,  1.16s/it]

55120 episode score is 735.82


  6%|▌         | 5636/100000 [1:54:42<30:12:21,  1.15s/it]

55130 episode score is 726.92


  6%|▌         | 5637/100000 [1:54:43<29:57:56,  1.14s/it]

55140 episode score is 715.61


  6%|▌         | 5638/100000 [1:54:44<29:55:08,  1.14s/it]

55150 episode score is 726.24


  6%|▌         | 5639/100000 [1:54:46<29:49:18,  1.14s/it]

55160 episode score is 719.97


  6%|▌         | 5640/100000 [1:54:47<29:46:06,  1.14s/it]

55170 episode score is 717.70


  6%|▌         | 5641/100000 [1:54:48<30:19:51,  1.16s/it]

55180 episode score is 699.28


  6%|▌         | 5642/100000 [1:54:49<30:05:57,  1.15s/it]

55190 episode score is 702.72


  6%|▌         | 5643/100000 [1:54:50<29:53:23,  1.14s/it]

55200 episode score is 714.16


  6%|▌         | 5644/100000 [1:54:51<30:22:12,  1.16s/it]

55211 episode score is 684.12


  6%|▌         | 5645/100000 [1:54:53<30:09:23,  1.15s/it]

55221 episode score is 716.05


  6%|▌         | 5646/100000 [1:54:54<29:55:47,  1.14s/it]

55231 episode score is 713.77


  6%|▌         | 5647/100000 [1:54:55<30:18:55,  1.16s/it]

55242 episode score is 680.90


  6%|▌         | 5648/100000 [1:54:56<29:49:25,  1.14s/it]

55252 episode score is 697.48


  6%|▌         | 5649/100000 [1:54:57<29:23:08,  1.12s/it]

55262 episode score is 686.32


  6%|▌         | 5650/100000 [1:54:58<29:19:41,  1.12s/it]

55272 episode score is 715.49


  6%|▌         | 5651/100000 [1:54:59<29:42:15,  1.13s/it]

55283 episode score is 672.90


  6%|▌         | 5652/100000 [1:55:00<29:26:02,  1.12s/it]

55293 episode score is 699.54


  6%|▌         | 5653/100000 [1:55:02<29:42:54,  1.13s/it]

55304 episode score is 664.83


  6%|▌         | 5654/100000 [1:55:03<29:29:32,  1.13s/it]

55314 episode score is 689.31


  6%|▌         | 5655/100000 [1:55:04<29:09:04,  1.11s/it]

55324 episode score is 677.56


  6%|▌         | 5656/100000 [1:55:05<29:04:46,  1.11s/it]

55334 episode score is 699.99


  6%|▌         | 5657/100000 [1:55:06<29:01:31,  1.11s/it]

55344 episode score is 698.08


  6%|▌         | 5658/100000 [1:55:07<28:56:52,  1.10s/it]

55354 episode score is 699.94


  6%|▌         | 5659/100000 [1:55:08<29:01:37,  1.11s/it]

55364 episode score is 704.38


  6%|▌         | 5660/100000 [1:55:09<28:52:05,  1.10s/it]

55374 episode score is 686.84


  6%|▌         | 5661/100000 [1:55:10<28:49:58,  1.10s/it]

55384 episode score is 695.41


  6%|▌         | 5662/100000 [1:55:11<28:47:43,  1.10s/it]

55394 episode score is 689.54


  6%|▌         | 5663/100000 [1:55:13<28:39:49,  1.09s/it]

55404 episode score is 678.91


  6%|▌         | 5664/100000 [1:55:14<28:49:57,  1.10s/it]

55414 episode score is 706.62


  6%|▌         | 5665/100000 [1:55:15<28:38:49,  1.09s/it]

55424 episode score is 679.17


  6%|▌         | 5666/100000 [1:55:16<28:41:48,  1.10s/it]

55434 episode score is 693.37


  6%|▌         | 5667/100000 [1:55:17<28:56:02,  1.10s/it]

55444 episode score is 703.90


  6%|▌         | 5668/100000 [1:55:18<29:02:56,  1.11s/it]

55454 episode score is 694.78


  6%|▌         | 5669/100000 [1:55:19<29:07:12,  1.11s/it]

55464 episode score is 708.74


  6%|▌         | 5670/100000 [1:55:20<28:58:49,  1.11s/it]

55474 episode score is 695.09


  6%|▌         | 5671/100000 [1:55:21<29:05:51,  1.11s/it]

55484 episode score is 716.96


  6%|▌         | 5672/100000 [1:55:23<29:13:35,  1.12s/it]

55494 episode score is 722.04


  6%|▌         | 5673/100000 [1:55:24<29:14:30,  1.12s/it]

55504 episode score is 719.83


  6%|▌         | 5674/100000 [1:55:25<29:14:05,  1.12s/it]

55514 episode score is 707.56


  6%|▌         | 5675/100000 [1:55:26<29:21:58,  1.12s/it]

55524 episode score is 725.40


  6%|▌         | 5676/100000 [1:55:27<29:12:44,  1.11s/it]

55534 episode score is 706.36


  6%|▌         | 5677/100000 [1:55:28<29:20:48,  1.12s/it]

55544 episode score is 722.45


  6%|▌         | 5678/100000 [1:55:29<29:18:46,  1.12s/it]

55554 episode score is 708.45


  6%|▌         | 5679/100000 [1:55:30<29:10:24,  1.11s/it]

55564 episode score is 704.10


  6%|▌         | 5680/100000 [1:55:31<29:21:53,  1.12s/it]

55574 episode score is 729.80


  6%|▌         | 5681/100000 [1:55:33<29:37:20,  1.13s/it]

55584 episode score is 730.69


  6%|▌         | 5682/100000 [1:55:34<29:26:24,  1.12s/it]

55594 episode score is 704.02


  6%|▌         | 5683/100000 [1:55:35<29:32:40,  1.13s/it]

55604 episode score is 729.47


  6%|▌         | 5684/100000 [1:55:36<29:22:15,  1.12s/it]

55614 episode score is 700.83


  6%|▌         | 5685/100000 [1:55:37<29:18:33,  1.12s/it]

55624 episode score is 714.71


  6%|▌         | 5686/100000 [1:55:38<29:22:01,  1.12s/it]

55634 episode score is 721.64


  6%|▌         | 5687/100000 [1:55:39<29:12:16,  1.11s/it]

55644 episode score is 701.76


  6%|▌         | 5688/100000 [1:55:40<29:08:27,  1.11s/it]

55654 episode score is 708.44


  6%|▌         | 5689/100000 [1:55:42<28:58:19,  1.11s/it]

55664 episode score is 691.67


  6%|▌         | 5690/100000 [1:55:43<29:25:38,  1.12s/it]

55675 episode score is 669.52


  6%|▌         | 5691/100000 [1:55:44<29:09:54,  1.11s/it]

55685 episode score is 695.63


  6%|▌         | 5692/100000 [1:55:45<29:31:36,  1.13s/it]

55696 episode score is 668.35


  6%|▌         | 5693/100000 [1:55:46<29:50:32,  1.14s/it]

55707 episode score is 665.25


  6%|▌         | 5694/100000 [1:55:47<29:52:02,  1.14s/it]

55718 episode score is 654.84


  6%|▌         | 5695/100000 [1:55:48<30:17:25,  1.16s/it]

55729 episode score is 686.70


  6%|▌         | 5696/100000 [1:55:50<30:21:39,  1.16s/it]

55740 episode score is 668.39


  6%|▌         | 5697/100000 [1:55:51<29:54:33,  1.14s/it]

55750 episode score is 708.41


  6%|▌         | 5698/100000 [1:55:52<29:26:32,  1.12s/it]

55760 episode score is 693.87


  6%|▌         | 5699/100000 [1:55:53<29:47:05,  1.14s/it]

55771 episode score is 674.39
55782 episode score is 671.89


  6%|▌         | 5700/100000 [1:55:55<37:32:30,  1.43s/it]

Iteration 5700: Average test reward: 682.63


  6%|▌         | 5701/100000 [1:55:56<35:32:46,  1.36s/it]

55793 episode score is 684.86


  6%|▌         | 5702/100000 [1:55:57<34:03:27,  1.30s/it]

55804 episode score is 670.70


  6%|▌         | 5703/100000 [1:55:59<32:58:30,  1.26s/it]

55815 episode score is 673.96


  6%|▌         | 5704/100000 [1:56:00<32:02:38,  1.22s/it]

55826 episode score is 634.61


  6%|▌         | 5705/100000 [1:56:01<32:11:40,  1.23s/it]

55837 episode score is 666.80


  6%|▌         | 5706/100000 [1:56:02<31:09:18,  1.19s/it]

55847 episode score is 698.26


  6%|▌         | 5707/100000 [1:56:03<31:03:54,  1.19s/it]

55858 episode score is 680.87


  6%|▌         | 5708/100000 [1:56:04<30:50:33,  1.18s/it]

55869 episode score is 670.14


  6%|▌         | 5709/100000 [1:56:06<30:39:53,  1.17s/it]

55880 episode score is 662.97


  6%|▌         | 5710/100000 [1:56:07<30:27:05,  1.16s/it]

55891 episode score is 658.29


  6%|▌         | 5711/100000 [1:56:08<30:26:26,  1.16s/it]

55902 episode score is 659.49


  6%|▌         | 5712/100000 [1:56:09<29:52:35,  1.14s/it]

55912 episode score is 695.44


  6%|▌         | 5713/100000 [1:56:10<29:54:33,  1.14s/it]

55923 episode score is 658.76


  6%|▌         | 5714/100000 [1:56:11<30:14:00,  1.15s/it]

55934 episode score is 683.44


  6%|▌         | 5715/100000 [1:56:12<29:50:43,  1.14s/it]

55944 episode score is 704.99


  6%|▌         | 5716/100000 [1:56:14<30:08:55,  1.15s/it]

55955 episode score is 681.05


  6%|▌         | 5717/100000 [1:56:15<30:21:54,  1.16s/it]

55966 episode score is 683.23


  6%|▌         | 5718/100000 [1:56:16<30:03:10,  1.15s/it]

55976 episode score is 696.39


  6%|▌         | 5719/100000 [1:56:17<29:48:19,  1.14s/it]

55986 episode score is 698.41


  6%|▌         | 5720/100000 [1:56:18<29:39:39,  1.13s/it]

55996 episode score is 708.10


  6%|▌         | 5721/100000 [1:56:19<29:36:17,  1.13s/it]

56006 episode score is 711.49


  6%|▌         | 5722/100000 [1:56:20<30:06:12,  1.15s/it]

56017 episode score is 688.15


  6%|▌         | 5723/100000 [1:56:21<29:32:36,  1.13s/it]

56027 episode score is 689.91


  6%|▌         | 5724/100000 [1:56:23<29:11:01,  1.11s/it]

56037 episode score is 696.79


  6%|▌         | 5725/100000 [1:56:24<28:59:40,  1.11s/it]

56047 episode score is 698.37


  6%|▌         | 5726/100000 [1:56:25<28:43:25,  1.10s/it]

56057 episode score is 687.64


  6%|▌         | 5727/100000 [1:56:26<28:35:17,  1.09s/it]

56067 episode score is 695.02


  6%|▌         | 5728/100000 [1:56:27<28:39:11,  1.09s/it]

56077 episode score is 703.86


  6%|▌         | 5729/100000 [1:56:28<28:39:50,  1.09s/it]

56087 episode score is 688.99


  6%|▌         | 5730/100000 [1:56:29<28:38:42,  1.09s/it]

56097 episode score is 695.69


  6%|▌         | 5731/100000 [1:56:30<28:39:34,  1.09s/it]

56107 episode score is 703.09


  6%|▌         | 5732/100000 [1:56:31<28:37:45,  1.09s/it]

56117 episode score is 698.29


  6%|▌         | 5733/100000 [1:56:32<28:41:46,  1.10s/it]

56127 episode score is 690.22


  6%|▌         | 5734/100000 [1:56:33<28:42:09,  1.10s/it]

56137 episode score is 695.86


  6%|▌         | 5735/100000 [1:56:35<28:45:26,  1.10s/it]

56147 episode score is 704.88


  6%|▌         | 5736/100000 [1:56:36<28:50:35,  1.10s/it]

56157 episode score is 699.21


  6%|▌         | 5737/100000 [1:56:37<29:14:43,  1.12s/it]

56168 episode score is 657.83


  6%|▌         | 5738/100000 [1:56:38<29:37:58,  1.13s/it]

56179 episode score is 657.97


  6%|▌         | 5739/100000 [1:56:39<29:45:24,  1.14s/it]

56190 episode score is 649.07


  6%|▌         | 5740/100000 [1:56:40<29:14:53,  1.12s/it]

56200 episode score is 689.92


  6%|▌         | 5741/100000 [1:56:41<29:00:01,  1.11s/it]

56211 episode score is 615.61


  6%|▌         | 5742/100000 [1:56:42<29:12:53,  1.12s/it]

56223 episode score is 572.30


  6%|▌         | 5743/100000 [1:56:44<29:15:40,  1.12s/it]

56234 episode score is 644.18


  6%|▌         | 5744/100000 [1:56:45<29:18:07,  1.12s/it]

56245 episode score is 647.40


  6%|▌         | 5745/100000 [1:56:46<29:46:21,  1.14s/it]

56256 episode score is 685.34


  6%|▌         | 5746/100000 [1:56:47<29:37:24,  1.13s/it]

56266 episode score is 711.61


  6%|▌         | 5747/100000 [1:56:48<30:06:50,  1.15s/it]

56277 episode score is 683.48


  6%|▌         | 5748/100000 [1:56:49<30:20:52,  1.16s/it]

56288 episode score is 681.97


  6%|▌         | 5749/100000 [1:56:51<30:30:29,  1.17s/it]

56299 episode score is 685.62


  6%|▌         | 5750/100000 [1:56:52<30:08:15,  1.15s/it]

56309 episode score is 722.79


  6%|▌         | 5751/100000 [1:56:53<29:37:06,  1.13s/it]

56319 episode score is 694.80


  6%|▌         | 5752/100000 [1:56:54<29:51:23,  1.14s/it]

56330 episode score is 670.48


  6%|▌         | 5753/100000 [1:56:55<29:45:44,  1.14s/it]

56341 episode score is 635.85


  6%|▌         | 5754/100000 [1:56:56<29:51:50,  1.14s/it]

56352 episode score is 654.28


  6%|▌         | 5755/100000 [1:56:57<29:19:16,  1.12s/it]

56362 episode score is 690.00


  6%|▌         | 5756/100000 [1:56:58<29:44:42,  1.14s/it]

56373 episode score is 674.37


  6%|▌         | 5757/100000 [1:57:00<29:47:29,  1.14s/it]

56384 episode score is 655.88


  6%|▌         | 5758/100000 [1:57:01<29:53:17,  1.14s/it]

56395 episode score is 663.27


  6%|▌         | 5759/100000 [1:57:02<29:56:18,  1.14s/it]

56407 episode score is 574.82


  6%|▌         | 5760/100000 [1:57:03<29:46:49,  1.14s/it]

56418 episode score is 634.29


  6%|▌         | 5761/100000 [1:57:04<29:42:10,  1.13s/it]

56429 episode score is 647.91


  6%|▌         | 5762/100000 [1:57:05<29:48:57,  1.14s/it]

56440 episode score is 662.09


  6%|▌         | 5763/100000 [1:57:06<29:58:53,  1.15s/it]

56451 episode score is 673.94


  6%|▌         | 5764/100000 [1:57:08<30:06:13,  1.15s/it]

56462 episode score is 674.47


  6%|▌         | 5765/100000 [1:57:09<30:18:20,  1.16s/it]

56473 episode score is 678.57


  6%|▌         | 5766/100000 [1:57:10<30:03:34,  1.15s/it]

56484 episode score is 644.97


  6%|▌         | 5767/100000 [1:57:11<29:34:59,  1.13s/it]

56494 episode score is 689.73


  6%|▌         | 5768/100000 [1:57:12<29:15:46,  1.12s/it]

56504 episode score is 689.91


  6%|▌         | 5769/100000 [1:57:13<29:04:55,  1.11s/it]

56514 episode score is 697.70


  6%|▌         | 5770/100000 [1:57:14<28:54:40,  1.10s/it]

56524 episode score is 704.38


  6%|▌         | 5771/100000 [1:57:15<29:36:27,  1.13s/it]

56534 episode score is 700.32


  6%|▌         | 5772/100000 [1:57:17<29:31:00,  1.13s/it]

56544 episode score is 723.76


  6%|▌         | 5773/100000 [1:57:18<29:39:49,  1.13s/it]

56554 episode score is 734.34


  6%|▌         | 5774/100000 [1:57:19<29:36:02,  1.13s/it]

56564 episode score is 711.60


  6%|▌         | 5775/100000 [1:57:20<29:31:59,  1.13s/it]

56574 episode score is 722.00


  6%|▌         | 5776/100000 [1:57:21<29:25:02,  1.12s/it]

56584 episode score is 719.62


  6%|▌         | 5777/100000 [1:57:22<29:13:56,  1.12s/it]

56594 episode score is 707.04


  6%|▌         | 5778/100000 [1:57:23<28:58:13,  1.11s/it]

56604 episode score is 692.19


  6%|▌         | 5779/100000 [1:57:24<29:23:37,  1.12s/it]

56615 episode score is 666.97


  6%|▌         | 5780/100000 [1:57:25<29:00:17,  1.11s/it]

56625 episode score is 681.20


  6%|▌         | 5781/100000 [1:57:27<29:21:52,  1.12s/it]

56636 episode score is 665.94


  6%|▌         | 5782/100000 [1:57:28<29:48:49,  1.14s/it]

56647 episode score is 681.70


  6%|▌         | 5783/100000 [1:57:29<29:37:58,  1.13s/it]

56657 episode score is 712.56


  6%|▌         | 5784/100000 [1:57:30<29:26:16,  1.12s/it]

56667 episode score is 709.96


  6%|▌         | 5785/100000 [1:57:31<29:14:28,  1.12s/it]

56677 episode score is 703.18


  6%|▌         | 5786/100000 [1:57:32<29:29:08,  1.13s/it]

56687 episode score is 738.10


  6%|▌         | 5787/100000 [1:57:33<29:18:43,  1.12s/it]

56697 episode score is 706.07


  6%|▌         | 5788/100000 [1:57:35<29:23:40,  1.12s/it]

56707 episode score is 730.95


  6%|▌         | 5789/100000 [1:57:36<29:28:00,  1.13s/it]

56717 episode score is 728.78


  6%|▌         | 5790/100000 [1:57:37<29:46:40,  1.14s/it]

56727 episode score is 757.56


  6%|▌         | 5791/100000 [1:57:38<29:51:14,  1.14s/it]

56737 episode score is 748.38


  6%|▌         | 5792/100000 [1:57:39<29:41:43,  1.13s/it]

56747 episode score is 714.44


  6%|▌         | 5793/100000 [1:57:40<29:47:12,  1.14s/it]

56757 episode score is 730.67


  6%|▌         | 5794/100000 [1:57:41<30:00:23,  1.15s/it]

56767 episode score is 753.39


  6%|▌         | 5795/100000 [1:57:42<29:29:12,  1.13s/it]

56776 episode score is 773.32


  6%|▌         | 5796/100000 [1:57:44<29:12:30,  1.12s/it]

56785 episode score is 776.05


  6%|▌         | 5797/100000 [1:57:45<29:27:03,  1.13s/it]

56795 episode score is 743.92


  6%|▌         | 5798/100000 [1:57:46<29:45:50,  1.14s/it]

56805 episode score is 743.86


  6%|▌         | 5799/100000 [1:57:47<29:33:08,  1.13s/it]

56814 episode score is 783.64
56824 episode score is 743.99


  6%|▌         | 5800/100000 [1:57:49<38:15:39,  1.46s/it]

Iteration 5800: Average test reward: 746.33


  6%|▌         | 5801/100000 [1:57:50<35:56:18,  1.37s/it]

56834 episode score is 743.85


  6%|▌         | 5802/100000 [1:57:52<33:44:48,  1.29s/it]

56843 episode score is 773.06


  6%|▌         | 5803/100000 [1:57:53<33:02:59,  1.26s/it]

56853 episode score is 758.63


  6%|▌         | 5804/100000 [1:57:54<32:24:20,  1.24s/it]

56863 episode score is 751.33


  6%|▌         | 5805/100000 [1:57:55<31:55:37,  1.22s/it]

56873 episode score is 749.28


  6%|▌         | 5806/100000 [1:57:56<31:33:12,  1.21s/it]

56883 episode score is 742.72


  6%|▌         | 5807/100000 [1:57:57<31:28:37,  1.20s/it]

56893 episode score is 756.43


  6%|▌         | 5808/100000 [1:57:59<30:47:04,  1.18s/it]

56902 episode score is 799.70


  6%|▌         | 5809/100000 [1:58:00<30:50:19,  1.18s/it]

56912 episode score is 750.12


  6%|▌         | 5810/100000 [1:58:01<30:26:22,  1.16s/it]

56921 episode score is 785.48


  6%|▌         | 5811/100000 [1:58:02<30:42:14,  1.17s/it]

56931 episode score is 763.24


  6%|▌         | 5812/100000 [1:58:03<30:52:27,  1.18s/it]

56941 episode score is 758.75


  6%|▌         | 5813/100000 [1:58:04<30:50:56,  1.18s/it]

56951 episode score is 747.68


  6%|▌         | 5814/100000 [1:58:06<30:48:21,  1.18s/it]

56961 episode score is 750.23


  6%|▌         | 5815/100000 [1:58:07<30:07:16,  1.15s/it]

56970 episode score is 775.31


  6%|▌         | 5816/100000 [1:58:08<30:22:10,  1.16s/it]

56980 episode score is 749.58


  6%|▌         | 5817/100000 [1:58:09<30:44:02,  1.17s/it]

56990 episode score is 765.58


  6%|▌         | 5818/100000 [1:58:10<30:43:38,  1.17s/it]

57000 episode score is 741.98


  6%|▌         | 5819/100000 [1:58:11<30:58:51,  1.18s/it]

57010 episode score is 775.89


  6%|▌         | 5820/100000 [1:58:13<31:09:12,  1.19s/it]

57020 episode score is 768.40


  6%|▌         | 5821/100000 [1:58:14<30:19:38,  1.16s/it]

57029 episode score is 771.36


  6%|▌         | 5822/100000 [1:58:15<30:24:39,  1.16s/it]

57039 episode score is 743.55


  6%|▌         | 5823/100000 [1:58:16<30:23:59,  1.16s/it]

57049 episode score is 738.70


  6%|▌         | 5824/100000 [1:58:17<30:11:19,  1.15s/it]

57058 episode score is 789.46


  6%|▌         | 5825/100000 [1:58:18<30:31:13,  1.17s/it]

57068 episode score is 761.62


  6%|▌         | 5826/100000 [1:58:20<30:38:35,  1.17s/it]

57078 episode score is 752.74


  6%|▌         | 5827/100000 [1:58:21<30:48:14,  1.18s/it]

57088 episode score is 757.00


  6%|▌         | 5828/100000 [1:58:22<30:50:55,  1.18s/it]

57098 episode score is 750.95


  6%|▌         | 5829/100000 [1:58:23<30:47:38,  1.18s/it]

57108 episode score is 748.62


  6%|▌         | 5830/100000 [1:58:24<30:52:30,  1.18s/it]

57118 episode score is 752.90


  6%|▌         | 5831/100000 [1:58:26<30:48:02,  1.18s/it]

57128 episode score is 749.23


  6%|▌         | 5832/100000 [1:58:27<30:42:17,  1.17s/it]

57138 episode score is 743.25


  6%|▌         | 5833/100000 [1:58:28<30:40:44,  1.17s/it]

57148 episode score is 743.61


  6%|▌         | 5834/100000 [1:58:29<30:21:53,  1.16s/it]

57158 episode score is 718.51


  6%|▌         | 5835/100000 [1:58:30<31:05:33,  1.19s/it]

57168 episode score is 730.42


  6%|▌         | 5836/100000 [1:58:31<30:51:40,  1.18s/it]

57178 episode score is 736.86


  6%|▌         | 5837/100000 [1:58:33<30:52:17,  1.18s/it]

57188 episode score is 738.71


  6%|▌         | 5838/100000 [1:58:34<30:42:32,  1.17s/it]

57198 episode score is 735.52


  6%|▌         | 5839/100000 [1:58:35<30:28:49,  1.17s/it]

57208 episode score is 727.09


  6%|▌         | 5840/100000 [1:58:36<30:15:03,  1.16s/it]

57218 episode score is 722.71


  6%|▌         | 5841/100000 [1:58:37<30:23:14,  1.16s/it]

57228 episode score is 754.19


  6%|▌         | 5842/100000 [1:58:38<30:25:45,  1.16s/it]

57238 episode score is 747.09


  6%|▌         | 5843/100000 [1:58:40<30:42:09,  1.17s/it]

57248 episode score is 760.42


  6%|▌         | 5844/100000 [1:58:41<30:33:52,  1.17s/it]

57258 episode score is 739.32


  6%|▌         | 5845/100000 [1:58:42<30:43:45,  1.17s/it]

57268 episode score is 751.71


  6%|▌         | 5846/100000 [1:58:43<30:27:22,  1.16s/it]

57278 episode score is 715.37


  6%|▌         | 5847/100000 [1:58:44<30:17:12,  1.16s/it]

57288 episode score is 719.55


  6%|▌         | 5848/100000 [1:58:45<30:02:09,  1.15s/it]

57298 episode score is 709.53


  6%|▌         | 5849/100000 [1:58:46<29:50:39,  1.14s/it]

57308 episode score is 716.27


  6%|▌         | 5850/100000 [1:58:48<30:00:57,  1.15s/it]

57318 episode score is 726.38


  6%|▌         | 5851/100000 [1:58:49<30:07:36,  1.15s/it]

57328 episode score is 738.52


  6%|▌         | 5852/100000 [1:58:50<30:15:50,  1.16s/it]

57338 episode score is 735.32


  6%|▌         | 5853/100000 [1:58:51<29:59:51,  1.15s/it]

57348 episode score is 712.41


  6%|▌         | 5854/100000 [1:58:52<29:55:25,  1.14s/it]

57358 episode score is 718.73


  6%|▌         | 5855/100000 [1:58:53<29:51:19,  1.14s/it]

57368 episode score is 715.38


  6%|▌         | 5856/100000 [1:58:54<29:44:06,  1.14s/it]

57378 episode score is 719.89


  6%|▌         | 5857/100000 [1:58:56<29:35:54,  1.13s/it]

57388 episode score is 699.07


  6%|▌         | 5858/100000 [1:58:57<29:37:56,  1.13s/it]

57398 episode score is 728.39


  6%|▌         | 5859/100000 [1:58:58<29:50:31,  1.14s/it]

57408 episode score is 731.93


  6%|▌         | 5860/100000 [1:58:59<29:53:25,  1.14s/it]

57418 episode score is 716.52


  6%|▌         | 5861/100000 [1:59:00<30:07:51,  1.15s/it]

57428 episode score is 746.86


  6%|▌         | 5862/100000 [1:59:01<30:17:54,  1.16s/it]

57438 episode score is 747.00


  6%|▌         | 5863/100000 [1:59:03<30:13:54,  1.16s/it]

57448 episode score is 724.47


  6%|▌         | 5864/100000 [1:59:04<29:53:05,  1.14s/it]

57458 episode score is 702.20


  6%|▌         | 5865/100000 [1:59:05<30:21:19,  1.16s/it]

57468 episode score is 766.57


  6%|▌         | 5866/100000 [1:59:06<30:22:54,  1.16s/it]

57478 episode score is 736.13


  6%|▌         | 5867/100000 [1:59:07<30:09:22,  1.15s/it]

57488 episode score is 714.65


  6%|▌         | 5868/100000 [1:59:08<30:10:46,  1.15s/it]

57498 episode score is 736.75


  6%|▌         | 5869/100000 [1:59:09<30:01:04,  1.15s/it]

57508 episode score is 709.10


  6%|▌         | 5870/100000 [1:59:11<30:00:31,  1.15s/it]

57518 episode score is 730.06


  6%|▌         | 5871/100000 [1:59:12<30:01:40,  1.15s/it]

57528 episode score is 722.09


  6%|▌         | 5872/100000 [1:59:13<29:48:34,  1.14s/it]

57538 episode score is 703.16


  6%|▌         | 5873/100000 [1:59:14<30:07:51,  1.15s/it]

57548 episode score is 757.56


  6%|▌         | 5874/100000 [1:59:15<30:10:25,  1.15s/it]

57558 episode score is 739.97


  6%|▌         | 5875/100000 [1:59:16<30:13:03,  1.16s/it]

57568 episode score is 733.20


  6%|▌         | 5876/100000 [1:59:18<30:33:35,  1.17s/it]

57578 episode score is 755.19


  6%|▌         | 5877/100000 [1:59:19<30:35:18,  1.17s/it]

57588 episode score is 738.11


  6%|▌         | 5878/100000 [1:59:20<30:32:45,  1.17s/it]

57598 episode score is 733.97


  6%|▌         | 5879/100000 [1:59:21<30:30:26,  1.17s/it]

57608 episode score is 739.42


  6%|▌         | 5880/100000 [1:59:22<30:29:20,  1.17s/it]

57618 episode score is 750.78


  6%|▌         | 5881/100000 [1:59:23<30:37:37,  1.17s/it]

57628 episode score is 756.02


  6%|▌         | 5882/100000 [1:59:25<30:35:09,  1.17s/it]

57638 episode score is 741.10


  6%|▌         | 5883/100000 [1:59:26<30:44:13,  1.18s/it]

57647 episode score is 784.97


  6%|▌         | 5884/100000 [1:59:27<30:29:32,  1.17s/it]

57656 episode score is 827.49


  6%|▌         | 5885/100000 [1:59:28<30:16:54,  1.16s/it]

57665 episode score is 821.69


  6%|▌         | 5886/100000 [1:59:29<29:52:59,  1.14s/it]

57674 episode score is 782.85


  6%|▌         | 5887/100000 [1:59:30<29:51:24,  1.14s/it]

57683 episode score is 801.58


  6%|▌         | 5888/100000 [1:59:31<29:56:52,  1.15s/it]

57692 episode score is 830.41


  6%|▌         | 5889/100000 [1:59:33<30:09:44,  1.15s/it]

57701 episode score is 853.06


  6%|▌         | 5890/100000 [1:59:34<30:26:19,  1.16s/it]

57711 episode score is 754.18


  6%|▌         | 5891/100000 [1:59:35<30:01:51,  1.15s/it]

57720 episode score is 799.86


  6%|▌         | 5892/100000 [1:59:36<29:46:03,  1.14s/it]

57729 episode score is 797.21


  6%|▌         | 5893/100000 [1:59:37<29:41:35,  1.14s/it]

57738 episode score is 818.58


  6%|▌         | 5894/100000 [1:59:38<29:18:07,  1.12s/it]

57747 episode score is 772.78


  6%|▌         | 5895/100000 [1:59:39<29:26:44,  1.13s/it]

57756 episode score is 818.93


  6%|▌         | 5896/100000 [1:59:41<30:04:46,  1.15s/it]

57766 episode score is 768.67


  6%|▌         | 5897/100000 [1:59:42<30:14:15,  1.16s/it]

57776 episode score is 748.61


  6%|▌         | 5898/100000 [1:59:43<30:34:59,  1.17s/it]

57786 episode score is 771.10


  6%|▌         | 5899/100000 [1:59:44<29:55:07,  1.14s/it]

57795 episode score is 776.00
57805 episode score is 719.04


  6%|▌         | 5900/100000 [1:59:46<37:49:50,  1.45s/it]

Iteration 5900: Average test reward: 714.21


  6%|▌         | 5901/100000 [1:59:47<36:08:11,  1.38s/it]

57815 episode score is 755.83


  6%|▌         | 5902/100000 [1:59:49<34:29:10,  1.32s/it]

57825 episode score is 731.95


  6%|▌         | 5903/100000 [1:59:50<33:14:47,  1.27s/it]

57835 episode score is 737.04


  6%|▌         | 5904/100000 [1:59:51<32:21:10,  1.24s/it]

57845 episode score is 732.05


  6%|▌         | 5905/100000 [1:59:52<31:28:18,  1.20s/it]

57855 episode score is 704.26


  6%|▌         | 5906/100000 [1:59:53<31:23:31,  1.20s/it]

57864 episode score is 868.83


  6%|▌         | 5907/100000 [1:59:54<30:29:51,  1.17s/it]

57872 episode score is 861.24


  6%|▌         | 5908/100000 [1:59:55<30:04:28,  1.15s/it]

57881 episode score is 786.44


  6%|▌         | 5909/100000 [1:59:57<29:44:48,  1.14s/it]

57890 episode score is 793.48


  6%|▌         | 5910/100000 [1:59:58<29:46:22,  1.14s/it]

57899 episode score is 802.54


  6%|▌         | 5911/100000 [1:59:59<30:25:42,  1.16s/it]

57909 episode score is 777.88


  6%|▌         | 5912/100000 [2:00:00<30:48:21,  1.18s/it]

57919 episode score is 769.24


  6%|▌         | 5913/100000 [2:00:01<30:53:16,  1.18s/it]

57929 episode score is 748.83


  6%|▌         | 5914/100000 [2:00:02<30:48:25,  1.18s/it]

57939 episode score is 731.57


  6%|▌         | 5915/100000 [2:00:04<30:06:46,  1.15s/it]

57948 episode score is 767.97


  6%|▌         | 5916/100000 [2:00:05<30:29:24,  1.17s/it]

57958 episode score is 765.97


  6%|▌         | 5917/100000 [2:00:06<30:11:49,  1.16s/it]

57967 episode score is 800.44


  6%|▌         | 5918/100000 [2:00:07<30:17:01,  1.16s/it]

57977 episode score is 734.89


  6%|▌         | 5919/100000 [2:00:08<30:14:53,  1.16s/it]

57987 episode score is 730.22


  6%|▌         | 5920/100000 [2:00:09<30:12:41,  1.16s/it]

57997 episode score is 732.51


  6%|▌         | 5921/100000 [2:00:11<30:18:51,  1.16s/it]

58007 episode score is 748.44


  6%|▌         | 5922/100000 [2:00:12<29:56:57,  1.15s/it]

58017 episode score is 700.40


  6%|▌         | 5923/100000 [2:00:13<30:07:14,  1.15s/it]

58027 episode score is 740.29


  6%|▌         | 5924/100000 [2:00:14<29:50:09,  1.14s/it]

58037 episode score is 704.20


  6%|▌         | 5925/100000 [2:00:15<29:49:21,  1.14s/it]

58047 episode score is 719.26


  6%|▌         | 5926/100000 [2:00:16<29:28:11,  1.13s/it]

58057 episode score is 693.00


  6%|▌         | 5927/100000 [2:00:17<29:20:39,  1.12s/it]

58067 episode score is 694.21


  6%|▌         | 5928/100000 [2:00:18<29:31:14,  1.13s/it]

58077 episode score is 719.06


  6%|▌         | 5929/100000 [2:00:20<29:24:43,  1.13s/it]

58087 episode score is 707.51


  6%|▌         | 5930/100000 [2:00:21<29:26:18,  1.13s/it]

58097 episode score is 712.60


  6%|▌         | 5931/100000 [2:00:22<29:53:24,  1.14s/it]

58108 episode score is 674.72


  6%|▌         | 5932/100000 [2:00:23<29:46:52,  1.14s/it]

58118 episode score is 717.47


  6%|▌         | 5933/100000 [2:00:24<29:22:40,  1.12s/it]

58128 episode score is 679.76


  6%|▌         | 5934/100000 [2:00:25<29:01:06,  1.11s/it]

58138 episode score is 676.78


  6%|▌         | 5935/100000 [2:00:26<28:47:50,  1.10s/it]

58148 episode score is 680.86


  6%|▌         | 5936/100000 [2:00:27<28:45:49,  1.10s/it]

58158 episode score is 691.76


  6%|▌         | 5937/100000 [2:00:28<28:42:59,  1.10s/it]

58168 episode score is 688.10


  6%|▌         | 5938/100000 [2:00:30<29:23:29,  1.12s/it]

58179 episode score is 673.57


  6%|▌         | 5939/100000 [2:00:31<29:52:04,  1.14s/it]

58189 episode score is 688.91


  6%|▌         | 5940/100000 [2:00:32<30:13:40,  1.16s/it]

58200 episode score is 670.99


  6%|▌         | 5941/100000 [2:00:33<29:53:58,  1.14s/it]

58210 episode score is 701.91


  6%|▌         | 5942/100000 [2:00:34<29:26:36,  1.13s/it]

58220 episode score is 687.62


  6%|▌         | 5943/100000 [2:00:35<29:39:07,  1.13s/it]

58230 episode score is 731.05


  6%|▌         | 5944/100000 [2:00:36<29:37:21,  1.13s/it]

58240 episode score is 718.42


  6%|▌         | 5945/100000 [2:00:38<29:37:07,  1.13s/it]

58250 episode score is 714.69


  6%|▌         | 5946/100000 [2:00:39<29:20:12,  1.12s/it]

58260 episode score is 689.12


  6%|▌         | 5947/100000 [2:00:40<29:18:05,  1.12s/it]

58270 episode score is 702.45


  6%|▌         | 5948/100000 [2:00:41<29:07:25,  1.11s/it]

58280 episode score is 695.18


  6%|▌         | 5949/100000 [2:00:42<28:55:59,  1.11s/it]

58290 episode score is 685.35


  6%|▌         | 5950/100000 [2:00:43<29:28:03,  1.13s/it]

58301 episode score is 669.48


  6%|▌         | 5951/100000 [2:00:44<29:39:58,  1.14s/it]

58312 episode score is 663.69


  6%|▌         | 5952/100000 [2:00:46<29:54:02,  1.14s/it]

58323 episode score is 673.57


  6%|▌         | 5953/100000 [2:00:47<29:34:59,  1.13s/it]

58333 episode score is 707.64


  6%|▌         | 5954/100000 [2:00:48<29:58:18,  1.15s/it]

58344 episode score is 659.41


  6%|▌         | 5955/100000 [2:00:49<30:14:33,  1.16s/it]

58355 episode score is 677.92


  6%|▌         | 5956/100000 [2:00:50<30:10:06,  1.15s/it]

58366 episode score is 653.68


  6%|▌         | 5957/100000 [2:00:51<29:32:53,  1.13s/it]

58376 episode score is 681.49


  6%|▌         | 5958/100000 [2:00:52<29:56:28,  1.15s/it]

58387 episode score is 672.95


  6%|▌         | 5959/100000 [2:00:54<30:05:12,  1.15s/it]

58398 episode score is 666.87


  6%|▌         | 5960/100000 [2:00:55<30:16:13,  1.16s/it]

58409 episode score is 673.90


  6%|▌         | 5961/100000 [2:00:56<29:58:54,  1.15s/it]

58420 episode score is 632.22


  6%|▌         | 5962/100000 [2:00:57<29:42:23,  1.14s/it]

58431 episode score is 626.99


  6%|▌         | 5963/100000 [2:00:58<29:42:58,  1.14s/it]

58442 episode score is 650.60


  6%|▌         | 5964/100000 [2:00:59<29:59:09,  1.15s/it]

58453 episode score is 671.10


  6%|▌         | 5965/100000 [2:01:00<29:42:58,  1.14s/it]

58464 episode score is 628.02


  6%|▌         | 5966/100000 [2:01:02<29:46:03,  1.14s/it]

58476 episode score is 573.94


  6%|▌         | 5967/100000 [2:01:03<29:49:14,  1.14s/it]

58487 episode score is 633.18


  6%|▌         | 5968/100000 [2:01:04<29:55:38,  1.15s/it]

58499 episode score is 578.09


  6%|▌         | 5969/100000 [2:01:05<30:16:31,  1.16s/it]

58511 episode score is 609.50


  6%|▌         | 5970/100000 [2:01:06<29:48:08,  1.14s/it]

58523 episode score is 534.81


  6%|▌         | 5971/100000 [2:01:07<29:20:52,  1.12s/it]

58533 episode score is 686.69


  6%|▌         | 5972/100000 [2:01:08<29:50:08,  1.14s/it]

58547 episode score is 470.77


  6%|▌         | 5973/100000 [2:01:10<29:48:01,  1.14s/it]

58558 episode score is 639.45


  6%|▌         | 5974/100000 [2:01:11<29:53:53,  1.14s/it]

58570 episode score is 574.76


  6%|▌         | 5975/100000 [2:01:12<29:53:46,  1.14s/it]

58581 episode score is 644.67


  6%|▌         | 5976/100000 [2:01:13<30:16:55,  1.16s/it]

58592 episode score is 677.20


  6%|▌         | 5977/100000 [2:01:14<29:43:28,  1.14s/it]

58602 episode score is 685.58


  6%|▌         | 5978/100000 [2:01:15<29:23:25,  1.13s/it]

58612 episode score is 695.68


  6%|▌         | 5979/100000 [2:01:16<29:20:34,  1.12s/it]

58622 episode score is 712.47


  6%|▌         | 5980/100000 [2:01:18<29:53:18,  1.14s/it]

58633 episode score is 668.83


  6%|▌         | 5981/100000 [2:01:19<29:45:03,  1.14s/it]

58644 episode score is 623.75


  6%|▌         | 5982/100000 [2:01:20<29:37:25,  1.13s/it]

58654 episode score is 707.60


  6%|▌         | 5983/100000 [2:01:21<30:05:26,  1.15s/it]

58665 episode score is 677.63


  6%|▌         | 5984/100000 [2:01:22<29:59:39,  1.15s/it]

58676 episode score is 640.52


  6%|▌         | 5985/100000 [2:01:23<29:39:42,  1.14s/it]

58686 episode score is 698.28


  6%|▌         | 5986/100000 [2:01:24<29:31:25,  1.13s/it]

58696 episode score is 714.56


  6%|▌         | 5987/100000 [2:01:25<29:42:50,  1.14s/it]

58706 episode score is 738.16


  6%|▌         | 5988/100000 [2:01:27<29:49:02,  1.14s/it]

58716 episode score is 738.88


  6%|▌         | 5989/100000 [2:01:28<29:49:36,  1.14s/it]

58726 episode score is 727.01


  6%|▌         | 5990/100000 [2:01:29<30:09:49,  1.16s/it]

58736 episode score is 752.49


  6%|▌         | 5991/100000 [2:01:30<30:50:05,  1.18s/it]

58746 episode score is 735.84


  6%|▌         | 5992/100000 [2:01:31<30:41:56,  1.18s/it]

58756 episode score is 740.60


  6%|▌         | 5993/100000 [2:01:33<30:40:45,  1.17s/it]

58766 episode score is 741.98


  6%|▌         | 5994/100000 [2:01:34<30:38:57,  1.17s/it]

58776 episode score is 753.00


  6%|▌         | 5995/100000 [2:01:35<30:52:51,  1.18s/it]

58786 episode score is 770.57


  6%|▌         | 5996/100000 [2:01:36<30:29:07,  1.17s/it]

58796 episode score is 711.12


  6%|▌         | 5997/100000 [2:01:37<30:19:48,  1.16s/it]

58806 episode score is 733.03


  6%|▌         | 5998/100000 [2:01:38<30:24:51,  1.16s/it]

58816 episode score is 750.88


  6%|▌         | 5999/100000 [2:01:40<30:35:47,  1.17s/it]

58826 episode score is 762.45
58835 episode score is 771.25


  6%|▌         | 6000/100000 [2:01:42<37:53:09,  1.45s/it]

Iteration 6000: Average test reward: 735.70


  6%|▌         | 6001/100000 [2:01:43<35:37:03,  1.36s/it]

58845 episode score is 732.23


  6%|▌         | 6002/100000 [2:01:44<33:26:28,  1.28s/it]

58855 episode score is 685.54


  6%|▌         | 6003/100000 [2:01:45<32:24:11,  1.24s/it]

58865 episode score is 716.46


  6%|▌         | 6004/100000 [2:01:46<31:34:43,  1.21s/it]

58875 episode score is 720.41


  6%|▌         | 6005/100000 [2:01:47<30:57:39,  1.19s/it]

58885 episode score is 711.72


  6%|▌         | 6006/100000 [2:01:48<30:51:10,  1.18s/it]

58895 episode score is 743.29


  6%|▌         | 6007/100000 [2:01:50<30:24:30,  1.16s/it]

58905 episode score is 711.81


  6%|▌         | 6008/100000 [2:01:51<30:11:14,  1.16s/it]

58915 episode score is 719.66


  6%|▌         | 6009/100000 [2:01:52<30:02:39,  1.15s/it]

58925 episode score is 718.30


  6%|▌         | 6010/100000 [2:01:53<29:44:14,  1.14s/it]

58934 episode score is 782.43


  6%|▌         | 6011/100000 [2:01:54<30:15:28,  1.16s/it]

58944 episode score is 765.23


  6%|▌         | 6012/100000 [2:01:55<30:06:49,  1.15s/it]

58954 episode score is 714.29


  6%|▌         | 6013/100000 [2:01:57<30:11:45,  1.16s/it]

58964 episode score is 742.35


  6%|▌         | 6014/100000 [2:01:58<30:17:21,  1.16s/it]

58974 episode score is 742.57


  6%|▌         | 6015/100000 [2:01:59<30:17:25,  1.16s/it]

58984 episode score is 731.98


  6%|▌         | 6016/100000 [2:02:00<30:28:48,  1.17s/it]

58994 episode score is 754.69


  6%|▌         | 6017/100000 [2:02:01<30:01:08,  1.15s/it]

59003 episode score is 764.64


  6%|▌         | 6018/100000 [2:02:02<29:40:09,  1.14s/it]

59012 episode score is 785.56


  6%|▌         | 6019/100000 [2:02:03<29:59:26,  1.15s/it]

59022 episode score is 741.83


  6%|▌         | 6020/100000 [2:02:05<30:05:44,  1.15s/it]

59032 episode score is 733.60


  6%|▌         | 6021/100000 [2:02:06<30:20:19,  1.16s/it]

59042 episode score is 747.48


  6%|▌         | 6022/100000 [2:02:07<30:24:42,  1.16s/it]

59052 episode score is 744.11


  6%|▌         | 6023/100000 [2:02:08<29:53:38,  1.15s/it]

59062 episode score is 696.79


  6%|▌         | 6024/100000 [2:02:09<29:53:01,  1.14s/it]

59072 episode score is 726.13


  6%|▌         | 6025/100000 [2:02:10<30:10:03,  1.16s/it]

59082 episode score is 749.41


  6%|▌         | 6026/100000 [2:02:11<29:59:09,  1.15s/it]

59092 episode score is 708.92


  6%|▌         | 6027/100000 [2:02:13<29:44:58,  1.14s/it]

59102 episode score is 703.00


  6%|▌         | 6028/100000 [2:02:14<30:01:13,  1.15s/it]

59112 episode score is 749.25


  6%|▌         | 6029/100000 [2:02:15<30:15:11,  1.16s/it]

59122 episode score is 751.07


  6%|▌         | 6030/100000 [2:02:16<30:26:33,  1.17s/it]

59132 episode score is 748.16


  6%|▌         | 6031/100000 [2:02:17<30:52:20,  1.18s/it]

59142 episode score is 777.68


  6%|▌         | 6032/100000 [2:02:18<30:26:56,  1.17s/it]

59152 episode score is 713.28


  6%|▌         | 6033/100000 [2:02:20<30:37:59,  1.17s/it]

59162 episode score is 763.25


  6%|▌         | 6034/100000 [2:02:21<30:49:29,  1.18s/it]

59172 episode score is 772.71


  6%|▌         | 6035/100000 [2:02:22<30:43:55,  1.18s/it]

59182 episode score is 739.42


  6%|▌         | 6036/100000 [2:02:23<30:33:20,  1.17s/it]

59192 episode score is 732.38


  6%|▌         | 6037/100000 [2:02:24<30:42:26,  1.18s/it]

59202 episode score is 758.62


  6%|▌         | 6038/100000 [2:02:26<30:27:14,  1.17s/it]

59212 episode score is 725.34


  6%|▌         | 6039/100000 [2:02:27<30:30:13,  1.17s/it]

59222 episode score is 743.87


  6%|▌         | 6040/100000 [2:02:28<30:26:13,  1.17s/it]

59232 episode score is 735.32


  6%|▌         | 6041/100000 [2:02:29<30:06:52,  1.15s/it]

59241 episode score is 798.17


  6%|▌         | 6042/100000 [2:02:30<29:57:08,  1.15s/it]

59250 episode score is 815.76


  6%|▌         | 6043/100000 [2:02:31<30:00:04,  1.15s/it]

59259 episode score is 826.92


  6%|▌         | 6044/100000 [2:02:32<30:26:18,  1.17s/it]

59268 episode score is 857.44


  6%|▌         | 6045/100000 [2:02:34<30:03:08,  1.15s/it]

59277 episode score is 797.24


  6%|▌         | 6046/100000 [2:02:35<30:29:00,  1.17s/it]

59287 episode score is 776.33


  6%|▌         | 6047/100000 [2:02:36<29:53:33,  1.15s/it]

59296 episode score is 778.16


  6%|▌         | 6048/100000 [2:02:37<29:34:18,  1.13s/it]

59305 episode score is 781.41


  6%|▌         | 6049/100000 [2:02:38<29:13:33,  1.12s/it]

59314 episode score is 773.24


  6%|▌         | 6050/100000 [2:02:39<29:59:42,  1.15s/it]

59323 episode score is 809.65


  6%|▌         | 6051/100000 [2:02:40<29:31:37,  1.13s/it]

59332 episode score is 769.54


  6%|▌         | 6052/100000 [2:02:42<30:02:23,  1.15s/it]

59342 episode score is 770.97


  6%|▌         | 6053/100000 [2:02:43<29:43:04,  1.14s/it]

59351 episode score is 790.58


  6%|▌         | 6054/100000 [2:02:44<29:53:19,  1.15s/it]

59360 episode score is 838.51


  6%|▌         | 6055/100000 [2:02:45<29:52:58,  1.15s/it]

59369 episode score is 810.30


  6%|▌         | 6056/100000 [2:02:46<30:00:31,  1.15s/it]

59378 episode score is 835.99


  6%|▌         | 6057/100000 [2:02:47<29:54:45,  1.15s/it]

59387 episode score is 807.61


  6%|▌         | 6058/100000 [2:02:48<29:28:46,  1.13s/it]

59396 episode score is 774.10


  6%|▌         | 6059/100000 [2:02:50<29:27:18,  1.13s/it]

59405 episode score is 785.87


  6%|▌         | 6060/100000 [2:02:51<29:14:05,  1.12s/it]

59414 episode score is 780.56


  6%|▌         | 6061/100000 [2:02:52<29:27:54,  1.13s/it]

59423 episode score is 823.93


  6%|▌         | 6062/100000 [2:02:53<30:03:04,  1.15s/it]

59433 episode score is 768.43


  6%|▌         | 6063/100000 [2:02:54<29:37:48,  1.14s/it]

59442 episode score is 783.56


  6%|▌         | 6064/100000 [2:02:55<29:17:26,  1.12s/it]

59451 episode score is 773.62


  6%|▌         | 6065/100000 [2:02:56<29:49:13,  1.14s/it]

59461 episode score is 759.96


  6%|▌         | 6066/100000 [2:02:57<29:23:31,  1.13s/it]

59470 episode score is 775.80


  6%|▌         | 6067/100000 [2:02:59<29:55:04,  1.15s/it]

59480 episode score is 761.94


  6%|▌         | 6068/100000 [2:03:00<30:06:30,  1.15s/it]

59490 episode score is 749.59


  6%|▌         | 6069/100000 [2:03:01<30:08:38,  1.16s/it]

59500 episode score is 740.65


  6%|▌         | 6070/100000 [2:03:02<30:12:42,  1.16s/it]

59510 episode score is 737.66


  6%|▌         | 6071/100000 [2:03:03<30:03:59,  1.15s/it]

59520 episode score is 727.20


  6%|▌         | 6072/100000 [2:03:04<29:42:30,  1.14s/it]

59530 episode score is 699.91


  6%|▌         | 6073/100000 [2:03:06<29:43:42,  1.14s/it]

59540 episode score is 732.72


  6%|▌         | 6074/100000 [2:03:07<29:52:05,  1.14s/it]

59550 episode score is 741.50


  6%|▌         | 6075/100000 [2:03:08<30:06:19,  1.15s/it]

59560 episode score is 744.69


  6%|▌         | 6076/100000 [2:03:09<30:13:03,  1.16s/it]

59570 episode score is 745.11


  6%|▌         | 6077/100000 [2:03:10<30:18:21,  1.16s/it]

59580 episode score is 737.55


  6%|▌         | 6078/100000 [2:03:11<30:10:30,  1.16s/it]

59590 episode score is 723.23


  6%|▌         | 6079/100000 [2:03:13<30:11:24,  1.16s/it]

59600 episode score is 736.04


  6%|▌         | 6080/100000 [2:03:14<30:03:37,  1.15s/it]

59610 episode score is 731.37


  6%|▌         | 6081/100000 [2:03:15<30:06:17,  1.15s/it]

59620 episode score is 739.81


  6%|▌         | 6082/100000 [2:03:16<29:53:59,  1.15s/it]

59630 episode score is 723.76


  6%|▌         | 6083/100000 [2:03:17<30:10:28,  1.16s/it]

59641 episode score is 665.50


  6%|▌         | 6084/100000 [2:03:18<29:58:00,  1.15s/it]

59651 episode score is 712.79


  6%|▌         | 6085/100000 [2:03:19<30:26:20,  1.17s/it]

59661 episode score is 757.43


  6%|▌         | 6086/100000 [2:03:21<30:25:22,  1.17s/it]

59671 episode score is 739.78


  6%|▌         | 6087/100000 [2:03:22<30:28:15,  1.17s/it]

59681 episode score is 737.23


  6%|▌         | 6088/100000 [2:03:23<30:24:29,  1.17s/it]

59691 episode score is 736.54


  6%|▌         | 6089/100000 [2:03:24<29:54:58,  1.15s/it]

59701 episode score is 700.47


  6%|▌         | 6090/100000 [2:03:25<30:05:35,  1.15s/it]

59711 episode score is 746.50


  6%|▌         | 6091/100000 [2:03:26<30:00:59,  1.15s/it]

59721 episode score is 731.79


  6%|▌         | 6092/100000 [2:03:28<29:58:37,  1.15s/it]

59731 episode score is 731.35


  6%|▌         | 6093/100000 [2:03:29<29:39:40,  1.14s/it]

59741 episode score is 701.97


  6%|▌         | 6094/100000 [2:03:30<29:54:19,  1.15s/it]

59751 episode score is 742.36


  6%|▌         | 6095/100000 [2:03:31<29:31:00,  1.13s/it]

59761 episode score is 696.27


  6%|▌         | 6096/100000 [2:03:32<29:55:30,  1.15s/it]

59771 episode score is 740.59


  6%|▌         | 6097/100000 [2:03:33<30:16:18,  1.16s/it]

59781 episode score is 755.02


  6%|▌         | 6098/100000 [2:03:34<30:19:48,  1.16s/it]

59791 episode score is 750.41


  6%|▌         | 6099/100000 [2:03:36<30:19:57,  1.16s/it]

59801 episode score is 743.30
59811 episode score is 720.26


  6%|▌         | 6100/100000 [2:03:38<38:19:39,  1.47s/it]

Iteration 6100: Average test reward: 715.04


  6%|▌         | 6101/100000 [2:03:39<35:29:51,  1.36s/it]

59821 episode score is 701.50


  6%|▌         | 6102/100000 [2:03:40<33:34:45,  1.29s/it]

59831 episode score is 706.33


  6%|▌         | 6103/100000 [2:03:41<32:10:13,  1.23s/it]

59842 episode score is 607.93


  6%|▌         | 6104/100000 [2:03:42<31:47:46,  1.22s/it]

59853 episode score is 674.35


  6%|▌         | 6105/100000 [2:03:43<30:47:25,  1.18s/it]

59863 episode score is 688.34


  6%|▌         | 6106/100000 [2:03:45<31:21:19,  1.20s/it]

59873 episode score is 721.59


  6%|▌         | 6107/100000 [2:03:46<30:39:06,  1.18s/it]

59883 episode score is 714.47


  6%|▌         | 6108/100000 [2:03:47<30:09:00,  1.16s/it]

59893 episode score is 705.08


  6%|▌         | 6109/100000 [2:03:48<30:15:07,  1.16s/it]

59904 episode score is 660.72


  6%|▌         | 6110/100000 [2:03:49<29:59:57,  1.15s/it]

59914 episode score is 700.68


  6%|▌         | 6111/100000 [2:03:50<29:40:01,  1.14s/it]

59924 episode score is 706.53


  6%|▌         | 6112/100000 [2:03:51<29:51:58,  1.15s/it]

59935 episode score is 654.47


  6%|▌         | 6113/100000 [2:03:53<29:50:05,  1.14s/it]

59945 episode score is 729.56


  6%|▌         | 6114/100000 [2:03:54<29:23:12,  1.13s/it]

59955 episode score is 687.70


  6%|▌         | 6115/100000 [2:03:55<29:26:57,  1.13s/it]

59965 episode score is 726.91


  6%|▌         | 6116/100000 [2:03:56<29:09:32,  1.12s/it]

59975 episode score is 695.11


  6%|▌         | 6117/100000 [2:03:57<29:03:26,  1.11s/it]

59985 episode score is 702.65


  6%|▌         | 6118/100000 [2:03:58<29:22:23,  1.13s/it]

59995 episode score is 744.02


  6%|▌         | 6119/100000 [2:03:59<29:36:55,  1.14s/it]

60005 episode score is 735.91


  6%|▌         | 6120/100000 [2:04:00<30:02:45,  1.15s/it]

60015 episode score is 752.29


  6%|▌         | 6121/100000 [2:04:02<30:22:35,  1.16s/it]

60025 episode score is 763.26


  6%|▌         | 6122/100000 [2:04:03<30:27:02,  1.17s/it]

60035 episode score is 742.53


  6%|▌         | 6123/100000 [2:04:04<30:23:10,  1.17s/it]

60045 episode score is 745.70


  6%|▌         | 6124/100000 [2:04:05<30:32:53,  1.17s/it]

60055 episode score is 763.31


  6%|▌         | 6125/100000 [2:04:06<30:39:29,  1.18s/it]

60065 episode score is 759.99


  6%|▌         | 6126/100000 [2:04:08<30:41:29,  1.18s/it]

60075 episode score is 760.21


  6%|▌         | 6127/100000 [2:04:09<30:38:01,  1.17s/it]

60085 episode score is 745.73


  6%|▌         | 6128/100000 [2:04:10<30:18:59,  1.16s/it]

60095 episode score is 730.92


  6%|▌         | 6129/100000 [2:04:11<30:05:52,  1.15s/it]

60105 episode score is 731.67


  6%|▌         | 6130/100000 [2:04:12<30:00:22,  1.15s/it]

60115 episode score is 727.36


  6%|▌         | 6131/100000 [2:04:13<29:57:29,  1.15s/it]

60125 episode score is 734.77


  6%|▌         | 6132/100000 [2:04:14<30:02:28,  1.15s/it]

60135 episode score is 746.86


  6%|▌         | 6133/100000 [2:04:16<30:09:36,  1.16s/it]

60145 episode score is 744.26


  6%|▌         | 6134/100000 [2:04:17<30:11:43,  1.16s/it]

60155 episode score is 745.59


  6%|▌         | 6135/100000 [2:04:18<30:11:18,  1.16s/it]

60165 episode score is 737.00


  6%|▌         | 6136/100000 [2:04:19<30:18:20,  1.16s/it]

60175 episode score is 750.18


  6%|▌         | 6137/100000 [2:04:20<30:28:24,  1.17s/it]

60185 episode score is 758.82


  6%|▌         | 6138/100000 [2:04:21<30:27:21,  1.17s/it]

60195 episode score is 746.84


  6%|▌         | 6139/100000 [2:04:23<30:33:35,  1.17s/it]

60205 episode score is 742.75


  6%|▌         | 6140/100000 [2:04:24<30:26:33,  1.17s/it]

60215 episode score is 738.06


  6%|▌         | 6141/100000 [2:04:25<30:31:07,  1.17s/it]

60225 episode score is 754.71


  6%|▌         | 6142/100000 [2:04:26<30:25:31,  1.17s/it]

60235 episode score is 745.46


  6%|▌         | 6143/100000 [2:04:27<30:16:15,  1.16s/it]

60245 episode score is 738.71


  6%|▌         | 6144/100000 [2:04:28<30:14:58,  1.16s/it]

60255 episode score is 744.38


  6%|▌         | 6145/100000 [2:04:30<30:08:23,  1.16s/it]

60265 episode score is 733.06


  6%|▌         | 6146/100000 [2:04:31<30:05:20,  1.15s/it]

60275 episode score is 738.28


  6%|▌         | 6147/100000 [2:04:32<29:53:46,  1.15s/it]

60285 episode score is 719.52


  6%|▌         | 6148/100000 [2:04:33<30:31:38,  1.17s/it]

60295 episode score is 733.30


  6%|▌         | 6149/100000 [2:04:34<30:06:18,  1.15s/it]

60307 episode score is 551.87


  6%|▌         | 6150/100000 [2:04:35<30:06:30,  1.15s/it]

60319 episode score is 576.36


  6%|▌         | 6151/100000 [2:04:37<29:56:52,  1.15s/it]

60332 episode score is 502.28


  6%|▌         | 6152/100000 [2:04:38<29:37:35,  1.14s/it]

60343 episode score is 611.65


  6%|▌         | 6153/100000 [2:04:39<29:23:11,  1.13s/it]

60355 episode score is 540.29


  6%|▌         | 6154/100000 [2:04:40<29:23:57,  1.13s/it]

60367 episode score is 549.84


  6%|▌         | 6155/100000 [2:04:41<29:13:16,  1.12s/it]

60379 episode score is 531.47


  6%|▌         | 6156/100000 [2:04:42<29:45:39,  1.14s/it]

60391 episode score is 596.78


  6%|▌         | 6157/100000 [2:04:43<29:31:14,  1.13s/it]

60402 episode score is 618.87


  6%|▌         | 6158/100000 [2:04:44<29:52:24,  1.15s/it]

60413 episode score is 679.06


  6%|▌         | 6159/100000 [2:04:46<29:49:39,  1.14s/it]

60423 episode score is 731.38


  6%|▌         | 6160/100000 [2:04:47<30:07:57,  1.16s/it]

60434 episode score is 676.90


  6%|▌         | 6161/100000 [2:04:48<29:57:17,  1.15s/it]

60444 episode score is 707.18


  6%|▌         | 6162/100000 [2:04:49<30:07:20,  1.16s/it]

60455 episode score is 648.38


  6%|▌         | 6163/100000 [2:04:50<29:41:00,  1.14s/it]

60465 episode score is 694.00


  6%|▌         | 6164/100000 [2:04:51<29:38:57,  1.14s/it]

60475 episode score is 722.23


  6%|▌         | 6165/100000 [2:04:52<29:34:01,  1.13s/it]

60485 episode score is 715.95


  6%|▌         | 6166/100000 [2:04:54<29:31:57,  1.13s/it]

60495 episode score is 721.13


  6%|▌         | 6167/100000 [2:04:55<29:44:17,  1.14s/it]

60505 episode score is 733.56


  6%|▌         | 6168/100000 [2:04:56<29:50:37,  1.15s/it]

60515 episode score is 735.67


  6%|▌         | 6169/100000 [2:04:57<29:47:13,  1.14s/it]

60525 episode score is 729.64


  6%|▌         | 6170/100000 [2:04:58<29:44:26,  1.14s/it]

60535 episode score is 720.98


  6%|▌         | 6171/100000 [2:04:59<29:40:22,  1.14s/it]

60545 episode score is 714.54


  6%|▌         | 6172/100000 [2:05:00<29:53:23,  1.15s/it]

60555 episode score is 750.45


  6%|▌         | 6173/100000 [2:05:02<30:06:40,  1.16s/it]

60565 episode score is 738.87


  6%|▌         | 6174/100000 [2:05:03<30:25:44,  1.17s/it]

60575 episode score is 754.79


  6%|▌         | 6175/100000 [2:05:04<30:20:08,  1.16s/it]

60585 episode score is 738.49


  6%|▌         | 6176/100000 [2:05:05<29:55:00,  1.15s/it]

60594 episode score is 794.26


  6%|▌         | 6177/100000 [2:05:06<30:11:07,  1.16s/it]

60604 episode score is 758.62


  6%|▌         | 6178/100000 [2:05:07<30:17:44,  1.16s/it]

60614 episode score is 753.96


  6%|▌         | 6179/100000 [2:05:09<30:16:20,  1.16s/it]

60624 episode score is 742.58


  6%|▌         | 6180/100000 [2:05:10<30:20:58,  1.16s/it]

60634 episode score is 751.64


  6%|▌         | 6181/100000 [2:05:11<30:13:25,  1.16s/it]

60644 episode score is 727.27


  6%|▌         | 6182/100000 [2:05:12<30:22:03,  1.17s/it]

60654 episode score is 748.06


  6%|▌         | 6183/100000 [2:05:13<31:21:09,  1.20s/it]

60664 episode score is 760.45


  6%|▌         | 6184/100000 [2:05:15<31:17:45,  1.20s/it]

60674 episode score is 756.98


  6%|▌         | 6185/100000 [2:05:16<31:13:28,  1.20s/it]

60684 episode score is 753.28


  6%|▌         | 6186/100000 [2:05:17<31:20:35,  1.20s/it]

60694 episode score is 767.16


  6%|▌         | 6187/100000 [2:05:18<31:15:34,  1.20s/it]

60704 episode score is 754.30


  6%|▌         | 6188/100000 [2:05:19<30:26:28,  1.17s/it]

60713 episode score is 776.28


  6%|▌         | 6189/100000 [2:05:20<30:30:28,  1.17s/it]

60723 episode score is 747.94


  6%|▌         | 6190/100000 [2:05:22<30:23:08,  1.17s/it]

60733 episode score is 731.75


  6%|▌         | 6191/100000 [2:05:23<29:54:15,  1.15s/it]

60742 episode score is 781.72


  6%|▌         | 6192/100000 [2:05:24<30:24:52,  1.17s/it]

60752 episode score is 778.70


  6%|▌         | 6193/100000 [2:05:25<29:56:00,  1.15s/it]

60761 episode score is 789.28


  6%|▌         | 6194/100000 [2:05:26<29:45:13,  1.14s/it]

60770 episode score is 808.49


  6%|▌         | 6195/100000 [2:05:27<30:05:42,  1.15s/it]

60780 episode score is 753.91


  6%|▌         | 6196/100000 [2:05:28<30:11:44,  1.16s/it]

60790 episode score is 745.23


  6%|▌         | 6197/100000 [2:05:30<29:51:10,  1.15s/it]

60799 episode score is 794.53


  6%|▌         | 6198/100000 [2:05:31<29:51:08,  1.15s/it]

60808 episode score is 812.37


  6%|▌         | 6199/100000 [2:05:32<30:08:53,  1.16s/it]

60818 episode score is 750.58
60828 episode score is 750.25


  6%|▌         | 6200/100000 [2:05:34<38:43:04,  1.49s/it]

Iteration 6200: Average test reward: 760.49


  6%|▌         | 6201/100000 [2:05:35<35:46:03,  1.37s/it]

60837 episode score is 788.64


  6%|▌         | 6202/100000 [2:05:36<33:50:54,  1.30s/it]

60846 episode score is 808.95


  6%|▌         | 6203/100000 [2:05:38<32:38:31,  1.25s/it]

60855 episode score is 836.18


  6%|▌         | 6204/100000 [2:05:39<32:03:51,  1.23s/it]

60865 episode score is 754.87


  6%|▌         | 6205/100000 [2:05:40<31:34:36,  1.21s/it]

60875 episode score is 733.82


  6%|▌         | 6206/100000 [2:05:41<30:41:36,  1.18s/it]

60884 episode score is 784.57


  6%|▌         | 6207/100000 [2:05:42<30:04:02,  1.15s/it]

60893 episode score is 779.11


  6%|▌         | 6208/100000 [2:05:43<29:54:13,  1.15s/it]

60902 episode score is 800.97


  6%|▌         | 6209/100000 [2:05:44<30:09:22,  1.16s/it]

60911 episode score is 853.51


  6%|▌         | 6210/100000 [2:05:46<30:38:35,  1.18s/it]

60921 episode score is 778.58


  6%|▌         | 6211/100000 [2:05:47<30:09:14,  1.16s/it]

60930 episode score is 789.57


  6%|▌         | 6212/100000 [2:05:48<30:05:07,  1.15s/it]

60939 episode score is 814.58


  6%|▌         | 6213/100000 [2:05:49<29:43:38,  1.14s/it]

60948 episode score is 783.79


  6%|▌         | 6214/100000 [2:05:50<29:44:59,  1.14s/it]

60957 episode score is 811.96


  6%|▌         | 6215/100000 [2:05:51<30:05:46,  1.16s/it]

60967 episode score is 750.88


  6%|▌         | 6216/100000 [2:05:52<29:42:32,  1.14s/it]

60976 episode score is 777.38


  6%|▌         | 6217/100000 [2:05:54<29:52:30,  1.15s/it]

60986 episode score is 742.46


  6%|▌         | 6218/100000 [2:05:55<29:54:15,  1.15s/it]

60996 episode score is 730.55


  6%|▌         | 6219/100000 [2:05:56<30:01:05,  1.15s/it]

61006 episode score is 741.49


  6%|▌         | 6220/100000 [2:05:57<30:10:44,  1.16s/it]

61016 episode score is 749.64


  6%|▌         | 6221/100000 [2:05:58<30:18:23,  1.16s/it]

61026 episode score is 762.49


  6%|▌         | 6222/100000 [2:05:59<30:06:29,  1.16s/it]

61036 episode score is 728.93


  6%|▌         | 6223/100000 [2:06:01<30:03:02,  1.15s/it]

61046 episode score is 731.40


  6%|▌         | 6224/100000 [2:06:02<29:44:05,  1.14s/it]

61056 episode score is 703.40


  6%|▌         | 6225/100000 [2:06:03<29:26:28,  1.13s/it]

61066 episode score is 678.81


  6%|▌         | 6226/100000 [2:06:04<29:05:19,  1.12s/it]

61076 episode score is 694.19


  6%|▌         | 6227/100000 [2:06:05<29:10:33,  1.12s/it]

61086 episode score is 727.03


  6%|▌         | 6228/100000 [2:06:06<29:19:05,  1.13s/it]

61096 episode score is 732.56


  6%|▌         | 6229/100000 [2:06:07<29:03:45,  1.12s/it]

61106 episode score is 695.92


  6%|▌         | 6230/100000 [2:06:08<29:11:47,  1.12s/it]

61116 episode score is 726.35


  6%|▌         | 6231/100000 [2:06:10<29:36:03,  1.14s/it]

61126 episode score is 752.33


  6%|▌         | 6232/100000 [2:06:11<29:34:17,  1.14s/it]

61136 episode score is 727.70


  6%|▌         | 6233/100000 [2:06:12<29:31:44,  1.13s/it]

61146 episode score is 722.31


  6%|▌         | 6234/100000 [2:06:13<29:50:02,  1.15s/it]

61156 episode score is 743.43


  6%|▌         | 6235/100000 [2:06:14<30:06:53,  1.16s/it]

61166 episode score is 737.89


  6%|▌         | 6236/100000 [2:06:15<29:51:54,  1.15s/it]

61176 episode score is 714.21


  6%|▌         | 6237/100000 [2:06:16<29:43:54,  1.14s/it]

61186 episode score is 720.77


  6%|▌         | 6238/100000 [2:06:18<29:41:31,  1.14s/it]

61196 episode score is 713.69


  6%|▌         | 6239/100000 [2:06:19<29:21:05,  1.13s/it]

61206 episode score is 690.71


  6%|▌         | 6240/100000 [2:06:20<29:01:38,  1.11s/it]

61216 episode score is 684.28


  6%|▌         | 6241/100000 [2:06:21<29:06:06,  1.12s/it]

61226 episode score is 719.48


  6%|▌         | 6242/100000 [2:06:22<29:22:36,  1.13s/it]

61236 episode score is 739.28


  6%|▌         | 6243/100000 [2:06:23<29:28:41,  1.13s/it]

61246 episode score is 721.49


  6%|▌         | 6244/100000 [2:06:24<29:30:07,  1.13s/it]

61257 episode score is 639.32


  6%|▌         | 6245/100000 [2:06:25<29:46:42,  1.14s/it]

61267 episode score is 683.62


  6%|▌         | 6246/100000 [2:06:27<29:40:04,  1.14s/it]

61278 episode score is 639.99


  6%|▌         | 6247/100000 [2:06:28<29:24:52,  1.13s/it]

61288 episode score is 701.73


  6%|▌         | 6248/100000 [2:06:29<29:14:35,  1.12s/it]

61298 episode score is 704.15


  6%|▌         | 6249/100000 [2:06:30<29:05:31,  1.12s/it]

61308 episode score is 701.23


  6%|▋         | 6250/100000 [2:06:31<29:05:06,  1.12s/it]

61318 episode score is 714.22


  6%|▋         | 6251/100000 [2:06:32<29:19:11,  1.13s/it]

61328 episode score is 719.18


  6%|▋         | 6252/100000 [2:06:33<29:35:30,  1.14s/it]

61338 episode score is 743.58


  6%|▋         | 6253/100000 [2:06:34<29:45:18,  1.14s/it]

61348 episode score is 746.37


  6%|▋         | 6254/100000 [2:06:36<29:36:07,  1.14s/it]

61358 episode score is 724.82


  6%|▋         | 6255/100000 [2:06:37<29:27:33,  1.13s/it]

61368 episode score is 714.45


  6%|▋         | 6256/100000 [2:06:38<29:30:04,  1.13s/it]

61378 episode score is 732.07


  6%|▋         | 6257/100000 [2:06:39<29:40:19,  1.14s/it]

61388 episode score is 738.83


  6%|▋         | 6258/100000 [2:06:40<29:39:27,  1.14s/it]

61398 episode score is 725.57


  6%|▋         | 6259/100000 [2:06:41<29:56:19,  1.15s/it]

61408 episode score is 755.02


  6%|▋         | 6260/100000 [2:06:42<29:48:31,  1.14s/it]

61418 episode score is 718.90


  6%|▋         | 6261/100000 [2:06:44<29:44:44,  1.14s/it]

61428 episode score is 720.15


  6%|▋         | 6262/100000 [2:06:45<29:47:04,  1.14s/it]

61438 episode score is 734.98


  6%|▋         | 6263/100000 [2:06:46<29:57:30,  1.15s/it]

61448 episode score is 753.13


  6%|▋         | 6264/100000 [2:06:47<30:00:19,  1.15s/it]

61458 episode score is 735.60


  6%|▋         | 6265/100000 [2:06:48<29:32:19,  1.13s/it]

61468 episode score is 688.41


  6%|▋         | 6266/100000 [2:06:49<29:28:32,  1.13s/it]

61478 episode score is 712.87


  6%|▋         | 6267/100000 [2:06:50<29:44:31,  1.14s/it]

61489 episode score is 670.90


  6%|▋         | 6268/100000 [2:06:52<29:23:51,  1.13s/it]

61499 episode score is 695.38


  6%|▋         | 6269/100000 [2:06:53<29:10:10,  1.12s/it]

61509 episode score is 696.76


  6%|▋         | 6270/100000 [2:06:54<29:16:17,  1.12s/it]

61519 episode score is 712.63


  6%|▋         | 6271/100000 [2:06:55<29:23:28,  1.13s/it]

61529 episode score is 731.02


  6%|▋         | 6272/100000 [2:06:56<29:27:49,  1.13s/it]

61539 episode score is 726.97


  6%|▋         | 6273/100000 [2:06:57<29:20:59,  1.13s/it]

61549 episode score is 711.51


  6%|▋         | 6274/100000 [2:06:58<29:31:20,  1.13s/it]

61559 episode score is 741.40


  6%|▋         | 6275/100000 [2:06:59<29:41:25,  1.14s/it]

61569 episode score is 735.41


  6%|▋         | 6276/100000 [2:07:01<29:43:03,  1.14s/it]

61579 episode score is 730.52


  6%|▋         | 6277/100000 [2:07:02<29:32:28,  1.13s/it]

61589 episode score is 703.79


  6%|▋         | 6278/100000 [2:07:03<29:16:23,  1.12s/it]

61599 episode score is 692.46


  6%|▋         | 6279/100000 [2:07:04<29:39:19,  1.14s/it]

61610 episode score is 667.68


  6%|▋         | 6280/100000 [2:07:05<29:20:32,  1.13s/it]

61620 episode score is 700.02


  6%|▋         | 6281/100000 [2:07:06<29:11:46,  1.12s/it]

61630 episode score is 705.66


  6%|▋         | 6282/100000 [2:07:07<29:03:19,  1.12s/it]

61640 episode score is 699.54


  6%|▋         | 6283/100000 [2:07:08<29:25:24,  1.13s/it]

61650 episode score is 746.84


  6%|▋         | 6284/100000 [2:07:10<29:51:20,  1.15s/it]

61660 episode score is 759.89


  6%|▋         | 6285/100000 [2:07:11<30:01:30,  1.15s/it]

61670 episode score is 745.83


  6%|▋         | 6286/100000 [2:07:12<29:52:49,  1.15s/it]

61680 episode score is 727.54


  6%|▋         | 6287/100000 [2:07:13<30:02:20,  1.15s/it]

61690 episode score is 742.54


  6%|▋         | 6288/100000 [2:07:14<30:02:08,  1.15s/it]

61700 episode score is 740.59


  6%|▋         | 6289/100000 [2:07:15<29:56:15,  1.15s/it]

61710 episode score is 732.64


  6%|▋         | 6290/100000 [2:07:17<30:00:47,  1.15s/it]

61720 episode score is 748.99


  6%|▋         | 6291/100000 [2:07:18<29:52:02,  1.15s/it]

61730 episode score is 714.11


  6%|▋         | 6292/100000 [2:07:19<29:57:42,  1.15s/it]

61740 episode score is 731.66


  6%|▋         | 6293/100000 [2:07:20<30:16:50,  1.16s/it]

61750 episode score is 761.47


  6%|▋         | 6294/100000 [2:07:21<30:17:39,  1.16s/it]

61760 episode score is 744.48


  6%|▋         | 6295/100000 [2:07:22<30:06:00,  1.16s/it]

61770 episode score is 732.57


  6%|▋         | 6296/100000 [2:07:24<29:51:42,  1.15s/it]

61780 episode score is 716.90


  6%|▋         | 6297/100000 [2:07:25<29:50:23,  1.15s/it]

61790 episode score is 739.17


  6%|▋         | 6298/100000 [2:07:26<29:43:55,  1.14s/it]

61800 episode score is 726.23


  6%|▋         | 6299/100000 [2:07:27<30:39:06,  1.18s/it]

61810 episode score is 745.65
61820 episode score is 720.79


  6%|▋         | 6300/100000 [2:07:29<38:03:36,  1.46s/it]

Iteration 6300: Average test reward: 711.65


  6%|▋         | 6301/100000 [2:07:30<35:57:21,  1.38s/it]

61831 episode score is 685.91


  6%|▋         | 6302/100000 [2:07:31<33:36:22,  1.29s/it]

61841 episode score is 685.18


  6%|▋         | 6303/100000 [2:07:33<32:00:22,  1.23s/it]

61851 episode score is 685.57


  6%|▋         | 6304/100000 [2:07:34<31:26:03,  1.21s/it]

61862 episode score is 653.82


  6%|▋         | 6305/100000 [2:07:35<30:27:52,  1.17s/it]

61872 episode score is 692.43


  6%|▋         | 6306/100000 [2:07:36<29:53:59,  1.15s/it]

61882 episode score is 703.37


  6%|▋         | 6307/100000 [2:07:37<29:54:23,  1.15s/it]

61892 episode score is 723.70


  6%|▋         | 6308/100000 [2:07:38<30:08:03,  1.16s/it]

61903 episode score is 673.08


  6%|▋         | 6309/100000 [2:07:39<29:40:45,  1.14s/it]

61913 episode score is 699.69


  6%|▋         | 6310/100000 [2:07:40<29:18:39,  1.13s/it]

61923 episode score is 704.46


  6%|▋         | 6311/100000 [2:07:42<29:10:31,  1.12s/it]

61933 episode score is 698.55


  6%|▋         | 6312/100000 [2:07:43<29:35:06,  1.14s/it]

61944 episode score is 672.31


  6%|▋         | 6313/100000 [2:07:44<30:01:04,  1.15s/it]

61955 episode score is 667.62


  6%|▋         | 6314/100000 [2:07:45<29:33:41,  1.14s/it]

61965 episode score is 691.14


  6%|▋         | 6315/100000 [2:07:46<29:15:35,  1.12s/it]

61975 episode score is 701.94


  6%|▋         | 6316/100000 [2:07:47<29:15:14,  1.12s/it]

61985 episode score is 713.36


  6%|▋         | 6317/100000 [2:07:48<29:28:50,  1.13s/it]

61996 episode score is 653.42


  6%|▋         | 6318/100000 [2:07:49<29:09:55,  1.12s/it]

62006 episode score is 691.48


  6%|▋         | 6319/100000 [2:07:51<29:19:59,  1.13s/it]

62017 episode score is 637.12


  6%|▋         | 6320/100000 [2:07:52<29:01:55,  1.12s/it]

62027 episode score is 693.53


  6%|▋         | 6321/100000 [2:07:53<29:38:01,  1.14s/it]

62038 episode score is 682.58


  6%|▋         | 6322/100000 [2:07:54<30:00:54,  1.15s/it]

62049 episode score is 680.28


  6%|▋         | 6323/100000 [2:07:55<29:49:40,  1.15s/it]

62060 episode score is 643.17


  6%|▋         | 6324/100000 [2:07:56<29:25:09,  1.13s/it]

62070 episode score is 701.55


  6%|▋         | 6325/100000 [2:07:57<29:46:30,  1.14s/it]

62081 episode score is 670.19


  6%|▋         | 6326/100000 [2:07:59<29:36:26,  1.14s/it]

62092 episode score is 636.69


  6%|▋         | 6327/100000 [2:08:00<29:23:28,  1.13s/it]

62102 episode score is 697.50


  6%|▋         | 6328/100000 [2:08:01<29:20:33,  1.13s/it]

62112 episode score is 720.03


  6%|▋         | 6329/100000 [2:08:02<29:20:51,  1.13s/it]

62122 episode score is 714.66


  6%|▋         | 6330/100000 [2:08:03<29:11:44,  1.12s/it]

62132 episode score is 701.39


  6%|▋         | 6331/100000 [2:08:04<28:57:34,  1.11s/it]

62142 episode score is 685.00


  6%|▋         | 6332/100000 [2:08:05<28:52:55,  1.11s/it]

62152 episode score is 704.81


  6%|▋         | 6333/100000 [2:08:06<28:55:34,  1.11s/it]

62162 episode score is 716.62


  6%|▋         | 6334/100000 [2:08:07<28:44:23,  1.10s/it]

62172 episode score is 695.40


  6%|▋         | 6335/100000 [2:08:09<29:09:20,  1.12s/it]

62183 episode score is 664.71


  6%|▋         | 6336/100000 [2:08:10<29:08:56,  1.12s/it]

62193 episode score is 716.15


  6%|▋         | 6337/100000 [2:08:11<29:39:24,  1.14s/it]

62204 episode score is 684.59


  6%|▋         | 6338/100000 [2:08:12<29:51:10,  1.15s/it]

62214 episode score is 737.44


  6%|▋         | 6339/100000 [2:08:13<30:07:13,  1.16s/it]

62225 episode score is 681.64


  6%|▋         | 6340/100000 [2:08:14<30:09:32,  1.16s/it]

62236 episode score is 661.80


  6%|▋         | 6341/100000 [2:08:16<30:10:29,  1.16s/it]

62248 episode score is 573.52


  6%|▋         | 6342/100000 [2:08:17<29:46:54,  1.14s/it]

62259 episode score is 617.75


  6%|▋         | 6343/100000 [2:08:18<29:35:50,  1.14s/it]

62269 episode score is 704.64


  6%|▋         | 6344/100000 [2:08:19<29:38:30,  1.14s/it]

62280 episode score is 654.67


  6%|▋         | 6345/100000 [2:08:20<29:52:38,  1.15s/it]

62291 episode score is 675.28


  6%|▋         | 6346/100000 [2:08:21<29:20:14,  1.13s/it]

62301 episode score is 687.87


  6%|▋         | 6347/100000 [2:08:22<29:34:37,  1.14s/it]

62312 episode score is 656.88


  6%|▋         | 6348/100000 [2:08:24<30:01:25,  1.15s/it]

62323 episode score is 682.70


  6%|▋         | 6349/100000 [2:08:25<29:32:32,  1.14s/it]

62333 episode score is 696.58


  6%|▋         | 6350/100000 [2:08:26<29:06:11,  1.12s/it]

62343 episode score is 695.58


  6%|▋         | 6351/100000 [2:08:27<29:02:11,  1.12s/it]

62353 episode score is 712.29


  6%|▋         | 6352/100000 [2:08:28<28:59:21,  1.11s/it]

62363 episode score is 702.60


  6%|▋         | 6353/100000 [2:08:29<28:49:17,  1.11s/it]

62373 episode score is 693.88


  6%|▋         | 6354/100000 [2:08:30<29:09:46,  1.12s/it]

62384 episode score is 656.80


  6%|▋         | 6355/100000 [2:08:31<29:34:06,  1.14s/it]

62395 episode score is 670.99


  6%|▋         | 6356/100000 [2:08:32<29:20:41,  1.13s/it]

62405 episode score is 688.20


  6%|▋         | 6357/100000 [2:08:34<29:01:24,  1.12s/it]

62415 episode score is 687.74


  6%|▋         | 6358/100000 [2:08:35<29:04:00,  1.12s/it]

62425 episode score is 714.29


  6%|▋         | 6359/100000 [2:08:36<28:55:48,  1.11s/it]

62435 episode score is 695.23


  6%|▋         | 6360/100000 [2:08:37<29:00:41,  1.12s/it]

62445 episode score is 714.75


  6%|▋         | 6361/100000 [2:08:38<29:06:51,  1.12s/it]

62455 episode score is 720.70


  6%|▋         | 6362/100000 [2:08:39<29:27:20,  1.13s/it]

62465 episode score is 743.15


  6%|▋         | 6363/100000 [2:08:40<29:03:53,  1.12s/it]

62475 episode score is 684.61


  6%|▋         | 6364/100000 [2:08:41<29:09:27,  1.12s/it]

62485 episode score is 721.35


  6%|▋         | 6365/100000 [2:08:43<29:11:50,  1.12s/it]

62495 episode score is 719.38


  6%|▋         | 6366/100000 [2:08:44<29:09:35,  1.12s/it]

62505 episode score is 713.86


  6%|▋         | 6367/100000 [2:08:45<29:45:36,  1.14s/it]

62515 episode score is 701.75


  6%|▋         | 6368/100000 [2:08:46<29:59:11,  1.15s/it]

62526 episode score is 678.34


  6%|▋         | 6369/100000 [2:08:47<29:57:24,  1.15s/it]

62536 episode score is 729.03


  6%|▋         | 6370/100000 [2:08:48<29:51:42,  1.15s/it]

62546 episode score is 732.04


  6%|▋         | 6371/100000 [2:08:49<29:35:53,  1.14s/it]

62556 episode score is 710.71


  6%|▋         | 6372/100000 [2:08:51<29:17:20,  1.13s/it]

62566 episode score is 697.72


  6%|▋         | 6373/100000 [2:08:52<29:03:28,  1.12s/it]

62576 episode score is 692.77


  6%|▋         | 6374/100000 [2:08:53<28:51:53,  1.11s/it]

62586 episode score is 690.37


  6%|▋         | 6375/100000 [2:08:54<28:44:49,  1.11s/it]

62596 episode score is 697.83


  6%|▋         | 6376/100000 [2:08:55<28:40:47,  1.10s/it]

62606 episode score is 681.63


  6%|▋         | 6377/100000 [2:08:56<28:44:12,  1.10s/it]

62616 episode score is 703.82


  6%|▋         | 6378/100000 [2:08:57<28:44:37,  1.11s/it]

62626 episode score is 710.42


  6%|▋         | 6379/100000 [2:08:58<29:09:33,  1.12s/it]

62637 episode score is 675.04


  6%|▋         | 6380/100000 [2:08:59<29:11:33,  1.12s/it]

62647 episode score is 728.42


  6%|▋         | 6381/100000 [2:09:00<28:50:45,  1.11s/it]

62657 episode score is 692.66


  6%|▋         | 6382/100000 [2:09:02<28:37:49,  1.10s/it]

62667 episode score is 689.91


  6%|▋         | 6383/100000 [2:09:03<29:03:50,  1.12s/it]

62677 episode score is 736.67


  6%|▋         | 6384/100000 [2:09:04<28:52:55,  1.11s/it]

62687 episode score is 711.84


  6%|▋         | 6385/100000 [2:09:05<29:03:32,  1.12s/it]

62697 episode score is 738.36


  6%|▋         | 6386/100000 [2:09:06<28:48:23,  1.11s/it]

62707 episode score is 708.66


  6%|▋         | 6387/100000 [2:09:07<29:04:55,  1.12s/it]

62717 episode score is 739.11


  6%|▋         | 6388/100000 [2:09:08<29:00:58,  1.12s/it]

62727 episode score is 724.01


  6%|▋         | 6389/100000 [2:09:09<29:04:41,  1.12s/it]

62737 episode score is 724.05


  6%|▋         | 6390/100000 [2:09:11<29:21:49,  1.13s/it]

62748 episode score is 669.79


  6%|▋         | 6391/100000 [2:09:12<29:29:50,  1.13s/it]

62758 episode score is 723.84


  6%|▋         | 6392/100000 [2:09:13<29:40:46,  1.14s/it]

62768 episode score is 738.93


  6%|▋         | 6393/100000 [2:09:14<29:45:42,  1.14s/it]

62779 episode score is 667.56


  6%|▋         | 6394/100000 [2:09:15<29:28:28,  1.13s/it]

62789 episode score is 706.77


  6%|▋         | 6395/100000 [2:09:16<29:45:24,  1.14s/it]

62800 episode score is 661.69


  6%|▋         | 6396/100000 [2:09:17<29:28:39,  1.13s/it]

62810 episode score is 693.65


  6%|▋         | 6397/100000 [2:09:19<29:18:34,  1.13s/it]

62820 episode score is 706.12


  6%|▋         | 6398/100000 [2:09:20<29:08:58,  1.12s/it]

62830 episode score is 702.19


  6%|▋         | 6399/100000 [2:09:21<29:08:52,  1.12s/it]

62840 episode score is 719.18
62850 episode score is 693.29


  6%|▋         | 6400/100000 [2:09:23<36:27:22,  1.40s/it]

Iteration 6400: Average test reward: 678.17


  6%|▋         | 6401/100000 [2:09:24<34:13:00,  1.32s/it]

62860 episode score is 712.24


  6%|▋         | 6402/100000 [2:09:25<33:18:24,  1.28s/it]

62871 episode score is 685.27


  6%|▋         | 6403/100000 [2:09:26<32:23:44,  1.25s/it]

62881 episode score is 735.40


  6%|▋         | 6404/100000 [2:09:27<31:17:13,  1.20s/it]

62891 episode score is 699.58


  6%|▋         | 6405/100000 [2:09:29<30:48:38,  1.19s/it]

62901 episode score is 727.85


  6%|▋         | 6406/100000 [2:09:30<30:46:23,  1.18s/it]

62911 episode score is 756.86


  6%|▋         | 6407/100000 [2:09:31<30:32:51,  1.18s/it]

62921 episode score is 735.38


  6%|▋         | 6408/100000 [2:09:32<30:30:46,  1.17s/it]

62931 episode score is 741.67


  6%|▋         | 6409/100000 [2:09:33<29:52:53,  1.15s/it]

62940 episode score is 776.42


  6%|▋         | 6410/100000 [2:09:34<30:12:17,  1.16s/it]

62950 episode score is 761.25


  6%|▋         | 6411/100000 [2:09:36<30:34:04,  1.18s/it]

62960 episode score is 764.15


  6%|▋         | 6412/100000 [2:09:37<30:01:03,  1.15s/it]

62969 episode score is 793.65


  6%|▋         | 6413/100000 [2:09:38<30:14:47,  1.16s/it]

62979 episode score is 758.33


  6%|▋         | 6414/100000 [2:09:39<30:25:38,  1.17s/it]

62989 episode score is 759.64


  6%|▋         | 6415/100000 [2:09:40<30:29:09,  1.17s/it]

62999 episode score is 751.14


  6%|▋         | 6416/100000 [2:09:41<29:48:53,  1.15s/it]

63008 episode score is 784.15


  6%|▋         | 6417/100000 [2:09:42<30:08:25,  1.16s/it]

63018 episode score is 767.43


  6%|▋         | 6418/100000 [2:09:44<29:37:11,  1.14s/it]

63027 episode score is 775.37


  6%|▋         | 6419/100000 [2:09:45<29:15:29,  1.13s/it]

63036 episode score is 778.66


  6%|▋         | 6420/100000 [2:09:46<29:47:34,  1.15s/it]

63046 episode score is 755.56


  6%|▋         | 6421/100000 [2:09:47<29:55:55,  1.15s/it]

63056 episode score is 736.45


  6%|▋         | 6422/100000 [2:09:48<30:12:53,  1.16s/it]

63066 episode score is 749.32


  6%|▋         | 6423/100000 [2:09:49<30:40:40,  1.18s/it]

63076 episode score is 714.44


  6%|▋         | 6424/100000 [2:09:50<29:56:48,  1.15s/it]

63085 episode score is 766.45


  6%|▋         | 6425/100000 [2:09:52<29:58:55,  1.15s/it]

63094 episode score is 807.38


  6%|▋         | 6426/100000 [2:09:53<30:18:33,  1.17s/it]

63104 episode score is 762.42


  6%|▋         | 6427/100000 [2:09:54<30:06:12,  1.16s/it]

63114 episode score is 729.33


  6%|▋         | 6428/100000 [2:09:55<30:15:45,  1.16s/it]

63124 episode score is 736.39


  6%|▋         | 6429/100000 [2:09:56<30:08:41,  1.16s/it]

63134 episode score is 732.82


  6%|▋         | 6430/100000 [2:09:57<30:05:09,  1.16s/it]

63144 episode score is 738.37


  6%|▋         | 6431/100000 [2:09:59<30:18:30,  1.17s/it]

63154 episode score is 762.66


  6%|▋         | 6432/100000 [2:10:00<29:54:32,  1.15s/it]

63164 episode score is 712.33


  6%|▋         | 6433/100000 [2:10:01<29:55:06,  1.15s/it]

63174 episode score is 747.58


  6%|▋         | 6434/100000 [2:10:02<29:48:51,  1.15s/it]

63184 episode score is 718.66


  6%|▋         | 6435/100000 [2:10:03<29:49:41,  1.15s/it]

63194 episode score is 738.33


  6%|▋         | 6436/100000 [2:10:04<29:42:44,  1.14s/it]

63204 episode score is 727.02


  6%|▋         | 6437/100000 [2:10:06<29:59:30,  1.15s/it]

63214 episode score is 761.75


  6%|▋         | 6438/100000 [2:10:07<29:57:28,  1.15s/it]

63224 episode score is 746.46


  6%|▋         | 6439/100000 [2:10:08<30:01:25,  1.16s/it]

63234 episode score is 751.68


  6%|▋         | 6440/100000 [2:10:09<29:26:38,  1.13s/it]

63243 episode score is 768.75


  6%|▋         | 6441/100000 [2:10:10<29:27:48,  1.13s/it]

63253 episode score is 738.97


  6%|▋         | 6442/100000 [2:10:11<29:00:46,  1.12s/it]

63262 episode score is 773.00


  6%|▋         | 6443/100000 [2:10:12<29:39:47,  1.14s/it]

63272 episode score is 756.15


  6%|▋         | 6444/100000 [2:10:13<29:55:53,  1.15s/it]

63282 episode score is 752.95


  6%|▋         | 6445/100000 [2:10:15<29:44:50,  1.14s/it]

63292 episode score is 731.60


  6%|▋         | 6446/100000 [2:10:16<29:40:13,  1.14s/it]

63302 episode score is 733.55


  6%|▋         | 6447/100000 [2:10:17<29:23:04,  1.13s/it]

63312 episode score is 709.43


  6%|▋         | 6448/100000 [2:10:18<29:14:45,  1.13s/it]

63322 episode score is 713.66


  6%|▋         | 6449/100000 [2:10:19<29:19:30,  1.13s/it]

63332 episode score is 731.71


  6%|▋         | 6450/100000 [2:10:20<29:00:12,  1.12s/it]

63342 episode score is 693.46


  6%|▋         | 6451/100000 [2:10:21<28:36:41,  1.10s/it]

63352 episode score is 691.66


  6%|▋         | 6452/100000 [2:10:22<28:48:37,  1.11s/it]

63362 episode score is 731.48


  6%|▋         | 6453/100000 [2:10:24<29:05:14,  1.12s/it]

63372 episode score is 742.45


  6%|▋         | 6454/100000 [2:10:25<29:05:04,  1.12s/it]

63382 episode score is 723.05


  6%|▋         | 6455/100000 [2:10:26<29:04:17,  1.12s/it]

63392 episode score is 724.43


  6%|▋         | 6456/100000 [2:10:27<29:10:20,  1.12s/it]

63402 episode score is 735.96


  6%|▋         | 6457/100000 [2:10:28<29:15:59,  1.13s/it]

63412 episode score is 719.21


  6%|▋         | 6458/100000 [2:10:29<28:53:53,  1.11s/it]

63421 episode score is 777.55


  6%|▋         | 6459/100000 [2:10:30<29:12:49,  1.12s/it]

63431 episode score is 730.15


  6%|▋         | 6460/100000 [2:10:31<29:21:24,  1.13s/it]

63441 episode score is 719.09


  6%|▋         | 6461/100000 [2:10:33<29:37:25,  1.14s/it]

63451 episode score is 748.84


  6%|▋         | 6462/100000 [2:10:34<29:35:34,  1.14s/it]

63461 episode score is 730.08


  6%|▋         | 6463/100000 [2:10:35<29:25:16,  1.13s/it]

63471 episode score is 715.34


  6%|▋         | 6464/100000 [2:10:36<29:28:50,  1.13s/it]

63481 episode score is 727.58


  6%|▋         | 6465/100000 [2:10:37<29:37:58,  1.14s/it]

63491 episode score is 740.99


  6%|▋         | 6466/100000 [2:10:38<29:21:42,  1.13s/it]

63501 episode score is 713.35


  6%|▋         | 6467/100000 [2:10:39<29:28:47,  1.13s/it]

63511 episode score is 738.10


  6%|▋         | 6468/100000 [2:10:41<29:21:57,  1.13s/it]

63521 episode score is 718.58


  6%|▋         | 6469/100000 [2:10:42<29:05:04,  1.12s/it]

63531 episode score is 699.12


  6%|▋         | 6470/100000 [2:10:43<29:06:38,  1.12s/it]

63541 episode score is 720.04


  6%|▋         | 6471/100000 [2:10:44<29:27:57,  1.13s/it]

63551 episode score is 749.01


  6%|▋         | 6472/100000 [2:10:45<29:42:43,  1.14s/it]

63561 episode score is 750.57


  6%|▋         | 6473/100000 [2:10:46<29:37:04,  1.14s/it]

63571 episode score is 725.76


  6%|▋         | 6474/100000 [2:10:47<30:42:40,  1.18s/it]

63581 episode score is 747.43


  6%|▋         | 6475/100000 [2:10:49<30:25:08,  1.17s/it]

63591 episode score is 732.46


  6%|▋         | 6476/100000 [2:10:50<30:29:53,  1.17s/it]

63601 episode score is 757.58


  6%|▋         | 6477/100000 [2:10:51<29:55:20,  1.15s/it]

63611 episode score is 706.06


  6%|▋         | 6478/100000 [2:10:52<29:36:21,  1.14s/it]

63621 episode score is 711.10


  6%|▋         | 6479/100000 [2:10:53<29:16:09,  1.13s/it]

63631 episode score is 699.13


  6%|▋         | 6480/100000 [2:10:54<29:25:56,  1.13s/it]

63641 episode score is 736.82


  6%|▋         | 6481/100000 [2:10:55<29:11:33,  1.12s/it]

63651 episode score is 702.10


  6%|▋         | 6482/100000 [2:10:56<29:15:13,  1.13s/it]

63661 episode score is 721.29


  6%|▋         | 6483/100000 [2:10:58<29:05:59,  1.12s/it]

63671 episode score is 714.15


  6%|▋         | 6484/100000 [2:10:59<28:53:38,  1.11s/it]

63681 episode score is 695.94


  6%|▋         | 6485/100000 [2:11:00<28:58:56,  1.12s/it]

63691 episode score is 726.16


  6%|▋         | 6486/100000 [2:11:01<29:08:14,  1.12s/it]

63701 episode score is 725.43


  6%|▋         | 6487/100000 [2:11:02<29:25:10,  1.13s/it]

63711 episode score is 734.94


  6%|▋         | 6488/100000 [2:11:03<29:20:16,  1.13s/it]

63721 episode score is 722.68


  6%|▋         | 6489/100000 [2:11:04<29:29:10,  1.14s/it]

63731 episode score is 740.77


  6%|▋         | 6490/100000 [2:11:05<29:22:38,  1.13s/it]

63741 episode score is 719.11


  6%|▋         | 6491/100000 [2:11:07<29:17:44,  1.13s/it]

63751 episode score is 723.47


  6%|▋         | 6492/100000 [2:11:08<29:27:47,  1.13s/it]

63761 episode score is 742.64


  6%|▋         | 6493/100000 [2:11:09<29:04:43,  1.12s/it]

63771 episode score is 698.93


  6%|▋         | 6494/100000 [2:11:10<29:35:18,  1.14s/it]

63782 episode score is 691.04


  6%|▋         | 6495/100000 [2:11:11<29:14:58,  1.13s/it]

63793 episode score is 622.97


  6%|▋         | 6496/100000 [2:11:12<29:41:58,  1.14s/it]

63804 episode score is 683.53


  6%|▋         | 6497/100000 [2:11:13<30:03:03,  1.16s/it]

63815 episode score is 691.13


  6%|▋         | 6498/100000 [2:11:15<29:44:53,  1.15s/it]

63825 episode score is 727.38


  6%|▋         | 6499/100000 [2:11:16<29:27:03,  1.13s/it]

63835 episode score is 710.13
63845 episode score is 737.74


  6%|▋         | 6500/100000 [2:11:18<37:38:23,  1.45s/it]

Iteration 6500: Average test reward: 728.05


  7%|▋         | 6501/100000 [2:11:19<35:25:59,  1.36s/it]

63855 episode score is 745.24


  7%|▋         | 6502/100000 [2:11:20<33:54:01,  1.31s/it]

63865 episode score is 750.12


  7%|▋         | 6503/100000 [2:11:21<32:47:31,  1.26s/it]

63875 episode score is 753.90


  7%|▋         | 6504/100000 [2:11:23<31:58:31,  1.23s/it]

63885 episode score is 745.22


  7%|▋         | 6505/100000 [2:11:24<31:24:05,  1.21s/it]

63895 episode score is 747.07


  7%|▋         | 6506/100000 [2:11:25<31:00:16,  1.19s/it]

63905 episode score is 744.51


  7%|▋         | 6507/100000 [2:11:26<30:38:25,  1.18s/it]

63915 episode score is 744.47


  7%|▋         | 6508/100000 [2:11:27<29:59:24,  1.15s/it]

63925 episode score is 707.74


  7%|▋         | 6509/100000 [2:11:28<29:23:51,  1.13s/it]

63935 episode score is 691.66


  7%|▋         | 6510/100000 [2:11:29<29:35:20,  1.14s/it]

63946 episode score is 664.79


  7%|▋         | 6511/100000 [2:11:31<29:51:39,  1.15s/it]

63957 episode score is 674.27


  7%|▋         | 6512/100000 [2:11:32<29:18:35,  1.13s/it]

63967 episode score is 686.65


  7%|▋         | 6513/100000 [2:11:33<29:51:16,  1.15s/it]

63978 episode score is 677.30


  7%|▋         | 6514/100000 [2:11:34<29:31:20,  1.14s/it]

63988 episode score is 710.13


  7%|▋         | 6515/100000 [2:11:35<29:48:16,  1.15s/it]

63999 episode score is 677.64


  7%|▋         | 6516/100000 [2:11:36<29:30:26,  1.14s/it]

64009 episode score is 707.13


  7%|▋         | 6517/100000 [2:11:37<29:18:14,  1.13s/it]

64019 episode score is 706.28


  7%|▋         | 6518/100000 [2:11:38<29:17:02,  1.13s/it]

64029 episode score is 725.41


  7%|▋         | 6519/100000 [2:11:40<29:41:05,  1.14s/it]

64039 episode score is 749.47


  7%|▋         | 6520/100000 [2:11:41<29:39:54,  1.14s/it]

64049 episode score is 730.14


  7%|▋         | 6521/100000 [2:11:42<30:05:36,  1.16s/it]

64059 episode score is 771.14


  7%|▋         | 6522/100000 [2:11:43<30:30:39,  1.18s/it]

64069 episode score is 715.30


  7%|▋         | 6523/100000 [2:11:44<29:48:21,  1.15s/it]

64078 episode score is 774.51


  7%|▋         | 6524/100000 [2:11:45<29:41:35,  1.14s/it]

64088 episode score is 727.66


  7%|▋         | 6525/100000 [2:11:47<29:38:20,  1.14s/it]

64098 episode score is 728.24


  7%|▋         | 6526/100000 [2:11:48<29:38:28,  1.14s/it]

64108 episode score is 727.50


  7%|▋         | 6527/100000 [2:11:49<29:41:29,  1.14s/it]

64118 episode score is 724.52


  7%|▋         | 6528/100000 [2:11:50<29:32:51,  1.14s/it]

64128 episode score is 723.31


  7%|▋         | 6529/100000 [2:11:51<29:39:06,  1.14s/it]

64138 episode score is 740.54


  7%|▋         | 6530/100000 [2:11:52<29:38:28,  1.14s/it]

64147 episode score is 817.06


  7%|▋         | 6531/100000 [2:11:53<30:01:50,  1.16s/it]

64157 episode score is 761.19


  7%|▋         | 6532/100000 [2:11:55<29:50:31,  1.15s/it]

64167 episode score is 725.36


  7%|▋         | 6533/100000 [2:11:56<29:37:15,  1.14s/it]

64177 episode score is 714.95


  7%|▋         | 6534/100000 [2:11:57<29:47:48,  1.15s/it]

64187 episode score is 744.31


  7%|▋         | 6535/100000 [2:11:58<29:41:05,  1.14s/it]

64197 episode score is 729.44


  7%|▋         | 6536/100000 [2:11:59<29:48:18,  1.15s/it]

64207 episode score is 746.76


  7%|▋         | 6537/100000 [2:12:00<29:40:11,  1.14s/it]

64217 episode score is 721.35


  7%|▋         | 6538/100000 [2:12:01<29:39:11,  1.14s/it]

64227 episode score is 735.64


  7%|▋         | 6539/100000 [2:12:03<29:42:03,  1.14s/it]

64237 episode score is 732.81


  7%|▋         | 6540/100000 [2:12:04<29:55:01,  1.15s/it]

64247 episode score is 751.43


  7%|▋         | 6541/100000 [2:12:05<29:57:39,  1.15s/it]

64257 episode score is 752.71


  7%|▋         | 6542/100000 [2:12:06<29:50:22,  1.15s/it]

64267 episode score is 731.86


  7%|▋         | 6543/100000 [2:12:07<29:25:20,  1.13s/it]

64277 episode score is 704.38


  7%|▋         | 6544/100000 [2:12:08<29:35:46,  1.14s/it]

64287 episode score is 747.55


  7%|▋         | 6545/100000 [2:12:09<29:58:30,  1.15s/it]

64297 episode score is 764.63


  7%|▋         | 6546/100000 [2:12:11<30:00:57,  1.16s/it]

64307 episode score is 751.83


  7%|▋         | 6547/100000 [2:12:12<30:08:56,  1.16s/it]

64317 episode score is 752.94


  7%|▋         | 6548/100000 [2:12:13<30:17:35,  1.17s/it]

64327 episode score is 758.54


  7%|▋         | 6549/100000 [2:12:14<29:37:18,  1.14s/it]

64336 episode score is 776.75


  7%|▋         | 6550/100000 [2:12:15<29:24:55,  1.13s/it]

64345 episode score is 793.95


  7%|▋         | 6551/100000 [2:12:16<29:19:13,  1.13s/it]

64355 episode score is 712.06


  7%|▋         | 6552/100000 [2:12:17<29:36:31,  1.14s/it]

64365 episode score is 741.04


  7%|▋         | 6553/100000 [2:12:19<29:48:21,  1.15s/it]

64375 episode score is 754.57


  7%|▋         | 6554/100000 [2:12:20<30:09:06,  1.16s/it]

64385 episode score is 761.54


  7%|▋         | 6555/100000 [2:12:21<30:03:59,  1.16s/it]

64395 episode score is 736.95


  7%|▋         | 6556/100000 [2:12:22<30:19:21,  1.17s/it]

64405 episode score is 773.86


  7%|▋         | 6557/100000 [2:12:23<30:20:13,  1.17s/it]

64415 episode score is 750.89


  7%|▋         | 6558/100000 [2:12:25<30:28:53,  1.17s/it]

64425 episode score is 770.36


  7%|▋         | 6559/100000 [2:12:26<29:44:12,  1.15s/it]

64434 episode score is 772.84


  7%|▋         | 6560/100000 [2:12:27<29:47:59,  1.15s/it]

64443 episode score is 831.54


  7%|▋         | 6561/100000 [2:12:28<30:17:37,  1.17s/it]

64453 episode score is 783.61


  7%|▋         | 6562/100000 [2:12:29<30:26:21,  1.17s/it]

64463 episode score is 769.73


  7%|▋         | 6563/100000 [2:12:30<29:45:15,  1.15s/it]

64472 episode score is 775.64


  7%|▋         | 6564/100000 [2:12:31<29:29:10,  1.14s/it]

64481 episode score is 804.11


  7%|▋         | 6565/100000 [2:12:32<29:23:10,  1.13s/it]

64490 episode score is 788.55


  7%|▋         | 6566/100000 [2:12:34<29:28:16,  1.14s/it]

64500 episode score is 727.79


  7%|▋         | 6567/100000 [2:12:35<29:43:56,  1.15s/it]

64510 episode score is 749.51


  7%|▋         | 6568/100000 [2:12:36<29:15:43,  1.13s/it]

64519 episode score is 778.90


  7%|▋         | 6569/100000 [2:12:37<29:03:09,  1.12s/it]

64528 episode score is 790.18


  7%|▋         | 6570/100000 [2:12:38<29:42:09,  1.14s/it]

64538 episode score is 778.88


  7%|▋         | 6571/100000 [2:12:39<29:17:58,  1.13s/it]

64547 episode score is 794.40


  7%|▋         | 6572/100000 [2:12:40<29:55:22,  1.15s/it]

64556 episode score is 887.43


  7%|▋         | 6573/100000 [2:12:42<29:46:22,  1.15s/it]

64565 episode score is 823.65


  7%|▋         | 6574/100000 [2:12:43<29:53:32,  1.15s/it]

64574 episode score is 839.10


  7%|▋         | 6575/100000 [2:12:44<29:44:44,  1.15s/it]

64583 episode score is 826.37


  7%|▋         | 6576/100000 [2:12:45<29:32:12,  1.14s/it]

64592 episode score is 818.55


  7%|▋         | 6577/100000 [2:12:46<29:46:06,  1.15s/it]

64601 episode score is 845.94


  7%|▋         | 6578/100000 [2:12:47<29:56:42,  1.15s/it]

64610 episode score is 836.25


  7%|▋         | 6579/100000 [2:12:49<30:05:34,  1.16s/it]

64619 episode score is 842.99


  7%|▋         | 6580/100000 [2:12:50<30:07:30,  1.16s/it]

64628 episode score is 846.99


  7%|▋         | 6581/100000 [2:12:51<29:49:38,  1.15s/it]

64637 episode score is 811.35


  7%|▋         | 6582/100000 [2:12:52<29:57:15,  1.15s/it]

64647 episode score is 751.24


  7%|▋         | 6583/100000 [2:12:53<29:55:17,  1.15s/it]

64656 episode score is 831.18


  7%|▋         | 6584/100000 [2:12:54<29:25:04,  1.13s/it]

64665 episode score is 785.96


  7%|▋         | 6585/100000 [2:12:55<29:18:13,  1.13s/it]

64674 episode score is 808.99


  7%|▋         | 6586/100000 [2:12:56<28:57:55,  1.12s/it]

64683 episode score is 782.38


  7%|▋         | 6587/100000 [2:12:57<28:41:10,  1.11s/it]

64692 episode score is 785.31


  7%|▋         | 6588/100000 [2:12:59<29:05:36,  1.12s/it]

64702 episode score is 745.11


  7%|▋         | 6589/100000 [2:13:00<29:34:53,  1.14s/it]

64712 episode score is 765.35


  7%|▋         | 6590/100000 [2:13:01<29:45:01,  1.15s/it]

64721 episode score is 778.66


  7%|▋         | 6591/100000 [2:13:02<29:48:50,  1.15s/it]

64731 episode score is 737.52


  7%|▋         | 6592/100000 [2:13:03<29:47:23,  1.15s/it]

64741 episode score is 734.60


  7%|▋         | 6593/100000 [2:13:04<29:33:40,  1.14s/it]

64751 episode score is 721.36


  7%|▋         | 6594/100000 [2:13:06<29:46:16,  1.15s/it]

64761 episode score is 752.63


  7%|▋         | 6595/100000 [2:13:07<29:48:19,  1.15s/it]

64771 episode score is 734.33


  7%|▋         | 6596/100000 [2:13:08<29:27:30,  1.14s/it]

64781 episode score is 707.78


  7%|▋         | 6597/100000 [2:13:09<29:28:01,  1.14s/it]

64791 episode score is 734.87


  7%|▋         | 6598/100000 [2:13:10<29:24:47,  1.13s/it]

64801 episode score is 723.18


  7%|▋         | 6599/100000 [2:13:11<29:10:50,  1.12s/it]

64811 episode score is 704.84
64821 episode score is 737.88


  7%|▋         | 6600/100000 [2:13:13<37:18:39,  1.44s/it]

Iteration 6600: Average test reward: 712.60


  7%|▋         | 6601/100000 [2:13:15<34:52:11,  1.34s/it]

64831 episode score is 723.95


  7%|▋         | 6602/100000 [2:13:16<33:08:01,  1.28s/it]

64841 episode score is 713.46


  7%|▋         | 6603/100000 [2:13:17<32:03:08,  1.24s/it]

64851 episode score is 732.25


  7%|▋         | 6604/100000 [2:13:18<31:40:10,  1.22s/it]

64861 episode score is 763.15


  7%|▋         | 6605/100000 [2:13:19<31:21:25,  1.21s/it]

64871 episode score is 753.24


  7%|▋         | 6606/100000 [2:13:20<31:00:02,  1.19s/it]

64881 episode score is 754.58


  7%|▋         | 6607/100000 [2:13:21<30:29:27,  1.18s/it]

64891 episode score is 721.07


  7%|▋         | 6608/100000 [2:13:23<30:14:27,  1.17s/it]

64901 episode score is 727.00


  7%|▋         | 6609/100000 [2:13:24<30:28:22,  1.17s/it]

64911 episode score is 774.30


  7%|▋         | 6610/100000 [2:13:25<30:05:51,  1.16s/it]

64921 episode score is 721.44


  7%|▋         | 6611/100000 [2:13:26<30:10:22,  1.16s/it]

64931 episode score is 757.52


  7%|▋         | 6612/100000 [2:13:27<30:19:15,  1.17s/it]

64941 episode score is 757.56


  7%|▋         | 6613/100000 [2:13:28<30:13:25,  1.17s/it]

64950 episode score is 848.93


  7%|▋         | 6614/100000 [2:13:30<29:55:01,  1.15s/it]

64959 episode score is 812.50


  7%|▋         | 6615/100000 [2:13:31<29:25:38,  1.13s/it]

64968 episode score is 787.16


  7%|▋         | 6616/100000 [2:13:32<29:23:50,  1.13s/it]

64977 episode score is 815.70


  7%|▋         | 6617/100000 [2:13:33<29:51:14,  1.15s/it]

64987 episode score is 758.85


  7%|▋         | 6618/100000 [2:13:34<29:29:54,  1.14s/it]

64997 episode score is 710.05


  7%|▋         | 6619/100000 [2:13:35<29:43:34,  1.15s/it]

65007 episode score is 748.56


  7%|▋         | 6620/100000 [2:13:36<29:38:43,  1.14s/it]

65017 episode score is 723.26


  7%|▋         | 6621/100000 [2:13:37<29:35:07,  1.14s/it]

65027 episode score is 715.08


  7%|▋         | 6622/100000 [2:13:39<29:28:23,  1.14s/it]

65037 episode score is 724.71


  7%|▋         | 6623/100000 [2:13:40<29:31:24,  1.14s/it]

65047 episode score is 740.58


  7%|▋         | 6624/100000 [2:13:41<29:23:24,  1.13s/it]

65057 episode score is 724.75


  7%|▋         | 6625/100000 [2:13:42<29:34:42,  1.14s/it]

65067 episode score is 754.05


  7%|▋         | 6626/100000 [2:13:43<29:26:24,  1.14s/it]

65077 episode score is 724.31


  7%|▋         | 6627/100000 [2:13:44<29:24:19,  1.13s/it]

65087 episode score is 728.15


  7%|▋         | 6628/100000 [2:13:45<29:28:54,  1.14s/it]

65097 episode score is 719.00


  7%|▋         | 6629/100000 [2:13:47<29:22:52,  1.13s/it]

65107 episode score is 715.30


  7%|▋         | 6630/100000 [2:13:48<29:24:51,  1.13s/it]

65117 episode score is 717.39


  7%|▋         | 6631/100000 [2:13:49<29:43:03,  1.15s/it]

65128 episode score is 673.31


  7%|▋         | 6632/100000 [2:13:50<29:29:05,  1.14s/it]

65138 episode score is 713.35


  7%|▋         | 6633/100000 [2:13:51<29:26:47,  1.14s/it]

65148 episode score is 731.88


  7%|▋         | 6634/100000 [2:13:52<29:11:55,  1.13s/it]

65158 episode score is 703.59


  7%|▋         | 6635/100000 [2:13:53<29:09:39,  1.12s/it]

65168 episode score is 710.11


  7%|▋         | 6636/100000 [2:13:54<29:00:23,  1.12s/it]

65178 episode score is 709.21


  7%|▋         | 6637/100000 [2:13:56<28:49:25,  1.11s/it]

65188 episode score is 699.54


  7%|▋         | 6638/100000 [2:13:57<29:16:44,  1.13s/it]

65199 episode score is 672.64


  7%|▋         | 6639/100000 [2:13:58<29:19:06,  1.13s/it]

65210 episode score is 653.52


  7%|▋         | 6640/100000 [2:13:59<29:24:00,  1.13s/it]

65221 episode score is 656.59


  7%|▋         | 6641/100000 [2:14:00<28:57:59,  1.12s/it]

65231 episode score is 682.74


  7%|▋         | 6642/100000 [2:14:01<29:25:14,  1.13s/it]

65242 episode score is 670.90


  7%|▋         | 6643/100000 [2:14:02<29:16:49,  1.13s/it]

65252 episode score is 707.63


  7%|▋         | 6644/100000 [2:14:04<29:30:10,  1.14s/it]

65263 episode score is 666.48


  7%|▋         | 6645/100000 [2:14:05<29:50:00,  1.15s/it]

65274 episode score is 689.69


  7%|▋         | 6646/100000 [2:14:06<29:37:12,  1.14s/it]

65284 episode score is 709.58


  7%|▋         | 6647/100000 [2:14:07<29:24:53,  1.13s/it]

65294 episode score is 714.67


  7%|▋         | 6648/100000 [2:14:08<29:19:16,  1.13s/it]

65304 episode score is 728.09


  7%|▋         | 6649/100000 [2:14:09<29:28:58,  1.14s/it]

65314 episode score is 743.12


  7%|▋         | 6650/100000 [2:14:10<29:04:57,  1.12s/it]

65324 episode score is 690.93


  7%|▋         | 6651/100000 [2:14:11<28:48:31,  1.11s/it]

65334 episode score is 699.88


  7%|▋         | 6652/100000 [2:14:12<28:42:20,  1.11s/it]

65344 episode score is 705.18


  7%|▋         | 6653/100000 [2:14:14<29:20:45,  1.13s/it]

65354 episode score is 702.89


  7%|▋         | 6654/100000 [2:14:15<29:43:52,  1.15s/it]

65365 episode score is 683.78


  7%|▋         | 6655/100000 [2:14:16<29:29:17,  1.14s/it]

65375 episode score is 716.17


  7%|▋         | 6656/100000 [2:14:17<30:02:14,  1.16s/it]

65385 episode score is 762.23


  7%|▋         | 6657/100000 [2:14:18<30:01:23,  1.16s/it]

65396 episode score is 668.44


  7%|▋         | 6658/100000 [2:14:19<29:32:36,  1.14s/it]

65406 episode score is 704.77


  7%|▋         | 6659/100000 [2:14:21<29:13:14,  1.13s/it]

65416 episode score is 708.74


  7%|▋         | 6660/100000 [2:14:22<29:09:52,  1.12s/it]

65425 episode score is 820.84


  7%|▋         | 6661/100000 [2:14:23<28:56:04,  1.12s/it]

65434 episode score is 789.25


  7%|▋         | 6662/100000 [2:14:24<28:49:42,  1.11s/it]

65443 episode score is 799.01


  7%|▋         | 6663/100000 [2:14:25<29:14:14,  1.13s/it]

65453 episode score is 757.17


  7%|▋         | 6664/100000 [2:14:26<29:11:34,  1.13s/it]

65462 episode score is 819.98


  7%|▋         | 6665/100000 [2:14:27<29:30:52,  1.14s/it]

65472 episode score is 752.91


  7%|▋         | 6666/100000 [2:14:28<29:07:41,  1.12s/it]

65481 episode score is 796.30


  7%|▋         | 6667/100000 [2:14:30<29:10:47,  1.13s/it]

65490 episode score is 824.77


  7%|▋         | 6668/100000 [2:14:31<29:50:50,  1.15s/it]

65500 episode score is 793.80


  7%|▋         | 6669/100000 [2:14:32<29:28:28,  1.14s/it]

65509 episode score is 791.66


  7%|▋         | 6670/100000 [2:14:33<29:28:47,  1.14s/it]

65518 episode score is 822.35


  7%|▋         | 6671/100000 [2:14:34<29:22:07,  1.13s/it]

65528 episode score is 728.79


  7%|▋         | 6672/100000 [2:14:35<29:09:45,  1.12s/it]

65538 episode score is 714.11


  7%|▋         | 6673/100000 [2:14:36<29:31:22,  1.14s/it]

65548 episode score is 762.66


  7%|▋         | 6674/100000 [2:14:38<29:58:06,  1.16s/it]

65558 episode score is 775.57


  7%|▋         | 6675/100000 [2:14:39<29:57:14,  1.16s/it]

65568 episode score is 745.63


  7%|▋         | 6676/100000 [2:14:40<29:42:42,  1.15s/it]

65578 episode score is 718.80


  7%|▋         | 6677/100000 [2:14:41<29:50:12,  1.15s/it]

65588 episode score is 736.51


  7%|▋         | 6678/100000 [2:14:42<29:43:39,  1.15s/it]

65598 episode score is 725.23


  7%|▋         | 6679/100000 [2:14:43<29:38:52,  1.14s/it]

65608 episode score is 734.08


  7%|▋         | 6680/100000 [2:14:44<29:45:14,  1.15s/it]

65618 episode score is 752.90


  7%|▋         | 6681/100000 [2:14:46<29:55:22,  1.15s/it]

65628 episode score is 765.40


  7%|▋         | 6682/100000 [2:14:47<29:43:12,  1.15s/it]

65637 episode score is 822.14


  7%|▋         | 6683/100000 [2:14:48<29:39:01,  1.14s/it]

65646 episode score is 819.60


  7%|▋         | 6684/100000 [2:14:49<29:50:35,  1.15s/it]

65656 episode score is 756.02


  7%|▋         | 6685/100000 [2:14:50<29:19:36,  1.13s/it]

65665 episode score is 787.99


  7%|▋         | 6686/100000 [2:14:51<29:01:15,  1.12s/it]

65674 episode score is 795.78


  7%|▋         | 6687/100000 [2:14:52<29:02:01,  1.12s/it]

65683 episode score is 813.16


  7%|▋         | 6688/100000 [2:14:54<29:32:16,  1.14s/it]

65692 episode score is 868.45


  7%|▋         | 6689/100000 [2:14:55<29:55:48,  1.15s/it]

65702 episode score is 766.06


  7%|▋         | 6690/100000 [2:14:56<30:05:55,  1.16s/it]

65712 episode score is 751.98


  7%|▋         | 6691/100000 [2:14:57<29:32:04,  1.14s/it]

65721 episode score is 778.82


  7%|▋         | 6692/100000 [2:14:58<29:19:32,  1.13s/it]

65730 episode score is 808.37


  7%|▋         | 6693/100000 [2:14:59<29:35:41,  1.14s/it]

65740 episode score is 752.78


  7%|▋         | 6694/100000 [2:15:00<29:42:01,  1.15s/it]

65749 episode score is 852.97


  7%|▋         | 6695/100000 [2:15:02<30:20:41,  1.17s/it]

65759 episode score is 782.08


  7%|▋         | 6696/100000 [2:15:03<30:09:52,  1.16s/it]

65769 episode score is 731.98


  7%|▋         | 6697/100000 [2:15:04<29:27:39,  1.14s/it]

65778 episode score is 778.38


  7%|▋         | 6698/100000 [2:15:05<28:59:47,  1.12s/it]

65787 episode score is 794.15


  7%|▋         | 6699/100000 [2:15:06<29:10:35,  1.13s/it]

65796 episode score is 848.17
65805 episode score is 795.81


  7%|▋         | 6700/100000 [2:15:08<36:59:54,  1.43s/it]

Iteration 6700: Average test reward: 767.93


  7%|▋         | 6701/100000 [2:15:09<34:38:24,  1.34s/it]

65815 episode score is 731.47


  7%|▋         | 6702/100000 [2:15:11<33:25:23,  1.29s/it]

65825 episode score is 775.68


  7%|▋         | 6703/100000 [2:15:12<32:19:53,  1.25s/it]

65834 episode score is 834.36


  7%|▋         | 6704/100000 [2:15:13<31:03:49,  1.20s/it]

65843 episode score is 786.43


  7%|▋         | 6705/100000 [2:15:14<30:54:46,  1.19s/it]

65853 episode score is 768.87


  7%|▋         | 6706/100000 [2:15:15<30:35:29,  1.18s/it]

65863 episode score is 743.47


  7%|▋         | 6707/100000 [2:15:16<30:08:18,  1.16s/it]

65873 episode score is 721.98


  7%|▋         | 6708/100000 [2:15:17<30:17:06,  1.17s/it]

65883 episode score is 743.79


  7%|▋         | 6709/100000 [2:15:19<30:11:14,  1.16s/it]

65893 episode score is 747.68


  7%|▋         | 6710/100000 [2:15:20<30:13:09,  1.17s/it]

65903 episode score is 759.14


  7%|▋         | 6711/100000 [2:15:21<29:39:00,  1.14s/it]

65912 episode score is 786.13


  7%|▋         | 6712/100000 [2:15:22<29:19:00,  1.13s/it]

65921 episode score is 804.27


  7%|▋         | 6713/100000 [2:15:23<29:07:39,  1.12s/it]

65929 episode score is 917.39


  7%|▋         | 6714/100000 [2:15:24<29:29:23,  1.14s/it]

65938 episode score is 868.51


  7%|▋         | 6715/100000 [2:15:25<29:46:50,  1.15s/it]

65946 episode score is 971.87


  7%|▋         | 6716/100000 [2:15:27<30:13:32,  1.17s/it]

65955 episode score is 895.76


  7%|▋         | 6717/100000 [2:15:28<30:50:54,  1.19s/it]

65963 episode score is 939.11


  7%|▋         | 6718/100000 [2:15:29<30:33:15,  1.18s/it]

65972 episode score is 822.95


  7%|▋         | 6719/100000 [2:15:30<30:42:17,  1.18s/it]

65981 episode score is 886.82


  7%|▋         | 6720/100000 [2:15:31<30:32:03,  1.18s/it]

65990 episode score is 846.80


  7%|▋         | 6721/100000 [2:15:33<30:49:15,  1.19s/it]

65999 episode score is 889.81


  7%|▋         | 6722/100000 [2:15:34<30:25:50,  1.17s/it]

66008 episode score is 829.28


  7%|▋         | 6723/100000 [2:15:35<30:14:33,  1.17s/it]

66017 episode score is 831.95


  7%|▋         | 6724/100000 [2:15:36<29:54:59,  1.15s/it]

66026 episode score is 820.77


  7%|▋         | 6725/100000 [2:15:37<29:52:18,  1.15s/it]

66035 episode score is 838.29


  7%|▋         | 6726/100000 [2:15:38<30:13:12,  1.17s/it]

66045 episode score is 767.84


  7%|▋         | 6727/100000 [2:15:39<29:37:41,  1.14s/it]

66054 episode score is 773.38


  7%|▋         | 6728/100000 [2:15:40<29:14:40,  1.13s/it]

66063 episode score is 785.98


  7%|▋         | 6729/100000 [2:15:42<29:30:55,  1.14s/it]

66073 episode score is 752.50


  7%|▋         | 6730/100000 [2:15:43<29:53:33,  1.15s/it]

66083 episode score is 758.46


  7%|▋         | 6731/100000 [2:15:44<30:05:05,  1.16s/it]

66093 episode score is 760.62


  7%|▋         | 6732/100000 [2:15:45<30:04:27,  1.16s/it]

66103 episode score is 742.26


  7%|▋         | 6733/100000 [2:15:46<29:54:39,  1.15s/it]

66113 episode score is 734.47


  7%|▋         | 6734/100000 [2:15:47<29:59:01,  1.16s/it]

66123 episode score is 740.20


  7%|▋         | 6735/100000 [2:15:49<29:56:42,  1.16s/it]

66134 episode score is 660.98


  7%|▋         | 6736/100000 [2:15:50<29:28:21,  1.14s/it]

66144 episode score is 709.06


  7%|▋         | 6737/100000 [2:15:51<29:16:15,  1.13s/it]

66154 episode score is 721.65


  7%|▋         | 6738/100000 [2:15:52<29:19:13,  1.13s/it]

66164 episode score is 736.84


  7%|▋         | 6739/100000 [2:15:53<29:21:43,  1.13s/it]

66174 episode score is 737.84


  7%|▋         | 6740/100000 [2:15:54<29:20:38,  1.13s/it]

66184 episode score is 722.74


  7%|▋         | 6741/100000 [2:15:55<29:16:59,  1.13s/it]

66194 episode score is 729.18


  7%|▋         | 6742/100000 [2:15:56<28:53:32,  1.12s/it]

66204 episode score is 693.99


  7%|▋         | 6743/100000 [2:15:58<29:15:19,  1.13s/it]

66215 episode score is 668.97


  7%|▋         | 6744/100000 [2:15:59<29:36:05,  1.14s/it]

66226 episode score is 672.67


  7%|▋         | 6745/100000 [2:16:00<29:09:18,  1.13s/it]

66236 episode score is 699.35


  7%|▋         | 6746/100000 [2:16:01<29:29:06,  1.14s/it]

66247 episode score is 673.91


  7%|▋         | 6747/100000 [2:16:02<29:27:46,  1.14s/it]

66257 episode score is 731.28


  7%|▋         | 6748/100000 [2:16:03<29:18:47,  1.13s/it]

66267 episode score is 713.49


  7%|▋         | 6749/100000 [2:16:04<29:48:26,  1.15s/it]

66277 episode score is 768.28


  7%|▋         | 6750/100000 [2:16:06<29:34:44,  1.14s/it]

66287 episode score is 723.45


  7%|▋         | 6751/100000 [2:16:07<29:20:44,  1.13s/it]

66297 episode score is 716.26


  7%|▋         | 6752/100000 [2:16:08<29:17:33,  1.13s/it]

66307 episode score is 724.58


  7%|▋         | 6753/100000 [2:16:09<29:36:54,  1.14s/it]

66318 episode score is 677.50


  7%|▋         | 6754/100000 [2:16:10<29:11:47,  1.13s/it]

66328 episode score is 703.55


  7%|▋         | 6755/100000 [2:16:11<29:32:39,  1.14s/it]

66338 episode score is 760.95


  7%|▋         | 6756/100000 [2:16:12<29:41:49,  1.15s/it]

66348 episode score is 741.66


  7%|▋         | 6757/100000 [2:16:14<30:04:12,  1.16s/it]

66358 episode score is 776.04


  7%|▋         | 6758/100000 [2:16:15<29:59:35,  1.16s/it]

66368 episode score is 740.08


  7%|▋         | 6759/100000 [2:16:16<30:16:37,  1.17s/it]

66378 episode score is 767.21


  7%|▋         | 6760/100000 [2:16:17<29:47:44,  1.15s/it]

66387 episode score is 779.99


  7%|▋         | 6761/100000 [2:16:18<29:29:28,  1.14s/it]

66396 episode score is 789.44


  7%|▋         | 6762/100000 [2:16:19<30:01:24,  1.16s/it]

66406 episode score is 773.55


  7%|▋         | 6763/100000 [2:16:21<30:12:25,  1.17s/it]

66416 episode score is 763.02


  7%|▋         | 6764/100000 [2:16:22<30:12:10,  1.17s/it]

66426 episode score is 752.46


  7%|▋         | 6765/100000 [2:16:23<30:24:33,  1.17s/it]

66436 episode score is 758.85


  7%|▋         | 6766/100000 [2:16:24<30:00:32,  1.16s/it]

66445 episode score is 802.11


  7%|▋         | 6767/100000 [2:16:25<29:33:02,  1.14s/it]

66454 episode score is 778.95


  7%|▋         | 6768/100000 [2:16:26<29:07:07,  1.12s/it]

66463 episode score is 775.16


  7%|▋         | 6769/100000 [2:16:27<28:56:03,  1.12s/it]

66472 episode score is 785.77


  7%|▋         | 6770/100000 [2:16:28<28:41:45,  1.11s/it]

66481 episode score is 768.60


  7%|▋         | 6771/100000 [2:16:30<28:31:07,  1.10s/it]

66490 episode score is 772.24


  7%|▋         | 6772/100000 [2:16:31<28:55:20,  1.12s/it]

66500 episode score is 735.68


  7%|▋         | 6773/100000 [2:16:32<29:18:32,  1.13s/it]

66510 episode score is 739.31


  7%|▋         | 6774/100000 [2:16:33<29:29:34,  1.14s/it]

66520 episode score is 740.98


  7%|▋         | 6775/100000 [2:16:34<30:21:11,  1.17s/it]

66530 episode score is 751.47


  7%|▋         | 6776/100000 [2:16:35<30:06:32,  1.16s/it]

66540 episode score is 735.79


  7%|▋         | 6777/100000 [2:16:36<29:33:16,  1.14s/it]

66550 episode score is 705.54


  7%|▋         | 6778/100000 [2:16:38<29:14:22,  1.13s/it]

66560 episode score is 711.90


  7%|▋         | 6779/100000 [2:16:39<29:03:41,  1.12s/it]

66570 episode score is 710.29


  7%|▋         | 6780/100000 [2:16:40<29:03:25,  1.12s/it]

66580 episode score is 718.08


  7%|▋         | 6781/100000 [2:16:41<28:57:21,  1.12s/it]

66590 episode score is 711.65


  7%|▋         | 6782/100000 [2:16:42<29:22:00,  1.13s/it]

66600 episode score is 755.19


  7%|▋         | 6783/100000 [2:16:43<29:21:33,  1.13s/it]

66610 episode score is 731.86


  7%|▋         | 6784/100000 [2:16:44<29:14:23,  1.13s/it]

66620 episode score is 720.57


  7%|▋         | 6785/100000 [2:16:45<29:16:15,  1.13s/it]

66630 episode score is 721.11


  7%|▋         | 6786/100000 [2:16:47<28:51:46,  1.11s/it]

66640 episode score is 693.00


  7%|▋         | 6787/100000 [2:16:48<28:52:46,  1.12s/it]

66650 episode score is 699.65


  7%|▋         | 6788/100000 [2:16:49<29:09:44,  1.13s/it]

66660 episode score is 728.99


  7%|▋         | 6789/100000 [2:16:50<29:03:44,  1.12s/it]

66670 episode score is 705.19


  7%|▋         | 6790/100000 [2:16:51<29:35:15,  1.14s/it]

66681 episode score is 693.70


  7%|▋         | 6791/100000 [2:16:52<29:22:14,  1.13s/it]

66691 episode score is 717.53


  7%|▋         | 6792/100000 [2:16:53<29:49:46,  1.15s/it]

66702 episode score is 696.27


  7%|▋         | 6793/100000 [2:16:55<29:35:01,  1.14s/it]

66712 episode score is 722.97


  7%|▋         | 6794/100000 [2:16:56<29:20:24,  1.13s/it]

66722 episode score is 709.25


  7%|▋         | 6795/100000 [2:16:57<29:13:10,  1.13s/it]

66732 episode score is 721.28


  7%|▋         | 6796/100000 [2:16:58<29:01:41,  1.12s/it]

66742 episode score is 712.36


  7%|▋         | 6797/100000 [2:16:59<29:02:38,  1.12s/it]

66752 episode score is 723.48


  7%|▋         | 6798/100000 [2:17:00<28:57:31,  1.12s/it]

66762 episode score is 719.35


  7%|▋         | 6799/100000 [2:17:01<28:44:20,  1.11s/it]

66772 episode score is 703.29
66782 episode score is 726.99


  7%|▋         | 6800/100000 [2:17:03<36:28:56,  1.41s/it]

Iteration 6800: Average test reward: 680.34


  7%|▋         | 6801/100000 [2:17:04<33:59:31,  1.31s/it]

66792 episode score is 702.37


  7%|▋         | 6802/100000 [2:17:06<32:21:57,  1.25s/it]

66802 episode score is 713.16


  7%|▋         | 6803/100000 [2:17:07<31:19:39,  1.21s/it]

66812 episode score is 719.59


  7%|▋         | 6804/100000 [2:17:08<30:36:03,  1.18s/it]

66822 episode score is 722.84


  7%|▋         | 6805/100000 [2:17:09<30:03:17,  1.16s/it]

66832 episode score is 709.17


  7%|▋         | 6806/100000 [2:17:10<30:10:35,  1.17s/it]

66843 episode score is 688.38


  7%|▋         | 6807/100000 [2:17:11<30:01:22,  1.16s/it]

66853 episode score is 743.57


  7%|▋         | 6808/100000 [2:17:12<29:27:04,  1.14s/it]

66863 episode score is 698.19


  7%|▋         | 6809/100000 [2:17:13<29:45:46,  1.15s/it]

66874 episode score is 682.41


  7%|▋         | 6810/100000 [2:17:15<29:39:05,  1.15s/it]

66884 episode score is 733.98


  7%|▋         | 6811/100000 [2:17:16<29:14:35,  1.13s/it]

66894 episode score is 705.06


  7%|▋         | 6812/100000 [2:17:17<28:51:57,  1.12s/it]

66904 episode score is 696.05


  7%|▋         | 6813/100000 [2:17:18<28:55:06,  1.12s/it]

66914 episode score is 714.00


  7%|▋         | 6814/100000 [2:17:19<28:51:29,  1.11s/it]

66924 episode score is 701.80


  7%|▋         | 6815/100000 [2:17:20<28:50:35,  1.11s/it]

66934 episode score is 714.90


  7%|▋         | 6816/100000 [2:17:21<28:56:43,  1.12s/it]

66944 episode score is 724.99


  7%|▋         | 6817/100000 [2:17:22<28:59:29,  1.12s/it]

66954 episode score is 726.61


  7%|▋         | 6818/100000 [2:17:23<29:05:29,  1.12s/it]

66964 episode score is 732.05


  7%|▋         | 6819/100000 [2:17:25<29:07:57,  1.13s/it]

66974 episode score is 729.35


  7%|▋         | 6820/100000 [2:17:26<29:07:59,  1.13s/it]

66984 episode score is 725.18


  7%|▋         | 6821/100000 [2:17:27<28:57:56,  1.12s/it]

66994 episode score is 717.43


  7%|▋         | 6822/100000 [2:17:28<29:02:59,  1.12s/it]

67004 episode score is 730.80


  7%|▋         | 6823/100000 [2:17:29<29:33:57,  1.14s/it]

67014 episode score is 710.83


  7%|▋         | 6824/100000 [2:17:30<29:11:20,  1.13s/it]

67024 episode score is 702.62


  7%|▋         | 6825/100000 [2:17:31<29:21:28,  1.13s/it]

67035 episode score is 668.19


  7%|▋         | 6826/100000 [2:17:33<29:43:16,  1.15s/it]

67046 episode score is 677.97


  7%|▋         | 6827/100000 [2:17:34<29:18:50,  1.13s/it]

67056 episode score is 710.89


  7%|▋         | 6828/100000 [2:17:35<29:30:45,  1.14s/it]

67067 episode score is 668.99


  7%|▋         | 6829/100000 [2:17:36<29:25:30,  1.14s/it]

67078 episode score is 645.15


  7%|▋         | 6830/100000 [2:17:37<29:41:00,  1.15s/it]

67089 episode score is 683.66


  7%|▋         | 6831/100000 [2:17:38<29:55:29,  1.16s/it]

67100 episode score is 691.66


  7%|▋         | 6832/100000 [2:17:39<29:30:37,  1.14s/it]

67110 episode score is 705.31


  7%|▋         | 6833/100000 [2:17:41<29:33:58,  1.14s/it]

67121 episode score is 659.81


  7%|▋         | 6834/100000 [2:17:42<29:40:25,  1.15s/it]

67132 episode score is 671.25


  7%|▋         | 6835/100000 [2:17:43<30:03:11,  1.16s/it]

67143 episode score is 685.79


  7%|▋         | 6836/100000 [2:17:44<29:59:17,  1.16s/it]

67154 episode score is 663.79


  7%|▋         | 6837/100000 [2:17:45<29:58:24,  1.16s/it]

67165 episode score is 668.43


  7%|▋         | 6838/100000 [2:17:46<30:05:34,  1.16s/it]

67176 episode score is 680.62


  7%|▋         | 6839/100000 [2:17:48<30:09:47,  1.17s/it]

67187 episode score is 663.80


  7%|▋         | 6840/100000 [2:17:49<30:13:21,  1.17s/it]

67198 episode score is 673.61


  7%|▋         | 6841/100000 [2:17:50<29:47:30,  1.15s/it]

67208 episode score is 702.94


  7%|▋         | 6842/100000 [2:17:51<29:33:40,  1.14s/it]

67219 episode score is 637.97


  7%|▋         | 6843/100000 [2:17:52<29:15:31,  1.13s/it]

67229 episode score is 710.54


  7%|▋         | 6844/100000 [2:17:53<29:11:04,  1.13s/it]

67239 episode score is 723.60


  7%|▋         | 6845/100000 [2:17:54<28:52:02,  1.12s/it]

67249 episode score is 696.07


  7%|▋         | 6846/100000 [2:17:55<29:13:39,  1.13s/it]

67259 episode score is 732.80


  7%|▋         | 6847/100000 [2:17:57<29:00:36,  1.12s/it]

67269 episode score is 703.20


  7%|▋         | 6848/100000 [2:17:58<29:07:30,  1.13s/it]

67279 episode score is 732.91


  7%|▋         | 6849/100000 [2:17:59<29:03:29,  1.12s/it]

67289 episode score is 708.97


  7%|▋         | 6850/100000 [2:18:00<29:17:24,  1.13s/it]

67299 episode score is 739.49


  7%|▋         | 6851/100000 [2:18:01<28:54:27,  1.12s/it]

67309 episode score is 685.50


  7%|▋         | 6852/100000 [2:18:02<28:53:20,  1.12s/it]

67318 episode score is 791.72


  7%|▋         | 6853/100000 [2:18:03<28:48:49,  1.11s/it]

67328 episode score is 707.00


  7%|▋         | 6854/100000 [2:18:04<29:10:20,  1.13s/it]

67337 episode score is 820.05


  7%|▋         | 6855/100000 [2:18:06<29:05:53,  1.12s/it]

67346 episode score is 789.74


  7%|▋         | 6856/100000 [2:18:07<29:31:37,  1.14s/it]

67356 episode score is 758.07


  7%|▋         | 6857/100000 [2:18:08<29:42:00,  1.15s/it]

67365 episode score is 836.50


  7%|▋         | 6858/100000 [2:18:09<29:52:38,  1.15s/it]

67375 episode score is 748.10


  7%|▋         | 6859/100000 [2:18:10<29:17:48,  1.13s/it]

67385 episode score is 688.96


  7%|▋         | 6860/100000 [2:18:11<29:15:57,  1.13s/it]

67395 episode score is 730.74


  7%|▋         | 6861/100000 [2:18:12<29:09:11,  1.13s/it]

67405 episode score is 720.94


  7%|▋         | 6862/100000 [2:18:13<28:58:09,  1.12s/it]

67415 episode score is 714.10


  7%|▋         | 6863/100000 [2:18:15<28:39:16,  1.11s/it]

67425 episode score is 692.09


  7%|▋         | 6864/100000 [2:18:16<28:42:40,  1.11s/it]

67435 episode score is 714.08


  7%|▋         | 6865/100000 [2:18:17<28:34:49,  1.10s/it]

67445 episode score is 707.26


  7%|▋         | 6866/100000 [2:18:18<29:22:47,  1.14s/it]

67456 episode score is 694.52


  7%|▋         | 6867/100000 [2:18:19<29:23:41,  1.14s/it]

67466 episode score is 731.26


  7%|▋         | 6868/100000 [2:18:20<29:12:27,  1.13s/it]

67476 episode score is 719.89


  7%|▋         | 6869/100000 [2:18:21<29:14:29,  1.13s/it]

67486 episode score is 737.92


  7%|▋         | 6870/100000 [2:18:23<29:32:48,  1.14s/it]

67496 episode score is 747.88


  7%|▋         | 6871/100000 [2:18:24<29:32:43,  1.14s/it]

67506 episode score is 738.27


  7%|▋         | 6872/100000 [2:18:25<29:26:25,  1.14s/it]

67516 episode score is 733.14


  7%|▋         | 6873/100000 [2:18:26<29:20:37,  1.13s/it]

67526 episode score is 731.18


  7%|▋         | 6874/100000 [2:18:27<29:25:09,  1.14s/it]

67536 episode score is 737.48


  7%|▋         | 6875/100000 [2:18:28<29:19:52,  1.13s/it]

67546 episode score is 727.46


  7%|▋         | 6876/100000 [2:18:29<29:36:07,  1.14s/it]

67556 episode score is 751.31


  7%|▋         | 6877/100000 [2:18:31<29:44:06,  1.15s/it]

67566 episode score is 755.35


  7%|▋         | 6878/100000 [2:18:32<29:26:22,  1.14s/it]

67576 episode score is 713.80


  7%|▋         | 6879/100000 [2:18:33<29:21:48,  1.14s/it]

67586 episode score is 725.74


  7%|▋         | 6880/100000 [2:18:34<29:15:25,  1.13s/it]

67596 episode score is 731.67


  7%|▋         | 6881/100000 [2:18:35<29:22:13,  1.14s/it]

67606 episode score is 744.68


  7%|▋         | 6882/100000 [2:18:36<29:13:15,  1.13s/it]

67616 episode score is 726.29


  7%|▋         | 6883/100000 [2:18:37<29:09:49,  1.13s/it]

67626 episode score is 732.69


  7%|▋         | 6884/100000 [2:18:38<29:02:54,  1.12s/it]

67636 episode score is 717.14


  7%|▋         | 6885/100000 [2:18:40<29:12:52,  1.13s/it]

67646 episode score is 745.49


  7%|▋         | 6886/100000 [2:18:41<29:03:07,  1.12s/it]

67656 episode score is 703.99


  7%|▋         | 6887/100000 [2:18:42<28:50:45,  1.12s/it]

67666 episode score is 709.30


  7%|▋         | 6888/100000 [2:18:43<29:25:47,  1.14s/it]

67676 episode score is 697.51


  7%|▋         | 6889/100000 [2:18:44<29:30:31,  1.14s/it]

67687 episode score is 662.27


  7%|▋         | 6890/100000 [2:18:45<29:05:20,  1.12s/it]

67697 episode score is 698.87


  7%|▋         | 6891/100000 [2:18:46<28:54:28,  1.12s/it]

67707 episode score is 705.93


  7%|▋         | 6892/100000 [2:18:47<29:41:49,  1.15s/it]

67718 episode score is 693.96


  7%|▋         | 6893/100000 [2:18:49<29:56:52,  1.16s/it]

67729 episode score is 681.55


  7%|▋         | 6894/100000 [2:18:50<29:24:12,  1.14s/it]

67739 episode score is 694.77


  7%|▋         | 6895/100000 [2:18:51<29:41:22,  1.15s/it]

67750 episode score is 681.77


  7%|▋         | 6896/100000 [2:18:52<30:04:55,  1.16s/it]

67761 episode score is 698.78


  7%|▋         | 6897/100000 [2:18:53<30:05:12,  1.16s/it]

67772 episode score is 673.37


  7%|▋         | 6898/100000 [2:18:54<29:36:11,  1.14s/it]

67782 episode score is 710.85


  7%|▋         | 6899/100000 [2:18:56<29:27:48,  1.14s/it]

67792 episode score is 726.47
67802 episode score is 706.01


  7%|▋         | 6900/100000 [2:18:58<36:43:16,  1.42s/it]

Iteration 6900: Average test reward: 717.70


  7%|▋         | 6901/100000 [2:18:59<34:41:15,  1.34s/it]

67812 episode score is 761.81


  7%|▋         | 6902/100000 [2:19:00<32:51:11,  1.27s/it]

67822 episode score is 712.84


  7%|▋         | 6903/100000 [2:19:01<31:36:36,  1.22s/it]

67832 episode score is 724.41


  7%|▋         | 6904/100000 [2:19:02<30:48:13,  1.19s/it]

67842 episode score is 717.90


  7%|▋         | 6905/100000 [2:19:03<30:03:41,  1.16s/it]

67852 episode score is 708.29


  7%|▋         | 6906/100000 [2:19:04<30:01:11,  1.16s/it]

67863 episode score is 678.85


  7%|▋         | 6907/100000 [2:19:05<29:40:57,  1.15s/it]

67873 episode score is 728.04


  7%|▋         | 6908/100000 [2:19:07<29:31:48,  1.14s/it]

67883 episode score is 728.57


  7%|▋         | 6909/100000 [2:19:08<29:19:29,  1.13s/it]

67893 episode score is 718.14


  7%|▋         | 6910/100000 [2:19:09<29:07:20,  1.13s/it]

67903 episode score is 713.50


  7%|▋         | 6911/100000 [2:19:10<28:59:41,  1.12s/it]

67913 episode score is 704.62


  7%|▋         | 6912/100000 [2:19:11<28:58:39,  1.12s/it]

67923 episode score is 716.97


  7%|▋         | 6913/100000 [2:19:12<29:01:49,  1.12s/it]

67933 episode score is 727.30


  7%|▋         | 6914/100000 [2:19:13<29:02:29,  1.12s/it]

67943 episode score is 717.04


  7%|▋         | 6915/100000 [2:19:14<28:49:28,  1.11s/it]

67953 episode score is 702.88


  7%|▋         | 6916/100000 [2:19:15<28:42:29,  1.11s/it]

67963 episode score is 704.96


  7%|▋         | 6917/100000 [2:19:17<29:16:13,  1.13s/it]

67974 episode score is 683.48


  7%|▋         | 6918/100000 [2:19:18<29:02:23,  1.12s/it]

67984 episode score is 704.46


  7%|▋         | 6919/100000 [2:19:19<28:53:30,  1.12s/it]

67994 episode score is 710.79


  7%|▋         | 6920/100000 [2:19:20<28:57:02,  1.12s/it]

68004 episode score is 727.75


  7%|▋         | 6921/100000 [2:19:21<28:59:32,  1.12s/it]

68014 episode score is 729.41


  7%|▋         | 6922/100000 [2:19:22<28:47:13,  1.11s/it]

68024 episode score is 709.16


  7%|▋         | 6923/100000 [2:19:23<28:50:00,  1.12s/it]

68034 episode score is 728.04


  7%|▋         | 6924/100000 [2:19:24<28:47:30,  1.11s/it]

68044 episode score is 725.79


  7%|▋         | 6925/100000 [2:19:26<28:35:08,  1.11s/it]

68054 episode score is 706.73


  7%|▋         | 6926/100000 [2:19:27<28:36:28,  1.11s/it]

68064 episode score is 725.99


  7%|▋         | 6927/100000 [2:19:28<28:51:23,  1.12s/it]

68074 episode score is 738.32


  7%|▋         | 6928/100000 [2:19:29<28:45:32,  1.11s/it]

68084 episode score is 724.58


  7%|▋         | 6929/100000 [2:19:30<28:46:02,  1.11s/it]

68094 episode score is 723.61


  7%|▋         | 6930/100000 [2:19:31<28:48:09,  1.11s/it]

68104 episode score is 727.84


  7%|▋         | 6931/100000 [2:19:32<28:38:45,  1.11s/it]

68114 episode score is 707.62


  7%|▋         | 6932/100000 [2:19:33<28:28:59,  1.10s/it]

68124 episode score is 700.77


  7%|▋         | 6933/100000 [2:19:34<28:19:10,  1.10s/it]

68134 episode score is 709.56


  7%|▋         | 6934/100000 [2:19:35<28:20:29,  1.10s/it]

68144 episode score is 718.68


  7%|▋         | 6935/100000 [2:19:37<28:33:02,  1.10s/it]

68154 episode score is 729.68


  7%|▋         | 6936/100000 [2:19:38<28:31:47,  1.10s/it]

68164 episode score is 715.08


  7%|▋         | 6937/100000 [2:19:39<28:28:14,  1.10s/it]

68174 episode score is 714.37


  7%|▋         | 6938/100000 [2:19:40<29:08:55,  1.13s/it]

68185 episode score is 696.33


  7%|▋         | 6939/100000 [2:19:41<28:55:18,  1.12s/it]

68195 episode score is 715.33


  7%|▋         | 6940/100000 [2:19:42<28:53:36,  1.12s/it]

68205 episode score is 726.14


  7%|▋         | 6941/100000 [2:19:43<28:58:28,  1.12s/it]

68215 episode score is 728.87


  7%|▋         | 6942/100000 [2:19:44<29:16:25,  1.13s/it]

68225 episode score is 754.15


  7%|▋         | 6943/100000 [2:19:46<29:21:08,  1.14s/it]

68235 episode score is 727.79


  7%|▋         | 6944/100000 [2:19:47<29:13:16,  1.13s/it]

68245 episode score is 722.22


  7%|▋         | 6945/100000 [2:19:48<29:18:05,  1.13s/it]

68255 episode score is 727.62


  7%|▋         | 6946/100000 [2:19:49<29:01:11,  1.12s/it]

68265 episode score is 708.09


  7%|▋         | 6947/100000 [2:19:50<29:01:27,  1.12s/it]

68275 episode score is 715.23


  7%|▋         | 6948/100000 [2:19:51<28:48:25,  1.11s/it]

68285 episode score is 705.78


  7%|▋         | 6949/100000 [2:19:52<28:48:49,  1.11s/it]

68295 episode score is 721.22


  7%|▋         | 6950/100000 [2:19:54<29:29:09,  1.14s/it]

68305 episode score is 719.18


  7%|▋         | 6951/100000 [2:19:55<29:15:33,  1.13s/it]

68315 episode score is 712.82


  7%|▋         | 6952/100000 [2:19:56<29:09:17,  1.13s/it]

68325 episode score is 699.53


  7%|▋         | 6953/100000 [2:19:57<29:11:16,  1.13s/it]

68335 episode score is 707.54


  7%|▋         | 6954/100000 [2:19:58<29:11:06,  1.13s/it]

68345 episode score is 740.46


  7%|▋         | 6955/100000 [2:19:59<29:14:50,  1.13s/it]

68355 episode score is 731.35


  7%|▋         | 6956/100000 [2:20:00<28:56:08,  1.12s/it]

68365 episode score is 699.28


  7%|▋         | 6957/100000 [2:20:01<28:48:56,  1.11s/it]

68375 episode score is 714.95


  7%|▋         | 6958/100000 [2:20:02<28:55:32,  1.12s/it]

68385 episode score is 703.76


  7%|▋         | 6959/100000 [2:20:04<28:51:03,  1.12s/it]

68395 episode score is 719.53


  7%|▋         | 6960/100000 [2:20:05<28:59:47,  1.12s/it]

68405 episode score is 738.69


  7%|▋         | 6961/100000 [2:20:06<29:01:35,  1.12s/it]

68415 episode score is 732.09


  7%|▋         | 6962/100000 [2:20:07<28:46:37,  1.11s/it]

68425 episode score is 702.31


  7%|▋         | 6963/100000 [2:20:08<28:38:30,  1.11s/it]

68435 episode score is 708.41


  7%|▋         | 6964/100000 [2:20:09<28:43:51,  1.11s/it]

68445 episode score is 717.87


  7%|▋         | 6965/100000 [2:20:10<29:10:09,  1.13s/it]

68456 episode score is 675.50


  7%|▋         | 6966/100000 [2:20:11<29:02:56,  1.12s/it]

68466 episode score is 700.77


  7%|▋         | 6967/100000 [2:20:13<28:54:20,  1.12s/it]

68476 episode score is 714.58


  7%|▋         | 6968/100000 [2:20:14<29:30:15,  1.14s/it]

68487 episode score is 694.57


  7%|▋         | 6969/100000 [2:20:15<29:39:02,  1.15s/it]

68497 episode score is 744.50


  7%|▋         | 6970/100000 [2:20:16<29:35:29,  1.15s/it]

68507 episode score is 739.59


  7%|▋         | 6971/100000 [2:20:17<29:20:47,  1.14s/it]

68517 episode score is 702.50


  7%|▋         | 6972/100000 [2:20:18<29:17:47,  1.13s/it]

68527 episode score is 733.40


  7%|▋         | 6973/100000 [2:20:19<29:18:32,  1.13s/it]

68537 episode score is 729.92


  7%|▋         | 6974/100000 [2:20:21<29:19:39,  1.13s/it]

68547 episode score is 730.72


  7%|▋         | 6975/100000 [2:20:22<29:33:33,  1.14s/it]

68558 episode score is 678.97


  7%|▋         | 6976/100000 [2:20:23<29:48:55,  1.15s/it]

68569 episode score is 681.10


  7%|▋         | 6977/100000 [2:20:24<29:31:22,  1.14s/it]

68579 episode score is 725.86


  7%|▋         | 6978/100000 [2:20:25<29:16:00,  1.13s/it]

68589 episode score is 716.39


  7%|▋         | 6979/100000 [2:20:26<29:09:34,  1.13s/it]

68599 episode score is 728.93


  7%|▋         | 6980/100000 [2:20:27<28:51:48,  1.12s/it]

68609 episode score is 701.47


  7%|▋         | 6981/100000 [2:20:28<29:19:22,  1.13s/it]

68620 episode score is 684.94


  7%|▋         | 6982/100000 [2:20:30<29:05:45,  1.13s/it]

68630 episode score is 712.97


  7%|▋         | 6983/100000 [2:20:31<29:01:50,  1.12s/it]

68640 episode score is 712.38


  7%|▋         | 6984/100000 [2:20:32<29:06:21,  1.13s/it]

68650 episode score is 731.14


  7%|▋         | 6985/100000 [2:20:33<28:59:23,  1.12s/it]

68660 episode score is 717.84


  7%|▋         | 6986/100000 [2:20:34<28:56:16,  1.12s/it]

68670 episode score is 719.94


  7%|▋         | 6987/100000 [2:20:35<28:41:03,  1.11s/it]

68680 episode score is 702.36


  7%|▋         | 6988/100000 [2:20:36<29:13:52,  1.13s/it]

68691 episode score is 688.62


  7%|▋         | 6989/100000 [2:20:37<28:51:18,  1.12s/it]

68701 episode score is 696.12


  7%|▋         | 6990/100000 [2:20:39<29:19:10,  1.13s/it]

68712 episode score is 686.21


  7%|▋         | 6991/100000 [2:20:40<29:35:22,  1.15s/it]

68723 episode score is 680.76


  7%|▋         | 6992/100000 [2:20:41<29:23:30,  1.14s/it]

68733 episode score is 701.92


  7%|▋         | 6993/100000 [2:20:42<29:01:07,  1.12s/it]

68743 episode score is 696.40


  7%|▋         | 6994/100000 [2:20:43<28:49:57,  1.12s/it]

68753 episode score is 702.74


  7%|▋         | 6995/100000 [2:20:44<29:17:48,  1.13s/it]

68764 episode score is 685.78


  7%|▋         | 6996/100000 [2:20:45<29:36:19,  1.15s/it]

68775 episode score is 683.43


  7%|▋         | 6997/100000 [2:20:47<29:19:56,  1.14s/it]

68785 episode score is 717.63


  7%|▋         | 6998/100000 [2:20:48<30:01:16,  1.16s/it]

68796 episode score is 694.38


  7%|▋         | 6999/100000 [2:20:49<29:31:12,  1.14s/it]

68806 episode score is 702.08
68816 episode score is 720.71


  7%|▋         | 7000/100000 [2:20:51<36:58:28,  1.43s/it]

Iteration 7000: Average test reward: 711.17


  7%|▋         | 7001/100000 [2:20:52<34:37:34,  1.34s/it]

68826 episode score is 726.61


  7%|▋         | 7002/100000 [2:20:53<33:01:51,  1.28s/it]

68836 episode score is 732.99


  7%|▋         | 7003/100000 [2:20:54<31:48:25,  1.23s/it]

68846 episode score is 727.26


  7%|▋         | 7004/100000 [2:20:55<30:53:27,  1.20s/it]

68856 episode score is 712.55


  7%|▋         | 7005/100000 [2:20:57<30:09:58,  1.17s/it]

68866 episode score is 700.57


  7%|▋         | 7006/100000 [2:20:58<29:49:52,  1.15s/it]

68876 episode score is 722.26


  7%|▋         | 7007/100000 [2:20:59<30:22:58,  1.18s/it]

68886 episode score is 727.60


  7%|▋         | 7008/100000 [2:21:00<29:45:18,  1.15s/it]

68896 episode score is 703.17


  7%|▋         | 7009/100000 [2:21:01<29:47:28,  1.15s/it]

68906 episode score is 735.52


  7%|▋         | 7010/100000 [2:21:02<29:33:32,  1.14s/it]

68916 episode score is 722.40


  7%|▋         | 7011/100000 [2:21:03<29:24:00,  1.14s/it]

68926 episode score is 725.95


  7%|▋         | 7012/100000 [2:21:05<29:18:48,  1.13s/it]

68936 episode score is 726.01


  7%|▋         | 7013/100000 [2:21:06<29:18:38,  1.13s/it]

68946 episode score is 731.60


  7%|▋         | 7014/100000 [2:21:07<29:16:06,  1.13s/it]

68956 episode score is 730.87


  7%|▋         | 7015/100000 [2:21:08<29:26:21,  1.14s/it]

68966 episode score is 754.65


  7%|▋         | 7016/100000 [2:21:09<29:21:25,  1.14s/it]

68976 episode score is 735.74


  7%|▋         | 7017/100000 [2:21:10<29:04:22,  1.13s/it]

68986 episode score is 712.00


  7%|▋         | 7018/100000 [2:21:11<28:49:01,  1.12s/it]

68996 episode score is 708.20


  7%|▋         | 7019/100000 [2:21:12<28:53:09,  1.12s/it]

69006 episode score is 724.64


  7%|▋         | 7020/100000 [2:21:14<28:48:39,  1.12s/it]

69016 episode score is 717.16


  7%|▋         | 7021/100000 [2:21:15<28:35:54,  1.11s/it]

69026 episode score is 705.58


  7%|▋         | 7022/100000 [2:21:16<28:56:58,  1.12s/it]

69037 episode score is 664.86


  7%|▋         | 7023/100000 [2:21:17<29:04:13,  1.13s/it]

69047 episode score is 728.19


  7%|▋         | 7024/100000 [2:21:18<29:16:10,  1.13s/it]

69057 episode score is 719.66


  7%|▋         | 7025/100000 [2:21:19<29:18:58,  1.14s/it]

69067 episode score is 734.60


  7%|▋         | 7026/100000 [2:21:20<29:27:31,  1.14s/it]

69077 episode score is 743.62


  7%|▋         | 7027/100000 [2:21:21<29:24:29,  1.14s/it]

69087 episode score is 731.05


  7%|▋         | 7028/100000 [2:21:23<29:10:23,  1.13s/it]

69097 episode score is 714.08


  7%|▋         | 7029/100000 [2:21:24<29:21:29,  1.14s/it]

69107 episode score is 741.68


  7%|▋         | 7030/100000 [2:21:25<29:21:17,  1.14s/it]

69117 episode score is 736.36


  7%|▋         | 7031/100000 [2:21:26<29:23:14,  1.14s/it]

69127 episode score is 744.26


  7%|▋         | 7032/100000 [2:21:27<29:01:57,  1.12s/it]

69137 episode score is 702.59


  7%|▋         | 7033/100000 [2:21:28<28:44:39,  1.11s/it]

69147 episode score is 694.89


  7%|▋         | 7034/100000 [2:21:29<28:36:03,  1.11s/it]

69157 episode score is 701.03


  7%|▋         | 7035/100000 [2:21:30<28:38:59,  1.11s/it]

69167 episode score is 710.34


  7%|▋         | 7036/100000 [2:21:32<28:36:46,  1.11s/it]

69177 episode score is 706.42


  7%|▋         | 7037/100000 [2:21:33<28:49:18,  1.12s/it]

69187 episode score is 728.43


  7%|▋         | 7038/100000 [2:21:34<28:48:25,  1.12s/it]

69197 episode score is 719.10


  7%|▋         | 7039/100000 [2:21:35<28:44:15,  1.11s/it]

69207 episode score is 714.26


  7%|▋         | 7040/100000 [2:21:36<28:51:16,  1.12s/it]

69217 episode score is 729.20


  7%|▋         | 7041/100000 [2:21:37<29:18:44,  1.14s/it]

69228 episode score is 686.74


  7%|▋         | 7042/100000 [2:21:38<29:33:28,  1.14s/it]

69238 episode score is 755.42


  7%|▋         | 7043/100000 [2:21:39<29:37:21,  1.15s/it]

69248 episode score is 742.12


  7%|▋         | 7044/100000 [2:21:41<29:25:50,  1.14s/it]

69258 episode score is 722.60


  7%|▋         | 7045/100000 [2:21:42<29:18:15,  1.13s/it]

69268 episode score is 717.26


  7%|▋         | 7046/100000 [2:21:43<29:08:10,  1.13s/it]

69278 episode score is 717.87


  7%|▋         | 7047/100000 [2:21:44<29:08:06,  1.13s/it]

69288 episode score is 728.22


  7%|▋         | 7048/100000 [2:21:45<28:55:18,  1.12s/it]

69298 episode score is 713.86


  7%|▋         | 7049/100000 [2:21:46<29:19:35,  1.14s/it]

69309 episode score is 681.62


  7%|▋         | 7050/100000 [2:21:47<29:03:34,  1.13s/it]

69319 episode score is 692.44


  7%|▋         | 7051/100000 [2:21:48<28:50:08,  1.12s/it]

69329 episode score is 700.84


  7%|▋         | 7052/100000 [2:21:50<29:03:38,  1.13s/it]

69339 episode score is 735.98


  7%|▋         | 7053/100000 [2:21:51<29:05:52,  1.13s/it]

69349 episode score is 725.92
69359 episode score is 705.79


  7%|▋         | 7055/100000 [2:21:53<29:27:31,  1.14s/it]

69369 episode score is 721.15


  7%|▋         | 7056/100000 [2:21:54<29:23:37,  1.14s/it]

69379 episode score is 735.75


  7%|▋         | 7057/100000 [2:21:55<29:31:37,  1.14s/it]

69389 episode score is 745.21


  7%|▋         | 7058/100000 [2:21:56<29:33:12,  1.14s/it]

69399 episode score is 742.31


  7%|▋         | 7059/100000 [2:21:58<29:44:13,  1.15s/it]

69409 episode score is 738.40


  7%|▋         | 7060/100000 [2:21:59<29:50:07,  1.16s/it]

69419 episode score is 744.54


  7%|▋         | 7061/100000 [2:22:00<29:55:57,  1.16s/it]

69429 episode score is 744.23


  7%|▋         | 7062/100000 [2:22:01<29:55:11,  1.16s/it]

69439 episode score is 740.29


  7%|▋         | 7063/100000 [2:22:02<29:55:27,  1.16s/it]

69449 episode score is 742.31


  7%|▋         | 7064/100000 [2:22:03<29:41:49,  1.15s/it]

69459 episode score is 733.34


  7%|▋         | 7065/100000 [2:22:05<29:42:57,  1.15s/it]

69469 episode score is 750.46


  7%|▋         | 7066/100000 [2:22:06<29:32:19,  1.14s/it]

69479 episode score is 731.12


  7%|▋         | 7067/100000 [2:22:07<29:32:16,  1.14s/it]

69489 episode score is 742.67


  7%|▋         | 7068/100000 [2:22:08<29:31:05,  1.14s/it]

69499 episode score is 737.68


  7%|▋         | 7069/100000 [2:22:09<29:18:25,  1.14s/it]

69509 episode score is 722.20


  7%|▋         | 7070/100000 [2:22:10<29:36:35,  1.15s/it]

69519 episode score is 760.36


  7%|▋         | 7071/100000 [2:22:11<29:34:35,  1.15s/it]

69529 episode score is 736.92


  7%|▋         | 7072/100000 [2:22:13<29:32:25,  1.14s/it]

69539 episode score is 735.36


  7%|▋         | 7073/100000 [2:22:14<29:11:28,  1.13s/it]

69549 episode score is 712.96


  7%|▋         | 7074/100000 [2:22:15<29:41:26,  1.15s/it]

69560 episode score is 688.66


  7%|▋         | 7075/100000 [2:22:16<29:56:23,  1.16s/it]

69571 episode score is 689.51


  7%|▋         | 7076/100000 [2:22:17<29:33:47,  1.15s/it]

69581 episode score is 709.54


  7%|▋         | 7077/100000 [2:22:18<29:34:08,  1.15s/it]

69591 episode score is 723.77


  7%|▋         | 7078/100000 [2:22:19<29:33:48,  1.15s/it]

69601 episode score is 702.63


  7%|▋         | 7079/100000 [2:22:21<29:13:05,  1.13s/it]

69611 episode score is 698.90


  7%|▋         | 7080/100000 [2:22:22<29:00:25,  1.12s/it]

69621 episode score is 696.94


  7%|▋         | 7081/100000 [2:22:23<29:23:08,  1.14s/it]

69632 episode score is 667.21


  7%|▋         | 7082/100000 [2:22:24<29:51:09,  1.16s/it]

69643 episode score is 694.35


  7%|▋         | 7083/100000 [2:22:25<30:10:26,  1.17s/it]

69654 episode score is 690.13


  7%|▋         | 7084/100000 [2:22:26<30:15:15,  1.17s/it]

69665 episode score is 673.13


  7%|▋         | 7085/100000 [2:22:28<29:46:49,  1.15s/it]

69675 episode score is 693.57


  7%|▋         | 7086/100000 [2:22:29<29:20:49,  1.14s/it]

69685 episode score is 695.52


  7%|▋         | 7087/100000 [2:22:30<29:14:20,  1.13s/it]

69695 episode score is 709.43


  7%|▋         | 7088/100000 [2:22:31<29:05:31,  1.13s/it]

69705 episode score is 703.63


  7%|▋         | 7089/100000 [2:22:32<29:15:49,  1.13s/it]

69715 episode score is 719.62


  7%|▋         | 7090/100000 [2:22:33<29:24:25,  1.14s/it]

69725 episode score is 732.50


  7%|▋         | 7091/100000 [2:22:34<29:48:17,  1.15s/it]

69736 episode score is 669.43


  7%|▋         | 7092/100000 [2:22:35<29:26:31,  1.14s/it]

69746 episode score is 701.22


  7%|▋         | 7093/100000 [2:22:37<29:56:40,  1.16s/it]

69757 episode score is 697.41


  7%|▋         | 7094/100000 [2:22:38<30:16:46,  1.17s/it]

69768 episode score is 689.37


  7%|▋         | 7095/100000 [2:22:39<29:47:54,  1.15s/it]

69778 episode score is 704.20


  7%|▋         | 7096/100000 [2:22:40<30:04:07,  1.17s/it]

69789 episode score is 686.54


  7%|▋         | 7097/100000 [2:22:41<29:53:45,  1.16s/it]

69799 episode score is 722.69


  7%|▋         | 7098/100000 [2:22:43<30:18:38,  1.17s/it]

69809 episode score is 703.01


  7%|▋         | 7099/100000 [2:22:44<30:30:49,  1.18s/it]

69820 episode score is 684.89
69830 episode score is 757.27


  7%|▋         | 7100/100000 [2:22:46<38:28:18,  1.49s/it]

Iteration 7100: Average test reward: 727.14


  7%|▋         | 7101/100000 [2:22:47<36:22:58,  1.41s/it]

69841 episode score is 687.03


  7%|▋         | 7102/100000 [2:22:48<34:25:43,  1.33s/it]

69851 episode score is 732.87


  7%|▋         | 7103/100000 [2:22:50<33:19:16,  1.29s/it]

69861 episode score is 754.99


  7%|▋         | 7104/100000 [2:22:51<32:18:58,  1.25s/it]

69871 episode score is 739.35


  7%|▋         | 7105/100000 [2:22:52<31:35:01,  1.22s/it]

69881 episode score is 727.68


  7%|▋         | 7106/100000 [2:22:53<31:18:26,  1.21s/it]

69891 episode score is 749.21


  7%|▋         | 7107/100000 [2:22:54<30:30:31,  1.18s/it]

69901 episode score is 698.34


  7%|▋         | 7108/100000 [2:22:55<30:19:25,  1.18s/it]

69911 episode score is 737.15


  7%|▋         | 7109/100000 [2:22:56<29:50:17,  1.16s/it]

69921 episode score is 713.83


  7%|▋         | 7110/100000 [2:22:58<30:10:36,  1.17s/it]

69932 episode score is 690.39


  7%|▋         | 7111/100000 [2:22:59<29:41:25,  1.15s/it]

69942 episode score is 699.36


  7%|▋         | 7112/100000 [2:23:00<29:52:58,  1.16s/it]

69952 episode score is 749.90


  7%|▋         | 7113/100000 [2:23:01<29:48:41,  1.16s/it]

69962 episode score is 732.73


  7%|▋         | 7114/100000 [2:23:02<29:32:07,  1.14s/it]

69972 episode score is 684.43


  7%|▋         | 7115/100000 [2:23:03<29:20:46,  1.14s/it]

69982 episode score is 704.67


  7%|▋         | 7116/100000 [2:23:04<29:06:57,  1.13s/it]

69992 episode score is 697.57


  7%|▋         | 7117/100000 [2:23:05<28:57:20,  1.12s/it]

70002 episode score is 700.51


  7%|▋         | 7118/100000 [2:23:07<28:50:51,  1.12s/it]

70012 episode score is 703.01


  7%|▋         | 7119/100000 [2:23:08<29:36:02,  1.15s/it]

70023 episode score is 691.11


  7%|▋         | 7120/100000 [2:23:09<29:59:48,  1.16s/it]

70034 episode score is 677.75


  7%|▋         | 7121/100000 [2:23:10<30:01:40,  1.16s/it]

70045 episode score is 665.05


  7%|▋         | 7122/100000 [2:23:11<30:10:08,  1.17s/it]

70056 episode score is 668.95


  7%|▋         | 7123/100000 [2:23:12<29:59:46,  1.16s/it]

70066 episode score is 721.85


  7%|▋         | 7124/100000 [2:23:14<30:20:02,  1.18s/it]

70077 episode score is 685.66


  7%|▋         | 7125/100000 [2:23:15<29:44:04,  1.15s/it]

70087 episode score is 693.51


  7%|▋         | 7126/100000 [2:23:16<29:31:11,  1.14s/it]

70097 episode score is 702.51


  7%|▋         | 7127/100000 [2:23:17<29:45:38,  1.15s/it]

70107 episode score is 725.39


  7%|▋         | 7128/100000 [2:23:18<29:39:59,  1.15s/it]

70117 episode score is 717.11


  7%|▋         | 7129/100000 [2:23:19<29:37:07,  1.15s/it]

70127 episode score is 718.05


  7%|▋         | 7130/100000 [2:23:21<29:38:21,  1.15s/it]

70137 episode score is 724.96


  7%|▋         | 7131/100000 [2:23:22<29:36:51,  1.15s/it]

70147 episode score is 719.87


  7%|▋         | 7132/100000 [2:23:23<30:14:36,  1.17s/it]

70158 episode score is 691.80


  7%|▋         | 7133/100000 [2:23:24<30:07:49,  1.17s/it]

70168 episode score is 732.40


  7%|▋         | 7134/100000 [2:23:25<29:45:31,  1.15s/it]

70178 episode score is 706.40


  7%|▋         | 7135/100000 [2:23:26<29:42:21,  1.15s/it]

70188 episode score is 723.50


  7%|▋         | 7136/100000 [2:23:28<29:46:53,  1.15s/it]

70198 episode score is 737.25


  7%|▋         | 7137/100000 [2:23:29<29:34:26,  1.15s/it]

70208 episode score is 715.21


  7%|▋         | 7138/100000 [2:23:30<29:59:38,  1.16s/it]

70218 episode score is 763.55


  7%|▋         | 7139/100000 [2:23:31<30:10:59,  1.17s/it]

70228 episode score is 744.90


  7%|▋         | 7140/100000 [2:23:32<30:07:11,  1.17s/it]

70238 episode score is 718.44


  7%|▋         | 7141/100000 [2:23:33<30:21:36,  1.18s/it]

70248 episode score is 752.85


  7%|▋         | 7142/100000 [2:23:35<30:41:21,  1.19s/it]

70258 episode score is 777.35


  7%|▋         | 7143/100000 [2:23:36<30:34:13,  1.19s/it]

70268 episode score is 737.78


  7%|▋         | 7144/100000 [2:23:37<30:35:58,  1.19s/it]

70278 episode score is 736.57


  7%|▋         | 7145/100000 [2:23:38<31:01:32,  1.20s/it]

70287 episode score is 878.44


  7%|▋         | 7146/100000 [2:23:39<31:07:44,  1.21s/it]

70296 episode score is 855.12


  7%|▋         | 7147/100000 [2:23:41<30:28:12,  1.18s/it]

70304 episode score is 905.27


  7%|▋         | 7148/100000 [2:23:42<30:39:07,  1.19s/it]

70310 episode score is 1205.92


  7%|▋         | 7149/100000 [2:23:43<30:35:44,  1.19s/it]

70316 episode score is 1188.19


  7%|▋         | 7150/100000 [2:23:44<30:01:30,  1.16s/it]

70321 episode score is 1348.18


  7%|▋         | 7151/100000 [2:23:45<31:04:34,  1.20s/it]

70327 episode score is 1320.53


  7%|▋         | 7152/100000 [2:23:47<31:28:54,  1.22s/it]

70333 episode score is 1273.33


  7%|▋         | 7153/100000 [2:23:48<30:45:13,  1.19s/it]

70338 episode score is 1333.40


  7%|▋         | 7154/100000 [2:23:49<30:37:53,  1.19s/it]

70343 episode score is 1408.38


  7%|▋         | 7155/100000 [2:23:50<30:53:35,  1.20s/it]

70349 episode score is 1249.25


  7%|▋         | 7156/100000 [2:23:51<31:39:29,  1.23s/it]

70356 episode score is 1133.02


  7%|▋         | 7157/100000 [2:23:53<30:53:55,  1.20s/it]

70361 episode score is 1343.36


  7%|▋         | 7158/100000 [2:23:54<30:34:29,  1.19s/it]

70367 episode score is 1182.99


  7%|▋         | 7159/100000 [2:23:55<31:46:44,  1.23s/it]

70374 episode score is 1099.57


  7%|▋         | 7160/100000 [2:23:56<31:31:36,  1.22s/it]

70381 episode score is 1051.63


  7%|▋         | 7161/100000 [2:23:57<31:41:45,  1.23s/it]

70387 episode score is 1265.61


  7%|▋         | 7162/100000 [2:23:59<31:09:24,  1.21s/it]

70393 episode score is 1168.52


  7%|▋         | 7163/100000 [2:24:00<31:04:21,  1.20s/it]

70399 episode score is 1210.95


  7%|▋         | 7164/100000 [2:24:01<30:28:58,  1.18s/it]

70405 episode score is 1151.43


  7%|▋         | 7165/100000 [2:24:02<30:34:01,  1.19s/it]

70411 episode score is 1183.34


  7%|▋         | 7166/100000 [2:24:03<31:09:11,  1.21s/it]

70417 episode score is 1285.40


  7%|▋         | 7167/100000 [2:24:05<31:28:26,  1.22s/it]

70423 episode score is 1271.54


  7%|▋         | 7168/100000 [2:24:06<31:30:11,  1.22s/it]

70429 episode score is 1250.82


  7%|▋         | 7169/100000 [2:24:07<30:51:21,  1.20s/it]

70435 episode score is 1188.66


  7%|▋         | 7170/100000 [2:24:08<30:18:41,  1.18s/it]

70440 episode score is 1349.66


  7%|▋         | 7171/100000 [2:24:09<30:14:49,  1.17s/it]

70448 episode score is 923.15


  7%|▋         | 7172/100000 [2:24:10<30:04:56,  1.17s/it]

70455 episode score is 1017.78


  7%|▋         | 7173/100000 [2:24:12<29:47:47,  1.16s/it]

70462 episode score is 993.64


  7%|▋         | 7174/100000 [2:24:13<30:50:05,  1.20s/it]

70469 episode score is 1127.00


  7%|▋         | 7175/100000 [2:24:14<30:09:45,  1.17s/it]

70476 episode score is 990.66


  7%|▋         | 7176/100000 [2:24:15<29:59:32,  1.16s/it]

70483 episode score is 1056.78


  7%|▋         | 7177/100000 [2:24:16<29:42:35,  1.15s/it]

70490 episode score is 998.80


  7%|▋         | 7178/100000 [2:24:17<29:38:55,  1.15s/it]

70497 episode score is 999.29


  7%|▋         | 7179/100000 [2:24:19<30:24:03,  1.18s/it]

70505 episode score is 973.76


  7%|▋         | 7180/100000 [2:24:20<30:22:13,  1.18s/it]

70512 episode score is 1043.15


  7%|▋         | 7181/100000 [2:24:21<29:51:40,  1.16s/it]

70521 episode score is 766.29


  7%|▋         | 7182/100000 [2:24:22<30:01:34,  1.16s/it]

70530 episode score is 817.98


  7%|▋         | 7183/100000 [2:24:23<29:33:57,  1.15s/it]

70539 episode score is 774.05


  7%|▋         | 7184/100000 [2:24:24<29:19:36,  1.14s/it]

70548 episode score is 776.68


  7%|▋         | 7185/100000 [2:24:25<29:09:33,  1.13s/it]

70557 episode score is 777.66


  7%|▋         | 7186/100000 [2:24:27<29:29:14,  1.14s/it]

70568 episode score is 646.75


  7%|▋         | 7187/100000 [2:24:28<29:11:34,  1.13s/it]

70578 episode score is 670.15


  7%|▋         | 7188/100000 [2:24:29<29:16:04,  1.14s/it]

70589 episode score is 615.15


  7%|▋         | 7189/100000 [2:24:30<28:58:12,  1.12s/it]

70599 episode score is 677.19


  7%|▋         | 7190/100000 [2:24:31<29:26:24,  1.14s/it]

70609 episode score is 737.26


  7%|▋         | 7191/100000 [2:24:32<30:24:10,  1.18s/it]

70618 episode score is 867.72


  7%|▋         | 7192/100000 [2:24:34<30:22:29,  1.18s/it]

70627 episode score is 818.56


  7%|▋         | 7193/100000 [2:24:35<29:59:21,  1.16s/it]

70637 episode score is 690.27


  7%|▋         | 7194/100000 [2:24:36<29:36:33,  1.15s/it]

70647 episode score is 683.78


  7%|▋         | 7195/100000 [2:24:37<30:15:36,  1.17s/it]

70657 episode score is 771.07


  7%|▋         | 7196/100000 [2:24:38<30:04:48,  1.17s/it]

70668 episode score is 614.03


  7%|▋         | 7197/100000 [2:24:39<30:03:11,  1.17s/it]

70678 episode score is 717.65


  7%|▋         | 7198/100000 [2:24:41<29:43:16,  1.15s/it]

70688 episode score is 684.98


  7%|▋         | 7199/100000 [2:24:42<29:30:41,  1.14s/it]

70697 episode score is 773.43
70707 episode score is 692.64


  7%|▋         | 7200/100000 [2:24:44<37:19:44,  1.45s/it]

Iteration 7200: Average test reward: 698.93


  7%|▋         | 7201/100000 [2:24:45<34:47:49,  1.35s/it]

70717 episode score is 695.60


  7%|▋         | 7202/100000 [2:24:46<33:49:05,  1.31s/it]

70728 episode score is 622.83


  7%|▋         | 7203/100000 [2:24:47<32:30:35,  1.26s/it]

70739 episode score is 620.82


  7%|▋         | 7204/100000 [2:24:48<31:39:42,  1.23s/it]

70749 episode score is 710.12


  7%|▋         | 7205/100000 [2:24:50<30:40:43,  1.19s/it]

70759 episode score is 672.57


  7%|▋         | 7206/100000 [2:24:51<29:57:05,  1.16s/it]

70769 episode score is 681.23


  7%|▋         | 7207/100000 [2:24:52<30:04:44,  1.17s/it]

70780 episode score is 669.40


  7%|▋         | 7208/100000 [2:24:53<30:07:29,  1.17s/it]

70790 episode score is 739.11


  7%|▋         | 7209/100000 [2:24:54<29:43:53,  1.15s/it]

70799 episode score is 786.85


  7%|▋         | 7210/100000 [2:24:55<29:21:24,  1.14s/it]

70807 episode score is 857.74


  7%|▋         | 7211/100000 [2:24:56<29:20:40,  1.14s/it]

70817 episode score is 715.96


  7%|▋         | 7212/100000 [2:24:58<29:34:44,  1.15s/it]

70826 episode score is 812.95


  7%|▋         | 7213/100000 [2:24:59<29:07:31,  1.13s/it]

70833 episode score is 982.59


  7%|▋         | 7214/100000 [2:25:00<29:07:31,  1.13s/it]

70842 episode score is 760.67


  7%|▋         | 7215/100000 [2:25:01<29:30:48,  1.15s/it]

70853 episode score is 662.68


  7%|▋         | 7216/100000 [2:25:02<29:08:28,  1.13s/it]

70863 episode score is 680.28


  7%|▋         | 7217/100000 [2:25:03<29:01:58,  1.13s/it]

70872 episode score is 774.42


  7%|▋         | 7218/100000 [2:25:04<29:23:26,  1.14s/it]

70882 episode score is 737.39


  7%|▋         | 7219/100000 [2:25:05<29:33:01,  1.15s/it]

70892 episode score is 721.29


  7%|▋         | 7220/100000 [2:25:07<29:17:47,  1.14s/it]

70902 episode score is 692.43


  7%|▋         | 7221/100000 [2:25:08<29:04:44,  1.13s/it]

70912 episode score is 686.99


  7%|▋         | 7222/100000 [2:25:09<29:36:50,  1.15s/it]

70923 episode score is 670.61


  7%|▋         | 7223/100000 [2:25:10<29:11:55,  1.13s/it]

70933 episode score is 684.55


  7%|▋         | 7224/100000 [2:25:11<29:17:43,  1.14s/it]

70942 episode score is 795.78


  7%|▋         | 7225/100000 [2:25:12<29:17:44,  1.14s/it]

70952 episode score is 704.98


  7%|▋         | 7226/100000 [2:25:13<29:44:18,  1.15s/it]

70963 episode score is 670.54


  7%|▋         | 7227/100000 [2:25:15<29:52:20,  1.16s/it]

70974 episode score is 660.16


  7%|▋         | 7228/100000 [2:25:16<30:10:51,  1.17s/it]

70985 episode score is 671.45


  7%|▋         | 7229/100000 [2:25:17<30:14:08,  1.17s/it]

70996 episode score is 658.93


  7%|▋         | 7230/100000 [2:25:18<30:21:50,  1.18s/it]

71007 episode score is 667.65


  7%|▋         | 7231/100000 [2:25:19<30:18:08,  1.18s/it]

71018 episode score is 653.59


  7%|▋         | 7232/100000 [2:25:20<29:39:56,  1.15s/it]

71028 episode score is 679.10


  7%|▋         | 7233/100000 [2:25:22<29:15:24,  1.14s/it]

71038 episode score is 691.31


  7%|▋         | 7234/100000 [2:25:23<29:23:06,  1.14s/it]

71048 episode score is 698.68


  7%|▋         | 7235/100000 [2:25:24<29:41:01,  1.15s/it]

71059 episode score is 666.82


  7%|▋         | 7236/100000 [2:25:25<29:17:23,  1.14s/it]

71069 episode score is 690.76


  7%|▋         | 7237/100000 [2:25:26<29:29:30,  1.14s/it]

71079 episode score is 722.46


  7%|▋         | 7238/100000 [2:25:27<29:45:09,  1.15s/it]

71090 episode score is 659.99


  7%|▋         | 7239/100000 [2:25:28<29:38:41,  1.15s/it]

71100 episode score is 714.83


  7%|▋         | 7240/100000 [2:25:30<29:57:13,  1.16s/it]

71111 episode score is 675.73


  7%|▋         | 7241/100000 [2:25:31<29:31:45,  1.15s/it]

71121 episode score is 693.02


  7%|▋         | 7242/100000 [2:25:32<29:31:41,  1.15s/it]

71131 episode score is 714.85


  7%|▋         | 7243/100000 [2:25:33<29:19:00,  1.14s/it]

71141 episode score is 697.18


  7%|▋         | 7244/100000 [2:25:34<29:02:55,  1.13s/it]

71151 episode score is 687.94


  7%|▋         | 7245/100000 [2:25:35<29:06:55,  1.13s/it]

71161 episode score is 717.14


  7%|▋         | 7246/100000 [2:25:36<29:09:44,  1.13s/it]

71170 episode score is 787.71


  7%|▋         | 7247/100000 [2:25:38<28:59:12,  1.13s/it]

71180 episode score is 691.71


  7%|▋         | 7248/100000 [2:25:39<28:53:40,  1.12s/it]

71190 episode score is 695.08


  7%|▋         | 7249/100000 [2:25:40<28:46:45,  1.12s/it]

71200 episode score is 688.90


  7%|▋         | 7250/100000 [2:25:41<29:24:58,  1.14s/it]

71211 episode score is 678.79


  7%|▋         | 7251/100000 [2:25:42<29:42:53,  1.15s/it]

71222 episode score is 670.17


  7%|▋         | 7252/100000 [2:25:43<29:57:21,  1.16s/it]

71233 episode score is 673.59


  7%|▋         | 7253/100000 [2:25:44<29:38:04,  1.15s/it]

71243 episode score is 702.60


  7%|▋         | 7254/100000 [2:25:46<29:50:35,  1.16s/it]

71254 episode score is 667.02


  7%|▋         | 7255/100000 [2:25:47<29:46:41,  1.16s/it]

71265 episode score is 649.17


  7%|▋         | 7256/100000 [2:25:48<29:29:30,  1.14s/it]

71275 episode score is 689.58


  7%|▋         | 7257/100000 [2:25:49<29:54:43,  1.16s/it]

71286 episode score is 680.01


  7%|▋         | 7258/100000 [2:25:50<30:00:09,  1.16s/it]

71297 episode score is 672.29


  7%|▋         | 7259/100000 [2:25:51<30:07:36,  1.17s/it]

71308 episode score is 674.18


  7%|▋         | 7260/100000 [2:25:53<30:12:41,  1.17s/it]

71319 episode score is 665.33


  7%|▋         | 7261/100000 [2:25:54<31:07:00,  1.21s/it]

71330 episode score is 676.20


  7%|▋         | 7262/100000 [2:25:55<30:24:54,  1.18s/it]

71340 episode score is 699.16


  7%|▋         | 7263/100000 [2:25:56<29:53:51,  1.16s/it]

71350 episode score is 701.15


  7%|▋         | 7264/100000 [2:25:57<29:31:33,  1.15s/it]

71360 episode score is 710.10


  7%|▋         | 7265/100000 [2:25:58<29:23:28,  1.14s/it]

71370 episode score is 716.43


  7%|▋         | 7266/100000 [2:26:00<29:32:04,  1.15s/it]

71380 episode score is 724.57


  7%|▋         | 7267/100000 [2:26:01<29:32:47,  1.15s/it]

71390 episode score is 734.39


  7%|▋         | 7268/100000 [2:26:02<29:22:11,  1.14s/it]

71400 episode score is 717.76


  7%|▋         | 7269/100000 [2:26:03<29:39:17,  1.15s/it]

71410 episode score is 745.39


  7%|▋         | 7270/100000 [2:26:04<29:52:19,  1.16s/it]

71420 episode score is 752.66


  7%|▋         | 7271/100000 [2:26:05<29:34:08,  1.15s/it]

71430 episode score is 718.76


  7%|▋         | 7272/100000 [2:26:06<29:26:02,  1.14s/it]

71440 episode score is 721.36


  7%|▋         | 7273/100000 [2:26:08<29:55:24,  1.16s/it]

71451 episode score is 698.46


  7%|▋         | 7274/100000 [2:26:09<29:34:17,  1.15s/it]

71461 episode score is 705.47


  7%|▋         | 7275/100000 [2:26:10<29:23:33,  1.14s/it]

71471 episode score is 717.16


  7%|▋         | 7276/100000 [2:26:11<29:31:32,  1.15s/it]

71481 episode score is 737.56


  7%|▋         | 7277/100000 [2:26:12<29:24:50,  1.14s/it]

71491 episode score is 716.18


  7%|▋         | 7278/100000 [2:26:13<29:32:34,  1.15s/it]

71501 episode score is 732.16


  7%|▋         | 7279/100000 [2:26:14<29:31:38,  1.15s/it]

71511 episode score is 730.53


  7%|▋         | 7280/100000 [2:26:16<29:51:45,  1.16s/it]

71521 episode score is 761.89


  7%|▋         | 7281/100000 [2:26:17<29:51:50,  1.16s/it]

71531 episode score is 744.68


  7%|▋         | 7282/100000 [2:26:18<29:32:47,  1.15s/it]

71541 episode score is 707.36


  7%|▋         | 7283/100000 [2:26:19<29:37:00,  1.15s/it]

71551 episode score is 734.31


  7%|▋         | 7284/100000 [2:26:20<29:39:14,  1.15s/it]

71561 episode score is 735.15


  7%|▋         | 7285/100000 [2:26:21<29:24:21,  1.14s/it]

71571 episode score is 717.26


  7%|▋         | 7286/100000 [2:26:23<29:35:29,  1.15s/it]

71581 episode score is 744.48


  7%|▋         | 7287/100000 [2:26:24<29:47:08,  1.16s/it]

71591 episode score is 748.52


  7%|▋         | 7288/100000 [2:26:25<29:59:41,  1.16s/it]

71601 episode score is 756.73


  7%|▋         | 7289/100000 [2:26:26<29:38:12,  1.15s/it]

71611 episode score is 719.97


  7%|▋         | 7290/100000 [2:26:27<29:25:28,  1.14s/it]

71621 episode score is 715.53


  7%|▋         | 7291/100000 [2:26:28<29:30:40,  1.15s/it]

71631 episode score is 741.03


  7%|▋         | 7292/100000 [2:26:29<29:37:51,  1.15s/it]

71641 episode score is 742.08


  7%|▋         | 7293/100000 [2:26:31<29:53:47,  1.16s/it]

71651 episode score is 743.42


  7%|▋         | 7294/100000 [2:26:32<30:10:35,  1.17s/it]

71661 episode score is 761.86


  7%|▋         | 7295/100000 [2:26:33<30:29:16,  1.18s/it]

71671 episode score is 760.34


  7%|▋         | 7296/100000 [2:26:34<30:14:18,  1.17s/it]

71681 episode score is 738.01


  7%|▋         | 7297/100000 [2:26:35<30:10:21,  1.17s/it]

71691 episode score is 746.28


  7%|▋         | 7298/100000 [2:26:37<29:59:08,  1.16s/it]

71701 episode score is 736.90


  7%|▋         | 7299/100000 [2:26:38<29:44:11,  1.15s/it]

71711 episode score is 722.34
71721 episode score is 709.90


  7%|▋         | 7300/100000 [2:26:40<37:20:44,  1.45s/it]

Iteration 7300: Average test reward: 738.08


  7%|▋         | 7301/100000 [2:26:41<34:55:30,  1.36s/it]

71731 episode score is 725.30


  7%|▋         | 7302/100000 [2:26:42<33:03:28,  1.28s/it]

71741 episode score is 709.87


  7%|▋         | 7303/100000 [2:26:43<31:40:41,  1.23s/it]

71751 episode score is 697.75


  7%|▋         | 7304/100000 [2:26:44<30:40:32,  1.19s/it]

71761 episode score is 700.63


  7%|▋         | 7305/100000 [2:26:45<30:10:49,  1.17s/it]

71771 episode score is 722.91


  7%|▋         | 7306/100000 [2:26:47<30:14:00,  1.17s/it]

71782 episode score is 677.50


  7%|▋         | 7307/100000 [2:26:48<30:20:44,  1.18s/it]

71793 episode score is 672.80


  7%|▋         | 7308/100000 [2:26:49<30:22:53,  1.18s/it]

71804 episode score is 668.47


  7%|▋         | 7309/100000 [2:26:50<29:52:38,  1.16s/it]

71814 episode score is 713.76


  7%|▋         | 7310/100000 [2:26:51<29:30:23,  1.15s/it]

71824 episode score is 712.74


  7%|▋         | 7311/100000 [2:26:52<29:08:57,  1.13s/it]

71834 episode score is 699.17


  7%|▋         | 7312/100000 [2:26:53<29:26:01,  1.14s/it]

71844 episode score is 739.21


  7%|▋         | 7313/100000 [2:26:55<29:25:10,  1.14s/it]

71854 episode score is 723.93


  7%|▋         | 7314/100000 [2:26:56<29:07:16,  1.13s/it]

71864 episode score is 706.59


  7%|▋         | 7315/100000 [2:26:57<29:07:33,  1.13s/it]

71874 episode score is 727.54


  7%|▋         | 7316/100000 [2:26:58<28:50:47,  1.12s/it]

71884 episode score is 701.38


  7%|▋         | 7317/100000 [2:26:59<29:18:58,  1.14s/it]

71895 episode score is 681.03


  7%|▋         | 7318/100000 [2:27:00<29:10:07,  1.13s/it]

71905 episode score is 713.33


  7%|▋         | 7319/100000 [2:27:01<29:00:03,  1.13s/it]

71915 episode score is 711.08


  7%|▋         | 7320/100000 [2:27:02<29:29:32,  1.15s/it]

71926 episode score is 680.10


  7%|▋         | 7321/100000 [2:27:04<29:47:21,  1.16s/it]

71937 episode score is 681.53


  7%|▋         | 7322/100000 [2:27:05<29:24:17,  1.14s/it]

71947 episode score is 708.60


  7%|▋         | 7323/100000 [2:27:06<29:40:56,  1.15s/it]

71958 episode score is 685.46


  7%|▋         | 7324/100000 [2:27:07<29:34:06,  1.15s/it]

71969 episode score is 651.87


  7%|▋         | 7325/100000 [2:27:08<30:38:55,  1.19s/it]

71980 episode score is 688.95


  7%|▋         | 7326/100000 [2:27:10<30:33:46,  1.19s/it]

71991 episode score is 671.25


  7%|▋         | 7327/100000 [2:27:11<30:22:59,  1.18s/it]

72002 episode score is 666.04


  7%|▋         | 7328/100000 [2:27:12<29:49:57,  1.16s/it]

72012 episode score is 704.45


  7%|▋         | 7329/100000 [2:27:13<29:32:43,  1.15s/it]

72022 episode score is 712.55


  7%|▋         | 7330/100000 [2:27:14<29:27:58,  1.14s/it]

72032 episode score is 717.62


  7%|▋         | 7331/100000 [2:27:15<29:08:18,  1.13s/it]

72042 episode score is 700.32


  7%|▋         | 7332/100000 [2:27:16<29:27:40,  1.14s/it]

72053 episode score is 680.38


  7%|▋         | 7333/100000 [2:27:18<29:32:05,  1.15s/it]

72063 episode score is 730.95


  7%|▋         | 7334/100000 [2:27:19<29:28:01,  1.14s/it]

72073 episode score is 729.90


  7%|▋         | 7335/100000 [2:27:20<29:59:49,  1.17s/it]

72083 episode score is 771.73


  7%|▋         | 7336/100000 [2:27:21<29:35:53,  1.15s/it]

72093 episode score is 705.96


  7%|▋         | 7337/100000 [2:27:22<29:48:24,  1.16s/it]

72103 episode score is 757.11


  7%|▋         | 7338/100000 [2:27:23<29:20:13,  1.14s/it]

72113 episode score is 697.47


  7%|▋         | 7339/100000 [2:27:24<29:14:22,  1.14s/it]

72123 episode score is 714.63


  7%|▋         | 7340/100000 [2:27:26<29:22:23,  1.14s/it]

72134 episode score is 663.11


  7%|▋         | 7341/100000 [2:27:27<29:36:25,  1.15s/it]

72145 episode score is 670.42


  7%|▋         | 7342/100000 [2:27:28<29:34:16,  1.15s/it]

72155 episode score is 735.62


  7%|▋         | 7343/100000 [2:27:29<29:27:11,  1.14s/it]

72165 episode score is 719.46


  7%|▋         | 7344/100000 [2:27:30<29:03:18,  1.13s/it]

72175 episode score is 695.27


  7%|▋         | 7345/100000 [2:27:31<29:03:24,  1.13s/it]

72185 episode score is 720.62


  7%|▋         | 7346/100000 [2:27:32<29:24:07,  1.14s/it]

72195 episode score is 739.73


  7%|▋         | 7347/100000 [2:27:34<29:13:48,  1.14s/it]

72205 episode score is 713.43


  7%|▋         | 7348/100000 [2:27:35<29:11:17,  1.13s/it]

72215 episode score is 725.45


  7%|▋         | 7349/100000 [2:27:36<29:11:31,  1.13s/it]

72225 episode score is 730.60


  7%|▋         | 7350/100000 [2:27:37<29:16:20,  1.14s/it]

72235 episode score is 735.46


  7%|▋         | 7351/100000 [2:27:38<28:58:48,  1.13s/it]

72245 episode score is 705.70


  7%|▋         | 7352/100000 [2:27:39<29:02:59,  1.13s/it]

72255 episode score is 728.69


  7%|▋         | 7353/100000 [2:27:40<29:20:25,  1.14s/it]

72265 episode score is 744.15


  7%|▋         | 7354/100000 [2:27:41<29:31:37,  1.15s/it]

72275 episode score is 742.00


  7%|▋         | 7355/100000 [2:27:43<29:45:12,  1.16s/it]

72285 episode score is 750.44


  7%|▋         | 7356/100000 [2:27:44<29:27:05,  1.14s/it]

72295 episode score is 700.63


  7%|▋         | 7357/100000 [2:27:45<29:47:15,  1.16s/it]

72306 episode score is 676.61


  7%|▋         | 7358/100000 [2:27:46<30:01:02,  1.17s/it]

72317 episode score is 685.96


  7%|▋         | 7359/100000 [2:27:47<29:42:56,  1.15s/it]

72327 episode score is 710.47


  7%|▋         | 7360/100000 [2:27:48<29:46:29,  1.16s/it]

72337 episode score is 736.23


  7%|▋         | 7361/100000 [2:27:50<29:42:17,  1.15s/it]

72347 episode score is 733.15


  7%|▋         | 7362/100000 [2:27:51<29:36:34,  1.15s/it]

72357 episode score is 726.30


  7%|▋         | 7363/100000 [2:27:52<29:29:11,  1.15s/it]

72367 episode score is 727.46


  7%|▋         | 7364/100000 [2:27:53<29:29:44,  1.15s/it]

72377 episode score is 728.55


  7%|▋         | 7365/100000 [2:27:54<29:38:10,  1.15s/it]

72387 episode score is 744.17


  7%|▋         | 7366/100000 [2:27:55<29:38:06,  1.15s/it]

72397 episode score is 734.52


  7%|▋         | 7367/100000 [2:27:56<29:17:44,  1.14s/it]

72407 episode score is 702.72


  7%|▋         | 7368/100000 [2:27:58<29:30:59,  1.15s/it]

72418 episode score is 671.73


  7%|▋         | 7369/100000 [2:27:59<29:26:38,  1.14s/it]

72428 episode score is 728.37


  7%|▋         | 7370/100000 [2:28:00<29:37:07,  1.15s/it]

72438 episode score is 727.55


  7%|▋         | 7371/100000 [2:28:01<29:22:34,  1.14s/it]

72448 episode score is 711.82


  7%|▋         | 7372/100000 [2:28:02<29:10:33,  1.13s/it]

72458 episode score is 709.55


  7%|▋         | 7373/100000 [2:28:03<29:15:00,  1.14s/it]

72468 episode score is 733.95


  7%|▋         | 7374/100000 [2:28:04<29:41:54,  1.15s/it]

72478 episode score is 758.39


  7%|▋         | 7375/100000 [2:28:06<29:24:50,  1.14s/it]

72488 episode score is 707.74


  7%|▋         | 7376/100000 [2:28:07<29:29:47,  1.15s/it]

72498 episode score is 736.53


  7%|▋         | 7377/100000 [2:28:08<29:57:16,  1.16s/it]

72509 episode score is 694.91


  7%|▋         | 7378/100000 [2:28:09<30:09:50,  1.17s/it]

72520 episode score is 682.17


  7%|▋         | 7379/100000 [2:28:10<31:00:08,  1.21s/it]

72531 episode score is 692.30


  7%|▋         | 7380/100000 [2:28:12<30:32:06,  1.19s/it]

72541 episode score is 724.16


  7%|▋         | 7381/100000 [2:28:13<30:31:59,  1.19s/it]

72552 episode score is 683.66


  7%|▋         | 7382/100000 [2:28:14<30:41:32,  1.19s/it]

72563 episode score is 691.94


  7%|▋         | 7383/100000 [2:28:15<30:06:29,  1.17s/it]

72573 episode score is 705.51


  7%|▋         | 7384/100000 [2:28:16<30:17:37,  1.18s/it]

72584 episode score is 680.79


  7%|▋         | 7385/100000 [2:28:18<30:35:02,  1.19s/it]

72595 episode score is 683.10


  7%|▋         | 7386/100000 [2:28:19<30:32:53,  1.19s/it]

72606 episode score is 666.32


  7%|▋         | 7387/100000 [2:28:20<30:36:23,  1.19s/it]

72617 episode score is 685.15


  7%|▋         | 7388/100000 [2:28:21<30:28:57,  1.18s/it]

72628 episode score is 666.44


  7%|▋         | 7389/100000 [2:28:22<30:14:51,  1.18s/it]

72639 episode score is 653.51


  7%|▋         | 7390/100000 [2:28:23<30:09:24,  1.17s/it]

72650 episode score is 666.85


  7%|▋         | 7391/100000 [2:28:25<30:17:00,  1.18s/it]

72661 episode score is 672.40


  7%|▋         | 7392/100000 [2:28:26<29:43:50,  1.16s/it]

72671 episode score is 703.52


  7%|▋         | 7393/100000 [2:28:27<29:49:48,  1.16s/it]

72682 episode score is 675.84


  7%|▋         | 7394/100000 [2:28:28<29:27:10,  1.14s/it]

72692 episode score is 712.79


  7%|▋         | 7395/100000 [2:28:29<29:07:21,  1.13s/it]

72702 episode score is 698.90


  7%|▋         | 7396/100000 [2:28:30<29:14:06,  1.14s/it]

72712 episode score is 741.62


  7%|▋         | 7397/100000 [2:28:31<28:56:58,  1.13s/it]

72722 episode score is 699.90


  7%|▋         | 7398/100000 [2:28:32<29:08:26,  1.13s/it]

72732 episode score is 713.68


  7%|▋         | 7399/100000 [2:28:34<29:09:46,  1.13s/it]

72742 episode score is 723.43
72752 episode score is 705.07


  7%|▋         | 7400/100000 [2:28:36<36:47:32,  1.43s/it]

Iteration 7400: Average test reward: 719.23


  7%|▋         | 7401/100000 [2:28:37<34:21:19,  1.34s/it]

72762 episode score is 711.23


  7%|▋         | 7402/100000 [2:28:38<32:43:05,  1.27s/it]

72772 episode score is 721.50


  7%|▋         | 7403/100000 [2:28:39<31:30:25,  1.22s/it]

72782 episode score is 705.40


  7%|▋         | 7404/100000 [2:28:40<31:20:02,  1.22s/it]

72793 episode score is 695.44


  7%|▋         | 7405/100000 [2:28:41<30:53:13,  1.20s/it]

72803 episode score is 738.08


  7%|▋         | 7406/100000 [2:28:43<30:18:59,  1.18s/it]

72813 episode score is 719.47


  7%|▋         | 7407/100000 [2:28:44<30:04:17,  1.17s/it]

72823 episode score is 728.26


  7%|▋         | 7408/100000 [2:28:45<29:48:59,  1.16s/it]

72833 episode score is 723.87


  7%|▋         | 7409/100000 [2:28:46<29:50:50,  1.16s/it]

72843 episode score is 749.64


  7%|▋         | 7410/100000 [2:28:47<30:07:10,  1.17s/it]

72853 episode score is 765.71


  7%|▋         | 7411/100000 [2:28:48<30:08:41,  1.17s/it]

72863 episode score is 744.40


  7%|▋         | 7412/100000 [2:28:50<30:08:39,  1.17s/it]

72873 episode score is 745.07


  7%|▋         | 7413/100000 [2:28:51<30:22:44,  1.18s/it]

72883 episode score is 765.54


  7%|▋         | 7414/100000 [2:28:52<30:05:01,  1.17s/it]

72893 episode score is 736.42


  7%|▋         | 7415/100000 [2:28:53<29:58:19,  1.17s/it]

72903 episode score is 740.31


  7%|▋         | 7416/100000 [2:28:54<29:57:51,  1.17s/it]

72913 episode score is 746.25


  7%|▋         | 7417/100000 [2:28:55<30:06:04,  1.17s/it]

72923 episode score is 745.52


  7%|▋         | 7418/100000 [2:28:57<29:44:21,  1.16s/it]

72933 episode score is 712.44


  7%|▋         | 7419/100000 [2:28:58<29:27:17,  1.15s/it]

72943 episode score is 712.14


  7%|▋         | 7420/100000 [2:28:59<29:06:13,  1.13s/it]

72953 episode score is 697.22


  7%|▋         | 7421/100000 [2:29:00<29:26:24,  1.14s/it]

72964 episode score is 679.14


  7%|▋         | 7422/100000 [2:29:01<29:11:56,  1.14s/it]

72974 episode score is 707.96


  7%|▋         | 7423/100000 [2:29:02<29:30:44,  1.15s/it]

72984 episode score is 741.38


  7%|▋         | 7424/100000 [2:29:03<29:32:29,  1.15s/it]

72994 episode score is 728.37


  7%|▋         | 7425/100000 [2:29:04<29:09:35,  1.13s/it]

73004 episode score is 695.80


  7%|▋         | 7426/100000 [2:29:06<29:09:50,  1.13s/it]

73014 episode score is 726.61


  7%|▋         | 7427/100000 [2:29:07<29:16:44,  1.14s/it]

73024 episode score is 732.26


  7%|▋         | 7428/100000 [2:29:08<29:25:41,  1.14s/it]

73034 episode score is 738.42


  7%|▋         | 7429/100000 [2:29:09<29:32:46,  1.15s/it]

73044 episode score is 738.77


  7%|▋         | 7430/100000 [2:29:10<29:42:46,  1.16s/it]

73054 episode score is 750.85


  7%|▋         | 7431/100000 [2:29:11<29:51:35,  1.16s/it]

73065 episode score is 672.35


  7%|▋         | 7432/100000 [2:29:13<29:50:18,  1.16s/it]

73075 episode score is 724.44


  7%|▋         | 7433/100000 [2:29:14<29:55:14,  1.16s/it]

73085 episode score is 742.49


  7%|▋         | 7434/100000 [2:29:15<29:41:26,  1.15s/it]

73095 episode score is 719.38


  7%|▋         | 7435/100000 [2:29:16<29:40:45,  1.15s/it]

73105 episode score is 733.14


  7%|▋         | 7436/100000 [2:29:17<29:33:39,  1.15s/it]

73115 episode score is 719.35


  7%|▋         | 7437/100000 [2:29:18<29:25:26,  1.14s/it]

73125 episode score is 716.34


  7%|▋         | 7438/100000 [2:29:19<29:36:28,  1.15s/it]

73135 episode score is 734.55


  7%|▋         | 7439/100000 [2:29:21<29:26:37,  1.15s/it]

73145 episode score is 708.00


  7%|▋         | 7440/100000 [2:29:22<29:51:02,  1.16s/it]

73156 episode score is 693.21


  7%|▋         | 7441/100000 [2:29:23<29:28:27,  1.15s/it]

73166 episode score is 701.07


  7%|▋         | 7442/100000 [2:29:24<29:12:39,  1.14s/it]

73176 episode score is 709.20


  7%|▋         | 7443/100000 [2:29:25<29:49:39,  1.16s/it]

73186 episode score is 711.98


  7%|▋         | 7444/100000 [2:29:26<30:02:13,  1.17s/it]

73197 episode score is 685.90


  7%|▋         | 7445/100000 [2:29:28<29:30:01,  1.15s/it]

73207 episode score is 693.93


  7%|▋         | 7446/100000 [2:29:29<29:39:16,  1.15s/it]

73217 episode score is 743.68


  7%|▋         | 7447/100000 [2:29:30<29:15:34,  1.14s/it]

73227 episode score is 703.41


  7%|▋         | 7448/100000 [2:29:31<29:18:52,  1.14s/it]

73237 episode score is 717.09


  7%|▋         | 7449/100000 [2:29:32<29:06:57,  1.13s/it]

73247 episode score is 705.82


  7%|▋         | 7450/100000 [2:29:33<29:03:19,  1.13s/it]

73257 episode score is 710.57


  7%|▋         | 7451/100000 [2:29:34<29:30:37,  1.15s/it]

73267 episode score is 758.31


  7%|▋         | 7452/100000 [2:29:35<29:32:49,  1.15s/it]

73277 episode score is 726.37


  7%|▋         | 7453/100000 [2:29:37<29:17:04,  1.14s/it]

73287 episode score is 708.72


  7%|▋         | 7454/100000 [2:29:38<29:46:34,  1.16s/it]

73298 episode score is 688.95


  7%|▋         | 7455/100000 [2:29:39<29:47:29,  1.16s/it]

73308 episode score is 740.36


  7%|▋         | 7456/100000 [2:29:40<29:38:49,  1.15s/it]

73318 episode score is 729.34


  7%|▋         | 7457/100000 [2:29:41<29:35:53,  1.15s/it]

73328 episode score is 726.72


  7%|▋         | 7458/100000 [2:29:42<29:32:49,  1.15s/it]

73338 episode score is 726.61


  7%|▋         | 7459/100000 [2:29:44<29:19:52,  1.14s/it]

73348 episode score is 712.05


  7%|▋         | 7460/100000 [2:29:45<29:37:46,  1.15s/it]

73358 episode score is 751.42


  7%|▋         | 7461/100000 [2:29:46<29:59:22,  1.17s/it]

73368 episode score is 768.84


  7%|▋         | 7462/100000 [2:29:47<30:14:53,  1.18s/it]

73378 episode score is 744.83


  7%|▋         | 7463/100000 [2:29:48<29:48:31,  1.16s/it]

73388 episode score is 711.65


  7%|▋         | 7464/100000 [2:29:49<29:53:16,  1.16s/it]

73398 episode score is 740.65


  7%|▋         | 7465/100000 [2:29:51<29:56:17,  1.16s/it]

73408 episode score is 747.53


  7%|▋         | 7466/100000 [2:29:52<29:54:30,  1.16s/it]

73418 episode score is 733.72


  7%|▋         | 7467/100000 [2:29:53<29:40:43,  1.15s/it]

73428 episode score is 716.99


  7%|▋         | 7468/100000 [2:29:54<29:20:57,  1.14s/it]

73438 episode score is 708.21


  7%|▋         | 7469/100000 [2:29:55<30:08:41,  1.17s/it]

73448 episode score is 772.04


  7%|▋         | 7470/100000 [2:29:56<29:56:31,  1.16s/it]

73458 episode score is 724.74


  7%|▋         | 7471/100000 [2:29:58<30:00:01,  1.17s/it]

73468 episode score is 746.98


  7%|▋         | 7472/100000 [2:29:59<29:51:23,  1.16s/it]

73478 episode score is 727.02


  7%|▋         | 7473/100000 [2:30:00<29:57:17,  1.17s/it]

73488 episode score is 742.61


  7%|▋         | 7474/100000 [2:30:01<30:06:46,  1.17s/it]

73498 episode score is 742.23


  7%|▋         | 7475/100000 [2:30:02<30:18:17,  1.18s/it]

73508 episode score is 761.22


  7%|▋         | 7476/100000 [2:30:03<29:47:53,  1.16s/it]

73518 episode score is 708.33


  7%|▋         | 7477/100000 [2:30:04<29:30:42,  1.15s/it]

73528 episode score is 714.77


  7%|▋         | 7478/100000 [2:30:06<29:48:44,  1.16s/it]

73538 episode score is 755.54


  7%|▋         | 7479/100000 [2:30:07<29:27:57,  1.15s/it]

73548 episode score is 714.84


  7%|▋         | 7480/100000 [2:30:08<29:19:42,  1.14s/it]

73559 episode score is 644.29


  7%|▋         | 7481/100000 [2:30:09<29:16:33,  1.14s/it]

73569 episode score is 724.91


  7%|▋         | 7482/100000 [2:30:10<29:19:15,  1.14s/it]

73579 episode score is 733.77


  7%|▋         | 7483/100000 [2:30:11<29:48:15,  1.16s/it]

73590 episode score is 693.90


  7%|▋         | 7484/100000 [2:30:13<29:28:22,  1.15s/it]

73600 episode score is 705.41


  7%|▋         | 7485/100000 [2:30:14<29:36:36,  1.15s/it]

73611 episode score is 672.27


  7%|▋         | 7486/100000 [2:30:15<29:53:32,  1.16s/it]

73622 episode score is 670.18


  7%|▋         | 7487/100000 [2:30:16<29:23:55,  1.14s/it]

73632 episode score is 697.46


  7%|▋         | 7488/100000 [2:30:17<29:54:20,  1.16s/it]

73642 episode score is 771.24


  7%|▋         | 7489/100000 [2:30:18<29:36:08,  1.15s/it]

73652 episode score is 713.99


  7%|▋         | 7490/100000 [2:30:19<29:37:15,  1.15s/it]

73662 episode score is 740.76


  7%|▋         | 7491/100000 [2:30:21<29:10:13,  1.14s/it]

73672 episode score is 694.08


  7%|▋         | 7492/100000 [2:30:22<29:22:32,  1.14s/it]

73682 episode score is 738.61


  7%|▋         | 7493/100000 [2:30:23<29:45:27,  1.16s/it]

73692 episode score is 757.95


  7%|▋         | 7494/100000 [2:30:24<29:30:33,  1.15s/it]

73702 episode score is 709.95


  7%|▋         | 7495/100000 [2:30:25<29:27:51,  1.15s/it]

73712 episode score is 721.97


  7%|▋         | 7496/100000 [2:30:26<29:54:05,  1.16s/it]

73722 episode score is 766.54


  7%|▋         | 7497/100000 [2:30:28<29:45:38,  1.16s/it]

73732 episode score is 731.42


  7%|▋         | 7498/100000 [2:30:29<29:32:47,  1.15s/it]

73742 episode score is 720.75


  7%|▋         | 7499/100000 [2:30:30<29:29:08,  1.15s/it]

73752 episode score is 728.77
73762 episode score is 753.41


  8%|▊         | 7500/100000 [2:30:32<37:51:08,  1.47s/it]

Iteration 7500: Average test reward: 733.33


  8%|▊         | 7501/100000 [2:30:33<35:24:04,  1.38s/it]

73772 episode score is 726.94


  8%|▊         | 7502/100000 [2:30:34<34:01:27,  1.32s/it]

73782 episode score is 771.07


  8%|▊         | 7503/100000 [2:30:36<32:45:16,  1.27s/it]

73792 episode score is 742.11


  8%|▊         | 7504/100000 [2:30:37<31:33:35,  1.23s/it]

73802 episode score is 712.53


  8%|▊         | 7505/100000 [2:30:38<30:54:52,  1.20s/it]

73812 episode score is 732.66


  8%|▊         | 7506/100000 [2:30:39<30:44:40,  1.20s/it]

73823 episode score is 679.13


  8%|▊         | 7507/100000 [2:30:40<30:03:27,  1.17s/it]

73833 episode score is 700.04


  8%|▊         | 7508/100000 [2:30:41<30:17:42,  1.18s/it]

73844 episode score is 687.66


  8%|▊         | 7509/100000 [2:30:42<30:01:44,  1.17s/it]

73854 episode score is 726.83


  8%|▊         | 7510/100000 [2:30:44<30:01:36,  1.17s/it]

73865 episode score is 664.77


  8%|▊         | 7511/100000 [2:30:45<30:03:39,  1.17s/it]

73876 episode score is 667.88


  8%|▊         | 7512/100000 [2:30:46<30:03:03,  1.17s/it]

73887 episode score is 669.91


  8%|▊         | 7513/100000 [2:30:47<29:52:47,  1.16s/it]

73897 episode score is 720.28


  8%|▊         | 7514/100000 [2:30:48<29:46:06,  1.16s/it]

73907 episode score is 715.60


  8%|▊         | 7515/100000 [2:30:49<29:51:50,  1.16s/it]

73917 episode score is 749.92


  8%|▊         | 7516/100000 [2:30:51<29:27:58,  1.15s/it]

73927 episode score is 711.19


  8%|▊         | 7517/100000 [2:30:52<29:56:28,  1.17s/it]

73937 episode score is 706.98


  8%|▊         | 7518/100000 [2:30:53<29:50:20,  1.16s/it]

73947 episode score is 728.15


  8%|▊         | 7519/100000 [2:30:54<29:32:39,  1.15s/it]

73957 episode score is 720.17


  8%|▊         | 7520/100000 [2:30:55<29:55:45,  1.17s/it]

73968 episode score is 687.85


  8%|▊         | 7521/100000 [2:30:56<29:41:32,  1.16s/it]

73978 episode score is 725.76


  8%|▊         | 7522/100000 [2:30:57<29:20:29,  1.14s/it]

73988 episode score is 709.03


  8%|▊         | 7523/100000 [2:30:59<29:31:13,  1.15s/it]

73998 episode score is 742.71


  8%|▊         | 7524/100000 [2:31:00<29:44:34,  1.16s/it]

74008 episode score is 754.36


  8%|▊         | 7525/100000 [2:31:01<29:46:37,  1.16s/it]

74018 episode score is 741.18


  8%|▊         | 7526/100000 [2:31:02<29:23:07,  1.14s/it]

74028 episode score is 698.15


  8%|▊         | 7527/100000 [2:31:03<29:23:57,  1.14s/it]

74038 episode score is 734.14


  8%|▊         | 7528/100000 [2:31:04<30:01:18,  1.17s/it]

74049 episode score is 714.36


  8%|▊         | 7529/100000 [2:31:06<30:12:55,  1.18s/it]

74059 episode score is 764.53


  8%|▊         | 7530/100000 [2:31:07<29:37:09,  1.15s/it]

74069 episode score is 696.80


  8%|▊         | 7531/100000 [2:31:08<29:09:12,  1.14s/it]

74079 episode score is 693.09


  8%|▊         | 7532/100000 [2:31:09<29:24:40,  1.15s/it]

74089 episode score is 746.62


  8%|▊         | 7533/100000 [2:31:10<29:33:07,  1.15s/it]

74099 episode score is 743.28


  8%|▊         | 7534/100000 [2:31:11<29:36:53,  1.15s/it]

74109 episode score is 740.53


  8%|▊         | 7535/100000 [2:31:13<29:55:08,  1.16s/it]

74119 episode score is 760.66


  8%|▊         | 7536/100000 [2:31:14<29:47:18,  1.16s/it]

74129 episode score is 738.51


  8%|▊         | 7537/100000 [2:31:15<30:00:24,  1.17s/it]

74140 episode score is 681.61


  8%|▊         | 7538/100000 [2:31:16<29:53:18,  1.16s/it]

74150 episode score is 729.55


  8%|▊         | 7539/100000 [2:31:17<30:21:55,  1.18s/it]

74161 episode score is 699.12


  8%|▊         | 7540/100000 [2:31:18<29:57:10,  1.17s/it]

74171 episode score is 714.54


  8%|▊         | 7541/100000 [2:31:20<30:02:20,  1.17s/it]

74181 episode score is 744.15


  8%|▊         | 7542/100000 [2:31:21<29:44:19,  1.16s/it]

74191 episode score is 725.36


  8%|▊         | 7543/100000 [2:31:22<29:24:39,  1.15s/it]

74201 episode score is 711.89


  8%|▊         | 7544/100000 [2:31:23<29:02:46,  1.13s/it]

74211 episode score is 698.16


  8%|▊         | 7545/100000 [2:31:24<29:23:11,  1.14s/it]

74222 episode score is 677.36


  8%|▊         | 7546/100000 [2:31:25<29:28:35,  1.15s/it]

74232 episode score is 732.72


  8%|▊         | 7547/100000 [2:31:26<29:34:10,  1.15s/it]

74242 episode score is 730.59


  8%|▊         | 7548/100000 [2:31:28<29:50:28,  1.16s/it]

74252 episode score is 761.20


  8%|▊         | 7549/100000 [2:31:29<30:04:26,  1.17s/it]

74262 episode score is 750.44


  8%|▊         | 7550/100000 [2:31:30<30:26:18,  1.19s/it]

74272 episode score is 773.08


  8%|▊         | 7551/100000 [2:31:31<30:07:00,  1.17s/it]

74282 episode score is 723.01


  8%|▊         | 7552/100000 [2:31:32<30:04:00,  1.17s/it]

74292 episode score is 729.96


  8%|▊         | 7553/100000 [2:31:33<29:59:31,  1.17s/it]

74302 episode score is 739.99


  8%|▊         | 7554/100000 [2:31:35<29:38:52,  1.15s/it]

74312 episode score is 719.62


  8%|▊         | 7555/100000 [2:31:36<29:41:47,  1.16s/it]

74322 episode score is 736.86


  8%|▊         | 7556/100000 [2:31:37<29:37:25,  1.15s/it]

74332 episode score is 733.95


  8%|▊         | 7557/100000 [2:31:38<29:15:00,  1.14s/it]

74342 episode score is 700.16


  8%|▊         | 7558/100000 [2:31:39<29:29:41,  1.15s/it]

74352 episode score is 748.09


  8%|▊         | 7559/100000 [2:31:40<29:47:31,  1.16s/it]

74362 episode score is 759.07


  8%|▊         | 7560/100000 [2:31:41<29:48:29,  1.16s/it]

74372 episode score is 744.70


  8%|▊         | 7561/100000 [2:31:43<29:26:02,  1.15s/it]

74382 episode score is 706.15


  8%|▊         | 7562/100000 [2:31:44<29:35:36,  1.15s/it]

74392 episode score is 742.51


  8%|▊         | 7563/100000 [2:31:45<29:23:33,  1.14s/it]

74402 episode score is 718.06


  8%|▊         | 7564/100000 [2:31:46<29:19:21,  1.14s/it]

74412 episode score is 724.18


  8%|▊         | 7565/100000 [2:31:47<29:25:00,  1.15s/it]

74422 episode score is 734.50


  8%|▊         | 7566/100000 [2:31:48<29:31:17,  1.15s/it]

74432 episode score is 737.26


  8%|▊         | 7567/100000 [2:31:50<29:40:37,  1.16s/it]

74442 episode score is 744.65


  8%|▊         | 7568/100000 [2:31:51<29:40:08,  1.16s/it]

74452 episode score is 742.66


  8%|▊         | 7569/100000 [2:31:52<29:37:38,  1.15s/it]

74462 episode score is 737.15


  8%|▊         | 7570/100000 [2:31:53<29:34:24,  1.15s/it]

74472 episode score is 730.72


  8%|▊         | 7571/100000 [2:31:54<29:21:06,  1.14s/it]

74482 episode score is 723.88


  8%|▊         | 7572/100000 [2:31:55<29:54:31,  1.16s/it]

74493 episode score is 703.67


  8%|▊         | 7573/100000 [2:31:56<29:35:43,  1.15s/it]

74503 episode score is 716.23


  8%|▊         | 7574/100000 [2:31:58<29:12:49,  1.14s/it]

74513 episode score is 705.18


  8%|▊         | 7575/100000 [2:31:59<29:30:26,  1.15s/it]

74524 episode score is 671.89


  8%|▊         | 7576/100000 [2:32:00<29:37:52,  1.15s/it]

74534 episode score is 731.87


  8%|▊         | 7577/100000 [2:32:01<30:32:22,  1.19s/it]

74545 episode score is 676.98


  8%|▊         | 7578/100000 [2:32:02<30:04:43,  1.17s/it]

74555 episode score is 705.32


  8%|▊         | 7579/100000 [2:32:03<29:32:40,  1.15s/it]

74565 episode score is 701.99


  8%|▊         | 7580/100000 [2:32:05<29:58:12,  1.17s/it]

74576 episode score is 697.96


  8%|▊         | 7581/100000 [2:32:06<30:15:29,  1.18s/it]

74587 episode score is 695.11


  8%|▊         | 7582/100000 [2:32:07<29:39:32,  1.16s/it]

74597 episode score is 701.28


  8%|▊         | 7583/100000 [2:32:08<29:38:41,  1.15s/it]

74608 episode score is 662.69


  8%|▊         | 7584/100000 [2:32:09<29:24:58,  1.15s/it]

74618 episode score is 714.58


  8%|▊         | 7585/100000 [2:32:10<29:11:25,  1.14s/it]

74628 episode score is 714.08


  8%|▊         | 7586/100000 [2:32:11<29:12:35,  1.14s/it]

74638 episode score is 726.42


  8%|▊         | 7587/100000 [2:32:13<29:18:51,  1.14s/it]

74648 episode score is 735.75


  8%|▊         | 7588/100000 [2:32:14<29:17:11,  1.14s/it]

74658 episode score is 725.88


  8%|▊         | 7589/100000 [2:32:15<29:25:24,  1.15s/it]

74668 episode score is 740.94


  8%|▊         | 7590/100000 [2:32:16<29:50:15,  1.16s/it]

74679 episode score is 686.04


  8%|▊         | 7591/100000 [2:32:17<30:12:12,  1.18s/it]

74690 episode score is 684.46


  8%|▊         | 7592/100000 [2:32:18<30:20:15,  1.18s/it]

74701 episode score is 687.66


  8%|▊         | 7593/100000 [2:32:20<30:15:32,  1.18s/it]

74712 episode score is 667.95


  8%|▊         | 7594/100000 [2:32:21<29:41:21,  1.16s/it]

74722 episode score is 706.11


  8%|▊         | 7595/100000 [2:32:22<29:56:28,  1.17s/it]

74733 episode score is 688.69


  8%|▊         | 7596/100000 [2:32:23<30:12:06,  1.18s/it]

74744 episode score is 689.91


  8%|▊         | 7597/100000 [2:32:24<29:39:47,  1.16s/it]

74754 episode score is 706.70


  8%|▊         | 7598/100000 [2:32:25<29:23:13,  1.14s/it]

74764 episode score is 715.48


  8%|▊         | 7599/100000 [2:32:27<29:16:51,  1.14s/it]

74774 episode score is 715.05
74785 episode score is 658.40


  8%|▊         | 7600/100000 [2:32:29<36:50:25,  1.44s/it]

Iteration 7600: Average test reward: 675.78


  8%|▊         | 7601/100000 [2:32:30<34:17:34,  1.34s/it]

74796 episode score is 619.77


  8%|▊         | 7602/100000 [2:32:31<32:49:54,  1.28s/it]

74807 episode score is 654.51


  8%|▊         | 7603/100000 [2:32:32<32:10:19,  1.25s/it]

74818 episode score is 678.48


  8%|▊         | 7604/100000 [2:32:33<31:41:28,  1.23s/it]

74829 episode score is 679.86


  8%|▊         | 7605/100000 [2:32:34<30:58:41,  1.21s/it]

74839 episode score is 730.58


  8%|▊         | 7606/100000 [2:32:36<30:24:10,  1.18s/it]

74849 episode score is 725.80


  8%|▊         | 7607/100000 [2:32:37<30:02:08,  1.17s/it]

74859 episode score is 720.30


  8%|▊         | 7608/100000 [2:32:38<29:55:22,  1.17s/it]

74869 episode score is 740.10


  8%|▊         | 7609/100000 [2:32:39<29:31:49,  1.15s/it]

74879 episode score is 709.15


  8%|▊         | 7610/100000 [2:32:40<30:06:32,  1.17s/it]

74890 episode score is 710.32


  8%|▊         | 7611/100000 [2:32:41<29:38:34,  1.16s/it]

74900 episode score is 695.51


  8%|▊         | 7612/100000 [2:32:42<29:41:18,  1.16s/it]

74911 episode score is 661.27


  8%|▊         | 7613/100000 [2:32:44<29:21:49,  1.14s/it]

74922 episode score is 622.02


  8%|▊         | 7614/100000 [2:32:45<29:17:02,  1.14s/it]

74933 episode score is 640.90


  8%|▊         | 7615/100000 [2:32:46<29:17:46,  1.14s/it]

74944 episode score is 648.23


  8%|▊         | 7616/100000 [2:32:47<29:03:45,  1.13s/it]

74954 episode score is 700.35


  8%|▊         | 7617/100000 [2:32:48<29:47:15,  1.16s/it]

74965 episode score is 700.75


  8%|▊         | 7618/100000 [2:32:49<29:23:44,  1.15s/it]

74975 episode score is 700.52


  8%|▊         | 7619/100000 [2:32:50<29:28:48,  1.15s/it]

74985 episode score is 740.24


  8%|▊         | 7620/100000 [2:32:52<29:32:43,  1.15s/it]

74995 episode score is 741.59


  8%|▊         | 7621/100000 [2:32:53<29:43:54,  1.16s/it]

75005 episode score is 750.13


  8%|▊         | 7622/100000 [2:32:54<30:03:06,  1.17s/it]

75015 episode score is 779.40


  8%|▊         | 7623/100000 [2:32:55<29:52:43,  1.16s/it]

75025 episode score is 726.92


  8%|▊         | 7624/100000 [2:32:56<30:09:50,  1.18s/it]

75035 episode score is 762.97


  8%|▊         | 7625/100000 [2:32:57<29:36:31,  1.15s/it]

75045 episode score is 702.86


  8%|▊         | 7626/100000 [2:32:59<29:18:26,  1.14s/it]

75055 episode score is 709.55


  8%|▊         | 7627/100000 [2:33:00<29:32:57,  1.15s/it]

75066 episode score is 670.97


  8%|▊         | 7628/100000 [2:33:01<29:42:51,  1.16s/it]

75077 episode score is 676.39


  8%|▊         | 7629/100000 [2:33:02<29:49:52,  1.16s/it]

75088 episode score is 667.45


  8%|▊         | 7630/100000 [2:33:03<29:32:11,  1.15s/it]

75098 episode score is 711.87


  8%|▊         | 7631/100000 [2:33:04<29:12:16,  1.14s/it]

75107 episode score is 801.02


  8%|▊         | 7632/100000 [2:33:05<29:30:30,  1.15s/it]

75117 episode score is 758.18


  8%|▊         | 7633/100000 [2:33:07<30:05:26,  1.17s/it]

75127 episode score is 784.18


  8%|▊         | 7634/100000 [2:33:08<30:06:22,  1.17s/it]

75137 episode score is 750.57


  8%|▊         | 7635/100000 [2:33:09<29:42:50,  1.16s/it]

75147 episode score is 724.13


  8%|▊         | 7636/100000 [2:33:10<29:21:12,  1.14s/it]

75156 episode score is 796.52


  8%|▊         | 7637/100000 [2:33:11<29:44:30,  1.16s/it]

75165 episode score is 789.25


  8%|▊         | 7638/100000 [2:33:12<30:01:32,  1.17s/it]

75175 episode score is 767.86


  8%|▊         | 7639/100000 [2:33:14<29:29:28,  1.15s/it]

75185 episode score is 701.68


  8%|▊         | 7640/100000 [2:33:15<29:33:00,  1.15s/it]

75195 episode score is 744.50


  8%|▊         | 7641/100000 [2:33:16<29:48:22,  1.16s/it]

75205 episode score is 748.31


  8%|▊         | 7642/100000 [2:33:17<29:41:50,  1.16s/it]

75215 episode score is 720.53


  8%|▊         | 7643/100000 [2:33:18<29:13:57,  1.14s/it]

75224 episode score is 789.66


  8%|▊         | 7644/100000 [2:33:19<29:12:10,  1.14s/it]

75234 episode score is 720.78


  8%|▊         | 7645/100000 [2:33:20<29:21:14,  1.14s/it]

75244 episode score is 743.93


  8%|▊         | 7646/100000 [2:33:22<29:41:08,  1.16s/it]

75255 episode score is 687.62


  8%|▊         | 7647/100000 [2:33:23<30:07:33,  1.17s/it]

75265 episode score is 785.76


  8%|▊         | 7648/100000 [2:33:24<30:04:25,  1.17s/it]

75275 episode score is 754.11


  8%|▊         | 7649/100000 [2:33:25<30:20:30,  1.18s/it]

75285 episode score is 776.34


  8%|▊         | 7650/100000 [2:33:26<30:31:48,  1.19s/it]

75295 episode score is 777.39


  8%|▊         | 7651/100000 [2:33:28<30:06:17,  1.17s/it]

75304 episode score is 817.17


  8%|▊         | 7652/100000 [2:33:29<29:54:38,  1.17s/it]

75313 episode score is 820.08


  8%|▊         | 7653/100000 [2:33:30<30:13:06,  1.18s/it]

75323 episode score is 766.46


  8%|▊         | 7654/100000 [2:33:31<29:44:00,  1.16s/it]

75332 episode score is 797.07


  8%|▊         | 7655/100000 [2:33:32<30:25:39,  1.19s/it]

75342 episode score is 789.83


  8%|▊         | 7656/100000 [2:33:33<30:08:15,  1.17s/it]

75351 episode score is 826.39


  8%|▊         | 7657/100000 [2:33:35<30:15:15,  1.18s/it]

75361 episode score is 761.81


  8%|▊         | 7658/100000 [2:33:36<29:39:11,  1.16s/it]

75370 episode score is 786.54


  8%|▊         | 7659/100000 [2:33:37<29:20:14,  1.14s/it]

75380 episode score is 713.37


  8%|▊         | 7660/100000 [2:33:38<29:04:41,  1.13s/it]

75390 episode score is 712.75


  8%|▊         | 7661/100000 [2:33:39<29:36:37,  1.15s/it]

75400 episode score is 777.18


  8%|▊         | 7662/100000 [2:33:40<29:28:05,  1.15s/it]

75410 episode score is 729.44


  8%|▊         | 7663/100000 [2:33:41<29:17:52,  1.14s/it]

75420 episode score is 715.11


  8%|▊         | 7664/100000 [2:33:43<29:37:41,  1.16s/it]

75431 episode score is 682.65


  8%|▊         | 7665/100000 [2:33:44<29:48:08,  1.16s/it]

75441 episode score is 755.51


  8%|▊         | 7666/100000 [2:33:45<30:05:58,  1.17s/it]

75452 episode score is 700.28


  8%|▊         | 7667/100000 [2:33:46<29:52:10,  1.16s/it]

75463 episode score is 651.53


  8%|▊         | 7668/100000 [2:33:47<30:13:00,  1.18s/it]

75474 episode score is 684.77


  8%|▊         | 7669/100000 [2:33:48<29:51:28,  1.16s/it]

75485 episode score is 637.43


  8%|▊         | 7670/100000 [2:33:50<29:30:48,  1.15s/it]

75496 episode score is 626.45


  8%|▊         | 7671/100000 [2:33:51<29:42:17,  1.16s/it]

75507 episode score is 674.15


  8%|▊         | 7672/100000 [2:33:52<29:16:57,  1.14s/it]

75518 episode score is 617.55


  8%|▊         | 7673/100000 [2:33:53<29:41:54,  1.16s/it]

75529 episode score is 686.78


  8%|▊         | 7674/100000 [2:33:54<29:11:39,  1.14s/it]

75539 episode score is 695.16


  8%|▊         | 7675/100000 [2:33:55<29:26:01,  1.15s/it]

75550 episode score is 673.52


  8%|▊         | 7676/100000 [2:33:57<29:39:50,  1.16s/it]

75561 episode score is 671.75


  8%|▊         | 7677/100000 [2:33:58<29:43:06,  1.16s/it]

75571 episode score is 747.28


  8%|▊         | 7678/100000 [2:33:59<29:52:10,  1.16s/it]

75581 episode score is 757.27


  8%|▊         | 7679/100000 [2:34:00<30:07:46,  1.17s/it]

75591 episode score is 771.67


  8%|▊         | 7680/100000 [2:34:01<30:24:02,  1.19s/it]

75601 episode score is 772.68


  8%|▊         | 7681/100000 [2:34:02<30:31:58,  1.19s/it]

75611 episode score is 754.61


  8%|▊         | 7682/100000 [2:34:04<30:35:34,  1.19s/it]

75621 episode score is 764.20


  8%|▊         | 7683/100000 [2:34:05<30:13:08,  1.18s/it]

75631 episode score is 739.87


  8%|▊         | 7684/100000 [2:34:06<30:13:07,  1.18s/it]

75641 episode score is 760.32


  8%|▊         | 7685/100000 [2:34:07<30:15:49,  1.18s/it]

75651 episode score is 757.70


  8%|▊         | 7686/100000 [2:34:08<30:06:28,  1.17s/it]

75661 episode score is 746.66


  8%|▊         | 7687/100000 [2:34:10<29:59:55,  1.17s/it]

75671 episode score is 748.67


  8%|▊         | 7688/100000 [2:34:11<29:42:30,  1.16s/it]

75681 episode score is 727.38


  8%|▊         | 7689/100000 [2:34:12<29:46:59,  1.16s/it]

75691 episode score is 748.82


  8%|▊         | 7690/100000 [2:34:13<29:29:22,  1.15s/it]

75701 episode score is 717.00


  8%|▊         | 7691/100000 [2:34:14<29:20:20,  1.14s/it]

75711 episode score is 723.94


  8%|▊         | 7692/100000 [2:34:15<29:08:11,  1.14s/it]

75721 episode score is 717.67


  8%|▊         | 7693/100000 [2:34:16<28:55:07,  1.13s/it]

75731 episode score is 704.06


  8%|▊         | 7694/100000 [2:34:17<29:25:08,  1.15s/it]

75741 episode score is 745.05


  8%|▊         | 7695/100000 [2:34:19<29:25:16,  1.15s/it]

75751 episode score is 730.73


  8%|▊         | 7696/100000 [2:34:20<29:33:08,  1.15s/it]

75761 episode score is 743.64


  8%|▊         | 7697/100000 [2:34:21<29:34:16,  1.15s/it]

75771 episode score is 747.23


  8%|▊         | 7698/100000 [2:34:22<29:32:28,  1.15s/it]

75781 episode score is 738.12


  8%|▊         | 7699/100000 [2:34:23<30:01:10,  1.17s/it]

75792 episode score is 693.35
75803 episode score is 639.49


  8%|▊         | 7700/100000 [2:34:25<36:51:53,  1.44s/it]

Iteration 7700: Average test reward: 666.62


  8%|▊         | 7701/100000 [2:34:27<34:30:08,  1.35s/it]

75813 episode score is 723.02


  8%|▊         | 7702/100000 [2:34:28<32:44:44,  1.28s/it]

75823 episode score is 714.05


  8%|▊         | 7703/100000 [2:34:29<32:03:09,  1.25s/it]

75834 episode score is 688.34


  8%|▊         | 7704/100000 [2:34:30<30:59:24,  1.21s/it]

75844 episode score is 712.04


  8%|▊         | 7705/100000 [2:34:31<30:28:22,  1.19s/it]

75854 episode score is 734.11


  8%|▊         | 7706/100000 [2:34:32<30:21:31,  1.18s/it]

75865 episode score is 664.47


  8%|▊         | 7707/100000 [2:34:33<30:26:53,  1.19s/it]

75876 episode score is 683.05


  8%|▊         | 7708/100000 [2:34:35<29:49:47,  1.16s/it]

75886 episode score is 710.04


  8%|▊         | 7709/100000 [2:34:36<29:46:06,  1.16s/it]

75897 episode score is 659.90


  8%|▊         | 7710/100000 [2:34:37<29:21:03,  1.14s/it]

75908 episode score is 620.48


  8%|▊         | 7711/100000 [2:34:38<29:22:29,  1.15s/it]

75919 episode score is 648.92


  8%|▊         | 7712/100000 [2:34:39<29:20:13,  1.14s/it]

75930 episode score is 648.31


  8%|▊         | 7713/100000 [2:34:40<29:15:59,  1.14s/it]

75941 episode score is 646.07


  8%|▊         | 7714/100000 [2:34:41<29:29:03,  1.15s/it]

75952 episode score is 674.12


  8%|▊         | 7715/100000 [2:34:43<30:02:29,  1.17s/it]

75962 episode score is 713.70


  8%|▊         | 7716/100000 [2:34:44<29:39:56,  1.16s/it]

75972 episode score is 712.36


  8%|▊         | 7717/100000 [2:34:45<29:57:23,  1.17s/it]

75983 episode score is 675.25


  8%|▊         | 7718/100000 [2:34:46<29:46:24,  1.16s/it]

75994 episode score is 654.81


  8%|▊         | 7719/100000 [2:34:47<29:42:06,  1.16s/it]

76005 episode score is 645.21


  8%|▊         | 7720/100000 [2:34:48<30:03:32,  1.17s/it]

76016 episode score is 695.13


  8%|▊         | 7721/100000 [2:34:50<30:15:58,  1.18s/it]

76027 episode score is 685.24


  8%|▊         | 7722/100000 [2:34:51<29:51:09,  1.16s/it]

76038 episode score is 628.50


  8%|▊         | 7723/100000 [2:34:52<29:34:08,  1.15s/it]

76049 episode score is 637.32


  8%|▊         | 7724/100000 [2:34:53<29:23:47,  1.15s/it]

76060 episode score is 641.51


  8%|▊         | 7725/100000 [2:34:54<29:24:11,  1.15s/it]

76071 episode score is 655.02


  8%|▊         | 7726/100000 [2:34:55<29:51:52,  1.17s/it]

76083 episode score is 606.14


  8%|▊         | 7727/100000 [2:34:57<29:50:45,  1.16s/it]

76094 episode score is 666.16


  8%|▊         | 7728/100000 [2:34:58<30:09:13,  1.18s/it]

76105 episode score is 694.56


  8%|▊         | 7729/100000 [2:34:59<30:15:00,  1.18s/it]

76116 episode score is 684.54


  8%|▊         | 7730/100000 [2:35:00<30:00:54,  1.17s/it]

76126 episode score is 738.35


  8%|▊         | 7731/100000 [2:35:01<29:47:07,  1.16s/it]

76136 episode score is 732.58


  8%|▊         | 7732/100000 [2:35:02<30:04:30,  1.17s/it]

76146 episode score is 758.46


  8%|▊         | 7733/100000 [2:35:04<30:02:06,  1.17s/it]

76156 episode score is 745.51


  8%|▊         | 7734/100000 [2:35:05<30:01:03,  1.17s/it]

76166 episode score is 744.71


  8%|▊         | 7735/100000 [2:35:06<30:04:32,  1.17s/it]

76176 episode score is 755.98


  8%|▊         | 7736/100000 [2:35:07<29:47:55,  1.16s/it]

76186 episode score is 724.04


  8%|▊         | 7737/100000 [2:35:08<29:49:09,  1.16s/it]

76196 episode score is 743.82


  8%|▊         | 7738/100000 [2:35:09<29:46:42,  1.16s/it]

76206 episode score is 735.78


  8%|▊         | 7739/100000 [2:35:11<29:59:36,  1.17s/it]

76216 episode score is 749.61


  8%|▊         | 7740/100000 [2:35:12<29:34:48,  1.15s/it]

76226 episode score is 703.18


  8%|▊         | 7741/100000 [2:35:13<29:37:30,  1.16s/it]

76236 episode score is 744.71


  8%|▊         | 7742/100000 [2:35:14<29:49:32,  1.16s/it]

76246 episode score is 752.15


  8%|▊         | 7743/100000 [2:35:15<29:39:28,  1.16s/it]

76256 episode score is 729.34


  8%|▊         | 7744/100000 [2:35:16<29:58:30,  1.17s/it]

76266 episode score is 765.80


  8%|▊         | 7745/100000 [2:35:18<30:06:22,  1.17s/it]

76276 episode score is 749.00


  8%|▊         | 7746/100000 [2:35:19<30:10:19,  1.18s/it]

76286 episode score is 746.73


  8%|▊         | 7747/100000 [2:35:20<29:47:33,  1.16s/it]

76296 episode score is 720.58


  8%|▊         | 7748/100000 [2:35:21<29:31:03,  1.15s/it]

76306 episode score is 718.85


  8%|▊         | 7749/100000 [2:35:22<29:15:13,  1.14s/it]

76316 episode score is 715.49


  8%|▊         | 7750/100000 [2:35:23<29:23:25,  1.15s/it]

76326 episode score is 739.00


  8%|▊         | 7751/100000 [2:35:24<29:35:02,  1.15s/it]

76336 episode score is 749.77


  8%|▊         | 7752/100000 [2:35:26<29:35:10,  1.15s/it]

76346 episode score is 742.09


  8%|▊         | 7753/100000 [2:35:27<29:25:28,  1.15s/it]

76356 episode score is 728.94


  8%|▊         | 7754/100000 [2:35:28<30:21:47,  1.18s/it]

76366 episode score is 752.64


  8%|▊         | 7755/100000 [2:35:29<30:15:21,  1.18s/it]

76376 episode score is 747.71


  8%|▊         | 7756/100000 [2:35:30<30:14:11,  1.18s/it]

76386 episode score is 752.88


  8%|▊         | 7757/100000 [2:35:32<30:06:54,  1.18s/it]

76396 episode score is 745.04


  8%|▊         | 7758/100000 [2:35:33<30:08:46,  1.18s/it]

76406 episode score is 749.29


  8%|▊         | 7759/100000 [2:35:34<30:14:01,  1.18s/it]

76416 episode score is 749.10


  8%|▊         | 7760/100000 [2:35:35<30:19:18,  1.18s/it]

76426 episode score is 753.25


  8%|▊         | 7761/100000 [2:35:36<30:10:03,  1.18s/it]

76436 episode score is 736.99


  8%|▊         | 7762/100000 [2:35:37<30:03:22,  1.17s/it]

76446 episode score is 733.63


  8%|▊         | 7763/100000 [2:35:39<30:05:28,  1.17s/it]

76456 episode score is 747.15


  8%|▊         | 7764/100000 [2:35:40<30:01:56,  1.17s/it]

76466 episode score is 743.73


  8%|▊         | 7765/100000 [2:35:41<29:59:58,  1.17s/it]

76476 episode score is 745.63


  8%|▊         | 7766/100000 [2:35:42<30:07:18,  1.18s/it]

76486 episode score is 757.05


  8%|▊         | 7767/100000 [2:35:43<30:09:36,  1.18s/it]

76496 episode score is 751.54


  8%|▊         | 7768/100000 [2:35:44<29:51:09,  1.17s/it]

76506 episode score is 726.77


  8%|▊         | 7769/100000 [2:35:46<29:51:46,  1.17s/it]

76516 episode score is 741.98


  8%|▊         | 7770/100000 [2:35:47<29:47:02,  1.16s/it]

76526 episode score is 737.41


  8%|▊         | 7771/100000 [2:35:48<29:49:17,  1.16s/it]

76536 episode score is 737.35


  8%|▊         | 7772/100000 [2:35:49<29:39:00,  1.16s/it]

76546 episode score is 723.21


  8%|▊         | 7773/100000 [2:35:50<29:39:09,  1.16s/it]

76556 episode score is 739.90


  8%|▊         | 7774/100000 [2:35:51<29:27:48,  1.15s/it]

76566 episode score is 727.53


  8%|▊         | 7775/100000 [2:35:53<29:23:51,  1.15s/it]

76576 episode score is 730.81


  8%|▊         | 7776/100000 [2:35:54<29:20:49,  1.15s/it]

76586 episode score is 736.22


  8%|▊         | 7777/100000 [2:35:55<29:33:15,  1.15s/it]

76596 episode score is 749.80


  8%|▊         | 7778/100000 [2:35:56<29:30:16,  1.15s/it]

76606 episode score is 731.74


  8%|▊         | 7779/100000 [2:35:57<29:32:49,  1.15s/it]

76616 episode score is 743.24


  8%|▊         | 7780/100000 [2:35:58<29:29:22,  1.15s/it]

76626 episode score is 733.35


  8%|▊         | 7781/100000 [2:35:59<29:29:13,  1.15s/it]

76636 episode score is 739.12


  8%|▊         | 7782/100000 [2:36:01<29:29:03,  1.15s/it]

76646 episode score is 730.19


  8%|▊         | 7783/100000 [2:36:02<29:12:03,  1.14s/it]

76656 episode score is 715.01


  8%|▊         | 7784/100000 [2:36:03<29:27:25,  1.15s/it]

76667 episode score is 673.98


  8%|▊         | 7785/100000 [2:36:04<29:28:29,  1.15s/it]

76678 episode score is 650.31


  8%|▊         | 7786/100000 [2:36:05<29:28:04,  1.15s/it]

76688 episode score is 735.57


  8%|▊         | 7787/100000 [2:36:06<29:04:02,  1.13s/it]

76698 episode score is 700.64


  8%|▊         | 7788/100000 [2:36:07<28:48:34,  1.12s/it]

76708 episode score is 704.50


  8%|▊         | 7789/100000 [2:36:08<28:36:55,  1.12s/it]

76718 episode score is 700.82


  8%|▊         | 7790/100000 [2:36:10<28:43:28,  1.12s/it]

76728 episode score is 727.01


  8%|▊         | 7791/100000 [2:36:11<28:55:24,  1.13s/it]

76738 episode score is 737.35


  8%|▊         | 7792/100000 [2:36:12<28:52:18,  1.13s/it]

76748 episode score is 717.11


  8%|▊         | 7793/100000 [2:36:13<28:50:28,  1.13s/it]

76758 episode score is 719.34


  8%|▊         | 7794/100000 [2:36:14<28:46:43,  1.12s/it]

76768 episode score is 719.24


  8%|▊         | 7795/100000 [2:36:15<28:35:13,  1.12s/it]

76778 episode score is 697.18


  8%|▊         | 7796/100000 [2:36:16<29:11:03,  1.14s/it]

76789 episode score is 690.61


  8%|▊         | 7797/100000 [2:36:18<29:02:58,  1.13s/it]

76799 episode score is 698.07


  8%|▊         | 7798/100000 [2:36:19<29:31:15,  1.15s/it]

76810 episode score is 680.45


  8%|▊         | 7799/100000 [2:36:20<29:16:34,  1.14s/it]

76820 episode score is 716.13
76830 episode score is 726.97


  8%|▊         | 7800/100000 [2:36:22<36:57:43,  1.44s/it]

Iteration 7800: Average test reward: 723.46


  8%|▊         | 7801/100000 [2:36:23<34:20:23,  1.34s/it]

76840 episode score is 704.33


  8%|▊         | 7802/100000 [2:36:24<32:37:22,  1.27s/it]

76850 episode score is 717.89


  8%|▊         | 7803/100000 [2:36:25<31:43:31,  1.24s/it]

76860 episode score is 740.22


  8%|▊         | 7804/100000 [2:36:27<31:00:28,  1.21s/it]

76870 episode score is 737.98


  8%|▊         | 7805/100000 [2:36:28<30:34:39,  1.19s/it]

76880 episode score is 741.09


  8%|▊         | 7806/100000 [2:36:29<30:20:06,  1.18s/it]

76890 episode score is 737.19


  8%|▊         | 7807/100000 [2:36:30<30:11:24,  1.18s/it]

76900 episode score is 752.52


  8%|▊         | 7808/100000 [2:36:31<30:00:17,  1.17s/it]

76910 episode score is 743.30


  8%|▊         | 7809/100000 [2:36:32<29:56:43,  1.17s/it]

76920 episode score is 743.58


  8%|▊         | 7810/100000 [2:36:33<29:29:45,  1.15s/it]

76930 episode score is 717.94


  8%|▊         | 7811/100000 [2:36:35<29:30:21,  1.15s/it]

76940 episode score is 745.69


  8%|▊         | 7812/100000 [2:36:36<29:07:15,  1.14s/it]

76950 episode score is 706.06


  8%|▊         | 7813/100000 [2:36:37<28:52:07,  1.13s/it]

76960 episode score is 703.88


  8%|▊         | 7814/100000 [2:36:38<29:25:56,  1.15s/it]

76971 episode score is 699.08


  8%|▊         | 7815/100000 [2:36:39<29:40:34,  1.16s/it]

76982 episode score is 672.57


  8%|▊         | 7816/100000 [2:36:40<29:24:56,  1.15s/it]

76993 episode score is 636.48


  8%|▊         | 7817/100000 [2:36:41<29:26:18,  1.15s/it]

77004 episode score is 658.04


  8%|▊         | 7818/100000 [2:36:43<29:21:18,  1.15s/it]

77015 episode score is 651.31


  8%|▊         | 7819/100000 [2:36:44<29:28:58,  1.15s/it]

77026 episode score is 667.95


  8%|▊         | 7820/100000 [2:36:45<29:40:02,  1.16s/it]

77037 episode score is 680.13


  8%|▊         | 7821/100000 [2:36:46<29:14:26,  1.14s/it]

77047 episode score is 708.38


  8%|▊         | 7822/100000 [2:36:47<29:49:20,  1.16s/it]

77058 episode score is 697.91


  8%|▊         | 7823/100000 [2:36:48<29:53:45,  1.17s/it]

77069 episode score is 667.79


  8%|▊         | 7824/100000 [2:36:50<29:32:46,  1.15s/it]

77079 episode score is 708.09


  8%|▊         | 7825/100000 [2:36:51<29:07:05,  1.14s/it]

77089 episode score is 703.42


  8%|▊         | 7826/100000 [2:36:52<28:46:53,  1.12s/it]

77099 episode score is 699.98


  8%|▊         | 7827/100000 [2:36:53<29:23:44,  1.15s/it]

77110 episode score is 693.89


  8%|▊         | 7828/100000 [2:36:54<29:46:08,  1.16s/it]

77121 episode score is 695.78


  8%|▊         | 7829/100000 [2:36:55<29:29:35,  1.15s/it]

77131 episode score is 717.08


  8%|▊         | 7830/100000 [2:36:56<29:16:01,  1.14s/it]

77141 episode score is 719.16


  8%|▊         | 7831/100000 [2:36:58<29:22:13,  1.15s/it]

77152 episode score is 669.02


  8%|▊         | 7832/100000 [2:36:59<28:59:04,  1.13s/it]

77162 episode score is 698.81


  8%|▊         | 7833/100000 [2:37:00<28:54:40,  1.13s/it]

77172 episode score is 723.55


  8%|▊         | 7834/100000 [2:37:01<28:52:56,  1.13s/it]

77182 episode score is 722.19


  8%|▊         | 7835/100000 [2:37:02<28:50:11,  1.13s/it]

77192 episode score is 716.95


  8%|▊         | 7836/100000 [2:37:03<28:40:53,  1.12s/it]

77202 episode score is 714.10


  8%|▊         | 7837/100000 [2:37:04<28:39:34,  1.12s/it]

77212 episode score is 727.60


  8%|▊         | 7838/100000 [2:37:05<28:29:04,  1.11s/it]

77222 episode score is 715.05


  8%|▊         | 7839/100000 [2:37:06<28:11:31,  1.10s/it]

77232 episode score is 698.23


  8%|▊         | 7840/100000 [2:37:07<28:14:51,  1.10s/it]

77242 episode score is 722.35


  8%|▊         | 7841/100000 [2:37:09<28:50:09,  1.13s/it]

77253 episode score is 691.94


  8%|▊         | 7842/100000 [2:37:10<28:32:22,  1.11s/it]

77263 episode score is 705.88


  8%|▊         | 7843/100000 [2:37:11<29:43:19,  1.16s/it]

77274 episode score is 690.31


  8%|▊         | 7844/100000 [2:37:12<29:25:21,  1.15s/it]

77284 episode score is 726.57


  8%|▊         | 7845/100000 [2:37:13<29:09:26,  1.14s/it]

77294 episode score is 707.55


  8%|▊         | 7846/100000 [2:37:14<28:54:03,  1.13s/it]

77304 episode score is 718.25


  8%|▊         | 7847/100000 [2:37:15<28:40:36,  1.12s/it]

77314 episode score is 710.16


  8%|▊         | 7848/100000 [2:37:17<28:39:54,  1.12s/it]

77324 episode score is 726.80


  8%|▊         | 7849/100000 [2:37:18<28:55:27,  1.13s/it]

77334 episode score is 741.11


  8%|▊         | 7850/100000 [2:37:19<28:58:57,  1.13s/it]

77344 episode score is 726.20


  8%|▊         | 7851/100000 [2:37:20<28:38:30,  1.12s/it]

77354 episode score is 704.45


  8%|▊         | 7852/100000 [2:37:21<28:23:30,  1.11s/it]

77364 episode score is 697.56


  8%|▊         | 7853/100000 [2:37:22<28:29:25,  1.11s/it]

77375 episode score is 651.28


  8%|▊         | 7854/100000 [2:37:23<28:30:56,  1.11s/it]

77386 episode score is 641.29


  8%|▊         | 7855/100000 [2:37:24<28:37:04,  1.12s/it]

77397 episode score is 653.73


  8%|▊         | 7856/100000 [2:37:26<28:37:11,  1.12s/it]

77408 episode score is 645.21


  8%|▊         | 7857/100000 [2:37:27<28:46:48,  1.12s/it]

77419 episode score is 664.16


  8%|▊         | 7858/100000 [2:37:28<28:34:19,  1.12s/it]

77429 episode score is 709.67


  8%|▊         | 7859/100000 [2:37:29<28:24:49,  1.11s/it]

77439 episode score is 701.28


  8%|▊         | 7860/100000 [2:37:30<28:42:10,  1.12s/it]

77449 episode score is 732.66


  8%|▊         | 7861/100000 [2:37:31<28:59:30,  1.13s/it]

77459 episode score is 747.88


  8%|▊         | 7862/100000 [2:37:32<29:07:38,  1.14s/it]

77469 episode score is 742.48


  8%|▊         | 7863/100000 [2:37:33<29:05:46,  1.14s/it]

77479 episode score is 737.00


  8%|▊         | 7864/100000 [2:37:35<29:30:53,  1.15s/it]

77490 episode score is 697.54


  8%|▊         | 7865/100000 [2:37:36<28:59:00,  1.13s/it]

77500 episode score is 702.89


  8%|▊         | 7866/100000 [2:37:37<29:08:50,  1.14s/it]

77511 episode score is 672.84


  8%|▊         | 7867/100000 [2:37:38<28:55:54,  1.13s/it]

77521 episode score is 717.89


  8%|▊         | 7868/100000 [2:37:39<28:41:04,  1.12s/it]

77531 episode score is 704.38


  8%|▊         | 7869/100000 [2:37:40<28:30:10,  1.11s/it]

77541 episode score is 710.29


  8%|▊         | 7870/100000 [2:37:41<29:00:02,  1.13s/it]

77552 episode score is 682.16


  8%|▊         | 7871/100000 [2:37:42<28:50:31,  1.13s/it]

77562 episode score is 719.92


  8%|▊         | 7872/100000 [2:37:44<29:00:02,  1.13s/it]

77572 episode score is 742.28


  8%|▊         | 7873/100000 [2:37:45<29:01:13,  1.13s/it]

77582 episode score is 733.21


  8%|▊         | 7874/100000 [2:37:46<28:56:05,  1.13s/it]

77592 episode score is 732.87


  8%|▊         | 7875/100000 [2:37:47<28:49:21,  1.13s/it]

77602 episode score is 714.32


  8%|▊         | 7876/100000 [2:37:48<28:46:57,  1.12s/it]

77612 episode score is 717.28


  8%|▊         | 7877/100000 [2:37:49<28:36:43,  1.12s/it]

77622 episode score is 709.56


  8%|▊         | 7878/100000 [2:37:50<28:37:26,  1.12s/it]

77632 episode score is 724.26


  8%|▊         | 7879/100000 [2:37:51<28:28:02,  1.11s/it]

77642 episode score is 706.35


  8%|▊         | 7880/100000 [2:37:53<28:39:41,  1.12s/it]

77652 episode score is 736.63


  8%|▊         | 7881/100000 [2:37:54<28:33:54,  1.12s/it]

77662 episode score is 720.03


  8%|▊         | 7882/100000 [2:37:55<28:54:08,  1.13s/it]

77672 episode score is 752.69


  8%|▊         | 7883/100000 [2:37:56<28:51:33,  1.13s/it]

77682 episode score is 731.74


  8%|▊         | 7884/100000 [2:37:57<29:03:20,  1.14s/it]

77692 episode score is 748.97


  8%|▊         | 7885/100000 [2:37:58<28:49:54,  1.13s/it]

77702 episode score is 720.67


  8%|▊         | 7886/100000 [2:37:59<28:43:32,  1.12s/it]

77712 episode score is 719.51


  8%|▊         | 7887/100000 [2:38:00<28:34:17,  1.12s/it]

77722 episode score is 714.99


  8%|▊         | 7888/100000 [2:38:02<29:02:53,  1.14s/it]

77733 episode score is 690.90


  8%|▊         | 7889/100000 [2:38:03<29:03:38,  1.14s/it]

77743 episode score is 718.24


  8%|▊         | 7890/100000 [2:38:04<28:48:42,  1.13s/it]

77753 episode score is 708.64


  8%|▊         | 7891/100000 [2:38:05<29:09:02,  1.14s/it]

77763 episode score is 750.05


  8%|▊         | 7892/100000 [2:38:06<29:03:27,  1.14s/it]

77773 episode score is 723.07


  8%|▊         | 7893/100000 [2:38:07<28:45:26,  1.12s/it]

77783 episode score is 707.04


  8%|▊         | 7894/100000 [2:38:08<28:47:11,  1.13s/it]

77793 episode score is 729.50


  8%|▊         | 7895/100000 [2:38:10<28:34:27,  1.12s/it]

77803 episode score is 703.87


  8%|▊         | 7896/100000 [2:38:11<28:49:47,  1.13s/it]

77814 episode score is 666.43


  8%|▊         | 7897/100000 [2:38:12<29:10:03,  1.14s/it]

77825 episode score is 684.81


  8%|▊         | 7898/100000 [2:38:13<28:46:20,  1.12s/it]

77835 episode score is 692.42


  8%|▊         | 7899/100000 [2:38:14<28:45:11,  1.12s/it]

77845 episode score is 726.06
77855 episode score is 703.96


  8%|▊         | 7900/100000 [2:38:16<36:02:15,  1.41s/it]

Iteration 7900: Average test reward: 709.13


  8%|▊         | 7901/100000 [2:38:17<34:58:31,  1.37s/it]

77866 episode score is 689.48


  8%|▊         | 7902/100000 [2:38:18<32:57:21,  1.29s/it]

77876 episode score is 711.18


  8%|▊         | 7903/100000 [2:38:20<31:42:48,  1.24s/it]

77886 episode score is 712.20


  8%|▊         | 7904/100000 [2:38:21<30:53:43,  1.21s/it]

77896 episode score is 726.12


  8%|▊         | 7905/100000 [2:38:22<30:15:36,  1.18s/it]

77906 episode score is 726.11


  8%|▊         | 7906/100000 [2:38:23<29:53:28,  1.17s/it]

77916 episode score is 733.11


  8%|▊         | 7907/100000 [2:38:24<29:25:56,  1.15s/it]

77926 episode score is 712.97


  8%|▊         | 7908/100000 [2:38:25<29:25:28,  1.15s/it]

77936 episode score is 740.41


  8%|▊         | 7909/100000 [2:38:26<29:16:30,  1.14s/it]

77946 episode score is 712.61


  8%|▊         | 7910/100000 [2:38:28<29:26:10,  1.15s/it]

77957 episode score is 674.09


  8%|▊         | 7911/100000 [2:38:29<29:27:10,  1.15s/it]

77967 episode score is 748.89


  8%|▊         | 7912/100000 [2:38:30<29:44:21,  1.16s/it]

77977 episode score is 766.66


  8%|▊         | 7913/100000 [2:38:31<29:15:23,  1.14s/it]

77986 episode score is 801.15


  8%|▊         | 7914/100000 [2:38:32<29:42:49,  1.16s/it]

77996 episode score is 770.85


  8%|▊         | 7915/100000 [2:38:33<29:13:25,  1.14s/it]

78005 episode score is 792.67


  8%|▊         | 7916/100000 [2:38:34<29:18:16,  1.15s/it]

78015 episode score is 750.59


  8%|▊         | 7917/100000 [2:38:36<29:33:19,  1.16s/it]

78025 episode score is 765.50


  8%|▊         | 7918/100000 [2:38:37<29:44:03,  1.16s/it]

78035 episode score is 765.86


  8%|▊         | 7919/100000 [2:38:38<29:21:37,  1.15s/it]

78045 episode score is 712.25


  8%|▊         | 7920/100000 [2:38:39<29:09:54,  1.14s/it]

78055 episode score is 719.57


  8%|▊         | 7921/100000 [2:38:40<29:15:32,  1.14s/it]

78065 episode score is 741.49


  8%|▊         | 7922/100000 [2:38:41<29:31:16,  1.15s/it]

78075 episode score is 760.54


  8%|▊         | 7923/100000 [2:38:42<29:07:07,  1.14s/it]

78085 episode score is 704.87


  8%|▊         | 7924/100000 [2:38:44<28:42:58,  1.12s/it]

78095 episode score is 695.57


  8%|▊         | 7925/100000 [2:38:45<28:33:16,  1.12s/it]

78105 episode score is 711.76


  8%|▊         | 7926/100000 [2:38:46<28:37:43,  1.12s/it]

78115 episode score is 702.77


  8%|▊         | 7927/100000 [2:38:47<29:05:08,  1.14s/it]

78125 episode score is 757.32


  8%|▊         | 7928/100000 [2:38:48<29:13:50,  1.14s/it]

78135 episode score is 733.17


  8%|▊         | 7929/100000 [2:38:49<29:29:53,  1.15s/it]

78145 episode score is 756.94


  8%|▊         | 7930/100000 [2:38:50<29:39:19,  1.16s/it]

78155 episode score is 762.66


  8%|▊         | 7931/100000 [2:38:52<29:18:14,  1.15s/it]

78165 episode score is 718.41


  8%|▊         | 7932/100000 [2:38:53<29:14:48,  1.14s/it]

78175 episode score is 740.84


  8%|▊         | 7933/100000 [2:38:54<29:12:32,  1.14s/it]

78185 episode score is 734.57


  8%|▊         | 7934/100000 [2:38:55<28:52:41,  1.13s/it]

78194 episode score is 792.62


  8%|▊         | 7935/100000 [2:38:56<28:33:24,  1.12s/it]

78203 episode score is 792.09


  8%|▊         | 7936/100000 [2:38:57<28:49:44,  1.13s/it]

78213 episode score is 741.76


  8%|▊         | 7937/100000 [2:38:58<29:12:15,  1.14s/it]

78223 episode score is 764.87


  8%|▊         | 7938/100000 [2:39:00<29:48:49,  1.17s/it]

78233 episode score is 784.91


  8%|▊         | 7939/100000 [2:39:01<29:41:54,  1.16s/it]

78243 episode score is 746.83


  8%|▊         | 7940/100000 [2:39:02<30:08:09,  1.18s/it]

78253 episode score is 796.28


  8%|▊         | 7941/100000 [2:39:03<29:39:25,  1.16s/it]

78262 episode score is 803.49


  8%|▊         | 7942/100000 [2:39:04<29:42:16,  1.16s/it]

78272 episode score is 762.74


  8%|▊         | 7943/100000 [2:39:05<29:47:04,  1.16s/it]

78282 episode score is 764.21


  8%|▊         | 7944/100000 [2:39:07<29:47:48,  1.17s/it]

78292 episode score is 755.82


  8%|▊         | 7945/100000 [2:39:08<30:03:45,  1.18s/it]

78302 episode score is 775.31


  8%|▊         | 7946/100000 [2:39:09<29:29:37,  1.15s/it]

78311 episode score is 798.00


  8%|▊         | 7947/100000 [2:39:10<29:09:10,  1.14s/it]

78320 episode score is 796.83


  8%|▊         | 7948/100000 [2:39:11<29:09:23,  1.14s/it]

78329 episode score is 839.85


  8%|▊         | 7949/100000 [2:39:12<29:17:36,  1.15s/it]

78339 episode score is 750.37


  8%|▊         | 7950/100000 [2:39:13<29:33:43,  1.16s/it]

78349 episode score is 770.69


  8%|▊         | 7951/100000 [2:39:15<29:52:48,  1.17s/it]

78358 episode score is 801.35


  8%|▊         | 7952/100000 [2:39:16<30:07:07,  1.18s/it]

78368 episode score is 775.68


  8%|▊         | 7953/100000 [2:39:17<29:58:27,  1.17s/it]

78378 episode score is 745.38


  8%|▊         | 7954/100000 [2:39:18<29:56:47,  1.17s/it]

78388 episode score is 750.21


  8%|▊         | 7955/100000 [2:39:19<29:53:35,  1.17s/it]

78398 episode score is 750.59


  8%|▊         | 7956/100000 [2:39:21<30:04:46,  1.18s/it]

78408 episode score is 776.22


  8%|▊         | 7957/100000 [2:39:22<29:51:07,  1.17s/it]

78418 episode score is 748.69


  8%|▊         | 7958/100000 [2:39:23<29:47:46,  1.17s/it]

78428 episode score is 752.00


  8%|▊         | 7959/100000 [2:39:24<29:27:12,  1.15s/it]

78438 episode score is 731.28


  8%|▊         | 7960/100000 [2:39:25<29:20:17,  1.15s/it]

78448 episode score is 743.51


  8%|▊         | 7961/100000 [2:39:26<29:45:49,  1.16s/it]

78458 episode score is 785.57


  8%|▊         | 7962/100000 [2:39:27<29:34:40,  1.16s/it]

78468 episode score is 740.01


  8%|▊         | 7963/100000 [2:39:29<29:52:45,  1.17s/it]

78478 episode score is 768.44


  8%|▊         | 7964/100000 [2:39:30<29:22:54,  1.15s/it]

78488 episode score is 709.88


  8%|▊         | 7965/100000 [2:39:31<29:12:57,  1.14s/it]

78499 episode score is 651.25


  8%|▊         | 7966/100000 [2:39:32<29:26:40,  1.15s/it]

78510 episode score is 685.39


  8%|▊         | 7967/100000 [2:39:33<28:56:46,  1.13s/it]

78520 episode score is 688.84


  8%|▊         | 7968/100000 [2:39:34<28:33:13,  1.12s/it]

78530 episode score is 701.24


  8%|▊         | 7969/100000 [2:39:35<28:19:07,  1.11s/it]

78540 episode score is 696.06


  8%|▊         | 7970/100000 [2:39:36<28:13:26,  1.10s/it]

78548 episode score is 879.81


  8%|▊         | 7971/100000 [2:39:38<28:12:51,  1.10s/it]

78555 episode score is 1001.86


  8%|▊         | 7972/100000 [2:39:39<28:08:45,  1.10s/it]

78561 episode score is 1135.91


  8%|▊         | 7973/100000 [2:39:40<28:25:37,  1.11s/it]

78568 episode score is 1034.96


  8%|▊         | 7974/100000 [2:39:41<28:50:37,  1.13s/it]

78576 episode score is 950.39


  8%|▊         | 7975/100000 [2:39:42<28:34:13,  1.12s/it]

78583 episode score is 987.73


  8%|▊         | 7976/100000 [2:39:43<28:40:16,  1.12s/it]

78590 episode score is 1008.47


  8%|▊         | 7977/100000 [2:39:44<28:21:55,  1.11s/it]

78596 episode score is 1144.91


  8%|▊         | 7978/100000 [2:39:45<29:04:57,  1.14s/it]

78603 episode score is 1068.73


  8%|▊         | 7979/100000 [2:39:47<29:27:10,  1.15s/it]

78609 episode score is 1218.09


  8%|▊         | 7980/100000 [2:39:48<29:35:53,  1.16s/it]

78614 episode score is 1402.43


  8%|▊         | 7981/100000 [2:39:49<29:12:55,  1.14s/it]

78619 episode score is 1352.48


  8%|▊         | 7982/100000 [2:39:50<29:35:23,  1.16s/it]

78626 episode score is 1057.72


  8%|▊         | 7983/100000 [2:39:51<29:59:33,  1.17s/it]

78633 episode score is 1104.10


  8%|▊         | 7984/100000 [2:39:52<29:21:59,  1.15s/it]

78639 episode score is 1115.97


  8%|▊         | 7985/100000 [2:39:54<29:51:11,  1.17s/it]

78645 episode score is 1259.16


  8%|▊         | 7986/100000 [2:39:55<30:23:06,  1.19s/it]

78651 episode score is 1269.80


  8%|▊         | 7987/100000 [2:39:56<29:52:04,  1.17s/it]

78656 episode score is 1390.03


  8%|▊         | 7988/100000 [2:39:57<29:22:56,  1.15s/it]

78661 episode score is 1334.11


  8%|▊         | 7989/100000 [2:39:58<30:15:45,  1.18s/it]

78667 episode score is 1327.02


  8%|▊         | 7990/100000 [2:39:59<29:39:18,  1.16s/it]

78673 episode score is 1147.41


  8%|▊         | 7991/100000 [2:40:01<29:33:10,  1.16s/it]

78678 episode score is 1398.53


  8%|▊         | 7992/100000 [2:40:02<29:37:06,  1.16s/it]

78684 episode score is 1196.26


  8%|▊         | 7993/100000 [2:40:03<30:49:50,  1.21s/it]

78690 episode score is 1337.27


  8%|▊         | 7994/100000 [2:40:04<31:07:42,  1.22s/it]

78697 episode score is 1140.04


  8%|▊         | 7995/100000 [2:40:06<31:13:02,  1.22s/it]

78704 episode score is 1122.25


  8%|▊         | 7996/100000 [2:40:07<31:26:16,  1.23s/it]

78710 episode score is 1279.20


  8%|▊         | 7997/100000 [2:40:08<31:32:38,  1.23s/it]

78716 episode score is 1286.87


  8%|▊         | 7998/100000 [2:40:09<31:35:17,  1.24s/it]

78721 episode score is 1503.43


  8%|▊         | 7999/100000 [2:40:11<31:35:24,  1.24s/it]

78727 episode score is 1265.30
78732 episode score is 1370.47


  8%|▊         | 8000/100000 [2:40:14<45:05:47,  1.76s/it]

Iteration 8000: Average test reward: 1312.22


  8%|▊         | 8001/100000 [2:40:15<39:55:17,  1.56s/it]

78737 episode score is 1329.78


  8%|▊         | 8002/100000 [2:40:16<37:45:12,  1.48s/it]

78742 episode score is 1538.63


  8%|▊         | 8003/100000 [2:40:17<35:42:54,  1.40s/it]

78747 episode score is 1444.04


  8%|▊         | 8004/100000 [2:40:18<34:21:52,  1.34s/it]

78753 episode score is 1278.81


  8%|▊         | 8005/100000 [2:40:20<33:49:29,  1.32s/it]

78759 episode score is 1297.68


  8%|▊         | 8006/100000 [2:40:21<33:46:47,  1.32s/it]

78765 episode score is 1338.72


  8%|▊         | 8007/100000 [2:40:22<33:03:52,  1.29s/it]

78771 episode score is 1248.46


  8%|▊         | 8008/100000 [2:40:23<32:10:19,  1.26s/it]

78777 episode score is 1204.15


  8%|▊         | 8009/100000 [2:40:25<31:40:10,  1.24s/it]

78782 episode score is 1473.76


  8%|▊         | 8010/100000 [2:40:26<31:40:58,  1.24s/it]

78789 episode score is 1130.90


  8%|▊         | 8011/100000 [2:40:27<31:41:29,  1.24s/it]

78796 episode score is 1124.71


  8%|▊         | 8012/100000 [2:40:28<31:26:42,  1.23s/it]

78802 episode score is 1270.12


  8%|▊         | 8013/100000 [2:40:29<31:22:05,  1.23s/it]

78808 episode score is 1240.95


  8%|▊         | 8014/100000 [2:40:31<31:00:24,  1.21s/it]

78817 episode score is 841.51


  8%|▊         | 8015/100000 [2:40:32<30:47:22,  1.21s/it]

78825 episode score is 943.51


  8%|▊         | 8016/100000 [2:40:33<30:22:58,  1.19s/it]

78835 episode score is 724.45


  8%|▊         | 8017/100000 [2:40:34<30:09:49,  1.18s/it]

78844 episode score is 824.87


  8%|▊         | 8018/100000 [2:40:35<29:40:18,  1.16s/it]

78854 episode score is 717.60


  8%|▊         | 8019/100000 [2:40:36<29:48:38,  1.17s/it]

78864 episode score is 762.80


  8%|▊         | 8020/100000 [2:40:38<29:54:32,  1.17s/it]

78874 episode score is 684.92


  8%|▊         | 8021/100000 [2:40:39<29:46:48,  1.17s/it]

78883 episode score is 846.47


  8%|▊         | 8022/100000 [2:40:40<29:44:31,  1.16s/it]

78893 episode score is 743.56


  8%|▊         | 8023/100000 [2:40:41<29:25:13,  1.15s/it]

78904 episode score is 622.99


  8%|▊         | 8024/100000 [2:40:42<29:14:57,  1.14s/it]

78914 episode score is 715.09


  8%|▊         | 8025/100000 [2:40:43<29:06:35,  1.14s/it]

78925 episode score is 642.45


  8%|▊         | 8026/100000 [2:40:44<28:57:48,  1.13s/it]

78935 episode score is 715.85


  8%|▊         | 8027/100000 [2:40:46<28:48:24,  1.13s/it]

78944 episode score is 788.66


  8%|▊         | 8028/100000 [2:40:47<29:03:43,  1.14s/it]

78953 episode score is 833.91


  8%|▊         | 8029/100000 [2:40:48<29:06:04,  1.14s/it]

78962 episode score is 812.69


  8%|▊         | 8030/100000 [2:40:49<28:43:40,  1.12s/it]

78971 episode score is 775.76


  8%|▊         | 8031/100000 [2:40:50<29:04:22,  1.14s/it]

78980 episode score is 830.23


  8%|▊         | 8032/100000 [2:40:51<28:56:46,  1.13s/it]

78989 episode score is 806.44


  8%|▊         | 8033/100000 [2:40:52<28:39:23,  1.12s/it]

78998 episode score is 769.05


  8%|▊         | 8034/100000 [2:40:53<28:24:46,  1.11s/it]

79008 episode score is 697.61


  8%|▊         | 8035/100000 [2:40:54<28:29:10,  1.12s/it]

79018 episode score is 721.14


  8%|▊         | 8036/100000 [2:40:56<28:45:59,  1.13s/it]

79029 episode score is 670.21


  8%|▊         | 8037/100000 [2:40:57<29:03:11,  1.14s/it]

79040 episode score is 669.73


  8%|▊         | 8038/100000 [2:40:58<29:21:05,  1.15s/it]

79052 episode score is 614.04


  8%|▊         | 8039/100000 [2:40:59<28:49:41,  1.13s/it]

79063 episode score is 617.09


  8%|▊         | 8040/100000 [2:41:00<28:40:15,  1.12s/it]

79074 episode score is 634.71


  8%|▊         | 8041/100000 [2:41:01<28:28:11,  1.11s/it]

79085 episode score is 630.73


  8%|▊         | 8042/100000 [2:41:02<28:56:23,  1.13s/it]

79096 episode score is 683.66


  8%|▊         | 8043/100000 [2:41:04<28:47:03,  1.13s/it]

79107 episode score is 639.97


  8%|▊         | 8044/100000 [2:41:05<29:13:01,  1.14s/it]

79118 episode score is 694.93


  8%|▊         | 8045/100000 [2:41:06<28:59:59,  1.14s/it]

79129 episode score is 643.59


  8%|▊         | 8046/100000 [2:41:07<29:16:14,  1.15s/it]

79140 episode score is 682.97


  8%|▊         | 8047/100000 [2:41:08<29:04:51,  1.14s/it]

79151 episode score is 650.79


  8%|▊         | 8048/100000 [2:41:09<28:53:57,  1.13s/it]

79162 episode score is 641.09


  8%|▊         | 8049/100000 [2:41:10<28:55:20,  1.13s/it]

79173 episode score is 654.71


  8%|▊         | 8050/100000 [2:41:12<29:03:24,  1.14s/it]

79184 episode score is 665.85


  8%|▊         | 8051/100000 [2:41:13<29:02:33,  1.14s/it]

79195 episode score is 658.10


  8%|▊         | 8052/100000 [2:41:14<28:48:57,  1.13s/it]

79206 episode score is 637.79


  8%|▊         | 8053/100000 [2:41:15<28:54:16,  1.13s/it]

79217 episode score is 654.59


  8%|▊         | 8054/100000 [2:41:16<28:51:41,  1.13s/it]

79228 episode score is 653.76


  8%|▊         | 8055/100000 [2:41:17<29:16:08,  1.15s/it]

79239 episode score is 683.30


  8%|▊         | 8056/100000 [2:41:18<29:02:48,  1.14s/it]

79249 episode score is 721.82


  8%|▊         | 8057/100000 [2:41:20<29:12:26,  1.14s/it]

79260 episode score is 672.60


  8%|▊         | 8058/100000 [2:41:21<29:09:54,  1.14s/it]

79271 episode score is 659.87


  8%|▊         | 8059/100000 [2:41:22<29:29:35,  1.15s/it]

79282 episode score is 691.88


  8%|▊         | 8060/100000 [2:41:23<29:14:03,  1.14s/it]

79292 episode score is 723.56


  8%|▊         | 8061/100000 [2:41:24<29:07:21,  1.14s/it]

79302 episode score is 735.83


  8%|▊         | 8062/100000 [2:41:25<29:07:23,  1.14s/it]

79313 episode score is 668.62


  8%|▊         | 8063/100000 [2:41:26<29:18:00,  1.15s/it]

79324 episode score is 677.70


  8%|▊         | 8064/100000 [2:41:28<29:08:38,  1.14s/it]

79334 episode score is 729.92


  8%|▊         | 8065/100000 [2:41:29<29:06:16,  1.14s/it]

79344 episode score is 740.53


  8%|▊         | 8066/100000 [2:41:30<29:09:17,  1.14s/it]

79355 episode score is 673.24


  8%|▊         | 8067/100000 [2:41:31<28:47:55,  1.13s/it]

79365 episode score is 704.57


  8%|▊         | 8068/100000 [2:41:32<29:17:01,  1.15s/it]

79376 episode score is 699.56
79386 episode score is 712.00


  8%|▊         | 8070/100000 [2:41:34<29:32:55,  1.16s/it]

79396 episode score is 748.08


  8%|▊         | 8071/100000 [2:41:36<29:40:08,  1.16s/it]

79407 episode score is 696.64


  8%|▊         | 8072/100000 [2:41:37<29:27:05,  1.15s/it]

79417 episode score is 741.13


  8%|▊         | 8073/100000 [2:41:38<28:58:32,  1.13s/it]

79427 episode score is 709.28


  8%|▊         | 8074/100000 [2:41:39<29:14:31,  1.15s/it]

79437 episode score is 761.65


  8%|▊         | 8075/100000 [2:41:40<29:02:16,  1.14s/it]

79447 episode score is 729.83


  8%|▊         | 8076/100000 [2:41:41<29:07:29,  1.14s/it]

79457 episode score is 750.66


  8%|▊         | 8077/100000 [2:41:42<29:21:57,  1.15s/it]

79467 episode score is 767.71


  8%|▊         | 8078/100000 [2:41:44<29:19:01,  1.15s/it]

79477 episode score is 748.47


  8%|▊         | 8079/100000 [2:41:45<29:23:27,  1.15s/it]

79487 episode score is 756.06


  8%|▊         | 8080/100000 [2:41:46<29:17:48,  1.15s/it]

79497 episode score is 746.69


  8%|▊         | 8081/100000 [2:41:47<29:44:42,  1.16s/it]

79507 episode score is 786.02


  8%|▊         | 8082/100000 [2:41:48<29:19:34,  1.15s/it]

79517 episode score is 714.34


  8%|▊         | 8083/100000 [2:41:49<28:54:59,  1.13s/it]

79527 episode score is 705.90


  8%|▊         | 8084/100000 [2:41:50<28:45:00,  1.13s/it]

79537 episode score is 718.26


  8%|▊         | 8085/100000 [2:41:52<29:03:29,  1.14s/it]

79547 episode score is 756.79


  8%|▊         | 8086/100000 [2:41:53<29:01:04,  1.14s/it]

79557 episode score is 730.78


  8%|▊         | 8087/100000 [2:41:54<29:27:47,  1.15s/it]

79568 episode score is 688.19


  8%|▊         | 8088/100000 [2:41:55<29:00:15,  1.14s/it]

79578 episode score is 706.42


  8%|▊         | 8089/100000 [2:41:56<28:43:32,  1.13s/it]

79588 episode score is 716.55


  8%|▊         | 8090/100000 [2:41:57<28:31:51,  1.12s/it]

79598 episode score is 712.80


  8%|▊         | 8091/100000 [2:41:58<28:30:33,  1.12s/it]

79608 episode score is 729.27


  8%|▊         | 8092/100000 [2:41:59<28:40:39,  1.12s/it]

79618 episode score is 740.71


  8%|▊         | 8093/100000 [2:42:01<29:06:57,  1.14s/it]

79629 episode score is 688.56


  8%|▊         | 8094/100000 [2:42:02<29:15:29,  1.15s/it]

79640 episode score is 674.34


  8%|▊         | 8095/100000 [2:42:03<28:59:36,  1.14s/it]

79650 episode score is 707.71


  8%|▊         | 8096/100000 [2:42:04<28:43:01,  1.12s/it]

79660 episode score is 719.23


  8%|▊         | 8097/100000 [2:42:05<28:42:36,  1.12s/it]

79670 episode score is 735.50


  8%|▊         | 8098/100000 [2:42:06<28:42:06,  1.12s/it]

79680 episode score is 730.96


  8%|▊         | 8099/100000 [2:42:07<28:57:27,  1.13s/it]

79690 episode score is 740.66
79700 episode score is 711.91


  8%|▊         | 8100/100000 [2:42:09<36:09:45,  1.42s/it]

Iteration 8100: Average test reward: 685.84


  8%|▊         | 8101/100000 [2:42:11<33:44:49,  1.32s/it]

79710 episode score is 718.66


  8%|▊         | 8102/100000 [2:42:12<32:04:41,  1.26s/it]

79720 episode score is 715.62


  8%|▊         | 8103/100000 [2:42:13<31:07:14,  1.22s/it]

79730 episode score is 738.03


  8%|▊         | 8104/100000 [2:42:14<30:23:07,  1.19s/it]

79740 episode score is 739.40


  8%|▊         | 8105/100000 [2:42:15<29:58:10,  1.17s/it]

79750 episode score is 743.86


  8%|▊         | 8106/100000 [2:42:16<29:34:29,  1.16s/it]

79760 episode score is 728.81


  8%|▊         | 8107/100000 [2:42:17<29:17:04,  1.15s/it]

79770 episode score is 719.05


  8%|▊         | 8108/100000 [2:42:18<29:07:19,  1.14s/it]

79780 episode score is 723.85


  8%|▊         | 8109/100000 [2:42:20<29:28:28,  1.15s/it]

79790 episode score is 761.09


  8%|▊         | 8110/100000 [2:42:21<29:28:09,  1.15s/it]

79800 episode score is 745.70


  8%|▊         | 8111/100000 [2:42:22<29:22:04,  1.15s/it]

79810 episode score is 741.34


  8%|▊         | 8112/100000 [2:42:23<29:30:21,  1.16s/it]

79820 episode score is 758.62


  8%|▊         | 8113/100000 [2:42:24<29:19:54,  1.15s/it]

79830 episode score is 736.23


  8%|▊         | 8114/100000 [2:42:25<29:27:55,  1.15s/it]

79840 episode score is 756.04


  8%|▊         | 8115/100000 [2:42:27<29:25:56,  1.15s/it]

79850 episode score is 754.24


  8%|▊         | 8116/100000 [2:42:28<29:18:15,  1.15s/it]

79860 episode score is 752.83


  8%|▊         | 8117/100000 [2:42:29<28:57:37,  1.13s/it]

79870 episode score is 726.40


  8%|▊         | 8118/100000 [2:42:30<28:53:08,  1.13s/it]

79880 episode score is 740.17


  8%|▊         | 8119/100000 [2:42:31<28:53:55,  1.13s/it]

79890 episode score is 743.33


  8%|▊         | 8120/100000 [2:42:32<28:57:54,  1.13s/it]

79900 episode score is 745.89


  8%|▊         | 8121/100000 [2:42:33<29:19:16,  1.15s/it]

79911 episode score is 702.27


  8%|▊         | 8122/100000 [2:42:35<29:36:26,  1.16s/it]

79921 episode score is 727.29


  8%|▊         | 8123/100000 [2:42:36<29:18:09,  1.15s/it]

79931 episode score is 740.10


  8%|▊         | 8124/100000 [2:42:37<28:52:09,  1.13s/it]

79941 episode score is 719.58


  8%|▊         | 8125/100000 [2:42:38<28:42:14,  1.12s/it]

79951 episode score is 733.80


  8%|▊         | 8126/100000 [2:42:39<28:36:18,  1.12s/it]

79961 episode score is 728.69


  8%|▊         | 8127/100000 [2:42:40<28:33:56,  1.12s/it]

79971 episode score is 737.22


  8%|▊         | 8128/100000 [2:42:41<28:48:46,  1.13s/it]

79981 episode score is 747.66


  8%|▊         | 8129/100000 [2:42:42<28:28:54,  1.12s/it]

79991 episode score is 710.43


  8%|▊         | 8130/100000 [2:42:43<28:26:21,  1.11s/it]

80001 episode score is 721.63


  8%|▊         | 8131/100000 [2:42:45<28:19:30,  1.11s/it]

80011 episode score is 725.54


  8%|▊         | 8132/100000 [2:42:46<28:07:00,  1.10s/it]

80021 episode score is 713.04


  8%|▊         | 8133/100000 [2:42:47<28:29:22,  1.12s/it]

80032 episode score is 684.42


  8%|▊         | 8134/100000 [2:42:48<28:42:26,  1.12s/it]

80043 episode score is 669.13


  8%|▊         | 8135/100000 [2:42:49<28:43:52,  1.13s/it]

80054 episode score is 658.48


  8%|▊         | 8136/100000 [2:42:50<28:58:30,  1.14s/it]

80065 episode score is 685.96


  8%|▊         | 8137/100000 [2:42:51<28:35:22,  1.12s/it]

80075 episode score is 706.94


  8%|▊         | 8138/100000 [2:42:52<28:18:29,  1.11s/it]

80085 episode score is 705.83


  8%|▊         | 8139/100000 [2:42:53<28:10:40,  1.10s/it]

80095 episode score is 717.58


  8%|▊         | 8140/100000 [2:42:55<28:22:02,  1.11s/it]

80106 episode score is 663.84


  8%|▊         | 8141/100000 [2:42:56<28:30:46,  1.12s/it]

80116 episode score is 739.70


  8%|▊         | 8142/100000 [2:42:57<28:35:40,  1.12s/it]

80126 episode score is 723.64


  8%|▊         | 8143/100000 [2:42:58<28:44:46,  1.13s/it]

80136 episode score is 744.76


  8%|▊         | 8144/100000 [2:42:59<28:47:35,  1.13s/it]

80146 episode score is 744.25


  8%|▊         | 8145/100000 [2:43:00<28:53:53,  1.13s/it]

80157 episode score is 671.82


  8%|▊         | 8146/100000 [2:43:01<29:08:34,  1.14s/it]

80168 episode score is 692.60


  8%|▊         | 8147/100000 [2:43:03<29:03:18,  1.14s/it]

80178 episode score is 732.98


  8%|▊         | 8148/100000 [2:43:04<28:53:37,  1.13s/it]

80188 episode score is 736.72


  8%|▊         | 8149/100000 [2:43:05<28:41:01,  1.12s/it]

80198 episode score is 723.01


  8%|▊         | 8150/100000 [2:43:06<28:31:03,  1.12s/it]

80208 episode score is 726.16


  8%|▊         | 8151/100000 [2:43:07<28:44:05,  1.13s/it]

80218 episode score is 750.74


  8%|▊         | 8152/100000 [2:43:08<29:09:13,  1.14s/it]

80228 episode score is 774.32


  8%|▊         | 8153/100000 [2:43:09<28:58:41,  1.14s/it]

80238 episode score is 728.35


  8%|▊         | 8154/100000 [2:43:10<28:46:41,  1.13s/it]

80248 episode score is 725.65


  8%|▊         | 8155/100000 [2:43:12<28:39:58,  1.12s/it]

80258 episode score is 728.51


  8%|▊         | 8156/100000 [2:43:13<28:59:23,  1.14s/it]

80269 episode score is 690.21


  8%|▊         | 8157/100000 [2:43:14<28:34:25,  1.12s/it]

80279 episode score is 705.89


  8%|▊         | 8158/100000 [2:43:15<28:48:17,  1.13s/it]

80289 episode score is 741.77


  8%|▊         | 8159/100000 [2:43:16<29:01:02,  1.14s/it]

80299 episode score is 757.30


  8%|▊         | 8160/100000 [2:43:17<29:02:43,  1.14s/it]

80309 episode score is 736.93


  8%|▊         | 8161/100000 [2:43:18<29:00:46,  1.14s/it]

80319 episode score is 736.39


  8%|▊         | 8162/100000 [2:43:20<29:17:52,  1.15s/it]

80330 episode score is 690.12


  8%|▊         | 8163/100000 [2:43:21<29:03:14,  1.14s/it]

80340 episode score is 730.39


  8%|▊         | 8164/100000 [2:43:22<28:45:09,  1.13s/it]

80350 episode score is 704.35


  8%|▊         | 8165/100000 [2:43:23<28:28:31,  1.12s/it]

80360 episode score is 709.57


  8%|▊         | 8166/100000 [2:43:24<28:27:24,  1.12s/it]

80370 episode score is 730.76


  8%|▊         | 8167/100000 [2:43:25<28:10:32,  1.10s/it]

80380 episode score is 702.97


  8%|▊         | 8168/100000 [2:43:26<28:29:06,  1.12s/it]

80390 episode score is 751.96


  8%|▊         | 8169/100000 [2:43:27<28:30:23,  1.12s/it]

80400 episode score is 731.55


  8%|▊         | 8170/100000 [2:43:28<28:37:34,  1.12s/it]

80410 episode score is 742.07


  8%|▊         | 8171/100000 [2:43:30<28:27:11,  1.12s/it]

80420 episode score is 716.18


  8%|▊         | 8172/100000 [2:43:31<28:06:50,  1.10s/it]

80429 episode score is 781.64


  8%|▊         | 8173/100000 [2:43:32<28:05:37,  1.10s/it]

80438 episode score is 784.54


  8%|▊         | 8174/100000 [2:43:33<28:39:20,  1.12s/it]

80448 episode score is 766.44


  8%|▊         | 8175/100000 [2:43:34<28:16:16,  1.11s/it]

80457 episode score is 778.40


  8%|▊         | 8176/100000 [2:43:35<28:39:30,  1.12s/it]

80467 episode score is 765.03


  8%|▊         | 8177/100000 [2:43:36<29:04:36,  1.14s/it]

80477 episode score is 767.83


  8%|▊         | 8178/100000 [2:43:38<30:06:29,  1.18s/it]

80487 episode score is 775.90


  8%|▊         | 8179/100000 [2:43:39<30:06:16,  1.18s/it]

80497 episode score is 768.34


  8%|▊         | 8180/100000 [2:43:40<29:59:17,  1.18s/it]

80507 episode score is 766.13


  8%|▊         | 8181/100000 [2:43:41<29:40:55,  1.16s/it]

80517 episode score is 737.88


  8%|▊         | 8182/100000 [2:43:42<29:00:53,  1.14s/it]

80527 episode score is 704.96


  8%|▊         | 8183/100000 [2:43:43<28:43:28,  1.13s/it]

80537 episode score is 718.24


  8%|▊         | 8184/100000 [2:43:44<29:06:28,  1.14s/it]

80547 episode score is 769.32


  8%|▊         | 8185/100000 [2:43:46<29:25:27,  1.15s/it]

80557 episode score is 777.65


  8%|▊         | 8186/100000 [2:43:47<28:53:41,  1.13s/it]

80566 episode score is 789.78


  8%|▊         | 8187/100000 [2:43:48<29:10:18,  1.14s/it]

80576 episode score is 760.47


  8%|▊         | 8188/100000 [2:43:49<29:06:58,  1.14s/it]

80586 episode score is 742.96


  8%|▊         | 8189/100000 [2:43:50<29:04:38,  1.14s/it]

80596 episode score is 736.17


  8%|▊         | 8190/100000 [2:43:51<28:42:52,  1.13s/it]

80606 episode score is 719.58


  8%|▊         | 8191/100000 [2:43:52<28:42:02,  1.13s/it]

80616 episode score is 726.81


  8%|▊         | 8192/100000 [2:43:54<28:53:06,  1.13s/it]

80626 episode score is 750.17


  8%|▊         | 8193/100000 [2:43:55<29:14:36,  1.15s/it]

80636 episode score is 774.46


  8%|▊         | 8194/100000 [2:43:56<28:57:02,  1.14s/it]

80646 episode score is 725.18


  8%|▊         | 8195/100000 [2:43:57<28:54:37,  1.13s/it]

80656 episode score is 729.45


  8%|▊         | 8196/100000 [2:43:58<29:23:33,  1.15s/it]

80666 episode score is 773.66


  8%|▊         | 8197/100000 [2:43:59<29:33:56,  1.16s/it]

80676 episode score is 770.64


  8%|▊         | 8198/100000 [2:44:00<29:48:12,  1.17s/it]

80686 episode score is 784.34


  8%|▊         | 8199/100000 [2:44:02<29:10:17,  1.14s/it]

80696 episode score is 705.44
80706 episode score is 728.18


  8%|▊         | 8200/100000 [2:44:04<37:13:02,  1.46s/it]

Iteration 8200: Average test reward: 746.65


  8%|▊         | 8201/100000 [2:44:05<35:15:38,  1.38s/it]

80716 episode score is 782.23


  8%|▊         | 8202/100000 [2:44:06<32:59:59,  1.29s/it]

80725 episode score is 782.49


  8%|▊         | 8203/100000 [2:44:07<31:42:35,  1.24s/it]

80735 episode score is 722.55


  8%|▊         | 8204/100000 [2:44:08<30:51:32,  1.21s/it]

80745 episode score is 726.92


  8%|▊         | 8205/100000 [2:44:09<30:27:26,  1.19s/it]

80755 episode score is 743.54


  8%|▊         | 8206/100000 [2:44:11<30:26:22,  1.19s/it]

80765 episode score is 785.01


  8%|▊         | 8207/100000 [2:44:12<30:02:17,  1.18s/it]

80775 episode score is 733.82


  8%|▊         | 8208/100000 [2:44:13<29:42:47,  1.17s/it]

80785 episode score is 732.81


  8%|▊         | 8209/100000 [2:44:14<29:21:21,  1.15s/it]

80794 episode score is 807.27


  8%|▊         | 8210/100000 [2:44:15<29:46:57,  1.17s/it]

80804 episode score is 793.40


  8%|▊         | 8211/100000 [2:44:16<29:58:37,  1.18s/it]

80814 episode score is 777.59


  8%|▊         | 8212/100000 [2:44:18<29:23:12,  1.15s/it]

80823 episode score is 790.75


  8%|▊         | 8213/100000 [2:44:19<29:42:54,  1.17s/it]

80833 episode score is 768.82


  8%|▊         | 8214/100000 [2:44:20<29:53:17,  1.17s/it]

80843 episode score is 769.99


  8%|▊         | 8215/100000 [2:44:21<29:21:58,  1.15s/it]

80852 episode score is 797.33


  8%|▊         | 8216/100000 [2:44:22<29:33:40,  1.16s/it]

80862 episode score is 760.74


  8%|▊         | 8217/100000 [2:44:23<29:22:07,  1.15s/it]

80872 episode score is 733.23


  8%|▊         | 8218/100000 [2:44:25<29:17:44,  1.15s/it]

80882 episode score is 744.07


  8%|▊         | 8219/100000 [2:44:26<29:27:47,  1.16s/it]

80892 episode score is 758.25


  8%|▊         | 8220/100000 [2:44:27<29:02:32,  1.14s/it]

80901 episode score is 792.36


  8%|▊         | 8221/100000 [2:44:28<29:09:58,  1.14s/it]

80911 episode score is 744.55


  8%|▊         | 8222/100000 [2:44:29<29:23:34,  1.15s/it]

80921 episode score is 758.59


  8%|▊         | 8223/100000 [2:44:30<28:59:04,  1.14s/it]

80931 episode score is 711.09


  8%|▊         | 8224/100000 [2:44:31<29:06:48,  1.14s/it]

80941 episode score is 748.29


  8%|▊         | 8225/100000 [2:44:33<29:31:54,  1.16s/it]

80951 episode score is 773.02


  8%|▊         | 8226/100000 [2:44:34<29:09:33,  1.14s/it]

80961 episode score is 720.36


  8%|▊         | 8227/100000 [2:44:35<29:28:34,  1.16s/it]

80971 episode score is 768.80


  8%|▊         | 8228/100000 [2:44:36<29:26:02,  1.15s/it]

80981 episode score is 747.65


  8%|▊         | 8229/100000 [2:44:37<29:09:21,  1.14s/it]

80991 episode score is 723.30


  8%|▊         | 8230/100000 [2:44:38<29:00:39,  1.14s/it]

81001 episode score is 729.64


  8%|▊         | 8231/100000 [2:44:39<29:17:40,  1.15s/it]

81012 episode score is 688.87


  8%|▊         | 8232/100000 [2:44:41<28:56:23,  1.14s/it]

81022 episode score is 712.07


  8%|▊         | 8233/100000 [2:44:42<28:46:00,  1.13s/it]

81032 episode score is 719.71


  8%|▊         | 8234/100000 [2:44:43<28:51:55,  1.13s/it]

81043 episode score is 663.71


  8%|▊         | 8235/100000 [2:44:44<28:44:43,  1.13s/it]

81053 episode score is 705.97


  8%|▊         | 8236/100000 [2:44:45<28:26:41,  1.12s/it]

81063 episode score is 699.13


  8%|▊         | 8237/100000 [2:44:46<28:27:48,  1.12s/it]

81073 episode score is 729.82


  8%|▊         | 8238/100000 [2:44:47<28:40:36,  1.13s/it]

81083 episode score is 735.33


  8%|▊         | 8239/100000 [2:44:48<29:05:22,  1.14s/it]

81093 episode score is 759.39


  8%|▊         | 8240/100000 [2:44:50<29:16:42,  1.15s/it]

81103 episode score is 758.03


  8%|▊         | 8241/100000 [2:44:51<28:50:35,  1.13s/it]

81113 episode score is 704.91


  8%|▊         | 8242/100000 [2:44:52<29:05:23,  1.14s/it]

81123 episode score is 756.35


  8%|▊         | 8243/100000 [2:44:53<29:03:34,  1.14s/it]

81133 episode score is 733.58


  8%|▊         | 8244/100000 [2:44:54<29:14:54,  1.15s/it]

81143 episode score is 757.49


  8%|▊         | 8245/100000 [2:44:55<29:37:39,  1.16s/it]

81153 episode score is 767.56


  8%|▊         | 8246/100000 [2:44:57<29:37:19,  1.16s/it]

81163 episode score is 754.31


  8%|▊         | 8247/100000 [2:44:58<29:29:28,  1.16s/it]

81173 episode score is 744.34


  8%|▊         | 8248/100000 [2:44:59<30:16:04,  1.19s/it]

81183 episode score is 747.58


  8%|▊         | 8249/100000 [2:45:00<29:54:16,  1.17s/it]

81193 episode score is 734.95


  8%|▊         | 8250/100000 [2:45:01<29:38:11,  1.16s/it]

81203 episode score is 734.60


  8%|▊         | 8251/100000 [2:45:02<29:30:47,  1.16s/it]

81213 episode score is 734.23


  8%|▊         | 8252/100000 [2:45:03<29:27:55,  1.16s/it]

81224 episode score is 674.57


  8%|▊         | 8253/100000 [2:45:05<29:02:14,  1.14s/it]

81234 episode score is 719.36


  8%|▊         | 8254/100000 [2:45:06<28:36:03,  1.12s/it]

81244 episode score is 702.22


  8%|▊         | 8255/100000 [2:45:07<28:26:56,  1.12s/it]

81254 episode score is 717.90


  8%|▊         | 8256/100000 [2:45:08<28:30:54,  1.12s/it]

81264 episode score is 740.37


  8%|▊         | 8257/100000 [2:45:09<28:59:09,  1.14s/it]

81275 episode score is 698.94


  8%|▊         | 8258/100000 [2:45:10<28:37:58,  1.12s/it]

81285 episode score is 716.48


  8%|▊         | 8259/100000 [2:45:11<28:16:35,  1.11s/it]

81295 episode score is 702.74


  8%|▊         | 8260/100000 [2:45:12<28:58:43,  1.14s/it]

81306 episode score is 696.72


  8%|▊         | 8261/100000 [2:45:14<28:55:37,  1.14s/it]

81316 episode score is 742.30


  8%|▊         | 8262/100000 [2:45:15<28:28:40,  1.12s/it]

81326 episode score is 700.56


  8%|▊         | 8263/100000 [2:45:16<28:20:47,  1.11s/it]

81336 episode score is 709.84


  8%|▊         | 8264/100000 [2:45:17<28:16:05,  1.11s/it]

81346 episode score is 717.27


  8%|▊         | 8265/100000 [2:45:18<28:27:38,  1.12s/it]

81356 episode score is 736.76


  8%|▊         | 8266/100000 [2:45:19<28:25:03,  1.12s/it]

81366 episode score is 720.27


  8%|▊         | 8267/100000 [2:45:20<28:36:20,  1.12s/it]

81376 episode score is 737.66


  8%|▊         | 8268/100000 [2:45:21<28:25:05,  1.12s/it]

81386 episode score is 713.57


  8%|▊         | 8269/100000 [2:45:22<28:13:55,  1.11s/it]

81396 episode score is 711.19


  8%|▊         | 8270/100000 [2:45:24<28:44:08,  1.13s/it]

81407 episode score is 683.78


  8%|▊         | 8271/100000 [2:45:25<28:46:40,  1.13s/it]

81417 episode score is 736.96


  8%|▊         | 8272/100000 [2:45:26<28:27:49,  1.12s/it]

81427 episode score is 703.70


  8%|▊         | 8273/100000 [2:45:27<28:40:49,  1.13s/it]

81437 episode score is 747.98


  8%|▊         | 8274/100000 [2:45:28<29:10:13,  1.14s/it]

81448 episode score is 701.00


  8%|▊         | 8275/100000 [2:45:29<28:58:04,  1.14s/it]

81458 episode score is 722.26


  8%|▊         | 8276/100000 [2:45:30<29:00:24,  1.14s/it]

81469 episode score is 662.59


  8%|▊         | 8277/100000 [2:45:32<29:03:08,  1.14s/it]

81479 episode score is 728.60


  8%|▊         | 8278/100000 [2:45:33<28:53:10,  1.13s/it]

81489 episode score is 713.11


  8%|▊         | 8279/100000 [2:45:34<29:14:25,  1.15s/it]

81500 episode score is 695.22


  8%|▊         | 8280/100000 [2:45:35<28:56:00,  1.14s/it]

81510 episode score is 721.73


  8%|▊         | 8281/100000 [2:45:36<28:47:18,  1.13s/it]

81520 episode score is 720.99


  8%|▊         | 8282/100000 [2:45:37<28:55:48,  1.14s/it]

81530 episode score is 750.32


  8%|▊         | 8283/100000 [2:45:38<29:00:16,  1.14s/it]

81540 episode score is 745.93


  8%|▊         | 8284/100000 [2:45:40<29:01:12,  1.14s/it]

81550 episode score is 741.11


  8%|▊         | 8285/100000 [2:45:41<28:39:05,  1.12s/it]

81560 episode score is 705.54


  8%|▊         | 8286/100000 [2:45:42<28:57:15,  1.14s/it]

81570 episode score is 759.33


  8%|▊         | 8287/100000 [2:45:43<28:56:49,  1.14s/it]

81580 episode score is 733.74


  8%|▊         | 8288/100000 [2:45:44<29:05:46,  1.14s/it]

81590 episode score is 751.70


  8%|▊         | 8289/100000 [2:45:45<29:10:35,  1.15s/it]

81600 episode score is 750.83


  8%|▊         | 8290/100000 [2:45:46<29:12:46,  1.15s/it]

81610 episode score is 748.07


  8%|▊         | 8291/100000 [2:45:48<29:33:42,  1.16s/it]

81620 episode score is 758.99


  8%|▊         | 8292/100000 [2:45:49<28:59:45,  1.14s/it]

81629 episode score is 795.42


  8%|▊         | 8293/100000 [2:45:50<29:22:07,  1.15s/it]

81639 episode score is 768.91


  8%|▊         | 8294/100000 [2:45:51<29:10:20,  1.15s/it]

81649 episode score is 733.68


  8%|▊         | 8295/100000 [2:45:52<29:07:40,  1.14s/it]

81659 episode score is 740.16


  8%|▊         | 8296/100000 [2:45:53<28:58:46,  1.14s/it]

81669 episode score is 718.31


  8%|▊         | 8297/100000 [2:45:54<28:51:34,  1.13s/it]

81679 episode score is 734.88


  8%|▊         | 8298/100000 [2:45:56<29:11:22,  1.15s/it]

81689 episode score is 760.45


  8%|▊         | 8299/100000 [2:45:57<29:16:54,  1.15s/it]

81699 episode score is 746.55
81709 episode score is 719.71


  8%|▊         | 8300/100000 [2:45:59<37:18:04,  1.46s/it]

Iteration 8300: Average test reward: 715.76


  8%|▊         | 8301/100000 [2:46:00<34:28:24,  1.35s/it]

81719 episode score is 697.07


  8%|▊         | 8302/100000 [2:46:01<32:38:24,  1.28s/it]

81729 episode score is 722.53


  8%|▊         | 8303/100000 [2:46:02<31:40:26,  1.24s/it]

81739 episode score is 743.29


  8%|▊         | 8304/100000 [2:46:03<31:17:43,  1.23s/it]

81749 episode score is 773.23


  8%|▊         | 8305/100000 [2:46:05<30:29:04,  1.20s/it]

81759 episode score is 719.74


  8%|▊         | 8306/100000 [2:46:06<30:10:03,  1.18s/it]

81770 episode score is 669.57


  8%|▊         | 8307/100000 [2:46:07<30:05:31,  1.18s/it]

81781 episode score is 686.59


  8%|▊         | 8308/100000 [2:46:08<29:53:22,  1.17s/it]

81791 episode score is 748.51


  8%|▊         | 8309/100000 [2:46:09<29:26:19,  1.16s/it]

81801 episode score is 723.07


  8%|▊         | 8310/100000 [2:46:10<29:36:52,  1.16s/it]

81811 episode score is 755.58


  8%|▊         | 8311/100000 [2:46:12<29:49:03,  1.17s/it]

81821 episode score is 777.89


  8%|▊         | 8312/100000 [2:46:13<29:35:59,  1.16s/it]

81831 episode score is 739.40


  8%|▊         | 8313/100000 [2:46:14<29:09:35,  1.14s/it]

81840 episode score is 806.10


  8%|▊         | 8314/100000 [2:46:15<29:09:02,  1.14s/it]

81850 episode score is 751.22


  8%|▊         | 8315/100000 [2:46:16<28:44:33,  1.13s/it]

81859 episode score is 794.58


  8%|▊         | 8316/100000 [2:46:17<28:58:14,  1.14s/it]

81869 episode score is 742.63


  8%|▊         | 8317/100000 [2:46:18<29:16:04,  1.15s/it]

81879 episode score is 756.15


  8%|▊         | 8318/100000 [2:46:20<29:25:42,  1.16s/it]

81889 episode score is 762.17


  8%|▊         | 8319/100000 [2:46:21<29:04:30,  1.14s/it]

81899 episode score is 716.41


  8%|▊         | 8320/100000 [2:46:22<28:54:24,  1.14s/it]

81909 episode score is 724.31


  8%|▊         | 8321/100000 [2:46:23<29:04:25,  1.14s/it]

81919 episode score is 742.98


  8%|▊         | 8322/100000 [2:46:24<28:49:23,  1.13s/it]

81929 episode score is 713.30


  8%|▊         | 8323/100000 [2:46:25<28:37:56,  1.12s/it]

81939 episode score is 718.09


  8%|▊         | 8324/100000 [2:46:26<28:31:03,  1.12s/it]

81949 episode score is 722.95


  8%|▊         | 8325/100000 [2:46:27<28:39:00,  1.13s/it]

81959 episode score is 739.74


  8%|▊         | 8326/100000 [2:46:28<28:19:27,  1.11s/it]

81969 episode score is 704.61


  8%|▊         | 8327/100000 [2:46:30<28:10:57,  1.11s/it]

81979 episode score is 703.14


  8%|▊         | 8328/100000 [2:46:31<28:30:25,  1.12s/it]

81989 episode score is 750.76


  8%|▊         | 8329/100000 [2:46:32<28:38:47,  1.12s/it]

81999 episode score is 736.55


  8%|▊         | 8330/100000 [2:46:33<29:03:01,  1.14s/it]

82009 episode score is 759.43


  8%|▊         | 8331/100000 [2:46:34<29:20:19,  1.15s/it]

82020 episode score is 683.65


  8%|▊         | 8332/100000 [2:46:35<28:56:43,  1.14s/it]

82030 episode score is 710.04


  8%|▊         | 8333/100000 [2:46:36<29:15:13,  1.15s/it]

82041 episode score is 693.20


  8%|▊         | 8334/100000 [2:46:38<29:24:27,  1.15s/it]

82052 episode score is 693.10


  8%|▊         | 8335/100000 [2:46:39<29:08:11,  1.14s/it]

82062 episode score is 724.88


  8%|▊         | 8336/100000 [2:46:40<28:57:41,  1.14s/it]

82072 episode score is 727.77


  8%|▊         | 8337/100000 [2:46:41<28:41:04,  1.13s/it]

82083 episode score is 635.24


  8%|▊         | 8338/100000 [2:46:42<28:29:53,  1.12s/it]

82093 episode score is 710.16


  8%|▊         | 8339/100000 [2:46:43<28:15:58,  1.11s/it]

82103 episode score is 696.65


  8%|▊         | 8340/100000 [2:46:44<28:03:33,  1.10s/it]

82113 episode score is 701.81


  8%|▊         | 8341/100000 [2:46:45<28:40:39,  1.13s/it]

82124 episode score is 698.18


  8%|▊         | 8342/100000 [2:46:47<28:56:27,  1.14s/it]

82135 episode score is 680.56


  8%|▊         | 8343/100000 [2:46:48<29:09:50,  1.15s/it]

82146 episode score is 672.95


  8%|▊         | 8344/100000 [2:46:49<28:56:12,  1.14s/it]

82156 episode score is 717.85


  8%|▊         | 8345/100000 [2:46:50<28:42:10,  1.13s/it]

82166 episode score is 720.14


  8%|▊         | 8346/100000 [2:46:51<29:01:19,  1.14s/it]

82177 episode score is 681.14


  8%|▊         | 8347/100000 [2:46:52<29:18:45,  1.15s/it]

82188 episode score is 688.36


  8%|▊         | 8348/100000 [2:46:53<28:56:48,  1.14s/it]

82198 episode score is 718.49


  8%|▊         | 8349/100000 [2:46:55<28:36:33,  1.12s/it]

82208 episode score is 705.66


  8%|▊         | 8350/100000 [2:46:56<28:42:47,  1.13s/it]

82219 episode score is 661.04


  8%|▊         | 8351/100000 [2:46:57<28:26:43,  1.12s/it]

82229 episode score is 714.90


  8%|▊         | 8352/100000 [2:46:58<28:12:21,  1.11s/it]

82239 episode score is 703.40


  8%|▊         | 8353/100000 [2:46:59<28:09:34,  1.11s/it]

82249 episode score is 700.65


  8%|▊         | 8354/100000 [2:47:00<28:09:50,  1.11s/it]

82259 episode score is 721.37


  8%|▊         | 8355/100000 [2:47:01<28:20:35,  1.11s/it]

82269 episode score is 739.37


  8%|▊         | 8356/100000 [2:47:02<28:14:45,  1.11s/it]

82279 episode score is 706.84


  8%|▊         | 8357/100000 [2:47:03<28:13:11,  1.11s/it]

82289 episode score is 715.75


  8%|▊         | 8358/100000 [2:47:05<28:35:53,  1.12s/it]

82300 episode score is 674.80


  8%|▊         | 8359/100000 [2:47:06<28:28:51,  1.12s/it]

82310 episode score is 720.45


  8%|▊         | 8360/100000 [2:47:07<29:04:50,  1.14s/it]

82320 episode score is 725.44


  8%|▊         | 8361/100000 [2:47:08<29:25:09,  1.16s/it]

82331 episode score is 697.92


  8%|▊         | 8362/100000 [2:47:09<28:59:59,  1.14s/it]

82341 episode score is 712.72


  8%|▊         | 8363/100000 [2:47:10<28:38:33,  1.13s/it]

82351 episode score is 712.95


  8%|▊         | 8364/100000 [2:47:11<28:29:34,  1.12s/it]

82361 episode score is 717.01


  8%|▊         | 8365/100000 [2:47:12<28:44:02,  1.13s/it]

82371 episode score is 752.63


  8%|▊         | 8366/100000 [2:47:14<28:49:41,  1.13s/it]

82381 episode score is 741.61


  8%|▊         | 8367/100000 [2:47:15<29:10:38,  1.15s/it]

82391 episode score is 755.88


  8%|▊         | 8368/100000 [2:47:16<29:20:18,  1.15s/it]

82401 episode score is 746.80


  8%|▊         | 8369/100000 [2:47:17<29:49:31,  1.17s/it]

82411 episode score is 783.04


  8%|▊         | 8370/100000 [2:47:18<30:05:56,  1.18s/it]

82421 episode score is 779.18


  8%|▊         | 8371/100000 [2:47:20<30:06:18,  1.18s/it]

82431 episode score is 764.10


  8%|▊         | 8372/100000 [2:47:21<30:01:22,  1.18s/it]

82441 episode score is 756.59


  8%|▊         | 8373/100000 [2:47:22<29:18:44,  1.15s/it]

82450 episode score is 783.57


  8%|▊         | 8374/100000 [2:47:23<29:31:17,  1.16s/it]

82460 episode score is 764.03


  8%|▊         | 8375/100000 [2:47:24<29:50:39,  1.17s/it]

82470 episode score is 778.70


  8%|▊         | 8376/100000 [2:47:25<29:59:43,  1.18s/it]

82480 episode score is 774.99


  8%|▊         | 8377/100000 [2:47:27<29:56:55,  1.18s/it]

82490 episode score is 764.49


  8%|▊         | 8378/100000 [2:47:28<29:50:17,  1.17s/it]

82500 episode score is 747.46


  8%|▊         | 8379/100000 [2:47:29<29:57:46,  1.18s/it]

82510 episode score is 769.49


  8%|▊         | 8380/100000 [2:47:30<29:42:03,  1.17s/it]

82520 episode score is 742.10


  8%|▊         | 8381/100000 [2:47:31<29:34:29,  1.16s/it]

82530 episode score is 750.79


  8%|▊         | 8382/100000 [2:47:32<29:36:51,  1.16s/it]

82540 episode score is 748.58


  8%|▊         | 8383/100000 [2:47:34<29:24:00,  1.16s/it]

82550 episode score is 731.69


  8%|▊         | 8384/100000 [2:47:35<29:19:00,  1.15s/it]

82560 episode score is 746.40


  8%|▊         | 8385/100000 [2:47:36<29:08:24,  1.15s/it]

82570 episode score is 734.66


  8%|▊         | 8386/100000 [2:47:37<29:07:03,  1.14s/it]

82581 episode score is 666.77


  8%|▊         | 8387/100000 [2:47:38<28:40:55,  1.13s/it]

82591 episode score is 705.36


  8%|▊         | 8388/100000 [2:47:39<28:31:57,  1.12s/it]

82601 episode score is 720.59


  8%|▊         | 8389/100000 [2:47:40<28:57:36,  1.14s/it]

82612 episode score is 690.12


  8%|▊         | 8390/100000 [2:47:42<29:18:40,  1.15s/it]

82623 episode score is 693.52


  8%|▊         | 8391/100000 [2:47:43<29:22:47,  1.15s/it]

82634 episode score is 675.88


  8%|▊         | 8392/100000 [2:47:44<29:23:30,  1.16s/it]

82644 episode score is 750.54


  8%|▊         | 8393/100000 [2:47:45<29:35:45,  1.16s/it]

82655 episode score is 697.26


  8%|▊         | 8394/100000 [2:47:46<29:02:57,  1.14s/it]

82665 episode score is 715.25


  8%|▊         | 8395/100000 [2:47:47<28:41:03,  1.13s/it]

82675 episode score is 699.62


  8%|▊         | 8396/100000 [2:47:48<28:54:23,  1.14s/it]

82685 episode score is 749.29


  8%|▊         | 8397/100000 [2:47:49<28:54:18,  1.14s/it]

82695 episode score is 738.63


  8%|▊         | 8398/100000 [2:47:51<29:02:24,  1.14s/it]

82705 episode score is 753.98


  8%|▊         | 8399/100000 [2:47:52<28:49:59,  1.13s/it]

82715 episode score is 721.09
82725 episode score is 759.37


  8%|▊         | 8400/100000 [2:47:54<36:48:08,  1.45s/it]

Iteration 8400: Average test reward: 717.50


  8%|▊         | 8401/100000 [2:47:55<34:23:48,  1.35s/it]

82735 episode score is 733.46


  8%|▊         | 8402/100000 [2:47:56<32:58:36,  1.30s/it]

82745 episode score is 758.18


  8%|▊         | 8403/100000 [2:47:57<32:10:56,  1.26s/it]

82755 episode score is 772.40


  8%|▊         | 8404/100000 [2:47:59<31:30:09,  1.24s/it]

82765 episode score is 764.63


  8%|▊         | 8405/100000 [2:48:00<31:05:49,  1.22s/it]

82775 episode score is 769.17


  8%|▊         | 8406/100000 [2:48:01<30:53:20,  1.21s/it]

82785 episode score is 764.20


  8%|▊         | 8407/100000 [2:48:02<30:41:37,  1.21s/it]

82795 episode score is 770.86


  8%|▊         | 8408/100000 [2:48:03<30:28:50,  1.20s/it]

82805 episode score is 758.93


  8%|▊         | 8409/100000 [2:48:05<31:14:22,  1.23s/it]

82815 episode score is 779.37


  8%|▊         | 8410/100000 [2:48:06<30:38:50,  1.20s/it]

82825 episode score is 754.74


  8%|▊         | 8411/100000 [2:48:07<29:58:18,  1.18s/it]

82835 episode score is 722.85


  8%|▊         | 8412/100000 [2:48:08<29:48:02,  1.17s/it]

82845 episode score is 752.96


  8%|▊         | 8413/100000 [2:48:09<29:48:13,  1.17s/it]

82855 episode score is 757.99


  8%|▊         | 8414/100000 [2:48:10<29:43:58,  1.17s/it]

82865 episode score is 751.73


  8%|▊         | 8415/100000 [2:48:12<29:32:35,  1.16s/it]

82875 episode score is 735.68


  8%|▊         | 8416/100000 [2:48:13<29:27:25,  1.16s/it]

82885 episode score is 746.89


  8%|▊         | 8417/100000 [2:48:14<29:26:29,  1.16s/it]

82895 episode score is 750.88


  8%|▊         | 8418/100000 [2:48:15<29:20:49,  1.15s/it]

82905 episode score is 749.52


  8%|▊         | 8419/100000 [2:48:16<29:08:03,  1.15s/it]

82915 episode score is 738.10


  8%|▊         | 8420/100000 [2:48:17<29:06:01,  1.14s/it]

82925 episode score is 735.64


  8%|▊         | 8421/100000 [2:48:18<29:01:16,  1.14s/it]

82935 episode score is 738.31


  8%|▊         | 8422/100000 [2:48:19<28:44:22,  1.13s/it]

82945 episode score is 713.17


  8%|▊         | 8423/100000 [2:48:21<28:33:12,  1.12s/it]

82955 episode score is 721.14


  8%|▊         | 8424/100000 [2:48:22<28:24:18,  1.12s/it]

82965 episode score is 712.06


  8%|▊         | 8425/100000 [2:48:23<28:49:02,  1.13s/it]

82976 episode score is 689.75


  8%|▊         | 8426/100000 [2:48:24<28:58:32,  1.14s/it]

82986 episode score is 745.27


  8%|▊         | 8427/100000 [2:48:25<28:58:12,  1.14s/it]

82996 episode score is 739.34


  8%|▊         | 8428/100000 [2:48:26<28:50:45,  1.13s/it]

83006 episode score is 733.58


  8%|▊         | 8429/100000 [2:48:27<28:48:15,  1.13s/it]

83016 episode score is 737.20


  8%|▊         | 8430/100000 [2:48:29<28:29:23,  1.12s/it]

83026 episode score is 706.68


  8%|▊         | 8431/100000 [2:48:30<28:16:13,  1.11s/it]

83036 episode score is 710.58


  8%|▊         | 8432/100000 [2:48:31<28:23:24,  1.12s/it]

83046 episode score is 721.84


  8%|▊         | 8433/100000 [2:48:32<28:11:31,  1.11s/it]

83056 episode score is 702.20


  8%|▊         | 8434/100000 [2:48:33<28:08:46,  1.11s/it]

83066 episode score is 710.27


  8%|▊         | 8435/100000 [2:48:34<28:06:11,  1.10s/it]

83076 episode score is 714.04


  8%|▊         | 8436/100000 [2:48:35<28:05:23,  1.10s/it]

83086 episode score is 714.93


  8%|▊         | 8437/100000 [2:48:36<28:38:47,  1.13s/it]

83097 episode score is 693.22


  8%|▊         | 8438/100000 [2:48:37<28:24:32,  1.12s/it]

83107 episode score is 709.20


  8%|▊         | 8439/100000 [2:48:39<28:24:45,  1.12s/it]

83117 episode score is 724.74


  8%|▊         | 8440/100000 [2:48:40<28:27:47,  1.12s/it]

83127 episode score is 727.22


  8%|▊         | 8441/100000 [2:48:41<28:36:13,  1.12s/it]

83137 episode score is 737.54


  8%|▊         | 8442/100000 [2:48:42<28:31:44,  1.12s/it]

83147 episode score is 720.03


  8%|▊         | 8443/100000 [2:48:43<28:39:41,  1.13s/it]

83157 episode score is 747.76


  8%|▊         | 8444/100000 [2:48:44<28:43:39,  1.13s/it]

83167 episode score is 742.93


  8%|▊         | 8445/100000 [2:48:45<28:48:40,  1.13s/it]

83177 episode score is 744.66


  8%|▊         | 8446/100000 [2:48:47<29:23:05,  1.16s/it]

83188 episode score is 703.02


  8%|▊         | 8447/100000 [2:48:48<29:00:06,  1.14s/it]

83198 episode score is 704.82


  8%|▊         | 8448/100000 [2:48:49<28:55:36,  1.14s/it]

83208 episode score is 734.25


  8%|▊         | 8449/100000 [2:48:50<29:06:09,  1.14s/it]

83218 episode score is 757.89


  8%|▊         | 8450/100000 [2:48:51<28:55:38,  1.14s/it]

83228 episode score is 735.83


  8%|▊         | 8451/100000 [2:48:52<28:43:58,  1.13s/it]

83238 episode score is 716.86


  8%|▊         | 8452/100000 [2:48:53<29:07:15,  1.15s/it]

83249 episode score is 688.20


  8%|▊         | 8453/100000 [2:48:54<29:17:40,  1.15s/it]

83259 episode score is 764.55


  8%|▊         | 8454/100000 [2:48:56<29:23:19,  1.16s/it]

83269 episode score is 757.68


  8%|▊         | 8455/100000 [2:48:57<29:05:35,  1.14s/it]

83279 episode score is 728.81


  8%|▊         | 8456/100000 [2:48:58<29:31:35,  1.16s/it]

83290 episode score is 697.27


  8%|▊         | 8457/100000 [2:48:59<29:16:07,  1.15s/it]

83300 episode score is 732.94


  8%|▊         | 8458/100000 [2:49:00<29:28:50,  1.16s/it]

83311 episode score is 696.88


  8%|▊         | 8459/100000 [2:49:01<29:07:30,  1.15s/it]

83321 episode score is 730.91


  8%|▊         | 8460/100000 [2:49:03<29:35:53,  1.16s/it]

83332 episode score is 696.12


  8%|▊         | 8461/100000 [2:49:04<29:22:27,  1.16s/it]

83342 episode score is 737.72


  8%|▊         | 8462/100000 [2:49:05<29:08:53,  1.15s/it]

83352 episode score is 734.43


  8%|▊         | 8463/100000 [2:49:06<28:50:08,  1.13s/it]

83362 episode score is 722.97


  8%|▊         | 8464/100000 [2:49:07<28:44:27,  1.13s/it]

83372 episode score is 729.59


  8%|▊         | 8465/100000 [2:49:08<28:38:15,  1.13s/it]

83382 episode score is 723.45


  8%|▊         | 8466/100000 [2:49:09<28:29:48,  1.12s/it]

83392 episode score is 721.37


  8%|▊         | 8467/100000 [2:49:10<28:45:10,  1.13s/it]

83402 episode score is 754.88


  8%|▊         | 8468/100000 [2:49:12<28:36:03,  1.12s/it]

83412 episode score is 722.29


  8%|▊         | 8469/100000 [2:49:13<29:14:24,  1.15s/it]

83423 episode score is 699.47


  8%|▊         | 8470/100000 [2:49:14<28:56:25,  1.14s/it]

83433 episode score is 726.38


  8%|▊         | 8471/100000 [2:49:15<28:56:01,  1.14s/it]

83443 episode score is 737.71


  8%|▊         | 8472/100000 [2:49:16<28:48:49,  1.13s/it]

83453 episode score is 729.31


  8%|▊         | 8473/100000 [2:49:17<29:13:39,  1.15s/it]

83463 episode score is 761.24


  8%|▊         | 8474/100000 [2:49:18<29:11:12,  1.15s/it]

83473 episode score is 746.93


  8%|▊         | 8475/100000 [2:49:20<29:11:13,  1.15s/it]

83483 episode score is 744.18


  8%|▊         | 8476/100000 [2:49:21<29:18:52,  1.15s/it]

83493 episode score is 758.88


  8%|▊         | 8477/100000 [2:49:22<30:00:10,  1.18s/it]

83503 episode score is 747.46


  8%|▊         | 8478/100000 [2:49:23<29:37:01,  1.16s/it]

83513 episode score is 726.96


  8%|▊         | 8479/100000 [2:49:24<29:27:53,  1.16s/it]

83523 episode score is 741.74


  8%|▊         | 8480/100000 [2:49:25<29:30:37,  1.16s/it]

83533 episode score is 757.50


  8%|▊         | 8481/100000 [2:49:27<29:07:01,  1.15s/it]

83543 episode score is 720.86


  8%|▊         | 8482/100000 [2:49:28<29:13:10,  1.15s/it]

83553 episode score is 757.04


  8%|▊         | 8483/100000 [2:49:29<29:19:07,  1.15s/it]

83563 episode score is 760.24


  8%|▊         | 8484/100000 [2:49:30<29:15:35,  1.15s/it]

83573 episode score is 751.53


  8%|▊         | 8485/100000 [2:49:31<29:12:18,  1.15s/it]

83583 episode score is 744.56


  8%|▊         | 8486/100000 [2:49:32<29:14:08,  1.15s/it]

83593 episode score is 742.56


  8%|▊         | 8487/100000 [2:49:34<29:09:16,  1.15s/it]

83603 episode score is 742.69


  8%|▊         | 8488/100000 [2:49:35<29:11:34,  1.15s/it]

83613 episode score is 750.29


  8%|▊         | 8489/100000 [2:49:36<28:55:42,  1.14s/it]

83623 episode score is 723.41


  8%|▊         | 8490/100000 [2:49:37<28:46:08,  1.13s/it]

83633 episode score is 728.56


  8%|▊         | 8491/100000 [2:49:38<28:30:52,  1.12s/it]

83643 episode score is 715.44


  8%|▊         | 8492/100000 [2:49:39<28:15:39,  1.11s/it]

83653 episode score is 697.91


  8%|▊         | 8493/100000 [2:49:40<28:09:30,  1.11s/it]

83663 episode score is 714.47


  8%|▊         | 8494/100000 [2:49:41<27:59:36,  1.10s/it]

83673 episode score is 706.72


  8%|▊         | 8495/100000 [2:49:42<28:11:34,  1.11s/it]

83683 episode score is 726.67


  8%|▊         | 8496/100000 [2:49:43<28:14:41,  1.11s/it]

83693 episode score is 722.39


  8%|▊         | 8497/100000 [2:49:45<28:05:41,  1.11s/it]

83703 episode score is 705.35


  8%|▊         | 8498/100000 [2:49:46<28:01:01,  1.10s/it]

83713 episode score is 707.22


  8%|▊         | 8499/100000 [2:49:47<28:23:04,  1.12s/it]

83724 episode score is 674.41
83734 episode score is 705.47


  8%|▊         | 8500/100000 [2:49:49<35:57:07,  1.41s/it]

Iteration 8500: Average test reward: 726.03


  9%|▊         | 8501/100000 [2:49:50<34:12:58,  1.35s/it]

83745 episode score is 693.77


  9%|▊         | 8502/100000 [2:49:51<33:02:00,  1.30s/it]

83756 episode score is 701.37


  9%|▊         | 8503/100000 [2:49:52<31:29:18,  1.24s/it]

83766 episode score is 708.64


  9%|▊         | 8504/100000 [2:49:54<30:19:11,  1.19s/it]

83776 episode score is 706.66


  9%|▊         | 8505/100000 [2:49:55<29:54:59,  1.18s/it]

83786 episode score is 742.81


  9%|▊         | 8506/100000 [2:49:56<29:39:25,  1.17s/it]

83796 episode score is 733.79


  9%|▊         | 8507/100000 [2:49:57<29:36:57,  1.17s/it]

83807 episode score is 681.50


  9%|▊         | 8508/100000 [2:49:58<29:05:07,  1.14s/it]

83817 episode score is 707.39


  9%|▊         | 8509/100000 [2:49:59<29:24:13,  1.16s/it]

83828 episode score is 694.03


  9%|▊         | 8510/100000 [2:50:00<28:56:45,  1.14s/it]

83838 episode score is 716.94


  9%|▊         | 8511/100000 [2:50:01<28:57:51,  1.14s/it]

83848 episode score is 744.69


  9%|▊         | 8512/100000 [2:50:03<28:58:42,  1.14s/it]

83858 episode score is 734.48


  9%|▊         | 8513/100000 [2:50:04<28:57:01,  1.14s/it]

83868 episode score is 739.13


  9%|▊         | 8514/100000 [2:50:05<28:55:41,  1.14s/it]

83878 episode score is 740.34


  9%|▊         | 8515/100000 [2:50:06<28:55:22,  1.14s/it]

83888 episode score is 745.90


  9%|▊         | 8516/100000 [2:50:07<28:30:03,  1.12s/it]

83898 episode score is 698.23


  9%|▊         | 8517/100000 [2:50:08<28:13:05,  1.11s/it]

83908 episode score is 707.37


  9%|▊         | 8518/100000 [2:50:09<28:28:30,  1.12s/it]

83918 episode score is 738.69


  9%|▊         | 8519/100000 [2:50:10<28:36:19,  1.13s/it]

83928 episode score is 745.66


  9%|▊         | 8520/100000 [2:50:12<28:46:24,  1.13s/it]

83938 episode score is 737.89


  9%|▊         | 8521/100000 [2:50:13<29:03:57,  1.14s/it]

83948 episode score is 751.41


  9%|▊         | 8522/100000 [2:50:14<28:41:09,  1.13s/it]

83958 episode score is 709.02


  9%|▊         | 8523/100000 [2:50:15<28:47:58,  1.13s/it]

83968 episode score is 742.66


  9%|▊         | 8524/100000 [2:50:16<28:47:26,  1.13s/it]

83978 episode score is 733.60


  9%|▊         | 8525/100000 [2:50:17<29:07:16,  1.15s/it]

83988 episode score is 746.56


  9%|▊         | 8526/100000 [2:50:18<29:02:15,  1.14s/it]

83998 episode score is 734.30


  9%|▊         | 8527/100000 [2:50:20<29:21:13,  1.16s/it]

84008 episode score is 760.92


  9%|▊         | 8528/100000 [2:50:21<28:50:08,  1.13s/it]

84018 episode score is 705.00


  9%|▊         | 8529/100000 [2:50:22<28:49:53,  1.13s/it]

84028 episode score is 741.00


  9%|▊         | 8530/100000 [2:50:23<28:50:29,  1.14s/it]

84038 episode score is 738.42


  9%|▊         | 8531/100000 [2:50:24<28:50:54,  1.14s/it]

84048 episode score is 741.09


  9%|▊         | 8532/100000 [2:50:25<29:03:04,  1.14s/it]

84058 episode score is 756.36


  9%|▊         | 8533/100000 [2:50:26<29:06:06,  1.15s/it]

84068 episode score is 747.48


  9%|▊         | 8534/100000 [2:50:28<28:47:15,  1.13s/it]

84078 episode score is 711.06


  9%|▊         | 8535/100000 [2:50:29<28:56:14,  1.14s/it]

84088 episode score is 748.82


  9%|▊         | 8536/100000 [2:50:30<28:48:05,  1.13s/it]

84098 episode score is 731.14


  9%|▊         | 8537/100000 [2:50:31<28:24:38,  1.12s/it]

84108 episode score is 701.27


  9%|▊         | 8538/100000 [2:50:32<28:21:57,  1.12s/it]

84118 episode score is 717.77


  9%|▊         | 8539/100000 [2:50:33<28:26:21,  1.12s/it]

84128 episode score is 720.85


  9%|▊         | 8540/100000 [2:50:34<28:21:55,  1.12s/it]

84138 episode score is 724.81


  9%|▊         | 8541/100000 [2:50:36<29:23:19,  1.16s/it]

84148 episode score is 749.23


  9%|▊         | 8542/100000 [2:50:37<28:55:11,  1.14s/it]

84158 episode score is 709.96


  9%|▊         | 8543/100000 [2:50:38<28:36:26,  1.13s/it]

84168 episode score is 712.74


  9%|▊         | 8544/100000 [2:50:39<28:27:03,  1.12s/it]

84178 episode score is 717.80


  9%|▊         | 8545/100000 [2:50:40<28:33:45,  1.12s/it]

84188 episode score is 739.84


  9%|▊         | 8546/100000 [2:50:41<29:04:36,  1.14s/it]

84198 episode score is 772.50


  9%|▊         | 8547/100000 [2:50:42<29:20:46,  1.16s/it]

84208 episode score is 765.76


  9%|▊         | 8548/100000 [2:50:44<29:28:40,  1.16s/it]

84218 episode score is 762.01


  9%|▊         | 8549/100000 [2:50:45<29:03:13,  1.14s/it]

84227 episode score is 795.52


  9%|▊         | 8550/100000 [2:50:46<28:46:02,  1.13s/it]

84237 episode score is 717.21


  9%|▊         | 8551/100000 [2:50:47<28:41:25,  1.13s/it]

84247 episode score is 730.59


  9%|▊         | 8552/100000 [2:50:48<28:35:29,  1.13s/it]

84257 episode score is 719.28


  9%|▊         | 8553/100000 [2:50:49<28:59:20,  1.14s/it]

84268 episode score is 692.40


  9%|▊         | 8554/100000 [2:50:50<28:40:12,  1.13s/it]

84278 episode score is 720.15


  9%|▊         | 8555/100000 [2:50:51<28:22:27,  1.12s/it]

84288 episode score is 709.02


  9%|▊         | 8556/100000 [2:50:52<28:30:33,  1.12s/it]

84298 episode score is 735.52


  9%|▊         | 8557/100000 [2:50:54<28:27:37,  1.12s/it]

84308 episode score is 727.82


  9%|▊         | 8558/100000 [2:50:55<28:53:03,  1.14s/it]

84319 episode score is 690.62


  9%|▊         | 8559/100000 [2:50:56<28:35:42,  1.13s/it]

84329 episode score is 709.30


  9%|▊         | 8560/100000 [2:50:57<28:29:43,  1.12s/it]

84339 episode score is 725.14


  9%|▊         | 8561/100000 [2:50:58<29:05:51,  1.15s/it]

84350 episode score is 707.53


  9%|▊         | 8562/100000 [2:50:59<29:32:17,  1.16s/it]

84361 episode score is 707.41


  9%|▊         | 8563/100000 [2:51:00<29:11:36,  1.15s/it]

84371 episode score is 716.47


  9%|▊         | 8564/100000 [2:51:02<29:03:50,  1.14s/it]

84381 episode score is 740.78


  9%|▊         | 8565/100000 [2:51:03<29:02:21,  1.14s/it]

84391 episode score is 732.13


  9%|▊         | 8566/100000 [2:51:04<28:47:04,  1.13s/it]

84401 episode score is 721.65


  9%|▊         | 8567/100000 [2:51:05<28:36:11,  1.13s/it]

84411 episode score is 716.08


  9%|▊         | 8568/100000 [2:51:06<28:26:33,  1.12s/it]

84421 episode score is 699.85


  9%|▊         | 8569/100000 [2:51:07<28:37:34,  1.13s/it]

84431 episode score is 732.34


  9%|▊         | 8570/100000 [2:51:08<29:03:40,  1.14s/it]

84442 episode score is 688.65


  9%|▊         | 8571/100000 [2:51:10<29:17:27,  1.15s/it]

84453 episode score is 683.81


  9%|▊         | 8572/100000 [2:51:11<28:52:17,  1.14s/it]

84463 episode score is 702.00


  9%|▊         | 8573/100000 [2:51:12<28:49:16,  1.13s/it]

84473 episode score is 722.94


  9%|▊         | 8574/100000 [2:51:13<29:10:57,  1.15s/it]

84484 episode score is 683.37


  9%|▊         | 8575/100000 [2:51:14<29:09:21,  1.15s/it]

84495 episode score is 666.12


  9%|▊         | 8576/100000 [2:51:15<29:15:38,  1.15s/it]

84506 episode score is 675.85


  9%|▊         | 8577/100000 [2:51:16<29:29:49,  1.16s/it]

84517 episode score is 668.41


  9%|▊         | 8578/100000 [2:51:18<29:41:30,  1.17s/it]

84528 episode score is 677.78


  9%|▊         | 8579/100000 [2:51:19<29:59:20,  1.18s/it]

84539 episode score is 684.14


  9%|▊         | 8580/100000 [2:51:20<29:38:42,  1.17s/it]

84549 episode score is 730.12


  9%|▊         | 8581/100000 [2:51:21<29:22:29,  1.16s/it]

84559 episode score is 720.89


  9%|▊         | 8582/100000 [2:51:22<29:03:41,  1.14s/it]

84569 episode score is 705.99


  9%|▊         | 8583/100000 [2:51:23<28:45:00,  1.13s/it]

84579 episode score is 701.30


  9%|▊         | 8584/100000 [2:51:25<28:47:53,  1.13s/it]

84589 episode score is 725.92


  9%|▊         | 8585/100000 [2:51:26<28:55:50,  1.14s/it]

84599 episode score is 723.36


  9%|▊         | 8586/100000 [2:51:27<28:47:34,  1.13s/it]

84609 episode score is 715.81


  9%|▊         | 8587/100000 [2:51:28<28:32:55,  1.12s/it]

84619 episode score is 706.46


  9%|▊         | 8588/100000 [2:51:29<28:26:39,  1.12s/it]

84629 episode score is 706.43


  9%|▊         | 8589/100000 [2:51:30<28:29:19,  1.12s/it]

84639 episode score is 719.82


  9%|▊         | 8590/100000 [2:51:31<28:34:10,  1.13s/it]

84649 episode score is 725.65


  9%|▊         | 8591/100000 [2:51:32<28:25:37,  1.12s/it]

84658 episode score is 772.91


  9%|▊         | 8592/100000 [2:51:33<28:21:43,  1.12s/it]

84667 episode score is 791.23


  9%|▊         | 8593/100000 [2:51:35<28:38:52,  1.13s/it]

84677 episode score is 736.72


  9%|▊         | 8594/100000 [2:51:36<28:45:13,  1.13s/it]

84686 episode score is 815.07


  9%|▊         | 8595/100000 [2:51:37<29:26:21,  1.16s/it]

84695 episode score is 869.60


  9%|▊         | 8596/100000 [2:51:38<28:57:44,  1.14s/it]

84703 episode score is 875.94


  9%|▊         | 8597/100000 [2:51:39<28:57:10,  1.14s/it]

84712 episode score is 811.12


  9%|▊         | 8598/100000 [2:51:40<29:19:10,  1.15s/it]

84721 episode score is 858.34


  9%|▊         | 8599/100000 [2:51:41<28:48:17,  1.13s/it]

84730 episode score is 770.78
84740 episode score is 744.67


  9%|▊         | 8600/100000 [2:51:44<36:58:59,  1.46s/it]

Iteration 8600: Average test reward: 757.34


  9%|▊         | 8601/100000 [2:51:45<34:46:22,  1.37s/it]

84750 episode score is 748.79


  9%|▊         | 8602/100000 [2:51:46<32:45:37,  1.29s/it]

84759 episode score is 787.20


  9%|▊         | 8603/100000 [2:51:47<32:15:56,  1.27s/it]

84769 episode score is 724.31


  9%|▊         | 8604/100000 [2:51:48<31:29:09,  1.24s/it]

84779 episode score is 745.86


  9%|▊         | 8605/100000 [2:51:49<30:36:45,  1.21s/it]

84789 episode score is 720.07


  9%|▊         | 8606/100000 [2:51:51<30:09:13,  1.19s/it]

84799 episode score is 738.57


  9%|▊         | 8607/100000 [2:51:52<29:45:21,  1.17s/it]

84809 episode score is 723.77


  9%|▊         | 8608/100000 [2:51:53<29:44:02,  1.17s/it]

84819 episode score is 746.54


  9%|▊         | 8609/100000 [2:51:54<29:09:43,  1.15s/it]

84829 episode score is 704.84


  9%|▊         | 8610/100000 [2:51:55<29:31:00,  1.16s/it]

84839 episode score is 763.37


  9%|▊         | 8611/100000 [2:51:56<29:30:05,  1.16s/it]

84849 episode score is 745.79


  9%|▊         | 8612/100000 [2:51:58<29:07:54,  1.15s/it]

84859 episode score is 713.40


  9%|▊         | 8613/100000 [2:51:59<29:12:11,  1.15s/it]

84869 episode score is 743.97


  9%|▊         | 8614/100000 [2:52:00<28:48:18,  1.13s/it]

84879 episode score is 709.41


  9%|▊         | 8615/100000 [2:52:01<28:21:05,  1.12s/it]

84889 episode score is 700.82


  9%|▊         | 8616/100000 [2:52:02<28:53:40,  1.14s/it]

84899 episode score is 757.18


  9%|▊         | 8617/100000 [2:52:03<29:03:25,  1.14s/it]

84910 episode score is 681.03


  9%|▊         | 8618/100000 [2:52:04<28:47:40,  1.13s/it]

84920 episode score is 722.57


  9%|▊         | 8619/100000 [2:52:05<28:53:37,  1.14s/it]

84931 episode score is 674.56


  9%|▊         | 8620/100000 [2:52:07<28:42:01,  1.13s/it]

84941 episode score is 721.21


  9%|▊         | 8621/100000 [2:52:08<28:14:15,  1.11s/it]

84951 episode score is 697.16


  9%|▊         | 8622/100000 [2:52:09<28:31:03,  1.12s/it]

84961 episode score is 744.99


  9%|▊         | 8623/100000 [2:52:10<28:46:42,  1.13s/it]

84971 episode score is 750.50


  9%|▊         | 8624/100000 [2:52:11<28:56:58,  1.14s/it]

84981 episode score is 762.27


  9%|▊         | 8625/100000 [2:52:12<28:45:20,  1.13s/it]

84991 episode score is 731.08


  9%|▊         | 8626/100000 [2:52:13<28:51:27,  1.14s/it]

85001 episode score is 738.92


  9%|▊         | 8627/100000 [2:52:14<28:46:51,  1.13s/it]

85005 episode score is 1610.07


  9%|▊         | 8628/100000 [2:52:16<28:41:41,  1.13s/it]

85008 episode score is 2069.00
85011 episode score is 2553.81


  9%|▊         | 8630/100000 [2:52:18<30:53:55,  1.22s/it]

85014 episode score is 1923.83
85017 episode score is 2475.50


  9%|▊         | 8631/100000 [2:52:20<32:44:39,  1.29s/it]

85020 episode score is 2297.00


  9%|▊         | 8632/100000 [2:52:21<33:13:47,  1.31s/it]

85024 episode score is 1984.20


  9%|▊         | 8634/100000 [2:52:24<32:50:41,  1.29s/it]

85028 episode score is 1508.60


  9%|▊         | 8635/100000 [2:52:25<32:48:16,  1.29s/it]

85031 episode score is 2104.82


  9%|▊         | 8636/100000 [2:52:26<31:39:19,  1.25s/it]

85034 episode score is 1962.35


  9%|▊         | 8637/100000 [2:52:27<31:39:04,  1.25s/it]

85038 episode score is 1645.13


  9%|▊         | 8638/100000 [2:52:29<31:12:26,  1.23s/it]

85041 episode score is 2120.79


  9%|▊         | 8639/100000 [2:52:30<30:23:40,  1.20s/it]

85047 episode score is 1037.64


  9%|▊         | 8640/100000 [2:52:31<31:09:36,  1.23s/it]

85053 episode score is 1293.78


  9%|▊         | 8641/100000 [2:52:32<30:39:54,  1.21s/it]

85056 episode score is 2084.78


  9%|▊         | 8642/100000 [2:52:33<30:08:23,  1.19s/it]

85062 episode score is 1088.06
85068 episode score is 1433.04


  9%|▊         | 8644/100000 [2:52:36<32:07:13,  1.27s/it]

85073 episode score is 1358.98


  9%|▊         | 8645/100000 [2:52:37<31:34:45,  1.24s/it]

85079 episode score is 1155.09


  9%|▊         | 8646/100000 [2:52:38<30:50:18,  1.22s/it]

85085 episode score is 1137.11


  9%|▊         | 8647/100000 [2:52:39<30:04:19,  1.19s/it]

85088 episode score is 1924.11


  9%|▊         | 8648/100000 [2:52:41<30:02:11,  1.18s/it]

85094 episode score is 1162.89
85099 episode score is 1568.62


  9%|▊         | 8649/100000 [2:52:42<32:12:16,  1.27s/it]

85106 episode score is 1161.79


  9%|▊         | 8651/100000 [2:52:45<32:03:10,  1.26s/it]

85111 episode score is 1320.10
85115 episode score is 1969.21


  9%|▊         | 8653/100000 [2:52:47<32:11:28,  1.27s/it]

85121 episode score is 1044.11


  9%|▊         | 8654/100000 [2:52:48<31:20:21,  1.24s/it]

85126 episode score is 1345.43


  9%|▊         | 8655/100000 [2:52:50<31:03:39,  1.22s/it]

85135 episode score is 828.82


  9%|▊         | 8656/100000 [2:52:51<30:26:15,  1.20s/it]

85141 episode score is 1092.11


  9%|▊         | 8657/100000 [2:52:52<31:06:07,  1.23s/it]

85149 episode score is 936.71


  9%|▊         | 8658/100000 [2:52:53<31:06:12,  1.23s/it]

85155 episode score is 1209.34


  9%|▊         | 8659/100000 [2:52:55<31:54:22,  1.26s/it]

85160 episode score is 1474.32


  9%|▊         | 8660/100000 [2:52:56<30:52:33,  1.22s/it]

85168 episode score is 885.23


  9%|▊         | 8661/100000 [2:52:57<30:03:10,  1.18s/it]

85177 episode score is 771.63


  9%|▊         | 8662/100000 [2:52:58<30:30:11,  1.20s/it]

85184 episode score is 1067.19


  9%|▊         | 8663/100000 [2:52:59<31:07:18,  1.23s/it]

85191 episode score is 1069.79


  9%|▊         | 8664/100000 [2:53:01<30:20:19,  1.20s/it]

85198 episode score is 940.85


  9%|▊         | 8665/100000 [2:53:02<30:28:38,  1.20s/it]

85204 episode score is 1144.10


  9%|▊         | 8666/100000 [2:53:03<29:48:53,  1.18s/it]

85211 episode score is 947.03


  9%|▊         | 8667/100000 [2:53:04<29:36:42,  1.17s/it]

85217 episode score is 1135.36


  9%|▊         | 8668/100000 [2:53:05<29:15:48,  1.15s/it]

85227 episode score is 704.20


  9%|▊         | 8669/100000 [2:53:06<28:59:57,  1.14s/it]

85233 episode score is 1013.78


  9%|▊         | 8670/100000 [2:53:07<29:06:41,  1.15s/it]

85242 episode score is 816.08


  9%|▊         | 8671/100000 [2:53:09<29:36:43,  1.17s/it]

85252 episode score is 765.05


  9%|▊         | 8672/100000 [2:53:10<30:54:55,  1.22s/it]

85258 episode score is 1322.52


  9%|▊         | 8673/100000 [2:53:11<31:43:26,  1.25s/it]

85266 episode score is 1006.59


  9%|▊         | 8674/100000 [2:53:12<30:47:21,  1.21s/it]

85271 episode score is 1257.30


  9%|▊         | 8675/100000 [2:53:14<30:36:16,  1.21s/it]

85280 episode score is 830.24


  9%|▊         | 8676/100000 [2:53:15<29:53:17,  1.18s/it]

85285 episode score is 1243.19


  9%|▊         | 8677/100000 [2:53:16<29:33:36,  1.17s/it]

85294 episode score is 797.95


  9%|▊         | 8678/100000 [2:53:17<29:59:28,  1.18s/it]

85300 episode score is 1108.24


  9%|▊         | 8679/100000 [2:53:18<31:09:09,  1.23s/it]

85305 episode score is 1386.00


  9%|▊         | 8680/100000 [2:53:20<30:59:27,  1.22s/it]

85311 episode score is 1144.12


  9%|▊         | 8681/100000 [2:53:21<30:35:05,  1.21s/it]

85317 episode score is 1140.79


  9%|▊         | 8682/100000 [2:53:22<31:12:54,  1.23s/it]

85323 episode score is 1215.57


  9%|▊         | 8683/100000 [2:53:23<31:17:12,  1.23s/it]

85327 episode score is 1617.44


  9%|▊         | 8684/100000 [2:53:24<30:20:49,  1.20s/it]

85330 episode score is 1948.60


  9%|▊         | 8685/100000 [2:53:26<30:29:14,  1.20s/it]

85336 episode score is 1190.09


  9%|▊         | 8686/100000 [2:53:27<29:46:28,  1.17s/it]

85342 episode score is 1054.54


  9%|▊         | 8687/100000 [2:53:28<29:58:29,  1.18s/it]

85345 episode score is 2167.82


  9%|▊         | 8688/100000 [2:53:29<29:45:17,  1.17s/it]

85350 episode score is 1296.51


  9%|▊         | 8689/100000 [2:53:30<29:18:13,  1.16s/it]

85357 episode score is 980.59


  9%|▊         | 8690/100000 [2:53:31<29:10:34,  1.15s/it]

85364 episode score is 968.61


  9%|▊         | 8691/100000 [2:53:32<29:18:32,  1.16s/it]

85368 episode score is 1528.90
85373 episode score is 1432.85


  9%|▊         | 8692/100000 [2:53:34<30:53:35,  1.22s/it]

85380 episode score is 1326.61


  9%|▊         | 8694/100000 [2:53:37<33:01:57,  1.30s/it]

85388 episode score is 942.98


  9%|▊         | 8695/100000 [2:53:38<32:23:53,  1.28s/it]

85396 episode score is 974.77
85401 episode score is 1515.38


  9%|▊         | 8697/100000 [2:53:41<32:32:35,  1.28s/it]

85411 episode score is 750.72


  9%|▊         | 8698/100000 [2:53:42<31:50:52,  1.26s/it]

85420 episode score is 816.95


  9%|▊         | 8699/100000 [2:53:43<31:14:43,  1.23s/it]

85430 episode score is 733.18
85436 episode score is 1174.60


  9%|▊         | 8700/100000 [2:53:45<38:43:35,  1.53s/it]

Iteration 8700: Average test reward: 720.24


  9%|▊         | 8701/100000 [2:53:46<36:09:58,  1.43s/it]

85447 episode score is 671.23


  9%|▊         | 8702/100000 [2:53:47<34:06:39,  1.35s/it]

85457 episode score is 706.28


  9%|▊         | 8703/100000 [2:53:49<32:57:03,  1.30s/it]

85466 episode score is 834.56


  9%|▊         | 8704/100000 [2:53:50<31:56:51,  1.26s/it]

85476 episode score is 737.22


  9%|▊         | 8705/100000 [2:53:51<31:51:04,  1.26s/it]

85485 episode score is 819.89


  9%|▊         | 8706/100000 [2:53:52<31:08:34,  1.23s/it]

85495 episode score is 735.42


  9%|▊         | 8707/100000 [2:53:53<30:52:35,  1.22s/it]

85505 episode score is 766.05


  9%|▊         | 8708/100000 [2:53:55<30:13:22,  1.19s/it]

85514 episode score is 805.79


  9%|▊         | 8709/100000 [2:53:56<30:18:27,  1.20s/it]

85524 episode score is 769.13


  9%|▊         | 8710/100000 [2:53:57<30:20:54,  1.20s/it]

85534 episode score is 765.31


  9%|▊         | 8711/100000 [2:53:58<29:55:29,  1.18s/it]

85543 episode score is 811.62


  9%|▊         | 8712/100000 [2:53:59<29:39:33,  1.17s/it]

85552 episode score is 804.29


  9%|▊         | 8713/100000 [2:54:00<29:37:57,  1.17s/it]

85561 episode score is 827.60


  9%|▊         | 8714/100000 [2:54:02<29:09:28,  1.15s/it]

85570 episode score is 781.41


  9%|▊         | 8715/100000 [2:54:03<28:54:12,  1.14s/it]

85579 episode score is 779.60


  9%|▊         | 8716/100000 [2:54:04<28:45:48,  1.13s/it]

85588 episode score is 793.86


  9%|▊         | 8717/100000 [2:54:05<28:30:53,  1.12s/it]

85597 episode score is 784.32


  9%|▊         | 8718/100000 [2:54:06<29:08:23,  1.15s/it]

85607 episode score is 771.79


  9%|▊         | 8719/100000 [2:54:07<28:57:07,  1.14s/it]

85616 episode score is 799.02


  9%|▊         | 8720/100000 [2:54:08<29:14:43,  1.15s/it]

85626 episode score is 751.66


  9%|▊         | 8721/100000 [2:54:09<28:56:09,  1.14s/it]

85635 episode score is 779.39


  9%|▊         | 8722/100000 [2:54:11<29:10:21,  1.15s/it]

85645 episode score is 749.86


  9%|▊         | 8723/100000 [2:54:12<29:32:26,  1.17s/it]

85655 episode score is 760.75


  9%|▊         | 8724/100000 [2:54:13<29:15:30,  1.15s/it]

85665 episode score is 708.05


  9%|▊         | 8725/100000 [2:54:14<29:30:57,  1.16s/it]

85675 episode score is 752.37


  9%|▊         | 8726/100000 [2:54:15<29:11:47,  1.15s/it]

85685 episode score is 700.25


  9%|▊         | 8727/100000 [2:54:16<29:07:41,  1.15s/it]

85695 episode score is 732.61


  9%|▊         | 8728/100000 [2:54:18<28:59:06,  1.14s/it]

85705 episode score is 695.51


  9%|▊         | 8729/100000 [2:54:19<28:45:07,  1.13s/it]

85715 episode score is 691.42


  9%|▊         | 8730/100000 [2:54:20<29:15:14,  1.15s/it]

85726 episode score is 671.79


  9%|▊         | 8731/100000 [2:54:21<28:56:52,  1.14s/it]

85736 episode score is 709.82


  9%|▊         | 8732/100000 [2:54:22<28:33:37,  1.13s/it]

85745 episode score is 776.55


  9%|▊         | 8733/100000 [2:54:23<28:24:18,  1.12s/it]

85755 episode score is 710.74


  9%|▊         | 8734/100000 [2:54:24<28:31:11,  1.12s/it]

85765 episode score is 721.58


  9%|▊         | 8735/100000 [2:54:25<28:22:29,  1.12s/it]

85775 episode score is 696.45


  9%|▊         | 8736/100000 [2:54:27<28:33:18,  1.13s/it]

85785 episode score is 738.17


  9%|▊         | 8737/100000 [2:54:28<28:54:21,  1.14s/it]

85795 episode score is 746.79


  9%|▊         | 8738/100000 [2:54:29<28:42:46,  1.13s/it]

85805 episode score is 712.44


  9%|▊         | 8739/100000 [2:54:30<28:55:10,  1.14s/it]

85816 episode score is 667.06


  9%|▊         | 8740/100000 [2:54:31<28:38:40,  1.13s/it]

85826 episode score is 700.97


  9%|▊         | 8741/100000 [2:54:32<28:50:51,  1.14s/it]

85837 episode score is 662.39


  9%|▊         | 8742/100000 [2:54:33<29:12:08,  1.15s/it]

85848 episode score is 683.86


  9%|▊         | 8743/100000 [2:54:35<29:31:04,  1.16s/it]

85859 episode score is 691.65


  9%|▊         | 8744/100000 [2:54:36<29:32:26,  1.17s/it]

85870 episode score is 671.44


  9%|▊         | 8745/100000 [2:54:37<29:34:35,  1.17s/it]

85881 episode score is 665.50


  9%|▊         | 8746/100000 [2:54:38<29:24:59,  1.16s/it]

85892 episode score is 656.42


  9%|▊         | 8747/100000 [2:54:39<29:14:30,  1.15s/it]

85903 episode score is 650.47


  9%|▊         | 8748/100000 [2:54:40<29:24:24,  1.16s/it]

85914 episode score is 682.70


  9%|▊         | 8749/100000 [2:54:42<28:58:30,  1.14s/it]

85924 episode score is 709.89


  9%|▉         | 8750/100000 [2:54:43<28:44:43,  1.13s/it]

85934 episode score is 712.31


  9%|▉         | 8751/100000 [2:54:44<28:42:59,  1.13s/it]

85944 episode score is 722.13


  9%|▉         | 8752/100000 [2:54:45<28:36:32,  1.13s/it]

85954 episode score is 721.35


  9%|▉         | 8753/100000 [2:54:46<28:54:19,  1.14s/it]

85965 episode score is 678.93


  9%|▉         | 8754/100000 [2:54:47<28:56:27,  1.14s/it]

85975 episode score is 729.03


  9%|▉         | 8755/100000 [2:54:48<29:33:54,  1.17s/it]

85986 episode score is 701.36


  9%|▉         | 8756/100000 [2:54:50<29:37:00,  1.17s/it]

85997 episode score is 677.45


  9%|▉         | 8757/100000 [2:54:51<28:57:04,  1.14s/it]

86007 episode score is 695.34


  9%|▉         | 8758/100000 [2:54:52<29:14:35,  1.15s/it]

86018 episode score is 679.86


  9%|▉         | 8759/100000 [2:54:53<29:24:09,  1.16s/it]

86029 episode score is 688.73


  9%|▉         | 8760/100000 [2:54:54<29:21:11,  1.16s/it]

86039 episode score is 748.31


  9%|▉         | 8761/100000 [2:54:55<28:51:14,  1.14s/it]

86048 episode score is 788.85


  9%|▉         | 8762/100000 [2:54:57<29:43:41,  1.17s/it]

86058 episode score is 751.49


  9%|▉         | 8763/100000 [2:54:58<30:13:29,  1.19s/it]

86068 episode score is 791.97


  9%|▉         | 8764/100000 [2:54:59<29:34:40,  1.17s/it]

86078 episode score is 701.84


  9%|▉         | 8765/100000 [2:55:00<29:34:40,  1.17s/it]

86089 episode score is 681.54


  9%|▉         | 8766/100000 [2:55:01<29:20:07,  1.16s/it]

86099 episode score is 739.22


  9%|▉         | 8767/100000 [2:55:02<29:08:13,  1.15s/it]

86109 episode score is 725.76


  9%|▉         | 8768/100000 [2:55:03<28:39:57,  1.13s/it]

86119 episode score is 697.98


  9%|▉         | 8769/100000 [2:55:05<28:36:24,  1.13s/it]

86129 episode score is 729.54


  9%|▉         | 8770/100000 [2:55:06<28:19:06,  1.12s/it]

86138 episode score is 790.58


  9%|▉         | 8771/100000 [2:55:07<28:37:30,  1.13s/it]

86148 episode score is 755.40


  9%|▉         | 8772/100000 [2:55:08<28:56:49,  1.14s/it]

86158 episode score is 761.29


  9%|▉         | 8773/100000 [2:55:09<28:38:22,  1.13s/it]

86167 episode score is 794.14


  9%|▉         | 8774/100000 [2:55:10<28:16:50,  1.12s/it]

86176 episode score is 786.70


  9%|▉         | 8775/100000 [2:55:11<28:11:04,  1.11s/it]

86185 episode score is 802.81


  9%|▉         | 8776/100000 [2:55:12<28:57:18,  1.14s/it]

86194 episode score is 876.26


  9%|▉         | 8777/100000 [2:55:14<28:44:45,  1.13s/it]

86203 episode score is 801.94


  9%|▉         | 8778/100000 [2:55:15<29:04:20,  1.15s/it]

86213 episode score is 766.15


  9%|▉         | 8779/100000 [2:55:16<29:10:28,  1.15s/it]

86223 episode score is 759.99


  9%|▉         | 8780/100000 [2:55:17<28:51:20,  1.14s/it]

86232 episode score is 797.78


  9%|▉         | 8781/100000 [2:55:18<29:00:50,  1.15s/it]

86241 episode score is 821.80


  9%|▉         | 8782/100000 [2:55:19<29:07:10,  1.15s/it]

86250 episode score is 823.01


  9%|▉         | 8783/100000 [2:55:21<29:15:21,  1.15s/it]

86260 episode score is 755.41


  9%|▉         | 8784/100000 [2:55:22<29:15:12,  1.15s/it]

86270 episode score is 745.79


  9%|▉         | 8785/100000 [2:55:23<29:21:05,  1.16s/it]

86280 episode score is 750.07


  9%|▉         | 8786/100000 [2:55:24<29:13:45,  1.15s/it]

86289 episode score is 828.73


  9%|▉         | 8787/100000 [2:55:25<28:54:46,  1.14s/it]

86299 episode score is 717.92


  9%|▉         | 8788/100000 [2:55:26<29:12:24,  1.15s/it]

86309 episode score is 758.44


  9%|▉         | 8789/100000 [2:55:27<29:20:41,  1.16s/it]

86319 episode score is 749.96


  9%|▉         | 8790/100000 [2:55:29<29:20:04,  1.16s/it]

86329 episode score is 748.12


  9%|▉         | 8791/100000 [2:55:30<29:30:07,  1.16s/it]

86339 episode score is 769.33


  9%|▉         | 8792/100000 [2:55:31<28:56:18,  1.14s/it]

86348 episode score is 786.78


  9%|▉         | 8793/100000 [2:55:32<29:06:09,  1.15s/it]

86357 episode score is 829.86


  9%|▉         | 8794/100000 [2:55:33<29:02:00,  1.15s/it]

86367 episode score is 740.26


  9%|▉         | 8795/100000 [2:55:34<28:47:45,  1.14s/it]

86376 episode score is 816.26


  9%|▉         | 8796/100000 [2:55:35<28:34:52,  1.13s/it]

86385 episode score is 810.54


  9%|▉         | 8797/100000 [2:55:37<28:26:13,  1.12s/it]

86394 episode score is 805.81


  9%|▉         | 8798/100000 [2:55:38<28:17:59,  1.12s/it]

86403 episode score is 787.79


  9%|▉         | 8799/100000 [2:55:39<28:44:02,  1.13s/it]

86413 episode score is 755.93
86423 episode score is 755.42


  9%|▉         | 8800/100000 [2:55:41<37:11:38,  1.47s/it]

Iteration 8800: Average test reward: 770.48


  9%|▉         | 8801/100000 [2:55:42<35:02:03,  1.38s/it]

86433 episode score is 759.93


  9%|▉         | 8802/100000 [2:55:43<33:31:11,  1.32s/it]

86443 episode score is 763.99


  9%|▉         | 8803/100000 [2:55:45<32:06:09,  1.27s/it]

86452 episode score is 818.30


  9%|▉         | 8804/100000 [2:55:46<31:04:40,  1.23s/it]

86461 episode score is 810.66


  9%|▉         | 8805/100000 [2:55:47<30:10:56,  1.19s/it]

86470 episode score is 792.22


  9%|▉         | 8806/100000 [2:55:48<29:45:59,  1.18s/it]

86479 episode score is 800.92


  9%|▉         | 8807/100000 [2:55:49<29:43:28,  1.17s/it]

86488 episode score is 826.80


  9%|▉         | 8808/100000 [2:55:50<29:51:12,  1.18s/it]

86498 episode score is 771.58


  9%|▉         | 8809/100000 [2:55:51<29:52:56,  1.18s/it]

86508 episode score is 763.43


  9%|▉         | 8810/100000 [2:55:53<29:44:53,  1.17s/it]

86518 episode score is 751.11


  9%|▉         | 8811/100000 [2:55:54<29:20:35,  1.16s/it]

86527 episode score is 803.19


  9%|▉         | 8812/100000 [2:55:55<29:31:34,  1.17s/it]

86537 episode score is 761.76


  9%|▉         | 8813/100000 [2:55:56<29:49:55,  1.18s/it]

86547 episode score is 776.43


  9%|▉         | 8814/100000 [2:55:57<29:38:27,  1.17s/it]

86557 episode score is 743.57


  9%|▉         | 8815/100000 [2:55:58<29:36:01,  1.17s/it]

86567 episode score is 748.22


  9%|▉         | 8816/100000 [2:56:00<29:41:23,  1.17s/it]

86577 episode score is 750.78


  9%|▉         | 8817/100000 [2:56:01<29:28:43,  1.16s/it]

86587 episode score is 733.26


  9%|▉         | 8818/100000 [2:56:02<29:14:06,  1.15s/it]

86597 episode score is 721.81


  9%|▉         | 8819/100000 [2:56:03<29:11:01,  1.15s/it]

86607 episode score is 731.27


  9%|▉         | 8820/100000 [2:56:04<29:11:24,  1.15s/it]

86617 episode score is 743.36


  9%|▉         | 8821/100000 [2:56:05<29:24:10,  1.16s/it]

86627 episode score is 754.89


  9%|▉         | 8822/100000 [2:56:07<29:13:14,  1.15s/it]

86637 episode score is 735.42


  9%|▉         | 8823/100000 [2:56:08<28:54:12,  1.14s/it]

86647 episode score is 706.78


  9%|▉         | 8824/100000 [2:56:09<28:37:27,  1.13s/it]

86656 episode score is 788.75


  9%|▉         | 8825/100000 [2:56:10<28:54:18,  1.14s/it]

86666 episode score is 754.33


  9%|▉         | 8826/100000 [2:56:11<29:09:48,  1.15s/it]

86676 episode score is 754.79


  9%|▉         | 8827/100000 [2:56:12<29:18:37,  1.16s/it]

86686 episode score is 738.43


  9%|▉         | 8828/100000 [2:56:14<30:24:50,  1.20s/it]

86696 episode score is 774.94


  9%|▉         | 8829/100000 [2:56:15<30:19:19,  1.20s/it]

86706 episode score is 761.82


  9%|▉         | 8830/100000 [2:56:16<30:04:10,  1.19s/it]

86716 episode score is 743.06


  9%|▉         | 8831/100000 [2:56:17<30:03:35,  1.19s/it]

86726 episode score is 753.67


  9%|▉         | 8832/100000 [2:56:18<29:52:04,  1.18s/it]

86736 episode score is 738.20


  9%|▉         | 8833/100000 [2:56:19<29:46:52,  1.18s/it]

86746 episode score is 744.05


  9%|▉         | 8834/100000 [2:56:21<29:11:00,  1.15s/it]

86756 episode score is 701.06


  9%|▉         | 8835/100000 [2:56:22<28:52:20,  1.14s/it]

86766 episode score is 713.50


  9%|▉         | 8836/100000 [2:56:23<28:58:58,  1.14s/it]

86776 episode score is 734.83


  9%|▉         | 8837/100000 [2:56:24<28:42:43,  1.13s/it]

86786 episode score is 710.51


  9%|▉         | 8838/100000 [2:56:25<29:13:41,  1.15s/it]

86797 episode score is 699.69


  9%|▉         | 8839/100000 [2:56:26<29:32:05,  1.17s/it]

86808 episode score is 694.32


  9%|▉         | 8840/100000 [2:56:27<29:15:53,  1.16s/it]

86818 episode score is 722.57


  9%|▉         | 8841/100000 [2:56:29<29:23:02,  1.16s/it]

86828 episode score is 751.17


  9%|▉         | 8842/100000 [2:56:30<29:12:26,  1.15s/it]

86838 episode score is 732.38


  9%|▉         | 8843/100000 [2:56:31<29:17:46,  1.16s/it]

86848 episode score is 744.01


  9%|▉         | 8844/100000 [2:56:32<29:30:10,  1.17s/it]

86858 episode score is 754.48


  9%|▉         | 8845/100000 [2:56:33<29:34:09,  1.17s/it]

86868 episode score is 756.69


  9%|▉         | 8846/100000 [2:56:34<29:47:40,  1.18s/it]

86878 episode score is 777.34


  9%|▉         | 8847/100000 [2:56:36<29:27:56,  1.16s/it]

86888 episode score is 728.43


  9%|▉         | 8848/100000 [2:56:37<28:59:53,  1.15s/it]

86897 episode score is 794.43


  9%|▉         | 8849/100000 [2:56:38<29:06:48,  1.15s/it]

86907 episode score is 746.80


  9%|▉         | 8850/100000 [2:56:39<28:53:35,  1.14s/it]

86917 episode score is 720.45


  9%|▉         | 8851/100000 [2:56:40<28:53:34,  1.14s/it]

86927 episode score is 738.92


  9%|▉         | 8852/100000 [2:56:41<28:53:12,  1.14s/it]

86937 episode score is 741.20


  9%|▉         | 8853/100000 [2:56:42<28:35:23,  1.13s/it]

86947 episode score is 695.68


  9%|▉         | 8854/100000 [2:56:44<29:05:12,  1.15s/it]

86957 episode score is 775.99


  9%|▉         | 8855/100000 [2:56:45<29:01:02,  1.15s/it]

86967 episode score is 734.52


  9%|▉         | 8856/100000 [2:56:46<29:04:40,  1.15s/it]

86977 episode score is 725.95


  9%|▉         | 8857/100000 [2:56:47<29:15:32,  1.16s/it]

86987 episode score is 744.61


  9%|▉         | 8858/100000 [2:56:48<29:31:29,  1.17s/it]

86997 episode score is 768.74


  9%|▉         | 8859/100000 [2:56:49<29:04:56,  1.15s/it]

87006 episode score is 795.62


  9%|▉         | 8860/100000 [2:56:50<28:45:23,  1.14s/it]

87015 episode score is 789.45


  9%|▉         | 8861/100000 [2:56:52<29:01:52,  1.15s/it]

87025 episode score is 757.70


  9%|▉         | 8862/100000 [2:56:53<29:24:15,  1.16s/it]

87035 episode score is 762.12


  9%|▉         | 8863/100000 [2:56:54<29:22:20,  1.16s/it]

87045 episode score is 744.97


  9%|▉         | 8864/100000 [2:56:55<29:29:12,  1.16s/it]

87055 episode score is 760.63


  9%|▉         | 8865/100000 [2:56:56<29:00:49,  1.15s/it]

87065 episode score is 713.20


  9%|▉         | 8866/100000 [2:56:57<28:51:48,  1.14s/it]

87075 episode score is 724.63


  9%|▉         | 8867/100000 [2:56:59<29:05:44,  1.15s/it]

87086 episode score is 681.34


  9%|▉         | 8868/100000 [2:57:00<29:27:56,  1.16s/it]

87097 episode score is 695.93


  9%|▉         | 8869/100000 [2:57:01<29:23:05,  1.16s/it]

87108 episode score is 671.68


  9%|▉         | 8870/100000 [2:57:02<29:39:01,  1.17s/it]

87119 episode score is 683.84


  9%|▉         | 8871/100000 [2:57:03<29:43:30,  1.17s/it]

87130 episode score is 680.59


  9%|▉         | 8872/100000 [2:57:04<29:03:23,  1.15s/it]

87140 episode score is 698.07


  9%|▉         | 8873/100000 [2:57:06<29:13:38,  1.15s/it]

87151 episode score is 680.14


  9%|▉         | 8874/100000 [2:57:07<29:04:56,  1.15s/it]

87161 episode score is 731.42


  9%|▉         | 8875/100000 [2:57:08<28:49:55,  1.14s/it]

87171 episode score is 717.42


  9%|▉         | 8876/100000 [2:57:09<28:33:37,  1.13s/it]

87181 episode score is 702.96


  9%|▉         | 8877/100000 [2:57:10<28:37:38,  1.13s/it]

87191 episode score is 731.37


  9%|▉         | 8878/100000 [2:57:11<28:21:32,  1.12s/it]

87201 episode score is 692.61


  9%|▉         | 8879/100000 [2:57:12<28:32:14,  1.13s/it]

87211 episode score is 728.05


  9%|▉         | 8880/100000 [2:57:13<28:31:13,  1.13s/it]

87221 episode score is 720.20


  9%|▉         | 8881/100000 [2:57:15<28:36:56,  1.13s/it]

87231 episode score is 731.59


  9%|▉         | 8882/100000 [2:57:16<28:23:12,  1.12s/it]

87241 episode score is 705.75


  9%|▉         | 8883/100000 [2:57:17<28:24:54,  1.12s/it]

87251 episode score is 725.58


  9%|▉         | 8884/100000 [2:57:18<28:31:18,  1.13s/it]

87261 episode score is 712.42


  9%|▉         | 8885/100000 [2:57:19<28:33:11,  1.13s/it]

87271 episode score is 721.18


  9%|▉         | 8886/100000 [2:57:20<28:50:25,  1.14s/it]

87282 episode score is 672.32


  9%|▉         | 8887/100000 [2:57:21<29:08:29,  1.15s/it]

87293 episode score is 689.38


  9%|▉         | 8888/100000 [2:57:23<29:36:36,  1.17s/it]

87304 episode score is 699.21


  9%|▉         | 8889/100000 [2:57:24<29:33:15,  1.17s/it]

87315 episode score is 669.93


  9%|▉         | 8890/100000 [2:57:25<29:43:55,  1.17s/it]

87325 episode score is 699.69


  9%|▉         | 8891/100000 [2:57:26<29:26:58,  1.16s/it]

87335 episode score is 735.43


  9%|▉         | 8892/100000 [2:57:27<29:33:45,  1.17s/it]

87345 episode score is 761.44


  9%|▉         | 8893/100000 [2:57:28<29:18:27,  1.16s/it]

87355 episode score is 729.63


  9%|▉         | 8894/100000 [2:57:30<29:16:05,  1.16s/it]

87365 episode score is 740.90


  9%|▉         | 8895/100000 [2:57:31<29:18:29,  1.16s/it]

87375 episode score is 750.98


  9%|▉         | 8896/100000 [2:57:32<29:24:37,  1.16s/it]

87385 episode score is 752.11


  9%|▉         | 8897/100000 [2:57:33<29:37:36,  1.17s/it]

87395 episode score is 758.40


  9%|▉         | 8898/100000 [2:57:34<29:33:18,  1.17s/it]

87405 episode score is 748.64


  9%|▉         | 8899/100000 [2:57:35<29:48:03,  1.18s/it]

87416 episode score is 698.26
87426 episode score is 707.26


  9%|▉         | 8900/100000 [2:57:38<36:58:48,  1.46s/it]

Iteration 8900: Average test reward: 730.31


  9%|▉         | 8901/100000 [2:57:39<34:34:03,  1.37s/it]

87436 episode score is 728.04


  9%|▉         | 8902/100000 [2:57:40<33:01:45,  1.31s/it]

87446 episode score is 747.09


  9%|▉         | 8903/100000 [2:57:41<31:44:02,  1.25s/it]

87456 episode score is 721.39


  9%|▉         | 8904/100000 [2:57:42<30:37:30,  1.21s/it]

87466 episode score is 705.18


  9%|▉         | 8905/100000 [2:57:43<30:15:18,  1.20s/it]

87476 episode score is 747.25


  9%|▉         | 8906/100000 [2:57:44<29:45:51,  1.18s/it]

87486 episode score is 724.39


  9%|▉         | 8907/100000 [2:57:46<29:37:48,  1.17s/it]

87496 episode score is 738.04


  9%|▉         | 8908/100000 [2:57:47<29:25:28,  1.16s/it]

87506 episode score is 735.12


  9%|▉         | 8909/100000 [2:57:48<29:23:06,  1.16s/it]

87516 episode score is 734.08


  9%|▉         | 8910/100000 [2:57:49<29:21:51,  1.16s/it]

87526 episode score is 733.94


  9%|▉         | 8911/100000 [2:57:50<29:18:32,  1.16s/it]

87536 episode score is 742.91


  9%|▉         | 8912/100000 [2:57:51<29:04:15,  1.15s/it]

87546 episode score is 723.83


  9%|▉         | 8913/100000 [2:57:52<28:56:42,  1.14s/it]

87556 episode score is 722.82


  9%|▉         | 8914/100000 [2:57:54<28:44:06,  1.14s/it]

87566 episode score is 709.77


  9%|▉         | 8915/100000 [2:57:55<28:50:41,  1.14s/it]

87576 episode score is 742.76


  9%|▉         | 8916/100000 [2:57:56<29:00:25,  1.15s/it]

87586 episode score is 737.59


  9%|▉         | 8917/100000 [2:57:57<28:36:57,  1.13s/it]

87596 episode score is 697.34


  9%|▉         | 8918/100000 [2:57:58<28:38:57,  1.13s/it]

87606 episode score is 733.56


  9%|▉         | 8919/100000 [2:57:59<28:37:23,  1.13s/it]

87616 episode score is 724.02


  9%|▉         | 8920/100000 [2:58:00<29:03:20,  1.15s/it]

87627 episode score is 692.99


  9%|▉         | 8921/100000 [2:58:01<28:43:41,  1.14s/it]

87637 episode score is 708.78


  9%|▉         | 8922/100000 [2:58:03<29:18:51,  1.16s/it]

87647 episode score is 765.73


  9%|▉         | 8923/100000 [2:58:04<29:26:27,  1.16s/it]

87657 episode score is 757.18


  9%|▉         | 8924/100000 [2:58:05<29:37:25,  1.17s/it]

87667 episode score is 766.34


  9%|▉         | 8925/100000 [2:58:06<29:49:41,  1.18s/it]

87677 episode score is 770.36


  9%|▉         | 8926/100000 [2:58:07<29:46:39,  1.18s/it]

87687 episode score is 754.74


  9%|▉         | 8927/100000 [2:58:09<29:48:11,  1.18s/it]

87697 episode score is 756.38


  9%|▉         | 8928/100000 [2:58:10<29:43:55,  1.18s/it]

87707 episode score is 756.56


  9%|▉         | 8929/100000 [2:58:11<29:29:44,  1.17s/it]

87717 episode score is 731.22


  9%|▉         | 8930/100000 [2:58:12<29:32:44,  1.17s/it]

87727 episode score is 754.05


  9%|▉         | 8931/100000 [2:58:13<29:47:20,  1.18s/it]

87738 episode score is 699.88


  9%|▉         | 8932/100000 [2:58:14<29:14:20,  1.16s/it]

87748 episode score is 708.81


  9%|▉         | 8933/100000 [2:58:16<29:30:38,  1.17s/it]

87758 episode score is 754.26


  9%|▉         | 8934/100000 [2:58:17<29:16:16,  1.16s/it]

87768 episode score is 716.79


  9%|▉         | 8935/100000 [2:58:18<29:15:38,  1.16s/it]

87778 episode score is 729.50


  9%|▉         | 8936/100000 [2:58:19<29:03:08,  1.15s/it]

87788 episode score is 720.16


  9%|▉         | 8937/100000 [2:58:20<29:22:32,  1.16s/it]

87799 episode score is 693.15


  9%|▉         | 8938/100000 [2:58:21<28:58:08,  1.15s/it]

87809 episode score is 712.78


  9%|▉         | 8939/100000 [2:58:22<28:44:36,  1.14s/it]

87819 episode score is 714.45


  9%|▉         | 8940/100000 [2:58:24<28:59:01,  1.15s/it]

87830 episode score is 682.69


  9%|▉         | 8941/100000 [2:58:25<29:16:04,  1.16s/it]

87841 episode score is 693.27


  9%|▉         | 8942/100000 [2:58:26<29:30:44,  1.17s/it]

87852 episode score is 694.54


  9%|▉         | 8943/100000 [2:58:27<29:03:59,  1.15s/it]

87862 episode score is 710.30


  9%|▉         | 8944/100000 [2:58:28<28:39:26,  1.13s/it]

87872 episode score is 704.31


  9%|▉         | 8945/100000 [2:58:29<29:02:08,  1.15s/it]

87883 episode score is 687.01


  9%|▉         | 8946/100000 [2:58:30<28:43:23,  1.14s/it]

87893 episode score is 706.35


  9%|▉         | 8947/100000 [2:58:32<28:31:35,  1.13s/it]

87903 episode score is 714.75


  9%|▉         | 8948/100000 [2:58:33<28:30:21,  1.13s/it]

87913 episode score is 707.48


  9%|▉         | 8949/100000 [2:58:34<29:22:22,  1.16s/it]

87923 episode score is 735.06


  9%|▉         | 8950/100000 [2:58:35<29:17:54,  1.16s/it]

87933 episode score is 739.04


  9%|▉         | 8951/100000 [2:58:36<29:22:55,  1.16s/it]

87943 episode score is 750.70


  9%|▉         | 8952/100000 [2:58:37<29:17:00,  1.16s/it]

87953 episode score is 731.64


  9%|▉         | 8953/100000 [2:58:39<29:11:28,  1.15s/it]

87963 episode score is 740.00


  9%|▉         | 8954/100000 [2:58:40<29:11:28,  1.15s/it]

87973 episode score is 739.94


  9%|▉         | 8955/100000 [2:58:41<29:05:13,  1.15s/it]

87983 episode score is 737.03


  9%|▉         | 8956/100000 [2:58:42<29:11:15,  1.15s/it]

87993 episode score is 739.54


  9%|▉         | 8957/100000 [2:58:43<29:17:01,  1.16s/it]

88003 episode score is 745.65


  9%|▉         | 8958/100000 [2:58:44<29:15:39,  1.16s/it]

88013 episode score is 743.39


  9%|▉         | 8959/100000 [2:58:46<29:24:39,  1.16s/it]

88023 episode score is 756.19


  9%|▉         | 8960/100000 [2:58:47<29:22:25,  1.16s/it]

88033 episode score is 749.62


  9%|▉         | 8961/100000 [2:58:48<29:26:55,  1.16s/it]

88043 episode score is 741.81


  9%|▉         | 8962/100000 [2:58:49<29:20:38,  1.16s/it]

88053 episode score is 726.10


  9%|▉         | 8963/100000 [2:58:50<29:14:05,  1.16s/it]

88063 episode score is 736.52


  9%|▉         | 8964/100000 [2:58:51<29:11:07,  1.15s/it]

88073 episode score is 742.03


  9%|▉         | 8965/100000 [2:58:52<29:21:13,  1.16s/it]

88083 episode score is 754.44


  9%|▉         | 8966/100000 [2:58:54<29:14:51,  1.16s/it]

88093 episode score is 743.01


  9%|▉         | 8967/100000 [2:58:55<29:06:44,  1.15s/it]

88103 episode score is 733.27


  9%|▉         | 8968/100000 [2:58:56<28:58:45,  1.15s/it]

88113 episode score is 730.68


  9%|▉         | 8969/100000 [2:58:57<28:45:57,  1.14s/it]

88123 episode score is 718.43


  9%|▉         | 8970/100000 [2:58:58<28:56:35,  1.14s/it]

88133 episode score is 751.28


  9%|▉         | 8971/100000 [2:58:59<28:54:26,  1.14s/it]

88143 episode score is 733.55


  9%|▉         | 8972/100000 [2:59:00<28:58:18,  1.15s/it]

88153 episode score is 746.38


  9%|▉         | 8973/100000 [2:59:02<28:41:14,  1.13s/it]

88163 episode score is 713.29


  9%|▉         | 8974/100000 [2:59:03<28:34:33,  1.13s/it]

88173 episode score is 709.04


  9%|▉         | 8975/100000 [2:59:04<28:40:54,  1.13s/it]

88184 episode score is 663.25


  9%|▉         | 8976/100000 [2:59:05<29:11:39,  1.15s/it]

88195 episode score is 694.93


  9%|▉         | 8977/100000 [2:59:06<28:46:31,  1.14s/it]

88205 episode score is 711.28


  9%|▉         | 8978/100000 [2:59:07<29:01:47,  1.15s/it]

88216 episode score is 684.68


  9%|▉         | 8979/100000 [2:59:08<28:39:07,  1.13s/it]

88226 episode score is 708.44


  9%|▉         | 8980/100000 [2:59:10<28:30:35,  1.13s/it]

88236 episode score is 713.97


  9%|▉         | 8981/100000 [2:59:11<28:24:56,  1.12s/it]

88246 episode score is 714.33


  9%|▉         | 8982/100000 [2:59:12<28:51:03,  1.14s/it]

88257 episode score is 682.80


  9%|▉         | 8983/100000 [2:59:13<28:44:41,  1.14s/it]

88267 episode score is 718.36


  9%|▉         | 8984/100000 [2:59:14<29:13:09,  1.16s/it]

88278 episode score is 699.54


  9%|▉         | 8985/100000 [2:59:15<28:59:08,  1.15s/it]

88289 episode score is 636.27


  9%|▉         | 8986/100000 [2:59:16<29:05:07,  1.15s/it]

88300 episode score is 673.66


  9%|▉         | 8987/100000 [2:59:18<29:22:28,  1.16s/it]

88312 episode score is 598.84


  9%|▉         | 8988/100000 [2:59:19<29:39:27,  1.17s/it]

88323 episode score is 686.13


  9%|▉         | 8989/100000 [2:59:20<30:33:57,  1.21s/it]

88334 episode score is 699.82


  9%|▉         | 8990/100000 [2:59:21<29:48:22,  1.18s/it]

88344 episode score is 718.73


  9%|▉         | 8991/100000 [2:59:22<29:31:20,  1.17s/it]

88354 episode score is 735.15


  9%|▉         | 8992/100000 [2:59:24<29:31:16,  1.17s/it]

88365 episode score is 673.00


  9%|▉         | 8993/100000 [2:59:25<29:24:23,  1.16s/it]

88377 episode score is 585.64


  9%|▉         | 8994/100000 [2:59:26<29:10:38,  1.15s/it]

88388 episode score is 653.54


  9%|▉         | 8995/100000 [2:59:27<28:53:31,  1.14s/it]

88398 episode score is 719.71


  9%|▉         | 8996/100000 [2:59:28<28:38:47,  1.13s/it]

88408 episode score is 717.63


  9%|▉         | 8997/100000 [2:59:29<28:57:20,  1.15s/it]

88418 episode score is 759.14


  9%|▉         | 8998/100000 [2:59:30<28:41:09,  1.13s/it]

88428 episode score is 721.07


  9%|▉         | 8999/100000 [2:59:31<28:25:00,  1.12s/it]

88438 episode score is 714.85
88448 episode score is 709.43


  9%|▉         | 9000/100000 [2:59:34<36:11:56,  1.43s/it]

Iteration 9000: Average test reward: 738.54


  9%|▉         | 9001/100000 [2:59:35<33:50:04,  1.34s/it]

88458 episode score is 713.39


  9%|▉         | 9002/100000 [2:59:36<32:43:35,  1.29s/it]

88469 episode score is 698.87


  9%|▉         | 9003/100000 [2:59:37<31:28:29,  1.25s/it]

88479 episode score is 731.59


  9%|▉         | 9004/100000 [2:59:38<30:31:38,  1.21s/it]

88489 episode score is 719.65


  9%|▉         | 9005/100000 [2:59:39<29:56:31,  1.18s/it]

88499 episode score is 721.80


  9%|▉         | 9006/100000 [2:59:40<29:19:25,  1.16s/it]

88509 episode score is 708.38


  9%|▉         | 9007/100000 [2:59:41<29:00:41,  1.15s/it]

88519 episode score is 727.04


  9%|▉         | 9008/100000 [2:59:43<28:41:07,  1.13s/it]

88529 episode score is 706.69


  9%|▉         | 9009/100000 [2:59:44<28:30:33,  1.13s/it]

88539 episode score is 721.35


  9%|▉         | 9010/100000 [2:59:45<28:32:20,  1.13s/it]

88549 episode score is 729.68


  9%|▉         | 9011/100000 [2:59:46<28:22:13,  1.12s/it]

88559 episode score is 709.47


  9%|▉         | 9012/100000 [2:59:47<28:52:09,  1.14s/it]

88570 episode score is 680.94


  9%|▉         | 9013/100000 [2:59:48<29:14:02,  1.16s/it]

88582 episode score is 568.59


  9%|▉         | 9014/100000 [2:59:50<29:42:44,  1.18s/it]

88591 episode score is 818.51


  9%|▉         | 9015/100000 [2:59:51<30:01:31,  1.19s/it]

88600 episode score is 819.04


  9%|▉         | 9016/100000 [2:59:52<29:45:41,  1.18s/it]

88609 episode score is 779.24


  9%|▉         | 9017/100000 [2:59:53<30:06:10,  1.19s/it]

88618 episode score is 826.01


  9%|▉         | 9018/100000 [2:59:54<30:10:53,  1.19s/it]

88627 episode score is 811.31


  9%|▉         | 9019/100000 [2:59:56<30:36:19,  1.21s/it]

88636 episode score is 836.78


  9%|▉         | 9020/100000 [2:59:57<30:34:38,  1.21s/it]

88645 episode score is 810.65


  9%|▉         | 9021/100000 [2:59:58<30:36:56,  1.21s/it]

88654 episode score is 830.32


  9%|▉         | 9022/100000 [2:59:59<30:45:55,  1.22s/it]

88663 episode score is 824.06


  9%|▉         | 9023/100000 [3:00:00<30:37:52,  1.21s/it]

88672 episode score is 820.52


  9%|▉         | 9024/100000 [3:00:02<30:33:39,  1.21s/it]

88681 episode score is 811.06


  9%|▉         | 9025/100000 [3:00:03<30:46:00,  1.22s/it]

88690 episode score is 829.86


  9%|▉         | 9026/100000 [3:00:04<30:41:33,  1.21s/it]

88699 episode score is 825.30


  9%|▉         | 9027/100000 [3:00:05<29:57:49,  1.19s/it]

88707 episode score is 859.01


  9%|▉         | 9028/100000 [3:00:06<29:33:52,  1.17s/it]

88716 episode score is 758.43


  9%|▉         | 9029/100000 [3:00:07<28:57:44,  1.15s/it]

88724 episode score is 840.43


  9%|▉         | 9030/100000 [3:00:09<29:07:38,  1.15s/it]

88733 episode score is 798.50


  9%|▉         | 9031/100000 [3:00:10<29:40:47,  1.17s/it]

88742 episode score is 830.09


  9%|▉         | 9032/100000 [3:00:11<29:35:43,  1.17s/it]

88751 episode score is 771.95


  9%|▉         | 9033/100000 [3:00:12<29:10:48,  1.15s/it]

88759 episode score is 875.23


  9%|▉         | 9034/100000 [3:00:13<29:18:03,  1.16s/it]

88768 episode score is 788.61


  9%|▉         | 9035/100000 [3:00:14<29:37:26,  1.17s/it]

88777 episode score is 832.08


  9%|▉         | 9036/100000 [3:00:16<29:49:51,  1.18s/it]

88786 episode score is 816.53


  9%|▉         | 9037/100000 [3:00:17<29:50:26,  1.18s/it]

88795 episode score is 801.37


  9%|▉         | 9038/100000 [3:00:18<29:22:47,  1.16s/it]

88804 episode score is 733.25


  9%|▉         | 9039/100000 [3:00:19<29:47:02,  1.18s/it]

88813 episode score is 812.43


  9%|▉         | 9040/100000 [3:00:20<30:02:58,  1.19s/it]

88822 episode score is 822.61


  9%|▉         | 9041/100000 [3:00:22<29:57:16,  1.19s/it]

88831 episode score is 803.92


  9%|▉         | 9042/100000 [3:00:23<30:04:09,  1.19s/it]

88840 episode score is 816.18


  9%|▉         | 9043/100000 [3:00:24<29:59:17,  1.19s/it]

88849 episode score is 803.83


  9%|▉         | 9044/100000 [3:00:25<29:59:21,  1.19s/it]

88858 episode score is 804.75


  9%|▉         | 9045/100000 [3:00:26<30:09:27,  1.19s/it]

88867 episode score is 827.82


  9%|▉         | 9046/100000 [3:00:27<29:26:14,  1.17s/it]

88875 episode score is 829.52


  9%|▉         | 9047/100000 [3:00:29<29:45:07,  1.18s/it]

88884 episode score is 818.03


  9%|▉         | 9048/100000 [3:00:30<29:55:45,  1.18s/it]

88893 episode score is 812.70


  9%|▉         | 9049/100000 [3:00:31<29:49:41,  1.18s/it]

88902 episode score is 774.04


  9%|▉         | 9050/100000 [3:00:32<29:16:58,  1.16s/it]

88910 episode score is 868.16


  9%|▉         | 9051/100000 [3:00:33<30:01:56,  1.19s/it]

88919 episode score is 853.42


  9%|▉         | 9052/100000 [3:00:35<30:50:40,  1.22s/it]

88928 episode score is 822.75


  9%|▉         | 9053/100000 [3:00:36<30:17:31,  1.20s/it]

88936 episode score is 891.34


  9%|▉         | 9054/100000 [3:00:37<30:23:24,  1.20s/it]

88945 episode score is 839.46


  9%|▉         | 9055/100000 [3:00:38<30:32:45,  1.21s/it]

88955 episode score is 758.61


  9%|▉         | 9056/100000 [3:00:39<29:59:04,  1.19s/it]

88964 episode score is 769.92


  9%|▉         | 9057/100000 [3:00:41<30:04:07,  1.19s/it]

88973 episode score is 820.46


  9%|▉         | 9058/100000 [3:00:42<30:02:18,  1.19s/it]

88982 episode score is 805.36


  9%|▉         | 9059/100000 [3:00:43<29:35:44,  1.17s/it]

88991 episode score is 759.61


  9%|▉         | 9060/100000 [3:00:44<29:05:41,  1.15s/it]

89000 episode score is 751.30


  9%|▉         | 9061/100000 [3:00:45<29:17:05,  1.16s/it]

89010 episode score is 708.92


  9%|▉         | 9062/100000 [3:00:46<28:57:18,  1.15s/it]

89019 episode score is 768.45


  9%|▉         | 9063/100000 [3:00:47<29:00:15,  1.15s/it]

89029 episode score is 676.75


  9%|▉         | 9064/100000 [3:00:49<28:51:32,  1.14s/it]

89039 episode score is 678.52


  9%|▉         | 9065/100000 [3:00:50<28:31:23,  1.13s/it]

89049 episode score is 648.47


  9%|▉         | 9066/100000 [3:00:51<28:28:00,  1.13s/it]

89059 episode score is 665.72


  9%|▉         | 9067/100000 [3:00:52<28:52:33,  1.14s/it]

89070 episode score is 634.07


  9%|▉         | 9068/100000 [3:00:53<28:41:19,  1.14s/it]

89080 episode score is 657.53


  9%|▉         | 9069/100000 [3:00:54<28:40:00,  1.13s/it]

89090 episode score is 679.91


  9%|▉         | 9070/100000 [3:00:55<28:46:59,  1.14s/it]

89100 episode score is 693.35


  9%|▉         | 9071/100000 [3:00:57<28:52:43,  1.14s/it]

89110 episode score is 705.18


  9%|▉         | 9072/100000 [3:00:58<28:36:12,  1.13s/it]

89121 episode score is 589.56


  9%|▉         | 9073/100000 [3:00:59<28:38:02,  1.13s/it]

89131 episode score is 679.77


  9%|▉         | 9074/100000 [3:01:00<28:28:59,  1.13s/it]

89141 episode score is 657.18


  9%|▉         | 9075/100000 [3:01:01<28:16:45,  1.12s/it]

89150 episode score is 750.59


  9%|▉         | 9076/100000 [3:01:02<28:28:12,  1.13s/it]

89161 episode score is 603.53


  9%|▉         | 9077/100000 [3:01:03<28:46:37,  1.14s/it]

89170 episode score is 812.67


  9%|▉         | 9078/100000 [3:01:04<28:38:53,  1.13s/it]

89180 episode score is 663.96


  9%|▉         | 9079/100000 [3:01:06<29:25:36,  1.17s/it]

89186 episode score is 1182.85


  9%|▉         | 9080/100000 [3:01:07<29:38:57,  1.17s/it]

89195 episode score is 822.65


  9%|▉         | 9081/100000 [3:01:08<29:46:50,  1.18s/it]

89204 episode score is 831.25


  9%|▉         | 9082/100000 [3:01:09<29:26:11,  1.17s/it]

89214 episode score is 672.27


  9%|▉         | 9083/100000 [3:01:10<29:58:28,  1.19s/it]

89223 episode score is 845.41


  9%|▉         | 9084/100000 [3:01:12<29:25:34,  1.17s/it]

89231 episode score is 887.59


  9%|▉         | 9085/100000 [3:01:13<29:28:34,  1.17s/it]

89240 episode score is 800.37


  9%|▉         | 9086/100000 [3:01:14<29:08:34,  1.15s/it]

89249 episode score is 771.92


  9%|▉         | 9087/100000 [3:01:15<28:39:46,  1.14s/it]

89257 episode score is 854.53


  9%|▉         | 9088/100000 [3:01:16<28:31:25,  1.13s/it]

89266 episode score is 748.49


  9%|▉         | 9089/100000 [3:01:17<29:27:02,  1.17s/it]

89275 episode score is 863.91


  9%|▉         | 9090/100000 [3:01:19<29:47:03,  1.18s/it]

89284 episode score is 830.49


  9%|▉         | 9091/100000 [3:01:20<29:23:16,  1.16s/it]

89292 episode score is 880.28


  9%|▉         | 9092/100000 [3:01:21<29:24:54,  1.16s/it]

89297 episode score is 1351.56


  9%|▉         | 9093/100000 [3:01:22<29:13:53,  1.16s/it]

89302 episode score is 1300.02


  9%|▉         | 9094/100000 [3:01:23<29:04:16,  1.15s/it]

89311 episode score is 777.39


  9%|▉         | 9095/100000 [3:01:24<28:47:24,  1.14s/it]

89319 episode score is 876.43


  9%|▉         | 9096/100000 [3:01:25<29:05:29,  1.15s/it]

89328 episode score is 826.39


  9%|▉         | 9097/100000 [3:01:26<28:38:28,  1.13s/it]

89335 episode score is 964.39


  9%|▉         | 9098/100000 [3:01:28<28:51:22,  1.14s/it]

89344 episode score is 763.14


  9%|▉         | 9099/100000 [3:01:29<29:14:14,  1.16s/it]

89351 episode score is 1084.24
89359 episode score is 876.79


  9%|▉         | 9100/100000 [3:01:31<38:01:40,  1.51s/it]

Iteration 9100: Average test reward: 839.72


  9%|▉         | 9101/100000 [3:01:32<35:06:16,  1.39s/it]

89366 episode score is 967.82


  9%|▉         | 9102/100000 [3:01:33<33:29:48,  1.33s/it]

89375 episode score is 816.94


  9%|▉         | 9103/100000 [3:01:35<32:14:20,  1.28s/it]

89384 episode score is 812.08


  9%|▉         | 9104/100000 [3:01:36<31:44:14,  1.26s/it]

89393 episode score is 865.33


  9%|▉         | 9105/100000 [3:01:37<30:43:41,  1.22s/it]

89398 episode score is 1285.53


  9%|▉         | 9106/100000 [3:01:38<30:33:36,  1.21s/it]

89405 episode score is 1024.86


  9%|▉         | 9107/100000 [3:01:39<31:07:06,  1.23s/it]

89413 episode score is 1019.72


  9%|▉         | 9108/100000 [3:01:41<30:28:40,  1.21s/it]

89421 episode score is 898.55
89428 episode score is 1335.92


  9%|▉         | 9110/100000 [3:01:43<32:09:27,  1.27s/it]

89435 episode score is 1061.97


  9%|▉         | 9111/100000 [3:01:44<30:49:46,  1.22s/it]

89440 episode score is 1306.53


  9%|▉         | 9112/100000 [3:01:46<30:00:22,  1.19s/it]

89448 episode score is 877.24


  9%|▉         | 9113/100000 [3:01:47<29:54:37,  1.18s/it]

89456 episode score is 955.26


  9%|▉         | 9114/100000 [3:01:48<30:27:28,  1.21s/it]

89465 episode score is 865.04


  9%|▉         | 9115/100000 [3:01:49<30:04:37,  1.19s/it]

89473 episode score is 926.18


  9%|▉         | 9116/100000 [3:01:50<29:30:41,  1.17s/it]

89481 episode score is 903.76


  9%|▉         | 9117/100000 [3:01:51<29:46:15,  1.18s/it]

89490 episode score is 861.78


  9%|▉         | 9118/100000 [3:01:53<30:08:26,  1.19s/it]

89499 episode score is 883.40


  9%|▉         | 9119/100000 [3:01:54<30:21:23,  1.20s/it]

89508 episode score is 858.39


  9%|▉         | 9120/100000 [3:01:55<29:34:18,  1.17s/it]

89516 episode score is 858.09


  9%|▉         | 9121/100000 [3:01:56<29:47:08,  1.18s/it]

89523 episode score is 1103.83


  9%|▉         | 9122/100000 [3:01:58<30:54:05,  1.22s/it]

89531 episode score is 1070.90


  9%|▉         | 9123/100000 [3:01:59<30:25:22,  1.21s/it]

89538 episode score is 1059.44


  9%|▉         | 9124/100000 [3:02:00<29:59:07,  1.19s/it]

89546 episode score is 909.17


  9%|▉         | 9125/100000 [3:02:01<29:17:37,  1.16s/it]

89555 episode score is 748.05


  9%|▉         | 9126/100000 [3:02:02<29:18:50,  1.16s/it]

89563 episode score is 909.62


  9%|▉         | 9127/100000 [3:02:03<28:58:59,  1.15s/it]

89570 episode score is 1031.40


  9%|▉         | 9128/100000 [3:02:04<28:36:42,  1.13s/it]

89578 episode score is 896.81


  9%|▉         | 9129/100000 [3:02:05<28:18:31,  1.12s/it]

89585 episode score is 1010.25


  9%|▉         | 9130/100000 [3:02:07<29:21:03,  1.16s/it]

89594 episode score is 892.07


  9%|▉         | 9131/100000 [3:02:08<30:17:47,  1.20s/it]

89602 episode score is 1022.67


  9%|▉         | 9132/100000 [3:02:09<29:44:01,  1.18s/it]

89610 episode score is 883.29


  9%|▉         | 9133/100000 [3:02:10<30:05:15,  1.19s/it]

89618 episode score is 984.00


  9%|▉         | 9134/100000 [3:02:11<29:51:15,  1.18s/it]

89625 episode score is 1081.96


  9%|▉         | 9135/100000 [3:02:13<29:12:57,  1.16s/it]

89632 episode score is 990.53


  9%|▉         | 9136/100000 [3:02:14<29:35:17,  1.17s/it]

89640 episode score is 967.43


  9%|▉         | 9137/100000 [3:02:15<29:20:09,  1.16s/it]

89648 episode score is 909.58


  9%|▉         | 9138/100000 [3:02:16<28:55:12,  1.15s/it]

89656 episode score is 877.64


  9%|▉         | 9139/100000 [3:02:17<29:17:36,  1.16s/it]

89663 episode score is 1077.07


  9%|▉         | 9140/100000 [3:02:18<28:48:55,  1.14s/it]

89671 episode score is 882.72


  9%|▉         | 9141/100000 [3:02:20<29:11:09,  1.16s/it]

89680 episode score is 807.09


  9%|▉         | 9142/100000 [3:02:21<29:10:56,  1.16s/it]

89689 episode score is 781.89


  9%|▉         | 9143/100000 [3:02:22<28:45:10,  1.14s/it]

89697 episode score is 874.83


  9%|▉         | 9144/100000 [3:02:23<28:27:00,  1.13s/it]

89706 episode score is 771.39


  9%|▉         | 9145/100000 [3:02:24<30:00:47,  1.19s/it]

89714 episode score is 958.22


  9%|▉         | 9146/100000 [3:02:25<29:39:19,  1.18s/it]

89722 episode score is 913.98


  9%|▉         | 9147/100000 [3:02:27<29:34:09,  1.17s/it]

89731 episode score is 812.31


  9%|▉         | 9148/100000 [3:02:28<28:55:56,  1.15s/it]

89739 episode score is 870.99


  9%|▉         | 9149/100000 [3:02:29<28:35:43,  1.13s/it]

89747 episode score is 911.25


  9%|▉         | 9150/100000 [3:02:30<28:16:09,  1.12s/it]

89755 episode score is 881.29


  9%|▉         | 9151/100000 [3:02:31<28:38:27,  1.13s/it]

89763 episode score is 928.10


  9%|▉         | 9152/100000 [3:02:32<28:30:21,  1.13s/it]

89772 episode score is 770.63


  9%|▉         | 9153/100000 [3:02:33<29:13:54,  1.16s/it]

89783 episode score is 690.54


  9%|▉         | 9154/100000 [3:02:34<29:05:02,  1.15s/it]

89792 episode score is 809.50


  9%|▉         | 9155/100000 [3:02:36<29:07:46,  1.15s/it]

89800 episode score is 921.58


  9%|▉         | 9156/100000 [3:02:37<28:40:57,  1.14s/it]

89809 episode score is 763.68


  9%|▉         | 9157/100000 [3:02:38<28:59:08,  1.15s/it]

89816 episode score is 944.59


  9%|▉         | 9158/100000 [3:02:39<28:40:12,  1.14s/it]

89823 episode score is 1043.23


  9%|▉         | 9159/100000 [3:02:40<30:05:33,  1.19s/it]

89832 episode score is 849.06


  9%|▉         | 9160/100000 [3:02:41<29:33:04,  1.17s/it]

89838 episode score is 1181.20


  9%|▉         | 9161/100000 [3:02:43<29:42:02,  1.18s/it]

89845 episode score is 1075.19


  9%|▉         | 9162/100000 [3:02:44<29:14:59,  1.16s/it]

89854 episode score is 763.45


  9%|▉         | 9163/100000 [3:02:45<29:18:32,  1.16s/it]

89861 episode score is 1088.70


  9%|▉         | 9164/100000 [3:02:46<29:07:09,  1.15s/it]

89871 episode score is 680.87


  9%|▉         | 9165/100000 [3:02:47<29:03:26,  1.15s/it]

89881 episode score is 707.67


  9%|▉         | 9166/100000 [3:02:48<29:04:24,  1.15s/it]

89891 episode score is 712.09


  9%|▉         | 9167/100000 [3:02:50<29:27:47,  1.17s/it]

89901 episode score is 750.57


  9%|▉         | 9168/100000 [3:02:51<29:31:34,  1.17s/it]

89911 episode score is 745.64


  9%|▉         | 9169/100000 [3:02:52<29:33:49,  1.17s/it]

89920 episode score is 857.46


  9%|▉         | 9170/100000 [3:02:53<29:03:09,  1.15s/it]

89929 episode score is 787.96


  9%|▉         | 9171/100000 [3:02:54<29:22:01,  1.16s/it]

89939 episode score is 778.24


  9%|▉         | 9172/100000 [3:02:55<29:09:17,  1.16s/it]

89947 episode score is 909.23


  9%|▉         | 9173/100000 [3:02:56<28:49:39,  1.14s/it]

89957 episode score is 702.48


  9%|▉         | 9174/100000 [3:02:58<29:01:45,  1.15s/it]

89967 episode score is 735.95


  9%|▉         | 9175/100000 [3:02:59<28:53:49,  1.15s/it]

89976 episode score is 814.21


  9%|▉         | 9176/100000 [3:03:00<29:09:56,  1.16s/it]

89986 episode score is 756.55


  9%|▉         | 9177/100000 [3:03:01<29:41:10,  1.18s/it]

89996 episode score is 753.59


  9%|▉         | 9178/100000 [3:03:02<29:35:11,  1.17s/it]

90006 episode score is 729.62


  9%|▉         | 9179/100000 [3:03:04<29:59:28,  1.19s/it]

90016 episode score is 783.81


  9%|▉         | 9180/100000 [3:03:05<29:23:06,  1.16s/it]

90025 episode score is 798.16


  9%|▉         | 9181/100000 [3:03:06<29:16:45,  1.16s/it]

90034 episode score is 833.47


  9%|▉         | 9182/100000 [3:03:07<28:44:14,  1.14s/it]

90043 episode score is 772.09


  9%|▉         | 9183/100000 [3:03:08<28:49:03,  1.14s/it]

90052 episode score is 823.30


  9%|▉         | 9184/100000 [3:03:09<28:22:39,  1.12s/it]

90060 episode score is 875.08


  9%|▉         | 9185/100000 [3:03:10<28:17:55,  1.12s/it]

90069 episode score is 791.22


  9%|▉         | 9186/100000 [3:03:11<28:36:00,  1.13s/it]

90078 episode score is 829.46


  9%|▉         | 9187/100000 [3:03:13<28:35:27,  1.13s/it]

90087 episode score is 817.90


  9%|▉         | 9188/100000 [3:03:14<28:16:50,  1.12s/it]

90095 episode score is 882.47


  9%|▉         | 9189/100000 [3:03:15<28:39:14,  1.14s/it]

90104 episode score is 851.21


  9%|▉         | 9190/100000 [3:03:16<28:51:58,  1.14s/it]

90114 episode score is 740.20


  9%|▉         | 9191/100000 [3:03:17<29:19:38,  1.16s/it]

90124 episode score is 748.21
90134 episode score is 850.06


  9%|▉         | 9193/100000 [3:03:20<30:33:10,  1.21s/it]

90144 episode score is 762.12


  9%|▉         | 9194/100000 [3:03:21<29:59:40,  1.19s/it]

90152 episode score is 924.27


  9%|▉         | 9195/100000 [3:03:22<29:57:56,  1.19s/it]

90161 episode score is 869.43


  9%|▉         | 9196/100000 [3:03:23<29:44:41,  1.18s/it]

90170 episode score is 849.61


  9%|▉         | 9197/100000 [3:03:24<29:27:31,  1.17s/it]

90179 episode score is 832.13


  9%|▉         | 9198/100000 [3:03:25<29:11:31,  1.16s/it]

90188 episode score is 831.81


  9%|▉         | 9199/100000 [3:03:27<29:21:52,  1.16s/it]

90197 episode score is 866.15
90207 episode score is 762.12


  9%|▉         | 9200/100000 [3:03:29<37:25:27,  1.48s/it]

Iteration 9200: Average test reward: 754.86


  9%|▉         | 9201/100000 [3:03:30<35:17:06,  1.40s/it]

90217 episode score is 778.28


  9%|▉         | 9202/100000 [3:03:31<33:48:21,  1.34s/it]

90226 episode score is 873.64


  9%|▉         | 9203/100000 [3:03:32<32:36:31,  1.29s/it]

90236 episode score is 754.74


  9%|▉         | 9204/100000 [3:03:34<31:15:27,  1.24s/it]

90244 episode score is 921.74


  9%|▉         | 9205/100000 [3:03:35<30:30:38,  1.21s/it]

90253 episode score is 834.55


  9%|▉         | 9206/100000 [3:03:36<30:18:07,  1.20s/it]

90262 episode score is 870.38


  9%|▉         | 9207/100000 [3:03:37<29:49:45,  1.18s/it]

90271 episode score is 833.50


  9%|▉         | 9208/100000 [3:03:38<29:38:58,  1.18s/it]

90280 episode score is 860.34


  9%|▉         | 9209/100000 [3:03:39<29:09:07,  1.16s/it]

90289 episode score is 801.51


  9%|▉         | 9210/100000 [3:03:40<28:41:30,  1.14s/it]

90298 episode score is 796.27


  9%|▉         | 9211/100000 [3:03:41<28:25:12,  1.13s/it]

90307 episode score is 789.50


  9%|▉         | 9212/100000 [3:03:43<28:55:05,  1.15s/it]

90317 episode score is 769.54


  9%|▉         | 9213/100000 [3:03:44<28:50:23,  1.14s/it]

90326 episode score is 828.51


  9%|▉         | 9214/100000 [3:03:45<29:00:40,  1.15s/it]

90336 episode score is 751.29


  9%|▉         | 9215/100000 [3:03:46<29:02:50,  1.15s/it]

90345 episode score is 838.44


  9%|▉         | 9216/100000 [3:03:47<28:44:38,  1.14s/it]

90354 episode score is 788.52


  9%|▉         | 9217/100000 [3:03:48<28:43:11,  1.14s/it]

90363 episode score is 821.10


  9%|▉         | 9218/100000 [3:03:50<29:02:46,  1.15s/it]

90373 episode score is 743.33


  9%|▉         | 9219/100000 [3:03:51<28:42:44,  1.14s/it]

90382 episode score is 802.60


  9%|▉         | 9220/100000 [3:03:52<28:49:54,  1.14s/it]

90392 episode score is 734.32


  9%|▉         | 9221/100000 [3:03:53<29:18:12,  1.16s/it]

90402 episode score is 778.25


  9%|▉         | 9222/100000 [3:03:54<29:13:34,  1.16s/it]

90412 episode score is 743.29


  9%|▉         | 9223/100000 [3:03:55<29:01:04,  1.15s/it]

90422 episode score is 710.48


  9%|▉         | 9224/100000 [3:03:57<29:13:43,  1.16s/it]

90432 episode score is 774.54


  9%|▉         | 9225/100000 [3:03:58<28:59:16,  1.15s/it]

90442 episode score is 722.33


  9%|▉         | 9226/100000 [3:03:59<29:27:58,  1.17s/it]

90450 episode score is 937.97


  9%|▉         | 9227/100000 [3:04:00<29:10:08,  1.16s/it]

90458 episode score is 944.31


  9%|▉         | 9228/100000 [3:04:01<28:44:38,  1.14s/it]

90466 episode score is 915.28


  9%|▉         | 9229/100000 [3:04:02<28:46:02,  1.14s/it]

90475 episode score is 821.57


  9%|▉         | 9230/100000 [3:04:03<29:19:28,  1.16s/it]

90484 episode score is 881.16


  9%|▉         | 9231/100000 [3:04:05<29:13:04,  1.16s/it]

90493 episode score is 838.05


  9%|▉         | 9232/100000 [3:04:06<29:30:36,  1.17s/it]

90504 episode score is 671.64


  9%|▉         | 9233/100000 [3:04:07<29:02:06,  1.15s/it]

90514 episode score is 695.47


  9%|▉         | 9234/100000 [3:04:08<29:07:25,  1.16s/it]

90524 episode score is 733.27


  9%|▉         | 9235/100000 [3:04:09<29:04:25,  1.15s/it]

90534 episode score is 716.60


  9%|▉         | 9236/100000 [3:04:10<28:43:08,  1.14s/it]

90545 episode score is 601.14


  9%|▉         | 9237/100000 [3:04:11<28:23:31,  1.13s/it]

90555 episode score is 678.71


  9%|▉         | 9238/100000 [3:04:13<28:14:00,  1.12s/it]

90565 episode score is 669.84


  9%|▉         | 9239/100000 [3:04:14<28:32:53,  1.13s/it]

90576 episode score is 645.19


  9%|▉         | 9240/100000 [3:04:15<28:15:46,  1.12s/it]

90586 episode score is 676.06


  9%|▉         | 9241/100000 [3:04:16<28:18:42,  1.12s/it]

90596 episode score is 696.73


  9%|▉         | 9242/100000 [3:04:17<28:34:57,  1.13s/it]

90606 episode score is 706.89


  9%|▉         | 9243/100000 [3:04:18<28:29:13,  1.13s/it]

90615 episode score is 782.12


  9%|▉         | 9244/100000 [3:04:19<28:56:08,  1.15s/it]

90624 episode score is 851.03


  9%|▉         | 9245/100000 [3:04:21<29:25:56,  1.17s/it]

90633 episode score is 874.96


  9%|▉         | 9246/100000 [3:04:22<29:32:21,  1.17s/it]

90642 episode score is 834.34


  9%|▉         | 9247/100000 [3:04:23<29:11:42,  1.16s/it]

90651 episode score is 788.50


  9%|▉         | 9248/100000 [3:04:24<29:34:38,  1.17s/it]

90661 episode score is 762.21


  9%|▉         | 9249/100000 [3:04:25<29:06:15,  1.15s/it]

90670 episode score is 784.41


  9%|▉         | 9250/100000 [3:04:26<29:06:26,  1.15s/it]

90680 episode score is 727.13


  9%|▉         | 9251/100000 [3:04:28<29:35:05,  1.17s/it]

90689 episode score is 888.28


  9%|▉         | 9252/100000 [3:04:29<29:10:09,  1.16s/it]

90698 episode score is 793.71


  9%|▉         | 9253/100000 [3:04:30<28:48:03,  1.14s/it]

90707 episode score is 781.47


  9%|▉         | 9254/100000 [3:04:31<28:41:59,  1.14s/it]

90716 episode score is 795.34


  9%|▉         | 9255/100000 [3:04:32<29:12:51,  1.16s/it]

90725 episode score is 828.38


  9%|▉         | 9256/100000 [3:04:33<28:58:36,  1.15s/it]

90734 episode score is 796.59


  9%|▉         | 9257/100000 [3:04:34<29:16:07,  1.16s/it]

90743 episode score is 850.36


  9%|▉         | 9258/100000 [3:04:36<28:55:04,  1.15s/it]

90752 episode score is 785.85


  9%|▉         | 9259/100000 [3:04:37<28:44:51,  1.14s/it]

90761 episode score is 798.21


  9%|▉         | 9260/100000 [3:04:38<28:31:36,  1.13s/it]

90770 episode score is 794.03


  9%|▉         | 9261/100000 [3:04:39<28:53:24,  1.15s/it]

90780 episode score is 741.14


  9%|▉         | 9262/100000 [3:04:40<29:04:27,  1.15s/it]

90790 episode score is 744.55


  9%|▉         | 9263/100000 [3:04:41<29:16:34,  1.16s/it]

90800 episode score is 747.81


  9%|▉         | 9264/100000 [3:04:43<29:27:01,  1.17s/it]

90811 episode score is 656.80


  9%|▉         | 9265/100000 [3:04:44<28:53:34,  1.15s/it]

90820 episode score is 768.36


  9%|▉         | 9266/100000 [3:04:45<28:47:23,  1.14s/it]

90829 episode score is 814.73


  9%|▉         | 9267/100000 [3:04:46<28:53:16,  1.15s/it]

90839 episode score is 728.83


  9%|▉         | 9268/100000 [3:04:47<29:07:32,  1.16s/it]

90849 episode score is 729.41


  9%|▉         | 9269/100000 [3:04:48<29:25:38,  1.17s/it]

90859 episode score is 752.58


  9%|▉         | 9270/100000 [3:04:49<29:31:31,  1.17s/it]

90869 episode score is 726.21


  9%|▉         | 9271/100000 [3:04:51<29:03:15,  1.15s/it]

90878 episode score is 785.75


  9%|▉         | 9272/100000 [3:04:52<28:52:06,  1.15s/it]

90887 episode score is 802.82


  9%|▉         | 9273/100000 [3:04:53<28:38:21,  1.14s/it]

90896 episode score is 771.67


  9%|▉         | 9274/100000 [3:04:54<28:37:53,  1.14s/it]

90906 episode score is 698.63


  9%|▉         | 9275/100000 [3:04:55<29:09:01,  1.16s/it]

90916 episode score is 764.69


  9%|▉         | 9276/100000 [3:04:56<28:36:34,  1.14s/it]

90925 episode score is 763.17


  9%|▉         | 9277/100000 [3:04:57<28:17:34,  1.12s/it]

90934 episode score is 771.45


  9%|▉         | 9278/100000 [3:04:59<28:40:46,  1.14s/it]

90943 episode score is 847.95


  9%|▉         | 9279/100000 [3:05:00<28:32:34,  1.13s/it]

90952 episode score is 779.09


  9%|▉         | 9280/100000 [3:05:01<28:40:04,  1.14s/it]

90961 episode score is 809.45


  9%|▉         | 9281/100000 [3:05:02<29:09:52,  1.16s/it]

90971 episode score is 758.70


  9%|▉         | 9282/100000 [3:05:03<29:59:52,  1.19s/it]

90981 episode score is 740.02


  9%|▉         | 9283/100000 [3:05:04<30:00:49,  1.19s/it]

90990 episode score is 861.06


  9%|▉         | 9284/100000 [3:05:06<29:21:49,  1.17s/it]

90999 episode score is 774.33


  9%|▉         | 9285/100000 [3:05:07<29:06:54,  1.16s/it]

91008 episode score is 804.85


  9%|▉         | 9286/100000 [3:05:08<28:56:07,  1.15s/it]

91017 episode score is 799.21


  9%|▉         | 9287/100000 [3:05:09<28:39:04,  1.14s/it]

91026 episode score is 787.67


  9%|▉         | 9288/100000 [3:05:10<28:23:29,  1.13s/it]

91035 episode score is 781.97


  9%|▉         | 9289/100000 [3:05:11<28:51:59,  1.15s/it]

91045 episode score is 756.81


  9%|▉         | 9290/100000 [3:05:12<29:00:15,  1.15s/it]

91055 episode score is 723.79


  9%|▉         | 9291/100000 [3:05:13<28:39:10,  1.14s/it]

91064 episode score is 784.49


  9%|▉         | 9292/100000 [3:05:15<29:08:43,  1.16s/it]

91074 episode score is 763.33


  9%|▉         | 9293/100000 [3:05:16<29:37:06,  1.18s/it]

91084 episode score is 759.33


  9%|▉         | 9294/100000 [3:05:17<29:10:04,  1.16s/it]

91093 episode score is 780.19


  9%|▉         | 9295/100000 [3:05:18<28:39:50,  1.14s/it]

91102 episode score is 777.63


  9%|▉         | 9296/100000 [3:05:19<29:13:54,  1.16s/it]

91112 episode score is 762.32


  9%|▉         | 9297/100000 [3:05:20<28:52:55,  1.15s/it]

91121 episode score is 790.22


  9%|▉         | 9298/100000 [3:05:22<28:35:06,  1.13s/it]

91130 episode score is 785.30


  9%|▉         | 9299/100000 [3:05:23<29:15:38,  1.16s/it]

91140 episode score is 767.19
91150 episode score is 731.52


  9%|▉         | 9300/100000 [3:05:25<37:03:32,  1.47s/it]

Iteration 9300: Average test reward: 720.13


  9%|▉         | 9301/100000 [3:05:26<34:46:35,  1.38s/it]

91160 episode score is 743.08


  9%|▉         | 9302/100000 [3:05:27<32:44:05,  1.30s/it]

91169 episode score is 787.71


  9%|▉         | 9303/100000 [3:05:28<31:11:08,  1.24s/it]

91178 episode score is 778.62


  9%|▉         | 9304/100000 [3:05:29<30:04:03,  1.19s/it]

91187 episode score is 768.49


  9%|▉         | 9305/100000 [3:05:31<30:20:01,  1.20s/it]

91197 episode score is 785.24


  9%|▉         | 9306/100000 [3:05:32<30:26:55,  1.21s/it]

91207 episode score is 769.09


  9%|▉         | 9307/100000 [3:05:33<30:24:11,  1.21s/it]

91217 episode score is 746.39


  9%|▉         | 9308/100000 [3:05:34<30:04:26,  1.19s/it]

91227 episode score is 734.95


  9%|▉         | 9309/100000 [3:05:35<30:03:37,  1.19s/it]

91237 episode score is 753.36


  9%|▉         | 9310/100000 [3:05:37<29:20:14,  1.16s/it]

91246 episode score is 773.93


  9%|▉         | 9311/100000 [3:05:38<29:13:06,  1.16s/it]

91256 episode score is 713.48


  9%|▉         | 9312/100000 [3:05:39<28:45:19,  1.14s/it]

91265 episode score is 781.98


  9%|▉         | 9313/100000 [3:05:40<29:12:14,  1.16s/it]

91275 episode score is 773.49


  9%|▉         | 9314/100000 [3:05:41<29:25:18,  1.17s/it]

91285 episode score is 756.22


  9%|▉         | 9315/100000 [3:05:42<29:29:56,  1.17s/it]

91295 episode score is 745.19


  9%|▉         | 9316/100000 [3:05:44<29:27:52,  1.17s/it]

91304 episode score is 836.64


  9%|▉         | 9317/100000 [3:05:45<29:24:34,  1.17s/it]

91313 episode score is 837.43


  9%|▉         | 9318/100000 [3:05:46<29:35:46,  1.17s/it]

91322 episode score is 853.75


  9%|▉         | 9319/100000 [3:05:47<29:03:11,  1.15s/it]

91331 episode score is 770.43


  9%|▉         | 9320/100000 [3:05:48<28:57:14,  1.15s/it]

91339 episode score is 915.87


  9%|▉         | 9321/100000 [3:05:49<29:02:32,  1.15s/it]

91349 episode score is 718.75


  9%|▉         | 9322/100000 [3:05:50<29:05:22,  1.15s/it]

91358 episode score is 832.73


  9%|▉         | 9323/100000 [3:05:52<28:46:17,  1.14s/it]

91367 episode score is 791.03


  9%|▉         | 9324/100000 [3:05:53<29:00:16,  1.15s/it]

91376 episode score is 833.24


  9%|▉         | 9325/100000 [3:05:54<29:06:38,  1.16s/it]

91386 episode score is 732.87


  9%|▉         | 9326/100000 [3:05:55<28:47:12,  1.14s/it]

91395 episode score is 790.13


  9%|▉         | 9327/100000 [3:05:56<28:34:36,  1.13s/it]

91405 episode score is 693.12


  9%|▉         | 9328/100000 [3:05:57<28:15:34,  1.12s/it]

91415 episode score is 684.93


  9%|▉         | 9329/100000 [3:05:58<28:40:56,  1.14s/it]

91425 episode score is 746.93


  9%|▉         | 9330/100000 [3:06:00<28:35:31,  1.14s/it]

91435 episode score is 702.40


  9%|▉         | 9331/100000 [3:06:01<29:02:08,  1.15s/it]

91445 episode score is 761.29


  9%|▉         | 9332/100000 [3:06:02<28:58:24,  1.15s/it]

91455 episode score is 726.79


  9%|▉         | 9333/100000 [3:06:03<29:10:17,  1.16s/it]

91465 episode score is 738.70


  9%|▉         | 9334/100000 [3:06:04<29:34:55,  1.17s/it]

91475 episode score is 773.25


  9%|▉         | 9335/100000 [3:06:05<29:42:46,  1.18s/it]

91485 episode score is 756.72


  9%|▉         | 9336/100000 [3:06:07<30:00:18,  1.19s/it]

91494 episode score is 810.99


  9%|▉         | 9337/100000 [3:06:08<29:35:09,  1.17s/it]

91504 episode score is 709.21


  9%|▉         | 9338/100000 [3:06:09<29:22:28,  1.17s/it]

91514 episode score is 729.76


  9%|▉         | 9339/100000 [3:06:10<29:27:44,  1.17s/it]

91524 episode score is 742.41


  9%|▉         | 9340/100000 [3:06:11<28:52:07,  1.15s/it]

91535 episode score is 601.52


  9%|▉         | 9341/100000 [3:06:12<29:44:27,  1.18s/it]

91538 episode score is 2158.49
91543 episode score is 1756.78


  9%|▉         | 9343/100000 [3:06:15<31:21:32,  1.25s/it]

91546 episode score is 2030.15


  9%|▉         | 9344/100000 [3:06:16<31:32:11,  1.25s/it]

91550 episode score is 1716.87


  9%|▉         | 9345/100000 [3:06:18<31:13:12,  1.24s/it]

91553 episode score is 2053.11


  9%|▉         | 9346/100000 [3:06:19<30:48:42,  1.22s/it]

91556 episode score is 2007.11
91559 episode score is 2575.09


  9%|▉         | 9348/100000 [3:06:22<32:20:06,  1.28s/it]

91564 episode score is 1377.63


  9%|▉         | 9349/100000 [3:06:23<32:37:00,  1.30s/it]

91568 episode score is 1840.57


  9%|▉         | 9350/100000 [3:06:24<32:02:40,  1.27s/it]

91572 episode score is 1423.53


  9%|▉         | 9351/100000 [3:06:25<31:37:23,  1.26s/it]

91576 episode score is 1652.65
91579 episode score is 2610.56


  9%|▉         | 9353/100000 [3:06:28<31:58:13,  1.27s/it]

91583 episode score is 1418.43


  9%|▉         | 9354/100000 [3:06:29<31:55:26,  1.27s/it]

91586 episode score is 1989.55
91589 episode score is 2218.57


  9%|▉         | 9356/100000 [3:06:32<32:45:31,  1.30s/it]

91592 episode score is 2195.21


  9%|▉         | 9357/100000 [3:06:33<31:26:01,  1.25s/it]

91595 episode score is 1893.87


  9%|▉         | 9358/100000 [3:06:34<31:33:36,  1.25s/it]

91598 episode score is 2196.32


  9%|▉         | 9359/100000 [3:06:35<30:34:16,  1.21s/it]

91602 episode score is 1564.01


  9%|▉         | 9360/100000 [3:06:37<29:56:55,  1.19s/it]

91606 episode score is 1531.13
91610 episode score is 2074.83


  9%|▉         | 9361/100000 [3:06:38<32:22:39,  1.29s/it]

91613 episode score is 2335.31


  9%|▉         | 9363/100000 [3:06:41<31:32:56,  1.25s/it]

91619 episode score is 1147.36


  9%|▉         | 9364/100000 [3:06:42<31:28:12,  1.25s/it]

91624 episode score is 1356.82


  9%|▉         | 9365/100000 [3:06:43<31:31:56,  1.25s/it]

91628 episode score is 1909.71


  9%|▉         | 9366/100000 [3:06:44<31:22:31,  1.25s/it]

91632 episode score is 1691.69


  9%|▉         | 9367/100000 [3:06:46<31:29:21,  1.25s/it]

91636 episode score is 1688.14


  9%|▉         | 9368/100000 [3:06:47<31:09:45,  1.24s/it]

91639 episode score is 2117.32
91643 episode score is 1925.43


  9%|▉         | 9370/100000 [3:06:49<32:12:06,  1.28s/it]

91649 episode score is 1280.05
91653 episode score is 2142.40


  9%|▉         | 9372/100000 [3:06:52<33:38:54,  1.34s/it]

91657 episode score is 1861.81


  9%|▉         | 9373/100000 [3:06:53<32:49:23,  1.30s/it]

91662 episode score is 1366.40


  9%|▉         | 9374/100000 [3:06:55<32:47:27,  1.30s/it]

91667 episode score is 1448.16
91673 episode score is 1469.67


  9%|▉         | 9376/100000 [3:06:57<32:24:23,  1.29s/it]

91679 episode score is 1248.66


  9%|▉         | 9377/100000 [3:06:59<31:12:05,  1.24s/it]

91685 episode score is 1214.54


  9%|▉         | 9378/100000 [3:07:00<30:32:15,  1.21s/it]

91691 episode score is 1172.67


  9%|▉         | 9379/100000 [3:07:01<30:08:04,  1.20s/it]

91698 episode score is 1067.51


  9%|▉         | 9380/100000 [3:07:02<30:28:44,  1.21s/it]

91703 episode score is 1501.32


  9%|▉         | 9381/100000 [3:07:03<29:56:53,  1.19s/it]

91708 episode score is 1298.92


  9%|▉         | 9382/100000 [3:07:04<30:13:45,  1.20s/it]

91714 episode score is 1223.93


  9%|▉         | 9383/100000 [3:07:06<29:47:27,  1.18s/it]

91720 episode score is 1230.12


  9%|▉         | 9384/100000 [3:07:07<29:36:42,  1.18s/it]

91727 episode score is 1052.06


  9%|▉         | 9385/100000 [3:07:08<30:13:16,  1.20s/it]

91733 episode score is 1347.81


  9%|▉         | 9386/100000 [3:07:09<29:56:06,  1.19s/it]

91740 episode score is 1049.64


  9%|▉         | 9387/100000 [3:07:10<30:38:29,  1.22s/it]

91747 episode score is 1162.39


  9%|▉         | 9388/100000 [3:07:12<30:41:22,  1.22s/it]

91753 episode score is 1195.86


  9%|▉         | 9389/100000 [3:07:13<30:07:05,  1.20s/it]

91757 episode score is 1650.55
91764 episode score is 1298.33


  9%|▉         | 9391/100000 [3:07:16<33:04:01,  1.31s/it]

91771 episode score is 1125.41


  9%|▉         | 9392/100000 [3:07:17<32:46:41,  1.30s/it]

91779 episode score is 1026.14


  9%|▉         | 9393/100000 [3:07:18<31:37:27,  1.26s/it]

91787 episode score is 902.02


  9%|▉         | 9394/100000 [3:07:19<30:26:04,  1.21s/it]

91794 episode score is 1001.95


  9%|▉         | 9395/100000 [3:07:20<29:59:59,  1.19s/it]

91799 episode score is 1369.95


  9%|▉         | 9396/100000 [3:07:22<30:02:30,  1.19s/it]

91806 episode score is 1116.27


  9%|▉         | 9397/100000 [3:07:23<30:00:59,  1.19s/it]

91813 episode score is 1091.66


  9%|▉         | 9398/100000 [3:07:24<29:22:47,  1.17s/it]

91820 episode score is 994.50


  9%|▉         | 9399/100000 [3:07:25<29:29:56,  1.17s/it]

91828 episode score is 952.34
91836 episode score is 997.78


  9%|▉         | 9400/100000 [3:07:28<39:49:08,  1.58s/it]

Iteration 9400: Average test reward: 949.15
91844 episode score is 1084.35


  9%|▉         | 9402/100000 [3:07:30<36:17:28,  1.44s/it]

91852 episode score is 1002.29


  9%|▉         | 9403/100000 [3:07:31<33:50:30,  1.34s/it]

91860 episode score is 883.94


  9%|▉         | 9404/100000 [3:07:33<33:09:13,  1.32s/it]

91868 episode score is 990.76


  9%|▉         | 9405/100000 [3:07:34<31:36:02,  1.26s/it]

91876 episode score is 881.99


  9%|▉         | 9406/100000 [3:07:35<30:20:27,  1.21s/it]

91884 episode score is 870.14


  9%|▉         | 9407/100000 [3:07:36<30:01:34,  1.19s/it]

91892 episode score is 942.40


  9%|▉         | 9408/100000 [3:07:37<29:28:45,  1.17s/it]

91900 episode score is 913.74


  9%|▉         | 9409/100000 [3:07:38<29:30:43,  1.17s/it]

91908 episode score is 973.60


  9%|▉         | 9410/100000 [3:07:39<29:15:08,  1.16s/it]

91916 episode score is 911.29


  9%|▉         | 9411/100000 [3:07:41<29:32:04,  1.17s/it]

91924 episode score is 995.52


  9%|▉         | 9412/100000 [3:07:42<29:45:35,  1.18s/it]

91933 episode score is 848.99


  9%|▉         | 9413/100000 [3:07:43<30:18:42,  1.20s/it]

91941 episode score is 1014.98


  9%|▉         | 9414/100000 [3:07:44<30:16:11,  1.20s/it]

91950 episode score is 844.06


  9%|▉         | 9415/100000 [3:07:45<30:02:17,  1.19s/it]

91959 episode score is 837.19


  9%|▉         | 9416/100000 [3:07:47<29:56:25,  1.19s/it]

91968 episode score is 835.76


  9%|▉         | 9417/100000 [3:07:48<30:01:14,  1.19s/it]

91976 episode score is 963.00


  9%|▉         | 9418/100000 [3:07:49<29:38:25,  1.18s/it]

91984 episode score is 935.39


  9%|▉         | 9419/100000 [3:07:50<29:11:34,  1.16s/it]

91993 episode score is 787.67


  9%|▉         | 9420/100000 [3:07:51<29:36:50,  1.18s/it]

92002 episode score is 879.88


  9%|▉         | 9421/100000 [3:07:52<29:35:25,  1.18s/it]

92011 episode score is 842.52


  9%|▉         | 9422/100000 [3:07:54<28:59:16,  1.15s/it]

92019 episode score is 890.82


  9%|▉         | 9423/100000 [3:07:55<29:04:54,  1.16s/it]

92027 episode score is 948.07


  9%|▉         | 9424/100000 [3:07:56<29:23:33,  1.17s/it]

92036 episode score is 863.73


  9%|▉         | 9425/100000 [3:07:57<29:40:35,  1.18s/it]

92045 episode score is 861.39


  9%|▉         | 9426/100000 [3:07:58<29:43:10,  1.18s/it]

92053 episode score is 975.70


  9%|▉         | 9427/100000 [3:07:59<29:26:12,  1.17s/it]

92061 episode score is 921.01


  9%|▉         | 9428/100000 [3:08:01<29:41:26,  1.18s/it]

92069 episode score is 972.33


  9%|▉         | 9429/100000 [3:08:02<29:27:11,  1.17s/it]

92077 episode score is 934.63


  9%|▉         | 9430/100000 [3:08:03<29:31:52,  1.17s/it]

92085 episode score is 973.07


  9%|▉         | 9431/100000 [3:08:04<29:28:08,  1.17s/it]

92093 episode score is 952.74


  9%|▉         | 9432/100000 [3:08:05<29:32:21,  1.17s/it]

92101 episode score is 979.01


  9%|▉         | 9433/100000 [3:08:06<28:57:55,  1.15s/it]

92109 episode score is 893.04


  9%|▉         | 9434/100000 [3:08:08<29:11:26,  1.16s/it]

92118 episode score is 848.65


  9%|▉         | 9435/100000 [3:08:09<29:36:44,  1.18s/it]

92127 episode score is 881.59


  9%|▉         | 9436/100000 [3:08:10<29:27:56,  1.17s/it]

92135 episode score is 946.46


  9%|▉         | 9437/100000 [3:08:11<29:10:26,  1.16s/it]

92144 episode score is 802.31


  9%|▉         | 9438/100000 [3:08:12<29:22:34,  1.17s/it]

92153 episode score is 845.84


  9%|▉         | 9439/100000 [3:08:13<28:47:49,  1.14s/it]

92162 episode score is 767.85


  9%|▉         | 9440/100000 [3:08:15<28:38:55,  1.14s/it]

92171 episode score is 792.45


  9%|▉         | 9441/100000 [3:08:16<28:45:12,  1.14s/it]

92180 episode score is 824.06


  9%|▉         | 9442/100000 [3:08:17<29:27:00,  1.17s/it]

92190 episode score is 772.42


  9%|▉         | 9443/100000 [3:08:18<29:23:15,  1.17s/it]

92200 episode score is 714.56


  9%|▉         | 9444/100000 [3:08:19<29:07:21,  1.16s/it]

92209 episode score is 804.86


  9%|▉         | 9445/100000 [3:08:20<28:44:30,  1.14s/it]

92217 episode score is 909.37


  9%|▉         | 9446/100000 [3:08:22<29:02:02,  1.15s/it]

92226 episode score is 876.19


  9%|▉         | 9447/100000 [3:08:23<28:42:31,  1.14s/it]

92235 episode score is 784.58


  9%|▉         | 9448/100000 [3:08:24<29:55:00,  1.19s/it]

92244 episode score is 854.51


  9%|▉         | 9449/100000 [3:08:25<29:24:33,  1.17s/it]

92253 episode score is 810.95


  9%|▉         | 9450/100000 [3:08:26<29:08:02,  1.16s/it]

92262 episode score is 821.50


  9%|▉         | 9451/100000 [3:08:27<28:58:01,  1.15s/it]

92271 episode score is 814.97


  9%|▉         | 9452/100000 [3:08:28<28:45:20,  1.14s/it]

92280 episode score is 802.36


  9%|▉         | 9453/100000 [3:08:30<28:21:55,  1.13s/it]

92289 episode score is 775.49


  9%|▉         | 9454/100000 [3:08:31<28:33:17,  1.14s/it]

92299 episode score is 732.46


  9%|▉         | 9455/100000 [3:08:32<28:27:47,  1.13s/it]

92307 episode score is 936.78


  9%|▉         | 9456/100000 [3:08:33<28:54:19,  1.15s/it]

92316 episode score is 860.82


  9%|▉         | 9457/100000 [3:08:34<28:57:45,  1.15s/it]

92325 episode score is 833.21


  9%|▉         | 9458/100000 [3:08:35<29:07:00,  1.16s/it]

92334 episode score is 850.77


  9%|▉         | 9459/100000 [3:08:36<29:15:37,  1.16s/it]

92343 episode score is 866.73


  9%|▉         | 9460/100000 [3:08:38<29:28:39,  1.17s/it]

92352 episode score is 870.03


  9%|▉         | 9461/100000 [3:08:39<29:12:47,  1.16s/it]

92360 episode score is 962.79


  9%|▉         | 9462/100000 [3:08:40<29:29:24,  1.17s/it]

92369 episode score is 878.93


  9%|▉         | 9463/100000 [3:08:41<29:40:00,  1.18s/it]

92378 episode score is 879.53


  9%|▉         | 9464/100000 [3:08:42<29:07:54,  1.16s/it]

92386 episode score is 916.83


  9%|▉         | 9465/100000 [3:08:43<28:37:32,  1.14s/it]

92394 episode score is 899.46


  9%|▉         | 9466/100000 [3:08:45<28:47:57,  1.15s/it]

92402 episode score is 966.54


  9%|▉         | 9467/100000 [3:08:46<28:27:18,  1.13s/it]

92410 episode score is 901.46


  9%|▉         | 9468/100000 [3:08:47<29:13:06,  1.16s/it]

92418 episode score is 1043.09


  9%|▉         | 9469/100000 [3:08:48<28:50:59,  1.15s/it]

92426 episode score is 900.66


  9%|▉         | 9470/100000 [3:08:49<28:43:37,  1.14s/it]

92434 episode score is 943.81


  9%|▉         | 9471/100000 [3:08:50<28:45:16,  1.14s/it]

92443 episode score is 837.41


  9%|▉         | 9472/100000 [3:08:51<28:57:03,  1.15s/it]

92451 episode score is 977.39


  9%|▉         | 9473/100000 [3:08:53<29:02:27,  1.15s/it]

92460 episode score is 840.99


  9%|▉         | 9474/100000 [3:08:54<28:36:17,  1.14s/it]

92469 episode score is 789.97


  9%|▉         | 9475/100000 [3:08:55<28:53:22,  1.15s/it]

92478 episode score is 850.58


  9%|▉         | 9476/100000 [3:08:56<29:24:46,  1.17s/it]

92487 episode score is 884.73


  9%|▉         | 9477/100000 [3:08:57<29:20:42,  1.17s/it]

92496 episode score is 842.61


  9%|▉         | 9478/100000 [3:08:58<29:36:46,  1.18s/it]

92506 episode score is 775.65


  9%|▉         | 9479/100000 [3:09:00<29:27:21,  1.17s/it]

92515 episode score is 845.12


  9%|▉         | 9480/100000 [3:09:01<29:11:19,  1.16s/it]

92524 episode score is 815.76


  9%|▉         | 9481/100000 [3:09:02<28:54:38,  1.15s/it]

92533 episode score is 795.22


  9%|▉         | 9482/100000 [3:09:03<28:40:10,  1.14s/it]

92542 episode score is 788.68


  9%|▉         | 9483/100000 [3:09:04<28:30:00,  1.13s/it]

92551 episode score is 784.39


  9%|▉         | 9484/100000 [3:09:05<28:56:30,  1.15s/it]

92561 episode score is 768.14


  9%|▉         | 9485/100000 [3:09:07<29:11:11,  1.16s/it]

92571 episode score is 769.70


  9%|▉         | 9486/100000 [3:09:08<28:45:48,  1.14s/it]

92580 episode score is 789.67


  9%|▉         | 9487/100000 [3:09:09<28:44:45,  1.14s/it]

92590 episode score is 721.23


  9%|▉         | 9488/100000 [3:09:10<28:54:39,  1.15s/it]

92600 episode score is 750.39


  9%|▉         | 9489/100000 [3:09:11<28:50:50,  1.15s/it]

92609 episode score is 826.88


  9%|▉         | 9490/100000 [3:09:12<28:56:54,  1.15s/it]

92619 episode score is 743.43


  9%|▉         | 9491/100000 [3:09:13<29:14:43,  1.16s/it]

92629 episode score is 757.57


  9%|▉         | 9492/100000 [3:09:15<28:48:53,  1.15s/it]

92639 episode score is 703.93


  9%|▉         | 9493/100000 [3:09:16<29:09:20,  1.16s/it]

92650 episode score is 676.87


  9%|▉         | 9494/100000 [3:09:17<28:44:22,  1.14s/it]

92660 episode score is 693.41


  9%|▉         | 9495/100000 [3:09:18<28:33:49,  1.14s/it]

92669 episode score is 788.75


  9%|▉         | 9496/100000 [3:09:19<28:52:03,  1.15s/it]

92679 episode score is 751.66


  9%|▉         | 9497/100000 [3:09:20<29:16:34,  1.16s/it]

92690 episode score is 685.12


  9%|▉         | 9498/100000 [3:09:21<29:08:45,  1.16s/it]

92700 episode score is 747.88


  9%|▉         | 9499/100000 [3:09:23<29:16:17,  1.16s/it]

92710 episode score is 751.72
92720 episode score is 772.89


 10%|▉         | 9500/100000 [3:09:25<37:10:08,  1.48s/it]

Iteration 9500: Average test reward: 719.07


 10%|▉         | 9501/100000 [3:09:26<34:39:29,  1.38s/it]

92730 episode score is 746.21


 10%|▉         | 9502/100000 [3:09:27<32:38:40,  1.30s/it]

92740 episode score is 715.49


 10%|▉         | 9503/100000 [3:09:28<31:28:45,  1.25s/it]

92750 episode score is 736.95


 10%|▉         | 9504/100000 [3:09:29<30:40:18,  1.22s/it]

92760 episode score is 734.00


 10%|▉         | 9505/100000 [3:09:31<30:24:52,  1.21s/it]

92770 episode score is 773.25


 10%|▉         | 9506/100000 [3:09:32<30:06:55,  1.20s/it]

92780 episode score is 733.96


 10%|▉         | 9507/100000 [3:09:33<30:02:45,  1.20s/it]

92790 episode score is 762.83


 10%|▉         | 9508/100000 [3:09:34<29:50:30,  1.19s/it]

92800 episode score is 759.35


 10%|▉         | 9509/100000 [3:09:35<29:11:36,  1.16s/it]

92809 episode score is 786.72


 10%|▉         | 9510/100000 [3:09:36<29:50:00,  1.19s/it]

92819 episode score is 758.03


 10%|▉         | 9511/100000 [3:09:38<29:28:03,  1.17s/it]

92829 episode score is 748.70


 10%|▉         | 9512/100000 [3:09:39<29:27:02,  1.17s/it]

92839 episode score is 768.39


 10%|▉         | 9513/100000 [3:09:40<29:32:28,  1.18s/it]

92850 episode score is 676.87


 10%|▉         | 9514/100000 [3:09:41<29:14:41,  1.16s/it]

92860 episode score is 748.31


 10%|▉         | 9515/100000 [3:09:42<28:46:00,  1.14s/it]

92869 episode score is 802.30


 10%|▉         | 9516/100000 [3:09:43<28:36:12,  1.14s/it]

92879 episode score is 734.21


 10%|▉         | 9517/100000 [3:09:44<28:13:45,  1.12s/it]

92888 episode score is 787.03


 10%|▉         | 9518/100000 [3:09:46<28:40:01,  1.14s/it]

92898 episode score is 773.63


 10%|▉         | 9519/100000 [3:09:47<28:22:21,  1.13s/it]

92908 episode score is 707.72


 10%|▉         | 9520/100000 [3:09:48<28:41:44,  1.14s/it]

92918 episode score is 758.72


 10%|▉         | 9521/100000 [3:09:49<29:04:55,  1.16s/it]

92928 episode score is 766.33


 10%|▉         | 9522/100000 [3:09:50<28:40:31,  1.14s/it]

92938 episode score is 712.39


 10%|▉         | 9523/100000 [3:09:51<28:35:29,  1.14s/it]

92948 episode score is 733.05


 10%|▉         | 9524/100000 [3:09:52<28:31:29,  1.13s/it]

92957 episode score is 825.45


 10%|▉         | 9525/100000 [3:09:54<28:27:03,  1.13s/it]

92967 episode score is 735.71


 10%|▉         | 9526/100000 [3:09:55<28:31:48,  1.14s/it]

92977 episode score is 740.61


 10%|▉         | 9527/100000 [3:09:56<28:23:42,  1.13s/it]

92987 episode score is 727.28


 10%|▉         | 9528/100000 [3:09:57<28:20:47,  1.13s/it]

92997 episode score is 733.62


 10%|▉         | 9529/100000 [3:09:58<28:12:28,  1.12s/it]

93007 episode score is 712.90


 10%|▉         | 9530/100000 [3:09:59<28:03:43,  1.12s/it]

93017 episode score is 709.70


 10%|▉         | 9531/100000 [3:10:00<27:59:05,  1.11s/it]

93027 episode score is 714.98


 10%|▉         | 9532/100000 [3:10:01<27:57:39,  1.11s/it]

93037 episode score is 723.18


 10%|▉         | 9533/100000 [3:10:03<28:34:39,  1.14s/it]

93048 episode score is 701.26


 10%|▉         | 9534/100000 [3:10:04<28:31:34,  1.14s/it]

93058 episode score is 740.52


 10%|▉         | 9535/100000 [3:10:05<28:38:06,  1.14s/it]

93069 episode score is 661.50


 10%|▉         | 9536/100000 [3:10:06<28:36:36,  1.14s/it]

93079 episode score is 743.04


 10%|▉         | 9537/100000 [3:10:07<28:34:41,  1.14s/it]

93089 episode score is 734.15


 10%|▉         | 9538/100000 [3:10:08<28:33:02,  1.14s/it]

93099 episode score is 729.89


 10%|▉         | 9539/100000 [3:10:09<28:11:46,  1.12s/it]

93109 episode score is 698.25


 10%|▉         | 9540/100000 [3:10:10<28:25:25,  1.13s/it]

93119 episode score is 748.52


 10%|▉         | 9541/100000 [3:10:12<28:08:58,  1.12s/it]

93129 episode score is 707.22


 10%|▉         | 9542/100000 [3:10:13<28:06:24,  1.12s/it]

93139 episode score is 726.97


 10%|▉         | 9543/100000 [3:10:14<28:13:32,  1.12s/it]

93149 episode score is 740.70


 10%|▉         | 9544/100000 [3:10:15<28:37:12,  1.14s/it]

93159 episode score is 755.10


 10%|▉         | 9545/100000 [3:10:16<28:28:22,  1.13s/it]

93169 episode score is 722.34


 10%|▉         | 9546/100000 [3:10:17<28:49:20,  1.15s/it]

93179 episode score is 758.02


 10%|▉         | 9547/100000 [3:10:18<28:54:28,  1.15s/it]

93189 episode score is 755.78


 10%|▉         | 9548/100000 [3:10:20<28:42:31,  1.14s/it]

93199 episode score is 724.34


 10%|▉         | 9549/100000 [3:10:21<28:52:39,  1.15s/it]

93209 episode score is 760.00


 10%|▉         | 9550/100000 [3:10:22<28:48:01,  1.15s/it]

93219 episode score is 743.42


 10%|▉         | 9551/100000 [3:10:23<28:36:41,  1.14s/it]

93229 episode score is 728.78


 10%|▉         | 9552/100000 [3:10:24<28:22:07,  1.13s/it]

93239 episode score is 716.20


 10%|▉         | 9553/100000 [3:10:25<28:06:55,  1.12s/it]

93248 episode score is 793.19


 10%|▉         | 9554/100000 [3:10:26<28:38:14,  1.14s/it]

93258 episode score is 779.54


 10%|▉         | 9555/100000 [3:10:28<28:45:49,  1.14s/it]

93268 episode score is 755.72


 10%|▉         | 9556/100000 [3:10:29<29:06:01,  1.16s/it]

93278 episode score is 783.58


 10%|▉         | 9557/100000 [3:10:30<28:53:10,  1.15s/it]

93288 episode score is 742.68


 10%|▉         | 9558/100000 [3:10:31<28:32:32,  1.14s/it]

93298 episode score is 707.50


 10%|▉         | 9559/100000 [3:10:32<28:45:34,  1.14s/it]

93308 episode score is 758.74


 10%|▉         | 9560/100000 [3:10:33<28:45:21,  1.14s/it]

93318 episode score is 744.93


 10%|▉         | 9561/100000 [3:10:34<28:55:49,  1.15s/it]

93328 episode score is 764.24


 10%|▉         | 9562/100000 [3:10:36<29:01:04,  1.16s/it]

93338 episode score is 755.15


 10%|▉         | 9563/100000 [3:10:37<29:11:08,  1.16s/it]

93349 episode score is 684.27


 10%|▉         | 9564/100000 [3:10:38<29:22:04,  1.17s/it]

93360 episode score is 691.16


 10%|▉         | 9565/100000 [3:10:39<29:01:32,  1.16s/it]

93370 episode score is 730.22


 10%|▉         | 9566/100000 [3:10:40<28:39:59,  1.14s/it]

93380 episode score is 719.40


 10%|▉         | 9567/100000 [3:10:41<29:23:53,  1.17s/it]

93390 episode score is 758.51


 10%|▉         | 9568/100000 [3:10:43<29:01:17,  1.16s/it]

93400 episode score is 715.58


 10%|▉         | 9569/100000 [3:10:44<29:06:31,  1.16s/it]

93410 episode score is 756.48


 10%|▉         | 9570/100000 [3:10:45<29:19:15,  1.17s/it]

93420 episode score is 773.92


 10%|▉         | 9571/100000 [3:10:46<29:47:50,  1.19s/it]

93430 episode score is 797.51


 10%|▉         | 9572/100000 [3:10:47<29:35:28,  1.18s/it]

93440 episode score is 736.47


 10%|▉         | 9573/100000 [3:10:49<30:12:03,  1.20s/it]

93450 episode score is 818.59


 10%|▉         | 9574/100000 [3:10:50<30:13:59,  1.20s/it]

93460 episode score is 793.03


 10%|▉         | 9575/100000 [3:10:51<30:06:18,  1.20s/it]

93470 episode score is 772.55


 10%|▉         | 9576/100000 [3:10:52<29:49:00,  1.19s/it]

93480 episode score is 759.42


 10%|▉         | 9577/100000 [3:10:53<29:08:50,  1.16s/it]

93489 episode score is 801.53


 10%|▉         | 9578/100000 [3:10:54<29:12:04,  1.16s/it]

93499 episode score is 774.41


 10%|▉         | 9579/100000 [3:10:55<28:39:34,  1.14s/it]

93508 episode score is 801.35


 10%|▉         | 9580/100000 [3:10:57<28:30:31,  1.14s/it]

93517 episode score is 823.48


 10%|▉         | 9581/100000 [3:10:58<28:19:45,  1.13s/it]

93526 episode score is 815.90


 10%|▉         | 9582/100000 [3:10:59<28:51:50,  1.15s/it]

93536 episode score is 788.07


 10%|▉         | 9583/100000 [3:11:00<28:23:08,  1.13s/it]

93545 episode score is 794.80


 10%|▉         | 9584/100000 [3:11:01<28:11:19,  1.12s/it]

93554 episode score is 808.97


 10%|▉         | 9585/100000 [3:11:02<28:17:21,  1.13s/it]

93564 episode score is 729.07


 10%|▉         | 9586/100000 [3:11:03<28:17:21,  1.13s/it]

93574 episode score is 730.57


 10%|▉         | 9587/100000 [3:11:04<28:09:49,  1.12s/it]

93583 episode score is 809.53


 10%|▉         | 9588/100000 [3:11:06<28:27:38,  1.13s/it]

93593 episode score is 748.46


 10%|▉         | 9589/100000 [3:11:07<28:34:10,  1.14s/it]

93603 episode score is 747.84


 10%|▉         | 9590/100000 [3:11:08<28:26:59,  1.13s/it]

93613 episode score is 728.93


 10%|▉         | 9591/100000 [3:11:09<28:16:21,  1.13s/it]

93623 episode score is 717.40


 10%|▉         | 9592/100000 [3:11:10<28:08:39,  1.12s/it]

93633 episode score is 706.57


 10%|▉         | 9593/100000 [3:11:11<27:52:38,  1.11s/it]

93643 episode score is 698.76


 10%|▉         | 9594/100000 [3:11:12<28:13:43,  1.12s/it]

93653 episode score is 744.66


 10%|▉         | 9595/100000 [3:11:13<28:06:04,  1.12s/it]

93663 episode score is 712.92


 10%|▉         | 9596/100000 [3:11:15<28:08:54,  1.12s/it]

93673 episode score is 735.27


 10%|▉         | 9597/100000 [3:11:16<28:05:41,  1.12s/it]

93683 episode score is 714.93


 10%|▉         | 9598/100000 [3:11:17<28:25:37,  1.13s/it]

93694 episode score is 664.54


 10%|▉         | 9599/100000 [3:11:18<28:15:11,  1.13s/it]

93704 episode score is 705.00
93714 episode score is 690.44


 10%|▉         | 9600/100000 [3:11:20<35:20:05,  1.41s/it]

Iteration 9600: Average test reward: 714.30


 10%|▉         | 9601/100000 [3:11:21<32:49:45,  1.31s/it]

93724 episode score is 697.96


 10%|▉         | 9602/100000 [3:11:22<31:39:53,  1.26s/it]

93735 episode score is 665.83


 10%|▉         | 9603/100000 [3:11:23<31:06:01,  1.24s/it]

93746 episode score is 687.24


 10%|▉         | 9604/100000 [3:11:25<30:04:04,  1.20s/it]

93756 episode score is 714.35


 10%|▉         | 9605/100000 [3:11:26<29:54:48,  1.19s/it]

93767 episode score is 685.82


 10%|▉         | 9606/100000 [3:11:27<29:39:18,  1.18s/it]

93777 episode score is 756.78


 10%|▉         | 9607/100000 [3:11:28<28:58:05,  1.15s/it]

93787 episode score is 706.69


 10%|▉         | 9608/100000 [3:11:29<28:36:14,  1.14s/it]

93797 episode score is 719.01


 10%|▉         | 9609/100000 [3:11:30<28:35:47,  1.14s/it]

93807 episode score is 746.64


 10%|▉         | 9610/100000 [3:11:31<28:23:57,  1.13s/it]

93817 episode score is 728.39


 10%|▉         | 9611/100000 [3:11:32<28:12:10,  1.12s/it]

93827 episode score is 704.69


 10%|▉         | 9612/100000 [3:11:34<27:53:41,  1.11s/it]

93837 episode score is 697.82


 10%|▉         | 9613/100000 [3:11:35<28:05:06,  1.12s/it]

93847 episode score is 746.40


 10%|▉         | 9614/100000 [3:11:36<28:10:41,  1.12s/it]

93857 episode score is 733.57


 10%|▉         | 9615/100000 [3:11:37<28:22:29,  1.13s/it]

93867 episode score is 748.08


 10%|▉         | 9616/100000 [3:11:38<28:50:56,  1.15s/it]

93877 episode score is 781.44


 10%|▉         | 9617/100000 [3:11:39<28:50:10,  1.15s/it]

93887 episode score is 751.40


 10%|▉         | 9618/100000 [3:11:40<28:55:52,  1.15s/it]

93897 episode score is 761.77


 10%|▉         | 9619/100000 [3:11:42<29:15:32,  1.17s/it]

93907 episode score is 775.05


 10%|▉         | 9620/100000 [3:11:43<29:19:36,  1.17s/it]

93917 episode score is 765.12


 10%|▉         | 9621/100000 [3:11:44<29:21:57,  1.17s/it]

93927 episode score is 767.66


 10%|▉         | 9622/100000 [3:11:45<29:12:06,  1.16s/it]

93937 episode score is 754.08


 10%|▉         | 9623/100000 [3:11:46<29:19:04,  1.17s/it]

93947 episode score is 769.98


 10%|▉         | 9624/100000 [3:11:47<29:15:17,  1.17s/it]

93957 episode score is 751.25


 10%|▉         | 9625/100000 [3:11:49<29:11:04,  1.16s/it]

93967 episode score is 755.18


 10%|▉         | 9626/100000 [3:11:50<29:22:59,  1.17s/it]

93977 episode score is 779.89


 10%|▉         | 9627/100000 [3:11:51<29:13:49,  1.16s/it]

93987 episode score is 749.08


 10%|▉         | 9628/100000 [3:11:52<29:14:37,  1.16s/it]

93997 episode score is 764.08


 10%|▉         | 9629/100000 [3:11:53<29:21:39,  1.17s/it]

94007 episode score is 774.96


 10%|▉         | 9630/100000 [3:11:54<29:18:55,  1.17s/it]

94017 episode score is 765.59


 10%|▉         | 9631/100000 [3:11:56<28:50:37,  1.15s/it]

94027 episode score is 716.03


 10%|▉         | 9632/100000 [3:11:57<28:33:32,  1.14s/it]

94037 episode score is 712.48


 10%|▉         | 9633/100000 [3:11:58<28:43:15,  1.14s/it]

94047 episode score is 748.40


 10%|▉         | 9634/100000 [3:11:59<28:41:02,  1.14s/it]

94057 episode score is 748.81


 10%|▉         | 9635/100000 [3:12:00<28:19:18,  1.13s/it]

94067 episode score is 712.48


 10%|▉         | 9636/100000 [3:12:01<28:23:00,  1.13s/it]

94077 episode score is 749.58


 10%|▉         | 9637/100000 [3:12:02<28:29:54,  1.14s/it]

94087 episode score is 743.08


 10%|▉         | 9638/100000 [3:12:04<28:38:15,  1.14s/it]

94097 episode score is 755.12


 10%|▉         | 9639/100000 [3:12:05<28:46:57,  1.15s/it]

94107 episode score is 763.81


 10%|▉         | 9640/100000 [3:12:06<28:55:57,  1.15s/it]

94117 episode score is 762.23


 10%|▉         | 9641/100000 [3:12:07<28:26:39,  1.13s/it]

94126 episode score is 796.19


 10%|▉         | 9642/100000 [3:12:08<28:48:16,  1.15s/it]

94136 episode score is 774.59


 10%|▉         | 9643/100000 [3:12:09<28:52:24,  1.15s/it]

94146 episode score is 759.73


 10%|▉         | 9644/100000 [3:12:10<29:01:35,  1.16s/it]

94156 episode score is 775.87


 10%|▉         | 9645/100000 [3:12:12<28:50:35,  1.15s/it]

94166 episode score is 734.67


 10%|▉         | 9646/100000 [3:12:13<29:02:00,  1.16s/it]

94176 episode score is 756.75


 10%|▉         | 9647/100000 [3:12:14<28:57:52,  1.15s/it]

94186 episode score is 747.29


 10%|▉         | 9648/100000 [3:12:15<28:42:16,  1.14s/it]

94196 episode score is 726.55


 10%|▉         | 9649/100000 [3:12:16<28:26:59,  1.13s/it]

94206 episode score is 718.01


 10%|▉         | 9650/100000 [3:12:17<29:00:02,  1.16s/it]

94217 episode score is 694.47


 10%|▉         | 9651/100000 [3:12:18<29:00:24,  1.16s/it]

94227 episode score is 762.19


 10%|▉         | 9652/100000 [3:12:20<28:54:48,  1.15s/it]

94237 episode score is 749.75


 10%|▉         | 9653/100000 [3:12:21<29:46:25,  1.19s/it]

94247 episode score is 770.22


 10%|▉         | 9654/100000 [3:12:22<29:20:41,  1.17s/it]

94257 episode score is 743.87


 10%|▉         | 9655/100000 [3:12:23<29:18:29,  1.17s/it]

94267 episode score is 763.26


 10%|▉         | 9656/100000 [3:12:24<29:09:09,  1.16s/it]

94277 episode score is 754.59


 10%|▉         | 9657/100000 [3:12:25<29:05:21,  1.16s/it]

94287 episode score is 754.93


 10%|▉         | 9658/100000 [3:12:27<28:55:48,  1.15s/it]

94297 episode score is 745.90


 10%|▉         | 9659/100000 [3:12:28<28:45:28,  1.15s/it]

94307 episode score is 740.78


 10%|▉         | 9660/100000 [3:12:29<28:39:17,  1.14s/it]

94317 episode score is 741.23


 10%|▉         | 9661/100000 [3:12:30<28:31:53,  1.14s/it]

94327 episode score is 733.75


 10%|▉         | 9662/100000 [3:12:31<28:33:37,  1.14s/it]

94337 episode score is 731.44


 10%|▉         | 9663/100000 [3:12:32<28:35:22,  1.14s/it]

94347 episode score is 740.02


 10%|▉         | 9664/100000 [3:12:33<28:36:58,  1.14s/it]

94357 episode score is 750.61


 10%|▉         | 9665/100000 [3:12:35<28:28:32,  1.13s/it]

94367 episode score is 744.69


 10%|▉         | 9666/100000 [3:12:36<28:30:21,  1.14s/it]

94377 episode score is 743.47


 10%|▉         | 9667/100000 [3:12:37<28:49:24,  1.15s/it]

94387 episode score is 768.72


 10%|▉         | 9668/100000 [3:12:38<28:43:08,  1.14s/it]

94397 episode score is 752.45


 10%|▉         | 9669/100000 [3:12:39<28:57:04,  1.15s/it]

94407 episode score is 772.04


 10%|▉         | 9670/100000 [3:12:40<28:56:17,  1.15s/it]

94417 episode score is 758.30


 10%|▉         | 9671/100000 [3:12:41<28:49:48,  1.15s/it]

94427 episode score is 741.59


 10%|▉         | 9672/100000 [3:12:43<29:02:28,  1.16s/it]

94437 episode score is 771.57


 10%|▉         | 9673/100000 [3:12:44<28:55:36,  1.15s/it]

94447 episode score is 751.37


 10%|▉         | 9674/100000 [3:12:45<29:00:19,  1.16s/it]

94457 episode score is 765.87


 10%|▉         | 9675/100000 [3:12:46<29:09:44,  1.16s/it]

94467 episode score is 775.10


 10%|▉         | 9676/100000 [3:12:47<29:15:35,  1.17s/it]

94477 episode score is 749.10


 10%|▉         | 9677/100000 [3:12:48<28:54:44,  1.15s/it]

94487 episode score is 740.54


 10%|▉         | 9678/100000 [3:12:50<28:32:32,  1.14s/it]

94497 episode score is 717.50


 10%|▉         | 9679/100000 [3:12:51<28:25:23,  1.13s/it]

94507 episode score is 734.55


 10%|▉         | 9680/100000 [3:12:52<28:10:52,  1.12s/it]

94517 episode score is 724.90


 10%|▉         | 9681/100000 [3:12:53<28:05:39,  1.12s/it]

94527 episode score is 732.35


 10%|▉         | 9682/100000 [3:12:54<28:26:30,  1.13s/it]

94538 episode score is 692.85


 10%|▉         | 9683/100000 [3:12:55<28:32:25,  1.14s/it]

94549 episode score is 669.81


 10%|▉         | 9684/100000 [3:12:56<28:24:27,  1.13s/it]

94559 episode score is 734.11


 10%|▉         | 9685/100000 [3:12:57<28:10:01,  1.12s/it]

94569 episode score is 724.42


 10%|▉         | 9686/100000 [3:12:59<28:27:34,  1.13s/it]

94580 episode score is 683.27


 10%|▉         | 9687/100000 [3:13:00<28:43:12,  1.14s/it]

94591 episode score is 691.65


 10%|▉         | 9688/100000 [3:13:01<28:35:35,  1.14s/it]

94602 episode score is 661.72


 10%|▉         | 9689/100000 [3:13:02<28:41:18,  1.14s/it]

94613 episode score is 663.89


 10%|▉         | 9690/100000 [3:13:03<28:53:38,  1.15s/it]

94624 episode score is 688.82


 10%|▉         | 9691/100000 [3:13:04<28:24:57,  1.13s/it]

94635 episode score is 620.56


 10%|▉         | 9692/100000 [3:13:05<28:01:46,  1.12s/it]

94646 episode score is 615.89


 10%|▉         | 9693/100000 [3:13:06<28:06:16,  1.12s/it]

94658 episode score is 573.11


 10%|▉         | 9694/100000 [3:13:08<28:13:06,  1.12s/it]

94670 episode score is 570.57


 10%|▉         | 9695/100000 [3:13:09<28:31:45,  1.14s/it]

94681 episode score is 681.91


 10%|▉         | 9696/100000 [3:13:10<28:42:50,  1.14s/it]

94692 episode score is 677.32


 10%|▉         | 9697/100000 [3:13:11<28:25:24,  1.13s/it]

94702 episode score is 714.52


 10%|▉         | 9698/100000 [3:13:12<28:01:24,  1.12s/it]

94712 episode score is 699.37


 10%|▉         | 9699/100000 [3:13:13<28:38:20,  1.14s/it]

94723 episode score is 697.34
94733 episode score is 727.05


 10%|▉         | 9700/100000 [3:13:15<36:11:33,  1.44s/it]

Iteration 9700: Average test reward: 747.87


 10%|▉         | 9701/100000 [3:13:17<33:56:25,  1.35s/it]

94743 episode score is 744.50


 10%|▉         | 9702/100000 [3:13:18<32:54:42,  1.31s/it]

94754 episode score is 692.69


 10%|▉         | 9703/100000 [3:13:19<31:48:31,  1.27s/it]

94764 episode score is 755.22


 10%|▉         | 9704/100000 [3:13:20<30:52:44,  1.23s/it]

94774 episode score is 743.48


 10%|▉         | 9705/100000 [3:13:21<29:57:59,  1.19s/it]

94784 episode score is 716.42


 10%|▉         | 9706/100000 [3:13:23<30:22:45,  1.21s/it]

94794 episode score is 765.67


 10%|▉         | 9707/100000 [3:13:24<30:02:30,  1.20s/it]

94804 episode score is 765.50


 10%|▉         | 9708/100000 [3:13:25<29:59:30,  1.20s/it]

94814 episode score is 776.04


 10%|▉         | 9709/100000 [3:13:26<29:44:32,  1.19s/it]

94824 episode score is 761.34


 10%|▉         | 9710/100000 [3:13:27<29:49:02,  1.19s/it]

94834 episode score is 773.48


 10%|▉         | 9711/100000 [3:13:28<29:40:59,  1.18s/it]

94844 episode score is 760.70


 10%|▉         | 9712/100000 [3:13:30<29:12:03,  1.16s/it]

94853 episode score is 807.46


 10%|▉         | 9713/100000 [3:13:31<28:36:33,  1.14s/it]

94862 episode score is 789.22


 10%|▉         | 9714/100000 [3:13:32<28:20:10,  1.13s/it]

94871 episode score is 807.66


 10%|▉         | 9715/100000 [3:13:33<28:06:16,  1.12s/it]

94880 episode score is 789.50


 10%|▉         | 9716/100000 [3:13:34<28:32:32,  1.14s/it]

94890 episode score is 769.24


 10%|▉         | 9717/100000 [3:13:35<28:14:34,  1.13s/it]

94899 episode score is 804.72


 10%|▉         | 9718/100000 [3:13:36<28:48:11,  1.15s/it]

94909 episode score is 779.51


 10%|▉         | 9719/100000 [3:13:37<29:08:36,  1.16s/it]

94919 episode score is 778.15


 10%|▉         | 9720/100000 [3:13:39<28:57:44,  1.15s/it]

94928 episode score is 825.70


 10%|▉         | 9721/100000 [3:13:40<29:10:15,  1.16s/it]

94938 episode score is 769.36


 10%|▉         | 9722/100000 [3:13:41<29:13:06,  1.17s/it]

94948 episode score is 763.88


 10%|▉         | 9723/100000 [3:13:42<29:20:35,  1.17s/it]

94958 episode score is 769.64


 10%|▉         | 9724/100000 [3:13:43<28:51:54,  1.15s/it]

94967 episode score is 789.63


 10%|▉         | 9725/100000 [3:13:44<28:19:09,  1.13s/it]

94976 episode score is 782.96


 10%|▉         | 9726/100000 [3:13:45<27:56:55,  1.11s/it]

94985 episode score is 787.90


 10%|▉         | 9727/100000 [3:13:47<28:25:14,  1.13s/it]

94995 episode score is 767.12


 10%|▉         | 9728/100000 [3:13:48<28:53:25,  1.15s/it]

95005 episode score is 761.39


 10%|▉         | 9729/100000 [3:13:49<29:03:36,  1.16s/it]

95015 episode score is 763.39


 10%|▉         | 9730/100000 [3:13:50<29:14:00,  1.17s/it]

95025 episode score is 775.89


 10%|▉         | 9731/100000 [3:13:51<29:19:12,  1.17s/it]

95035 episode score is 763.46


 10%|▉         | 9732/100000 [3:13:53<29:26:46,  1.17s/it]

95045 episode score is 769.12


 10%|▉         | 9733/100000 [3:13:54<29:32:35,  1.18s/it]

95055 episode score is 764.73


 10%|▉         | 9734/100000 [3:13:55<28:52:11,  1.15s/it]

95064 episode score is 781.52


 10%|▉         | 9735/100000 [3:13:56<28:22:09,  1.13s/it]

95073 episode score is 781.63


 10%|▉         | 9736/100000 [3:13:57<28:50:14,  1.15s/it]

95083 episode score is 772.72


 10%|▉         | 9737/100000 [3:13:58<28:22:39,  1.13s/it]

95092 episode score is 794.42


 10%|▉         | 9738/100000 [3:13:59<28:46:43,  1.15s/it]

95102 episode score is 773.29


 10%|▉         | 9739/100000 [3:14:01<29:18:51,  1.17s/it]

95112 episode score is 784.07


 10%|▉         | 9740/100000 [3:14:02<29:27:15,  1.17s/it]

95122 episode score is 774.71


 10%|▉         | 9741/100000 [3:14:03<28:50:54,  1.15s/it]

95132 episode score is 693.74


 10%|▉         | 9742/100000 [3:14:04<28:51:11,  1.15s/it]

95142 episode score is 756.47


 10%|▉         | 9743/100000 [3:14:05<28:48:47,  1.15s/it]

95152 episode score is 747.41


 10%|▉         | 9744/100000 [3:14:06<28:47:19,  1.15s/it]

95162 episode score is 748.19


 10%|▉         | 9745/100000 [3:14:07<28:40:19,  1.14s/it]

95172 episode score is 735.19


 10%|▉         | 9746/100000 [3:14:09<28:16:25,  1.13s/it]

95182 episode score is 699.39


 10%|▉         | 9747/100000 [3:14:10<28:03:24,  1.12s/it]

95192 episode score is 701.42


 10%|▉         | 9748/100000 [3:14:11<28:29:14,  1.14s/it]

95202 episode score is 771.51


 10%|▉         | 9749/100000 [3:14:12<28:36:14,  1.14s/it]

95212 episode score is 743.63


 10%|▉         | 9750/100000 [3:14:13<28:34:50,  1.14s/it]

95222 episode score is 730.83


 10%|▉         | 9751/100000 [3:14:14<28:24:22,  1.13s/it]

95232 episode score is 715.55


 10%|▉         | 9752/100000 [3:14:15<28:44:04,  1.15s/it]

95242 episode score is 750.04


 10%|▉         | 9753/100000 [3:14:17<28:41:03,  1.14s/it]

95252 episode score is 726.46


 10%|▉         | 9754/100000 [3:14:18<29:19:22,  1.17s/it]

95263 episode score is 686.72


 10%|▉         | 9755/100000 [3:14:19<29:17:52,  1.17s/it]

95273 episode score is 759.51


 10%|▉         | 9756/100000 [3:14:20<29:09:36,  1.16s/it]

95283 episode score is 747.21


 10%|▉         | 9757/100000 [3:14:21<28:54:37,  1.15s/it]

95293 episode score is 726.81


 10%|▉         | 9758/100000 [3:14:22<29:01:22,  1.16s/it]

95303 episode score is 754.20


 10%|▉         | 9759/100000 [3:14:24<29:14:21,  1.17s/it]

95313 episode score is 761.00


 10%|▉         | 9760/100000 [3:14:25<29:13:51,  1.17s/it]

95323 episode score is 749.62


 10%|▉         | 9761/100000 [3:14:26<30:14:28,  1.21s/it]

95334 episode score is 684.64


 10%|▉         | 9762/100000 [3:14:27<30:04:21,  1.20s/it]

95344 episode score is 746.78


 10%|▉         | 9763/100000 [3:14:28<30:01:55,  1.20s/it]

95354 episode score is 773.82


 10%|▉         | 9764/100000 [3:14:30<29:40:01,  1.18s/it]

95364 episode score is 723.13


 10%|▉         | 9765/100000 [3:14:31<29:12:41,  1.17s/it]

95373 episode score is 821.03


 10%|▉         | 9766/100000 [3:14:32<28:37:09,  1.14s/it]

95382 episode score is 783.92


 10%|▉         | 9767/100000 [3:14:33<28:17:25,  1.13s/it]

95391 episode score is 776.64


 10%|▉         | 9768/100000 [3:14:34<28:21:42,  1.13s/it]

95400 episode score is 825.47


 10%|▉         | 9769/100000 [3:14:35<28:08:44,  1.12s/it]

95409 episode score is 808.08


 10%|▉         | 9770/100000 [3:14:36<28:25:20,  1.13s/it]

95418 episode score is 849.02


 10%|▉         | 9771/100000 [3:14:37<28:34:03,  1.14s/it]

95428 episode score is 740.04


 10%|▉         | 9772/100000 [3:14:39<28:42:51,  1.15s/it]

95438 episode score is 745.74


 10%|▉         | 9773/100000 [3:14:40<28:47:00,  1.15s/it]

95448 episode score is 747.64


 10%|▉         | 9774/100000 [3:14:41<28:22:49,  1.13s/it]

95457 episode score is 784.74


 10%|▉         | 9775/100000 [3:14:42<28:40:41,  1.14s/it]

95467 episode score is 761.56


 10%|▉         | 9776/100000 [3:14:43<28:49:59,  1.15s/it]

95477 episode score is 753.02


 10%|▉         | 9777/100000 [3:14:44<29:00:34,  1.16s/it]

95487 episode score is 761.93


 10%|▉         | 9778/100000 [3:14:45<28:34:00,  1.14s/it]

95496 episode score is 810.95


 10%|▉         | 9779/100000 [3:14:47<28:49:49,  1.15s/it]

95506 episode score is 772.32


 10%|▉         | 9780/100000 [3:14:48<29:02:12,  1.16s/it]

95516 episode score is 750.58


 10%|▉         | 9781/100000 [3:14:49<29:17:35,  1.17s/it]

95526 episode score is 771.48


 10%|▉         | 9782/100000 [3:14:50<29:16:51,  1.17s/it]

95536 episode score is 754.59


 10%|▉         | 9783/100000 [3:14:51<28:59:38,  1.16s/it]

95546 episode score is 739.30


 10%|▉         | 9784/100000 [3:14:52<29:07:53,  1.16s/it]

95556 episode score is 751.90


 10%|▉         | 9785/100000 [3:14:54<28:31:15,  1.14s/it]

95565 episode score is 778.99


 10%|▉         | 9786/100000 [3:14:55<28:48:26,  1.15s/it]

95575 episode score is 760.01


 10%|▉         | 9787/100000 [3:14:56<28:52:23,  1.15s/it]

95585 episode score is 753.58


 10%|▉         | 9788/100000 [3:14:57<28:40:39,  1.14s/it]

95595 episode score is 725.10


 10%|▉         | 9789/100000 [3:14:58<28:40:10,  1.14s/it]

95605 episode score is 746.64


 10%|▉         | 9790/100000 [3:14:59<28:35:13,  1.14s/it]

95615 episode score is 732.71


 10%|▉         | 9791/100000 [3:15:00<28:47:06,  1.15s/it]

95625 episode score is 766.44


 10%|▉         | 9792/100000 [3:15:02<28:20:25,  1.13s/it]

95635 episode score is 694.72


 10%|▉         | 9793/100000 [3:15:03<28:54:18,  1.15s/it]

95645 episode score is 773.80


 10%|▉         | 9794/100000 [3:15:04<28:26:03,  1.13s/it]

95654 episode score is 803.02


 10%|▉         | 9795/100000 [3:15:05<28:09:34,  1.12s/it]

95663 episode score is 808.01


 10%|▉         | 9796/100000 [3:15:06<28:44:45,  1.15s/it]

95673 episode score is 786.46


 10%|▉         | 9797/100000 [3:15:07<29:04:26,  1.16s/it]

95683 episode score is 774.76


 10%|▉         | 9798/100000 [3:15:08<28:40:24,  1.14s/it]

95692 episode score is 801.83


 10%|▉         | 9799/100000 [3:15:10<28:44:26,  1.15s/it]

95702 episode score is 752.72
95711 episode score is 798.96


 10%|▉         | 9800/100000 [3:15:12<35:52:22,  1.43s/it]

Iteration 9800: Average test reward: 731.85


 10%|▉         | 9801/100000 [3:15:13<33:25:49,  1.33s/it]

95720 episode score is 817.78


 10%|▉         | 9802/100000 [3:15:14<31:55:56,  1.27s/it]

95729 episode score is 840.06


 10%|▉         | 9803/100000 [3:15:15<30:47:05,  1.23s/it]

95738 episode score is 830.84


 10%|▉         | 9804/100000 [3:15:16<30:05:13,  1.20s/it]

95747 episode score is 850.27


 10%|▉         | 9805/100000 [3:15:17<30:03:28,  1.20s/it]

95757 episode score is 749.14


 10%|▉         | 9806/100000 [3:15:18<29:22:37,  1.17s/it]

95766 episode score is 802.53


 10%|▉         | 9807/100000 [3:15:20<29:24:48,  1.17s/it]

95775 episode score is 871.58


 10%|▉         | 9808/100000 [3:15:21<29:11:08,  1.16s/it]

95784 episode score is 832.60


 10%|▉         | 9809/100000 [3:15:22<29:08:16,  1.16s/it]

95793 episode score is 861.16


 10%|▉         | 9810/100000 [3:15:23<28:33:42,  1.14s/it]

95802 episode score is 787.46


 10%|▉         | 9811/100000 [3:15:24<28:45:20,  1.15s/it]

95811 episode score is 875.88


 10%|▉         | 9812/100000 [3:15:25<28:15:28,  1.13s/it]

95820 episode score is 804.71


 10%|▉         | 9813/100000 [3:15:26<27:59:06,  1.12s/it]

95829 episode score is 806.63


 10%|▉         | 9814/100000 [3:15:28<28:30:25,  1.14s/it]

95839 episode score is 777.69


 10%|▉         | 9815/100000 [3:15:29<28:46:57,  1.15s/it]

95849 episode score is 771.73


 10%|▉         | 9816/100000 [3:15:30<28:38:35,  1.14s/it]

95858 episode score is 842.72


 10%|▉         | 9817/100000 [3:15:31<28:42:20,  1.15s/it]

95867 episode score is 853.33


 10%|▉         | 9818/100000 [3:15:32<29:01:14,  1.16s/it]

95876 episode score is 856.36


 10%|▉         | 9819/100000 [3:15:33<28:35:13,  1.14s/it]

95885 episode score is 804.33


 10%|▉         | 9820/100000 [3:15:34<28:14:03,  1.13s/it]

95894 episode score is 799.94


 10%|▉         | 9821/100000 [3:15:35<27:59:42,  1.12s/it]

95903 episode score is 806.18


 10%|▉         | 9822/100000 [3:15:37<27:44:23,  1.11s/it]

95912 episode score is 797.53


 10%|▉         | 9823/100000 [3:15:38<27:52:21,  1.11s/it]

95922 episode score is 724.12


 10%|▉         | 9824/100000 [3:15:39<28:23:52,  1.13s/it]

95932 episode score is 775.70


 10%|▉         | 9825/100000 [3:15:40<27:58:02,  1.12s/it]

95941 episode score is 789.07


 10%|▉         | 9826/100000 [3:15:41<27:39:32,  1.10s/it]

95950 episode score is 791.06


 10%|▉         | 9827/100000 [3:15:42<27:51:37,  1.11s/it]

95960 episode score is 730.96


 10%|▉         | 9828/100000 [3:15:43<28:47:18,  1.15s/it]

95969 episode score is 851.95


 10%|▉         | 9829/100000 [3:15:45<28:35:42,  1.14s/it]

95978 episode score is 832.47


 10%|▉         | 9830/100000 [3:15:46<28:47:43,  1.15s/it]

95988 episode score is 769.34


 10%|▉         | 9831/100000 [3:15:47<28:31:00,  1.14s/it]

95997 episode score is 818.40


 10%|▉         | 9832/100000 [3:15:48<28:28:19,  1.14s/it]

96006 episode score is 822.57


 10%|▉         | 9833/100000 [3:15:49<28:42:28,  1.15s/it]

96016 episode score is 750.88


 10%|▉         | 9834/100000 [3:15:50<28:21:02,  1.13s/it]

96025 episode score is 805.48


 10%|▉         | 9835/100000 [3:15:51<28:33:53,  1.14s/it]

96035 episode score is 767.60


 10%|▉         | 9836/100000 [3:15:53<28:31:53,  1.14s/it]

96044 episode score is 849.12


 10%|▉         | 9837/100000 [3:15:54<28:37:24,  1.14s/it]

96053 episode score is 847.08


 10%|▉         | 9838/100000 [3:15:55<28:15:55,  1.13s/it]

96062 episode score is 804.33


 10%|▉         | 9839/100000 [3:15:56<28:24:20,  1.13s/it]

96071 episode score is 834.20


 10%|▉         | 9840/100000 [3:15:57<28:18:33,  1.13s/it]

96080 episode score is 824.16


 10%|▉         | 9841/100000 [3:15:58<28:16:43,  1.13s/it]

96089 episode score is 828.63


 10%|▉         | 9842/100000 [3:15:59<28:44:58,  1.15s/it]

96099 episode score is 784.13


 10%|▉         | 9843/100000 [3:16:00<28:27:01,  1.14s/it]

96108 episode score is 812.30


 10%|▉         | 9844/100000 [3:16:02<28:01:38,  1.12s/it]

96117 episode score is 785.76


 10%|▉         | 9845/100000 [3:16:03<28:37:22,  1.14s/it]

96127 episode score is 766.51


 10%|▉         | 9846/100000 [3:16:04<28:27:43,  1.14s/it]

96136 episode score is 814.87


 10%|▉         | 9847/100000 [3:16:05<28:02:52,  1.12s/it]

96145 episode score is 794.14


 10%|▉         | 9848/100000 [3:16:06<28:29:06,  1.14s/it]

96155 episode score is 773.46


 10%|▉         | 9849/100000 [3:16:07<28:07:29,  1.12s/it]

96164 episode score is 796.77


 10%|▉         | 9850/100000 [3:16:08<28:38:18,  1.14s/it]

96174 episode score is 772.25


 10%|▉         | 9851/100000 [3:16:10<28:53:26,  1.15s/it]

96184 episode score is 776.92


 10%|▉         | 9852/100000 [3:16:11<28:56:10,  1.16s/it]

96194 episode score is 755.59


 10%|▉         | 9853/100000 [3:16:12<28:54:14,  1.15s/it]

96204 episode score is 744.40


 10%|▉         | 9854/100000 [3:16:13<28:21:20,  1.13s/it]

96213 episode score is 786.26


 10%|▉         | 9855/100000 [3:16:14<28:33:38,  1.14s/it]

96223 episode score is 746.54


 10%|▉         | 9856/100000 [3:16:15<28:19:07,  1.13s/it]

96232 episode score is 812.39


 10%|▉         | 9857/100000 [3:16:16<28:32:20,  1.14s/it]

96242 episode score is 755.63


 10%|▉         | 9858/100000 [3:16:18<28:21:57,  1.13s/it]

96251 episode score is 800.64


 10%|▉         | 9859/100000 [3:16:19<28:05:53,  1.12s/it]

96260 episode score is 798.99


 10%|▉         | 9860/100000 [3:16:20<28:30:22,  1.14s/it]

96270 episode score is 766.30


 10%|▉         | 9861/100000 [3:16:21<28:29:51,  1.14s/it]

96280 episode score is 748.43


 10%|▉         | 9862/100000 [3:16:22<28:42:02,  1.15s/it]

96290 episode score is 754.48


 10%|▉         | 9863/100000 [3:16:23<28:44:28,  1.15s/it]

96300 episode score is 747.72


 10%|▉         | 9864/100000 [3:16:24<28:51:47,  1.15s/it]

96310 episode score is 752.82


 10%|▉         | 9865/100000 [3:16:26<28:53:04,  1.15s/it]

96320 episode score is 761.61


 10%|▉         | 9866/100000 [3:16:27<28:44:51,  1.15s/it]

96330 episode score is 739.81


 10%|▉         | 9867/100000 [3:16:28<28:21:45,  1.13s/it]

96339 episode score is 797.81


 10%|▉         | 9868/100000 [3:16:29<28:36:02,  1.14s/it]

96349 episode score is 751.20


 10%|▉         | 9869/100000 [3:16:30<28:44:57,  1.15s/it]

96359 episode score is 766.69


 10%|▉         | 9870/100000 [3:16:31<28:58:43,  1.16s/it]

96369 episode score is 778.59


 10%|▉         | 9871/100000 [3:16:32<29:13:30,  1.17s/it]

96379 episode score is 752.30


 10%|▉         | 9872/100000 [3:16:34<29:22:07,  1.17s/it]

96389 episode score is 784.88


 10%|▉         | 9873/100000 [3:16:35<28:58:52,  1.16s/it]

96399 episode score is 732.67


 10%|▉         | 9874/100000 [3:16:36<28:50:34,  1.15s/it]

96409 episode score is 750.46


 10%|▉         | 9875/100000 [3:16:37<28:53:00,  1.15s/it]

96420 episode score is 667.91


 10%|▉         | 9876/100000 [3:16:38<28:33:04,  1.14s/it]

96430 episode score is 706.82


 10%|▉         | 9877/100000 [3:16:39<28:40:50,  1.15s/it]

96441 episode score is 672.58


 10%|▉         | 9878/100000 [3:16:41<28:47:56,  1.15s/it]

96451 episode score is 760.43


 10%|▉         | 9879/100000 [3:16:42<28:29:10,  1.14s/it]

96461 episode score is 723.75


 10%|▉         | 9880/100000 [3:16:43<28:11:47,  1.13s/it]

96470 episode score is 801.41


 10%|▉         | 9881/100000 [3:16:44<28:15:04,  1.13s/it]

96480 episode score is 730.51


 10%|▉         | 9882/100000 [3:16:45<28:30:44,  1.14s/it]

96490 episode score is 760.40


 10%|▉         | 9883/100000 [3:16:46<28:57:30,  1.16s/it]

96500 episode score is 799.76


 10%|▉         | 9884/100000 [3:16:47<28:43:07,  1.15s/it]

96509 episode score is 815.01


 10%|▉         | 9885/100000 [3:16:48<28:24:44,  1.14s/it]

96518 episode score is 811.20


 10%|▉         | 9886/100000 [3:16:50<28:40:12,  1.15s/it]

96528 episode score is 749.57


 10%|▉         | 9887/100000 [3:16:51<29:15:11,  1.17s/it]

96538 episode score is 806.09


 10%|▉         | 9888/100000 [3:16:52<29:10:01,  1.17s/it]

96548 episode score is 757.76


 10%|▉         | 9889/100000 [3:16:53<29:20:46,  1.17s/it]

96558 episode score is 784.32


 10%|▉         | 9890/100000 [3:16:54<29:26:07,  1.18s/it]

96568 episode score is 781.04


 10%|▉         | 9891/100000 [3:16:55<28:47:55,  1.15s/it]

96577 episode score is 801.05


 10%|▉         | 9892/100000 [3:16:57<29:10:04,  1.17s/it]

96587 episode score is 793.47


 10%|▉         | 9893/100000 [3:16:58<28:56:17,  1.16s/it]

96597 episode score is 745.87


 10%|▉         | 9894/100000 [3:16:59<28:41:45,  1.15s/it]

96607 episode score is 722.45


 10%|▉         | 9895/100000 [3:17:00<29:02:07,  1.16s/it]

96617 episode score is 783.88


 10%|▉         | 9896/100000 [3:17:01<29:11:22,  1.17s/it]

96627 episode score is 777.18


 10%|▉         | 9897/100000 [3:17:02<29:00:52,  1.16s/it]

96637 episode score is 743.27


 10%|▉         | 9898/100000 [3:17:04<29:03:16,  1.16s/it]

96647 episode score is 766.40


 10%|▉         | 9899/100000 [3:17:05<28:26:38,  1.14s/it]

96656 episode score is 795.07
96665 episode score is 797.20


 10%|▉         | 9900/100000 [3:17:07<36:10:31,  1.45s/it]

Iteration 9900: Average test reward: 803.11


 10%|▉         | 9901/100000 [3:17:08<33:58:28,  1.36s/it]

96674 episode score is 847.25


 10%|▉         | 9902/100000 [3:17:09<32:06:37,  1.28s/it]

96683 episode score is 815.74


 10%|▉         | 9903/100000 [3:17:10<31:58:48,  1.28s/it]

96693 episode score is 771.78


 10%|▉         | 9904/100000 [3:17:12<31:22:08,  1.25s/it]

96703 episode score is 785.57


 10%|▉         | 9905/100000 [3:17:13<30:57:47,  1.24s/it]

96713 episode score is 784.85


 10%|▉         | 9906/100000 [3:17:14<29:53:31,  1.19s/it]

96723 episode score is 704.05


 10%|▉         | 9907/100000 [3:17:15<29:34:11,  1.18s/it]

96733 episode score is 753.78


 10%|▉         | 9908/100000 [3:17:16<29:04:55,  1.16s/it]

96743 episode score is 725.21


 10%|▉         | 9909/100000 [3:17:17<28:37:53,  1.14s/it]

96753 episode score is 703.46


 10%|▉         | 9910/100000 [3:17:18<28:19:18,  1.13s/it]

96763 episode score is 697.78


 10%|▉         | 9911/100000 [3:17:19<28:18:50,  1.13s/it]

96773 episode score is 726.96


 10%|▉         | 9912/100000 [3:17:21<28:12:13,  1.13s/it]

96783 episode score is 735.33


 10%|▉         | 9913/100000 [3:17:22<28:05:54,  1.12s/it]

96793 episode score is 728.97


 10%|▉         | 9914/100000 [3:17:23<28:26:07,  1.14s/it]

96803 episode score is 769.30


 10%|▉         | 9915/100000 [3:17:24<28:06:59,  1.12s/it]

96813 episode score is 706.94


 10%|▉         | 9916/100000 [3:17:25<28:14:55,  1.13s/it]

96824 episode score is 667.31


 10%|▉         | 9917/100000 [3:17:26<28:12:17,  1.13s/it]

96834 episode score is 743.10


 10%|▉         | 9918/100000 [3:17:27<28:14:41,  1.13s/it]

96844 episode score is 743.32


 10%|▉         | 9919/100000 [3:17:28<28:19:52,  1.13s/it]

96854 episode score is 748.70


 10%|▉         | 9920/100000 [3:17:30<28:30:23,  1.14s/it]

96864 episode score is 763.81


 10%|▉         | 9921/100000 [3:17:31<28:35:49,  1.14s/it]

96874 episode score is 766.56


 10%|▉         | 9922/100000 [3:17:32<28:33:53,  1.14s/it]

96884 episode score is 754.52


 10%|▉         | 9923/100000 [3:17:33<28:38:46,  1.14s/it]

96894 episode score is 760.91


 10%|▉         | 9924/100000 [3:17:34<28:13:25,  1.13s/it]

96904 episode score is 718.44


 10%|▉         | 9925/100000 [3:17:35<27:56:54,  1.12s/it]

96914 episode score is 707.49


 10%|▉         | 9926/100000 [3:17:36<27:50:21,  1.11s/it]

96924 episode score is 727.85


 10%|▉         | 9927/100000 [3:17:38<28:26:47,  1.14s/it]

96934 episode score is 788.02


 10%|▉         | 9928/100000 [3:17:39<28:37:22,  1.14s/it]

96944 episode score is 762.43


 10%|▉         | 9929/100000 [3:17:40<28:28:51,  1.14s/it]

96954 episode score is 742.16


 10%|▉         | 9930/100000 [3:17:41<28:18:48,  1.13s/it]

96964 episode score is 733.56


 10%|▉         | 9931/100000 [3:17:42<28:01:50,  1.12s/it]

96974 episode score is 717.15


 10%|▉         | 9932/100000 [3:17:43<28:09:38,  1.13s/it]

96984 episode score is 745.59


 10%|▉         | 9933/100000 [3:17:44<28:19:49,  1.13s/it]

96994 episode score is 758.63


 10%|▉         | 9934/100000 [3:17:45<28:05:12,  1.12s/it]

97004 episode score is 716.85


 10%|▉         | 9935/100000 [3:17:47<28:07:03,  1.12s/it]

97014 episode score is 738.22


 10%|▉         | 9936/100000 [3:17:48<28:00:00,  1.12s/it]

97024 episode score is 712.73


 10%|▉         | 9937/100000 [3:17:49<27:58:16,  1.12s/it]

97034 episode score is 725.61


 10%|▉         | 9938/100000 [3:17:50<28:20:58,  1.13s/it]

97045 episode score is 691.04


 10%|▉         | 9939/100000 [3:17:51<28:24:17,  1.14s/it]

97055 episode score is 756.45


 10%|▉         | 9940/100000 [3:17:52<28:30:52,  1.14s/it]

97065 episode score is 760.43


 10%|▉         | 9941/100000 [3:17:53<28:25:51,  1.14s/it]

97075 episode score is 750.71


 10%|▉         | 9942/100000 [3:17:54<28:13:53,  1.13s/it]

97085 episode score is 735.50


 10%|▉         | 9943/100000 [3:17:56<28:18:42,  1.13s/it]

97095 episode score is 754.56


 10%|▉         | 9944/100000 [3:17:57<28:18:27,  1.13s/it]

97105 episode score is 742.64


 10%|▉         | 9945/100000 [3:17:58<28:18:01,  1.13s/it]

97115 episode score is 744.83


 10%|▉         | 9946/100000 [3:17:59<28:16:34,  1.13s/it]

97125 episode score is 745.23


 10%|▉         | 9947/100000 [3:18:00<28:17:13,  1.13s/it]

97135 episode score is 748.54


 10%|▉         | 9948/100000 [3:18:01<28:31:42,  1.14s/it]

97145 episode score is 757.58


 10%|▉         | 9949/100000 [3:18:02<28:26:52,  1.14s/it]

97155 episode score is 741.40


 10%|▉         | 9950/100000 [3:18:04<28:16:43,  1.13s/it]

97165 episode score is 731.03


 10%|▉         | 9951/100000 [3:18:05<28:24:44,  1.14s/it]

97175 episode score is 754.26


 10%|▉         | 9952/100000 [3:18:06<28:17:04,  1.13s/it]

97185 episode score is 739.01


 10%|▉         | 9953/100000 [3:18:07<28:32:22,  1.14s/it]

97195 episode score is 775.71


 10%|▉         | 9954/100000 [3:18:08<28:14:48,  1.13s/it]

97205 episode score is 732.46


 10%|▉         | 9955/100000 [3:18:09<28:33:18,  1.14s/it]

97215 episode score is 768.69


 10%|▉         | 9956/100000 [3:18:10<28:18:53,  1.13s/it]

97225 episode score is 722.23


 10%|▉         | 9957/100000 [3:18:11<27:58:49,  1.12s/it]

97235 episode score is 713.55


 10%|▉         | 9958/100000 [3:18:13<28:17:30,  1.13s/it]

97245 episode score is 759.19


 10%|▉         | 9959/100000 [3:18:14<28:28:09,  1.14s/it]

97255 episode score is 753.79


 10%|▉         | 9960/100000 [3:18:15<28:36:33,  1.14s/it]

97265 episode score is 764.23


 10%|▉         | 9961/100000 [3:18:16<28:27:24,  1.14s/it]

97274 episode score is 833.93


 10%|▉         | 9962/100000 [3:18:17<28:22:53,  1.13s/it]

97283 episode score is 828.26


 10%|▉         | 9963/100000 [3:18:18<28:06:07,  1.12s/it]

97292 episode score is 795.83


 10%|▉         | 9964/100000 [3:18:19<28:31:24,  1.14s/it]

97302 episode score is 774.59


 10%|▉         | 9965/100000 [3:18:21<28:47:19,  1.15s/it]

97312 episode score is 777.55


 10%|▉         | 9966/100000 [3:18:22<28:38:05,  1.14s/it]

97322 episode score is 734.41


 10%|▉         | 9967/100000 [3:18:23<28:08:33,  1.13s/it]

97332 episode score is 689.19


 10%|▉         | 9968/100000 [3:18:24<28:13:35,  1.13s/it]

97342 episode score is 754.70


 10%|▉         | 9969/100000 [3:18:25<28:21:30,  1.13s/it]

97352 episode score is 757.16


 10%|▉         | 9970/100000 [3:18:26<28:34:38,  1.14s/it]

97362 episode score is 765.51


 10%|▉         | 9971/100000 [3:18:27<28:14:29,  1.13s/it]

97372 episode score is 708.83


 10%|▉         | 9972/100000 [3:18:29<28:39:33,  1.15s/it]

97382 episode score is 772.56


 10%|▉         | 9973/100000 [3:18:30<28:51:02,  1.15s/it]

97392 episode score is 765.16


 10%|▉         | 9974/100000 [3:18:31<28:34:57,  1.14s/it]

97401 episode score is 814.55


 10%|▉         | 9975/100000 [3:18:32<29:26:45,  1.18s/it]

97410 episode score is 860.87


 10%|▉         | 9976/100000 [3:18:33<28:47:54,  1.15s/it]

97419 episode score is 797.12


 10%|▉         | 9977/100000 [3:18:34<28:59:20,  1.16s/it]

97429 episode score is 773.51


 10%|▉         | 9978/100000 [3:18:36<29:07:17,  1.16s/it]

97440 episode score is 693.02


 10%|▉         | 9979/100000 [3:18:37<28:57:08,  1.16s/it]

97450 episode score is 750.23


 10%|▉         | 9980/100000 [3:18:38<28:41:17,  1.15s/it]

97460 episode score is 729.85


 10%|▉         | 9981/100000 [3:18:39<28:46:49,  1.15s/it]

97470 episode score is 755.49


 10%|▉         | 9982/100000 [3:18:40<28:55:47,  1.16s/it]

97481 episode score is 673.27


 10%|▉         | 9983/100000 [3:18:41<28:35:06,  1.14s/it]

97491 episode score is 717.75


 10%|▉         | 9984/100000 [3:18:42<28:26:25,  1.14s/it]

97501 episode score is 735.68


 10%|▉         | 9985/100000 [3:18:44<28:23:23,  1.14s/it]

97511 episode score is 745.24


 10%|▉         | 9986/100000 [3:18:45<28:20:05,  1.13s/it]

97522 episode score is 655.31


 10%|▉         | 9987/100000 [3:18:46<28:47:24,  1.15s/it]

97533 episode score is 706.48


 10%|▉         | 9988/100000 [3:18:47<28:23:51,  1.14s/it]

97543 episode score is 702.40


 10%|▉         | 9989/100000 [3:18:48<28:44:48,  1.15s/it]

97553 episode score is 773.13


 10%|▉         | 9990/100000 [3:18:49<29:03:12,  1.16s/it]

97563 episode score is 765.39


 10%|▉         | 9991/100000 [3:18:50<28:32:53,  1.14s/it]

97573 episode score is 713.32


 10%|▉         | 9992/100000 [3:18:52<28:12:31,  1.13s/it]

97583 episode score is 705.76


 10%|▉         | 9993/100000 [3:18:53<28:11:33,  1.13s/it]

97593 episode score is 729.99


 10%|▉         | 9994/100000 [3:18:54<27:56:02,  1.12s/it]

97603 episode score is 698.00


 10%|▉         | 9995/100000 [3:18:55<27:43:10,  1.11s/it]

97614 episode score is 600.22


 10%|▉         | 9996/100000 [3:18:56<28:01:36,  1.12s/it]

97626 episode score is 581.71


 10%|▉         | 9997/100000 [3:18:57<28:08:27,  1.13s/it]

97637 episode score is 654.10


 10%|▉         | 9998/100000 [3:18:58<27:51:40,  1.11s/it]

97647 episode score is 700.50


 10%|▉         | 9999/100000 [3:18:59<28:00:07,  1.12s/it]

97658 episode score is 644.66
97668 episode score is 709.88


 10%|█         | 10000/100000 [3:19:01<35:16:01,  1.41s/it]

Iteration 10000: Average test reward: 709.57


 10%|█         | 10001/100000 [3:19:03<33:21:22,  1.33s/it]

97678 episode score is 747.78


 10%|█         | 10002/100000 [3:19:04<31:32:35,  1.26s/it]

97687 episode score is 804.70


 10%|█         | 10003/100000 [3:19:05<30:38:22,  1.23s/it]

97697 episode score is 745.01


 10%|█         | 10004/100000 [3:19:06<29:28:56,  1.18s/it]

97706 episode score is 794.17


 10%|█         | 10005/100000 [3:19:07<28:50:41,  1.15s/it]

97715 episode score is 811.49


 10%|█         | 10006/100000 [3:19:08<28:28:00,  1.14s/it]

97724 episode score is 809.52


 10%|█         | 10007/100000 [3:19:09<28:05:44,  1.12s/it]

97733 episode score is 798.85


 10%|█         | 10008/100000 [3:19:10<28:19:57,  1.13s/it]

97742 episode score is 859.57


 10%|█         | 10009/100000 [3:19:11<28:32:54,  1.14s/it]

97751 episode score is 864.55


 10%|█         | 10010/100000 [3:19:13<28:35:57,  1.14s/it]

97760 episode score is 841.52


 10%|█         | 10011/100000 [3:19:14<28:59:17,  1.16s/it]

97770 episode score is 792.54


 10%|█         | 10012/100000 [3:19:15<29:18:25,  1.17s/it]

97779 episode score is 910.57


 10%|█         | 10013/100000 [3:19:16<29:21:17,  1.17s/it]

97788 episode score is 886.65


 10%|█         | 10014/100000 [3:19:17<29:14:27,  1.17s/it]

97797 episode score is 856.11


 10%|█         | 10015/100000 [3:19:18<28:38:41,  1.15s/it]

97806 episode score is 805.89


 10%|█         | 10016/100000 [3:19:20<28:49:55,  1.15s/it]

97816 episode score is 763.22


 10%|█         | 10017/100000 [3:19:21<29:29:38,  1.18s/it]

97826 episode score is 767.38


 10%|█         | 10018/100000 [3:19:22<28:50:40,  1.15s/it]

97835 episode score is 799.57


 10%|█         | 10019/100000 [3:19:23<29:07:31,  1.17s/it]

97845 episode score is 785.45


 10%|█         | 10020/100000 [3:19:24<28:43:22,  1.15s/it]

97854 episode score is 824.55


 10%|█         | 10021/100000 [3:19:25<28:21:18,  1.13s/it]

97863 episode score is 814.30


 10%|█         | 10022/100000 [3:19:27<28:27:46,  1.14s/it]

97872 episode score is 869.71


 10%|█         | 10023/100000 [3:19:28<28:21:40,  1.13s/it]

97881 episode score is 844.90


 10%|█         | 10024/100000 [3:19:29<28:04:00,  1.12s/it]

97890 episode score is 823.72


 10%|█         | 10025/100000 [3:19:30<27:59:44,  1.12s/it]

97899 episode score is 799.62


 10%|█         | 10026/100000 [3:19:31<27:51:28,  1.11s/it]

97908 episode score is 804.33


 10%|█         | 10027/100000 [3:19:32<28:11:14,  1.13s/it]

97917 episode score is 862.83


 10%|█         | 10028/100000 [3:19:33<28:32:29,  1.14s/it]

97927 episode score is 763.18


 10%|█         | 10029/100000 [3:19:34<28:43:33,  1.15s/it]

97936 episode score is 865.35


 10%|█         | 10030/100000 [3:19:36<28:52:39,  1.16s/it]

97945 episode score is 855.86


 10%|█         | 10031/100000 [3:19:37<28:46:42,  1.15s/it]

97954 episode score is 850.37


 10%|█         | 10032/100000 [3:19:38<28:20:52,  1.13s/it]

97963 episode score is 806.33


 10%|█         | 10033/100000 [3:19:39<27:58:29,  1.12s/it]

97972 episode score is 791.74


 10%|█         | 10034/100000 [3:19:40<27:54:02,  1.12s/it]

97982 episode score is 706.49


 10%|█         | 10035/100000 [3:19:41<27:36:17,  1.10s/it]

97991 episode score is 786.52


 10%|█         | 10036/100000 [3:19:42<27:33:14,  1.10s/it]

98000 episode score is 801.49


 10%|█         | 10037/100000 [3:19:43<27:44:35,  1.11s/it]

98009 episode score is 827.57


 10%|█         | 10038/100000 [3:19:44<27:29:06,  1.10s/it]

98018 episode score is 789.37


 10%|█         | 10039/100000 [3:19:46<27:31:40,  1.10s/it]

98027 episode score is 829.69


 10%|█         | 10040/100000 [3:19:47<28:21:06,  1.13s/it]

98036 episode score is 905.45


 10%|█         | 10041/100000 [3:19:48<28:15:10,  1.13s/it]

98045 episode score is 806.68


 10%|█         | 10042/100000 [3:19:49<28:31:46,  1.14s/it]

98055 episode score is 747.49


 10%|█         | 10043/100000 [3:19:50<28:09:56,  1.13s/it]

98064 episode score is 792.92


 10%|█         | 10044/100000 [3:19:51<28:07:18,  1.13s/it]

98073 episode score is 824.95


 10%|█         | 10045/100000 [3:19:52<28:09:33,  1.13s/it]

98082 episode score is 831.85


 10%|█         | 10046/100000 [3:19:53<27:55:10,  1.12s/it]

98091 episode score is 795.32


 10%|█         | 10047/100000 [3:19:55<27:47:00,  1.11s/it]

98100 episode score is 816.92


 10%|█         | 10048/100000 [3:19:56<27:55:32,  1.12s/it]

98108 episode score is 956.12


 10%|█         | 10049/100000 [3:19:57<28:00:44,  1.12s/it]

98117 episode score is 829.27


 10%|█         | 10050/100000 [3:19:58<28:30:21,  1.14s/it]

98126 episode score is 872.18


 10%|█         | 10051/100000 [3:19:59<28:38:37,  1.15s/it]

98135 episode score is 862.97


 10%|█         | 10052/100000 [3:20:00<28:33:15,  1.14s/it]

98144 episode score is 844.01


 10%|█         | 10053/100000 [3:20:01<28:34:12,  1.14s/it]

98153 episode score is 843.75


 10%|█         | 10054/100000 [3:20:03<28:45:38,  1.15s/it]

98162 episode score is 851.27


 10%|█         | 10055/100000 [3:20:04<29:01:21,  1.16s/it]

98171 episode score is 871.11


 10%|█         | 10056/100000 [3:20:05<28:47:14,  1.15s/it]

98180 episode score is 839.10


 10%|█         | 10057/100000 [3:20:06<28:54:15,  1.16s/it]

98190 episode score is 771.58


 10%|█         | 10058/100000 [3:20:07<29:00:26,  1.16s/it]

98200 episode score is 773.10


 10%|█         | 10059/100000 [3:20:08<28:53:20,  1.16s/it]

98210 episode score is 755.58


 10%|█         | 10060/100000 [3:20:10<28:20:51,  1.13s/it]

98219 episode score is 795.17


 10%|█         | 10061/100000 [3:20:11<28:34:30,  1.14s/it]

98229 episode score is 767.00


 10%|█         | 10062/100000 [3:20:12<28:42:20,  1.15s/it]

98239 episode score is 746.39


 10%|█         | 10063/100000 [3:20:13<28:10:34,  1.13s/it]

98248 episode score is 784.48


 10%|█         | 10064/100000 [3:20:14<28:47:24,  1.15s/it]

98258 episode score is 782.69


 10%|█         | 10065/100000 [3:20:15<28:37:28,  1.15s/it]

98268 episode score is 738.91


 10%|█         | 10066/100000 [3:20:16<28:07:26,  1.13s/it]

98277 episode score is 792.23


 10%|█         | 10067/100000 [3:20:17<27:56:11,  1.12s/it]

98286 episode score is 795.92


 10%|█         | 10068/100000 [3:20:19<28:17:56,  1.13s/it]

98295 episode score is 863.60


 10%|█         | 10069/100000 [3:20:20<27:57:40,  1.12s/it]

98304 episode score is 780.84


 10%|█         | 10070/100000 [3:20:21<28:03:14,  1.12s/it]

98314 episode score is 737.83


 10%|█         | 10071/100000 [3:20:22<28:26:35,  1.14s/it]

98324 episode score is 766.05


 10%|█         | 10072/100000 [3:20:23<28:56:51,  1.16s/it]

98334 episode score is 792.87


 10%|█         | 10073/100000 [3:20:24<28:33:24,  1.14s/it]

98344 episode score is 721.85


 10%|█         | 10074/100000 [3:20:26<28:56:19,  1.16s/it]

98354 episode score is 792.09


 10%|█         | 10075/100000 [3:20:27<28:35:57,  1.14s/it]

98364 episode score is 726.25


 10%|█         | 10076/100000 [3:20:28<28:53:05,  1.16s/it]

98374 episode score is 785.49


 10%|█         | 10077/100000 [3:20:29<28:50:02,  1.15s/it]

98384 episode score is 754.26


 10%|█         | 10078/100000 [3:20:30<28:31:34,  1.14s/it]

98393 episode score is 810.07


 10%|█         | 10079/100000 [3:20:31<28:16:58,  1.13s/it]

98402 episode score is 820.50


 10%|█         | 10080/100000 [3:20:32<28:42:10,  1.15s/it]

98412 episode score is 775.38


 10%|█         | 10081/100000 [3:20:34<28:38:39,  1.15s/it]

98422 episode score is 751.49


 10%|█         | 10082/100000 [3:20:35<28:15:04,  1.13s/it]

98432 episode score is 715.05


 10%|█         | 10083/100000 [3:20:36<27:57:14,  1.12s/it]

98441 episode score is 814.68


 10%|█         | 10084/100000 [3:20:37<28:25:58,  1.14s/it]

98451 episode score is 769.79


 10%|█         | 10085/100000 [3:20:38<28:38:16,  1.15s/it]

98461 episode score is 780.15


 10%|█         | 10086/100000 [3:20:39<28:40:25,  1.15s/it]

98471 episode score is 759.98


 10%|█         | 10087/100000 [3:20:40<28:45:10,  1.15s/it]

98481 episode score is 777.54


 10%|█         | 10088/100000 [3:20:41<28:14:00,  1.13s/it]

98490 episode score is 809.51


 10%|█         | 10089/100000 [3:20:43<28:25:10,  1.14s/it]

98500 episode score is 766.62


 10%|█         | 10090/100000 [3:20:44<28:31:24,  1.14s/it]

98510 episode score is 768.20


 10%|█         | 10091/100000 [3:20:45<28:19:19,  1.13s/it]

98520 episode score is 728.30


 10%|█         | 10092/100000 [3:20:46<28:12:43,  1.13s/it]

98530 episode score is 744.65


 10%|█         | 10093/100000 [3:20:47<29:10:33,  1.17s/it]

98540 episode score is 759.74


 10%|█         | 10094/100000 [3:20:48<29:15:35,  1.17s/it]

98550 episode score is 778.76


 10%|█         | 10095/100000 [3:20:50<29:06:32,  1.17s/it]

98560 episode score is 757.60


 10%|█         | 10096/100000 [3:20:51<28:50:56,  1.16s/it]

98570 episode score is 747.88


 10%|█         | 10097/100000 [3:20:52<28:42:41,  1.15s/it]

98580 episode score is 751.07


 10%|█         | 10098/100000 [3:20:53<28:49:11,  1.15s/it]

98590 episode score is 771.76


 10%|█         | 10099/100000 [3:20:54<28:18:51,  1.13s/it]

98600 episode score is 710.93
98610 episode score is 747.78


 10%|█         | 10100/100000 [3:20:56<35:49:39,  1.43s/it]

Iteration 10100: Average test reward: 734.57


 10%|█         | 10101/100000 [3:20:57<33:10:44,  1.33s/it]

98620 episode score is 709.77


 10%|█         | 10102/100000 [3:20:58<31:49:40,  1.27s/it]

98631 episode score is 680.05


 10%|█         | 10103/100000 [3:21:00<30:39:52,  1.23s/it]

98641 episode score is 741.39


 10%|█         | 10104/100000 [3:21:01<29:38:59,  1.19s/it]

98651 episode score is 722.27


 10%|█         | 10105/100000 [3:21:02<28:56:34,  1.16s/it]

98661 episode score is 721.93


 10%|█         | 10106/100000 [3:21:03<28:41:48,  1.15s/it]

98671 episode score is 720.72


 10%|█         | 10107/100000 [3:21:04<28:26:34,  1.14s/it]

98681 episode score is 730.74


 10%|█         | 10108/100000 [3:21:05<28:01:28,  1.12s/it]

98691 episode score is 712.06


 10%|█         | 10109/100000 [3:21:06<28:03:12,  1.12s/it]

98701 episode score is 737.51


 10%|█         | 10110/100000 [3:21:07<27:55:27,  1.12s/it]

98711 episode score is 726.65


 10%|█         | 10111/100000 [3:21:09<28:21:13,  1.14s/it]

98722 episode score is 697.88


 10%|█         | 10112/100000 [3:21:10<28:14:45,  1.13s/it]

98733 episode score is 645.79


 10%|█         | 10113/100000 [3:21:11<28:06:00,  1.13s/it]

98744 episode score is 637.36


 10%|█         | 10114/100000 [3:21:12<28:04:32,  1.12s/it]

98754 episode score is 737.81


 10%|█         | 10115/100000 [3:21:13<28:01:51,  1.12s/it]

98764 episode score is 729.62


 10%|█         | 10116/100000 [3:21:14<28:00:29,  1.12s/it]

98774 episode score is 734.40


 10%|█         | 10117/100000 [3:21:15<28:22:03,  1.14s/it]

98785 episode score is 685.10


 10%|█         | 10118/100000 [3:21:16<28:21:55,  1.14s/it]

98795 episode score is 748.85


 10%|█         | 10119/100000 [3:21:18<28:11:58,  1.13s/it]

98805 episode score is 709.33


 10%|█         | 10120/100000 [3:21:19<28:30:40,  1.14s/it]

98815 episode score is 758.93


 10%|█         | 10121/100000 [3:21:20<28:22:27,  1.14s/it]

98825 episode score is 737.68


 10%|█         | 10122/100000 [3:21:21<28:29:26,  1.14s/it]

98835 episode score is 757.55


 10%|█         | 10123/100000 [3:21:22<28:35:41,  1.15s/it]

98845 episode score is 754.90


 10%|█         | 10124/100000 [3:21:23<28:13:35,  1.13s/it]

98855 episode score is 719.79


 10%|█         | 10125/100000 [3:21:24<28:38:08,  1.15s/it]

98865 episode score is 780.53


 10%|█         | 10126/100000 [3:21:26<28:43:13,  1.15s/it]

98875 episode score is 765.62


 10%|█         | 10127/100000 [3:21:27<28:27:38,  1.14s/it]

98885 episode score is 744.33


 10%|█         | 10128/100000 [3:21:28<28:02:59,  1.12s/it]

98895 episode score is 704.89


 10%|█         | 10129/100000 [3:21:29<27:53:06,  1.12s/it]

98904 episode score is 802.32


 10%|█         | 10130/100000 [3:21:30<27:40:23,  1.11s/it]

98913 episode score is 809.47


 10%|█         | 10131/100000 [3:21:31<27:56:59,  1.12s/it]

98922 episode score is 839.31


 10%|█         | 10132/100000 [3:21:32<28:23:00,  1.14s/it]

98931 episode score is 861.53


 10%|█         | 10133/100000 [3:21:33<28:19:07,  1.13s/it]

98940 episode score is 828.37


 10%|█         | 10134/100000 [3:21:35<28:21:46,  1.14s/it]

98950 episode score is 747.99


 10%|█         | 10135/100000 [3:21:36<28:36:17,  1.15s/it]

98960 episode score is 773.07


 10%|█         | 10136/100000 [3:21:37<28:34:58,  1.15s/it]

98970 episode score is 749.82


 10%|█         | 10137/100000 [3:21:38<28:44:51,  1.15s/it]

98980 episode score is 763.27


 10%|█         | 10138/100000 [3:21:39<28:39:37,  1.15s/it]

98990 episode score is 746.63


 10%|█         | 10139/100000 [3:21:40<28:18:32,  1.13s/it]

98999 episode score is 809.45


 10%|█         | 10140/100000 [3:21:41<28:42:17,  1.15s/it]

99008 episode score is 873.47


 10%|█         | 10141/100000 [3:21:43<28:56:03,  1.16s/it]

99017 episode score is 865.84


 10%|█         | 10142/100000 [3:21:44<28:48:32,  1.15s/it]

99026 episode score is 843.49


 10%|█         | 10143/100000 [3:21:45<29:08:12,  1.17s/it]

99036 episode score is 785.04


 10%|█         | 10144/100000 [3:21:46<28:46:32,  1.15s/it]

99045 episode score is 835.38


 10%|█         | 10145/100000 [3:21:47<28:43:36,  1.15s/it]

99054 episode score is 842.29


 10%|█         | 10146/100000 [3:21:48<28:31:42,  1.14s/it]

99063 episode score is 831.35


 10%|█         | 10147/100000 [3:21:49<28:28:13,  1.14s/it]

99072 episode score is 825.79


 10%|█         | 10148/100000 [3:21:51<28:09:44,  1.13s/it]

99081 episode score is 806.96


 10%|█         | 10149/100000 [3:21:52<28:28:20,  1.14s/it]

99091 episode score is 766.66


 10%|█         | 10150/100000 [3:21:53<28:27:27,  1.14s/it]

99100 episode score is 835.67


 10%|█         | 10151/100000 [3:21:54<28:13:53,  1.13s/it]

99109 episode score is 814.14


 10%|█         | 10152/100000 [3:21:55<27:53:51,  1.12s/it]

99118 episode score is 785.15


 10%|█         | 10153/100000 [3:21:56<28:12:52,  1.13s/it]

99127 episode score is 866.37


 10%|█         | 10154/100000 [3:21:57<28:14:07,  1.13s/it]

99136 episode score is 831.31


 10%|█         | 10155/100000 [3:21:59<28:25:55,  1.14s/it]

99145 episode score is 857.23


 10%|█         | 10156/100000 [3:22:00<28:29:06,  1.14s/it]

99154 episode score is 849.60


 10%|█         | 10157/100000 [3:22:01<29:01:04,  1.16s/it]

99164 episode score is 778.70


 10%|█         | 10158/100000 [3:22:02<29:19:53,  1.18s/it]

99173 episode score is 816.96


 10%|█         | 10159/100000 [3:22:03<29:06:10,  1.17s/it]

99182 episode score is 846.01


 10%|█         | 10160/100000 [3:22:04<29:19:57,  1.18s/it]

99193 episode score is 700.07


 10%|█         | 10161/100000 [3:22:06<28:42:36,  1.15s/it]

99202 episode score is 805.02


 10%|█         | 10162/100000 [3:22:07<28:11:24,  1.13s/it]

99212 episode score is 692.76


 10%|█         | 10163/100000 [3:22:08<28:08:46,  1.13s/it]

99221 episode score is 833.45


 10%|█         | 10164/100000 [3:22:09<28:39:00,  1.15s/it]

99231 episode score is 781.09


 10%|█         | 10165/100000 [3:22:10<28:15:49,  1.13s/it]

99240 episode score is 806.44


 10%|█         | 10166/100000 [3:22:11<28:05:59,  1.13s/it]

99249 episode score is 818.40


 10%|█         | 10167/100000 [3:22:12<28:33:29,  1.14s/it]

99259 episode score is 776.88


 10%|█         | 10168/100000 [3:22:13<28:28:36,  1.14s/it]

99269 episode score is 739.31


 10%|█         | 10169/100000 [3:22:15<28:15:00,  1.13s/it]

99278 episode score is 822.50


 10%|█         | 10170/100000 [3:22:16<28:17:47,  1.13s/it]

99288 episode score is 742.73


 10%|█         | 10171/100000 [3:22:17<28:01:08,  1.12s/it]

99298 episode score is 711.70


 10%|█         | 10172/100000 [3:22:18<28:30:53,  1.14s/it]

99308 episode score is 782.06


 10%|█         | 10173/100000 [3:22:19<27:57:55,  1.12s/it]

99317 episode score is 784.01


 10%|█         | 10174/100000 [3:22:20<27:56:23,  1.12s/it]

99326 episode score is 827.73


 10%|█         | 10175/100000 [3:22:21<27:36:56,  1.11s/it]

99335 episode score is 793.19


 10%|█         | 10176/100000 [3:22:22<27:37:03,  1.11s/it]

99344 episode score is 833.52


 10%|█         | 10177/100000 [3:22:24<28:19:59,  1.14s/it]

99354 episode score is 814.61


 10%|█         | 10178/100000 [3:22:25<27:51:11,  1.12s/it]

99363 episode score is 794.61


 10%|█         | 10179/100000 [3:22:26<28:06:35,  1.13s/it]

99373 episode score is 766.37


 10%|█         | 10180/100000 [3:22:27<28:27:54,  1.14s/it]

99383 episode score is 781.32


 10%|█         | 10181/100000 [3:22:28<27:59:30,  1.12s/it]

99392 episode score is 799.65


 10%|█         | 10182/100000 [3:22:29<27:55:25,  1.12s/it]

99401 episode score is 829.50


 10%|█         | 10183/100000 [3:22:30<27:35:53,  1.11s/it]

99410 episode score is 793.90


 10%|█         | 10184/100000 [3:22:31<27:57:56,  1.12s/it]

99420 episode score is 763.10


 10%|█         | 10185/100000 [3:22:33<28:01:23,  1.12s/it]

99429 episode score is 820.70


 10%|█         | 10186/100000 [3:22:34<27:43:07,  1.11s/it]

99438 episode score is 801.71


 10%|█         | 10187/100000 [3:22:35<27:59:20,  1.12s/it]

99448 episode score is 752.17


 10%|█         | 10188/100000 [3:22:36<27:39:03,  1.11s/it]

99457 episode score is 798.58


 10%|█         | 10189/100000 [3:22:37<27:29:31,  1.10s/it]

99466 episode score is 800.13


 10%|█         | 10190/100000 [3:22:38<27:31:13,  1.10s/it]

99475 episode score is 821.91


 10%|█         | 10191/100000 [3:22:39<27:30:30,  1.10s/it]

99484 episode score is 809.71


 10%|█         | 10192/100000 [3:22:40<27:50:08,  1.12s/it]

99494 episode score is 739.53


 10%|█         | 10193/100000 [3:22:41<28:26:53,  1.14s/it]

99504 episode score is 777.17


 10%|█         | 10194/100000 [3:22:43<28:03:26,  1.12s/it]

99514 episode score is 685.24


 10%|█         | 10195/100000 [3:22:44<27:42:00,  1.11s/it]

99523 episode score is 787.36


 10%|█         | 10196/100000 [3:22:45<27:51:23,  1.12s/it]

99533 episode score is 734.80


 10%|█         | 10197/100000 [3:22:46<28:32:43,  1.14s/it]

99543 episode score is 798.70


 10%|█         | 10198/100000 [3:22:47<28:51:16,  1.16s/it]

99553 episode score is 773.07


 10%|█         | 10199/100000 [3:22:48<29:00:10,  1.16s/it]

99563 episode score is 760.83
99572 episode score is 790.77


 10%|█         | 10200/100000 [3:22:50<35:31:26,  1.42s/it]

Iteration 10200: Average test reward: 647.76


 10%|█         | 10201/100000 [3:22:51<33:10:18,  1.33s/it]

99582 episode score is 729.61


 10%|█         | 10202/100000 [3:22:53<31:49:55,  1.28s/it]

99592 episode score is 692.02


 10%|█         | 10203/100000 [3:22:54<30:27:31,  1.22s/it]

99602 episode score is 702.73


 10%|█         | 10204/100000 [3:22:55<29:41:41,  1.19s/it]

99612 episode score is 710.95


 10%|█         | 10205/100000 [3:22:56<29:41:53,  1.19s/it]

99622 episode score is 794.67


 10%|█         | 10206/100000 [3:22:57<29:03:35,  1.17s/it]

99632 episode score is 709.28


 10%|█         | 10207/100000 [3:22:58<28:45:51,  1.15s/it]

99639 episode score is 971.69


 10%|█         | 10208/100000 [3:22:59<28:37:46,  1.15s/it]

99649 episode score is 743.08


 10%|█         | 10209/100000 [3:23:01<28:27:05,  1.14s/it]

99659 episode score is 740.06


 10%|█         | 10210/100000 [3:23:02<28:29:15,  1.14s/it]

99669 episode score is 744.43


 10%|█         | 10211/100000 [3:23:03<28:02:18,  1.12s/it]

99678 episode score is 779.68


 10%|█         | 10212/100000 [3:23:04<28:21:57,  1.14s/it]

99687 episode score is 862.87


 10%|█         | 10213/100000 [3:23:05<28:10:54,  1.13s/it]

99697 episode score is 719.62


 10%|█         | 10214/100000 [3:23:06<28:39:36,  1.15s/it]

99707 episode score is 788.28


 10%|█         | 10215/100000 [3:23:07<28:04:17,  1.13s/it]

99716 episode score is 785.21


 10%|█         | 10216/100000 [3:23:08<28:27:09,  1.14s/it]

99726 episode score is 779.62


 10%|█         | 10217/100000 [3:23:10<28:19:39,  1.14s/it]

99735 episode score is 831.18


 10%|█         | 10218/100000 [3:23:11<28:22:52,  1.14s/it]

99745 episode score is 745.36


 10%|█         | 10219/100000 [3:23:12<28:44:32,  1.15s/it]

99754 episode score is 800.64


 10%|█         | 10220/100000 [3:23:13<28:48:28,  1.16s/it]

99764 episode score is 772.34


 10%|█         | 10221/100000 [3:23:14<28:12:21,  1.13s/it]

99773 episode score is 800.27


 10%|█         | 10222/100000 [3:23:15<27:50:13,  1.12s/it]

99782 episode score is 799.68


 10%|█         | 10223/100000 [3:23:16<28:00:23,  1.12s/it]

99792 episode score is 746.99


 10%|█         | 10224/100000 [3:23:18<28:06:25,  1.13s/it]

99802 episode score is 744.33


 10%|█         | 10225/100000 [3:23:19<28:07:55,  1.13s/it]

99812 episode score is 734.78


 10%|█         | 10226/100000 [3:23:20<28:26:56,  1.14s/it]

99822 episode score is 782.39


 10%|█         | 10227/100000 [3:23:21<28:06:04,  1.13s/it]

99831 episode score is 828.18


 10%|█         | 10228/100000 [3:23:22<28:25:20,  1.14s/it]

99841 episode score is 770.96


 10%|█         | 10229/100000 [3:23:23<28:07:15,  1.13s/it]

99850 episode score is 802.66


 10%|█         | 10230/100000 [3:23:24<27:46:20,  1.11s/it]

99859 episode score is 800.61


 10%|█         | 10231/100000 [3:23:25<28:05:40,  1.13s/it]

99869 episode score is 773.69


 10%|█         | 10232/100000 [3:23:27<27:49:00,  1.12s/it]

99878 episode score is 811.83


 10%|█         | 10233/100000 [3:23:28<27:46:30,  1.11s/it]

99887 episode score is 821.94


 10%|█         | 10234/100000 [3:23:29<27:43:42,  1.11s/it]

99896 episode score is 828.48


 10%|█         | 10235/100000 [3:23:30<27:38:45,  1.11s/it]

99905 episode score is 809.58


 10%|█         | 10236/100000 [3:23:31<27:22:24,  1.10s/it]

99914 episode score is 794.96


 10%|█         | 10237/100000 [3:23:32<27:17:19,  1.09s/it]

99923 episode score is 793.20


 10%|█         | 10238/100000 [3:23:33<27:48:46,  1.12s/it]

99933 episode score is 767.15


 10%|█         | 10239/100000 [3:23:34<28:24:10,  1.14s/it]

99943 episode score is 799.21


 10%|█         | 10240/100000 [3:23:35<27:54:12,  1.12s/it]

99952 episode score is 790.52


 10%|█         | 10241/100000 [3:23:37<27:46:46,  1.11s/it]

99962 episode score is 719.57


 10%|█         | 10242/100000 [3:23:38<27:47:06,  1.11s/it]

99972 episode score is 730.87


 10%|█         | 10243/100000 [3:23:39<27:42:58,  1.11s/it]

99982 episode score is 728.05


 10%|█         | 10244/100000 [3:23:40<28:02:43,  1.12s/it]

99992 episode score is 772.88


 10%|█         | 10245/100000 [3:23:41<28:26:55,  1.14s/it]

100002 episode score is 791.30


 10%|█         | 10246/100000 [3:23:42<28:38:29,  1.15s/it]

100012 episode score is 759.47


 10%|█         | 10247/100000 [3:23:43<28:31:41,  1.14s/it]

100022 episode score is 743.36


 10%|█         | 10248/100000 [3:23:45<28:25:53,  1.14s/it]

100032 episode score is 738.59


 10%|█         | 10249/100000 [3:23:46<28:40:28,  1.15s/it]

100042 episode score is 766.06


 10%|█         | 10250/100000 [3:23:47<28:45:34,  1.15s/it]

100052 episode score is 772.79


 10%|█         | 10251/100000 [3:23:48<28:52:27,  1.16s/it]

100062 episode score is 770.45


 10%|█         | 10252/100000 [3:23:49<29:00:21,  1.16s/it]

100072 episode score is 780.03


 10%|█         | 10253/100000 [3:23:50<28:47:47,  1.16s/it]

100082 episode score is 748.95


 10%|█         | 10254/100000 [3:23:51<28:24:22,  1.14s/it]

100092 episode score is 722.75


 10%|█         | 10255/100000 [3:23:53<28:00:52,  1.12s/it]

100102 episode score is 709.27


 10%|█         | 10256/100000 [3:23:54<28:04:58,  1.13s/it]

100113 episode score is 663.18


 10%|█         | 10257/100000 [3:23:55<28:24:03,  1.14s/it]

100124 episode score is 696.03


 10%|█         | 10258/100000 [3:23:56<28:16:56,  1.13s/it]

100134 episode score is 740.84


 10%|█         | 10259/100000 [3:23:57<27:59:58,  1.12s/it]

100145 episode score is 635.61


 10%|█         | 10260/100000 [3:23:58<27:50:32,  1.12s/it]

100155 episode score is 729.48


 10%|█         | 10261/100000 [3:23:59<28:07:49,  1.13s/it]

100166 episode score is 683.34


 10%|█         | 10262/100000 [3:24:00<28:25:59,  1.14s/it]

100177 episode score is 692.11


 10%|█         | 10263/100000 [3:24:02<28:25:16,  1.14s/it]

100187 episode score is 740.15


 10%|█         | 10264/100000 [3:24:03<27:57:55,  1.12s/it]

100197 episode score is 699.36


 10%|█         | 10265/100000 [3:24:04<27:55:59,  1.12s/it]

100207 episode score is 737.32


 10%|█         | 10266/100000 [3:24:05<28:00:55,  1.12s/it]

100217 episode score is 754.27


 10%|█         | 10267/100000 [3:24:06<28:26:03,  1.14s/it]

100228 episode score is 700.52


 10%|█         | 10268/100000 [3:24:07<28:19:55,  1.14s/it]

100238 episode score is 758.35


 10%|█         | 10269/100000 [3:24:08<28:22:30,  1.14s/it]

100248 episode score is 762.57


 10%|█         | 10270/100000 [3:24:10<28:25:43,  1.14s/it]

100258 episode score is 769.69


 10%|█         | 10271/100000 [3:24:11<28:07:14,  1.13s/it]

100268 episode score is 722.22


 10%|█         | 10272/100000 [3:24:12<27:48:26,  1.12s/it]

100278 episode score is 713.14


 10%|█         | 10273/100000 [3:24:13<28:06:57,  1.13s/it]

100288 episode score is 759.03


 10%|█         | 10274/100000 [3:24:14<28:10:04,  1.13s/it]

100298 episode score is 756.46


 10%|█         | 10275/100000 [3:24:15<27:41:47,  1.11s/it]

100307 episode score is 790.43


 10%|█         | 10276/100000 [3:24:16<27:31:45,  1.10s/it]

100316 episode score is 813.23


 10%|█         | 10277/100000 [3:24:17<27:28:22,  1.10s/it]

100325 episode score is 797.27


 10%|█         | 10278/100000 [3:24:18<27:27:04,  1.10s/it]

100334 episode score is 822.84


 10%|█         | 10279/100000 [3:24:19<27:34:49,  1.11s/it]

100344 episode score is 734.21


 10%|█         | 10280/100000 [3:24:21<27:36:34,  1.11s/it]

100353 episode score is 835.80


 10%|█         | 10281/100000 [3:24:22<28:09:26,  1.13s/it]

100363 episode score is 780.58


 10%|█         | 10282/100000 [3:24:23<28:04:21,  1.13s/it]

100372 episode score is 835.54


 10%|█         | 10283/100000 [3:24:24<27:42:08,  1.11s/it]

100381 episode score is 802.47


 10%|█         | 10284/100000 [3:24:25<28:07:54,  1.13s/it]

100391 episode score is 770.27


 10%|█         | 10285/100000 [3:24:26<27:56:07,  1.12s/it]

100400 episode score is 821.18


 10%|█         | 10286/100000 [3:24:27<27:52:24,  1.12s/it]

100409 episode score is 831.29


 10%|█         | 10287/100000 [3:24:28<27:33:44,  1.11s/it]

100418 episode score is 801.52


 10%|█         | 10288/100000 [3:24:30<27:36:55,  1.11s/it]

100427 episode score is 831.42


 10%|█         | 10289/100000 [3:24:31<27:59:54,  1.12s/it]

100437 episode score is 770.26


 10%|█         | 10290/100000 [3:24:32<28:02:11,  1.13s/it]

100446 episode score is 842.98


 10%|█         | 10291/100000 [3:24:33<28:57:04,  1.16s/it]

100455 episode score is 842.50


 10%|█         | 10292/100000 [3:24:34<28:20:30,  1.14s/it]

100464 episode score is 800.00


 10%|█         | 10293/100000 [3:24:35<28:31:10,  1.14s/it]

100474 episode score is 776.55


 10%|█         | 10294/100000 [3:24:36<27:59:27,  1.12s/it]

100483 episode score is 804.18


 10%|█         | 10295/100000 [3:24:38<28:18:48,  1.14s/it]

100493 episode score is 770.33


 10%|█         | 10296/100000 [3:24:39<28:07:05,  1.13s/it]

100503 episode score is 734.41


 10%|█         | 10297/100000 [3:24:40<28:24:06,  1.14s/it]

100513 episode score is 777.56


 10%|█         | 10298/100000 [3:24:41<28:23:14,  1.14s/it]

100523 episode score is 763.03


 10%|█         | 10299/100000 [3:24:42<28:23:58,  1.14s/it]

100533 episode score is 760.11
100543 episode score is 760.64


 10%|█         | 10300/100000 [3:24:44<35:01:11,  1.41s/it]

Iteration 10300: Average test reward: 635.43


 10%|█         | 10301/100000 [3:24:45<32:54:05,  1.32s/it]

100553 episode score is 735.44


 10%|█         | 10302/100000 [3:24:46<31:04:42,  1.25s/it]

100562 episode score is 795.50


 10%|█         | 10303/100000 [3:24:47<30:14:15,  1.21s/it]

100572 episode score is 738.98


 10%|█         | 10304/100000 [3:24:49<29:44:36,  1.19s/it]

100582 episode score is 765.84


 10%|█         | 10305/100000 [3:24:50<29:18:42,  1.18s/it]

100592 episode score is 746.33


 10%|█         | 10306/100000 [3:24:51<28:32:07,  1.15s/it]

100601 episode score is 811.35


 10%|█         | 10307/100000 [3:24:52<28:38:36,  1.15s/it]

100611 episode score is 767.67


 10%|█         | 10308/100000 [3:24:53<28:20:35,  1.14s/it]

100621 episode score is 719.92


 10%|█         | 10309/100000 [3:24:54<28:25:54,  1.14s/it]

100631 episode score is 767.11


 10%|█         | 10310/100000 [3:24:55<28:45:23,  1.15s/it]

100641 episode score is 783.83


 10%|█         | 10311/100000 [3:24:57<29:07:11,  1.17s/it]

100652 episode score is 708.68


 10%|█         | 10312/100000 [3:24:58<28:56:24,  1.16s/it]

100662 episode score is 756.47


 10%|█         | 10313/100000 [3:24:59<28:52:41,  1.16s/it]

100672 episode score is 773.84


 10%|█         | 10314/100000 [3:25:00<28:13:49,  1.13s/it]

100681 episode score is 796.69


 10%|█         | 10315/100000 [3:25:01<28:43:18,  1.15s/it]

100691 episode score is 794.86


 10%|█         | 10316/100000 [3:25:02<28:25:08,  1.14s/it]

100701 episode score is 716.43


 10%|█         | 10317/100000 [3:25:03<28:24:15,  1.14s/it]

100711 episode score is 745.18


 10%|█         | 10318/100000 [3:25:05<28:20:58,  1.14s/it]

100722 episode score is 663.29


 10%|█         | 10319/100000 [3:25:06<28:19:50,  1.14s/it]

100733 episode score is 651.66


 10%|█         | 10320/100000 [3:25:07<28:16:57,  1.14s/it]

100743 episode score is 752.85


 10%|█         | 10321/100000 [3:25:08<27:57:17,  1.12s/it]

100753 episode score is 724.64


 10%|█         | 10322/100000 [3:25:09<28:00:16,  1.12s/it]

100763 episode score is 755.16


 10%|█         | 10323/100000 [3:25:10<27:46:56,  1.12s/it]

100773 episode score is 720.93


 10%|█         | 10324/100000 [3:25:11<28:16:38,  1.14s/it]

100783 episode score is 789.51


 10%|█         | 10325/100000 [3:25:13<28:23:54,  1.14s/it]

100793 episode score is 765.83


 10%|█         | 10326/100000 [3:25:14<27:52:50,  1.12s/it]

100802 episode score is 798.20


 10%|█         | 10327/100000 [3:25:15<27:51:57,  1.12s/it]

100812 episode score is 740.66


 10%|█         | 10328/100000 [3:25:16<28:09:56,  1.13s/it]

100822 episode score is 775.95


 10%|█         | 10329/100000 [3:25:17<27:58:06,  1.12s/it]

100831 episode score is 815.02


 10%|█         | 10330/100000 [3:25:18<28:16:16,  1.14s/it]

100841 episode score is 774.64


 10%|█         | 10331/100000 [3:25:19<28:04:44,  1.13s/it]

100850 episode score is 830.91


 10%|█         | 10332/100000 [3:25:20<28:21:54,  1.14s/it]

100860 episode score is 786.99


 10%|█         | 10333/100000 [3:25:22<28:18:10,  1.14s/it]

100870 episode score is 754.02


 10%|█         | 10334/100000 [3:25:23<28:11:38,  1.13s/it]

100880 episode score is 722.70


 10%|█         | 10335/100000 [3:25:24<28:28:48,  1.14s/it]

100890 episode score is 779.97


 10%|█         | 10336/100000 [3:25:25<28:17:03,  1.14s/it]

100900 episode score is 740.64


 10%|█         | 10337/100000 [3:25:26<28:08:04,  1.13s/it]

100909 episode score is 839.44


 10%|█         | 10338/100000 [3:25:27<27:52:01,  1.12s/it]

100918 episode score is 817.66


 10%|█         | 10339/100000 [3:25:28<27:37:57,  1.11s/it]

100927 episode score is 807.83


 10%|█         | 10340/100000 [3:25:29<27:21:10,  1.10s/it]

100936 episode score is 788.93


 10%|█         | 10341/100000 [3:25:30<27:56:29,  1.12s/it]

100946 episode score is 793.57


 10%|█         | 10342/100000 [3:25:32<27:57:06,  1.12s/it]

100955 episode score is 846.35


 10%|█         | 10343/100000 [3:25:33<27:43:43,  1.11s/it]

100964 episode score is 798.00


 10%|█         | 10344/100000 [3:25:34<27:57:54,  1.12s/it]

100974 episode score is 763.43


 10%|█         | 10345/100000 [3:25:35<28:13:27,  1.13s/it]

100984 episode score is 775.73


 10%|█         | 10346/100000 [3:25:36<27:47:32,  1.12s/it]

100993 episode score is 804.09


 10%|█         | 10347/100000 [3:25:37<27:30:05,  1.10s/it]

101002 episode score is 811.48


 10%|█         | 10348/100000 [3:25:38<27:17:35,  1.10s/it]

101011 episode score is 807.32


 10%|█         | 10349/100000 [3:25:39<27:43:33,  1.11s/it]

101020 episode score is 809.69


 10%|█         | 10350/100000 [3:25:40<27:39:25,  1.11s/it]

101029 episode score is 832.51


 10%|█         | 10351/100000 [3:25:42<27:50:45,  1.12s/it]

101039 episode score is 747.89


 10%|█         | 10352/100000 [3:25:43<27:32:17,  1.11s/it]

101048 episode score is 794.47


 10%|█         | 10353/100000 [3:25:44<28:00:02,  1.12s/it]

101058 episode score is 765.45


 10%|█         | 10354/100000 [3:25:45<27:58:12,  1.12s/it]

101068 episode score is 743.42


 10%|█         | 10355/100000 [3:25:46<27:33:45,  1.11s/it]

101077 episode score is 786.55


 10%|█         | 10356/100000 [3:25:47<27:39:08,  1.11s/it]

101087 episode score is 731.03


 10%|█         | 10357/100000 [3:25:48<28:07:26,  1.13s/it]

101097 episode score is 778.54


 10%|█         | 10358/100000 [3:25:49<27:59:27,  1.12s/it]

101106 episode score is 814.52


 10%|█         | 10359/100000 [3:25:51<27:51:46,  1.12s/it]

101115 episode score is 834.08


 10%|█         | 10360/100000 [3:25:52<28:14:07,  1.13s/it]

101125 episode score is 789.39


 10%|█         | 10361/100000 [3:25:53<28:34:03,  1.15s/it]

101135 episode score is 785.22


 10%|█         | 10362/100000 [3:25:54<28:46:32,  1.16s/it]

101145 episode score is 783.40


 10%|█         | 10363/100000 [3:25:55<28:23:17,  1.14s/it]

101154 episode score is 816.57


 10%|█         | 10364/100000 [3:25:56<28:28:11,  1.14s/it]

101164 episode score is 770.75


 10%|█         | 10365/100000 [3:25:57<27:58:18,  1.12s/it]

101173 episode score is 801.04


 10%|█         | 10366/100000 [3:25:59<27:41:26,  1.11s/it]

101182 episode score is 819.04


 10%|█         | 10367/100000 [3:26:00<28:08:56,  1.13s/it]

101192 episode score is 770.57


 10%|█         | 10368/100000 [3:26:01<28:40:51,  1.15s/it]

101202 episode score is 792.08


 10%|█         | 10369/100000 [3:26:02<28:52:51,  1.16s/it]

101212 episode score is 794.81


 10%|█         | 10370/100000 [3:26:03<28:53:42,  1.16s/it]

101222 episode score is 768.81


 10%|█         | 10371/100000 [3:26:04<28:18:21,  1.14s/it]

101232 episode score is 709.22


 10%|█         | 10372/100000 [3:26:05<27:59:54,  1.12s/it]

101242 episode score is 709.93


 10%|█         | 10373/100000 [3:26:07<28:11:19,  1.13s/it]

101252 episode score is 776.30


 10%|█         | 10374/100000 [3:26:08<28:21:10,  1.14s/it]

101262 episode score is 767.45


 10%|█         | 10375/100000 [3:26:09<28:17:28,  1.14s/it]

101271 episode score is 850.74


 10%|█         | 10376/100000 [3:26:10<27:45:31,  1.12s/it]

101280 episode score is 797.25


 10%|█         | 10377/100000 [3:26:11<27:25:26,  1.10s/it]

101289 episode score is 795.86


 10%|█         | 10378/100000 [3:26:12<27:19:28,  1.10s/it]

101298 episode score is 811.07


 10%|█         | 10379/100000 [3:26:13<28:02:51,  1.13s/it]

101308 episode score is 798.24


 10%|█         | 10380/100000 [3:26:14<28:26:40,  1.14s/it]

101318 episode score is 795.44


 10%|█         | 10381/100000 [3:26:16<28:33:05,  1.15s/it]

101328 episode score is 776.23


 10%|█         | 10382/100000 [3:26:17<28:48:17,  1.16s/it]

101338 episode score is 769.24


 10%|█         | 10383/100000 [3:26:18<29:04:50,  1.17s/it]

101347 episode score is 888.89


 10%|█         | 10384/100000 [3:26:19<28:33:46,  1.15s/it]

101356 episode score is 804.77


 10%|█         | 10385/100000 [3:26:20<28:38:36,  1.15s/it]

101365 episode score is 872.79


 10%|█         | 10386/100000 [3:26:21<28:25:15,  1.14s/it]

101374 episode score is 831.66


 10%|█         | 10387/100000 [3:26:22<28:20:59,  1.14s/it]

101383 episode score is 825.65


 10%|█         | 10388/100000 [3:26:24<28:13:10,  1.13s/it]

101392 episode score is 833.75


 10%|█         | 10389/100000 [3:26:25<27:57:55,  1.12s/it]

101401 episode score is 813.19


 10%|█         | 10390/100000 [3:26:26<27:36:08,  1.11s/it]

101410 episode score is 794.67


 10%|█         | 10391/100000 [3:26:27<27:31:33,  1.11s/it]

101419 episode score is 816.77


 10%|█         | 10392/100000 [3:26:28<27:31:08,  1.11s/it]

101428 episode score is 823.86


 10%|█         | 10393/100000 [3:26:29<27:25:48,  1.10s/it]

101437 episode score is 818.10


 10%|█         | 10394/100000 [3:26:30<27:11:20,  1.09s/it]

101446 episode score is 794.30


 10%|█         | 10395/100000 [3:26:31<27:05:13,  1.09s/it]

101455 episode score is 805.63


 10%|█         | 10396/100000 [3:26:32<27:37:45,  1.11s/it]

101465 episode score is 747.73


 10%|█         | 10397/100000 [3:26:33<27:21:05,  1.10s/it]

101474 episode score is 790.56


 10%|█         | 10398/100000 [3:26:35<27:59:00,  1.12s/it]

101484 episode score is 772.56


 10%|█         | 10399/100000 [3:26:36<27:46:57,  1.12s/it]

101494 episode score is 716.45
101503 episode score is 791.58


 10%|█         | 10400/100000 [3:26:38<35:29:10,  1.43s/it]

Iteration 10400: Average test reward: 806.03


 10%|█         | 10401/100000 [3:26:39<33:08:06,  1.33s/it]

101512 episode score is 828.90


 10%|█         | 10402/100000 [3:26:40<31:48:22,  1.28s/it]

101522 episode score is 768.32


 10%|█         | 10403/100000 [3:26:41<30:51:45,  1.24s/it]

101532 episode score is 757.91


 10%|█         | 10404/100000 [3:26:42<30:19:56,  1.22s/it]

101542 episode score is 769.38


 10%|█         | 10405/100000 [3:26:44<29:27:19,  1.18s/it]

101551 episode score is 813.91


 10%|█         | 10406/100000 [3:26:45<28:40:47,  1.15s/it]

101560 episode score is 805.99


 10%|█         | 10407/100000 [3:26:46<28:47:19,  1.16s/it]

101569 episode score is 794.30


 10%|█         | 10408/100000 [3:26:47<28:39:34,  1.15s/it]

101579 episode score is 744.98


 10%|█         | 10409/100000 [3:26:48<28:06:32,  1.13s/it]

101588 episode score is 798.34


 10%|█         | 10410/100000 [3:26:49<27:51:31,  1.12s/it]

101597 episode score is 800.81


 10%|█         | 10411/100000 [3:26:50<27:40:25,  1.11s/it]

101606 episode score is 816.57


 10%|█         | 10412/100000 [3:26:51<27:56:10,  1.12s/it]

101616 episode score is 748.29


 10%|█         | 10413/100000 [3:26:53<28:08:00,  1.13s/it]

101626 episode score is 763.29


 10%|█         | 10414/100000 [3:26:54<28:07:08,  1.13s/it]

101636 episode score is 750.62


 10%|█         | 10415/100000 [3:26:55<28:13:23,  1.13s/it]

101646 episode score is 751.69


 10%|█         | 10416/100000 [3:26:56<28:34:31,  1.15s/it]

101657 episode score is 704.71


 10%|█         | 10417/100000 [3:26:57<28:24:46,  1.14s/it]

101667 episode score is 745.85


 10%|█         | 10418/100000 [3:26:58<28:12:04,  1.13s/it]

101677 episode score is 740.21


 10%|█         | 10419/100000 [3:26:59<28:19:12,  1.14s/it]

101687 episode score is 763.46


 10%|█         | 10420/100000 [3:27:00<28:14:34,  1.14s/it]

101697 episode score is 755.04


 10%|█         | 10421/100000 [3:27:02<27:44:48,  1.12s/it]

101706 episode score is 790.81


 10%|█         | 10422/100000 [3:27:03<28:20:41,  1.14s/it]

101716 episode score is 786.77


 10%|█         | 10423/100000 [3:27:04<28:41:12,  1.15s/it]

101726 episode score is 789.85


 10%|█         | 10424/100000 [3:27:05<28:33:42,  1.15s/it]

101736 episode score is 750.73


 10%|█         | 10425/100000 [3:27:06<28:28:49,  1.14s/it]

101746 episode score is 762.77


 10%|█         | 10426/100000 [3:27:07<28:43:23,  1.15s/it]

101756 episode score is 785.91


 10%|█         | 10427/100000 [3:27:09<28:24:46,  1.14s/it]

101766 episode score is 739.98


 10%|█         | 10428/100000 [3:27:10<28:31:57,  1.15s/it]

101776 episode score is 773.52


 10%|█         | 10429/100000 [3:27:11<28:39:48,  1.15s/it]

101786 episode score is 777.69


 10%|█         | 10430/100000 [3:27:12<28:52:30,  1.16s/it]

101796 episode score is 787.80


 10%|█         | 10431/100000 [3:27:13<28:52:37,  1.16s/it]

101806 episode score is 774.77


 10%|█         | 10432/100000 [3:27:14<28:45:21,  1.16s/it]

101816 episode score is 766.71


 10%|█         | 10433/100000 [3:27:15<28:45:19,  1.16s/it]

101826 episode score is 781.79


 10%|█         | 10434/100000 [3:27:17<28:33:27,  1.15s/it]

101836 episode score is 760.22


 10%|█         | 10435/100000 [3:27:18<28:56:26,  1.16s/it]

101847 episode score is 707.13


 10%|█         | 10436/100000 [3:27:19<28:42:56,  1.15s/it]

101857 episode score is 749.47


 10%|█         | 10437/100000 [3:27:20<28:19:02,  1.14s/it]

101867 episode score is 732.23


 10%|█         | 10438/100000 [3:27:21<28:13:12,  1.13s/it]

101877 episode score is 753.84


 10%|█         | 10439/100000 [3:27:22<28:21:34,  1.14s/it]

101887 episode score is 773.14


 10%|█         | 10440/100000 [3:27:23<28:26:21,  1.14s/it]

101897 episode score is 772.00


 10%|█         | 10441/100000 [3:27:25<28:37:14,  1.15s/it]

101907 episode score is 786.02


 10%|█         | 10442/100000 [3:27:26<28:38:45,  1.15s/it]

101917 episode score is 774.71


 10%|█         | 10443/100000 [3:27:27<28:27:47,  1.14s/it]

101927 episode score is 756.81


 10%|█         | 10444/100000 [3:27:28<28:19:35,  1.14s/it]

101937 episode score is 758.94


 10%|█         | 10445/100000 [3:27:29<28:19:46,  1.14s/it]

101947 episode score is 770.05


 10%|█         | 10446/100000 [3:27:30<28:18:11,  1.14s/it]

101957 episode score is 770.00


 10%|█         | 10447/100000 [3:27:31<28:01:25,  1.13s/it]

101967 episode score is 738.51


 10%|█         | 10448/100000 [3:27:33<28:01:32,  1.13s/it]

101977 episode score is 752.54


 10%|█         | 10449/100000 [3:27:34<27:59:53,  1.13s/it]

101987 episode score is 740.59


 10%|█         | 10450/100000 [3:27:35<27:40:14,  1.11s/it]

101997 episode score is 709.37


 10%|█         | 10451/100000 [3:27:36<27:29:26,  1.11s/it]

102007 episode score is 719.63


 10%|█         | 10452/100000 [3:27:37<27:20:57,  1.10s/it]

102017 episode score is 720.36


 10%|█         | 10453/100000 [3:27:38<27:29:27,  1.11s/it]

102028 episode score is 647.27


 10%|█         | 10454/100000 [3:27:39<27:54:49,  1.12s/it]

102039 episode score is 686.48


 10%|█         | 10455/100000 [3:27:40<28:02:13,  1.13s/it]

102050 episode score is 672.78


 10%|█         | 10456/100000 [3:27:42<28:21:48,  1.14s/it]

102061 episode score is 694.48


 10%|█         | 10457/100000 [3:27:43<28:08:56,  1.13s/it]

102071 episode score is 740.60


 10%|█         | 10458/100000 [3:27:44<28:11:37,  1.13s/it]

102081 episode score is 762.12


 10%|█         | 10459/100000 [3:27:45<28:03:47,  1.13s/it]

102091 episode score is 747.15


 10%|█         | 10460/100000 [3:27:46<28:00:43,  1.13s/it]

102101 episode score is 719.58


 10%|█         | 10461/100000 [3:27:47<27:50:41,  1.12s/it]

102111 episode score is 708.98


 10%|█         | 10462/100000 [3:27:48<27:49:18,  1.12s/it]

102121 episode score is 732.88


 10%|█         | 10463/100000 [3:27:49<28:04:48,  1.13s/it]

102131 episode score is 772.95


 10%|█         | 10464/100000 [3:27:51<28:19:59,  1.14s/it]

102141 episode score is 766.82


 10%|█         | 10465/100000 [3:27:52<28:26:45,  1.14s/it]

102151 episode score is 769.64


 10%|█         | 10466/100000 [3:27:53<28:44:07,  1.16s/it]

102161 episode score is 778.59


 10%|█         | 10467/100000 [3:27:54<28:51:12,  1.16s/it]

102171 episode score is 784.39


 10%|█         | 10468/100000 [3:27:55<28:44:46,  1.16s/it]

102181 episode score is 763.58


 10%|█         | 10469/100000 [3:27:56<28:43:00,  1.15s/it]

102191 episode score is 778.33


 10%|█         | 10470/100000 [3:27:57<28:25:02,  1.14s/it]

102201 episode score is 747.49


 10%|█         | 10471/100000 [3:27:59<28:07:08,  1.13s/it]

102211 episode score is 736.45


 10%|█         | 10472/100000 [3:28:00<28:14:31,  1.14s/it]

102222 episode score is 678.29


 10%|█         | 10473/100000 [3:28:01<27:48:10,  1.12s/it]

102232 episode score is 700.56


 10%|█         | 10474/100000 [3:28:02<27:46:09,  1.12s/it]

102242 episode score is 733.08


 10%|█         | 10475/100000 [3:28:03<27:55:00,  1.12s/it]

102252 episode score is 749.52


 10%|█         | 10476/100000 [3:28:04<28:04:52,  1.13s/it]

102262 episode score is 757.53


 10%|█         | 10477/100000 [3:28:05<27:40:57,  1.11s/it]

102272 episode score is 703.97


 10%|█         | 10478/100000 [3:28:06<27:42:57,  1.11s/it]

102282 episode score is 739.73


 10%|█         | 10479/100000 [3:28:07<27:49:31,  1.12s/it]

102292 episode score is 748.45


 10%|█         | 10480/100000 [3:28:09<27:51:45,  1.12s/it]

102302 episode score is 750.94


 10%|█         | 10481/100000 [3:28:10<27:51:10,  1.12s/it]

102312 episode score is 743.15


 10%|█         | 10482/100000 [3:28:11<27:46:37,  1.12s/it]

102322 episode score is 738.99


 10%|█         | 10483/100000 [3:28:12<28:00:45,  1.13s/it]

102332 episode score is 767.12


 10%|█         | 10484/100000 [3:28:13<28:07:46,  1.13s/it]

102343 episode score is 670.51


 10%|█         | 10485/100000 [3:28:14<28:30:38,  1.15s/it]

102354 episode score is 690.08


 10%|█         | 10486/100000 [3:28:16<28:58:11,  1.17s/it]

102364 episode score is 726.16


 10%|█         | 10487/100000 [3:28:17<28:49:11,  1.16s/it]

102374 episode score is 752.56


 10%|█         | 10488/100000 [3:28:18<28:41:04,  1.15s/it]

102384 episode score is 736.86


 10%|█         | 10489/100000 [3:28:19<28:31:53,  1.15s/it]

102394 episode score is 743.87


 10%|█         | 10490/100000 [3:28:20<28:37:36,  1.15s/it]

102404 episode score is 762.31


 10%|█         | 10491/100000 [3:28:21<28:38:24,  1.15s/it]

102414 episode score is 776.78


 10%|█         | 10492/100000 [3:28:22<28:35:04,  1.15s/it]

102424 episode score is 757.00


 10%|█         | 10493/100000 [3:28:24<28:46:50,  1.16s/it]

102434 episode score is 783.93


 10%|█         | 10494/100000 [3:28:25<28:32:06,  1.15s/it]

102444 episode score is 737.90


 10%|█         | 10495/100000 [3:28:26<28:30:45,  1.15s/it]

102454 episode score is 766.70


 10%|█         | 10496/100000 [3:28:27<28:32:53,  1.15s/it]

102464 episode score is 774.77


 10%|█         | 10497/100000 [3:28:28<28:38:20,  1.15s/it]

102474 episode score is 778.83


 10%|█         | 10498/100000 [3:28:29<28:32:16,  1.15s/it]

102484 episode score is 759.24


 10%|█         | 10499/100000 [3:28:30<27:55:03,  1.12s/it]

102493 episode score is 802.92
102503 episode score is 739.63


 10%|█         | 10500/100000 [3:28:33<35:40:09,  1.43s/it]

Iteration 10500: Average test reward: 752.59


 11%|█         | 10501/100000 [3:28:34<33:27:33,  1.35s/it]

102513 episode score is 756.45


 11%|█         | 10502/100000 [3:28:35<31:54:00,  1.28s/it]

102523 episode score is 762.39


 11%|█         | 10503/100000 [3:28:36<30:30:10,  1.23s/it]

102533 episode score is 726.45


 11%|█         | 10504/100000 [3:28:37<29:43:33,  1.20s/it]

102543 episode score is 748.81


 11%|█         | 10505/100000 [3:28:38<29:24:02,  1.18s/it]

102553 episode score is 776.13


 11%|█         | 10506/100000 [3:28:39<28:50:24,  1.16s/it]

102563 episode score is 733.73


 11%|█         | 10507/100000 [3:28:40<28:22:19,  1.14s/it]

102573 episode score is 727.02


 11%|█         | 10508/100000 [3:28:41<27:54:12,  1.12s/it]

102583 episode score is 712.06


 11%|█         | 10509/100000 [3:28:43<27:48:11,  1.12s/it]

102593 episode score is 734.30


 11%|█         | 10510/100000 [3:28:44<27:57:35,  1.12s/it]

102603 episode score is 756.84


 11%|█         | 10511/100000 [3:28:45<27:46:39,  1.12s/it]

102613 episode score is 716.27


 11%|█         | 10512/100000 [3:28:46<28:12:52,  1.14s/it]

102623 episode score is 784.61


 11%|█         | 10513/100000 [3:28:47<28:20:25,  1.14s/it]

102633 episode score is 758.11


 11%|█         | 10514/100000 [3:28:48<28:03:08,  1.13s/it]

102643 episode score is 716.91


 11%|█         | 10515/100000 [3:28:49<28:09:59,  1.13s/it]

102653 episode score is 746.56


 11%|█         | 10516/100000 [3:28:51<28:32:49,  1.15s/it]

102663 episode score is 785.67


 11%|█         | 10517/100000 [3:28:52<28:38:35,  1.15s/it]

102673 episode score is 772.80


 11%|█         | 10518/100000 [3:28:53<28:48:04,  1.16s/it]

102683 episode score is 780.19


 11%|█         | 10519/100000 [3:28:54<28:12:22,  1.13s/it]

102692 episode score is 802.75


 11%|█         | 10520/100000 [3:28:55<28:36:36,  1.15s/it]

102702 episode score is 785.96


 11%|█         | 10521/100000 [3:28:56<28:00:03,  1.13s/it]

102711 episode score is 795.45


 11%|█         | 10522/100000 [3:28:57<27:41:39,  1.11s/it]

102721 episode score is 708.80


 11%|█         | 10523/100000 [3:28:58<28:00:01,  1.13s/it]

102731 episode score is 766.53


 11%|█         | 10524/100000 [3:29:00<27:46:02,  1.12s/it]

102740 episode score is 812.96


 11%|█         | 10525/100000 [3:29:01<27:31:07,  1.11s/it]

102749 episode score is 812.15


 11%|█         | 10526/100000 [3:29:02<27:33:45,  1.11s/it]

102759 episode score is 730.47


 11%|█         | 10527/100000 [3:29:03<28:05:08,  1.13s/it]

102769 episode score is 779.90


 11%|█         | 10528/100000 [3:29:04<28:02:21,  1.13s/it]

102779 episode score is 745.96


 11%|█         | 10529/100000 [3:29:05<28:27:07,  1.14s/it]

102789 episode score is 796.53


 11%|█         | 10530/100000 [3:29:06<28:23:35,  1.14s/it]

102799 episode score is 766.87


 11%|█         | 10531/100000 [3:29:08<28:31:56,  1.15s/it]

102809 episode score is 782.80


 11%|█         | 10532/100000 [3:29:09<28:08:35,  1.13s/it]

102819 episode score is 730.61


 11%|█         | 10533/100000 [3:29:10<28:21:04,  1.14s/it]

102829 episode score is 775.79


 11%|█         | 10534/100000 [3:29:11<28:25:58,  1.14s/it]

102839 episode score is 767.32


 11%|█         | 10535/100000 [3:29:12<28:20:47,  1.14s/it]

102849 episode score is 748.87


 11%|█         | 10536/100000 [3:29:13<28:28:35,  1.15s/it]

102859 episode score is 774.95


 11%|█         | 10537/100000 [3:29:14<28:35:09,  1.15s/it]

102869 episode score is 768.85


 11%|█         | 10538/100000 [3:29:16<28:11:54,  1.13s/it]

102879 episode score is 714.33


 11%|█         | 10539/100000 [3:29:17<28:19:49,  1.14s/it]

102889 episode score is 771.72


 11%|█         | 10540/100000 [3:29:18<28:21:16,  1.14s/it]

102899 episode score is 749.87


 11%|█         | 10541/100000 [3:29:19<28:20:00,  1.14s/it]

102909 episode score is 760.88


 11%|█         | 10542/100000 [3:29:20<28:18:14,  1.14s/it]

102919 episode score is 757.17


 11%|█         | 10543/100000 [3:29:21<28:55:35,  1.16s/it]

102929 episode score is 747.42


 11%|█         | 10544/100000 [3:29:22<28:50:34,  1.16s/it]

102939 episode score is 763.40


 11%|█         | 10545/100000 [3:29:24<28:36:26,  1.15s/it]

102949 episode score is 755.65


 11%|█         | 10546/100000 [3:29:25<28:11:52,  1.13s/it]

102959 episode score is 723.12


 11%|█         | 10547/100000 [3:29:26<27:48:01,  1.12s/it]

102969 episode score is 714.59


 11%|█         | 10548/100000 [3:29:27<27:29:55,  1.11s/it]

102979 episode score is 710.40


 11%|█         | 10549/100000 [3:29:28<27:31:57,  1.11s/it]

102989 episode score is 745.24


 11%|█         | 10550/100000 [3:29:29<27:29:52,  1.11s/it]

102999 episode score is 735.54


 11%|█         | 10551/100000 [3:29:30<27:46:52,  1.12s/it]

103009 episode score is 764.37


 11%|█         | 10552/100000 [3:29:31<27:58:27,  1.13s/it]

103019 episode score is 767.05


 11%|█         | 10553/100000 [3:29:33<28:21:02,  1.14s/it]

103029 episode score is 774.24


 11%|█         | 10554/100000 [3:29:34<28:23:54,  1.14s/it]

103039 episode score is 773.03


 11%|█         | 10555/100000 [3:29:35<28:11:34,  1.13s/it]

103049 episode score is 736.27


 11%|█         | 10556/100000 [3:29:36<28:10:59,  1.13s/it]

103059 episode score is 766.74


 11%|█         | 10557/100000 [3:29:37<28:12:50,  1.14s/it]

103069 episode score is 766.88


 11%|█         | 10558/100000 [3:29:38<28:21:49,  1.14s/it]

103079 episode score is 777.16


 11%|█         | 10559/100000 [3:29:39<28:14:28,  1.14s/it]

103089 episode score is 759.72


 11%|█         | 10560/100000 [3:29:40<27:46:46,  1.12s/it]

103099 episode score is 712.60


 11%|█         | 10561/100000 [3:29:42<28:13:19,  1.14s/it]

103110 episode score is 704.15


 11%|█         | 10562/100000 [3:29:43<27:56:20,  1.12s/it]

103120 episode score is 728.67


 11%|█         | 10563/100000 [3:29:44<27:36:08,  1.11s/it]

103130 episode score is 712.93


 11%|█         | 10564/100000 [3:29:45<27:29:55,  1.11s/it]

103140 episode score is 726.14


 11%|█         | 10565/100000 [3:29:46<27:34:44,  1.11s/it]

103150 episode score is 743.44


 11%|█         | 10566/100000 [3:29:47<27:54:11,  1.12s/it]

103160 episode score is 766.39


 11%|█         | 10567/100000 [3:29:48<28:21:07,  1.14s/it]

103170 episode score is 787.11


 11%|█         | 10568/100000 [3:29:49<28:10:06,  1.13s/it]

103180 episode score is 737.06


 11%|█         | 10569/100000 [3:29:51<28:16:27,  1.14s/it]

103190 episode score is 776.10


 11%|█         | 10570/100000 [3:29:52<27:50:55,  1.12s/it]

103200 episode score is 721.97


 11%|█         | 10571/100000 [3:29:53<28:06:42,  1.13s/it]

103210 episode score is 784.65


 11%|█         | 10572/100000 [3:29:54<28:06:00,  1.13s/it]

103220 episode score is 769.04


 11%|█         | 10573/100000 [3:29:55<28:23:34,  1.14s/it]

103230 episode score is 778.28


 11%|█         | 10574/100000 [3:29:56<28:12:41,  1.14s/it]

103240 episode score is 758.26


 11%|█         | 10575/100000 [3:29:57<28:22:22,  1.14s/it]

103250 episode score is 775.07


 11%|█         | 10576/100000 [3:29:59<28:14:10,  1.14s/it]

103260 episode score is 754.85


 11%|█         | 10577/100000 [3:30:00<28:45:49,  1.16s/it]

103270 episode score is 794.41


 11%|█         | 10578/100000 [3:30:01<28:15:49,  1.14s/it]

103281 episode score is 639.63


 11%|█         | 10579/100000 [3:30:02<28:08:24,  1.13s/it]

103291 episode score is 752.45


 11%|█         | 10580/100000 [3:30:03<28:12:14,  1.14s/it]

103301 episode score is 770.04


 11%|█         | 10581/100000 [3:30:04<28:07:11,  1.13s/it]

103311 episode score is 757.30


 11%|█         | 10582/100000 [3:30:05<28:13:03,  1.14s/it]

103322 episode score is 681.01


 11%|█         | 10583/100000 [3:30:06<28:03:06,  1.13s/it]

103334 episode score is 578.06


 11%|█         | 10584/100000 [3:30:08<27:56:22,  1.12s/it]

103345 episode score is 647.35


 11%|█         | 10585/100000 [3:30:09<28:14:48,  1.14s/it]

103356 episode score is 690.18


 11%|█         | 10586/100000 [3:30:10<28:36:37,  1.15s/it]

103367 episode score is 705.40


 11%|█         | 10587/100000 [3:30:11<28:16:14,  1.14s/it]

103378 episode score is 638.55


 11%|█         | 10588/100000 [3:30:12<28:27:21,  1.15s/it]

103389 episode score is 690.18


 11%|█         | 10589/100000 [3:30:13<28:22:43,  1.14s/it]

103399 episode score is 751.11


 11%|█         | 10590/100000 [3:30:14<28:00:17,  1.13s/it]

103409 episode score is 721.02


 11%|█         | 10591/100000 [3:30:16<27:40:10,  1.11s/it]

103419 episode score is 718.57


 11%|█         | 10592/100000 [3:30:17<27:39:54,  1.11s/it]

103429 episode score is 745.05


 11%|█         | 10593/100000 [3:30:18<27:59:52,  1.13s/it]

103439 episode score is 765.23


 11%|█         | 10594/100000 [3:30:19<27:36:31,  1.11s/it]

103449 episode score is 701.47


 11%|█         | 10595/100000 [3:30:20<27:48:34,  1.12s/it]

103459 episode score is 759.46


 11%|█         | 10596/100000 [3:30:21<27:55:22,  1.12s/it]

103469 episode score is 754.31


 11%|█         | 10597/100000 [3:30:22<28:21:31,  1.14s/it]

103480 episode score is 694.14


 11%|█         | 10598/100000 [3:30:23<28:09:52,  1.13s/it]

103490 episode score is 741.85


 11%|█         | 10599/100000 [3:30:25<27:59:16,  1.13s/it]

103500 episode score is 728.92
103510 episode score is 751.14


 11%|█         | 10600/100000 [3:30:27<35:38:17,  1.44s/it]

Iteration 10600: Average test reward: 771.01


 11%|█         | 10601/100000 [3:30:28<34:11:43,  1.38s/it]

103520 episode score is 763.83


 11%|█         | 10602/100000 [3:30:29<32:08:53,  1.29s/it]

103530 episode score is 739.09


 11%|█         | 10603/100000 [3:30:30<30:31:32,  1.23s/it]

103540 episode score is 706.59


 11%|█         | 10604/100000 [3:30:31<29:49:52,  1.20s/it]

103550 episode score is 765.50


 11%|█         | 10605/100000 [3:30:32<29:13:03,  1.18s/it]

103560 episode score is 720.05


 11%|█         | 10606/100000 [3:30:34<28:57:19,  1.17s/it]

103570 episode score is 759.18


 11%|█         | 10607/100000 [3:30:35<28:56:23,  1.17s/it]

103580 episode score is 779.31


 11%|█         | 10608/100000 [3:30:36<28:39:57,  1.15s/it]

103590 episode score is 756.55


 11%|█         | 10609/100000 [3:30:37<28:47:14,  1.16s/it]

103600 episode score is 783.94


 11%|█         | 10610/100000 [3:30:38<28:49:09,  1.16s/it]

103610 episode score is 772.60


 11%|█         | 10611/100000 [3:30:39<28:21:10,  1.14s/it]

103620 episode score is 721.95


 11%|█         | 10612/100000 [3:30:40<28:34:00,  1.15s/it]

103630 episode score is 783.04


 11%|█         | 10613/100000 [3:30:42<28:51:55,  1.16s/it]

103641 episode score is 701.14


 11%|█         | 10614/100000 [3:30:43<28:58:40,  1.17s/it]

103652 episode score is 692.59


 11%|█         | 10615/100000 [3:30:44<28:58:52,  1.17s/it]

103663 episode score is 680.96


 11%|█         | 10616/100000 [3:30:45<28:31:36,  1.15s/it]

103673 episode score is 734.73


 11%|█         | 10617/100000 [3:30:46<28:20:45,  1.14s/it]

103683 episode score is 751.51


 11%|█         | 10618/100000 [3:30:47<28:22:29,  1.14s/it]

103693 episode score is 745.72


 11%|█         | 10619/100000 [3:30:48<28:20:52,  1.14s/it]

103703 episode score is 762.20


 11%|█         | 10620/100000 [3:30:50<27:55:56,  1.13s/it]

103713 episode score is 705.13


 11%|█         | 10621/100000 [3:30:51<27:43:18,  1.12s/it]

103723 episode score is 724.32


 11%|█         | 10622/100000 [3:30:52<28:02:45,  1.13s/it]

103733 episode score is 771.73


 11%|█         | 10623/100000 [3:30:53<28:02:16,  1.13s/it]

103743 episode score is 746.22


 11%|█         | 10624/100000 [3:30:54<27:52:43,  1.12s/it]

103753 episode score is 724.62


 11%|█         | 10625/100000 [3:30:55<27:56:02,  1.13s/it]

103763 episode score is 747.96


 11%|█         | 10626/100000 [3:30:56<27:56:44,  1.13s/it]

103773 episode score is 755.84


 11%|█         | 10627/100000 [3:30:57<27:50:32,  1.12s/it]

103783 episode score is 729.29


 11%|█         | 10628/100000 [3:30:59<28:19:40,  1.14s/it]

103794 episode score is 695.85


 11%|█         | 10629/100000 [3:31:00<27:53:00,  1.12s/it]

103804 episode score is 713.64


 11%|█         | 10630/100000 [3:31:01<27:54:52,  1.12s/it]

103814 episode score is 736.00


 11%|█         | 10631/100000 [3:31:02<28:07:05,  1.13s/it]

103824 episode score is 746.71


 11%|█         | 10632/100000 [3:31:03<28:22:48,  1.14s/it]

103835 episode score is 688.15


 11%|█         | 10633/100000 [3:31:04<28:02:19,  1.13s/it]

103845 episode score is 724.47


 11%|█         | 10634/100000 [3:31:05<27:41:32,  1.12s/it]

103855 episode score is 708.20


 11%|█         | 10635/100000 [3:31:06<27:27:07,  1.11s/it]

103865 episode score is 716.00


 11%|█         | 10636/100000 [3:31:08<27:44:45,  1.12s/it]

103875 episode score is 764.01


 11%|█         | 10637/100000 [3:31:09<27:46:46,  1.12s/it]

103885 episode score is 747.78


 11%|█         | 10638/100000 [3:31:10<27:51:21,  1.12s/it]

103895 episode score is 751.29


 11%|█         | 10639/100000 [3:31:11<27:33:39,  1.11s/it]

103905 episode score is 715.33


 11%|█         | 10640/100000 [3:31:12<27:32:06,  1.11s/it]

103915 episode score is 732.10


 11%|█         | 10641/100000 [3:31:13<27:48:53,  1.12s/it]

103926 episode score is 665.93


 11%|█         | 10642/100000 [3:31:14<27:42:36,  1.12s/it]

103937 episode score is 631.88


 11%|█         | 10643/100000 [3:31:15<28:07:15,  1.13s/it]

103947 episode score is 776.20


 11%|█         | 10644/100000 [3:31:17<27:59:18,  1.13s/it]

103957 episode score is 725.68


 11%|█         | 10645/100000 [3:31:18<28:18:25,  1.14s/it]

103967 episode score is 767.71


 11%|█         | 10646/100000 [3:31:19<28:14:44,  1.14s/it]

103977 episode score is 738.14


 11%|█         | 10647/100000 [3:31:20<28:36:38,  1.15s/it]

103987 episode score is 793.57


 11%|█         | 10648/100000 [3:31:21<28:41:01,  1.16s/it]

103997 episode score is 773.38


 11%|█         | 10649/100000 [3:31:22<28:23:11,  1.14s/it]

104007 episode score is 737.39


 11%|█         | 10650/100000 [3:31:23<27:56:51,  1.13s/it]

104017 episode score is 711.12


 11%|█         | 10651/100000 [3:31:25<28:05:49,  1.13s/it]

104027 episode score is 766.85


 11%|█         | 10652/100000 [3:31:26<28:29:54,  1.15s/it]

104037 episode score is 728.04


 11%|█         | 10653/100000 [3:31:27<28:04:10,  1.13s/it]

104047 episode score is 719.69


 11%|█         | 10654/100000 [3:31:28<28:20:51,  1.14s/it]

104057 episode score is 773.89


 11%|█         | 10655/100000 [3:31:29<28:20:24,  1.14s/it]

104067 episode score is 761.07


 11%|█         | 10656/100000 [3:31:30<28:21:56,  1.14s/it]

104077 episode score is 761.58


 11%|█         | 10657/100000 [3:31:31<28:24:43,  1.14s/it]

104087 episode score is 762.00


 11%|█         | 10658/100000 [3:31:33<28:27:46,  1.15s/it]

104097 episode score is 757.85


 11%|█         | 10659/100000 [3:31:34<28:35:37,  1.15s/it]

104107 episode score is 769.49


 11%|█         | 10660/100000 [3:31:35<28:43:43,  1.16s/it]

104117 episode score is 779.24


 11%|█         | 10661/100000 [3:31:36<28:51:34,  1.16s/it]

104127 episode score is 775.36


 11%|█         | 10662/100000 [3:31:37<28:54:17,  1.16s/it]

104137 episode score is 778.72


 11%|█         | 10663/100000 [3:31:38<28:52:56,  1.16s/it]

104147 episode score is 776.83


 11%|█         | 10664/100000 [3:31:40<28:47:43,  1.16s/it]

104157 episode score is 761.08


 11%|█         | 10665/100000 [3:31:41<28:58:57,  1.17s/it]

104167 episode score is 785.42


 11%|█         | 10666/100000 [3:31:42<29:06:03,  1.17s/it]

104177 episode score is 786.90


 11%|█         | 10667/100000 [3:31:43<29:05:32,  1.17s/it]

104187 episode score is 776.05


 11%|█         | 10668/100000 [3:31:44<29:11:33,  1.18s/it]

104197 episode score is 785.66


 11%|█         | 10669/100000 [3:31:45<29:10:09,  1.18s/it]

104207 episode score is 781.39


 11%|█         | 10670/100000 [3:31:47<29:14:09,  1.18s/it]

104217 episode score is 787.85


 11%|█         | 10671/100000 [3:31:48<28:47:47,  1.16s/it]

104227 episode score is 710.41


 11%|█         | 10672/100000 [3:31:49<28:39:07,  1.15s/it]

104237 episode score is 736.14


 11%|█         | 10673/100000 [3:31:50<28:51:03,  1.16s/it]

104247 episode score is 779.49


 11%|█         | 10674/100000 [3:31:51<29:02:17,  1.17s/it]

104257 episode score is 788.62


 11%|█         | 10675/100000 [3:31:52<29:03:50,  1.17s/it]

104267 episode score is 782.51


 11%|█         | 10676/100000 [3:31:54<28:30:23,  1.15s/it]

104276 episode score is 807.10


 11%|█         | 10677/100000 [3:31:55<28:37:37,  1.15s/it]

104286 episode score is 775.63


 11%|█         | 10678/100000 [3:31:56<28:45:22,  1.16s/it]

104296 episode score is 782.00


 11%|█         | 10679/100000 [3:31:57<28:53:58,  1.16s/it]

104306 episode score is 783.05


 11%|█         | 10680/100000 [3:31:58<28:25:47,  1.15s/it]

104315 episode score is 798.35


 11%|█         | 10681/100000 [3:31:59<28:28:07,  1.15s/it]

104325 episode score is 765.85


 11%|█         | 10682/100000 [3:32:00<28:36:10,  1.15s/it]

104335 episode score is 772.66


 11%|█         | 10683/100000 [3:32:02<28:34:47,  1.15s/it]

104345 episode score is 767.28


 11%|█         | 10684/100000 [3:32:03<28:09:53,  1.14s/it]

104354 episode score is 802.36


 11%|█         | 10685/100000 [3:32:04<28:32:47,  1.15s/it]

104364 episode score is 787.62


 11%|█         | 10686/100000 [3:32:05<28:18:33,  1.14s/it]

104374 episode score is 740.17


 11%|█         | 10687/100000 [3:32:06<28:29:18,  1.15s/it]

104384 episode score is 781.15


 11%|█         | 10688/100000 [3:32:07<28:18:17,  1.14s/it]

104394 episode score is 749.42


 11%|█         | 10689/100000 [3:32:08<28:24:12,  1.14s/it]

104404 episode score is 767.96


 11%|█         | 10690/100000 [3:32:10<28:21:32,  1.14s/it]

104414 episode score is 762.24


 11%|█         | 10691/100000 [3:32:11<28:03:41,  1.13s/it]

104425 episode score is 644.22


 11%|█         | 10692/100000 [3:32:12<28:14:15,  1.14s/it]

104435 episode score is 774.30


 11%|█         | 10693/100000 [3:32:13<28:30:30,  1.15s/it]

104445 episode score is 781.58


 11%|█         | 10694/100000 [3:32:14<28:35:23,  1.15s/it]

104455 episode score is 779.94


 11%|█         | 10695/100000 [3:32:15<28:34:41,  1.15s/it]

104465 episode score is 761.27


 11%|█         | 10696/100000 [3:32:16<28:20:54,  1.14s/it]

104475 episode score is 740.56


 11%|█         | 10697/100000 [3:32:18<28:15:59,  1.14s/it]

104485 episode score is 743.53


 11%|█         | 10698/100000 [3:32:19<28:08:09,  1.13s/it]

104496 episode score is 651.99


 11%|█         | 10699/100000 [3:32:20<27:53:18,  1.12s/it]

104506 episode score is 726.20
104516 episode score is 769.46


 11%|█         | 10700/100000 [3:32:22<35:38:30,  1.44s/it]

Iteration 10700: Average test reward: 774.40


 11%|█         | 10701/100000 [3:32:23<34:01:04,  1.37s/it]

104526 episode score is 744.13


 11%|█         | 10702/100000 [3:32:24<32:05:17,  1.29s/it]

104536 episode score is 736.10


 11%|█         | 10703/100000 [3:32:25<30:53:04,  1.25s/it]

104546 episode score is 759.63


 11%|█         | 10704/100000 [3:32:27<29:54:50,  1.21s/it]

104556 episode score is 738.58


 11%|█         | 10705/100000 [3:32:28<29:21:10,  1.18s/it]

104566 episode score is 758.60


 11%|█         | 10706/100000 [3:32:29<29:00:34,  1.17s/it]

104576 episode score is 760.65


 11%|█         | 10707/100000 [3:32:30<28:32:35,  1.15s/it]

104586 episode score is 736.50


 11%|█         | 10708/100000 [3:32:31<28:29:28,  1.15s/it]

104596 episode score is 761.18


 11%|█         | 10709/100000 [3:32:32<28:38:42,  1.15s/it]

104606 episode score is 780.66


 11%|█         | 10710/100000 [3:32:33<28:37:33,  1.15s/it]

104616 episode score is 775.48


 11%|█         | 10711/100000 [3:32:35<28:37:02,  1.15s/it]

104626 episode score is 772.98


 11%|█         | 10712/100000 [3:32:36<28:24:48,  1.15s/it]

104636 episode score is 749.75


 11%|█         | 10713/100000 [3:32:37<28:44:15,  1.16s/it]

104646 episode score is 788.28


 11%|█         | 10714/100000 [3:32:38<28:47:46,  1.16s/it]

104656 episode score is 790.41


 11%|█         | 10715/100000 [3:32:39<28:45:16,  1.16s/it]

104666 episode score is 773.90


 11%|█         | 10716/100000 [3:32:40<28:48:10,  1.16s/it]

104676 episode score is 782.92


 11%|█         | 10717/100000 [3:32:42<28:49:25,  1.16s/it]

104686 episode score is 785.84


 11%|█         | 10718/100000 [3:32:43<28:35:58,  1.15s/it]

104696 episode score is 769.08


 11%|█         | 10719/100000 [3:32:44<28:39:04,  1.16s/it]

104706 episode score is 790.13


 11%|█         | 10720/100000 [3:32:45<28:17:16,  1.14s/it]

104716 episode score is 743.78


 11%|█         | 10721/100000 [3:32:46<28:13:01,  1.14s/it]

104726 episode score is 759.60


 11%|█         | 10722/100000 [3:32:47<28:28:49,  1.15s/it]

104736 episode score is 782.17


 11%|█         | 10723/100000 [3:32:48<28:32:01,  1.15s/it]

104746 episode score is 781.01


 11%|█         | 10724/100000 [3:32:50<28:49:45,  1.16s/it]

104756 episode score is 788.58


 11%|█         | 10725/100000 [3:32:51<28:47:49,  1.16s/it]

104767 episode score is 673.53


 11%|█         | 10726/100000 [3:32:52<28:37:39,  1.15s/it]

104777 episode score is 762.71


 11%|█         | 10727/100000 [3:32:53<28:06:10,  1.13s/it]

104786 episode score is 810.10


 11%|█         | 10728/100000 [3:32:54<28:22:24,  1.14s/it]

104796 episode score is 789.32


 11%|█         | 10729/100000 [3:32:55<28:29:45,  1.15s/it]

104806 episode score is 776.71


 11%|█         | 10730/100000 [3:32:56<28:00:50,  1.13s/it]

104817 episode score is 620.66


 11%|█         | 10731/100000 [3:32:58<28:10:05,  1.14s/it]

104827 episode score is 778.04


 11%|█         | 10732/100000 [3:32:59<27:48:53,  1.12s/it]

104837 episode score is 719.94


 11%|█         | 10733/100000 [3:33:00<28:16:40,  1.14s/it]

104847 episode score is 799.51


 11%|█         | 10734/100000 [3:33:01<28:21:45,  1.14s/it]

104857 episode score is 770.21


 11%|█         | 10735/100000 [3:33:02<28:37:47,  1.15s/it]

104867 episode score is 787.89


 11%|█         | 10736/100000 [3:33:03<28:47:11,  1.16s/it]

104877 episode score is 800.61


 11%|█         | 10737/100000 [3:33:04<28:35:31,  1.15s/it]

104887 episode score is 769.91


 11%|█         | 10738/100000 [3:33:06<28:21:23,  1.14s/it]

104897 episode score is 761.28


 11%|█         | 10739/100000 [3:33:07<28:26:14,  1.15s/it]

104907 episode score is 789.38


 11%|█         | 10740/100000 [3:33:08<28:17:35,  1.14s/it]

104917 episode score is 757.01


 11%|█         | 10741/100000 [3:33:09<28:08:48,  1.14s/it]

104927 episode score is 752.42


 11%|█         | 10742/100000 [3:33:10<28:16:10,  1.14s/it]

104937 episode score is 777.92


 11%|█         | 10743/100000 [3:33:11<28:14:47,  1.14s/it]

104947 episode score is 775.92


 11%|█         | 10744/100000 [3:33:12<28:14:02,  1.14s/it]

104957 episode score is 768.46


 11%|█         | 10745/100000 [3:33:13<28:02:55,  1.13s/it]

104967 episode score is 746.28


 11%|█         | 10746/100000 [3:33:15<28:01:02,  1.13s/it]

104977 episode score is 761.58


 11%|█         | 10747/100000 [3:33:16<27:47:04,  1.12s/it]

104987 episode score is 739.55


 11%|█         | 10748/100000 [3:33:17<28:01:31,  1.13s/it]

104997 episode score is 783.53


 11%|█         | 10749/100000 [3:33:18<28:12:27,  1.14s/it]

105007 episode score is 775.63


 11%|█         | 10750/100000 [3:33:19<28:12:14,  1.14s/it]

105018 episode score is 663.17


 11%|█         | 10751/100000 [3:33:20<28:09:42,  1.14s/it]

105028 episode score is 757.34


 11%|█         | 10752/100000 [3:33:21<28:15:27,  1.14s/it]

105040 episode score is 605.58


 11%|█         | 10753/100000 [3:33:23<27:52:12,  1.12s/it]

105050 episode score is 728.38


 11%|█         | 10754/100000 [3:33:24<27:49:25,  1.12s/it]

105061 episode score is 659.91


 11%|█         | 10755/100000 [3:33:25<28:00:43,  1.13s/it]

105071 episode score is 774.46


 11%|█         | 10756/100000 [3:33:26<27:56:43,  1.13s/it]

105081 episode score is 752.15


 11%|█         | 10757/100000 [3:33:27<28:15:09,  1.14s/it]

105091 episode score is 784.33


 11%|█         | 10758/100000 [3:33:28<28:19:25,  1.14s/it]

105101 episode score is 774.84


 11%|█         | 10759/100000 [3:33:29<28:17:14,  1.14s/it]

105111 episode score is 750.17


 11%|█         | 10760/100000 [3:33:30<27:56:49,  1.13s/it]

105120 episode score is 818.61


 11%|█         | 10761/100000 [3:33:32<28:14:15,  1.14s/it]

105130 episode score is 789.61


 11%|█         | 10762/100000 [3:33:33<28:42:28,  1.16s/it]

105141 episode score is 692.16


 11%|█         | 10763/100000 [3:33:34<28:34:53,  1.15s/it]

105151 episode score is 753.31


 11%|█         | 10764/100000 [3:33:35<28:33:33,  1.15s/it]

105161 episode score is 767.47


 11%|█         | 10765/100000 [3:33:36<28:21:29,  1.14s/it]

105171 episode score is 736.82


 11%|█         | 10766/100000 [3:33:37<27:58:37,  1.13s/it]

105181 episode score is 711.65


 11%|█         | 10767/100000 [3:33:38<27:55:49,  1.13s/it]

105191 episode score is 747.52


 11%|█         | 10768/100000 [3:33:40<28:17:58,  1.14s/it]

105201 episode score is 788.88


 11%|█         | 10769/100000 [3:33:41<28:11:33,  1.14s/it]

105211 episode score is 740.11


 11%|█         | 10770/100000 [3:33:42<28:25:11,  1.15s/it]

105221 episode score is 785.48


 11%|█         | 10771/100000 [3:33:43<28:27:46,  1.15s/it]

105231 episode score is 772.21


 11%|█         | 10772/100000 [3:33:44<28:35:39,  1.15s/it]

105241 episode score is 766.67


 11%|█         | 10773/100000 [3:33:45<28:34:59,  1.15s/it]

105251 episode score is 765.77


 11%|█         | 10774/100000 [3:33:47<29:27:10,  1.19s/it]

105261 episode score is 789.74


 11%|█         | 10775/100000 [3:33:48<29:24:11,  1.19s/it]

105271 episode score is 775.41


 11%|█         | 10776/100000 [3:33:49<28:42:11,  1.16s/it]

105280 episode score is 807.51


 11%|█         | 10777/100000 [3:33:50<28:46:29,  1.16s/it]

105290 episode score is 785.87


 11%|█         | 10778/100000 [3:33:51<28:37:52,  1.16s/it]

105300 episode score is 752.81


 11%|█         | 10779/100000 [3:33:52<28:14:43,  1.14s/it]

105310 episode score is 718.42


 11%|█         | 10780/100000 [3:33:54<28:31:34,  1.15s/it]

105320 episode score is 792.55


 11%|█         | 10781/100000 [3:33:55<28:42:23,  1.16s/it]

105330 episode score is 788.98


 11%|█         | 10782/100000 [3:33:56<28:27:29,  1.15s/it]

105340 episode score is 748.85


 11%|█         | 10783/100000 [3:33:57<27:57:57,  1.13s/it]

105350 episode score is 700.65


 11%|█         | 10784/100000 [3:33:58<28:12:34,  1.14s/it]

105360 episode score is 776.75


 11%|█         | 10785/100000 [3:33:59<28:21:55,  1.14s/it]

105370 episode score is 774.14


 11%|█         | 10786/100000 [3:34:00<28:14:46,  1.14s/it]

105381 episode score is 636.73


 11%|█         | 10787/100000 [3:34:02<28:40:50,  1.16s/it]

105391 episode score is 796.85


 11%|█         | 10788/100000 [3:34:03<28:12:20,  1.14s/it]

105401 episode score is 685.64


 11%|█         | 10789/100000 [3:34:04<28:31:37,  1.15s/it]

105412 episode score is 695.87


 11%|█         | 10790/100000 [3:34:05<28:45:44,  1.16s/it]

105422 episode score is 793.46


 11%|█         | 10791/100000 [3:34:06<28:52:45,  1.17s/it]

105432 episode score is 776.12


 11%|█         | 10792/100000 [3:34:07<29:05:10,  1.17s/it]

105442 episode score is 790.96


 11%|█         | 10793/100000 [3:34:09<28:28:46,  1.15s/it]

105451 episode score is 807.14


 11%|█         | 10794/100000 [3:34:10<28:14:46,  1.14s/it]

105460 episode score is 839.91


 11%|█         | 10795/100000 [3:34:11<28:19:35,  1.14s/it]

105470 episode score is 766.78


 11%|█         | 10796/100000 [3:34:12<28:10:41,  1.14s/it]

105481 episode score is 633.13


 11%|█         | 10797/100000 [3:34:13<28:05:19,  1.13s/it]

105491 episode score is 733.03


 11%|█         | 10798/100000 [3:34:14<28:16:01,  1.14s/it]

105501 episode score is 779.56


 11%|█         | 10799/100000 [3:34:15<27:49:41,  1.12s/it]

105510 episode score is 815.11
105520 episode score is 717.39


 11%|█         | 10800/100000 [3:34:17<35:42:18,  1.44s/it]

Iteration 10800: Average test reward: 804.90


 11%|█         | 10801/100000 [3:34:19<33:40:57,  1.36s/it]

105530 episode score is 790.43


 11%|█         | 10802/100000 [3:34:20<32:02:42,  1.29s/it]

105540 episode score is 763.46


 11%|█         | 10803/100000 [3:34:21<31:20:35,  1.27s/it]

105550 episode score is 802.35


 11%|█         | 10804/100000 [3:34:22<30:20:03,  1.22s/it]

105560 episode score is 764.38


 11%|█         | 10805/100000 [3:34:23<29:37:47,  1.20s/it]

105570 episode score is 743.48


 11%|█         | 10806/100000 [3:34:24<28:52:49,  1.17s/it]

105579 episode score is 823.18


 11%|█         | 10807/100000 [3:34:25<29:02:47,  1.17s/it]

105589 episode score is 790.77


 11%|█         | 10808/100000 [3:34:27<28:53:51,  1.17s/it]

105599 episode score is 759.68


 11%|█         | 10809/100000 [3:34:28<28:37:46,  1.16s/it]

105609 episode score is 756.26


 11%|█         | 10810/100000 [3:34:29<28:41:35,  1.16s/it]

105620 episode score is 684.91


 11%|█         | 10811/100000 [3:34:30<28:38:03,  1.16s/it]

105630 episode score is 769.82


 11%|█         | 10812/100000 [3:34:31<28:44:32,  1.16s/it]

105640 episode score is 785.47


 11%|█         | 10813/100000 [3:34:32<28:27:23,  1.15s/it]

105650 episode score is 733.65


 11%|█         | 10814/100000 [3:34:34<28:40:53,  1.16s/it]

105660 episode score is 790.37


 11%|█         | 10815/100000 [3:34:35<28:48:22,  1.16s/it]

105670 episode score is 791.89


 11%|█         | 10816/100000 [3:34:36<28:42:39,  1.16s/it]

105680 episode score is 769.50


 11%|█         | 10817/100000 [3:34:37<28:31:35,  1.15s/it]

105690 episode score is 759.55


 11%|█         | 10818/100000 [3:34:38<28:16:12,  1.14s/it]

105700 episode score is 759.24


 11%|█         | 10819/100000 [3:34:39<27:50:55,  1.12s/it]

105710 episode score is 708.72


 11%|█         | 10820/100000 [3:34:40<27:55:14,  1.13s/it]

105720 episode score is 755.44


 11%|█         | 10821/100000 [3:34:41<27:47:27,  1.12s/it]

105730 episode score is 736.06


 11%|█         | 10822/100000 [3:34:43<27:49:38,  1.12s/it]

105740 episode score is 754.57


 11%|█         | 10823/100000 [3:34:44<27:39:59,  1.12s/it]

105750 episode score is 730.78


 11%|█         | 10824/100000 [3:34:45<27:51:32,  1.12s/it]

105760 episode score is 761.83


 11%|█         | 10825/100000 [3:34:46<27:37:07,  1.11s/it]

105769 episode score is 823.60


 11%|█         | 10826/100000 [3:34:47<27:58:06,  1.13s/it]

105779 episode score is 766.79


 11%|█         | 10827/100000 [3:34:48<28:14:15,  1.14s/it]

105789 episode score is 778.16


 11%|█         | 10828/100000 [3:34:49<28:39:20,  1.16s/it]

105799 episode score is 796.61


 11%|█         | 10829/100000 [3:34:51<28:45:43,  1.16s/it]

105809 episode score is 780.77


 11%|█         | 10830/100000 [3:34:52<28:52:23,  1.17s/it]

105819 episode score is 798.56


 11%|█         | 10831/100000 [3:34:53<28:20:19,  1.14s/it]

105828 episode score is 824.67


 11%|█         | 10832/100000 [3:34:54<29:12:51,  1.18s/it]

105838 episode score is 789.52


 11%|█         | 10833/100000 [3:34:55<28:44:33,  1.16s/it]

105847 episode score is 835.55


 11%|█         | 10834/100000 [3:34:56<28:14:41,  1.14s/it]

105856 episode score is 818.78


 11%|█         | 10835/100000 [3:34:57<27:59:00,  1.13s/it]

105865 episode score is 838.31


 11%|█         | 10836/100000 [3:34:59<28:26:49,  1.15s/it]

105875 episode score is 799.39


 11%|█         | 10837/100000 [3:35:00<27:59:23,  1.13s/it]

105884 episode score is 813.55


 11%|█         | 10838/100000 [3:35:01<28:24:28,  1.15s/it]

105893 episode score is 883.76


 11%|█         | 10839/100000 [3:35:02<28:22:22,  1.15s/it]

105902 episode score is 857.41


 11%|█         | 10840/100000 [3:35:03<28:07:15,  1.14s/it]

105911 episode score is 829.68


 11%|█         | 10841/100000 [3:35:04<28:10:27,  1.14s/it]

105920 episode score is 859.14


 11%|█         | 10842/100000 [3:35:05<28:20:21,  1.14s/it]

105930 episode score is 788.20


 11%|█         | 10843/100000 [3:35:07<28:21:16,  1.14s/it]

105939 episode score is 855.94


 11%|█         | 10844/100000 [3:35:08<28:27:21,  1.15s/it]

105948 episode score is 855.72


 11%|█         | 10845/100000 [3:35:09<28:04:46,  1.13s/it]

105957 episode score is 826.77


 11%|█         | 10846/100000 [3:35:10<28:21:54,  1.15s/it]

105967 episode score is 782.44


 11%|█         | 10847/100000 [3:35:11<28:33:40,  1.15s/it]

105977 episode score is 774.80


 11%|█         | 10848/100000 [3:35:12<28:10:16,  1.14s/it]

105986 episode score is 826.09


 11%|█         | 10849/100000 [3:35:13<27:59:49,  1.13s/it]

105995 episode score is 833.54


 11%|█         | 10850/100000 [3:35:15<27:37:44,  1.12s/it]

106004 episode score is 816.10


 11%|█         | 10851/100000 [3:35:16<27:37:13,  1.12s/it]

106013 episode score is 835.14


 11%|█         | 10852/100000 [3:35:17<27:41:54,  1.12s/it]

106022 episode score is 843.53


 11%|█         | 10853/100000 [3:35:18<27:49:55,  1.12s/it]

106031 episode score is 828.89


 11%|█         | 10854/100000 [3:35:19<28:17:20,  1.14s/it]

106041 episode score is 790.21


 11%|█         | 10855/100000 [3:35:20<27:54:10,  1.13s/it]

106050 episode score is 818.78


 11%|█         | 10856/100000 [3:35:21<27:51:48,  1.13s/it]

106059 episode score is 844.65


 11%|█         | 10857/100000 [3:35:22<27:41:06,  1.12s/it]

106068 episode score is 831.25


 11%|█         | 10858/100000 [3:35:24<28:12:52,  1.14s/it]

106078 episode score is 802.65


 11%|█         | 10859/100000 [3:35:25<28:02:02,  1.13s/it]

106087 episode score is 842.66


 11%|█         | 10860/100000 [3:35:26<28:06:28,  1.14s/it]

106096 episode score is 863.91


 11%|█         | 10861/100000 [3:35:27<27:58:15,  1.13s/it]

106105 episode score is 837.36


 11%|█         | 10862/100000 [3:35:28<27:59:48,  1.13s/it]

106114 episode score is 849.82


 11%|█         | 10863/100000 [3:35:29<27:56:29,  1.13s/it]

106123 episode score is 839.06


 11%|█         | 10864/100000 [3:35:30<27:45:54,  1.12s/it]

106132 episode score is 832.53


 11%|█         | 10865/100000 [3:35:31<27:33:55,  1.11s/it]

106141 episode score is 820.79


 11%|█         | 10866/100000 [3:35:33<27:25:12,  1.11s/it]

106150 episode score is 813.87


 11%|█         | 10867/100000 [3:35:34<27:14:46,  1.10s/it]

106159 episode score is 818.58


 11%|█         | 10868/100000 [3:35:35<27:05:18,  1.09s/it]

106168 episode score is 812.50


 11%|█         | 10869/100000 [3:35:36<27:32:21,  1.11s/it]

106178 episode score is 786.80


 11%|█         | 10870/100000 [3:35:37<28:02:08,  1.13s/it]

106188 episode score is 804.38


 11%|█         | 10871/100000 [3:35:38<27:43:26,  1.12s/it]

106198 episode score is 735.59


 11%|█         | 10872/100000 [3:35:39<27:59:36,  1.13s/it]

106208 episode score is 788.09


 11%|█         | 10873/100000 [3:35:40<28:04:46,  1.13s/it]

106218 episode score is 782.77


 11%|█         | 10874/100000 [3:35:42<27:45:32,  1.12s/it]

106228 episode score is 734.28


 11%|█         | 10875/100000 [3:35:43<28:04:39,  1.13s/it]

106238 episode score is 785.76


 11%|█         | 10876/100000 [3:35:44<27:50:19,  1.12s/it]

106248 episode score is 724.95


 11%|█         | 10877/100000 [3:35:45<28:13:45,  1.14s/it]

106258 episode score is 797.07


 11%|█         | 10878/100000 [3:35:46<27:55:26,  1.13s/it]

106268 episode score is 739.46


 11%|█         | 10879/100000 [3:35:47<28:07:09,  1.14s/it]

106278 episode score is 763.03


 11%|█         | 10880/100000 [3:35:48<28:09:42,  1.14s/it]

106288 episode score is 754.54


 11%|█         | 10881/100000 [3:35:49<28:10:39,  1.14s/it]

106298 episode score is 763.17


 11%|█         | 10882/100000 [3:35:51<27:52:05,  1.13s/it]

106308 episode score is 739.16


 11%|█         | 10883/100000 [3:35:52<28:04:19,  1.13s/it]

106318 episode score is 791.45


 11%|█         | 10884/100000 [3:35:53<28:14:06,  1.14s/it]

106328 episode score is 790.91


 11%|█         | 10885/100000 [3:35:54<28:09:43,  1.14s/it]

106338 episode score is 776.90


 11%|█         | 10886/100000 [3:35:55<28:21:38,  1.15s/it]

106348 episode score is 788.18


 11%|█         | 10887/100000 [3:35:56<28:58:26,  1.17s/it]

106358 episode score is 780.74


 11%|█         | 10888/100000 [3:35:58<28:31:01,  1.15s/it]

106368 episode score is 738.71


 11%|█         | 10889/100000 [3:35:59<28:41:10,  1.16s/it]

106378 episode score is 794.26


 11%|█         | 10890/100000 [3:36:00<28:53:16,  1.17s/it]

106388 episode score is 785.13


 11%|█         | 10891/100000 [3:36:01<28:39:23,  1.16s/it]

106398 episode score is 774.28


 11%|█         | 10892/100000 [3:36:02<28:16:53,  1.14s/it]

106408 episode score is 741.31


 11%|█         | 10893/100000 [3:36:03<28:17:40,  1.14s/it]

106418 episode score is 765.79


 11%|█         | 10894/100000 [3:36:04<28:20:47,  1.15s/it]

106428 episode score is 790.89


 11%|█         | 10895/100000 [3:36:06<28:35:14,  1.15s/it]

106438 episode score is 806.23


 11%|█         | 10896/100000 [3:36:07<28:25:23,  1.15s/it]

106448 episode score is 771.20


 11%|█         | 10897/100000 [3:36:08<28:41:57,  1.16s/it]

106458 episode score is 801.08


 11%|█         | 10898/100000 [3:36:09<28:40:59,  1.16s/it]

106468 episode score is 790.98


 11%|█         | 10899/100000 [3:36:10<28:48:21,  1.16s/it]

106478 episode score is 805.78
106488 episode score is 795.12


 11%|█         | 10900/100000 [3:36:12<36:37:27,  1.48s/it]

Iteration 10900: Average test reward: 794.66


 11%|█         | 10901/100000 [3:36:14<34:17:39,  1.39s/it]

106498 episode score is 792.00


 11%|█         | 10902/100000 [3:36:15<32:14:49,  1.30s/it]

106508 episode score is 737.82


 11%|█         | 10903/100000 [3:36:16<31:07:22,  1.26s/it]

106518 episode score is 790.15


 11%|█         | 10904/100000 [3:36:17<30:17:37,  1.22s/it]

106528 episode score is 777.19


 11%|█         | 10905/100000 [3:36:18<29:26:54,  1.19s/it]

106537 episode score is 825.70


 11%|█         | 10906/100000 [3:36:19<29:12:14,  1.18s/it]

106547 episode score is 778.82


 11%|█         | 10907/100000 [3:36:20<29:04:42,  1.17s/it]

106557 episode score is 778.00


 11%|█         | 10908/100000 [3:36:22<28:36:13,  1.16s/it]

106567 episode score is 751.66


 11%|█         | 10909/100000 [3:36:23<28:50:56,  1.17s/it]

106577 episode score is 802.38


 11%|█         | 10910/100000 [3:36:24<28:58:18,  1.17s/it]

106587 episode score is 802.05


 11%|█         | 10911/100000 [3:36:25<28:50:19,  1.17s/it]

106597 episode score is 784.26


 11%|█         | 10912/100000 [3:36:26<28:40:11,  1.16s/it]

106607 episode score is 770.25


 11%|█         | 10913/100000 [3:36:27<28:08:26,  1.14s/it]

106616 episode score is 820.62


 11%|█         | 10914/100000 [3:36:29<28:30:33,  1.15s/it]

106626 episode score is 800.32


 11%|█         | 10915/100000 [3:36:30<28:23:09,  1.15s/it]

106636 episode score is 759.21


 11%|█         | 10916/100000 [3:36:31<28:25:52,  1.15s/it]

106646 episode score is 779.87


 11%|█         | 10917/100000 [3:36:32<28:31:55,  1.15s/it]

106656 episode score is 773.33


 11%|█         | 10918/100000 [3:36:33<28:18:28,  1.14s/it]

106667 episode score is 648.84


 11%|█         | 10919/100000 [3:36:34<28:08:37,  1.14s/it]

106677 episode score is 745.51


 11%|█         | 10920/100000 [3:36:35<28:33:11,  1.15s/it]

106688 episode score is 705.57


 11%|█         | 10921/100000 [3:36:36<28:02:31,  1.13s/it]

106698 episode score is 713.01


 11%|█         | 10922/100000 [3:36:38<27:51:01,  1.13s/it]

106708 episode score is 732.94


 11%|█         | 10923/100000 [3:36:39<28:02:49,  1.13s/it]

106718 episode score is 779.57


 11%|█         | 10924/100000 [3:36:40<27:43:59,  1.12s/it]

106728 episode score is 703.33


 11%|█         | 10925/100000 [3:36:41<27:43:25,  1.12s/it]

106738 episode score is 745.85


 11%|█         | 10926/100000 [3:36:42<27:45:45,  1.12s/it]

106748 episode score is 746.46


 11%|█         | 10927/100000 [3:36:43<27:53:27,  1.13s/it]

106758 episode score is 767.18


 11%|█         | 10928/100000 [3:36:44<28:05:32,  1.14s/it]

106769 episode score is 675.12


 11%|█         | 10929/100000 [3:36:46<28:08:29,  1.14s/it]

106779 episode score is 770.40


 11%|█         | 10930/100000 [3:36:47<27:56:19,  1.13s/it]

106789 episode score is 733.44


 11%|█         | 10931/100000 [3:36:48<28:00:45,  1.13s/it]

106799 episode score is 744.39


 11%|█         | 10932/100000 [3:36:49<27:43:49,  1.12s/it]

106809 episode score is 712.51


 11%|█         | 10933/100000 [3:36:50<27:46:07,  1.12s/it]

106819 episode score is 752.42


 11%|█         | 10934/100000 [3:36:51<27:51:57,  1.13s/it]

106829 episode score is 762.04


 11%|█         | 10935/100000 [3:36:52<27:58:26,  1.13s/it]

106839 episode score is 763.18


 11%|█         | 10936/100000 [3:36:53<27:54:15,  1.13s/it]

106849 episode score is 751.87


 11%|█         | 10937/100000 [3:36:55<28:08:45,  1.14s/it]

106860 episode score is 695.28


 11%|█         | 10938/100000 [3:36:56<29:03:39,  1.17s/it]

106870 episode score is 771.80


 11%|█         | 10939/100000 [3:36:57<28:52:27,  1.17s/it]

106880 episode score is 765.02


 11%|█         | 10940/100000 [3:36:58<28:42:23,  1.16s/it]

106890 episode score is 766.12


 11%|█         | 10941/100000 [3:36:59<28:13:36,  1.14s/it]

106900 episode score is 725.49


 11%|█         | 10942/100000 [3:37:00<28:31:40,  1.15s/it]

106910 episode score is 798.17


 11%|█         | 10943/100000 [3:37:02<28:44:32,  1.16s/it]

106920 episode score is 795.83


 11%|█         | 10944/100000 [3:37:03<28:59:32,  1.17s/it]

106931 episode score is 703.06


 11%|█         | 10945/100000 [3:37:04<29:00:54,  1.17s/it]

106941 episode score is 798.77


 11%|█         | 10946/100000 [3:37:05<28:26:24,  1.15s/it]

106950 episode score is 825.13


 11%|█         | 10947/100000 [3:37:06<28:37:28,  1.16s/it]

106960 episode score is 797.36


 11%|█         | 10948/100000 [3:37:07<28:39:01,  1.16s/it]

106970 episode score is 786.20


 11%|█         | 10949/100000 [3:37:09<28:49:17,  1.17s/it]

106980 episode score is 793.67


 11%|█         | 10950/100000 [3:37:10<28:48:59,  1.16s/it]

106990 episode score is 768.66


 11%|█         | 10951/100000 [3:37:11<28:18:18,  1.14s/it]

107000 episode score is 728.07


 11%|█         | 10952/100000 [3:37:12<28:39:15,  1.16s/it]

107011 episode score is 717.23


 11%|█         | 10953/100000 [3:37:13<28:17:38,  1.14s/it]

107021 episode score is 718.69


 11%|█         | 10954/100000 [3:37:14<28:16:00,  1.14s/it]

107031 episode score is 759.33


 11%|█         | 10955/100000 [3:37:15<28:17:42,  1.14s/it]

107041 episode score is 757.12


 11%|█         | 10956/100000 [3:37:17<28:12:16,  1.14s/it]

107051 episode score is 760.03


 11%|█         | 10957/100000 [3:37:18<28:02:26,  1.13s/it]

107061 episode score is 746.23


 11%|█         | 10958/100000 [3:37:19<27:43:47,  1.12s/it]

107071 episode score is 725.26


 11%|█         | 10959/100000 [3:37:20<27:38:02,  1.12s/it]

107081 episode score is 732.12


 11%|█         | 10960/100000 [3:37:21<27:27:05,  1.11s/it]

107091 episode score is 734.95


 11%|█         | 10961/100000 [3:37:22<27:40:37,  1.12s/it]

107102 episode score is 677.74


 11%|█         | 10962/100000 [3:37:23<28:04:39,  1.14s/it]

107113 episode score is 713.45


 11%|█         | 10963/100000 [3:37:24<27:50:39,  1.13s/it]

107123 episode score is 751.35


 11%|█         | 10964/100000 [3:37:25<27:37:27,  1.12s/it]

107133 episode score is 747.44


 11%|█         | 10965/100000 [3:37:27<27:20:30,  1.11s/it]

107143 episode score is 729.06


 11%|█         | 10966/100000 [3:37:28<27:38:22,  1.12s/it]

107154 episode score is 696.32


 11%|█         | 10967/100000 [3:37:29<27:57:07,  1.13s/it]

107165 episode score is 705.47


 11%|█         | 10968/100000 [3:37:30<27:30:43,  1.11s/it]

107175 episode score is 723.84


 11%|█         | 10969/100000 [3:37:31<27:20:53,  1.11s/it]

107185 episode score is 722.08


 11%|█         | 10970/100000 [3:37:32<27:49:24,  1.13s/it]

107196 episode score is 699.71


 11%|█         | 10971/100000 [3:37:33<27:56:41,  1.13s/it]

107207 episode score is 680.79


 11%|█         | 10972/100000 [3:37:34<28:19:40,  1.15s/it]

107218 episode score is 713.91


 11%|█         | 10973/100000 [3:37:36<28:23:50,  1.15s/it]

107229 episode score is 697.84


 11%|█         | 10974/100000 [3:37:37<28:23:47,  1.15s/it]

107240 episode score is 686.47


 11%|█         | 10975/100000 [3:37:38<28:18:36,  1.14s/it]

107251 episode score is 685.75


 11%|█         | 10976/100000 [3:37:39<28:21:58,  1.15s/it]

107262 episode score is 692.29


 11%|█         | 10977/100000 [3:37:40<28:22:56,  1.15s/it]

107273 episode score is 693.11


 11%|█         | 10978/100000 [3:37:41<28:19:51,  1.15s/it]

107284 episode score is 681.81


 11%|█         | 10979/100000 [3:37:42<28:06:58,  1.14s/it]

107295 episode score is 652.35


 11%|█         | 10980/100000 [3:37:44<27:53:52,  1.13s/it]

107305 episode score is 750.42


 11%|█         | 10981/100000 [3:37:45<27:52:31,  1.13s/it]

107316 episode score is 663.82


 11%|█         | 10982/100000 [3:37:46<28:18:08,  1.14s/it]

107327 episode score is 722.46


 11%|█         | 10983/100000 [3:37:47<28:00:40,  1.13s/it]

107337 episode score is 726.55


 11%|█         | 10984/100000 [3:37:48<27:50:58,  1.13s/it]

107347 episode score is 740.03


 11%|█         | 10985/100000 [3:37:49<28:14:06,  1.14s/it]

107358 episode score is 704.23


 11%|█         | 10986/100000 [3:37:50<27:50:42,  1.13s/it]

107368 episode score is 729.93


 11%|█         | 10987/100000 [3:37:52<27:41:10,  1.12s/it]

107378 episode score is 731.95


 11%|█         | 10988/100000 [3:37:53<28:05:30,  1.14s/it]

107389 episode score is 716.66


 11%|█         | 10989/100000 [3:37:54<27:43:37,  1.12s/it]

107399 episode score is 727.41


 11%|█         | 10990/100000 [3:37:55<28:14:27,  1.14s/it]

107410 episode score is 722.78


 11%|█         | 10991/100000 [3:37:56<27:50:49,  1.13s/it]

107420 episode score is 731.90


 11%|█         | 10992/100000 [3:37:57<27:53:01,  1.13s/it]

107430 episode score is 766.35


 11%|█         | 10993/100000 [3:37:58<27:41:41,  1.12s/it]

107440 episode score is 747.12


 11%|█         | 10994/100000 [3:37:59<27:41:06,  1.12s/it]

107450 episode score is 763.65


 11%|█         | 10995/100000 [3:38:01<27:35:58,  1.12s/it]

107460 episode score is 755.48


 11%|█         | 10996/100000 [3:38:02<28:04:40,  1.14s/it]

107471 episode score is 723.15


 11%|█         | 10997/100000 [3:38:03<27:50:28,  1.13s/it]

107481 episode score is 722.34


 11%|█         | 10998/100000 [3:38:04<27:31:58,  1.11s/it]

107491 episode score is 730.20


 11%|█         | 10999/100000 [3:38:05<27:18:03,  1.10s/it]

107501 episode score is 730.87
107512 episode score is 718.84


 11%|█         | 11000/100000 [3:38:07<34:55:24,  1.41s/it]

Iteration 11000: Average test reward: 696.89


 11%|█         | 11001/100000 [3:38:08<33:03:43,  1.34s/it]

107523 episode score is 698.66


 11%|█         | 11002/100000 [3:38:09<31:57:13,  1.29s/it]

107533 episode score is 732.49


 11%|█         | 11003/100000 [3:38:11<30:30:34,  1.23s/it]

107543 episode score is 741.92


 11%|█         | 11004/100000 [3:38:12<30:08:25,  1.22s/it]

107554 episode score is 722.00


 11%|█         | 11005/100000 [3:38:13<29:32:22,  1.19s/it]

107564 episode score is 758.56


 11%|█         | 11006/100000 [3:38:14<29:21:42,  1.19s/it]

107575 episode score is 705.58


 11%|█         | 11007/100000 [3:38:15<28:57:03,  1.17s/it]

107585 episode score is 770.72


 11%|█         | 11008/100000 [3:38:16<28:34:23,  1.16s/it]

107595 episode score is 757.45


 11%|█         | 11009/100000 [3:38:17<28:29:14,  1.15s/it]

107605 episode score is 759.68


 11%|█         | 11010/100000 [3:38:19<28:12:39,  1.14s/it]

107615 episode score is 740.73


 11%|█         | 11011/100000 [3:38:20<28:09:55,  1.14s/it]

107625 episode score is 743.32


 11%|█         | 11012/100000 [3:38:21<27:52:13,  1.13s/it]

107635 episode score is 733.47


 11%|█         | 11013/100000 [3:38:22<27:48:31,  1.13s/it]

107645 episode score is 763.69


 11%|█         | 11014/100000 [3:38:23<27:44:24,  1.12s/it]

107655 episode score is 749.33


 11%|█         | 11015/100000 [3:38:24<27:58:42,  1.13s/it]

107665 episode score is 781.00


 11%|█         | 11016/100000 [3:38:25<27:54:02,  1.13s/it]

107675 episode score is 756.92


 11%|█         | 11017/100000 [3:38:26<27:57:37,  1.13s/it]

107685 episode score is 779.38


 11%|█         | 11018/100000 [3:38:28<27:53:06,  1.13s/it]

107695 episode score is 765.87


 11%|█         | 11019/100000 [3:38:29<27:44:50,  1.12s/it]

107706 episode score is 662.11


 11%|█         | 11020/100000 [3:38:30<27:29:51,  1.11s/it]

107716 episode score is 734.63


 11%|█         | 11021/100000 [3:38:31<27:42:51,  1.12s/it]

107727 episode score is 692.92


 11%|█         | 11022/100000 [3:38:32<27:43:59,  1.12s/it]

107737 episode score is 758.18


 11%|█         | 11023/100000 [3:38:33<27:40:13,  1.12s/it]

107747 episode score is 745.08


 11%|█         | 11024/100000 [3:38:34<27:25:30,  1.11s/it]

107757 episode score is 724.83


 11%|█         | 11025/100000 [3:38:35<27:37:03,  1.12s/it]

107767 episode score is 768.21


 11%|█         | 11026/100000 [3:38:36<27:50:17,  1.13s/it]

107777 episode score is 786.56


 11%|█         | 11027/100000 [3:38:38<28:11:14,  1.14s/it]

107787 episode score is 804.25


 11%|█         | 11028/100000 [3:38:39<28:20:35,  1.15s/it]

107797 episode score is 783.20


 11%|█         | 11029/100000 [3:38:40<28:01:40,  1.13s/it]

107807 episode score is 744.06


 11%|█         | 11030/100000 [3:38:41<28:04:59,  1.14s/it]

107817 episode score is 769.24


 11%|█         | 11031/100000 [3:38:42<28:07:49,  1.14s/it]

107827 episode score is 777.94


 11%|█         | 11032/100000 [3:38:43<28:04:40,  1.14s/it]

107837 episode score is 756.58


 11%|█         | 11033/100000 [3:38:45<28:30:44,  1.15s/it]

107847 episode score is 792.48


 11%|█         | 11034/100000 [3:38:46<28:21:55,  1.15s/it]

107857 episode score is 767.63


 11%|█         | 11035/100000 [3:38:47<28:16:59,  1.14s/it]

107867 episode score is 773.96


 11%|█         | 11036/100000 [3:38:48<28:26:21,  1.15s/it]

107877 episode score is 784.92


 11%|█         | 11037/100000 [3:38:49<28:45:50,  1.16s/it]

107888 episode score is 715.37


 11%|█         | 11038/100000 [3:38:50<28:17:46,  1.15s/it]

107898 episode score is 743.85


 11%|█         | 11039/100000 [3:38:51<28:03:40,  1.14s/it]

107908 episode score is 748.65


 11%|█         | 11040/100000 [3:38:53<28:07:19,  1.14s/it]

107918 episode score is 775.10


 11%|█         | 11041/100000 [3:38:54<27:52:01,  1.13s/it]

107928 episode score is 746.06


 11%|█         | 11042/100000 [3:38:55<28:07:05,  1.14s/it]

107938 episode score is 785.25


 11%|█         | 11043/100000 [3:38:56<27:51:44,  1.13s/it]

107948 episode score is 753.38


 11%|█         | 11044/100000 [3:38:57<27:56:56,  1.13s/it]

107958 episode score is 777.78


 11%|█         | 11045/100000 [3:38:58<28:19:05,  1.15s/it]

107968 episode score is 734.97


 11%|█         | 11046/100000 [3:38:59<28:00:55,  1.13s/it]

107978 episode score is 727.75


 11%|█         | 11047/100000 [3:39:00<27:43:14,  1.12s/it]

107988 episode score is 741.21


 11%|█         | 11048/100000 [3:39:02<27:34:00,  1.12s/it]

107998 episode score is 745.84


 11%|█         | 11049/100000 [3:39:03<27:35:51,  1.12s/it]

108008 episode score is 753.83


 11%|█         | 11050/100000 [3:39:04<28:03:18,  1.14s/it]

108019 episode score is 717.67


 11%|█         | 11051/100000 [3:39:05<28:18:43,  1.15s/it]

108030 episode score is 718.50


 11%|█         | 11052/100000 [3:39:06<28:00:09,  1.13s/it]

108040 episode score is 748.73


 11%|█         | 11053/100000 [3:39:07<28:18:55,  1.15s/it]

108051 episode score is 720.75


 11%|█         | 11054/100000 [3:39:08<28:28:04,  1.15s/it]

108062 episode score is 704.40


 11%|█         | 11055/100000 [3:39:10<28:05:46,  1.14s/it]

108072 episode score is 743.76


 11%|█         | 11056/100000 [3:39:11<27:55:28,  1.13s/it]

108082 episode score is 766.55


 11%|█         | 11057/100000 [3:39:12<27:57:46,  1.13s/it]

108092 episode score is 777.39


 11%|█         | 11058/100000 [3:39:13<27:43:50,  1.12s/it]

108102 episode score is 733.68


 11%|█         | 11059/100000 [3:39:14<27:40:06,  1.12s/it]

108112 episode score is 752.68


 11%|█         | 11060/100000 [3:39:15<27:36:19,  1.12s/it]

108122 episode score is 749.81


 11%|█         | 11061/100000 [3:39:16<27:31:01,  1.11s/it]

108132 episode score is 746.99


 11%|█         | 11062/100000 [3:39:17<27:46:55,  1.12s/it]

108142 episode score is 763.56


 11%|█         | 11063/100000 [3:39:19<28:15:42,  1.14s/it]

108153 episode score is 720.86


 11%|█         | 11064/100000 [3:39:20<27:56:12,  1.13s/it]

108163 episode score is 734.66


 11%|█         | 11065/100000 [3:39:21<27:47:35,  1.13s/it]

108173 episode score is 751.46


 11%|█         | 11066/100000 [3:39:22<27:33:17,  1.12s/it]

108183 episode score is 723.34


 11%|█         | 11067/100000 [3:39:23<27:32:54,  1.12s/it]

108193 episode score is 751.49


 11%|█         | 11068/100000 [3:39:24<27:54:23,  1.13s/it]

108204 episode score is 698.33


 11%|█         | 11069/100000 [3:39:25<27:39:25,  1.12s/it]

108214 episode score is 736.47


 11%|█         | 11070/100000 [3:39:26<27:30:59,  1.11s/it]

108224 episode score is 738.80


 11%|█         | 11071/100000 [3:39:27<27:27:29,  1.11s/it]

108234 episode score is 748.61


 11%|█         | 11072/100000 [3:39:29<27:30:09,  1.11s/it]

108244 episode score is 756.51


 11%|█         | 11073/100000 [3:39:30<27:36:40,  1.12s/it]

108254 episode score is 764.88


 11%|█         | 11074/100000 [3:39:31<27:32:36,  1.12s/it]

108264 episode score is 761.89


 11%|█         | 11075/100000 [3:39:32<27:20:50,  1.11s/it]

108274 episode score is 732.37


 11%|█         | 11076/100000 [3:39:33<27:26:59,  1.11s/it]

108284 episode score is 750.71


 11%|█         | 11077/100000 [3:39:34<27:19:09,  1.11s/it]

108294 episode score is 734.07


 11%|█         | 11078/100000 [3:39:35<27:18:02,  1.11s/it]

108304 episode score is 745.63


 11%|█         | 11079/100000 [3:39:36<27:26:19,  1.11s/it]

108314 episode score is 763.93


 11%|█         | 11080/100000 [3:39:37<27:34:51,  1.12s/it]

108324 episode score is 769.57


 11%|█         | 11081/100000 [3:39:39<27:34:58,  1.12s/it]

108334 episode score is 755.64


 11%|█         | 11082/100000 [3:39:40<27:47:40,  1.13s/it]

108344 episode score is 772.62


 11%|█         | 11083/100000 [3:39:41<27:34:28,  1.12s/it]

108354 episode score is 744.16


 11%|█         | 11084/100000 [3:39:42<27:39:55,  1.12s/it]

108364 episode score is 762.53


 11%|█         | 11085/100000 [3:39:43<27:52:44,  1.13s/it]

108374 episode score is 787.96


 11%|█         | 11086/100000 [3:39:44<27:46:38,  1.12s/it]

108384 episode score is 748.60


 11%|█         | 11087/100000 [3:39:45<27:52:09,  1.13s/it]

108394 episode score is 772.23


 11%|█         | 11088/100000 [3:39:46<28:02:56,  1.14s/it]

108404 episode score is 780.73


 11%|█         | 11089/100000 [3:39:48<27:59:47,  1.13s/it]

108414 episode score is 744.95


 11%|█         | 11090/100000 [3:39:49<27:52:25,  1.13s/it]

108424 episode score is 749.89


 11%|█         | 11091/100000 [3:39:50<27:50:22,  1.13s/it]

108434 episode score is 745.26


 11%|█         | 11092/100000 [3:39:51<28:02:32,  1.14s/it]

108444 episode score is 781.15


 11%|█         | 11093/100000 [3:39:52<28:11:46,  1.14s/it]

108454 episode score is 784.59


 11%|█         | 11094/100000 [3:39:53<28:05:04,  1.14s/it]

108464 episode score is 755.53


 11%|█         | 11095/100000 [3:39:54<28:12:50,  1.14s/it]

108474 episode score is 787.47


 11%|█         | 11096/100000 [3:39:56<28:19:36,  1.15s/it]

108484 episode score is 774.37


 11%|█         | 11097/100000 [3:39:57<28:17:32,  1.15s/it]

108494 episode score is 784.00


 11%|█         | 11098/100000 [3:39:58<27:55:31,  1.13s/it]

108504 episode score is 738.40


 11%|█         | 11099/100000 [3:39:59<27:55:33,  1.13s/it]

108514 episode score is 767.58
108524 episode score is 792.38


 11%|█         | 11100/100000 [3:40:01<35:55:16,  1.45s/it]

Iteration 11100: Average test reward: 781.01


 11%|█         | 11101/100000 [3:40:02<33:48:29,  1.37s/it]

108534 episode score is 792.19


 11%|█         | 11102/100000 [3:40:04<32:13:03,  1.30s/it]

108544 episode score is 777.61


 11%|█         | 11103/100000 [3:40:05<31:02:36,  1.26s/it]

108554 episode score is 780.89


 11%|█         | 11104/100000 [3:40:06<30:16:56,  1.23s/it]

108564 episode score is 787.52


 11%|█         | 11105/100000 [3:40:07<29:44:43,  1.20s/it]

108574 episode score is 774.18


 11%|█         | 11106/100000 [3:40:08<30:01:31,  1.22s/it]

108584 episode score is 790.83


 11%|█         | 11107/100000 [3:40:09<29:32:35,  1.20s/it]

108594 episode score is 768.95


 11%|█         | 11108/100000 [3:40:11<29:18:08,  1.19s/it]

108604 episode score is 797.10


 11%|█         | 11109/100000 [3:40:12<29:05:31,  1.18s/it]

108614 episode score is 794.39


 11%|█         | 11110/100000 [3:40:13<28:57:15,  1.17s/it]

108624 episode score is 794.70


 11%|█         | 11111/100000 [3:40:14<28:37:37,  1.16s/it]

108634 episode score is 773.73


 11%|█         | 11112/100000 [3:40:15<28:12:43,  1.14s/it]

108644 episode score is 746.96


 11%|█         | 11113/100000 [3:40:16<28:12:11,  1.14s/it]

108654 episode score is 777.26


 11%|█         | 11114/100000 [3:40:17<28:25:54,  1.15s/it]

108664 episode score is 777.52


 11%|█         | 11115/100000 [3:40:19<28:19:06,  1.15s/it]

108674 episode score is 773.50


 11%|█         | 11116/100000 [3:40:20<28:12:38,  1.14s/it]

108684 episode score is 752.19


 11%|█         | 11117/100000 [3:40:21<28:06:13,  1.14s/it]

108694 episode score is 767.34


 11%|█         | 11118/100000 [3:40:22<28:10:47,  1.14s/it]

108704 episode score is 777.67


 11%|█         | 11119/100000 [3:40:23<28:11:55,  1.14s/it]

108714 episode score is 781.18


 11%|█         | 11120/100000 [3:40:24<28:27:49,  1.15s/it]

108724 episode score is 788.19


 11%|█         | 11121/100000 [3:40:25<28:23:11,  1.15s/it]

108734 episode score is 778.83


 11%|█         | 11122/100000 [3:40:27<28:32:49,  1.16s/it]

108744 episode score is 812.28


 11%|█         | 11123/100000 [3:40:28<28:33:23,  1.16s/it]

108754 episode score is 797.63


 11%|█         | 11124/100000 [3:40:29<28:34:02,  1.16s/it]

108764 episode score is 801.26


 11%|█         | 11125/100000 [3:40:30<28:36:53,  1.16s/it]

108774 episode score is 798.71


 11%|█         | 11126/100000 [3:40:31<28:28:56,  1.15s/it]

108784 episode score is 790.17


 11%|█         | 11127/100000 [3:40:32<28:39:21,  1.16s/it]

108794 episode score is 806.80


 11%|█         | 11128/100000 [3:40:34<28:42:35,  1.16s/it]

108805 episode score is 709.84


 11%|█         | 11129/100000 [3:40:35<28:28:16,  1.15s/it]

108815 episode score is 779.30


 11%|█         | 11130/100000 [3:40:36<28:28:36,  1.15s/it]

108825 episode score is 789.42


 11%|█         | 11131/100000 [3:40:37<28:32:45,  1.16s/it]

108835 episode score is 795.42


 11%|█         | 11132/100000 [3:40:38<28:39:08,  1.16s/it]

108845 episode score is 805.56


 11%|█         | 11133/100000 [3:40:39<28:13:13,  1.14s/it]

108855 episode score is 739.68


 11%|█         | 11134/100000 [3:40:40<28:20:37,  1.15s/it]

108865 episode score is 789.74


 11%|█         | 11135/100000 [3:40:42<27:53:55,  1.13s/it]

108875 episode score is 740.27


 11%|█         | 11136/100000 [3:40:43<28:06:14,  1.14s/it]

108885 episode score is 782.94


 11%|█         | 11137/100000 [3:40:44<27:58:26,  1.13s/it]

108895 episode score is 772.09


 11%|█         | 11138/100000 [3:40:45<27:55:08,  1.13s/it]

108905 episode score is 761.08


 11%|█         | 11139/100000 [3:40:46<27:39:24,  1.12s/it]

108915 episode score is 745.41


 11%|█         | 11140/100000 [3:40:47<27:40:02,  1.12s/it]

108925 episode score is 752.79


 11%|█         | 11141/100000 [3:40:48<27:43:45,  1.12s/it]

108935 episode score is 772.81


 11%|█         | 11142/100000 [3:40:49<27:36:08,  1.12s/it]

108945 episode score is 744.58


 11%|█         | 11143/100000 [3:40:51<27:51:43,  1.13s/it]

108955 episode score is 788.78


 11%|█         | 11144/100000 [3:40:52<27:48:29,  1.13s/it]

108965 episode score is 768.15


 11%|█         | 11145/100000 [3:40:53<27:50:18,  1.13s/it]

108975 episode score is 752.28


 11%|█         | 11146/100000 [3:40:54<27:50:23,  1.13s/it]

108985 episode score is 765.84


 11%|█         | 11147/100000 [3:40:55<27:43:16,  1.12s/it]

108995 episode score is 756.48


 11%|█         | 11148/100000 [3:40:56<27:33:22,  1.12s/it]

109005 episode score is 751.04


 11%|█         | 11149/100000 [3:40:57<28:03:48,  1.14s/it]

109015 episode score is 797.56


 11%|█         | 11150/100000 [3:40:58<28:01:08,  1.14s/it]

109025 episode score is 775.77


 11%|█         | 11151/100000 [3:41:00<28:03:20,  1.14s/it]

109036 episode score is 701.97


 11%|█         | 11152/100000 [3:41:01<28:22:43,  1.15s/it]

109047 episode score is 732.60


 11%|█         | 11153/100000 [3:41:02<28:15:42,  1.15s/it]

109057 episode score is 777.50


 11%|█         | 11154/100000 [3:41:03<27:54:36,  1.13s/it]

109067 episode score is 738.30


 11%|█         | 11155/100000 [3:41:04<28:58:50,  1.17s/it]

109077 episode score is 806.95


 11%|█         | 11156/100000 [3:41:05<29:06:44,  1.18s/it]

109088 episode score is 717.85


 11%|█         | 11157/100000 [3:41:07<28:59:45,  1.17s/it]

109098 episode score is 797.86


 11%|█         | 11158/100000 [3:41:08<28:57:13,  1.17s/it]

109108 episode score is 786.04


 11%|█         | 11159/100000 [3:41:09<28:28:06,  1.15s/it]

109118 episode score is 734.01


 11%|█         | 11160/100000 [3:41:10<28:45:47,  1.17s/it]

109128 episode score is 798.65


 11%|█         | 11161/100000 [3:41:11<28:50:34,  1.17s/it]

109138 episode score is 795.19


 11%|█         | 11162/100000 [3:41:12<29:00:40,  1.18s/it]

109148 episode score is 808.91


 11%|█         | 11163/100000 [3:41:14<28:48:59,  1.17s/it]

109158 episode score is 783.58


 11%|█         | 11164/100000 [3:41:15<28:21:15,  1.15s/it]

109167 episode score is 819.63


 11%|█         | 11165/100000 [3:41:16<28:09:31,  1.14s/it]

109177 episode score is 752.55


 11%|█         | 11166/100000 [3:41:17<28:20:12,  1.15s/it]

109187 episode score is 776.44


 11%|█         | 11167/100000 [3:41:18<28:40:51,  1.16s/it]

109197 episode score is 807.17


 11%|█         | 11168/100000 [3:41:19<28:54:32,  1.17s/it]

109207 episode score is 808.05


 11%|█         | 11169/100000 [3:41:21<29:04:49,  1.18s/it]

109217 episode score is 805.14


 11%|█         | 11170/100000 [3:41:22<29:13:15,  1.18s/it]

109227 episode score is 815.68


 11%|█         | 11171/100000 [3:41:23<29:07:04,  1.18s/it]

109237 episode score is 794.31


 11%|█         | 11172/100000 [3:41:24<28:55:27,  1.17s/it]

109247 episode score is 787.93


 11%|█         | 11173/100000 [3:41:25<28:49:05,  1.17s/it]

109257 episode score is 782.72


 11%|█         | 11174/100000 [3:41:26<28:59:42,  1.18s/it]

109268 episode score is 710.18


 11%|█         | 11175/100000 [3:41:28<28:18:06,  1.15s/it]

109278 episode score is 714.01


 11%|█         | 11176/100000 [3:41:29<28:23:19,  1.15s/it]

109288 episode score is 794.92


 11%|█         | 11177/100000 [3:41:30<27:55:00,  1.13s/it]

109298 episode score is 731.58


 11%|█         | 11178/100000 [3:41:31<27:56:14,  1.13s/it]

109308 episode score is 750.26


 11%|█         | 11179/100000 [3:41:32<27:45:58,  1.13s/it]

109318 episode score is 728.98


 11%|█         | 11180/100000 [3:41:33<27:45:34,  1.13s/it]

109329 episode score is 662.43


 11%|█         | 11181/100000 [3:41:34<27:28:37,  1.11s/it]

109339 episode score is 720.05


 11%|█         | 11182/100000 [3:41:35<27:46:51,  1.13s/it]

109349 episode score is 786.25


 11%|█         | 11183/100000 [3:41:37<27:58:06,  1.13s/it]

109359 episode score is 769.99


 11%|█         | 11184/100000 [3:41:38<28:11:12,  1.14s/it]

109369 episode score is 793.32


 11%|█         | 11185/100000 [3:41:39<28:07:47,  1.14s/it]

109379 episode score is 766.80


 11%|█         | 11186/100000 [3:41:40<28:26:35,  1.15s/it]

109389 episode score is 803.38


 11%|█         | 11187/100000 [3:41:41<28:05:26,  1.14s/it]

109399 episode score is 736.32


 11%|█         | 11188/100000 [3:41:42<28:10:52,  1.14s/it]

109409 episode score is 782.33


 11%|█         | 11189/100000 [3:41:43<28:23:13,  1.15s/it]

109419 episode score is 798.57


 11%|█         | 11190/100000 [3:41:45<28:35:32,  1.16s/it]

109429 episode score is 806.06


 11%|█         | 11191/100000 [3:41:46<28:24:23,  1.15s/it]

109439 episode score is 768.13


 11%|█         | 11192/100000 [3:41:47<28:19:43,  1.15s/it]

109449 episode score is 758.90


 11%|█         | 11193/100000 [3:41:48<28:39:02,  1.16s/it]

109459 episode score is 805.84


 11%|█         | 11194/100000 [3:41:49<28:44:24,  1.17s/it]

109469 episode score is 795.18


 11%|█         | 11195/100000 [3:41:50<28:46:44,  1.17s/it]

109479 episode score is 797.58


 11%|█         | 11196/100000 [3:41:52<28:43:28,  1.16s/it]

109489 episode score is 786.15


 11%|█         | 11197/100000 [3:41:53<28:33:01,  1.16s/it]

109499 episode score is 773.27


 11%|█         | 11198/100000 [3:41:54<28:16:43,  1.15s/it]

109509 episode score is 768.91


 11%|█         | 11199/100000 [3:41:55<28:24:26,  1.15s/it]

109520 episode score is 707.52
109530 episode score is 767.50


 11%|█         | 11200/100000 [3:41:57<35:22:55,  1.43s/it]

Iteration 11200: Average test reward: 718.83


 11%|█         | 11201/100000 [3:41:58<32:53:56,  1.33s/it]

109540 episode score is 751.85


 11%|█         | 11202/100000 [3:41:59<31:15:11,  1.27s/it]

109550 episode score is 762.34


 11%|█         | 11203/100000 [3:42:00<29:56:46,  1.21s/it]

109560 episode score is 743.65


 11%|█         | 11204/100000 [3:42:02<29:22:28,  1.19s/it]

109570 episode score is 785.12


 11%|█         | 11205/100000 [3:42:03<29:24:12,  1.19s/it]

109580 episode score is 803.21


 11%|█         | 11206/100000 [3:42:04<28:44:30,  1.17s/it]

109590 episode score is 744.97


 11%|█         | 11207/100000 [3:42:05<28:22:42,  1.15s/it]

109600 episode score is 755.21


 11%|█         | 11208/100000 [3:42:06<28:00:08,  1.14s/it]

109610 episode score is 746.19


 11%|█         | 11209/100000 [3:42:07<27:58:52,  1.13s/it]

109620 episode score is 768.90


 11%|█         | 11210/100000 [3:42:08<27:48:31,  1.13s/it]

109630 episode score is 747.06


 11%|█         | 11211/100000 [3:42:09<28:05:49,  1.14s/it]

109640 episode score is 794.93


 11%|█         | 11212/100000 [3:42:11<28:06:47,  1.14s/it]

109650 episode score is 779.75


 11%|█         | 11213/100000 [3:42:12<28:02:41,  1.14s/it]

109660 episode score is 758.39


 11%|█         | 11214/100000 [3:42:13<27:40:28,  1.12s/it]

109670 episode score is 734.02


 11%|█         | 11215/100000 [3:42:14<27:48:50,  1.13s/it]

109680 episode score is 779.43


 11%|█         | 11216/100000 [3:42:15<27:53:10,  1.13s/it]

109690 episode score is 780.62


 11%|█         | 11217/100000 [3:42:16<28:16:23,  1.15s/it]

109700 episode score is 809.87


 11%|█         | 11218/100000 [3:42:17<28:07:11,  1.14s/it]

109710 episode score is 749.30


 11%|█         | 11219/100000 [3:42:19<28:15:33,  1.15s/it]

109720 episode score is 787.60


 11%|█         | 11220/100000 [3:42:20<28:27:48,  1.15s/it]

109730 episode score is 803.30


 11%|█         | 11221/100000 [3:42:21<28:15:01,  1.15s/it]

109740 episode score is 753.38


 11%|█         | 11222/100000 [3:42:22<27:56:06,  1.13s/it]

109750 episode score is 741.28


 11%|█         | 11223/100000 [3:42:23<27:51:27,  1.13s/it]

109760 episode score is 760.76


 11%|█         | 11224/100000 [3:42:24<27:48:16,  1.13s/it]

109770 episode score is 756.28


 11%|█         | 11225/100000 [3:42:25<27:39:33,  1.12s/it]

109780 episode score is 755.57


 11%|█         | 11226/100000 [3:42:26<27:31:09,  1.12s/it]

109790 episode score is 744.34


 11%|█         | 11227/100000 [3:42:28<27:40:46,  1.12s/it]

109800 episode score is 769.70


 11%|█         | 11228/100000 [3:42:29<27:52:17,  1.13s/it]

109810 episode score is 772.18


 11%|█         | 11229/100000 [3:42:30<28:01:19,  1.14s/it]

109820 episode score is 786.99


 11%|█         | 11230/100000 [3:42:31<27:46:19,  1.13s/it]

109830 episode score is 727.54


 11%|█         | 11231/100000 [3:42:32<27:52:04,  1.13s/it]

109840 episode score is 772.94


 11%|█         | 11232/100000 [3:42:33<27:51:02,  1.13s/it]

109850 episode score is 767.62


 11%|█         | 11233/100000 [3:42:34<27:34:00,  1.12s/it]

109860 episode score is 751.31


 11%|█         | 11234/100000 [3:42:36<28:30:24,  1.16s/it]

109870 episode score is 778.01


 11%|█         | 11235/100000 [3:42:37<28:38:07,  1.16s/it]

109880 episode score is 798.83


 11%|█         | 11236/100000 [3:42:38<28:10:36,  1.14s/it]

109890 episode score is 748.05


 11%|█         | 11237/100000 [3:42:39<28:03:06,  1.14s/it]

109900 episode score is 761.69


 11%|█         | 11238/100000 [3:42:40<27:40:29,  1.12s/it]

109909 episode score is 820.20


 11%|█         | 11239/100000 [3:42:41<27:53:49,  1.13s/it]

109919 episode score is 777.35


 11%|█         | 11240/100000 [3:42:42<28:05:53,  1.14s/it]

109929 episode score is 791.08


 11%|█         | 11241/100000 [3:42:44<27:57:39,  1.13s/it]

109939 episode score is 772.39


 11%|█         | 11242/100000 [3:42:45<27:35:43,  1.12s/it]

109949 episode score is 732.84


 11%|█         | 11243/100000 [3:42:46<27:43:31,  1.12s/it]

109960 episode score is 688.48


 11%|█         | 11244/100000 [3:42:47<27:50:30,  1.13s/it]

109971 episode score is 685.20


 11%|█         | 11245/100000 [3:42:48<27:35:48,  1.12s/it]

109981 episode score is 730.93


 11%|█         | 11246/100000 [3:42:49<27:32:08,  1.12s/it]

109991 episode score is 755.98


 11%|█         | 11247/100000 [3:42:50<27:40:11,  1.12s/it]

110001 episode score is 772.72


 11%|█         | 11248/100000 [3:42:51<28:03:03,  1.14s/it]

110011 episode score is 799.72


 11%|█         | 11249/100000 [3:42:52<27:51:56,  1.13s/it]

110021 episode score is 762.34


 11%|█▏        | 11250/100000 [3:42:54<27:44:38,  1.13s/it]

110031 episode score is 760.87


 11%|█▏        | 11251/100000 [3:42:55<27:38:10,  1.12s/it]

110041 episode score is 762.89


 11%|█▏        | 11252/100000 [3:42:56<27:54:06,  1.13s/it]

110052 episode score is 695.09


 11%|█▏        | 11253/100000 [3:42:57<28:17:29,  1.15s/it]

110064 episode score is 628.32


 11%|█▏        | 11254/100000 [3:42:58<27:53:42,  1.13s/it]

110074 episode score is 740.95


 11%|█▏        | 11255/100000 [3:42:59<27:49:39,  1.13s/it]

110084 episode score is 768.28


 11%|█▏        | 11256/100000 [3:43:00<27:36:57,  1.12s/it]

110094 episode score is 740.88


 11%|█▏        | 11257/100000 [3:43:02<28:06:06,  1.14s/it]

110105 episode score is 716.07


 11%|█▏        | 11258/100000 [3:43:03<28:13:45,  1.15s/it]

110115 episode score is 784.62


 11%|█▏        | 11259/100000 [3:43:04<28:03:27,  1.14s/it]

110125 episode score is 762.52


 11%|█▏        | 11260/100000 [3:43:05<27:51:39,  1.13s/it]

110135 episode score is 755.59


 11%|█▏        | 11261/100000 [3:43:06<27:43:49,  1.12s/it]

110145 episode score is 758.31


 11%|█▏        | 11262/100000 [3:43:07<27:44:16,  1.13s/it]

110155 episode score is 770.00


 11%|█▏        | 11263/100000 [3:43:08<27:44:24,  1.13s/it]

110165 episode score is 764.57


 11%|█▏        | 11264/100000 [3:43:09<28:01:59,  1.14s/it]

110175 episode score is 792.93


 11%|█▏        | 11265/100000 [3:43:11<28:17:10,  1.15s/it]

110185 episode score is 798.49


 11%|█▏        | 11266/100000 [3:43:12<28:10:07,  1.14s/it]

110195 episode score is 760.77


 11%|█▏        | 11267/100000 [3:43:13<28:21:03,  1.15s/it]

110205 episode score is 776.86


 11%|█▏        | 11268/100000 [3:43:14<28:13:39,  1.15s/it]

110215 episode score is 778.25


 11%|█▏        | 11269/100000 [3:43:15<28:06:44,  1.14s/it]

110225 episode score is 771.40


 11%|█▏        | 11270/100000 [3:43:16<27:42:29,  1.12s/it]

110235 episode score is 727.95


 11%|█▏        | 11271/100000 [3:43:17<28:08:13,  1.14s/it]

110246 episode score is 696.75


 11%|█▏        | 11272/100000 [3:43:19<28:07:53,  1.14s/it]

110256 episode score is 776.03


 11%|█▏        | 11273/100000 [3:43:20<28:06:35,  1.14s/it]

110266 episode score is 769.25


 11%|█▏        | 11274/100000 [3:43:21<28:04:35,  1.14s/it]

110276 episode score is 778.24


 11%|█▏        | 11275/100000 [3:43:22<28:05:00,  1.14s/it]

110286 episode score is 769.70


 11%|█▏        | 11276/100000 [3:43:23<28:07:59,  1.14s/it]

110296 episode score is 785.78


 11%|█▏        | 11277/100000 [3:43:24<27:57:49,  1.13s/it]

110306 episode score is 762.09


 11%|█▏        | 11278/100000 [3:43:25<28:07:39,  1.14s/it]

110316 episode score is 786.98


 11%|█▏        | 11279/100000 [3:43:27<28:16:25,  1.15s/it]

110326 episode score is 793.56


 11%|█▏        | 11280/100000 [3:43:28<28:04:02,  1.14s/it]

110336 episode score is 762.93


 11%|█▏        | 11281/100000 [3:43:29<28:03:10,  1.14s/it]

110347 episode score is 688.70


 11%|█▏        | 11282/100000 [3:43:30<27:58:27,  1.14s/it]

110357 episode score is 764.66


 11%|█▏        | 11283/100000 [3:43:31<28:21:26,  1.15s/it]

110367 episode score is 801.22


 11%|█▏        | 11284/100000 [3:43:32<27:58:14,  1.14s/it]

110377 episode score is 725.67


 11%|█▏        | 11285/100000 [3:43:33<27:54:02,  1.13s/it]

110387 episode score is 752.00


 11%|█▏        | 11286/100000 [3:43:35<28:12:21,  1.14s/it]

110397 episode score is 795.71


 11%|█▏        | 11287/100000 [3:43:36<28:19:38,  1.15s/it]

110407 episode score is 777.41


 11%|█▏        | 11288/100000 [3:43:37<28:01:24,  1.14s/it]

110416 episode score is 833.54


 11%|█▏        | 11289/100000 [3:43:38<28:08:10,  1.14s/it]

110426 episode score is 788.24


 11%|█▏        | 11290/100000 [3:43:39<28:17:06,  1.15s/it]

110436 episode score is 794.93


 11%|█▏        | 11291/100000 [3:43:40<28:29:10,  1.16s/it]

110446 episode score is 797.85


 11%|█▏        | 11292/100000 [3:43:42<28:49:30,  1.17s/it]

110456 episode score is 813.78


 11%|█▏        | 11293/100000 [3:43:43<28:52:44,  1.17s/it]

110466 episode score is 804.17


 11%|█▏        | 11294/100000 [3:43:44<28:55:42,  1.17s/it]

110476 episode score is 800.91


 11%|█▏        | 11295/100000 [3:43:45<28:31:32,  1.16s/it]

110486 episode score is 767.43


 11%|█▏        | 11296/100000 [3:43:46<28:21:21,  1.15s/it]

110496 episode score is 771.82


 11%|█▏        | 11297/100000 [3:43:47<28:29:31,  1.16s/it]

110506 episode score is 783.59


 11%|█▏        | 11298/100000 [3:43:49<28:35:46,  1.16s/it]

110516 episode score is 795.40


 11%|█▏        | 11299/100000 [3:43:50<28:40:25,  1.16s/it]

110527 episode score is 706.21
110539 episode score is 586.90


 11%|█▏        | 11300/100000 [3:43:52<35:43:37,  1.45s/it]

Iteration 11300: Average test reward: 736.20


 11%|█▏        | 11301/100000 [3:43:53<33:11:42,  1.35s/it]

110550 episode score is 654.03


 11%|█▏        | 11302/100000 [3:43:54<31:54:23,  1.29s/it]

110561 episode score is 694.06


 11%|█▏        | 11303/100000 [3:43:55<31:23:39,  1.27s/it]

110571 episode score is 759.84


 11%|█▏        | 11304/100000 [3:43:56<30:29:51,  1.24s/it]

110581 episode score is 785.74


 11%|█▏        | 11305/100000 [3:43:58<29:40:41,  1.20s/it]

110591 episode score is 745.43


 11%|█▏        | 11306/100000 [3:43:59<28:47:37,  1.17s/it]

110600 episode score is 812.33


 11%|█▏        | 11307/100000 [3:44:00<28:08:45,  1.14s/it]

110609 episode score is 818.33


 11%|█▏        | 11308/100000 [3:44:01<27:47:10,  1.13s/it]

110618 episode score is 818.42


 11%|█▏        | 11309/100000 [3:44:02<27:48:13,  1.13s/it]

110627 episode score is 830.98


 11%|█▏        | 11310/100000 [3:44:03<28:19:02,  1.15s/it]

110637 episode score is 809.49


 11%|█▏        | 11311/100000 [3:44:04<28:37:17,  1.16s/it]

110647 episode score is 809.91


 11%|█▏        | 11312/100000 [3:44:06<28:44:26,  1.17s/it]

110657 episode score is 802.99


 11%|█▏        | 11313/100000 [3:44:07<28:11:56,  1.14s/it]

110666 episode score is 829.77


 11%|█▏        | 11314/100000 [3:44:08<27:55:19,  1.13s/it]

110675 episode score is 812.88


 11%|█▏        | 11315/100000 [3:44:09<27:35:02,  1.12s/it]

110684 episode score is 818.65


 11%|█▏        | 11316/100000 [3:44:10<27:32:31,  1.12s/it]

110693 episode score is 839.35


 11%|█▏        | 11317/100000 [3:44:11<27:26:17,  1.11s/it]

110702 episode score is 836.61


 11%|█▏        | 11318/100000 [3:44:12<27:32:53,  1.12s/it]

110711 episode score is 841.76


 11%|█▏        | 11319/100000 [3:44:13<27:24:59,  1.11s/it]

110720 episode score is 836.35


 11%|█▏        | 11320/100000 [3:44:14<27:32:10,  1.12s/it]

110729 episode score is 845.96


 11%|█▏        | 11321/100000 [3:44:15<27:15:47,  1.11s/it]

110738 episode score is 814.62


 11%|█▏        | 11322/100000 [3:44:17<27:04:50,  1.10s/it]

110747 episode score is 819.88


 11%|█▏        | 11323/100000 [3:44:18<28:01:19,  1.14s/it]

110757 episode score is 815.63


 11%|█▏        | 11324/100000 [3:44:19<28:21:20,  1.15s/it]

110767 episode score is 800.09


 11%|█▏        | 11325/100000 [3:44:20<28:29:17,  1.16s/it]

110777 episode score is 786.96


 11%|█▏        | 11326/100000 [3:44:21<28:04:16,  1.14s/it]

110786 episode score is 827.49


 11%|█▏        | 11327/100000 [3:44:22<28:20:58,  1.15s/it]

110796 episode score is 786.78


 11%|█▏        | 11328/100000 [3:44:24<27:58:02,  1.14s/it]

110805 episode score is 814.11


 11%|█▏        | 11329/100000 [3:44:25<28:26:43,  1.15s/it]

110815 episode score is 811.10


 11%|█▏        | 11330/100000 [3:44:26<28:14:22,  1.15s/it]

110824 episode score is 848.93


 11%|█▏        | 11331/100000 [3:44:27<27:45:03,  1.13s/it]

110834 episode score is 717.01


 11%|█▏        | 11332/100000 [3:44:28<27:45:10,  1.13s/it]

110844 episode score is 762.43


 11%|█▏        | 11333/100000 [3:44:29<27:25:29,  1.11s/it]

110853 episode score is 822.25


 11%|█▏        | 11334/100000 [3:44:30<27:07:01,  1.10s/it]

110862 episode score is 815.16


 11%|█▏        | 11335/100000 [3:44:31<27:44:03,  1.13s/it]

110872 episode score is 814.25


 11%|█▏        | 11336/100000 [3:44:32<27:30:56,  1.12s/it]

110881 episode score is 817.16


 11%|█▏        | 11337/100000 [3:44:34<27:39:45,  1.12s/it]

110891 episode score is 753.60


 11%|█▏        | 11338/100000 [3:44:35<27:24:11,  1.11s/it]

110900 episode score is 823.48


 11%|█▏        | 11339/100000 [3:44:36<27:55:25,  1.13s/it]

110910 episode score is 803.50


 11%|█▏        | 11340/100000 [3:44:37<28:17:15,  1.15s/it]

110920 episode score is 798.75


 11%|█▏        | 11341/100000 [3:44:38<27:45:34,  1.13s/it]

110929 episode score is 811.85


 11%|█▏        | 11342/100000 [3:44:39<28:05:12,  1.14s/it]

110939 episode score is 796.85


 11%|█▏        | 11343/100000 [3:44:41<28:21:21,  1.15s/it]

110949 episode score is 808.92


 11%|█▏        | 11344/100000 [3:44:42<27:54:50,  1.13s/it]

110958 episode score is 820.90


 11%|█▏        | 11345/100000 [3:44:43<28:04:56,  1.14s/it]

110968 episode score is 773.53


 11%|█▏        | 11346/100000 [3:44:44<28:01:58,  1.14s/it]

110978 episode score is 766.17


 11%|█▏        | 11347/100000 [3:44:45<28:17:54,  1.15s/it]

110988 episode score is 800.57


 11%|█▏        | 11348/100000 [3:44:46<28:38:56,  1.16s/it]

110998 episode score is 817.32


 11%|█▏        | 11349/100000 [3:44:47<28:11:20,  1.14s/it]

111008 episode score is 731.39


 11%|█▏        | 11350/100000 [3:44:48<27:53:02,  1.13s/it]

111018 episode score is 742.63


 11%|█▏        | 11351/100000 [3:44:50<27:59:37,  1.14s/it]

111028 episode score is 768.22


 11%|█▏        | 11352/100000 [3:44:51<27:50:53,  1.13s/it]

111038 episode score is 757.76


 11%|█▏        | 11353/100000 [3:44:52<27:41:43,  1.12s/it]

111048 episode score is 734.88


 11%|█▏        | 11354/100000 [3:44:53<27:50:14,  1.13s/it]

111058 episode score is 774.66


 11%|█▏        | 11355/100000 [3:44:54<27:55:13,  1.13s/it]

111068 episode score is 781.38


 11%|█▏        | 11356/100000 [3:44:55<28:13:44,  1.15s/it]

111078 episode score is 781.47


 11%|█▏        | 11357/100000 [3:44:56<28:00:03,  1.14s/it]

111088 episode score is 760.43


 11%|█▏        | 11358/100000 [3:44:58<27:43:29,  1.13s/it]

111098 episode score is 745.12


 11%|█▏        | 11359/100000 [3:44:59<27:49:40,  1.13s/it]

111108 episode score is 776.86


 11%|█▏        | 11360/100000 [3:45:00<27:33:07,  1.12s/it]

111118 episode score is 747.52


 11%|█▏        | 11361/100000 [3:45:01<27:27:45,  1.12s/it]

111128 episode score is 760.02


 11%|█▏        | 11362/100000 [3:45:02<27:17:10,  1.11s/it]

111138 episode score is 737.22


 11%|█▏        | 11363/100000 [3:45:03<27:13:46,  1.11s/it]

111148 episode score is 729.80
111158 episode score is 755.56


 11%|█▏        | 11365/100000 [3:45:05<28:15:10,  1.15s/it]

111169 episode score is 727.20


 11%|█▏        | 11366/100000 [3:45:07<27:48:17,  1.13s/it]

111179 episode score is 739.54


 11%|█▏        | 11367/100000 [3:45:08<28:13:00,  1.15s/it]

111190 episode score is 713.70


 11%|█▏        | 11368/100000 [3:45:09<27:46:50,  1.13s/it]

111200 episode score is 735.26


 11%|█▏        | 11369/100000 [3:45:10<28:05:47,  1.14s/it]

111211 episode score is 724.61


 11%|█▏        | 11370/100000 [3:45:11<27:50:07,  1.13s/it]

111221 episode score is 753.36


 11%|█▏        | 11371/100000 [3:45:12<28:15:34,  1.15s/it]

111232 episode score is 724.60


 11%|█▏        | 11372/100000 [3:45:13<27:47:19,  1.13s/it]

111242 episode score is 735.50


 11%|█▏        | 11373/100000 [3:45:14<27:51:38,  1.13s/it]

111252 episode score is 777.16


 11%|█▏        | 11374/100000 [3:45:16<27:36:45,  1.12s/it]

111262 episode score is 751.57


 11%|█▏        | 11375/100000 [3:45:17<28:06:55,  1.14s/it]

111273 episode score is 737.27


 11%|█▏        | 11376/100000 [3:45:18<28:00:46,  1.14s/it]

111283 episode score is 768.37


 11%|█▏        | 11377/100000 [3:45:19<27:56:51,  1.14s/it]

111294 episode score is 670.88


 11%|█▏        | 11378/100000 [3:45:20<28:01:56,  1.14s/it]

111305 episode score is 697.59


 11%|█▏        | 11379/100000 [3:45:21<27:38:51,  1.12s/it]

111315 episode score is 737.41


 11%|█▏        | 11380/100000 [3:45:22<27:24:38,  1.11s/it]

111325 episode score is 737.04


 11%|█▏        | 11381/100000 [3:45:24<27:58:10,  1.14s/it]

111336 episode score is 728.61


 11%|█▏        | 11382/100000 [3:45:25<27:33:42,  1.12s/it]

111346 episode score is 731.83


 11%|█▏        | 11383/100000 [3:45:26<27:24:07,  1.11s/it]

111356 episode score is 747.28


 11%|█▏        | 11384/100000 [3:45:27<27:15:22,  1.11s/it]

111366 episode score is 737.67


 11%|█▏        | 11385/100000 [3:45:28<27:49:11,  1.13s/it]

111377 episode score is 722.11


 11%|█▏        | 11386/100000 [3:45:29<28:11:13,  1.15s/it]

111388 episode score is 711.17


 11%|█▏        | 11387/100000 [3:45:30<27:41:26,  1.12s/it]

111399 episode score is 638.84


 11%|█▏        | 11388/100000 [3:45:31<27:38:10,  1.12s/it]

111410 episode score is 668.98


 11%|█▏        | 11389/100000 [3:45:33<27:43:32,  1.13s/it]

111422 episode score is 593.22


 11%|█▏        | 11390/100000 [3:45:34<27:32:09,  1.12s/it]

111432 episode score is 728.18


 11%|█▏        | 11391/100000 [3:45:35<28:02:20,  1.14s/it]

111443 episode score is 723.37


 11%|█▏        | 11392/100000 [3:45:36<28:14:44,  1.15s/it]

111454 episode score is 712.69


 11%|█▏        | 11393/100000 [3:45:37<27:51:49,  1.13s/it]

111464 episode score is 748.02


 11%|█▏        | 11394/100000 [3:45:38<27:36:50,  1.12s/it]

111474 episode score is 742.02


 11%|█▏        | 11395/100000 [3:45:39<28:09:46,  1.14s/it]

111485 episode score is 729.13


 11%|█▏        | 11396/100000 [3:45:40<27:50:10,  1.13s/it]

111495 episode score is 750.28


 11%|█▏        | 11397/100000 [3:45:42<27:49:27,  1.13s/it]

111505 episode score is 773.61


 11%|█▏        | 11398/100000 [3:45:43<28:07:58,  1.14s/it]

111516 episode score is 698.09


 11%|█▏        | 11399/100000 [3:45:44<28:01:04,  1.14s/it]

111526 episode score is 765.14
111536 episode score is 794.29


 11%|█▏        | 11400/100000 [3:45:46<35:45:21,  1.45s/it]

Iteration 11400: Average test reward: 766.56


 11%|█▏        | 11401/100000 [3:45:47<33:16:25,  1.35s/it]

111546 episode score is 736.25


 11%|█▏        | 11402/100000 [3:45:48<31:48:07,  1.29s/it]

111556 episode score is 785.62


 11%|█▏        | 11403/100000 [3:45:49<30:25:58,  1.24s/it]

111566 episode score is 740.46


 11%|█▏        | 11404/100000 [3:45:51<29:27:41,  1.20s/it]

111576 episode score is 745.83


 11%|█▏        | 11405/100000 [3:45:52<29:02:18,  1.18s/it]

111586 episode score is 779.02


 11%|█▏        | 11406/100000 [3:45:53<28:54:06,  1.17s/it]

111596 episode score is 781.64


 11%|█▏        | 11407/100000 [3:45:54<28:44:25,  1.17s/it]

111606 episode score is 790.79


 11%|█▏        | 11408/100000 [3:45:55<28:22:14,  1.15s/it]

111616 episode score is 748.25


 11%|█▏        | 11409/100000 [3:45:56<28:13:21,  1.15s/it]

111626 episode score is 771.72


 11%|█▏        | 11410/100000 [3:45:57<28:15:31,  1.15s/it]

111636 episode score is 785.61


 11%|█▏        | 11411/100000 [3:45:59<27:59:23,  1.14s/it]

111646 episode score is 764.48


 11%|█▏        | 11412/100000 [3:46:00<27:34:36,  1.12s/it]

111656 episode score is 728.81


 11%|█▏        | 11413/100000 [3:46:01<27:25:23,  1.11s/it]

111666 episode score is 762.02


 11%|█▏        | 11414/100000 [3:46:02<27:45:06,  1.13s/it]

111676 episode score is 777.17


 11%|█▏        | 11415/100000 [3:46:03<27:34:02,  1.12s/it]

111686 episode score is 740.28


 11%|█▏        | 11416/100000 [3:46:04<27:50:42,  1.13s/it]

111696 episode score is 771.01


 11%|█▏        | 11417/100000 [3:46:05<27:57:42,  1.14s/it]

111706 episode score is 768.82


 11%|█▏        | 11418/100000 [3:46:06<28:23:15,  1.15s/it]

111716 episode score is 807.47


 11%|█▏        | 11419/100000 [3:46:08<28:00:29,  1.14s/it]

111725 episode score is 826.72


 11%|█▏        | 11420/100000 [3:46:09<27:54:55,  1.13s/it]

111735 episode score is 751.93


 11%|█▏        | 11421/100000 [3:46:10<28:01:25,  1.14s/it]

111744 episode score is 847.16


 11%|█▏        | 11422/100000 [3:46:11<27:35:26,  1.12s/it]

111753 episode score is 804.66


 11%|█▏        | 11423/100000 [3:46:12<28:44:15,  1.17s/it]

111763 episode score is 810.59


 11%|█▏        | 11424/100000 [3:46:13<28:06:22,  1.14s/it]

111772 episode score is 815.30


 11%|█▏        | 11425/100000 [3:46:14<28:06:28,  1.14s/it]

111782 episode score is 775.74


 11%|█▏        | 11426/100000 [3:46:16<27:53:28,  1.13s/it]

111792 episode score is 760.65


 11%|█▏        | 11427/100000 [3:46:17<28:10:36,  1.15s/it]

111802 episode score is 788.65


 11%|█▏        | 11428/100000 [3:46:18<28:28:53,  1.16s/it]

111812 episode score is 776.05


 11%|█▏        | 11429/100000 [3:46:19<28:02:59,  1.14s/it]

111821 episode score is 805.48


 11%|█▏        | 11430/100000 [3:46:20<28:12:03,  1.15s/it]

111831 episode score is 764.52


 11%|█▏        | 11431/100000 [3:46:21<27:51:07,  1.13s/it]

111840 episode score is 826.57


 11%|█▏        | 11432/100000 [3:46:22<27:59:26,  1.14s/it]

111849 episode score is 842.72


 11%|█▏        | 11433/100000 [3:46:24<28:01:20,  1.14s/it]

111858 episode score is 847.58


 11%|█▏        | 11434/100000 [3:46:25<27:56:09,  1.14s/it]

111867 episode score is 835.12


 11%|█▏        | 11435/100000 [3:46:26<27:32:32,  1.12s/it]

111876 episode score is 824.06


 11%|█▏        | 11436/100000 [3:46:27<27:21:28,  1.11s/it]

111885 episode score is 809.93


 11%|█▏        | 11437/100000 [3:46:28<27:09:42,  1.10s/it]

111894 episode score is 818.78


 11%|█▏        | 11438/100000 [3:46:29<27:15:46,  1.11s/it]

111903 episode score is 843.81


 11%|█▏        | 11439/100000 [3:46:30<27:38:36,  1.12s/it]

111913 episode score is 803.67


 11%|█▏        | 11440/100000 [3:46:31<27:59:08,  1.14s/it]

111923 episode score is 796.25


 11%|█▏        | 11441/100000 [3:46:33<28:20:48,  1.15s/it]

111933 episode score is 809.12


 11%|█▏        | 11442/100000 [3:46:34<28:00:16,  1.14s/it]

111942 episode score is 830.08


 11%|█▏        | 11443/100000 [3:46:35<27:52:50,  1.13s/it]

111952 episode score is 732.84


 11%|█▏        | 11444/100000 [3:46:36<28:26:44,  1.16s/it]

111962 episode score is 816.60


 11%|█▏        | 11445/100000 [3:46:37<28:38:06,  1.16s/it]

111972 episode score is 802.58


 11%|█▏        | 11446/100000 [3:46:38<28:13:50,  1.15s/it]

111982 episode score is 739.02


 11%|█▏        | 11447/100000 [3:46:39<28:14:04,  1.15s/it]

111992 episode score is 774.70


 11%|█▏        | 11448/100000 [3:46:41<27:52:25,  1.13s/it]

112001 episode score is 831.90


 11%|█▏        | 11449/100000 [3:46:42<27:31:03,  1.12s/it]

112010 episode score is 816.95


 11%|█▏        | 11450/100000 [3:46:43<28:04:10,  1.14s/it]

112020 episode score is 805.09


 11%|█▏        | 11451/100000 [3:46:44<27:45:25,  1.13s/it]

112029 episode score is 831.28


 11%|█▏        | 11452/100000 [3:46:45<28:00:21,  1.14s/it]

112039 episode score is 794.17


 11%|█▏        | 11453/100000 [3:46:46<27:48:37,  1.13s/it]

112049 episode score is 745.12


 11%|█▏        | 11454/100000 [3:46:47<27:50:50,  1.13s/it]

112059 episode score is 764.94


 11%|█▏        | 11455/100000 [3:46:48<27:36:48,  1.12s/it]

112069 episode score is 734.01


 11%|█▏        | 11456/100000 [3:46:50<28:00:48,  1.14s/it]

112079 episode score is 795.88


 11%|█▏        | 11457/100000 [3:46:51<28:08:22,  1.14s/it]

112089 episode score is 787.32


 11%|█▏        | 11458/100000 [3:46:52<27:46:20,  1.13s/it]

112099 episode score is 739.14


 11%|█▏        | 11459/100000 [3:46:53<27:47:56,  1.13s/it]

112109 episode score is 768.04


 11%|█▏        | 11460/100000 [3:46:54<27:46:46,  1.13s/it]

112119 episode score is 774.63


 11%|█▏        | 11461/100000 [3:46:55<28:01:55,  1.14s/it]

112130 episode score is 704.71


 11%|█▏        | 11462/100000 [3:46:56<28:14:13,  1.15s/it]

112141 episode score is 716.71


 11%|█▏        | 11463/100000 [3:46:58<27:53:48,  1.13s/it]

112151 episode score is 733.38


 11%|█▏        | 11464/100000 [3:46:59<28:09:32,  1.14s/it]

112162 episode score is 716.27


 11%|█▏        | 11465/100000 [3:47:00<28:24:24,  1.16s/it]

112173 episode score is 710.92


 11%|█▏        | 11466/100000 [3:47:01<28:35:22,  1.16s/it]

112184 episode score is 729.66


 11%|█▏        | 11467/100000 [3:47:02<28:00:32,  1.14s/it]

112194 episode score is 736.03


 11%|█▏        | 11468/100000 [3:47:03<27:52:39,  1.13s/it]

112204 episode score is 763.79


 11%|█▏        | 11469/100000 [3:47:04<28:15:33,  1.15s/it]

112215 episode score is 727.04


 11%|█▏        | 11470/100000 [3:47:06<27:50:51,  1.13s/it]

112225 episode score is 745.94


 11%|█▏        | 11471/100000 [3:47:07<27:40:44,  1.13s/it]

112235 episode score is 758.04


 11%|█▏        | 11472/100000 [3:47:08<27:35:19,  1.12s/it]

112245 episode score is 760.34


 11%|█▏        | 11473/100000 [3:47:09<27:43:27,  1.13s/it]

112255 episode score is 778.60


 11%|█▏        | 11474/100000 [3:47:10<28:01:02,  1.14s/it]

112265 episode score is 803.43


 11%|█▏        | 11475/100000 [3:47:11<28:16:47,  1.15s/it]

112275 episode score is 811.17


 11%|█▏        | 11476/100000 [3:47:12<28:23:35,  1.15s/it]

112285 episode score is 795.70


 11%|█▏        | 11477/100000 [3:47:14<28:39:39,  1.17s/it]

112295 episode score is 813.02


 11%|█▏        | 11478/100000 [3:47:15<28:32:41,  1.16s/it]

112305 episode score is 789.45


 11%|█▏        | 11479/100000 [3:47:16<27:57:45,  1.14s/it]

112315 episode score is 742.09


 11%|█▏        | 11480/100000 [3:47:17<28:55:41,  1.18s/it]

112325 episode score is 801.35


 11%|█▏        | 11481/100000 [3:47:18<28:25:56,  1.16s/it]

112335 episode score is 755.42


 11%|█▏        | 11482/100000 [3:47:19<28:21:07,  1.15s/it]

112345 episode score is 788.43


 11%|█▏        | 11483/100000 [3:47:21<28:16:21,  1.15s/it]

112355 episode score is 787.21


 11%|█▏        | 11484/100000 [3:47:22<28:08:10,  1.14s/it]

112366 episode score is 687.82


 11%|█▏        | 11485/100000 [3:47:23<28:18:22,  1.15s/it]

112377 episode score is 709.01


 11%|█▏        | 11486/100000 [3:47:24<28:03:48,  1.14s/it]

112387 episode score is 772.44


 11%|█▏        | 11487/100000 [3:47:25<27:37:09,  1.12s/it]

112397 episode score is 754.77


 11%|█▏        | 11488/100000 [3:47:26<27:52:18,  1.13s/it]

112409 episode score is 627.67


 11%|█▏        | 11489/100000 [3:47:27<27:59:14,  1.14s/it]

112420 episode score is 716.41


 11%|█▏        | 11490/100000 [3:47:28<27:33:47,  1.12s/it]

112430 episode score is 751.22


 11%|█▏        | 11491/100000 [3:47:30<27:23:41,  1.11s/it]

112441 episode score is 672.36


 11%|█▏        | 11492/100000 [3:47:31<27:31:46,  1.12s/it]

112452 episode score is 696.74


 11%|█▏        | 11493/100000 [3:47:32<27:38:02,  1.12s/it]

112463 episode score is 679.27


 11%|█▏        | 11494/100000 [3:47:33<27:17:02,  1.11s/it]

112473 episode score is 739.44


 11%|█▏        | 11495/100000 [3:47:34<27:03:37,  1.10s/it]

112483 episode score is 746.26


 11%|█▏        | 11496/100000 [3:47:35<27:21:27,  1.11s/it]

112494 episode score is 706.19


 11%|█▏        | 11497/100000 [3:47:36<27:39:15,  1.12s/it]

112505 episode score is 713.54


 11%|█▏        | 11498/100000 [3:47:37<27:55:41,  1.14s/it]

112516 episode score is 722.46


 11%|█▏        | 11499/100000 [3:47:39<27:56:44,  1.14s/it]

112527 episode score is 697.27
112538 episode score is 734.24


 12%|█▏        | 11500/100000 [3:47:41<35:27:46,  1.44s/it]

Iteration 11500: Average test reward: 721.97


 12%|█▏        | 11501/100000 [3:47:42<33:20:45,  1.36s/it]

112549 episode score is 697.56


 12%|█▏        | 11502/100000 [3:47:43<31:31:29,  1.28s/it]

112559 episode score is 763.66


 12%|█▏        | 11503/100000 [3:47:44<30:15:23,  1.23s/it]

112569 episode score is 762.13


 12%|█▏        | 11504/100000 [3:47:45<29:21:34,  1.19s/it]

112579 episode score is 766.23


 12%|█▏        | 11505/100000 [3:47:46<28:51:00,  1.17s/it]

112589 episode score is 781.93


 12%|█▏        | 11506/100000 [3:47:47<28:35:55,  1.16s/it]

112599 episode score is 766.30


 12%|█▏        | 11507/100000 [3:47:49<28:07:46,  1.14s/it]

112609 episode score is 753.57


 12%|█▏        | 11508/100000 [3:47:50<28:07:46,  1.14s/it]

112620 episode score is 699.09


 12%|█▏        | 11509/100000 [3:47:51<28:16:22,  1.15s/it]

112631 episode score is 711.87


 12%|█▏        | 11510/100000 [3:47:52<28:20:53,  1.15s/it]

112642 episode score is 706.07


 12%|█▏        | 11511/100000 [3:47:53<28:34:17,  1.16s/it]

112653 episode score is 712.37


 12%|█▏        | 11512/100000 [3:47:54<28:12:44,  1.15s/it]

112663 episode score is 765.93


 12%|█▏        | 11513/100000 [3:47:55<28:04:38,  1.14s/it]

112673 episode score is 768.29


 12%|█▏        | 11514/100000 [3:47:57<27:55:41,  1.14s/it]

112683 episode score is 771.55


 12%|█▏        | 11515/100000 [3:47:58<28:06:22,  1.14s/it]

112693 episode score is 802.39


 12%|█▏        | 11516/100000 [3:47:59<28:01:16,  1.14s/it]

112703 episode score is 788.85


 12%|█▏        | 11517/100000 [3:48:00<28:01:04,  1.14s/it]

112713 episode score is 793.26


 12%|█▏        | 11518/100000 [3:48:01<28:00:08,  1.14s/it]

112723 episode score is 792.94


 12%|█▏        | 11519/100000 [3:48:02<28:09:09,  1.15s/it]

112733 episode score is 788.96


 12%|█▏        | 11520/100000 [3:48:03<27:51:14,  1.13s/it]

112743 episode score is 745.68


 12%|█▏        | 11521/100000 [3:48:05<27:51:17,  1.13s/it]

112753 episode score is 779.34


 12%|█▏        | 11522/100000 [3:48:06<27:59:35,  1.14s/it]

112763 episode score is 795.68


 12%|█▏        | 11523/100000 [3:48:07<28:06:24,  1.14s/it]

112773 episode score is 797.52


 12%|█▏        | 11524/100000 [3:48:08<28:17:27,  1.15s/it]

112783 episode score is 803.33


 12%|█▏        | 11525/100000 [3:48:09<28:22:24,  1.15s/it]

112793 episode score is 804.35


 12%|█▏        | 11526/100000 [3:48:10<28:23:04,  1.15s/it]

112803 episode score is 796.63


 12%|█▏        | 11527/100000 [3:48:11<28:12:32,  1.15s/it]

112813 episode score is 773.67


 12%|█▏        | 11528/100000 [3:48:13<28:14:24,  1.15s/it]

112823 episode score is 789.81


 12%|█▏        | 11529/100000 [3:48:14<28:11:42,  1.15s/it]

112833 episode score is 787.68


 12%|█▏        | 11530/100000 [3:48:15<28:00:25,  1.14s/it]

112843 episode score is 767.41


 12%|█▏        | 11531/100000 [3:48:16<28:00:27,  1.14s/it]

112853 episode score is 789.78


 12%|█▏        | 11532/100000 [3:48:17<28:01:11,  1.14s/it]

112863 episode score is 772.62


 12%|█▏        | 11533/100000 [3:48:18<28:10:07,  1.15s/it]

112873 episode score is 792.51


 12%|█▏        | 11534/100000 [3:48:19<28:06:53,  1.14s/it]

112883 episode score is 780.71


 12%|█▏        | 11535/100000 [3:48:21<27:44:52,  1.13s/it]

112893 episode score is 747.27


 12%|█▏        | 11536/100000 [3:48:22<27:30:14,  1.12s/it]

112903 episode score is 756.35


 12%|█▏        | 11537/100000 [3:48:23<27:24:39,  1.12s/it]

112913 episode score is 758.18


 12%|█▏        | 11538/100000 [3:48:24<27:21:58,  1.11s/it]

112923 episode score is 759.42


 12%|█▏        | 11539/100000 [3:48:25<27:30:45,  1.12s/it]

112933 episode score is 778.12


 12%|█▏        | 11540/100000 [3:48:26<27:36:00,  1.12s/it]

112943 episode score is 785.76


 12%|█▏        | 11541/100000 [3:48:27<27:51:46,  1.13s/it]

112954 episode score is 709.57


 12%|█▏        | 11542/100000 [3:48:28<28:00:28,  1.14s/it]

112964 episode score is 799.19


 12%|█▏        | 11543/100000 [3:48:30<28:44:28,  1.17s/it]

112974 episode score is 792.95


 12%|█▏        | 11544/100000 [3:48:31<28:34:49,  1.16s/it]

112984 episode score is 797.35


 12%|█▏        | 11545/100000 [3:48:32<28:31:42,  1.16s/it]

112994 episode score is 798.44


 12%|█▏        | 11546/100000 [3:48:33<28:15:54,  1.15s/it]

113005 episode score is 678.81


 12%|█▏        | 11547/100000 [3:48:34<27:47:48,  1.13s/it]

113015 episode score is 736.33


 12%|█▏        | 11548/100000 [3:48:35<27:59:01,  1.14s/it]

113025 episode score is 803.75


 12%|█▏        | 11549/100000 [3:48:36<28:04:42,  1.14s/it]

113035 episode score is 799.03


 12%|█▏        | 11550/100000 [3:48:38<27:58:35,  1.14s/it]

113045 episode score is 783.75


 12%|█▏        | 11551/100000 [3:48:39<27:56:59,  1.14s/it]

113055 episode score is 781.92


 12%|█▏        | 11552/100000 [3:48:40<27:36:33,  1.12s/it]

113065 episode score is 748.41


 12%|█▏        | 11553/100000 [3:48:41<27:31:20,  1.12s/it]

113075 episode score is 767.46


 12%|█▏        | 11554/100000 [3:48:42<27:33:39,  1.12s/it]

113085 episode score is 773.75


 12%|█▏        | 11555/100000 [3:48:43<27:48:04,  1.13s/it]

113095 episode score is 788.84


 12%|█▏        | 11556/100000 [3:48:44<28:11:13,  1.15s/it]

113106 episode score is 718.51


 12%|█▏        | 11557/100000 [3:48:46<27:59:17,  1.14s/it]

113116 episode score is 771.01


 12%|█▏        | 11558/100000 [3:48:47<27:42:14,  1.13s/it]

113126 episode score is 749.88


 12%|█▏        | 11559/100000 [3:48:48<28:13:13,  1.15s/it]

113137 episode score is 723.89


 12%|█▏        | 11560/100000 [3:48:49<28:03:01,  1.14s/it]

113147 episode score is 777.26


 12%|█▏        | 11561/100000 [3:48:50<28:04:47,  1.14s/it]

113158 episode score is 700.37


 12%|█▏        | 11562/100000 [3:48:51<28:05:36,  1.14s/it]

113169 episode score is 700.28


 12%|█▏        | 11563/100000 [3:48:52<27:46:42,  1.13s/it]

113180 episode score is 665.21


 12%|█▏        | 11564/100000 [3:48:53<27:52:21,  1.13s/it]

113191 episode score is 709.17


 12%|█▏        | 11565/100000 [3:48:55<28:14:23,  1.15s/it]

113202 episode score is 737.70


 12%|█▏        | 11566/100000 [3:48:56<28:26:47,  1.16s/it]

113213 episode score is 728.35


 12%|█▏        | 11567/100000 [3:48:57<28:30:59,  1.16s/it]

113224 episode score is 722.79


 12%|█▏        | 11568/100000 [3:48:58<28:05:56,  1.14s/it]

113234 episode score is 759.88


 12%|█▏        | 11569/100000 [3:48:59<27:44:45,  1.13s/it]

113244 episode score is 746.14


 12%|█▏        | 11570/100000 [3:49:00<28:17:41,  1.15s/it]

113255 episode score is 743.38


 12%|█▏        | 11571/100000 [3:49:02<28:23:19,  1.16s/it]

113266 episode score is 719.59


 12%|█▏        | 11572/100000 [3:49:03<28:28:28,  1.16s/it]

113277 episode score is 722.15


 12%|█▏        | 11573/100000 [3:49:04<28:00:58,  1.14s/it]

113287 episode score is 755.41


 12%|█▏        | 11574/100000 [3:49:05<27:46:23,  1.13s/it]

113297 episode score is 761.26


 12%|█▏        | 11575/100000 [3:49:06<28:03:37,  1.14s/it]

113308 episode score is 725.17


 12%|█▏        | 11576/100000 [3:49:07<27:39:39,  1.13s/it]

113318 episode score is 748.06


 12%|█▏        | 11577/100000 [3:49:08<28:02:13,  1.14s/it]

113329 episode score is 730.90


 12%|█▏        | 11578/100000 [3:49:10<27:54:18,  1.14s/it]

113339 episode score is 780.01


 12%|█▏        | 11579/100000 [3:49:11<27:56:55,  1.14s/it]

113350 episode score is 700.99


 12%|█▏        | 11580/100000 [3:49:12<27:49:17,  1.13s/it]

113360 episode score is 772.05


 12%|█▏        | 11581/100000 [3:49:13<27:39:02,  1.13s/it]

113370 episode score is 774.25


 12%|█▏        | 11582/100000 [3:49:14<27:26:06,  1.12s/it]

113380 episode score is 765.20


 12%|█▏        | 11583/100000 [3:49:15<27:17:54,  1.11s/it]

113390 episode score is 769.97


 12%|█▏        | 11584/100000 [3:49:16<27:11:00,  1.11s/it]

113400 episode score is 770.21


 12%|█▏        | 11585/100000 [3:49:17<27:28:12,  1.12s/it]

113410 episode score is 777.12


 12%|█▏        | 11586/100000 [3:49:18<27:30:36,  1.12s/it]

113421 episode score is 677.86


 12%|█▏        | 11587/100000 [3:49:20<27:15:29,  1.11s/it]

113431 episode score is 741.51


 12%|█▏        | 11588/100000 [3:49:21<27:19:34,  1.11s/it]

113441 episode score is 786.64


 12%|█▏        | 11589/100000 [3:49:22<27:26:41,  1.12s/it]

113451 episode score is 790.09


 12%|█▏        | 11590/100000 [3:49:23<27:30:42,  1.12s/it]

113461 episode score is 769.88


 12%|█▏        | 11591/100000 [3:49:24<27:34:37,  1.12s/it]

113471 episode score is 783.58


 12%|█▏        | 11592/100000 [3:49:25<27:39:03,  1.13s/it]

113481 episode score is 783.68


 12%|█▏        | 11593/100000 [3:49:26<27:24:38,  1.12s/it]

113491 episode score is 760.64


 12%|█▏        | 11594/100000 [3:49:27<27:34:46,  1.12s/it]

113501 episode score is 787.36


 12%|█▏        | 11595/100000 [3:49:29<27:25:56,  1.12s/it]

113511 episode score is 774.46


 12%|█▏        | 11596/100000 [3:49:30<28:19:22,  1.15s/it]

113521 episode score is 798.96


 12%|█▏        | 11597/100000 [3:49:31<28:05:09,  1.14s/it]

113532 episode score is 682.75


 12%|█▏        | 11598/100000 [3:49:32<27:39:44,  1.13s/it]

113543 episode score is 638.07


 12%|█▏        | 11599/100000 [3:49:33<27:32:47,  1.12s/it]

113554 episode score is 646.57
113565 episode score is 660.46


 12%|█▏        | 11600/100000 [3:49:35<34:23:51,  1.40s/it]

Iteration 11600: Average test reward: 651.35


 12%|█▏        | 11601/100000 [3:49:36<32:07:04,  1.31s/it]

113576 episode score is 640.26


 12%|█▏        | 11602/100000 [3:49:37<30:47:58,  1.25s/it]

113587 episode score is 668.65


 12%|█▏        | 11603/100000 [3:49:38<30:01:40,  1.22s/it]

113598 episode score is 699.68


 12%|█▏        | 11604/100000 [3:49:40<29:24:09,  1.20s/it]

113609 episode score is 688.77


 12%|█▏        | 11605/100000 [3:49:41<28:58:43,  1.18s/it]

113620 episode score is 679.51


 12%|█▏        | 11606/100000 [3:49:42<29:05:19,  1.18s/it]

113631 episode score is 722.10


 12%|█▏        | 11607/100000 [3:49:43<29:04:11,  1.18s/it]

113642 episode score is 726.52


 12%|█▏        | 11608/100000 [3:49:44<28:23:07,  1.16s/it]

113652 episode score is 730.76


 12%|█▏        | 11609/100000 [3:49:45<27:51:27,  1.13s/it]

113662 episode score is 734.79


 12%|█▏        | 11610/100000 [3:49:46<27:29:53,  1.12s/it]

113672 episode score is 743.52


 12%|█▏        | 11611/100000 [3:49:48<27:38:15,  1.13s/it]

113682 episode score is 765.85


 12%|█▏        | 11612/100000 [3:49:49<27:42:18,  1.13s/it]

113692 episode score is 769.03


 12%|█▏        | 11613/100000 [3:49:50<27:47:46,  1.13s/it]

113702 episode score is 780.07


 12%|█▏        | 11614/100000 [3:49:51<27:33:16,  1.12s/it]

113712 episode score is 753.92


 12%|█▏        | 11615/100000 [3:49:52<27:20:31,  1.11s/it]

113722 episode score is 747.89


 12%|█▏        | 11616/100000 [3:49:53<27:30:25,  1.12s/it]

113732 episode score is 779.77


 12%|█▏        | 11617/100000 [3:49:54<27:23:09,  1.12s/it]

113742 episode score is 753.66


 12%|█▏        | 11618/100000 [3:49:55<27:30:32,  1.12s/it]

113752 episode score is 764.96


 12%|█▏        | 11619/100000 [3:49:57<27:58:37,  1.14s/it]

113763 episode score is 731.84


 12%|█▏        | 11620/100000 [3:49:58<27:58:05,  1.14s/it]

113773 episode score is 784.35


 12%|█▏        | 11621/100000 [3:49:59<27:35:07,  1.12s/it]

113783 episode score is 751.77


 12%|█▏        | 11622/100000 [3:50:00<27:18:29,  1.11s/it]

113793 episode score is 749.60


 12%|█▏        | 11623/100000 [3:50:01<27:14:45,  1.11s/it]

113803 episode score is 764.64


 12%|█▏        | 11624/100000 [3:50:02<27:30:41,  1.12s/it]

113813 episode score is 772.03


 12%|█▏        | 11625/100000 [3:50:03<27:32:54,  1.12s/it]

113823 episode score is 772.53


 12%|█▏        | 11626/100000 [3:50:04<27:34:18,  1.12s/it]

113834 episode score is 682.63


 12%|█▏        | 11627/100000 [3:50:05<27:25:18,  1.12s/it]

113846 episode score is 586.11


 12%|█▏        | 11628/100000 [3:50:07<27:42:28,  1.13s/it]

113857 episode score is 715.43


 12%|█▏        | 11629/100000 [3:50:08<27:34:49,  1.12s/it]

113868 episode score is 671.91


 12%|█▏        | 11630/100000 [3:50:09<27:53:58,  1.14s/it]

113879 episode score is 704.91


 12%|█▏        | 11631/100000 [3:50:10<28:04:37,  1.14s/it]

113890 episode score is 709.29


 12%|█▏        | 11632/100000 [3:50:11<28:05:34,  1.14s/it]

113900 episode score is 781.93


 12%|█▏        | 11633/100000 [3:50:12<28:25:28,  1.16s/it]

113911 episode score is 726.81


 12%|█▏        | 11634/100000 [3:50:14<28:12:41,  1.15s/it]

113922 episode score is 688.62


 12%|█▏        | 11635/100000 [3:50:15<27:53:28,  1.14s/it]

113931 episode score is 797.30


 12%|█▏        | 11636/100000 [3:50:16<28:17:30,  1.15s/it]

113941 episode score is 753.56


 12%|█▏        | 11637/100000 [3:50:17<28:07:36,  1.15s/it]

113950 episode score is 802.43


 12%|█▏        | 11638/100000 [3:50:18<28:01:24,  1.14s/it]

113959 episode score is 769.32


 12%|█▏        | 11639/100000 [3:50:19<27:51:07,  1.13s/it]

113968 episode score is 799.75


 12%|█▏        | 11640/100000 [3:50:20<28:19:30,  1.15s/it]

113977 episode score is 861.03


 12%|█▏        | 11641/100000 [3:50:22<28:27:23,  1.16s/it]

113987 episode score is 758.25


 12%|█▏        | 11642/100000 [3:50:23<28:52:33,  1.18s/it]

113997 episode score is 774.09


 12%|█▏        | 11643/100000 [3:50:24<28:49:57,  1.17s/it]

114007 episode score is 765.29


 12%|█▏        | 11644/100000 [3:50:25<28:31:07,  1.16s/it]

114016 episode score is 804.10


 12%|█▏        | 11645/100000 [3:50:26<28:37:42,  1.17s/it]

114025 episode score is 842.88


 12%|█▏        | 11646/100000 [3:50:27<28:17:32,  1.15s/it]

114034 episode score is 807.82


 12%|█▏        | 11647/100000 [3:50:29<27:57:07,  1.14s/it]

114043 episode score is 802.29


 12%|█▏        | 11648/100000 [3:50:30<28:24:26,  1.16s/it]

114053 episode score is 783.99


 12%|█▏        | 11649/100000 [3:50:31<28:05:13,  1.14s/it]

114062 episode score is 794.53


 12%|█▏        | 11650/100000 [3:50:32<27:52:02,  1.14s/it]

114071 episode score is 795.32


 12%|█▏        | 11651/100000 [3:50:33<27:32:06,  1.12s/it]

114080 episode score is 781.42


 12%|█▏        | 11652/100000 [3:50:34<27:53:20,  1.14s/it]

114090 episode score is 750.56


 12%|█▏        | 11653/100000 [3:50:35<27:38:47,  1.13s/it]

114099 episode score is 784.70


 12%|█▏        | 11654/100000 [3:50:36<27:16:14,  1.11s/it]

114108 episode score is 781.09


 12%|█▏        | 11655/100000 [3:50:38<27:15:54,  1.11s/it]

114117 episode score is 789.28


 12%|█▏        | 11656/100000 [3:50:39<27:03:16,  1.10s/it]

114126 episode score is 777.37


 12%|█▏        | 11657/100000 [3:50:40<27:09:34,  1.11s/it]

114135 episode score is 798.07


 12%|█▏        | 11658/100000 [3:50:41<27:10:20,  1.11s/it]

114144 episode score is 787.74


 12%|█▏        | 11659/100000 [3:50:42<27:04:18,  1.10s/it]

114153 episode score is 779.05


 12%|█▏        | 11660/100000 [3:50:43<27:18:37,  1.11s/it]

114162 episode score is 804.09


 12%|█▏        | 11661/100000 [3:50:44<27:22:09,  1.12s/it]

114172 episode score is 727.68


 12%|█▏        | 11662/100000 [3:50:45<27:38:34,  1.13s/it]

114182 episode score is 743.10


 12%|█▏        | 11663/100000 [3:50:46<27:39:07,  1.13s/it]

114192 episode score is 749.05
114201 episode score is 781.42


 12%|█▏        | 11665/100000 [3:50:49<28:15:16,  1.15s/it]

114211 episode score is 755.62


 12%|█▏        | 11666/100000 [3:50:50<27:45:31,  1.13s/it]

114221 episode score is 701.60


 12%|█▏        | 11667/100000 [3:50:51<28:02:41,  1.14s/it]

114231 episode score is 745.33


 12%|█▏        | 11668/100000 [3:50:52<28:08:17,  1.15s/it]

114241 episode score is 747.19


 12%|█▏        | 11669/100000 [3:50:53<28:08:06,  1.15s/it]

114250 episode score is 822.73


 12%|█▏        | 11670/100000 [3:50:55<28:17:25,  1.15s/it]

114260 episode score is 757.22


 12%|█▏        | 11671/100000 [3:50:56<28:06:52,  1.15s/it]

114269 episode score is 809.53


 12%|█▏        | 11672/100000 [3:50:57<28:09:35,  1.15s/it]

114279 episode score is 757.59


 12%|█▏        | 11673/100000 [3:50:58<27:55:41,  1.14s/it]

114288 episode score is 808.87


 12%|█▏        | 11674/100000 [3:50:59<28:15:01,  1.15s/it]

114298 episode score is 758.07


 12%|█▏        | 11675/100000 [3:51:00<27:58:38,  1.14s/it]

114308 episode score is 724.16


 12%|█▏        | 11676/100000 [3:51:01<27:32:16,  1.12s/it]

114317 episode score is 784.99


 12%|█▏        | 11677/100000 [3:51:02<27:37:26,  1.13s/it]

114327 episode score is 748.16


 12%|█▏        | 11678/100000 [3:51:04<27:21:07,  1.11s/it]

114337 episode score is 714.15


 12%|█▏        | 11679/100000 [3:51:05<27:21:24,  1.12s/it]

114347 episode score is 723.95


 12%|█▏        | 11680/100000 [3:51:06<27:40:28,  1.13s/it]

114357 episode score is 755.97


 12%|█▏        | 11681/100000 [3:51:07<28:04:15,  1.14s/it]

114367 episode score is 775.38


 12%|█▏        | 11682/100000 [3:51:08<28:13:50,  1.15s/it]

114378 episode score is 695.66


 12%|█▏        | 11683/100000 [3:51:09<28:13:48,  1.15s/it]

114388 episode score is 753.31


 12%|█▏        | 11684/100000 [3:51:10<28:23:17,  1.16s/it]

114398 episode score is 761.06


 12%|█▏        | 11685/100000 [3:51:12<28:28:09,  1.16s/it]

114408 episode score is 763.52


 12%|█▏        | 11686/100000 [3:51:13<28:23:16,  1.16s/it]

114418 episode score is 744.91


 12%|█▏        | 11687/100000 [3:51:14<28:16:51,  1.15s/it]

114428 episode score is 751.73


 12%|█▏        | 11688/100000 [3:51:15<28:21:23,  1.16s/it]

114438 episode score is 747.85


 12%|█▏        | 11689/100000 [3:51:16<28:29:56,  1.16s/it]

114448 episode score is 769.97


 12%|█▏        | 11690/100000 [3:51:17<28:22:50,  1.16s/it]

114458 episode score is 764.66


 12%|█▏        | 11691/100000 [3:51:19<28:29:34,  1.16s/it]

114469 episode score is 706.84


 12%|█▏        | 11692/100000 [3:51:20<28:15:13,  1.15s/it]

114479 episode score is 755.54


 12%|█▏        | 11693/100000 [3:51:21<28:21:42,  1.16s/it]

114490 episode score is 704.00


 12%|█▏        | 11694/100000 [3:51:22<28:09:23,  1.15s/it]

114500 episode score is 765.45


 12%|█▏        | 11695/100000 [3:51:23<27:50:02,  1.13s/it]

114510 episode score is 739.50


 12%|█▏        | 11696/100000 [3:51:24<27:32:35,  1.12s/it]

114520 episode score is 743.26


 12%|█▏        | 11697/100000 [3:51:25<27:36:34,  1.13s/it]

114530 episode score is 765.58


 12%|█▏        | 11698/100000 [3:51:26<27:39:53,  1.13s/it]

114540 episode score is 775.28


 12%|█▏        | 11699/100000 [3:51:28<27:53:40,  1.14s/it]

114550 episode score is 780.25
114560 episode score is 774.06


 12%|█▏        | 11700/100000 [3:51:30<35:32:42,  1.45s/it]

Iteration 11700: Average test reward: 780.17


 12%|█▏        | 11701/100000 [3:51:31<33:04:39,  1.35s/it]

114570 episode score is 765.07


 12%|█▏        | 11702/100000 [3:51:32<31:36:51,  1.29s/it]

114580 episode score is 768.68


 12%|█▏        | 11703/100000 [3:51:33<30:32:19,  1.25s/it]

114590 episode score is 765.40


 12%|█▏        | 11704/100000 [3:51:34<29:36:28,  1.21s/it]

114600 episode score is 755.23


 12%|█▏        | 11705/100000 [3:51:36<29:21:58,  1.20s/it]

114610 episode score is 781.97


 12%|█▏        | 11706/100000 [3:51:37<28:52:02,  1.18s/it]

114620 episode score is 758.43


 12%|█▏        | 11707/100000 [3:51:38<28:28:13,  1.16s/it]

114630 episode score is 762.32


 12%|█▏        | 11708/100000 [3:51:39<28:12:33,  1.15s/it]

114640 episode score is 759.98


 12%|█▏        | 11709/100000 [3:51:40<27:52:33,  1.14s/it]

114650 episode score is 739.12


 12%|█▏        | 11710/100000 [3:51:41<27:45:04,  1.13s/it]

114660 episode score is 759.43


 12%|█▏        | 11711/100000 [3:51:42<27:27:25,  1.12s/it]

114670 episode score is 733.65


 12%|█▏        | 11712/100000 [3:51:43<27:26:01,  1.12s/it]

114680 episode score is 747.84


 12%|█▏        | 11713/100000 [3:51:44<27:44:11,  1.13s/it]

114690 episode score is 782.17


 12%|█▏        | 11714/100000 [3:51:46<27:45:21,  1.13s/it]

114700 episode score is 771.89


 12%|█▏        | 11715/100000 [3:51:47<27:34:17,  1.12s/it]

114710 episode score is 749.36


 12%|█▏        | 11716/100000 [3:51:48<27:34:53,  1.12s/it]

114721 episode score is 654.60


 12%|█▏        | 11717/100000 [3:51:49<27:36:17,  1.13s/it]

114731 episode score is 755.86


 12%|█▏        | 11718/100000 [3:51:50<27:40:00,  1.13s/it]

114741 episode score is 766.52


 12%|█▏        | 11719/100000 [3:51:51<27:28:05,  1.12s/it]

114751 episode score is 740.61


 12%|█▏        | 11720/100000 [3:51:52<27:20:34,  1.12s/it]

114761 episode score is 736.23


 12%|█▏        | 11721/100000 [3:51:53<27:16:59,  1.11s/it]

114771 episode score is 738.39


 12%|█▏        | 11722/100000 [3:51:55<27:28:01,  1.12s/it]

114781 episode score is 756.69


 12%|█▏        | 11723/100000 [3:51:56<27:24:01,  1.12s/it]

114791 episode score is 738.08


 12%|█▏        | 11724/100000 [3:51:57<27:07:58,  1.11s/it]

114801 episode score is 725.58


 12%|█▏        | 11725/100000 [3:51:58<27:11:59,  1.11s/it]

114811 episode score is 759.76


 12%|█▏        | 11726/100000 [3:51:59<27:01:33,  1.10s/it]

114821 episode score is 719.57


 12%|█▏        | 11727/100000 [3:52:00<26:59:07,  1.10s/it]

114831 episode score is 734.26


 12%|█▏        | 11728/100000 [3:52:01<27:07:51,  1.11s/it]

114841 episode score is 756.87


 12%|█▏        | 11729/100000 [3:52:02<27:33:15,  1.12s/it]

114851 episode score is 775.99


 12%|█▏        | 11730/100000 [3:52:04<28:36:13,  1.17s/it]

114861 episode score is 775.33


 12%|█▏        | 11731/100000 [3:52:05<28:27:06,  1.16s/it]

114871 episode score is 768.85


 12%|█▏        | 11732/100000 [3:52:06<28:12:31,  1.15s/it]

114881 episode score is 753.02


 12%|█▏        | 11733/100000 [3:52:07<28:00:43,  1.14s/it]

114891 episode score is 760.76


 12%|█▏        | 11734/100000 [3:52:08<27:45:12,  1.13s/it]

114901 episode score is 741.84


 12%|█▏        | 11735/100000 [3:52:09<27:40:18,  1.13s/it]

114911 episode score is 752.41


 12%|█▏        | 11736/100000 [3:52:10<27:38:23,  1.13s/it]

114921 episode score is 760.67


 12%|█▏        | 11737/100000 [3:52:11<27:33:27,  1.12s/it]

114931 episode score is 752.70


 12%|█▏        | 11738/100000 [3:52:13<27:27:08,  1.12s/it]

114941 episode score is 749.50


 12%|█▏        | 11739/100000 [3:52:14<27:54:24,  1.14s/it]

114952 episode score is 717.77


 12%|█▏        | 11740/100000 [3:52:15<27:44:53,  1.13s/it]

114962 episode score is 756.53


 12%|█▏        | 11741/100000 [3:52:16<27:49:08,  1.13s/it]

114972 episode score is 776.42


 12%|█▏        | 11742/100000 [3:52:17<27:37:05,  1.13s/it]

114982 episode score is 734.73


 12%|█▏        | 11743/100000 [3:52:18<27:22:16,  1.12s/it]

114992 episode score is 726.00


 12%|█▏        | 11744/100000 [3:52:19<27:26:16,  1.12s/it]

115002 episode score is 745.88


 12%|█▏        | 11745/100000 [3:52:21<27:55:50,  1.14s/it]

115013 episode score is 726.72


 12%|█▏        | 11746/100000 [3:52:22<28:18:25,  1.15s/it]

115024 episode score is 720.42


 12%|█▏        | 11747/100000 [3:52:23<28:28:18,  1.16s/it]

115035 episode score is 708.27


 12%|█▏        | 11748/100000 [3:52:24<28:40:44,  1.17s/it]

115046 episode score is 721.23


 12%|█▏        | 11749/100000 [3:52:25<28:41:03,  1.17s/it]

115057 episode score is 704.51


 12%|█▏        | 11750/100000 [3:52:26<28:13:36,  1.15s/it]

115067 episode score is 745.02


 12%|█▏        | 11751/100000 [3:52:27<27:51:45,  1.14s/it]

115077 episode score is 738.71


 12%|█▏        | 11752/100000 [3:52:29<28:09:31,  1.15s/it]

115088 episode score is 716.04


 12%|█▏        | 11753/100000 [3:52:30<28:17:17,  1.15s/it]

115099 episode score is 707.28


 12%|█▏        | 11754/100000 [3:52:31<27:58:50,  1.14s/it]

115109 episode score is 739.36


 12%|█▏        | 11755/100000 [3:52:32<27:39:21,  1.13s/it]

115119 episode score is 725.89


 12%|█▏        | 11756/100000 [3:52:33<27:27:19,  1.12s/it]

115129 episode score is 732.45


 12%|█▏        | 11757/100000 [3:52:34<27:17:20,  1.11s/it]

115139 episode score is 741.77


 12%|█▏        | 11758/100000 [3:52:35<27:29:01,  1.12s/it]

115150 episode score is 689.95


 12%|█▏        | 11759/100000 [3:52:37<27:52:58,  1.14s/it]

115161 episode score is 711.09


 12%|█▏        | 11760/100000 [3:52:38<27:51:19,  1.14s/it]

115172 episode score is 674.15


 12%|█▏        | 11761/100000 [3:52:39<27:58:05,  1.14s/it]

115183 episode score is 693.32


 12%|█▏        | 11762/100000 [3:52:40<27:53:34,  1.14s/it]

115194 episode score is 681.44


 12%|█▏        | 11763/100000 [3:52:41<28:21:03,  1.16s/it]

115206 episode score is 646.39


 12%|█▏        | 11764/100000 [3:52:42<28:10:46,  1.15s/it]

115218 episode score is 597.04


 12%|█▏        | 11765/100000 [3:52:43<27:44:55,  1.13s/it]

115229 episode score is 644.03


 12%|█▏        | 11766/100000 [3:52:45<27:48:50,  1.13s/it]

115240 episode score is 684.59


 12%|█▏        | 11767/100000 [3:52:46<28:14:43,  1.15s/it]

115252 episode score is 634.38


 12%|█▏        | 11768/100000 [3:52:47<28:20:17,  1.16s/it]

115263 episode score is 698.49


 12%|█▏        | 11769/100000 [3:52:48<28:31:00,  1.16s/it]

115274 episode score is 707.70


 12%|█▏        | 11770/100000 [3:52:49<28:01:01,  1.14s/it]

115284 episode score is 741.45


 12%|█▏        | 11771/100000 [3:52:50<28:21:56,  1.16s/it]

115296 episode score is 639.42


 12%|█▏        | 11772/100000 [3:52:51<27:55:45,  1.14s/it]

115307 episode score is 651.65


 12%|█▏        | 11773/100000 [3:52:53<28:04:11,  1.15s/it]

115319 episode score is 618.58


 12%|█▏        | 11774/100000 [3:52:54<28:12:42,  1.15s/it]

115330 episode score is 706.74


 12%|█▏        | 11775/100000 [3:52:55<27:58:41,  1.14s/it]

115341 episode score is 661.23


 12%|█▏        | 11776/100000 [3:52:56<27:37:38,  1.13s/it]

115352 episode score is 638.32


 12%|█▏        | 11777/100000 [3:52:57<27:31:53,  1.12s/it]

115363 episode score is 658.54


 12%|█▏        | 11778/100000 [3:52:58<27:17:22,  1.11s/it]

115374 episode score is 641.11


 12%|█▏        | 11779/100000 [3:52:59<27:22:28,  1.12s/it]

115385 episode score is 694.51


 12%|█▏        | 11780/100000 [3:53:00<27:09:18,  1.11s/it]

115396 episode score is 647.75


 12%|█▏        | 11781/100000 [3:53:01<27:09:51,  1.11s/it]

115407 episode score is 665.45


 12%|█▏        | 11782/100000 [3:53:03<27:45:34,  1.13s/it]

115419 episode score is 648.58


 12%|█▏        | 11783/100000 [3:53:04<27:28:35,  1.12s/it]

115430 episode score is 660.05


 12%|█▏        | 11784/100000 [3:53:05<27:15:05,  1.11s/it]

115441 episode score is 655.38


 12%|█▏        | 11785/100000 [3:53:06<27:56:15,  1.14s/it]

115452 episode score is 665.49


 12%|█▏        | 11786/100000 [3:53:07<27:42:24,  1.13s/it]

115463 episode score is 661.96


 12%|█▏        | 11787/100000 [3:53:08<27:40:36,  1.13s/it]

115474 episode score is 680.52


 12%|█▏        | 11788/100000 [3:53:09<27:17:48,  1.11s/it]

115484 episode score is 726.26


 12%|█▏        | 11789/100000 [3:53:10<27:06:35,  1.11s/it]

115494 episode score is 740.42


 12%|█▏        | 11790/100000 [3:53:12<27:27:33,  1.12s/it]

115505 episode score is 700.01


 12%|█▏        | 11791/100000 [3:53:13<27:13:01,  1.11s/it]

115515 episode score is 731.28


 12%|█▏        | 11792/100000 [3:53:14<27:17:29,  1.11s/it]

115526 episode score is 675.35


 12%|█▏        | 11793/100000 [3:53:15<27:16:01,  1.11s/it]

115537 episode score is 670.33


 12%|█▏        | 11794/100000 [3:53:16<27:46:20,  1.13s/it]

115548 episode score is 721.12


 12%|█▏        | 11795/100000 [3:53:17<27:32:54,  1.12s/it]

115558 episode score is 730.77


 12%|█▏        | 11796/100000 [3:53:18<27:41:52,  1.13s/it]

115569 episode score is 678.70


 12%|█▏        | 11797/100000 [3:53:20<27:46:08,  1.13s/it]

115580 episode score is 676.08


 12%|█▏        | 11798/100000 [3:53:21<27:26:52,  1.12s/it]

115590 episode score is 737.77


 12%|█▏        | 11799/100000 [3:53:22<27:30:04,  1.12s/it]

115600 episode score is 762.85
115610 episode score is 734.63


 12%|█▏        | 11800/100000 [3:53:24<34:07:16,  1.39s/it]

Iteration 11800: Average test reward: 659.45


 12%|█▏        | 11801/100000 [3:53:25<31:56:03,  1.30s/it]

115620 episode score is 727.96


 12%|█▏        | 11802/100000 [3:53:26<30:53:13,  1.26s/it]

115631 episode score is 695.61


 12%|█▏        | 11803/100000 [3:53:27<29:41:29,  1.21s/it]

115641 episode score is 740.32


 12%|█▏        | 11804/100000 [3:53:28<28:45:22,  1.17s/it]

115651 episode score is 733.12


 12%|█▏        | 11805/100000 [3:53:29<28:20:06,  1.16s/it]

115661 episode score is 753.48


 12%|█▏        | 11806/100000 [3:53:30<28:19:58,  1.16s/it]

115672 episode score is 705.19


 12%|█▏        | 11807/100000 [3:53:32<28:24:51,  1.16s/it]

115683 episode score is 715.73


 12%|█▏        | 11808/100000 [3:53:33<28:40:03,  1.17s/it]

115694 episode score is 728.67


 12%|█▏        | 11809/100000 [3:53:34<28:46:08,  1.17s/it]

115705 episode score is 722.27


 12%|█▏        | 11810/100000 [3:53:35<28:27:56,  1.16s/it]

115715 episode score is 772.60


 12%|█▏        | 11811/100000 [3:53:36<28:17:53,  1.16s/it]

115726 episode score is 683.68


 12%|█▏        | 11812/100000 [3:53:37<28:18:39,  1.16s/it]

115737 episode score is 701.42


 12%|█▏        | 11813/100000 [3:53:39<28:01:13,  1.14s/it]

115747 episode score is 762.10


 12%|█▏        | 11814/100000 [3:53:40<27:43:23,  1.13s/it]

115757 episode score is 730.62


 12%|█▏        | 11815/100000 [3:53:41<27:41:42,  1.13s/it]

115767 episode score is 766.95


 12%|█▏        | 11816/100000 [3:53:42<27:59:23,  1.14s/it]

115778 episode score is 707.63


 12%|█▏        | 11817/100000 [3:53:43<27:40:13,  1.13s/it]

115788 episode score is 742.53


 12%|█▏        | 11818/100000 [3:53:44<27:20:35,  1.12s/it]

115798 episode score is 732.51


 12%|█▏        | 11819/100000 [3:53:45<27:12:06,  1.11s/it]

115808 episode score is 734.70


 12%|█▏        | 11820/100000 [3:53:46<27:00:10,  1.10s/it]

115818 episode score is 727.93


 12%|█▏        | 11821/100000 [3:53:48<27:37:27,  1.13s/it]

115829 episode score is 710.55


 12%|█▏        | 11822/100000 [3:53:49<27:23:15,  1.12s/it]

115839 episode score is 736.61


 12%|█▏        | 11823/100000 [3:53:50<27:53:21,  1.14s/it]

115850 episode score is 714.52


 12%|█▏        | 11824/100000 [3:53:51<27:36:14,  1.13s/it]

115860 episode score is 743.09


 12%|█▏        | 11825/100000 [3:53:52<27:33:05,  1.12s/it]

115870 episode score is 756.66


 12%|█▏        | 11826/100000 [3:53:53<27:22:15,  1.12s/it]

115880 episode score is 738.16


 12%|█▏        | 11827/100000 [3:53:54<27:23:56,  1.12s/it]

115891 episode score is 673.95


 12%|█▏        | 11828/100000 [3:53:55<27:33:53,  1.13s/it]

115902 episode score is 677.30


 12%|█▏        | 11829/100000 [3:53:57<27:52:44,  1.14s/it]

115913 episode score is 697.84


 12%|█▏        | 11830/100000 [3:53:58<28:03:01,  1.15s/it]

115924 episode score is 705.85


 12%|█▏        | 11831/100000 [3:53:59<28:23:56,  1.16s/it]

115935 episode score is 730.01


 12%|█▏        | 11832/100000 [3:54:00<27:57:02,  1.14s/it]

115946 episode score is 670.65


 12%|█▏        | 11833/100000 [3:54:01<27:28:16,  1.12s/it]

115957 episode score is 647.21


 12%|█▏        | 11834/100000 [3:54:02<27:44:37,  1.13s/it]

115968 episode score is 710.73


 12%|█▏        | 11835/100000 [3:54:03<27:30:25,  1.12s/it]

115978 episode score is 755.48


 12%|█▏        | 11836/100000 [3:54:04<27:32:22,  1.12s/it]

115989 episode score is 680.03


 12%|█▏        | 11837/100000 [3:54:06<27:54:23,  1.14s/it]

116000 episode score is 724.39


 12%|█▏        | 11838/100000 [3:54:07<27:34:43,  1.13s/it]

116010 episode score is 743.76


 12%|█▏        | 11839/100000 [3:54:08<27:14:41,  1.11s/it]

116020 episode score is 734.78


 12%|█▏        | 11840/100000 [3:54:09<27:31:52,  1.12s/it]

116031 episode score is 696.44


 12%|█▏        | 11841/100000 [3:54:10<27:38:43,  1.13s/it]

116042 episode score is 695.31


 12%|█▏        | 11842/100000 [3:54:11<28:16:12,  1.15s/it]

116053 episode score is 668.43


 12%|█▏        | 11843/100000 [3:54:12<28:18:46,  1.16s/it]

116064 episode score is 711.70


 12%|█▏        | 11844/100000 [3:54:14<28:25:07,  1.16s/it]

116075 episode score is 715.66


 12%|█▏        | 11845/100000 [3:54:15<28:25:31,  1.16s/it]

116086 episode score is 708.30


 12%|█▏        | 11846/100000 [3:54:16<28:16:02,  1.15s/it]

116097 episode score is 687.14


 12%|█▏        | 11847/100000 [3:54:17<28:12:24,  1.15s/it]

116108 episode score is 682.15


 12%|█▏        | 11848/100000 [3:54:18<28:10:34,  1.15s/it]

116119 episode score is 699.97


 12%|█▏        | 11849/100000 [3:54:19<28:08:14,  1.15s/it]

116130 episode score is 695.92


 12%|█▏        | 11850/100000 [3:54:21<28:16:42,  1.15s/it]

116141 episode score is 716.78


 12%|█▏        | 11851/100000 [3:54:22<28:20:41,  1.16s/it]

116152 episode score is 716.20


 12%|█▏        | 11852/100000 [3:54:23<28:09:55,  1.15s/it]

116163 episode score is 678.57


 12%|█▏        | 11853/100000 [3:54:24<28:26:54,  1.16s/it]

116174 episode score is 732.63


 12%|█▏        | 11854/100000 [3:54:25<27:49:49,  1.14s/it]

116184 episode score is 734.85


 12%|█▏        | 11855/100000 [3:54:26<27:30:18,  1.12s/it]

116194 episode score is 750.62


 12%|█▏        | 11856/100000 [3:54:27<27:59:20,  1.14s/it]

116205 episode score is 734.35


 12%|█▏        | 11857/100000 [3:54:29<27:38:12,  1.13s/it]

116215 episode score is 748.78


 12%|█▏        | 11858/100000 [3:54:30<28:00:07,  1.14s/it]

116226 episode score is 722.07


 12%|█▏        | 11859/100000 [3:54:31<28:02:09,  1.15s/it]

116237 episode score is 707.32


 12%|█▏        | 11860/100000 [3:54:32<27:42:32,  1.13s/it]

116247 episode score is 737.71


 12%|█▏        | 11861/100000 [3:54:33<27:27:39,  1.12s/it]

116257 episode score is 736.21


 12%|█▏        | 11862/100000 [3:54:34<27:14:33,  1.11s/it]

116267 episode score is 740.40


 12%|█▏        | 11863/100000 [3:54:35<27:11:05,  1.11s/it]

116277 episode score is 749.95


 12%|█▏        | 11864/100000 [3:54:36<27:11:23,  1.11s/it]

116287 episode score is 757.28


 12%|█▏        | 11865/100000 [3:54:37<27:09:15,  1.11s/it]

116297 episode score is 733.13


 12%|█▏        | 11866/100000 [3:54:39<27:14:13,  1.11s/it]

116307 episode score is 760.02


 12%|█▏        | 11867/100000 [3:54:40<27:21:49,  1.12s/it]

116317 episode score is 765.14


 12%|█▏        | 11868/100000 [3:54:41<27:17:17,  1.11s/it]

116327 episode score is 752.62


 12%|█▏        | 11869/100000 [3:54:42<27:07:31,  1.11s/it]

116337 episode score is 736.23


 12%|█▏        | 11870/100000 [3:54:43<27:18:40,  1.12s/it]

116347 episode score is 766.80


 12%|█▏        | 11871/100000 [3:54:44<27:14:30,  1.11s/it]

116357 episode score is 743.15


 12%|█▏        | 11872/100000 [3:54:45<27:21:55,  1.12s/it]

116367 episode score is 761.39


 12%|█▏        | 11873/100000 [3:54:46<27:14:12,  1.11s/it]

116377 episode score is 743.07


 12%|█▏        | 11874/100000 [3:54:48<27:30:11,  1.12s/it]

116387 episode score is 771.21


 12%|█▏        | 11875/100000 [3:54:49<27:25:01,  1.12s/it]

116397 episode score is 733.22


 12%|█▏        | 11876/100000 [3:54:50<27:22:18,  1.12s/it]

116407 episode score is 752.98


 12%|█▏        | 11877/100000 [3:54:51<27:57:14,  1.14s/it]

116418 episode score is 733.05


 12%|█▏        | 11878/100000 [3:54:52<27:53:13,  1.14s/it]

116428 episode score is 762.89


 12%|█▏        | 11879/100000 [3:54:53<28:08:49,  1.15s/it]

116438 episode score is 792.56


 12%|█▏        | 11880/100000 [3:54:54<27:45:32,  1.13s/it]

116448 episode score is 726.47


 12%|█▏        | 11881/100000 [3:54:55<27:46:42,  1.13s/it]

116458 episode score is 756.90


 12%|█▏        | 11882/100000 [3:54:57<27:43:59,  1.13s/it]

116468 episode score is 758.55


 12%|█▏        | 11883/100000 [3:54:58<27:56:46,  1.14s/it]

116478 episode score is 760.47


 12%|█▏        | 11884/100000 [3:54:59<27:44:51,  1.13s/it]

116488 episode score is 752.99


 12%|█▏        | 11885/100000 [3:55:00<28:10:25,  1.15s/it]

116499 episode score is 725.93


 12%|█▏        | 11886/100000 [3:55:01<28:18:10,  1.16s/it]

116510 episode score is 704.04


 12%|█▏        | 11887/100000 [3:55:02<28:12:00,  1.15s/it]

116520 episode score is 771.82


 12%|█▏        | 11888/100000 [3:55:03<27:46:02,  1.13s/it]

116530 episode score is 734.11


 12%|█▏        | 11889/100000 [3:55:05<27:38:22,  1.13s/it]

116540 episode score is 760.37


 12%|█▏        | 11890/100000 [3:55:06<27:16:45,  1.11s/it]

116550 episode score is 733.12


 12%|█▏        | 11891/100000 [3:55:07<27:35:38,  1.13s/it]

116560 episode score is 788.58


 12%|█▏        | 11892/100000 [3:55:08<28:04:58,  1.15s/it]

116571 episode score is 724.87


 12%|█▏        | 11893/100000 [3:55:09<28:07:45,  1.15s/it]

116581 episode score is 789.52


 12%|█▏        | 11894/100000 [3:55:10<28:01:10,  1.14s/it]

116591 episode score is 775.42


 12%|█▏        | 11895/100000 [3:55:11<27:40:00,  1.13s/it]

116601 episode score is 737.08


 12%|█▏        | 11896/100000 [3:55:13<27:36:16,  1.13s/it]

116611 episode score is 753.09


 12%|█▏        | 11897/100000 [3:55:14<27:45:06,  1.13s/it]

116621 episode score is 777.49


 12%|█▏        | 11898/100000 [3:55:15<27:38:48,  1.13s/it]

116631 episode score is 765.71


 12%|█▏        | 11899/100000 [3:55:16<27:46:24,  1.13s/it]

116641 episode score is 770.32
116651 episode score is 731.31


 12%|█▏        | 11900/100000 [3:55:18<34:22:44,  1.40s/it]

Iteration 11900: Average test reward: 698.48


 12%|█▏        | 11901/100000 [3:55:19<32:10:21,  1.31s/it]

116661 episode score is 730.31


 12%|█▏        | 11902/100000 [3:55:20<30:32:55,  1.25s/it]

116671 episode score is 732.52


 12%|█▏        | 11903/100000 [3:55:21<29:37:22,  1.21s/it]

116681 episode score is 755.42


 12%|█▏        | 11904/100000 [3:55:22<28:51:23,  1.18s/it]

116691 episode score is 741.57


 12%|█▏        | 11905/100000 [3:55:24<28:49:52,  1.18s/it]

116701 episode score is 792.19


 12%|█▏        | 11906/100000 [3:55:25<29:11:41,  1.19s/it]

116711 episode score is 769.47


 12%|█▏        | 11907/100000 [3:55:26<28:50:45,  1.18s/it]

116721 episode score is 787.27


 12%|█▏        | 11908/100000 [3:55:27<28:21:21,  1.16s/it]

116731 episode score is 753.24


 12%|█▏        | 11909/100000 [3:55:28<28:19:06,  1.16s/it]

116741 episode score is 788.14


 12%|█▏        | 11910/100000 [3:55:29<28:16:41,  1.16s/it]

116751 episode score is 786.32


 12%|█▏        | 11911/100000 [3:55:31<28:06:15,  1.15s/it]

116762 episode score is 685.20


 12%|█▏        | 11912/100000 [3:55:32<27:58:05,  1.14s/it]

116773 episode score is 675.64


 12%|█▏        | 11913/100000 [3:55:33<27:50:45,  1.14s/it]

116783 episode score is 758.43


 12%|█▏        | 11914/100000 [3:55:34<27:28:54,  1.12s/it]

116793 episode score is 738.60


 12%|█▏        | 11915/100000 [3:55:35<27:50:18,  1.14s/it]

116804 episode score is 716.47


 12%|█▏        | 11916/100000 [3:55:36<27:26:09,  1.12s/it]

116814 episode score is 720.80


 12%|█▏        | 11917/100000 [3:55:37<27:32:59,  1.13s/it]

116824 episode score is 759.34


 12%|█▏        | 11918/100000 [3:55:38<27:39:57,  1.13s/it]

116835 episode score is 687.34


 12%|█▏        | 11919/100000 [3:55:40<27:39:11,  1.13s/it]

116846 episode score is 680.60


 12%|█▏        | 11920/100000 [3:55:41<27:18:44,  1.12s/it]

116856 episode score is 721.72


 12%|█▏        | 11921/100000 [3:55:42<27:33:19,  1.13s/it]

116866 episode score is 784.53


 12%|█▏        | 11922/100000 [3:55:43<27:40:55,  1.13s/it]

116876 episode score is 773.27


 12%|█▏        | 11923/100000 [3:55:44<27:27:16,  1.12s/it]

116886 episode score is 746.97


 12%|█▏        | 11924/100000 [3:55:45<27:42:06,  1.13s/it]

116896 episode score is 793.12


 12%|█▏        | 11925/100000 [3:55:46<27:44:33,  1.13s/it]

116906 episode score is 779.65


 12%|█▏        | 11926/100000 [3:55:47<28:12:12,  1.15s/it]

116917 episode score is 716.46


 12%|█▏        | 11927/100000 [3:55:49<28:14:48,  1.15s/it]

116928 episode score is 688.20


 12%|█▏        | 11928/100000 [3:55:50<28:21:36,  1.16s/it]

116939 episode score is 707.76


 12%|█▏        | 11929/100000 [3:55:51<27:57:12,  1.14s/it]

116949 episode score is 749.01


 12%|█▏        | 11930/100000 [3:55:52<27:49:45,  1.14s/it]

116960 episode score is 672.56


 12%|█▏        | 11931/100000 [3:55:53<27:38:22,  1.13s/it]

116970 episode score is 751.92


 12%|█▏        | 11932/100000 [3:55:54<27:38:20,  1.13s/it]

116980 episode score is 778.19


 12%|█▏        | 11933/100000 [3:55:55<27:55:29,  1.14s/it]

116990 episode score is 791.42


 12%|█▏        | 11934/100000 [3:55:57<27:46:03,  1.14s/it]

117000 episode score is 759.35


 12%|█▏        | 11935/100000 [3:55:58<27:31:23,  1.13s/it]

117010 episode score is 735.06


 12%|█▏        | 11936/100000 [3:55:59<27:19:01,  1.12s/it]

117020 episode score is 743.02


 12%|█▏        | 11937/100000 [3:56:00<27:09:29,  1.11s/it]

117030 episode score is 736.83


 12%|█▏        | 11938/100000 [3:56:01<27:03:07,  1.11s/it]

117040 episode score is 746.61


 12%|█▏        | 11939/100000 [3:56:02<27:19:25,  1.12s/it]

117050 episode score is 775.25


 12%|█▏        | 11940/100000 [3:56:03<27:18:02,  1.12s/it]

117060 episode score is 759.68


 12%|█▏        | 11941/100000 [3:56:04<27:06:38,  1.11s/it]

117070 episode score is 741.53


 12%|█▏        | 11942/100000 [3:56:05<26:57:02,  1.10s/it]

117080 episode score is 738.05


 12%|█▏        | 11943/100000 [3:56:07<27:18:06,  1.12s/it]

117091 episode score is 693.83


 12%|█▏        | 11944/100000 [3:56:08<27:48:43,  1.14s/it]

117102 episode score is 720.94


 12%|█▏        | 11945/100000 [3:56:09<27:28:36,  1.12s/it]

117112 episode score is 735.45


 12%|█▏        | 11946/100000 [3:56:10<27:32:15,  1.13s/it]

117123 episode score is 677.44


 12%|█▏        | 11947/100000 [3:56:11<27:51:33,  1.14s/it]

117134 episode score is 718.09


 12%|█▏        | 11948/100000 [3:56:12<28:10:53,  1.15s/it]

117145 episode score is 717.44


 12%|█▏        | 11949/100000 [3:56:13<28:10:32,  1.15s/it]

117156 episode score is 699.24


 12%|█▏        | 11950/100000 [3:56:15<27:59:14,  1.14s/it]

117167 episode score is 682.09


 12%|█▏        | 11951/100000 [3:56:16<28:19:38,  1.16s/it]

117178 episode score is 733.69


 12%|█▏        | 11952/100000 [3:56:17<27:46:57,  1.14s/it]

117188 episode score is 743.51


 12%|█▏        | 11953/100000 [3:56:18<28:07:41,  1.15s/it]

117199 episode score is 706.62


 12%|█▏        | 11954/100000 [3:56:19<28:25:51,  1.16s/it]

117210 episode score is 732.43


 12%|█▏        | 11955/100000 [3:56:20<28:10:48,  1.15s/it]

117220 episode score is 781.51


 12%|█▏        | 11956/100000 [3:56:22<28:14:07,  1.15s/it]

117231 episode score is 714.67


 12%|█▏        | 11957/100000 [3:56:23<27:44:55,  1.13s/it]

117241 episode score is 734.77


 12%|█▏        | 11958/100000 [3:56:24<27:28:09,  1.12s/it]

117251 episode score is 744.80


 12%|█▏        | 11959/100000 [3:56:25<27:43:56,  1.13s/it]

117262 episode score is 712.75


 12%|█▏        | 11960/100000 [3:56:26<27:42:42,  1.13s/it]

117272 episode score is 780.60


 12%|█▏        | 11961/100000 [3:56:27<27:43:42,  1.13s/it]

117282 episode score is 780.79


 12%|█▏        | 11962/100000 [3:56:28<27:47:36,  1.14s/it]

117292 episode score is 776.19


 12%|█▏        | 11963/100000 [3:56:29<27:54:12,  1.14s/it]

117302 episode score is 778.17


 12%|█▏        | 11964/100000 [3:56:31<27:50:33,  1.14s/it]

117312 episode score is 775.50


 12%|█▏        | 11965/100000 [3:56:32<27:49:13,  1.14s/it]

117322 episode score is 766.44


 12%|█▏        | 11966/100000 [3:56:33<28:06:46,  1.15s/it]

117332 episode score is 789.77


 12%|█▏        | 11967/100000 [3:56:34<28:31:11,  1.17s/it]

117342 episode score is 763.45


 12%|█▏        | 11968/100000 [3:56:35<28:05:26,  1.15s/it]

117352 episode score is 749.68


 12%|█▏        | 11969/100000 [3:56:36<28:21:37,  1.16s/it]

117362 episode score is 807.79


 12%|█▏        | 11970/100000 [3:56:37<27:50:23,  1.14s/it]

117372 episode score is 728.92


 12%|█▏        | 11971/100000 [3:56:39<27:28:38,  1.12s/it]

117382 episode score is 733.72


 12%|█▏        | 11972/100000 [3:56:40<27:49:54,  1.14s/it]

117392 episode score is 795.91


 12%|█▏        | 11973/100000 [3:56:41<27:47:55,  1.14s/it]

117402 episode score is 768.52


 12%|█▏        | 11974/100000 [3:56:42<28:03:05,  1.15s/it]

117412 episode score is 799.42


 12%|█▏        | 11975/100000 [3:56:43<27:46:27,  1.14s/it]

117422 episode score is 727.49


 12%|█▏        | 11976/100000 [3:56:44<28:07:48,  1.15s/it]

117432 episode score is 807.32


 12%|█▏        | 11977/100000 [3:56:45<28:16:50,  1.16s/it]

117442 episode score is 802.35


 12%|█▏        | 11978/100000 [3:56:47<28:22:30,  1.16s/it]

117452 episode score is 796.38


 12%|█▏        | 11979/100000 [3:56:48<28:34:31,  1.17s/it]

117462 episode score is 795.52


 12%|█▏        | 11980/100000 [3:56:49<28:42:30,  1.17s/it]

117472 episode score is 788.45


 12%|█▏        | 11981/100000 [3:56:50<28:36:01,  1.17s/it]

117482 episode score is 796.68


 12%|█▏        | 11982/100000 [3:56:51<28:00:02,  1.15s/it]

117492 episode score is 731.67


 12%|█▏        | 11983/100000 [3:56:52<28:08:49,  1.15s/it]

117502 episode score is 790.42


 12%|█▏        | 11984/100000 [3:56:54<27:50:09,  1.14s/it]

117513 episode score is 655.66


 12%|█▏        | 11985/100000 [3:56:55<27:36:46,  1.13s/it]

117523 episode score is 745.63


 12%|█▏        | 11986/100000 [3:56:56<27:30:07,  1.12s/it]

117534 episode score is 654.97


 12%|█▏        | 11987/100000 [3:56:57<27:31:25,  1.13s/it]

117544 episode score is 751.78


 12%|█▏        | 11988/100000 [3:56:58<27:54:50,  1.14s/it]

117554 episode score is 799.12


 12%|█▏        | 11989/100000 [3:56:59<27:45:37,  1.14s/it]

117563 episode score is 842.52


 12%|█▏        | 11990/100000 [3:57:00<27:50:44,  1.14s/it]

117573 episode score is 760.96


 12%|█▏        | 11991/100000 [3:57:01<27:42:30,  1.13s/it]

117583 episode score is 765.75


 12%|█▏        | 11992/100000 [3:57:03<27:46:41,  1.14s/it]

117593 episode score is 763.78


 12%|█▏        | 11993/100000 [3:57:04<27:58:05,  1.14s/it]

117603 episode score is 787.47


 12%|█▏        | 11994/100000 [3:57:05<28:19:27,  1.16s/it]

117613 episode score is 816.80


 12%|█▏        | 11995/100000 [3:57:06<28:42:33,  1.17s/it]

117623 episode score is 814.71


 12%|█▏        | 11996/100000 [3:57:07<28:46:07,  1.18s/it]

117633 episode score is 809.20


 12%|█▏        | 11997/100000 [3:57:08<28:06:49,  1.15s/it]

117642 episode score is 816.06


 12%|█▏        | 11998/100000 [3:57:10<28:26:21,  1.16s/it]

117652 episode score is 819.08


 12%|█▏        | 11999/100000 [3:57:11<28:32:25,  1.17s/it]

117662 episode score is 799.96
117672 episode score is 810.56


 12%|█▏        | 12000/100000 [3:57:13<36:32:27,  1.49s/it]

Iteration 12000: Average test reward: 800.25


 12%|█▏        | 12001/100000 [3:57:14<33:37:53,  1.38s/it]

117681 episode score is 827.98


 12%|█▏        | 12002/100000 [3:57:15<31:28:19,  1.29s/it]

117690 episode score is 824.47


 12%|█▏        | 12003/100000 [3:57:16<30:35:44,  1.25s/it]

117700 episode score is 780.52


 12%|█▏        | 12004/100000 [3:57:18<30:04:54,  1.23s/it]

117710 episode score is 788.91


 12%|█▏        | 12005/100000 [3:57:19<29:44:57,  1.22s/it]

117720 episode score is 799.44


 12%|█▏        | 12006/100000 [3:57:20<29:09:32,  1.19s/it]

117730 episode score is 777.33


 12%|█▏        | 12007/100000 [3:57:21<28:45:23,  1.18s/it]

117740 episode score is 766.12


 12%|█▏        | 12008/100000 [3:57:22<28:51:32,  1.18s/it]

117750 episode score is 809.97


 12%|█▏        | 12009/100000 [3:57:23<28:39:56,  1.17s/it]

117760 episode score is 777.80


 12%|█▏        | 12010/100000 [3:57:25<28:49:25,  1.18s/it]

117770 episode score is 819.60


 12%|█▏        | 12011/100000 [3:57:26<28:28:55,  1.17s/it]

117780 episode score is 769.48


 12%|█▏        | 12012/100000 [3:57:27<28:37:34,  1.17s/it]

117790 episode score is 812.76


 12%|█▏        | 12013/100000 [3:57:28<28:35:31,  1.17s/it]

117800 episode score is 790.36


 12%|█▏        | 12014/100000 [3:57:29<28:01:58,  1.15s/it]

117810 episode score is 730.15


 12%|█▏        | 12015/100000 [3:57:30<27:41:43,  1.13s/it]

117820 episode score is 752.66


 12%|█▏        | 12016/100000 [3:57:31<27:35:25,  1.13s/it]

117830 episode score is 763.78


 12%|█▏        | 12017/100000 [3:57:33<27:58:57,  1.14s/it]

117840 episode score is 790.19


 12%|█▏        | 12018/100000 [3:57:34<27:52:36,  1.14s/it]

117850 episode score is 778.03


 12%|█▏        | 12019/100000 [3:57:35<27:56:34,  1.14s/it]

117860 episode score is 794.19


 12%|█▏        | 12020/100000 [3:57:36<27:40:05,  1.13s/it]

117870 episode score is 760.22


 12%|█▏        | 12021/100000 [3:57:37<27:33:17,  1.13s/it]

117880 episode score is 766.15


 12%|█▏        | 12022/100000 [3:57:38<28:06:31,  1.15s/it]

117890 episode score is 746.43


 12%|█▏        | 12023/100000 [3:57:39<27:56:51,  1.14s/it]

117900 episode score is 765.72


 12%|█▏        | 12024/100000 [3:57:41<27:40:18,  1.13s/it]

117910 episode score is 755.61


 12%|█▏        | 12025/100000 [3:57:42<27:51:10,  1.14s/it]

117921 episode score is 707.18


 12%|█▏        | 12026/100000 [3:57:43<27:39:10,  1.13s/it]

117931 episode score is 759.05


 12%|█▏        | 12027/100000 [3:57:44<27:22:30,  1.12s/it]

117941 episode score is 722.56


 12%|█▏        | 12028/100000 [3:57:45<27:28:54,  1.12s/it]

117951 episode score is 783.35


 12%|█▏        | 12029/100000 [3:57:46<27:14:41,  1.11s/it]

117960 episode score is 835.83


 12%|█▏        | 12030/100000 [3:57:47<27:21:44,  1.12s/it]

117970 episode score is 756.41


 12%|█▏        | 12031/100000 [3:57:48<27:29:04,  1.12s/it]

117980 episode score is 771.50


 12%|█▏        | 12032/100000 [3:57:50<27:38:58,  1.13s/it]

117990 episode score is 790.43


 12%|█▏        | 12033/100000 [3:57:51<27:38:43,  1.13s/it]

118000 episode score is 773.76


 12%|█▏        | 12034/100000 [3:57:52<28:01:03,  1.15s/it]

118011 episode score is 723.03


 12%|█▏        | 12035/100000 [3:57:53<27:45:20,  1.14s/it]

118021 episode score is 766.23


 12%|█▏        | 12036/100000 [3:57:54<27:33:32,  1.13s/it]

118031 episode score is 763.66


 12%|█▏        | 12037/100000 [3:57:55<28:02:21,  1.15s/it]

118042 episode score is 726.74


 12%|█▏        | 12038/100000 [3:57:56<27:42:35,  1.13s/it]

118052 episode score is 759.68


 12%|█▏        | 12039/100000 [3:57:58<28:02:28,  1.15s/it]

118062 episode score is 811.47


 12%|█▏        | 12040/100000 [3:57:59<27:59:34,  1.15s/it]

118072 episode score is 772.64


 12%|█▏        | 12041/100000 [3:58:00<27:40:09,  1.13s/it]

118082 episode score is 754.11


 12%|█▏        | 12042/100000 [3:58:01<27:27:21,  1.12s/it]

118092 episode score is 760.14


 12%|█▏        | 12043/100000 [3:58:02<27:31:44,  1.13s/it]

118102 episode score is 775.30


 12%|█▏        | 12044/100000 [3:58:03<27:35:39,  1.13s/it]

118112 episode score is 780.64


 12%|█▏        | 12045/100000 [3:58:04<27:36:48,  1.13s/it]

118122 episode score is 773.80


 12%|█▏        | 12046/100000 [3:58:05<27:54:34,  1.14s/it]

118132 episode score is 803.36


 12%|█▏        | 12047/100000 [3:58:07<27:56:29,  1.14s/it]

118142 episode score is 790.09


 12%|█▏        | 12048/100000 [3:58:08<28:13:48,  1.16s/it]

118152 episode score is 806.31


 12%|█▏        | 12049/100000 [3:58:09<28:15:17,  1.16s/it]

118162 episode score is 791.31


 12%|█▏        | 12050/100000 [3:58:10<27:59:26,  1.15s/it]

118172 episode score is 765.82


 12%|█▏        | 12051/100000 [3:58:11<27:49:49,  1.14s/it]

118182 episode score is 763.47


 12%|█▏        | 12052/100000 [3:58:12<27:37:01,  1.13s/it]

118192 episode score is 756.79


 12%|█▏        | 12053/100000 [3:58:13<27:36:22,  1.13s/it]

118202 episode score is 782.75


 12%|█▏        | 12054/100000 [3:58:15<27:14:08,  1.11s/it]

118212 episode score is 746.65


 12%|█▏        | 12055/100000 [3:58:16<27:33:50,  1.13s/it]

118223 episode score is 720.32


 12%|█▏        | 12056/100000 [3:58:17<27:38:36,  1.13s/it]

118233 episode score is 792.61


 12%|█▏        | 12057/100000 [3:58:18<27:30:58,  1.13s/it]

118243 episode score is 756.79


 12%|█▏        | 12058/100000 [3:58:19<27:29:22,  1.13s/it]

118253 episode score is 761.33


 12%|█▏        | 12059/100000 [3:58:20<27:12:12,  1.11s/it]

118263 episode score is 750.03


 12%|█▏        | 12060/100000 [3:58:21<27:40:33,  1.13s/it]

118274 episode score is 737.61


 12%|█▏        | 12061/100000 [3:58:22<27:33:36,  1.13s/it]

118284 episode score is 770.34


 12%|█▏        | 12062/100000 [3:58:24<27:58:26,  1.15s/it]

118295 episode score is 728.85


 12%|█▏        | 12063/100000 [3:58:25<27:33:18,  1.13s/it]

118305 episode score is 739.53


 12%|█▏        | 12064/100000 [3:58:26<27:33:19,  1.13s/it]

118316 episode score is 692.17


 12%|█▏        | 12065/100000 [3:58:27<27:22:55,  1.12s/it]

118326 episode score is 762.95


 12%|█▏        | 12066/100000 [3:58:28<27:29:45,  1.13s/it]

118336 episode score is 794.52


 12%|█▏        | 12067/100000 [3:58:29<27:28:26,  1.12s/it]

118346 episode score is 766.46


 12%|█▏        | 12068/100000 [3:58:30<27:10:39,  1.11s/it]

118356 episode score is 745.61


 12%|█▏        | 12069/100000 [3:58:31<27:05:15,  1.11s/it]

118366 episode score is 765.19


 12%|█▏        | 12070/100000 [3:58:32<27:02:25,  1.11s/it]

118376 episode score is 755.86


 12%|█▏        | 12071/100000 [3:58:34<27:10:41,  1.11s/it]

118386 episode score is 773.33


 12%|█▏        | 12072/100000 [3:58:35<27:21:24,  1.12s/it]

118396 episode score is 785.19


 12%|█▏        | 12073/100000 [3:58:36<27:38:23,  1.13s/it]

118406 episode score is 795.67


 12%|█▏        | 12074/100000 [3:58:37<27:38:11,  1.13s/it]

118416 episode score is 784.98


 12%|█▏        | 12075/100000 [3:58:38<27:56:42,  1.14s/it]

118426 episode score is 818.03


 12%|█▏        | 12076/100000 [3:58:39<27:50:29,  1.14s/it]

118436 episode score is 772.91


 12%|█▏        | 12077/100000 [3:58:40<27:49:38,  1.14s/it]

118446 episode score is 787.86


 12%|█▏        | 12078/100000 [3:58:42<27:27:33,  1.12s/it]

118456 episode score is 758.18


 12%|█▏        | 12079/100000 [3:58:43<27:32:11,  1.13s/it]

118466 episode score is 784.47


 12%|█▏        | 12080/100000 [3:58:44<27:31:51,  1.13s/it]

118476 episode score is 785.23


 12%|█▏        | 12081/100000 [3:58:45<27:51:41,  1.14s/it]

118486 episode score is 822.55


 12%|█▏        | 12082/100000 [3:58:46<28:09:08,  1.15s/it]

118496 episode score is 823.78


 12%|█▏        | 12083/100000 [3:58:47<27:48:55,  1.14s/it]

118505 episode score is 840.08


 12%|█▏        | 12084/100000 [3:58:48<28:05:40,  1.15s/it]

118515 episode score is 819.30


 12%|█▏        | 12085/100000 [3:58:50<28:24:42,  1.16s/it]

118525 episode score is 824.06


 12%|█▏        | 12086/100000 [3:58:51<28:06:56,  1.15s/it]

118535 episode score is 777.77


 12%|█▏        | 12087/100000 [3:58:52<28:11:54,  1.15s/it]

118545 episode score is 804.73


 12%|█▏        | 12088/100000 [3:58:53<27:48:53,  1.14s/it]

118555 episode score is 760.28


 12%|█▏        | 12089/100000 [3:58:54<27:59:22,  1.15s/it]

118565 episode score is 820.85


 12%|█▏        | 12090/100000 [3:58:55<28:08:36,  1.15s/it]

118575 episode score is 819.82


 12%|█▏        | 12091/100000 [3:58:57<28:16:44,  1.16s/it]

118585 episode score is 823.19


 12%|█▏        | 12092/100000 [3:58:58<28:21:55,  1.16s/it]

118595 episode score is 818.19


 12%|█▏        | 12093/100000 [3:58:59<28:11:02,  1.15s/it]

118605 episode score is 763.97


 12%|█▏        | 12094/100000 [3:59:00<28:58:59,  1.19s/it]

118615 episode score is 801.42


 12%|█▏        | 12095/100000 [3:59:01<28:52:12,  1.18s/it]

118625 episode score is 805.81


 12%|█▏        | 12096/100000 [3:59:02<28:09:34,  1.15s/it]

118634 episode score is 841.20


 12%|█▏        | 12097/100000 [3:59:04<28:01:58,  1.15s/it]

118644 episode score is 785.11


 12%|█▏        | 12098/100000 [3:59:05<27:31:22,  1.13s/it]

118653 episode score is 847.56


 12%|█▏        | 12099/100000 [3:59:06<27:58:13,  1.15s/it]

118663 episode score is 829.75
118673 episode score is 798.57


 12%|█▏        | 12100/100000 [3:59:08<35:41:58,  1.46s/it]

Iteration 12100: Average test reward: 830.90


 12%|█▏        | 12101/100000 [3:59:09<33:36:23,  1.38s/it]

118683 episode score is 813.92


 12%|█▏        | 12102/100000 [3:59:10<31:50:05,  1.30s/it]

118693 episode score is 781.48


 12%|█▏        | 12103/100000 [3:59:11<30:34:30,  1.25s/it]

118703 episode score is 787.67


 12%|█▏        | 12104/100000 [3:59:13<29:59:34,  1.23s/it]

118713 episode score is 817.17


 12%|█▏        | 12105/100000 [3:59:14<29:06:51,  1.19s/it]

118723 episode score is 760.14


 12%|█▏        | 12106/100000 [3:59:15<28:26:45,  1.17s/it]

118733 episode score is 756.09


 12%|█▏        | 12107/100000 [3:59:16<28:12:52,  1.16s/it]

118743 episode score is 784.43


 12%|█▏        | 12108/100000 [3:59:17<27:50:28,  1.14s/it]

118753 episode score is 753.16


 12%|█▏        | 12109/100000 [3:59:18<28:05:53,  1.15s/it]

118764 episode score is 717.83


 12%|█▏        | 12110/100000 [3:59:19<27:48:16,  1.14s/it]

118774 episode score is 761.43


 12%|█▏        | 12111/100000 [3:59:20<27:25:19,  1.12s/it]

118784 episode score is 743.15


 12%|█▏        | 12112/100000 [3:59:22<27:11:23,  1.11s/it]

118794 episode score is 757.67


 12%|█▏        | 12113/100000 [3:59:23<27:17:13,  1.12s/it]

118804 episode score is 776.08


 12%|█▏        | 12114/100000 [3:59:24<27:01:18,  1.11s/it]

118814 episode score is 745.03


 12%|█▏        | 12115/100000 [3:59:25<27:14:40,  1.12s/it]

118825 episode score is 678.54


 12%|█▏        | 12116/100000 [3:59:26<26:57:06,  1.10s/it]

118835 episode score is 745.61


 12%|█▏        | 12117/100000 [3:59:27<26:46:53,  1.10s/it]

118845 episode score is 743.77


 12%|█▏        | 12118/100000 [3:59:28<27:11:15,  1.11s/it]

118855 episode score is 805.71


 12%|█▏        | 12119/100000 [3:59:29<27:31:40,  1.13s/it]

118865 episode score is 799.49


 12%|█▏        | 12120/100000 [3:59:30<27:48:10,  1.14s/it]

118875 episode score is 812.25


 12%|█▏        | 12121/100000 [3:59:32<27:56:18,  1.14s/it]

118885 episode score is 808.55


 12%|█▏        | 12122/100000 [3:59:33<27:58:47,  1.15s/it]

118895 episode score is 792.36


 12%|█▏        | 12123/100000 [3:59:34<27:55:06,  1.14s/it]

118905 episode score is 792.26


 12%|█▏        | 12124/100000 [3:59:35<27:27:06,  1.12s/it]

118914 episode score is 839.79


 12%|█▏        | 12125/100000 [3:59:36<27:14:58,  1.12s/it]

118924 episode score is 757.88


 12%|█▏        | 12126/100000 [3:59:37<27:35:38,  1.13s/it]

118934 episode score is 804.37


 12%|█▏        | 12127/100000 [3:59:38<27:16:37,  1.12s/it]

118943 episode score is 840.15


 12%|█▏        | 12128/100000 [3:59:40<27:38:28,  1.13s/it]

118953 episode score is 786.78


 12%|█▏        | 12129/100000 [3:59:41<27:50:25,  1.14s/it]

118963 episode score is 803.32


 12%|█▏        | 12130/100000 [3:59:42<27:51:25,  1.14s/it]

118973 episode score is 777.39


 12%|█▏        | 12131/100000 [3:59:43<28:14:30,  1.16s/it]

118983 episode score is 823.13


 12%|█▏        | 12132/100000 [3:59:44<28:31:10,  1.17s/it]

118994 episode score is 725.19


 12%|█▏        | 12133/100000 [3:59:45<27:59:27,  1.15s/it]

119004 episode score is 744.34


 12%|█▏        | 12134/100000 [3:59:46<27:34:57,  1.13s/it]

119014 episode score is 733.02


 12%|█▏        | 12135/100000 [3:59:48<28:05:43,  1.15s/it]

119024 episode score is 813.39


 12%|█▏        | 12136/100000 [3:59:49<28:15:37,  1.16s/it]

119034 episode score is 808.98


 12%|█▏        | 12137/100000 [3:59:50<28:28:56,  1.17s/it]

119044 episode score is 830.37


 12%|█▏        | 12138/100000 [3:59:51<28:22:15,  1.16s/it]

119054 episode score is 792.52


 12%|█▏        | 12139/100000 [3:59:52<28:05:22,  1.15s/it]

119064 episode score is 768.33


 12%|█▏        | 12140/100000 [3:59:53<27:55:36,  1.14s/it]

119074 episode score is 768.25


 12%|█▏        | 12141/100000 [3:59:55<28:03:44,  1.15s/it]

119084 episode score is 797.77


 12%|█▏        | 12142/100000 [3:59:56<28:21:59,  1.16s/it]

119094 episode score is 816.70


 12%|█▏        | 12143/100000 [3:59:57<28:02:27,  1.15s/it]

119103 episode score is 876.80


 12%|█▏        | 12144/100000 [3:59:58<28:15:54,  1.16s/it]

119113 episode score is 828.81


 12%|█▏        | 12145/100000 [3:59:59<27:42:05,  1.14s/it]

119122 episode score is 846.02


 12%|█▏        | 12146/100000 [4:00:00<28:00:39,  1.15s/it]

119132 episode score is 815.43


 12%|█▏        | 12147/100000 [4:00:01<28:18:17,  1.16s/it]

119142 episode score is 830.93


 12%|█▏        | 12148/100000 [4:00:03<28:28:25,  1.17s/it]

119152 episode score is 806.81


 12%|█▏        | 12149/100000 [4:00:04<28:18:11,  1.16s/it]

119162 episode score is 789.45


 12%|█▏        | 12150/100000 [4:00:05<28:19:52,  1.16s/it]

119172 episode score is 816.34


 12%|█▏        | 12151/100000 [4:00:06<28:12:31,  1.16s/it]

119182 episode score is 806.19


 12%|█▏        | 12152/100000 [4:00:07<28:53:39,  1.18s/it]

119192 episode score is 802.04


 12%|█▏        | 12153/100000 [4:00:08<28:26:55,  1.17s/it]

119202 episode score is 780.83


 12%|█▏        | 12154/100000 [4:00:10<28:24:41,  1.16s/it]

119213 episode score is 711.73


 12%|█▏        | 12155/100000 [4:00:11<27:51:06,  1.14s/it]

119223 episode score is 757.20


 12%|█▏        | 12156/100000 [4:00:12<27:53:15,  1.14s/it]

119233 episode score is 776.78


 12%|█▏        | 12157/100000 [4:00:13<27:52:30,  1.14s/it]

119243 episode score is 791.01


 12%|█▏        | 12158/100000 [4:00:14<28:08:07,  1.15s/it]

119254 episode score is 724.13


 12%|█▏        | 12159/100000 [4:00:15<27:58:54,  1.15s/it]

119264 episode score is 793.35


 12%|█▏        | 12160/100000 [4:00:16<27:46:56,  1.14s/it]

119274 episode score is 774.51


 12%|█▏        | 12161/100000 [4:00:18<27:44:21,  1.14s/it]

119284 episode score is 769.34


 12%|█▏        | 12162/100000 [4:00:19<28:10:19,  1.15s/it]

119295 episode score is 734.65


 12%|█▏        | 12163/100000 [4:00:20<28:21:15,  1.16s/it]

119306 episode score is 728.62


 12%|█▏        | 12164/100000 [4:00:21<28:12:24,  1.16s/it]

119317 episode score is 694.84


 12%|█▏        | 12165/100000 [4:00:22<28:11:38,  1.16s/it]

119328 episode score is 717.64


 12%|█▏        | 12166/100000 [4:00:23<28:24:00,  1.16s/it]

119339 episode score is 738.29


 12%|█▏        | 12167/100000 [4:00:25<28:28:49,  1.17s/it]

119350 episode score is 733.02


 12%|█▏        | 12168/100000 [4:00:26<28:05:38,  1.15s/it]

119360 episode score is 776.58


 12%|█▏        | 12169/100000 [4:00:27<27:36:55,  1.13s/it]

119370 episode score is 735.16


 12%|█▏        | 12170/100000 [4:00:28<27:19:18,  1.12s/it]

119380 episode score is 735.04


 12%|█▏        | 12171/100000 [4:00:29<27:38:38,  1.13s/it]

119390 episode score is 795.52


 12%|█▏        | 12172/100000 [4:00:30<27:50:14,  1.14s/it]

119400 episode score is 784.75


 12%|█▏        | 12173/100000 [4:00:31<27:38:32,  1.13s/it]

119410 episode score is 758.01


 12%|█▏        | 12174/100000 [4:00:33<28:00:33,  1.15s/it]

119420 episode score is 824.74


 12%|█▏        | 12175/100000 [4:00:34<28:02:03,  1.15s/it]

119430 episode score is 802.88


 12%|█▏        | 12176/100000 [4:00:35<27:42:55,  1.14s/it]

119440 episode score is 745.52


 12%|█▏        | 12177/100000 [4:00:36<27:24:59,  1.12s/it]

119450 episode score is 751.96


 12%|█▏        | 12178/100000 [4:00:37<27:01:15,  1.11s/it]

119459 episode score is 832.69


 12%|█▏        | 12179/100000 [4:00:38<27:08:34,  1.11s/it]

119469 episode score is 780.85


 12%|█▏        | 12180/100000 [4:00:39<27:03:28,  1.11s/it]

119479 episode score is 740.41


 12%|█▏        | 12181/100000 [4:00:40<27:07:19,  1.11s/it]

119489 episode score is 742.15


 12%|█▏        | 12182/100000 [4:00:41<27:13:51,  1.12s/it]

119499 episode score is 774.30


 12%|█▏        | 12183/100000 [4:00:43<27:18:23,  1.12s/it]

119509 episode score is 768.88


 12%|█▏        | 12184/100000 [4:00:44<27:06:14,  1.11s/it]

119519 episode score is 740.03


 12%|█▏        | 12185/100000 [4:00:45<27:16:09,  1.12s/it]

119529 episode score is 789.73


 12%|█▏        | 12186/100000 [4:00:46<27:30:24,  1.13s/it]

119540 episode score is 701.00


 12%|█▏        | 12187/100000 [4:00:47<27:44:35,  1.14s/it]

119550 episode score is 805.27


 12%|█▏        | 12188/100000 [4:00:48<27:40:08,  1.13s/it]

119560 episode score is 758.42


 12%|█▏        | 12189/100000 [4:00:49<27:54:12,  1.14s/it]

119570 episode score is 809.10


 12%|█▏        | 12190/100000 [4:00:50<27:34:59,  1.13s/it]

119580 episode score is 760.15


 12%|█▏        | 12191/100000 [4:00:52<27:35:52,  1.13s/it]

119590 episode score is 788.12


 12%|█▏        | 12192/100000 [4:00:53<27:24:57,  1.12s/it]

119601 episode score is 665.70


 12%|█▏        | 12193/100000 [4:00:54<27:29:03,  1.13s/it]

119611 episode score is 783.77


 12%|█▏        | 12194/100000 [4:00:55<27:29:14,  1.13s/it]

119621 episode score is 773.85


 12%|█▏        | 12195/100000 [4:00:56<27:41:24,  1.14s/it]

119631 episode score is 812.64


 12%|█▏        | 12196/100000 [4:00:57<28:02:07,  1.15s/it]

119642 episode score is 732.15


 12%|█▏        | 12197/100000 [4:00:59<28:26:36,  1.17s/it]

119652 episode score is 769.19


 12%|█▏        | 12198/100000 [4:01:00<28:22:47,  1.16s/it]

119663 episode score is 694.98


 12%|█▏        | 12199/100000 [4:01:01<28:26:57,  1.17s/it]

119673 episode score is 822.48
119683 episode score is 772.68


 12%|█▏        | 12200/100000 [4:01:03<35:44:39,  1.47s/it]

Iteration 12200: Average test reward: 788.63


 12%|█▏        | 12201/100000 [4:01:04<32:54:39,  1.35s/it]

119692 episode score is 835.54


 12%|█▏        | 12202/100000 [4:01:05<31:14:42,  1.28s/it]

119702 episode score is 777.63


 12%|█▏        | 12203/100000 [4:01:06<30:32:39,  1.25s/it]

119712 episode score is 819.36


 12%|█▏        | 12204/100000 [4:01:08<29:40:00,  1.22s/it]

119722 episode score is 769.41


 12%|█▏        | 12205/100000 [4:01:09<28:42:36,  1.18s/it]

119731 episode score is 840.27


 12%|█▏        | 12206/100000 [4:01:10<28:48:08,  1.18s/it]

119741 episode score is 816.35


 12%|█▏        | 12207/100000 [4:01:11<28:46:46,  1.18s/it]

119751 episode score is 802.21


 12%|█▏        | 12208/100000 [4:01:12<28:46:41,  1.18s/it]

119761 episode score is 802.67


 12%|█▏        | 12209/100000 [4:01:13<28:04:57,  1.15s/it]

119770 episode score is 824.04


 12%|█▏        | 12210/100000 [4:01:14<27:41:26,  1.14s/it]

119779 episode score is 842.24


 12%|█▏        | 12211/100000 [4:01:16<28:11:03,  1.16s/it]

119789 episode score is 819.67


 12%|█▏        | 12212/100000 [4:01:17<27:51:33,  1.14s/it]

119799 episode score is 748.93


 12%|█▏        | 12213/100000 [4:01:18<28:00:03,  1.15s/it]

119809 episode score is 770.80


 12%|█▏        | 12214/100000 [4:01:19<27:43:36,  1.14s/it]

119819 episode score is 745.82


 12%|█▏        | 12215/100000 [4:01:20<27:31:44,  1.13s/it]

119829 episode score is 751.17


 12%|█▏        | 12216/100000 [4:01:21<27:27:41,  1.13s/it]

119839 episode score is 761.93


 12%|█▏        | 12217/100000 [4:01:22<27:50:18,  1.14s/it]

119849 episode score is 811.21


 12%|█▏        | 12218/100000 [4:01:23<27:37:25,  1.13s/it]

119859 episode score is 757.00


 12%|█▏        | 12219/100000 [4:01:25<27:35:49,  1.13s/it]

119869 episode score is 757.77


 12%|█▏        | 12220/100000 [4:01:26<27:50:35,  1.14s/it]

119879 episode score is 795.38


 12%|█▏        | 12221/100000 [4:01:27<27:56:29,  1.15s/it]

119889 episode score is 797.49


 12%|█▏        | 12222/100000 [4:01:28<27:58:10,  1.15s/it]

119899 episode score is 783.21


 12%|█▏        | 12223/100000 [4:01:29<28:03:54,  1.15s/it]

119909 episode score is 795.73


 12%|█▏        | 12224/100000 [4:01:30<28:10:14,  1.16s/it]

119919 episode score is 801.62


 12%|█▏        | 12225/100000 [4:01:32<28:11:32,  1.16s/it]

119929 episode score is 791.02


 12%|█▏        | 12226/100000 [4:01:33<28:06:50,  1.15s/it]

119940 episode score is 695.30


 12%|█▏        | 12227/100000 [4:01:34<28:05:04,  1.15s/it]

119950 episode score is 790.56


 12%|█▏        | 12228/100000 [4:01:35<28:00:37,  1.15s/it]

119960 episode score is 794.43


 12%|█▏        | 12229/100000 [4:01:36<28:01:24,  1.15s/it]

119971 episode score is 702.80


 12%|█▏        | 12230/100000 [4:01:37<27:40:02,  1.13s/it]

119981 episode score is 757.67


 12%|█▏        | 12231/100000 [4:01:38<27:48:07,  1.14s/it]

119991 episode score is 806.07


 12%|█▏        | 12232/100000 [4:01:40<27:55:17,  1.15s/it]

120001 episode score is 809.45


 12%|█▏        | 12233/100000 [4:01:41<27:31:01,  1.13s/it]

120011 episode score is 732.28


 12%|█▏        | 12234/100000 [4:01:42<27:18:50,  1.12s/it]

120021 episode score is 751.53


 12%|█▏        | 12235/100000 [4:01:43<27:47:20,  1.14s/it]

120032 episode score is 719.30


 12%|█▏        | 12236/100000 [4:01:44<27:53:52,  1.14s/it]

120042 episode score is 793.35


 12%|█▏        | 12237/100000 [4:01:45<27:39:14,  1.13s/it]

120052 episode score is 758.89


 12%|█▏        | 12238/100000 [4:01:46<27:36:05,  1.13s/it]

120062 episode score is 776.26


 12%|█▏        | 12239/100000 [4:01:47<27:42:59,  1.14s/it]

120072 episode score is 786.28


 12%|█▏        | 12240/100000 [4:01:49<27:38:51,  1.13s/it]

120082 episode score is 770.52


 12%|█▏        | 12241/100000 [4:01:50<27:56:14,  1.15s/it]

120092 episode score is 805.60


 12%|█▏        | 12242/100000 [4:01:51<28:01:21,  1.15s/it]

120102 episode score is 791.77


 12%|█▏        | 12243/100000 [4:01:52<28:09:18,  1.15s/it]

120112 episode score is 811.03


 12%|█▏        | 12244/100000 [4:01:53<27:46:17,  1.14s/it]

120122 episode score is 744.27


 12%|█▏        | 12245/100000 [4:01:54<27:43:58,  1.14s/it]

120132 episode score is 792.94


 12%|█▏        | 12246/100000 [4:01:55<27:37:34,  1.13s/it]

120142 episode score is 781.46


 12%|█▏        | 12247/100000 [4:01:57<27:40:58,  1.14s/it]

120153 episode score is 692.83


 12%|█▏        | 12248/100000 [4:01:58<27:22:18,  1.12s/it]

120163 episode score is 755.52


 12%|█▏        | 12249/100000 [4:01:59<27:48:08,  1.14s/it]

120174 episode score is 730.69


 12%|█▏        | 12250/100000 [4:02:00<27:33:23,  1.13s/it]

120184 episode score is 744.49


 12%|█▏        | 12251/100000 [4:02:01<27:57:34,  1.15s/it]

120195 episode score is 732.61


 12%|█▏        | 12252/100000 [4:02:02<27:48:39,  1.14s/it]

120205 episode score is 755.49


 12%|█▏        | 12253/100000 [4:02:03<28:01:50,  1.15s/it]

120216 episode score is 709.48


 12%|█▏        | 12254/100000 [4:02:05<27:44:42,  1.14s/it]

120226 episode score is 771.53


 12%|█▏        | 12255/100000 [4:02:06<27:51:43,  1.14s/it]

120236 episode score is 807.24


 12%|█▏        | 12256/100000 [4:02:07<28:26:59,  1.17s/it]

120246 episode score is 782.87


 12%|█▏        | 12257/100000 [4:02:08<27:51:29,  1.14s/it]

120256 episode score is 744.38


 12%|█▏        | 12258/100000 [4:02:09<27:32:39,  1.13s/it]

120266 episode score is 759.79


 12%|█▏        | 12259/100000 [4:02:10<27:31:17,  1.13s/it]

120276 episode score is 775.61


 12%|█▏        | 12260/100000 [4:02:11<27:53:15,  1.14s/it]

120286 episode score is 819.55


 12%|█▏        | 12261/100000 [4:02:13<27:50:20,  1.14s/it]

120296 episode score is 780.00


 12%|█▏        | 12262/100000 [4:02:14<27:49:17,  1.14s/it]

120306 episode score is 790.22


 12%|█▏        | 12263/100000 [4:02:15<27:57:17,  1.15s/it]

120316 episode score is 802.92


 12%|█▏        | 12264/100000 [4:02:16<27:53:18,  1.14s/it]

120326 episode score is 788.52


 12%|█▏        | 12265/100000 [4:02:17<28:07:55,  1.15s/it]

120336 episode score is 809.14


 12%|█▏        | 12266/100000 [4:02:18<28:08:31,  1.15s/it]

120346 episode score is 795.28


 12%|█▏        | 12267/100000 [4:02:19<27:53:45,  1.14s/it]

120356 episode score is 761.97


 12%|█▏        | 12268/100000 [4:02:21<27:51:54,  1.14s/it]

120366 episode score is 785.52


 12%|█▏        | 12269/100000 [4:02:22<27:58:46,  1.15s/it]

120376 episode score is 796.18


 12%|█▏        | 12270/100000 [4:02:23<28:06:25,  1.15s/it]

120386 episode score is 805.85


 12%|█▏        | 12271/100000 [4:02:24<28:13:38,  1.16s/it]

120396 episode score is 815.47


 12%|█▏        | 12272/100000 [4:02:25<28:20:15,  1.16s/it]

120406 episode score is 808.38


 12%|█▏        | 12273/100000 [4:02:26<28:08:02,  1.15s/it]

120416 episode score is 784.62


 12%|█▏        | 12274/100000 [4:02:28<27:47:00,  1.14s/it]

120426 episode score is 760.28


 12%|█▏        | 12275/100000 [4:02:29<27:46:03,  1.14s/it]

120436 episode score is 783.53


 12%|█▏        | 12276/100000 [4:02:30<27:52:49,  1.14s/it]

120446 episode score is 788.86


 12%|█▏        | 12277/100000 [4:02:31<28:14:29,  1.16s/it]

120456 episode score is 811.15


 12%|█▏        | 12278/100000 [4:02:32<28:22:02,  1.16s/it]

120466 episode score is 813.13


 12%|█▏        | 12279/100000 [4:02:33<28:27:33,  1.17s/it]

120476 episode score is 814.35


 12%|█▏        | 12280/100000 [4:02:35<28:19:23,  1.16s/it]

120486 episode score is 799.87


 12%|█▏        | 12281/100000 [4:02:36<27:58:44,  1.15s/it]

120496 episode score is 773.02


 12%|█▏        | 12282/100000 [4:02:37<27:47:46,  1.14s/it]

120506 episode score is 769.40


 12%|█▏        | 12283/100000 [4:02:38<27:44:24,  1.14s/it]

120516 episode score is 782.76


 12%|█▏        | 12284/100000 [4:02:39<27:57:46,  1.15s/it]

120526 episode score is 815.97


 12%|█▏        | 12285/100000 [4:02:40<27:40:13,  1.14s/it]

120536 episode score is 759.31


 12%|█▏        | 12286/100000 [4:02:41<27:38:13,  1.13s/it]

120546 episode score is 775.79


 12%|█▏        | 12287/100000 [4:02:42<27:55:36,  1.15s/it]

120556 episode score is 808.68


 12%|█▏        | 12288/100000 [4:02:44<28:00:16,  1.15s/it]

120566 episode score is 801.15


 12%|█▏        | 12289/100000 [4:02:45<28:19:36,  1.16s/it]

120577 episode score is 739.26


 12%|█▏        | 12290/100000 [4:02:46<28:11:33,  1.16s/it]

120587 episode score is 792.06


 12%|█▏        | 12291/100000 [4:02:47<28:04:57,  1.15s/it]

120597 episode score is 779.26


 12%|█▏        | 12292/100000 [4:02:48<28:17:09,  1.16s/it]

120607 episode score is 806.67


 12%|█▏        | 12293/100000 [4:02:49<28:07:24,  1.15s/it]

120617 episode score is 788.32


 12%|█▏        | 12294/100000 [4:02:51<27:53:21,  1.14s/it]

120627 episode score is 777.66


 12%|█▏        | 12295/100000 [4:02:52<28:08:06,  1.15s/it]

120637 episode score is 824.03


 12%|█▏        | 12296/100000 [4:02:53<28:06:00,  1.15s/it]

120647 episode score is 799.67


 12%|█▏        | 12297/100000 [4:02:54<27:51:02,  1.14s/it]

120657 episode score is 760.71


 12%|█▏        | 12298/100000 [4:02:55<27:48:02,  1.14s/it]

120667 episode score is 782.11


 12%|█▏        | 12299/100000 [4:02:56<27:37:07,  1.13s/it]

120677 episode score is 783.52
120687 episode score is 752.59


 12%|█▏        | 12300/100000 [4:02:58<34:24:11,  1.41s/it]

Iteration 12300: Average test reward: 725.89


 12%|█▏        | 12301/100000 [4:02:59<32:20:03,  1.33s/it]

120697 episode score is 780.67


 12%|█▏        | 12302/100000 [4:03:01<30:54:02,  1.27s/it]

120707 episode score is 794.88


 12%|█▏        | 12303/100000 [4:03:02<29:50:56,  1.23s/it]

120717 episode score is 780.29


 12%|█▏        | 12304/100000 [4:03:03<29:15:22,  1.20s/it]

120727 episode score is 765.32


 12%|█▏        | 12305/100000 [4:03:04<28:46:26,  1.18s/it]

120737 episode score is 789.67


 12%|█▏        | 12306/100000 [4:03:05<28:35:14,  1.17s/it]

120747 episode score is 800.31


 12%|█▏        | 12307/100000 [4:03:06<28:08:34,  1.16s/it]

120757 episode score is 759.73


 12%|█▏        | 12308/100000 [4:03:07<27:48:02,  1.14s/it]

120767 episode score is 764.47


 12%|█▏        | 12309/100000 [4:03:09<28:12:08,  1.16s/it]

120777 episode score is 839.86


 12%|█▏        | 12310/100000 [4:03:10<28:07:29,  1.15s/it]

120787 episode score is 786.78


 12%|█▏        | 12311/100000 [4:03:11<28:14:30,  1.16s/it]

120797 episode score is 793.39


 12%|█▏        | 12312/100000 [4:03:12<28:23:02,  1.17s/it]

120807 episode score is 811.81


 12%|█▏        | 12313/100000 [4:03:13<28:01:18,  1.15s/it]

120817 episode score is 764.33


 12%|█▏        | 12314/100000 [4:03:14<27:44:27,  1.14s/it]

120827 episode score is 748.83


 12%|█▏        | 12315/100000 [4:03:15<27:46:35,  1.14s/it]

120837 episode score is 796.01


 12%|█▏        | 12316/100000 [4:03:16<27:24:38,  1.13s/it]

120847 episode score is 738.95


 12%|█▏        | 12317/100000 [4:03:18<27:27:00,  1.13s/it]

120857 episode score is 776.44


 12%|█▏        | 12318/100000 [4:03:19<27:46:06,  1.14s/it]

120867 episode score is 798.41


 12%|█▏        | 12319/100000 [4:03:20<27:49:30,  1.14s/it]

120877 episode score is 799.13


 12%|█▏        | 12320/100000 [4:03:21<27:41:28,  1.14s/it]

120887 episode score is 775.59


 12%|█▏        | 12321/100000 [4:03:22<27:25:52,  1.13s/it]

120897 episode score is 760.42


 12%|█▏        | 12322/100000 [4:03:23<27:31:36,  1.13s/it]

120908 episode score is 705.00


 12%|█▏        | 12323/100000 [4:03:24<27:56:31,  1.15s/it]

120919 episode score is 743.74


 12%|█▏        | 12324/100000 [4:03:26<28:07:43,  1.15s/it]

120930 episode score is 740.30


 12%|█▏        | 12325/100000 [4:03:27<27:39:48,  1.14s/it]

120940 episode score is 762.66


 12%|█▏        | 12326/100000 [4:03:28<27:28:04,  1.13s/it]

120950 episode score is 767.30


 12%|█▏        | 12327/100000 [4:03:29<27:48:35,  1.14s/it]

120961 episode score is 734.49


 12%|█▏        | 12328/100000 [4:03:30<27:27:30,  1.13s/it]

120971 episode score is 761.49


 12%|█▏        | 12329/100000 [4:03:31<27:56:54,  1.15s/it]

120982 episode score is 747.82


 12%|█▏        | 12330/100000 [4:03:32<27:54:41,  1.15s/it]

120992 episode score is 784.76


 12%|█▏        | 12331/100000 [4:03:34<27:30:43,  1.13s/it]

121002 episode score is 760.91


 12%|█▏        | 12332/100000 [4:03:35<27:57:51,  1.15s/it]

121013 episode score is 749.62


 12%|█▏        | 12333/100000 [4:03:36<27:36:35,  1.13s/it]

121023 episode score is 769.66


 12%|█▏        | 12334/100000 [4:03:37<27:58:54,  1.15s/it]

121034 episode score is 745.02


 12%|█▏        | 12335/100000 [4:03:38<27:47:52,  1.14s/it]

121045 episode score is 688.94


 12%|█▏        | 12336/100000 [4:03:39<27:38:40,  1.14s/it]

121056 episode score is 690.10


 12%|█▏        | 12337/100000 [4:03:40<27:59:44,  1.15s/it]

121068 episode score is 652.60


 12%|█▏        | 12338/100000 [4:03:42<27:58:40,  1.15s/it]

121079 episode score is 712.23


 12%|█▏        | 12339/100000 [4:03:43<28:04:26,  1.15s/it]

121090 episode score is 713.15


 12%|█▏        | 12340/100000 [4:03:44<28:15:58,  1.16s/it]

121101 episode score is 739.72


 12%|█▏        | 12341/100000 [4:03:45<28:30:21,  1.17s/it]

121112 episode score is 742.70


 12%|█▏        | 12342/100000 [4:03:46<28:28:27,  1.17s/it]

121123 episode score is 732.55


 12%|█▏        | 12343/100000 [4:03:47<28:31:50,  1.17s/it]

121134 episode score is 720.51


 12%|█▏        | 12344/100000 [4:03:49<28:28:51,  1.17s/it]

121145 episode score is 730.07


 12%|█▏        | 12345/100000 [4:03:50<28:05:59,  1.15s/it]

121155 episode score is 773.62


 12%|█▏        | 12346/100000 [4:03:51<28:16:24,  1.16s/it]

121165 episode score is 751.98


 12%|█▏        | 12347/100000 [4:03:52<28:03:33,  1.15s/it]

121176 episode score is 685.51


 12%|█▏        | 12348/100000 [4:03:53<27:49:37,  1.14s/it]

121186 episode score is 778.19


 12%|█▏        | 12349/100000 [4:03:54<27:25:00,  1.13s/it]

121196 episode score is 759.39


 12%|█▏        | 12350/100000 [4:03:56<27:58:06,  1.15s/it]

121207 episode score is 745.58


 12%|█▏        | 12351/100000 [4:03:57<28:16:47,  1.16s/it]

121218 episode score is 741.14


 12%|█▏        | 12352/100000 [4:03:58<28:01:29,  1.15s/it]

121228 episode score is 785.72


 12%|█▏        | 12353/100000 [4:03:59<27:40:35,  1.14s/it]

121238 episode score is 760.56


 12%|█▏        | 12354/100000 [4:04:00<28:00:25,  1.15s/it]

121249 episode score is 748.43


 12%|█▏        | 12355/100000 [4:04:01<28:07:33,  1.16s/it]

121260 episode score is 726.63


 12%|█▏        | 12356/100000 [4:04:02<28:02:10,  1.15s/it]

121270 episode score is 783.15


 12%|█▏        | 12357/100000 [4:04:04<28:05:18,  1.15s/it]

121280 episode score is 806.07


 12%|█▏        | 12358/100000 [4:04:05<28:02:49,  1.15s/it]

121290 episode score is 798.04


 12%|█▏        | 12359/100000 [4:04:06<27:54:44,  1.15s/it]

121300 episode score is 789.79


 12%|█▏        | 12360/100000 [4:04:07<28:02:01,  1.15s/it]

121310 episode score is 810.63


 12%|█▏        | 12361/100000 [4:04:08<27:33:52,  1.13s/it]

121320 episode score is 753.25


 12%|█▏        | 12362/100000 [4:04:09<27:57:33,  1.15s/it]

121331 episode score is 738.54


 12%|█▏        | 12363/100000 [4:04:10<27:47:52,  1.14s/it]

121341 episode score is 794.79


 12%|█▏        | 12364/100000 [4:04:12<27:35:18,  1.13s/it]

121351 episode score is 773.09


 12%|█▏        | 12365/100000 [4:04:13<27:27:07,  1.13s/it]

121361 episode score is 770.55


 12%|█▏        | 12366/100000 [4:04:14<27:46:55,  1.14s/it]

121372 episode score is 722.36


 12%|█▏        | 12367/100000 [4:04:15<27:46:57,  1.14s/it]

121382 episode score is 802.24


 12%|█▏        | 12368/100000 [4:04:16<27:50:45,  1.14s/it]

121392 episode score is 798.14


 12%|█▏        | 12369/100000 [4:04:17<28:19:55,  1.16s/it]

121402 episode score is 842.25


 12%|█▏        | 12370/100000 [4:04:18<28:01:16,  1.15s/it]

121412 episode score is 777.48


 12%|█▏        | 12371/100000 [4:04:20<27:50:38,  1.14s/it]

121422 episode score is 780.21


 12%|█▏        | 12372/100000 [4:04:21<28:01:02,  1.15s/it]

121432 episode score is 827.25


 12%|█▏        | 12373/100000 [4:04:22<28:26:55,  1.17s/it]

121443 episode score is 753.15


 12%|█▏        | 12374/100000 [4:04:23<28:05:26,  1.15s/it]

121453 episode score is 776.66


 12%|█▏        | 12375/100000 [4:04:24<28:14:46,  1.16s/it]

121464 episode score is 734.51


 12%|█▏        | 12376/100000 [4:04:25<28:21:23,  1.17s/it]

121475 episode score is 733.21


 12%|█▏        | 12377/100000 [4:04:27<28:33:39,  1.17s/it]

121486 episode score is 751.25


 12%|█▏        | 12378/100000 [4:04:28<28:15:35,  1.16s/it]

121496 episode score is 782.93


 12%|█▏        | 12379/100000 [4:04:29<28:15:13,  1.16s/it]

121506 episode score is 809.13


 12%|█▏        | 12380/100000 [4:04:30<27:45:35,  1.14s/it]

121516 episode score is 744.80


 12%|█▏        | 12381/100000 [4:04:31<27:47:35,  1.14s/it]

121526 episode score is 796.12


 12%|█▏        | 12382/100000 [4:04:32<28:00:03,  1.15s/it]

121536 episode score is 800.57


 12%|█▏        | 12383/100000 [4:04:33<27:50:11,  1.14s/it]

121546 episode score is 776.07


 12%|█▏        | 12384/100000 [4:04:35<27:29:16,  1.13s/it]

121556 episode score is 755.62


 12%|█▏        | 12385/100000 [4:04:36<27:56:10,  1.15s/it]

121567 episode score is 734.18


 12%|█▏        | 12386/100000 [4:04:37<27:43:36,  1.14s/it]

121577 episode score is 770.58


 12%|█▏        | 12387/100000 [4:04:38<27:35:58,  1.13s/it]

121587 episode score is 776.19


 12%|█▏        | 12388/100000 [4:04:39<27:17:21,  1.12s/it]

121597 episode score is 744.52


 12%|█▏        | 12389/100000 [4:04:40<27:24:25,  1.13s/it]

121607 episode score is 785.34


 12%|█▏        | 12390/100000 [4:04:41<27:22:37,  1.12s/it]

121617 episode score is 767.94


 12%|█▏        | 12391/100000 [4:04:43<28:02:51,  1.15s/it]

121627 episode score is 769.33


 12%|█▏        | 12392/100000 [4:04:44<27:48:59,  1.14s/it]

121637 episode score is 764.30


 12%|█▏        | 12393/100000 [4:04:45<27:42:17,  1.14s/it]

121647 episode score is 779.55


 12%|█▏        | 12394/100000 [4:04:46<27:51:07,  1.14s/it]

121657 episode score is 809.62


 12%|█▏        | 12395/100000 [4:04:47<28:03:51,  1.15s/it]

121667 episode score is 807.83


 12%|█▏        | 12396/100000 [4:04:48<28:08:41,  1.16s/it]

121677 episode score is 798.40


 12%|█▏        | 12397/100000 [4:04:49<28:28:34,  1.17s/it]

121688 episode score is 738.12


 12%|█▏        | 12398/100000 [4:04:51<27:57:53,  1.15s/it]

121697 episode score is 858.43


 12%|█▏        | 12399/100000 [4:04:52<28:21:17,  1.17s/it]

121707 episode score is 839.13
121717 episode score is 813.73


 12%|█▏        | 12400/100000 [4:04:54<36:02:20,  1.48s/it]

Iteration 12400: Average test reward: 816.96


 12%|█▏        | 12401/100000 [4:04:55<33:15:29,  1.37s/it]

121727 episode score is 762.02


 12%|█▏        | 12402/100000 [4:04:56<31:13:24,  1.28s/it]

121737 episode score is 753.71


 12%|█▏        | 12403/100000 [4:04:57<30:30:52,  1.25s/it]

121747 episode score is 834.01


 12%|█▏        | 12404/100000 [4:04:59<29:38:01,  1.22s/it]

121758 episode score is 689.60


 12%|█▏        | 12405/100000 [4:05:00<29:16:06,  1.20s/it]

121768 episode score is 780.32


 12%|█▏        | 12406/100000 [4:05:01<28:28:19,  1.17s/it]

121778 episode score is 755.20


 12%|█▏        | 12407/100000 [4:05:02<28:17:39,  1.16s/it]

121788 episode score is 779.07


 12%|█▏        | 12408/100000 [4:05:03<28:08:01,  1.16s/it]

121798 episode score is 789.12


 12%|█▏        | 12409/100000 [4:05:04<27:45:40,  1.14s/it]

121808 episode score is 757.05


 12%|█▏        | 12410/100000 [4:05:05<27:22:25,  1.13s/it]

121818 episode score is 745.35


 12%|█▏        | 12411/100000 [4:05:06<27:42:36,  1.14s/it]

121829 episode score is 733.97


 12%|█▏        | 12412/100000 [4:05:08<27:19:46,  1.12s/it]

121839 episode score is 752.32


 12%|█▏        | 12413/100000 [4:05:09<27:09:50,  1.12s/it]

121849 episode score is 761.51


 12%|█▏        | 12414/100000 [4:05:10<27:09:19,  1.12s/it]

121860 episode score is 675.63


 12%|█▏        | 12415/100000 [4:05:11<26:56:40,  1.11s/it]

121870 episode score is 743.58


 12%|█▏        | 12416/100000 [4:05:12<27:07:28,  1.11s/it]

121880 episode score is 776.50


 12%|█▏        | 12417/100000 [4:05:13<27:15:03,  1.12s/it]

121890 episode score is 782.94


 12%|█▏        | 12418/100000 [4:05:14<27:21:55,  1.12s/it]

121900 episode score is 778.50


 12%|█▏        | 12419/100000 [4:05:15<27:15:24,  1.12s/it]

121910 episode score is 752.76


 12%|█▏        | 12420/100000 [4:05:16<27:19:40,  1.12s/it]

121920 episode score is 774.45


 12%|█▏        | 12421/100000 [4:05:18<27:45:43,  1.14s/it]

121931 episode score is 720.72


 12%|█▏        | 12422/100000 [4:05:19<27:39:48,  1.14s/it]

121941 episode score is 777.02


 12%|█▏        | 12423/100000 [4:05:20<27:29:45,  1.13s/it]

121951 episode score is 768.37


 12%|█▏        | 12424/100000 [4:05:21<27:50:10,  1.14s/it]

121962 episode score is 729.02


 12%|█▏        | 12425/100000 [4:05:22<27:47:00,  1.14s/it]

121972 episode score is 776.70


 12%|█▏        | 12426/100000 [4:05:23<27:29:33,  1.13s/it]

121982 episode score is 757.84


 12%|█▏        | 12427/100000 [4:05:24<27:10:17,  1.12s/it]

121992 episode score is 741.14


 12%|█▏        | 12428/100000 [4:05:26<27:14:08,  1.12s/it]

122002 episode score is 769.76


 12%|█▏        | 12429/100000 [4:05:27<27:19:46,  1.12s/it]

122012 episode score is 788.08


 12%|█▏        | 12430/100000 [4:05:28<27:17:00,  1.12s/it]

122022 episode score is 771.89


 12%|█▏        | 12431/100000 [4:05:29<27:19:40,  1.12s/it]

122032 episode score is 778.05


 12%|█▏        | 12432/100000 [4:05:30<27:18:04,  1.12s/it]

122042 episode score is 779.63


 12%|█▏        | 12433/100000 [4:05:31<26:58:53,  1.11s/it]

122052 episode score is 742.80


 12%|█▏        | 12434/100000 [4:05:32<27:01:55,  1.11s/it]

122062 episode score is 746.06


 12%|█▏        | 12435/100000 [4:05:33<27:14:26,  1.12s/it]

122072 episode score is 789.25


 12%|█▏        | 12436/100000 [4:05:34<27:02:19,  1.11s/it]

122082 episode score is 747.09


 12%|█▏        | 12437/100000 [4:05:36<27:22:15,  1.13s/it]

122093 episode score is 710.90


 12%|█▏        | 12438/100000 [4:05:37<27:36:54,  1.14s/it]

122103 episode score is 807.61


 12%|█▏        | 12439/100000 [4:05:38<28:01:31,  1.15s/it]

122114 episode score is 739.68


 12%|█▏        | 12440/100000 [4:05:39<27:47:08,  1.14s/it]

122124 episode score is 773.94


 12%|█▏        | 12441/100000 [4:05:40<27:23:24,  1.13s/it]

122134 episode score is 735.73


 12%|█▏        | 12442/100000 [4:05:41<27:13:35,  1.12s/it]

122144 episode score is 759.44


 12%|█▏        | 12443/100000 [4:05:42<27:58:27,  1.15s/it]

122154 episode score is 767.39


 12%|█▏        | 12444/100000 [4:05:44<27:38:42,  1.14s/it]

122164 episode score is 757.84


 12%|█▏        | 12445/100000 [4:05:45<27:19:35,  1.12s/it]

122174 episode score is 747.25


 12%|█▏        | 12446/100000 [4:05:46<27:16:51,  1.12s/it]

122184 episode score is 769.37


 12%|█▏        | 12447/100000 [4:05:47<27:46:22,  1.14s/it]

122195 episode score is 718.34


 12%|█▏        | 12448/100000 [4:05:48<27:39:04,  1.14s/it]

122205 episode score is 758.30


 12%|█▏        | 12449/100000 [4:05:49<27:42:24,  1.14s/it]

122215 episode score is 789.14


 12%|█▏        | 12450/100000 [4:05:50<27:38:31,  1.14s/it]

122225 episode score is 767.05


 12%|█▏        | 12451/100000 [4:05:52<27:37:56,  1.14s/it]

122235 episode score is 777.92


 12%|█▏        | 12452/100000 [4:05:53<28:06:23,  1.16s/it]

122246 episode score is 730.65


 12%|█▏        | 12453/100000 [4:05:54<27:44:24,  1.14s/it]

122256 episode score is 752.42


 12%|█▏        | 12454/100000 [4:05:55<27:47:09,  1.14s/it]

122266 episode score is 781.87


 12%|█▏        | 12455/100000 [4:05:56<27:52:12,  1.15s/it]

122276 episode score is 787.98


 12%|█▏        | 12456/100000 [4:05:57<27:54:10,  1.15s/it]

122286 episode score is 772.69


 12%|█▏        | 12457/100000 [4:05:58<27:46:37,  1.14s/it]

122296 episode score is 768.59


 12%|█▏        | 12458/100000 [4:06:00<28:02:32,  1.15s/it]

122306 episode score is 811.34


 12%|█▏        | 12459/100000 [4:06:01<28:10:34,  1.16s/it]

122316 episode score is 804.46


 12%|█▏        | 12460/100000 [4:06:02<28:35:11,  1.18s/it]

122327 episode score is 736.99


 12%|█▏        | 12461/100000 [4:06:03<28:53:14,  1.19s/it]

122338 episode score is 746.14


 12%|█▏        | 12462/100000 [4:06:04<28:19:04,  1.16s/it]

122348 episode score is 752.69


 12%|█▏        | 12463/100000 [4:06:05<28:17:28,  1.16s/it]

122359 episode score is 711.32


 12%|█▏        | 12464/100000 [4:06:07<28:08:07,  1.16s/it]

122370 episode score is 695.59


 12%|█▏        | 12465/100000 [4:06:08<27:40:53,  1.14s/it]

122380 episode score is 745.87


 12%|█▏        | 12466/100000 [4:06:09<27:18:57,  1.12s/it]

122390 episode score is 741.52


 12%|█▏        | 12467/100000 [4:06:10<27:34:25,  1.13s/it]

122401 episode score is 708.60


 12%|█▏        | 12468/100000 [4:06:11<27:24:56,  1.13s/it]

122411 episode score is 757.38


 12%|█▏        | 12469/100000 [4:06:12<27:24:15,  1.13s/it]

122421 episode score is 771.08


 12%|█▏        | 12470/100000 [4:06:13<27:13:14,  1.12s/it]

122431 episode score is 748.95


 12%|█▏        | 12471/100000 [4:06:14<27:20:46,  1.12s/it]

122442 episode score is 697.41


 12%|█▏        | 12472/100000 [4:06:16<27:15:35,  1.12s/it]

122453 episode score is 664.40


 12%|█▏        | 12473/100000 [4:06:17<27:25:50,  1.13s/it]

122463 episode score is 787.29


 12%|█▏        | 12474/100000 [4:06:18<27:58:21,  1.15s/it]

122474 episode score is 730.12


 12%|█▏        | 12475/100000 [4:06:19<27:43:51,  1.14s/it]

122484 episode score is 761.96


 12%|█▏        | 12476/100000 [4:06:20<27:22:02,  1.13s/it]

122494 episode score is 742.94


 12%|█▏        | 12477/100000 [4:06:21<27:42:30,  1.14s/it]

122505 episode score is 714.00


 12%|█▏        | 12478/100000 [4:06:22<27:24:23,  1.13s/it]

122515 episode score is 738.13


 12%|█▏        | 12479/100000 [4:06:23<27:14:01,  1.12s/it]

122525 episode score is 754.51


 12%|█▏        | 12480/100000 [4:06:25<27:44:25,  1.14s/it]

122536 episode score is 739.72


 12%|█▏        | 12481/100000 [4:06:26<27:35:22,  1.13s/it]

122546 episode score is 769.65


 12%|█▏        | 12482/100000 [4:06:27<27:48:00,  1.14s/it]

122557 episode score is 714.95


 12%|█▏        | 12483/100000 [4:06:28<27:37:27,  1.14s/it]

122567 episode score is 761.90


 12%|█▏        | 12484/100000 [4:06:29<27:23:49,  1.13s/it]

122577 episode score is 756.79


 12%|█▏        | 12485/100000 [4:06:30<27:48:12,  1.14s/it]

122588 episode score is 737.29


 12%|█▏        | 12486/100000 [4:06:31<27:38:25,  1.14s/it]

122598 episode score is 780.94


 12%|█▏        | 12487/100000 [4:06:33<27:23:36,  1.13s/it]

122608 episode score is 751.11


 12%|█▏        | 12488/100000 [4:06:34<27:10:47,  1.12s/it]

122618 episode score is 747.71


 12%|█▏        | 12489/100000 [4:06:35<28:19:25,  1.17s/it]

122629 episode score is 726.59


 12%|█▏        | 12490/100000 [4:06:36<27:49:29,  1.14s/it]

122639 episode score is 755.83


 12%|█▏        | 12491/100000 [4:06:37<27:54:25,  1.15s/it]

122649 episode score is 798.86


 12%|█▏        | 12492/100000 [4:06:38<28:02:21,  1.15s/it]

122660 episode score is 722.27


 12%|█▏        | 12493/100000 [4:06:40<28:04:01,  1.15s/it]

122670 episode score is 794.07


 12%|█▏        | 12494/100000 [4:06:41<27:58:51,  1.15s/it]

122680 episode score is 788.05


 12%|█▏        | 12495/100000 [4:06:42<28:16:54,  1.16s/it]

122690 episode score is 818.99


 12%|█▏        | 12496/100000 [4:06:43<28:21:04,  1.17s/it]

122700 episode score is 787.37


 12%|█▏        | 12497/100000 [4:06:44<27:49:25,  1.14s/it]

122709 episode score is 832.91


 12%|█▏        | 12498/100000 [4:06:45<28:02:39,  1.15s/it]

122719 episode score is 812.15


 12%|█▏        | 12499/100000 [4:06:46<27:41:46,  1.14s/it]

122728 episode score is 863.24
122738 episode score is 792.76


 12%|█▎        | 12500/100000 [4:06:49<35:43:49,  1.47s/it]

Iteration 12500: Average test reward: 797.91


 13%|█▎        | 12501/100000 [4:06:50<33:13:36,  1.37s/it]

122747 episode score is 860.93


 13%|█▎        | 12502/100000 [4:06:51<31:14:02,  1.29s/it]

122756 episode score is 829.74


 13%|█▎        | 12503/100000 [4:06:52<30:33:49,  1.26s/it]

122766 episode score is 816.25


 13%|█▎        | 12504/100000 [4:06:53<29:42:56,  1.22s/it]

122776 episode score is 769.09


 13%|█▎        | 12505/100000 [4:06:54<29:20:50,  1.21s/it]

122786 episode score is 806.90


 13%|█▎        | 12506/100000 [4:06:56<29:02:03,  1.19s/it]

122797 episode score is 698.41


 13%|█▎        | 12507/100000 [4:06:57<28:45:56,  1.18s/it]

122807 episode score is 796.74


 13%|█▎        | 12508/100000 [4:06:58<28:03:41,  1.15s/it]

122817 episode score is 733.56


 13%|█▎        | 12509/100000 [4:06:59<28:09:12,  1.16s/it]

122827 episode score is 807.04


 13%|█▎        | 12510/100000 [4:07:00<28:24:39,  1.17s/it]

122837 episode score is 808.65


 13%|█▎        | 12511/100000 [4:07:01<28:14:21,  1.16s/it]

122847 episode score is 793.24


 13%|█▎        | 12512/100000 [4:07:02<28:02:24,  1.15s/it]

122857 episode score is 783.94


 13%|█▎        | 12513/100000 [4:07:04<27:53:12,  1.15s/it]

122867 episode score is 789.23


 13%|█▎        | 12514/100000 [4:07:05<27:29:33,  1.13s/it]

122877 episode score is 743.05


 13%|█▎        | 12515/100000 [4:07:06<27:25:50,  1.13s/it]

122887 episode score is 769.56


 13%|█▎        | 12516/100000 [4:07:07<27:10:11,  1.12s/it]

122896 episode score is 846.55


 13%|█▎        | 12517/100000 [4:07:08<27:18:50,  1.12s/it]

122906 episode score is 778.95


 13%|█▎        | 12518/100000 [4:07:09<27:22:12,  1.13s/it]

122916 episode score is 778.53


 13%|█▎        | 12519/100000 [4:07:10<27:31:14,  1.13s/it]

122926 episode score is 804.37


 13%|█▎        | 12520/100000 [4:07:11<27:20:10,  1.12s/it]

122936 episode score is 770.24


 13%|█▎        | 12521/100000 [4:07:12<27:10:32,  1.12s/it]

122946 episode score is 757.41


 13%|█▎        | 12522/100000 [4:07:14<27:36:38,  1.14s/it]

122956 episode score is 819.72


 13%|█▎        | 12523/100000 [4:07:15<27:32:22,  1.13s/it]

122966 episode score is 762.28


 13%|█▎        | 12524/100000 [4:07:16<27:33:26,  1.13s/it]

122976 episode score is 784.52


 13%|█▎        | 12525/100000 [4:07:17<27:15:45,  1.12s/it]

122986 episode score is 732.61


 13%|█▎        | 12526/100000 [4:07:18<27:06:21,  1.12s/it]

122996 episode score is 754.43


 13%|█▎        | 12527/100000 [4:07:19<27:26:39,  1.13s/it]

123006 episode score is 800.40


 13%|█▎        | 12528/100000 [4:07:20<27:24:33,  1.13s/it]

123016 episode score is 770.28


 13%|█▎        | 12529/100000 [4:07:22<27:41:37,  1.14s/it]

123026 episode score is 812.88


 13%|█▎        | 12530/100000 [4:07:23<27:51:10,  1.15s/it]

123036 episode score is 791.05


 13%|█▎        | 12531/100000 [4:07:24<28:12:43,  1.16s/it]

123047 episode score is 730.77


 13%|█▎        | 12532/100000 [4:07:25<27:58:15,  1.15s/it]

123057 episode score is 763.32


 13%|█▎        | 12533/100000 [4:07:26<28:02:38,  1.15s/it]

123067 episode score is 802.53


 13%|█▎        | 12534/100000 [4:07:27<27:47:18,  1.14s/it]

123077 episode score is 747.10


 13%|█▎        | 12535/100000 [4:07:28<27:44:50,  1.14s/it]

123087 episode score is 787.55


 13%|█▎        | 12536/100000 [4:07:30<27:39:27,  1.14s/it]

123097 episode score is 764.83


 13%|█▎        | 12537/100000 [4:07:31<27:37:48,  1.14s/it]

123107 episode score is 765.88


 13%|█▎        | 12538/100000 [4:07:32<27:24:55,  1.13s/it]

123117 episode score is 753.57


 13%|█▎        | 12539/100000 [4:07:33<27:22:18,  1.13s/it]

123127 episode score is 765.89


 13%|█▎        | 12540/100000 [4:07:34<27:13:44,  1.12s/it]

123137 episode score is 756.99


 13%|█▎        | 12541/100000 [4:07:35<27:15:11,  1.12s/it]

123147 episode score is 760.48


 13%|█▎        | 12542/100000 [4:07:36<28:06:44,  1.16s/it]

123157 episode score is 785.82


 13%|█▎        | 12543/100000 [4:07:38<27:36:27,  1.14s/it]

123166 episode score is 838.48


 13%|█▎        | 12544/100000 [4:07:39<27:29:31,  1.13s/it]

123176 episode score is 758.44


 13%|█▎        | 12545/100000 [4:07:40<27:20:14,  1.13s/it]

123185 episode score is 849.78


 13%|█▎        | 12546/100000 [4:07:41<27:26:45,  1.13s/it]

123195 episode score is 770.89


 13%|█▎        | 12547/100000 [4:07:42<27:37:23,  1.14s/it]

123205 episode score is 786.14


 13%|█▎        | 12548/100000 [4:07:43<27:53:23,  1.15s/it]

123215 episode score is 803.93


 13%|█▎        | 12549/100000 [4:07:44<27:59:28,  1.15s/it]

123225 episode score is 796.29


 13%|█▎        | 12550/100000 [4:07:46<28:10:54,  1.16s/it]

123235 episode score is 821.37


 13%|█▎        | 12551/100000 [4:07:47<27:50:26,  1.15s/it]

123245 episode score is 760.52


 13%|█▎        | 12552/100000 [4:07:48<28:06:18,  1.16s/it]

123255 episode score is 809.33


 13%|█▎        | 12553/100000 [4:07:49<27:43:29,  1.14s/it]

123265 episode score is 759.12


 13%|█▎        | 12554/100000 [4:07:50<27:22:48,  1.13s/it]

123275 episode score is 743.23


 13%|█▎        | 12555/100000 [4:07:51<27:05:05,  1.12s/it]

123285 episode score is 728.12


 13%|█▎        | 12556/100000 [4:07:52<27:21:31,  1.13s/it]

123295 episode score is 793.99


 13%|█▎        | 12557/100000 [4:07:53<27:26:12,  1.13s/it]

123305 episode score is 771.91


 13%|█▎        | 12558/100000 [4:07:55<27:55:19,  1.15s/it]

123316 episode score is 735.99


 13%|█▎        | 12559/100000 [4:07:56<27:55:48,  1.15s/it]

123326 episode score is 785.43


 13%|█▎        | 12560/100000 [4:07:57<27:25:08,  1.13s/it]

123335 episode score is 839.66


 13%|█▎        | 12561/100000 [4:07:58<27:42:02,  1.14s/it]

123345 episode score is 778.74


 13%|█▎        | 12562/100000 [4:07:59<27:52:17,  1.15s/it]

123355 episode score is 797.08


 13%|█▎        | 12563/100000 [4:08:00<27:47:32,  1.14s/it]

123365 episode score is 776.77


 13%|█▎        | 12564/100000 [4:08:01<27:44:56,  1.14s/it]

123375 episode score is 771.02


 13%|█▎        | 12565/100000 [4:08:03<27:27:49,  1.13s/it]

123384 episode score is 836.60


 13%|█▎        | 12566/100000 [4:08:04<27:25:21,  1.13s/it]

123393 episode score is 848.08


 13%|█▎        | 12567/100000 [4:08:05<27:22:27,  1.13s/it]

123403 episode score is 766.73


 13%|█▎        | 12568/100000 [4:08:06<27:27:11,  1.13s/it]

123413 episode score is 777.33


 13%|█▎        | 12569/100000 [4:08:07<27:37:36,  1.14s/it]

123423 episode score is 787.59


 13%|█▎        | 12570/100000 [4:08:08<27:13:26,  1.12s/it]

123432 episode score is 829.45


 13%|█▎        | 12571/100000 [4:08:09<27:40:52,  1.14s/it]

123442 episode score is 805.50


 13%|█▎        | 12572/100000 [4:08:11<27:44:55,  1.14s/it]

123452 episode score is 779.23


 13%|█▎        | 12573/100000 [4:08:12<27:25:01,  1.13s/it]

123462 episode score is 737.29


 13%|█▎        | 12574/100000 [4:08:13<27:42:42,  1.14s/it]

123472 episode score is 800.93


 13%|█▎        | 12575/100000 [4:08:14<27:35:07,  1.14s/it]

123482 episode score is 737.61


 13%|█▎        | 12576/100000 [4:08:15<27:26:19,  1.13s/it]

123492 episode score is 750.79


 13%|█▎        | 12577/100000 [4:08:16<27:23:56,  1.13s/it]

123502 episode score is 777.39


 13%|█▎        | 12578/100000 [4:08:17<27:33:13,  1.13s/it]

123512 episode score is 776.07


 13%|█▎        | 12579/100000 [4:08:18<27:31:01,  1.13s/it]

123522 episode score is 766.37


 13%|█▎        | 12580/100000 [4:08:20<27:27:30,  1.13s/it]

123532 episode score is 750.23


 13%|█▎        | 12581/100000 [4:08:21<27:59:12,  1.15s/it]

123542 episode score is 821.75


 13%|█▎        | 12582/100000 [4:08:22<27:46:09,  1.14s/it]

123552 episode score is 755.81


 13%|█▎        | 12583/100000 [4:08:23<27:50:36,  1.15s/it]

123562 episode score is 776.82


 13%|█▎        | 12584/100000 [4:08:24<27:40:33,  1.14s/it]

123572 episode score is 764.26


 13%|█▎        | 12585/100000 [4:08:25<27:55:05,  1.15s/it]

123582 episode score is 780.84


 13%|█▎        | 12586/100000 [4:08:26<27:43:07,  1.14s/it]

123592 episode score is 762.81


 13%|█▎        | 12587/100000 [4:08:28<28:05:15,  1.16s/it]

123602 episode score is 812.95


 13%|█▎        | 12588/100000 [4:08:29<28:14:29,  1.16s/it]

123612 episode score is 707.32


 13%|█▎        | 12589/100000 [4:08:30<27:39:07,  1.14s/it]

123622 episode score is 716.86


 13%|█▎        | 12590/100000 [4:08:31<27:36:14,  1.14s/it]

123632 episode score is 762.89


 13%|█▎        | 12591/100000 [4:08:32<28:10:08,  1.16s/it]

123642 episode score is 820.42


 13%|█▎        | 12592/100000 [4:08:33<28:19:24,  1.17s/it]

123652 episode score is 801.06


 13%|█▎        | 12593/100000 [4:08:35<27:50:56,  1.15s/it]

123661 episode score is 836.42


 13%|█▎        | 12594/100000 [4:08:36<28:04:24,  1.16s/it]

123671 episode score is 810.27


 13%|█▎        | 12595/100000 [4:08:37<28:01:43,  1.15s/it]

123681 episode score is 772.17


 13%|█▎        | 12596/100000 [4:08:38<27:55:20,  1.15s/it]

123691 episode score is 785.54


 13%|█▎        | 12597/100000 [4:08:39<27:36:16,  1.14s/it]

123701 episode score is 747.37


 13%|█▎        | 12598/100000 [4:08:40<27:39:36,  1.14s/it]

123711 episode score is 778.39


 13%|█▎        | 12599/100000 [4:08:41<27:31:57,  1.13s/it]

123721 episode score is 755.33
123730 episode score is 841.10


 13%|█▎        | 12600/100000 [4:08:44<35:07:01,  1.45s/it]

Iteration 12600: Average test reward: 815.85


 13%|█▎        | 12601/100000 [4:08:45<33:19:18,  1.37s/it]

123740 episode score is 816.36


 13%|█▎        | 12602/100000 [4:08:46<31:35:14,  1.30s/it]

123750 episode score is 768.05


 13%|█▎        | 12603/100000 [4:08:47<30:33:52,  1.26s/it]

123760 episode score is 779.56


 13%|█▎        | 12604/100000 [4:08:48<29:42:23,  1.22s/it]

123770 episode score is 757.11


 13%|█▎        | 12605/100000 [4:08:49<28:48:56,  1.19s/it]

123780 episode score is 741.03


 13%|█▎        | 12606/100000 [4:08:50<28:19:07,  1.17s/it]

123790 episode score is 772.04


 13%|█▎        | 12607/100000 [4:08:52<27:47:54,  1.15s/it]

123800 episode score is 741.77


 13%|█▎        | 12608/100000 [4:08:53<27:53:01,  1.15s/it]

123810 episode score is 785.20


 13%|█▎        | 12609/100000 [4:08:54<27:40:40,  1.14s/it]

123820 episode score is 766.30


 13%|█▎        | 12610/100000 [4:08:55<27:19:12,  1.13s/it]

123830 episode score is 720.44


 13%|█▎        | 12611/100000 [4:08:56<27:00:35,  1.11s/it]

123839 episode score is 831.41


 13%|█▎        | 12612/100000 [4:08:57<26:55:58,  1.11s/it]

123849 episode score is 758.02


 13%|█▎        | 12613/100000 [4:08:58<26:53:48,  1.11s/it]

123859 episode score is 750.09


 13%|█▎        | 12614/100000 [4:08:59<27:28:36,  1.13s/it]

123869 episode score is 822.47


 13%|█▎        | 12615/100000 [4:09:01<27:32:44,  1.13s/it]

123879 episode score is 770.06


 13%|█▎        | 12616/100000 [4:09:02<27:15:32,  1.12s/it]

123888 episode score is 829.18


 13%|█▎        | 12617/100000 [4:09:03<27:07:34,  1.12s/it]

123897 episode score is 827.56


 13%|█▎        | 12618/100000 [4:09:04<27:07:16,  1.12s/it]

123906 episode score is 853.81


 13%|█▎        | 12619/100000 [4:09:05<26:57:05,  1.11s/it]

123915 episode score is 837.54


 13%|█▎        | 12620/100000 [4:09:06<27:21:20,  1.13s/it]

123925 episode score is 790.15


 13%|█▎        | 12621/100000 [4:09:07<27:04:28,  1.12s/it]

123934 episode score is 827.06


 13%|█▎        | 12622/100000 [4:09:08<26:50:14,  1.11s/it]

123943 episode score is 831.02


 13%|█▎        | 12623/100000 [4:09:09<26:53:37,  1.11s/it]

123953 episode score is 750.51


 13%|█▎        | 12624/100000 [4:09:10<27:01:49,  1.11s/it]

123963 episode score is 778.89


 13%|█▎        | 12625/100000 [4:09:12<27:10:58,  1.12s/it]

123973 episode score is 776.15


 13%|█▎        | 12626/100000 [4:09:13<26:59:51,  1.11s/it]

123982 episode score is 832.42


 13%|█▎        | 12627/100000 [4:09:14<27:27:50,  1.13s/it]

123992 episode score is 804.45


 13%|█▎        | 12628/100000 [4:09:15<27:12:13,  1.12s/it]

124001 episode score is 841.53


 13%|█▎        | 12629/100000 [4:09:16<27:28:32,  1.13s/it]

124011 episode score is 804.27


 13%|█▎        | 12630/100000 [4:09:17<27:48:25,  1.15s/it]

124021 episode score is 806.28


 13%|█▎        | 12631/100000 [4:09:19<28:08:33,  1.16s/it]

124031 episode score is 830.49


 13%|█▎        | 12632/100000 [4:09:20<28:03:25,  1.16s/it]

124041 episode score is 784.81


 13%|█▎        | 12633/100000 [4:09:21<27:46:02,  1.14s/it]

124051 episode score is 764.20


 13%|█▎        | 12634/100000 [4:09:22<27:19:03,  1.13s/it]

124060 episode score is 842.31


 13%|█▎        | 12635/100000 [4:09:23<27:30:19,  1.13s/it]

124070 episode score is 790.60


 13%|█▎        | 12636/100000 [4:09:24<27:44:29,  1.14s/it]

124080 episode score is 798.07


 13%|█▎        | 12637/100000 [4:09:25<27:40:10,  1.14s/it]

124090 episode score is 776.78


 13%|█▎        | 12638/100000 [4:09:26<27:47:12,  1.15s/it]

124100 episode score is 788.40


 13%|█▎        | 12639/100000 [4:09:28<27:25:21,  1.13s/it]

124110 episode score is 747.91


 13%|█▎        | 12640/100000 [4:09:29<27:58:32,  1.15s/it]

124120 episode score is 825.63


 13%|█▎        | 12641/100000 [4:09:30<27:35:53,  1.14s/it]

124130 episode score is 763.22


 13%|█▎        | 12642/100000 [4:09:31<27:48:03,  1.15s/it]

124140 episode score is 814.26


 13%|█▎        | 12643/100000 [4:09:32<27:40:54,  1.14s/it]

124150 episode score is 765.22


 13%|█▎        | 12644/100000 [4:09:33<27:28:06,  1.13s/it]

124160 episode score is 759.14


 13%|█▎        | 12645/100000 [4:09:34<27:59:58,  1.15s/it]

124170 episode score is 825.39


 13%|█▎        | 12646/100000 [4:09:36<27:46:22,  1.14s/it]

124180 episode score is 775.21


 13%|█▎        | 12647/100000 [4:09:37<27:41:52,  1.14s/it]

124190 episode score is 795.95


 13%|█▎        | 12648/100000 [4:09:38<27:40:33,  1.14s/it]

124200 episode score is 792.07


 13%|█▎        | 12649/100000 [4:09:39<27:21:23,  1.13s/it]

124210 episode score is 751.48


 13%|█▎        | 12650/100000 [4:09:40<27:42:39,  1.14s/it]

124220 episode score is 755.24


 13%|█▎        | 12651/100000 [4:09:41<27:27:22,  1.13s/it]

124230 episode score is 767.28


 13%|█▎        | 12652/100000 [4:09:42<27:19:13,  1.13s/it]

124240 episode score is 758.91


 13%|█▎        | 12653/100000 [4:09:43<27:09:20,  1.12s/it]

124250 episode score is 763.06


 13%|█▎        | 12654/100000 [4:09:45<27:04:56,  1.12s/it]

124260 episode score is 746.73


 13%|█▎        | 12655/100000 [4:09:46<26:52:59,  1.11s/it]

124270 episode score is 742.13


 13%|█▎        | 12656/100000 [4:09:47<27:08:01,  1.12s/it]

124280 episode score is 792.35


 13%|█▎        | 12657/100000 [4:09:48<27:03:45,  1.12s/it]

124290 episode score is 756.82


 13%|█▎        | 12658/100000 [4:09:49<27:09:12,  1.12s/it]

124300 episode score is 778.25


 13%|█▎        | 12659/100000 [4:09:50<27:22:01,  1.13s/it]

124311 episode score is 705.43


 13%|█▎        | 12660/100000 [4:09:51<27:19:09,  1.13s/it]

124321 episode score is 777.78


 13%|█▎        | 12661/100000 [4:09:52<27:08:56,  1.12s/it]

124331 episode score is 755.86


 13%|█▎        | 12662/100000 [4:09:54<27:37:13,  1.14s/it]

124342 episode score is 744.90


 13%|█▎        | 12663/100000 [4:09:55<27:18:05,  1.13s/it]

124352 episode score is 741.94


 13%|█▎        | 12664/100000 [4:09:56<27:15:12,  1.12s/it]

124362 episode score is 762.86


 13%|█▎        | 12665/100000 [4:09:57<27:15:28,  1.12s/it]

124372 episode score is 778.14


 13%|█▎        | 12666/100000 [4:09:58<27:12:01,  1.12s/it]

124382 episode score is 779.38


 13%|█▎        | 12667/100000 [4:09:59<27:25:36,  1.13s/it]

124392 episode score is 804.81


 13%|█▎        | 12668/100000 [4:10:00<27:11:13,  1.12s/it]

124402 episode score is 760.11


 13%|█▎        | 12669/100000 [4:10:01<27:24:01,  1.13s/it]

124412 episode score is 809.68


 13%|█▎        | 12670/100000 [4:10:03<27:25:40,  1.13s/it]

124422 episode score is 783.49


 13%|█▎        | 12671/100000 [4:10:04<27:21:28,  1.13s/it]

124432 episode score is 778.88


 13%|█▎        | 12672/100000 [4:10:05<27:36:53,  1.14s/it]

124442 episode score is 810.65


 13%|█▎        | 12673/100000 [4:10:06<27:36:46,  1.14s/it]

124452 episode score is 798.25


 13%|█▎        | 12674/100000 [4:10:07<27:12:12,  1.12s/it]

124462 episode score is 746.01


 13%|█▎        | 12675/100000 [4:10:08<27:30:40,  1.13s/it]

124473 episode score is 712.72


 13%|█▎        | 12676/100000 [4:10:09<27:59:03,  1.15s/it]

124483 episode score is 823.51


 13%|█▎        | 12677/100000 [4:10:11<27:42:50,  1.14s/it]

124493 episode score is 774.00


 13%|█▎        | 12678/100000 [4:10:12<27:48:31,  1.15s/it]

124503 episode score is 811.61


 13%|█▎        | 12679/100000 [4:10:13<27:50:01,  1.15s/it]

124513 episode score is 799.67


 13%|█▎        | 12680/100000 [4:10:14<27:44:14,  1.14s/it]

124523 episode score is 797.59


 13%|█▎        | 12681/100000 [4:10:15<27:41:51,  1.14s/it]

124533 episode score is 764.57


 13%|█▎        | 12682/100000 [4:10:16<27:23:45,  1.13s/it]

124543 episode score is 757.06


 13%|█▎        | 12683/100000 [4:10:17<27:19:28,  1.13s/it]

124553 episode score is 762.44


 13%|█▎        | 12684/100000 [4:10:18<27:09:30,  1.12s/it]

124563 episode score is 749.42


 13%|█▎        | 12685/100000 [4:10:20<27:02:16,  1.11s/it]

124573 episode score is 754.76


 13%|█▎        | 12686/100000 [4:10:21<27:10:29,  1.12s/it]

124583 episode score is 792.60


 13%|█▎        | 12687/100000 [4:10:22<26:53:41,  1.11s/it]

124593 episode score is 748.18


 13%|█▎        | 12688/100000 [4:10:23<26:44:33,  1.10s/it]

124602 episode score is 848.99


 13%|█▎        | 12689/100000 [4:10:24<27:00:48,  1.11s/it]

124612 episode score is 801.58


 13%|█▎        | 12690/100000 [4:10:25<27:13:07,  1.12s/it]

124622 episode score is 775.57


 13%|█▎        | 12691/100000 [4:10:26<27:24:14,  1.13s/it]

124632 episode score is 805.17


 13%|█▎        | 12692/100000 [4:10:28<27:48:33,  1.15s/it]

124642 episode score is 833.06


 13%|█▎        | 12693/100000 [4:10:29<27:47:44,  1.15s/it]

124652 episode score is 792.66


 13%|█▎        | 12694/100000 [4:10:30<27:53:43,  1.15s/it]

124662 episode score is 808.19


 13%|█▎        | 12695/100000 [4:10:31<27:55:02,  1.15s/it]

124672 episode score is 807.32


 13%|█▎        | 12696/100000 [4:10:32<28:01:43,  1.16s/it]

124682 episode score is 797.70


 13%|█▎        | 12697/100000 [4:10:33<27:55:45,  1.15s/it]

124692 episode score is 787.87


 13%|█▎        | 12698/100000 [4:10:34<27:50:37,  1.15s/it]

124702 episode score is 788.78


 13%|█▎        | 12699/100000 [4:10:36<27:48:16,  1.15s/it]

124712 episode score is 775.19
124722 episode score is 834.45


 13%|█▎        | 12700/100000 [4:10:38<35:42:38,  1.47s/it]

Iteration 12700: Average test reward: 792.83


 13%|█▎        | 12701/100000 [4:10:39<33:24:22,  1.38s/it]

124732 episode score is 799.39


 13%|█▎        | 12702/100000 [4:10:40<31:53:39,  1.32s/it]

124742 episode score is 812.60


 13%|█▎        | 12703/100000 [4:10:41<30:52:36,  1.27s/it]

124752 episode score is 804.04


 13%|█▎        | 12704/100000 [4:10:42<30:18:52,  1.25s/it]

124762 episode score is 829.08


 13%|█▎        | 12705/100000 [4:10:44<29:52:34,  1.23s/it]

124772 episode score is 813.35


 13%|█▎        | 12706/100000 [4:10:45<29:24:24,  1.21s/it]

124782 episode score is 798.83


 13%|█▎        | 12707/100000 [4:10:46<29:12:12,  1.20s/it]

124792 episode score is 822.08


 13%|█▎        | 12708/100000 [4:10:47<28:53:00,  1.19s/it]

124802 episode score is 795.54


 13%|█▎        | 12709/100000 [4:10:48<28:53:43,  1.19s/it]

124812 episode score is 825.43


 13%|█▎        | 12710/100000 [4:10:50<28:25:46,  1.17s/it]

124822 episode score is 770.96


 13%|█▎        | 12711/100000 [4:10:51<28:26:15,  1.17s/it]

124832 episode score is 817.19


 13%|█▎        | 12712/100000 [4:10:52<28:16:15,  1.17s/it]

124842 episode score is 795.61


 13%|█▎        | 12713/100000 [4:10:53<28:15:19,  1.17s/it]

124852 episode score is 802.22


 13%|█▎        | 12714/100000 [4:10:54<28:08:19,  1.16s/it]

124862 episode score is 797.68


 13%|█▎        | 12715/100000 [4:10:55<28:10:04,  1.16s/it]

124872 episode score is 808.88


 13%|█▎        | 12716/100000 [4:10:56<28:13:08,  1.16s/it]

124882 episode score is 817.03


 13%|█▎        | 12717/100000 [4:10:58<27:51:02,  1.15s/it]

124892 episode score is 774.27


 13%|█▎        | 12718/100000 [4:10:59<27:49:05,  1.15s/it]

124902 episode score is 779.13


 13%|█▎        | 12719/100000 [4:11:00<27:38:36,  1.14s/it]

124912 episode score is 760.76


 13%|█▎        | 12720/100000 [4:11:01<27:59:36,  1.15s/it]

124922 episode score is 815.06


 13%|█▎        | 12721/100000 [4:11:02<28:00:21,  1.16s/it]

124932 episode score is 783.19


 13%|█▎        | 12722/100000 [4:11:03<28:10:11,  1.16s/it]

124943 episode score is 718.43


 13%|█▎        | 12723/100000 [4:11:05<27:58:11,  1.15s/it]

124953 episode score is 787.67


 13%|█▎        | 12724/100000 [4:11:06<27:44:27,  1.14s/it]

124963 episode score is 756.79


 13%|█▎        | 12725/100000 [4:11:07<27:35:41,  1.14s/it]

124973 episode score is 775.95


 13%|█▎        | 12726/100000 [4:11:08<27:19:19,  1.13s/it]

124983 episode score is 744.44


 13%|█▎        | 12727/100000 [4:11:09<27:22:40,  1.13s/it]

124993 episode score is 773.59


 13%|█▎        | 12728/100000 [4:11:10<27:44:04,  1.14s/it]

125003 episode score is 821.20


 13%|█▎        | 12729/100000 [4:11:11<27:58:14,  1.15s/it]

125013 episode score is 811.99


 13%|█▎        | 12730/100000 [4:11:13<28:07:04,  1.16s/it]

125023 episode score is 807.13


 13%|█▎        | 12731/100000 [4:11:14<28:22:14,  1.17s/it]

125033 episode score is 826.59


 13%|█▎        | 12732/100000 [4:11:15<28:05:46,  1.16s/it]

125043 episode score is 772.62


 13%|█▎        | 12733/100000 [4:11:16<28:09:48,  1.16s/it]

125054 episode score is 714.07


 13%|█▎        | 12734/100000 [4:11:17<27:47:44,  1.15s/it]

125064 episode score is 749.82


 13%|█▎        | 12735/100000 [4:11:18<27:26:16,  1.13s/it]

125073 episode score is 843.51


 13%|█▎        | 12736/100000 [4:11:19<27:35:09,  1.14s/it]

125083 episode score is 793.09


 13%|█▎        | 12737/100000 [4:11:21<27:42:47,  1.14s/it]

125093 episode score is 793.28


 13%|█▎        | 12738/100000 [4:11:22<28:44:50,  1.19s/it]

125103 episode score is 823.24


 13%|█▎        | 12739/100000 [4:11:23<28:33:38,  1.18s/it]

125113 episode score is 796.39


 13%|█▎        | 12740/100000 [4:11:24<28:07:02,  1.16s/it]

125123 episode score is 756.36


 13%|█▎        | 12741/100000 [4:11:25<28:15:19,  1.17s/it]

125133 episode score is 814.22


 13%|█▎        | 12742/100000 [4:11:26<27:54:44,  1.15s/it]

125143 episode score is 752.37


 13%|█▎        | 12743/100000 [4:11:27<27:26:14,  1.13s/it]

125153 episode score is 747.68


 13%|█▎        | 12744/100000 [4:11:29<27:50:10,  1.15s/it]

125163 episode score is 818.37


 13%|█▎        | 12745/100000 [4:11:30<27:42:59,  1.14s/it]

125173 episode score is 776.25


 13%|█▎        | 12746/100000 [4:11:31<27:41:04,  1.14s/it]

125183 episode score is 791.66


 13%|█▎        | 12747/100000 [4:11:32<27:48:17,  1.15s/it]

125193 episode score is 799.39


 13%|█▎        | 12748/100000 [4:11:33<27:57:02,  1.15s/it]

125203 episode score is 794.27


 13%|█▎        | 12749/100000 [4:11:34<28:12:31,  1.16s/it]

125213 episode score is 817.63


 13%|█▎        | 12750/100000 [4:11:36<28:27:06,  1.17s/it]

125223 episode score is 825.02


 13%|█▎        | 12751/100000 [4:11:37<28:23:47,  1.17s/it]

125233 episode score is 807.90


 13%|█▎        | 12752/100000 [4:11:38<28:27:46,  1.17s/it]

125243 episode score is 808.12


 13%|█▎        | 12753/100000 [4:11:39<28:19:34,  1.17s/it]

125253 episode score is 796.00


 13%|█▎        | 12754/100000 [4:11:40<28:20:04,  1.17s/it]

125263 episode score is 807.82


 13%|█▎        | 12755/100000 [4:11:41<27:57:12,  1.15s/it]

125273 episode score is 756.55


 13%|█▎        | 12756/100000 [4:11:43<27:52:50,  1.15s/it]

125283 episode score is 772.27


 13%|█▎        | 12757/100000 [4:11:44<27:25:58,  1.13s/it]

125292 episode score is 843.32


 13%|█▎        | 12758/100000 [4:11:45<27:55:29,  1.15s/it]

125302 episode score is 828.47


 13%|█▎        | 12759/100000 [4:11:46<27:52:07,  1.15s/it]

125312 episode score is 783.51


 13%|█▎        | 12760/100000 [4:11:47<27:47:22,  1.15s/it]

125321 episode score is 868.68


 13%|█▎        | 12761/100000 [4:11:48<27:55:59,  1.15s/it]

125331 episode score is 795.26


 13%|█▎        | 12762/100000 [4:11:50<28:01:44,  1.16s/it]

125341 episode score is 785.16


 13%|█▎        | 12763/100000 [4:11:51<28:04:28,  1.16s/it]

125351 episode score is 797.58


 13%|█▎        | 12764/100000 [4:11:52<27:53:51,  1.15s/it]

125361 episode score is 774.14


 13%|█▎        | 12765/100000 [4:11:53<28:05:14,  1.16s/it]

125371 episode score is 805.24


 13%|█▎        | 12766/100000 [4:11:54<28:01:11,  1.16s/it]

125381 episode score is 791.07


 13%|█▎        | 12767/100000 [4:11:55<28:08:41,  1.16s/it]

125391 episode score is 794.65


 13%|█▎        | 12768/100000 [4:11:56<27:41:45,  1.14s/it]

125400 episode score is 841.54


 13%|█▎        | 12769/100000 [4:11:57<27:15:59,  1.13s/it]

125409 episode score is 834.54


 13%|█▎        | 12770/100000 [4:11:59<26:57:25,  1.11s/it]

125419 episode score is 746.53


 13%|█▎        | 12771/100000 [4:12:00<27:28:16,  1.13s/it]

125429 episode score is 813.44


 13%|█▎        | 12772/100000 [4:12:01<27:29:44,  1.13s/it]

125439 episode score is 781.34


 13%|█▎        | 12773/100000 [4:12:02<27:47:44,  1.15s/it]

125449 episode score is 803.42


 13%|█▎        | 12774/100000 [4:12:03<27:34:10,  1.14s/it]

125459 episode score is 754.37


 13%|█▎        | 12775/100000 [4:12:04<27:27:12,  1.13s/it]

125469 episode score is 759.51


 13%|█▎        | 12776/100000 [4:12:05<27:15:06,  1.12s/it]

125478 episode score is 847.88


 13%|█▎        | 12777/100000 [4:12:07<27:28:47,  1.13s/it]

125488 episode score is 782.29


 13%|█▎        | 12778/100000 [4:12:08<27:32:52,  1.14s/it]

125498 episode score is 783.25


 13%|█▎        | 12779/100000 [4:12:09<27:31:46,  1.14s/it]

125508 episode score is 772.73


 13%|█▎        | 12780/100000 [4:12:10<27:41:07,  1.14s/it]

125518 episode score is 797.57


 13%|█▎        | 12781/100000 [4:12:11<27:36:00,  1.14s/it]

125528 episode score is 773.04


 13%|█▎        | 12782/100000 [4:12:12<27:18:47,  1.13s/it]

125538 episode score is 750.27


 13%|█▎        | 12783/100000 [4:12:13<27:38:31,  1.14s/it]

125548 episode score is 798.87


 13%|█▎        | 12784/100000 [4:12:15<28:03:21,  1.16s/it]

125559 episode score is 729.44


 13%|█▎        | 12785/100000 [4:12:16<27:52:47,  1.15s/it]

125569 episode score is 768.88


 13%|█▎        | 12786/100000 [4:12:17<27:28:42,  1.13s/it]

125579 episode score is 738.60


 13%|█▎        | 12787/100000 [4:12:18<27:49:09,  1.15s/it]

125590 episode score is 716.75


 13%|█▎        | 12788/100000 [4:12:19<28:30:37,  1.18s/it]

125600 episode score is 782.75


 13%|█▎        | 12789/100000 [4:12:20<28:14:45,  1.17s/it]

125611 episode score is 689.50


 13%|█▎        | 12790/100000 [4:12:22<27:54:31,  1.15s/it]

125621 episode score is 772.56


 13%|█▎        | 12791/100000 [4:12:23<27:37:58,  1.14s/it]

125631 episode score is 757.42


 13%|█▎        | 12792/100000 [4:12:24<28:00:21,  1.16s/it]

125642 episode score is 724.12


 13%|█▎        | 12793/100000 [4:12:25<27:54:43,  1.15s/it]

125652 episode score is 784.51


 13%|█▎        | 12794/100000 [4:12:26<28:01:50,  1.16s/it]

125663 episode score is 713.47


 13%|█▎        | 12795/100000 [4:12:27<27:57:43,  1.15s/it]

125673 episode score is 788.14


 13%|█▎        | 12796/100000 [4:12:28<27:26:16,  1.13s/it]

125683 episode score is 736.70


 13%|█▎        | 12797/100000 [4:12:30<27:38:00,  1.14s/it]

125694 episode score is 709.43


 13%|█▎        | 12798/100000 [4:12:31<27:25:08,  1.13s/it]

125704 episode score is 758.63


 13%|█▎        | 12799/100000 [4:12:32<27:29:26,  1.13s/it]

125715 episode score is 700.26
125726 episode score is 712.62


 13%|█▎        | 12800/100000 [4:12:34<34:49:15,  1.44s/it]

Iteration 12800: Average test reward: 726.75


 13%|█▎        | 12801/100000 [4:12:35<32:24:12,  1.34s/it]

125736 episode score is 755.58


 13%|█▎        | 12802/100000 [4:12:36<31:18:35,  1.29s/it]

125747 episode score is 726.38


 13%|█▎        | 12803/100000 [4:12:37<30:25:15,  1.26s/it]

125758 episode score is 724.67


 13%|█▎        | 12804/100000 [4:12:39<29:47:47,  1.23s/it]

125769 episode score is 717.40


 13%|█▎        | 12805/100000 [4:12:40<28:56:44,  1.20s/it]

125779 episode score is 754.12


 13%|█▎        | 12806/100000 [4:12:41<28:23:23,  1.17s/it]

125789 episode score is 766.47


 13%|█▎        | 12807/100000 [4:12:42<28:35:54,  1.18s/it]

125800 episode score is 732.06


 13%|█▎        | 12808/100000 [4:12:43<28:35:48,  1.18s/it]

125811 episode score is 731.47


 13%|█▎        | 12809/100000 [4:12:44<28:24:53,  1.17s/it]

125822 episode score is 710.40


 13%|█▎        | 12810/100000 [4:12:45<28:01:16,  1.16s/it]

125832 episode score is 771.41


 13%|█▎        | 12811/100000 [4:12:47<27:54:03,  1.15s/it]

125842 episode score is 769.17


 13%|█▎        | 12812/100000 [4:12:48<27:42:36,  1.14s/it]

125852 episode score is 758.77


 13%|█▎        | 12813/100000 [4:12:49<27:57:09,  1.15s/it]

125863 episode score is 727.03


 13%|█▎        | 12814/100000 [4:12:50<27:46:45,  1.15s/it]

125874 episode score is 681.61


 13%|█▎        | 12815/100000 [4:12:51<27:57:30,  1.15s/it]

125885 episode score is 715.67


 13%|█▎        | 12816/100000 [4:12:52<27:37:41,  1.14s/it]

125895 episode score is 755.43


 13%|█▎        | 12817/100000 [4:12:53<27:48:01,  1.15s/it]

125906 episode score is 717.08


 13%|█▎        | 12818/100000 [4:12:55<27:58:54,  1.16s/it]

125917 episode score is 732.56


 13%|█▎        | 12819/100000 [4:12:56<28:11:59,  1.16s/it]

125928 episode score is 724.13


 13%|█▎        | 12820/100000 [4:12:57<28:13:38,  1.17s/it]

125939 episode score is 720.85


 13%|█▎        | 12821/100000 [4:12:58<28:15:28,  1.17s/it]

125950 episode score is 738.68


 13%|█▎        | 12822/100000 [4:12:59<28:08:23,  1.16s/it]

125961 episode score is 704.88


 13%|█▎        | 12823/100000 [4:13:00<27:34:53,  1.14s/it]

125971 episode score is 744.48


 13%|█▎        | 12824/100000 [4:13:02<28:06:47,  1.16s/it]

125982 episode score is 749.36


 13%|█▎        | 12825/100000 [4:13:03<27:46:24,  1.15s/it]

125992 episode score is 760.35


 13%|█▎        | 12826/100000 [4:13:04<28:06:12,  1.16s/it]

126003 episode score is 738.67


 13%|█▎        | 12827/100000 [4:13:05<28:06:10,  1.16s/it]

126014 episode score is 711.43


 13%|█▎        | 12828/100000 [4:13:06<27:41:00,  1.14s/it]

126024 episode score is 761.47


 13%|█▎        | 12829/100000 [4:13:07<27:33:00,  1.14s/it]

126034 episode score is 774.15


 13%|█▎        | 12830/100000 [4:13:08<27:49:41,  1.15s/it]

126045 episode score is 727.35


 13%|█▎        | 12831/100000 [4:13:10<27:49:21,  1.15s/it]

126055 episode score is 792.35


 13%|█▎        | 12832/100000 [4:13:11<28:02:38,  1.16s/it]

126066 episode score is 735.11


 13%|█▎        | 12833/100000 [4:13:12<27:45:32,  1.15s/it]

126076 episode score is 767.39


 13%|█▎        | 12834/100000 [4:13:13<27:38:20,  1.14s/it]

126087 episode score is 683.36


 13%|█▎        | 12835/100000 [4:13:14<27:19:02,  1.13s/it]

126097 episode score is 752.73


 13%|█▎        | 12836/100000 [4:13:15<27:02:14,  1.12s/it]

126107 episode score is 755.09


 13%|█▎        | 12837/100000 [4:13:16<27:02:37,  1.12s/it]

126117 episode score is 774.95


 13%|█▎        | 12838/100000 [4:13:18<27:28:23,  1.13s/it]

126127 episode score is 800.36


 13%|█▎        | 12839/100000 [4:13:19<27:37:35,  1.14s/it]

126137 episode score is 805.82


 13%|█▎        | 12840/100000 [4:13:20<27:29:38,  1.14s/it]

126147 episode score is 783.65


 13%|█▎        | 12841/100000 [4:13:21<27:19:55,  1.13s/it]

126157 episode score is 772.04


 13%|█▎        | 12842/100000 [4:13:22<27:22:33,  1.13s/it]

126167 episode score is 790.87


 13%|█▎        | 12843/100000 [4:13:23<27:19:51,  1.13s/it]

126177 episode score is 767.75


 13%|█▎        | 12844/100000 [4:13:24<27:14:59,  1.13s/it]

126187 episode score is 775.35


 13%|█▎        | 12845/100000 [4:13:25<27:13:06,  1.12s/it]

126197 episode score is 772.63


 13%|█▎        | 12846/100000 [4:13:27<27:08:27,  1.12s/it]

126207 episode score is 770.11


 13%|█▎        | 12847/100000 [4:13:28<27:15:07,  1.13s/it]

126217 episode score is 793.94


 13%|█▎        | 12848/100000 [4:13:29<27:23:11,  1.13s/it]

126227 episode score is 788.63


 13%|█▎        | 12849/100000 [4:13:30<27:00:56,  1.12s/it]

126237 episode score is 744.33


 13%|█▎        | 12850/100000 [4:13:31<26:52:37,  1.11s/it]

126247 episode score is 748.16


 13%|█▎        | 12851/100000 [4:13:32<26:53:35,  1.11s/it]

126257 episode score is 760.54


 13%|█▎        | 12852/100000 [4:13:33<26:52:29,  1.11s/it]

126267 episode score is 758.68


 13%|█▎        | 12853/100000 [4:13:34<27:31:03,  1.14s/it]

126277 episode score is 753.33


 13%|█▎        | 12854/100000 [4:13:36<27:34:06,  1.14s/it]

126287 episode score is 788.91


 13%|█▎        | 12855/100000 [4:13:37<27:28:19,  1.13s/it]

126297 episode score is 774.18


 13%|█▎        | 12856/100000 [4:13:38<27:31:19,  1.14s/it]

126307 episode score is 804.93


 13%|█▎        | 12857/100000 [4:13:39<27:11:43,  1.12s/it]

126317 episode score is 750.39


 13%|█▎        | 12858/100000 [4:13:40<27:32:21,  1.14s/it]

126328 episode score is 726.94


 13%|█▎        | 12859/100000 [4:13:41<27:17:32,  1.13s/it]

126338 episode score is 753.13


 13%|█▎        | 12860/100000 [4:13:42<27:16:29,  1.13s/it]

126349 episode score is 680.99


 13%|█▎        | 12861/100000 [4:13:43<27:23:07,  1.13s/it]

126359 episode score is 783.54


 13%|█▎        | 12862/100000 [4:13:45<27:10:29,  1.12s/it]

126369 episode score is 755.13


 13%|█▎        | 12863/100000 [4:13:46<27:06:52,  1.12s/it]

126379 episode score is 764.25


 13%|█▎        | 12864/100000 [4:13:47<27:36:52,  1.14s/it]

126390 episode score is 735.01


 13%|█▎        | 12865/100000 [4:13:48<27:18:13,  1.13s/it]

126400 episode score is 744.67


 13%|█▎        | 12866/100000 [4:13:49<27:04:25,  1.12s/it]

126410 episode score is 744.23


 13%|█▎        | 12867/100000 [4:13:50<27:04:38,  1.12s/it]

126420 episode score is 779.75


 13%|█▎        | 12868/100000 [4:13:51<27:00:40,  1.12s/it]

126430 episode score is 771.98


 13%|█▎        | 12869/100000 [4:13:52<26:56:39,  1.11s/it]

126440 episode score is 770.86


 13%|█▎        | 12870/100000 [4:13:53<26:42:53,  1.10s/it]

126450 episode score is 743.95


 13%|█▎        | 12871/100000 [4:13:55<26:57:17,  1.11s/it]

126460 episode score is 798.92


 13%|█▎        | 12872/100000 [4:13:56<26:45:04,  1.11s/it]

126470 episode score is 755.19


 13%|█▎        | 12873/100000 [4:13:57<26:56:32,  1.11s/it]

126480 episode score is 772.95


 13%|█▎        | 12874/100000 [4:13:58<27:21:35,  1.13s/it]

126491 episode score is 730.86


 13%|█▎        | 12875/100000 [4:13:59<27:11:56,  1.12s/it]

126501 episode score is 766.33


 13%|█▎        | 12876/100000 [4:14:00<27:13:27,  1.12s/it]

126511 episode score is 789.34


 13%|█▎        | 12877/100000 [4:14:01<27:09:30,  1.12s/it]

126521 episode score is 780.77


 13%|█▎        | 12878/100000 [4:14:02<27:14:45,  1.13s/it]

126531 episode score is 786.56


 13%|█▎        | 12879/100000 [4:14:04<27:14:09,  1.13s/it]

126541 episode score is 769.07


 13%|█▎        | 12880/100000 [4:14:05<27:41:45,  1.14s/it]

126552 episode score is 735.07


 13%|█▎        | 12881/100000 [4:14:06<27:55:51,  1.15s/it]

126563 episode score is 743.29


 13%|█▎        | 12882/100000 [4:14:07<27:27:02,  1.13s/it]

126573 episode score is 752.24


 13%|█▎        | 12883/100000 [4:14:08<27:45:52,  1.15s/it]

126584 episode score is 745.45


 13%|█▎        | 12884/100000 [4:14:09<27:57:28,  1.16s/it]

126595 episode score is 735.57


 13%|█▎        | 12885/100000 [4:14:10<27:22:41,  1.13s/it]

126605 episode score is 751.75


 13%|█▎        | 12886/100000 [4:14:12<27:05:21,  1.12s/it]

126615 episode score is 758.20


 13%|█▎        | 12887/100000 [4:14:13<27:31:18,  1.14s/it]

126626 episode score is 743.14


 13%|█▎        | 12888/100000 [4:14:14<27:36:08,  1.14s/it]

126637 episode score is 723.52


 13%|█▎        | 12889/100000 [4:14:15<27:39:58,  1.14s/it]

126648 episode score is 714.54


 13%|█▎        | 12890/100000 [4:14:16<28:03:13,  1.16s/it]

126659 episode score is 734.74


 13%|█▎        | 12891/100000 [4:14:17<28:07:00,  1.16s/it]

126670 episode score is 718.52


 13%|█▎        | 12892/100000 [4:14:19<27:53:23,  1.15s/it]

126681 episode score is 691.49


 13%|█▎        | 12893/100000 [4:14:20<27:53:27,  1.15s/it]

126692 episode score is 715.17


 13%|█▎        | 12894/100000 [4:14:21<28:01:10,  1.16s/it]

126703 episode score is 739.31


 13%|█▎        | 12895/100000 [4:14:22<28:02:22,  1.16s/it]

126715 episode score is 642.71


 13%|█▎        | 12896/100000 [4:14:23<27:47:05,  1.15s/it]

126726 episode score is 706.07


 13%|█▎        | 12897/100000 [4:14:24<27:41:22,  1.14s/it]

126737 episode score is 710.15


 13%|█▎        | 12898/100000 [4:14:25<27:14:10,  1.13s/it]

126748 episode score is 669.64


 13%|█▎        | 12899/100000 [4:14:27<27:24:09,  1.13s/it]

126759 episode score is 718.20
126770 episode score is 722.53


 13%|█▎        | 12900/100000 [4:14:29<34:18:31,  1.42s/it]

Iteration 12900: Average test reward: 706.22


 13%|█▎        | 12901/100000 [4:14:30<32:19:31,  1.34s/it]

126781 episode score is 717.27


 13%|█▎        | 12902/100000 [4:14:31<30:31:59,  1.26s/it]

126792 episode score is 666.32


 13%|█▎        | 12903/100000 [4:14:32<29:24:59,  1.22s/it]

126803 episode score is 674.74


 13%|█▎        | 12904/100000 [4:14:33<28:49:05,  1.19s/it]

126814 episode score is 705.47


 13%|█▎        | 12905/100000 [4:14:34<28:38:53,  1.18s/it]

126826 episode score is 648.93


 13%|█▎        | 12906/100000 [4:14:35<28:00:51,  1.16s/it]

126837 episode score is 676.63


 13%|█▎        | 12907/100000 [4:14:37<27:53:51,  1.15s/it]

126848 episode score is 719.39


 13%|█▎        | 12908/100000 [4:14:38<27:36:44,  1.14s/it]

126859 episode score is 683.28


 13%|█▎        | 12909/100000 [4:14:39<27:24:15,  1.13s/it]

126870 episode score is 681.65


 13%|█▎        | 12910/100000 [4:14:40<27:34:46,  1.14s/it]

126881 episode score is 729.25


 13%|█▎        | 12911/100000 [4:14:41<27:12:16,  1.12s/it]

126891 episode score is 758.07


 13%|█▎        | 12912/100000 [4:14:42<28:12:54,  1.17s/it]

126902 episode score is 737.71


 13%|█▎        | 12913/100000 [4:14:43<28:13:29,  1.17s/it]

126913 episode score is 735.82


 13%|█▎        | 12914/100000 [4:14:44<27:36:11,  1.14s/it]

126923 episode score is 748.66


 13%|█▎        | 12915/100000 [4:14:46<27:52:33,  1.15s/it]

126934 episode score is 730.72


 13%|█▎        | 12916/100000 [4:14:47<28:08:20,  1.16s/it]

126945 episode score is 743.86


 13%|█▎        | 12917/100000 [4:14:48<27:54:53,  1.15s/it]

126955 episode score is 768.74


 13%|█▎        | 12918/100000 [4:14:49<28:07:36,  1.16s/it]

126966 episode score is 738.92


 13%|█▎        | 12919/100000 [4:14:50<27:40:17,  1.14s/it]

126976 episode score is 759.00


 13%|█▎        | 12920/100000 [4:14:51<27:32:47,  1.14s/it]

126987 episode score is 695.85


 13%|█▎        | 12921/100000 [4:14:53<27:41:09,  1.14s/it]

126998 episode score is 724.34


 13%|█▎        | 12922/100000 [4:14:54<27:30:27,  1.14s/it]

127008 episode score is 779.10


 13%|█▎        | 12923/100000 [4:14:55<27:17:20,  1.13s/it]

127018 episode score is 767.99


 13%|█▎        | 12924/100000 [4:14:56<27:26:30,  1.13s/it]

127029 episode score is 722.51


 13%|█▎        | 12925/100000 [4:14:57<27:40:53,  1.14s/it]

127040 episode score is 723.64


 13%|█▎        | 12926/100000 [4:14:58<28:00:40,  1.16s/it]

127051 episode score is 730.51


 13%|█▎        | 12927/100000 [4:14:59<28:08:10,  1.16s/it]

127062 episode score is 743.37


 13%|█▎        | 12928/100000 [4:15:01<28:03:28,  1.16s/it]

127073 episode score is 727.63


 13%|█▎        | 12929/100000 [4:15:02<28:03:16,  1.16s/it]

127084 episode score is 729.36


 13%|█▎        | 12930/100000 [4:15:03<28:00:30,  1.16s/it]

127095 episode score is 712.92


 13%|█▎        | 12931/100000 [4:15:04<27:31:57,  1.14s/it]

127105 episode score is 760.08


 13%|█▎        | 12932/100000 [4:15:05<27:41:45,  1.15s/it]

127116 episode score is 728.34


 13%|█▎        | 12933/100000 [4:15:06<27:49:08,  1.15s/it]

127127 episode score is 729.93


 13%|█▎        | 12934/100000 [4:15:07<27:20:33,  1.13s/it]

127137 episode score is 740.04


 13%|█▎        | 12935/100000 [4:15:09<27:43:19,  1.15s/it]

127148 episode score is 736.10


 13%|█▎        | 12936/100000 [4:15:10<27:33:37,  1.14s/it]

127158 episode score is 786.21


 13%|█▎        | 12937/100000 [4:15:11<27:45:26,  1.15s/it]

127169 episode score is 727.61


 13%|█▎        | 12938/100000 [4:15:12<27:57:20,  1.16s/it]

127180 episode score is 734.90


 13%|█▎        | 12939/100000 [4:15:13<28:06:10,  1.16s/it]

127191 episode score is 731.18


 13%|█▎        | 12940/100000 [4:15:14<27:51:22,  1.15s/it]

127201 episode score is 784.71


 13%|█▎        | 12941/100000 [4:15:15<27:35:11,  1.14s/it]

127211 episode score is 779.04


 13%|█▎        | 12942/100000 [4:15:17<27:31:01,  1.14s/it]

127221 episode score is 787.45


 13%|█▎        | 12943/100000 [4:15:18<27:19:31,  1.13s/it]

127231 episode score is 761.37


 13%|█▎        | 12944/100000 [4:15:19<27:43:59,  1.15s/it]

127242 episode score is 742.91


 13%|█▎        | 12945/100000 [4:15:20<27:54:15,  1.15s/it]

127253 episode score is 738.78


 13%|█▎        | 12946/100000 [4:15:21<27:39:30,  1.14s/it]

127263 episode score is 773.27


 13%|█▎        | 12947/100000 [4:15:22<27:14:20,  1.13s/it]

127273 episode score is 750.46


 13%|█▎        | 12948/100000 [4:15:23<26:53:25,  1.11s/it]

127283 episode score is 748.12


 13%|█▎        | 12949/100000 [4:15:25<27:07:25,  1.12s/it]

127294 episode score is 727.69


 13%|█▎        | 12950/100000 [4:15:26<27:18:42,  1.13s/it]

127305 episode score is 724.51


 13%|█▎        | 12951/100000 [4:15:27<27:40:31,  1.14s/it]

127316 episode score is 749.09


 13%|█▎        | 12952/100000 [4:15:28<27:40:44,  1.14s/it]

127327 episode score is 719.99


 13%|█▎        | 12953/100000 [4:15:29<27:59:15,  1.16s/it]

127338 episode score is 751.42


 13%|█▎        | 12954/100000 [4:15:30<27:54:53,  1.15s/it]

127349 episode score is 721.08


 13%|█▎        | 12955/100000 [4:15:31<27:32:52,  1.14s/it]

127359 episode score is 769.45


 13%|█▎        | 12956/100000 [4:15:33<27:46:57,  1.15s/it]

127370 episode score is 739.57


 13%|█▎        | 12957/100000 [4:15:34<27:38:34,  1.14s/it]

127381 episode score is 711.01


 13%|█▎        | 12958/100000 [4:15:35<27:39:30,  1.14s/it]

127392 episode score is 717.71


 13%|█▎        | 12959/100000 [4:15:36<27:53:14,  1.15s/it]

127403 episode score is 742.62


 13%|█▎        | 12960/100000 [4:15:37<27:55:28,  1.15s/it]

127414 episode score is 728.90


 13%|█▎        | 12961/100000 [4:15:38<28:04:16,  1.16s/it]

127425 episode score is 740.41


 13%|█▎        | 12962/100000 [4:15:40<28:07:10,  1.16s/it]

127436 episode score is 745.57


 13%|█▎        | 12963/100000 [4:15:41<28:33:21,  1.18s/it]

127447 episode score is 714.14


 13%|█▎        | 12964/100000 [4:15:42<28:32:36,  1.18s/it]

127458 episode score is 750.47


 13%|█▎        | 12965/100000 [4:15:43<28:07:42,  1.16s/it]

127469 episode score is 683.96


 13%|█▎        | 12966/100000 [4:15:44<27:53:55,  1.15s/it]

127480 episode score is 705.50


 13%|█▎        | 12967/100000 [4:15:45<27:54:56,  1.15s/it]

127491 episode score is 734.68


 13%|█▎        | 12968/100000 [4:15:47<27:53:49,  1.15s/it]

127502 episode score is 729.23


 13%|█▎        | 12969/100000 [4:15:48<28:00:25,  1.16s/it]

127513 episode score is 708.92


 13%|█▎        | 12970/100000 [4:15:49<28:06:19,  1.16s/it]

127524 episode score is 729.18


 13%|█▎        | 12971/100000 [4:15:50<27:59:51,  1.16s/it]

127535 episode score is 704.98


 13%|█▎        | 12972/100000 [4:15:51<28:07:06,  1.16s/it]

127546 episode score is 744.14


 13%|█▎        | 12973/100000 [4:15:52<28:12:27,  1.17s/it]

127557 episode score is 732.65


 13%|█▎        | 12974/100000 [4:15:54<28:03:56,  1.16s/it]

127568 episode score is 727.11


 13%|█▎        | 12975/100000 [4:15:55<27:46:48,  1.15s/it]

127579 episode score is 699.02


 13%|█▎        | 12976/100000 [4:15:56<27:53:17,  1.15s/it]

127590 episode score is 720.41


 13%|█▎        | 12977/100000 [4:15:57<27:33:35,  1.14s/it]

127600 episode score is 770.01


 13%|█▎        | 12978/100000 [4:15:58<27:44:52,  1.15s/it]

127611 episode score is 738.79


 13%|█▎        | 12979/100000 [4:15:59<27:51:21,  1.15s/it]

127622 episode score is 714.55


 13%|█▎        | 12980/100000 [4:16:00<27:51:17,  1.15s/it]

127633 episode score is 719.29


 13%|█▎        | 12981/100000 [4:16:02<27:44:32,  1.15s/it]

127643 episode score is 791.28


 13%|█▎        | 12982/100000 [4:16:03<27:34:46,  1.14s/it]

127653 episode score is 776.21


 13%|█▎        | 12983/100000 [4:16:04<27:29:52,  1.14s/it]

127663 episode score is 786.63


 13%|█▎        | 12984/100000 [4:16:05<27:17:28,  1.13s/it]

127673 episode score is 774.13


 13%|█▎        | 12985/100000 [4:16:06<27:37:07,  1.14s/it]

127683 episode score is 824.32


 13%|█▎        | 12986/100000 [4:16:07<27:39:24,  1.14s/it]

127693 episode score is 796.46


 13%|█▎        | 12987/100000 [4:16:08<27:17:20,  1.13s/it]

127703 episode score is 754.27


 13%|█▎        | 12988/100000 [4:16:10<27:46:50,  1.15s/it]

127713 episode score is 830.87


 13%|█▎        | 12989/100000 [4:16:11<27:49:43,  1.15s/it]

127723 episode score is 804.47


 13%|█▎        | 12990/100000 [4:16:12<27:43:28,  1.15s/it]

127733 episode score is 788.15


 13%|█▎        | 12991/100000 [4:16:13<28:03:05,  1.16s/it]

127743 episode score is 820.01


 13%|█▎        | 12992/100000 [4:16:14<28:18:05,  1.17s/it]

127753 episode score is 839.48


 13%|█▎        | 12993/100000 [4:16:15<27:39:43,  1.14s/it]

127762 episode score is 832.75


 13%|█▎        | 12994/100000 [4:16:16<27:43:48,  1.15s/it]

127772 episode score is 791.16


 13%|█▎        | 12995/100000 [4:16:18<28:09:46,  1.17s/it]

127782 episode score is 824.87


 13%|█▎        | 12996/100000 [4:16:19<27:36:43,  1.14s/it]

127791 episode score is 835.40


 13%|█▎        | 12997/100000 [4:16:20<27:53:17,  1.15s/it]

127801 episode score is 820.48


 13%|█▎        | 12998/100000 [4:16:21<27:46:52,  1.15s/it]

127811 episode score is 797.50


 13%|█▎        | 12999/100000 [4:16:22<27:39:10,  1.14s/it]

127821 episode score is 788.42
127831 episode score is 775.48


 13%|█▎        | 13000/100000 [4:16:24<34:43:17,  1.44s/it]

Iteration 13000: Average test reward: 779.87


 13%|█▎        | 13001/100000 [4:16:25<32:44:03,  1.35s/it]

127841 episode score is 816.91


 13%|█▎        | 13002/100000 [4:16:27<31:13:42,  1.29s/it]

127851 episode score is 792.62


 13%|█▎        | 13003/100000 [4:16:28<30:30:25,  1.26s/it]

127861 episode score is 809.29


 13%|█▎        | 13004/100000 [4:16:29<29:17:57,  1.21s/it]

127870 episode score is 856.28


 13%|█▎        | 13005/100000 [4:16:30<28:22:21,  1.17s/it]

127879 episode score is 836.39


 13%|█▎        | 13006/100000 [4:16:31<27:49:51,  1.15s/it]

127888 episode score is 853.65


 13%|█▎        | 13007/100000 [4:16:32<27:44:48,  1.15s/it]

127898 episode score is 786.49


 13%|█▎        | 13008/100000 [4:16:33<27:50:28,  1.15s/it]

127908 episode score is 811.70


 13%|█▎        | 13009/100000 [4:16:35<27:50:37,  1.15s/it]

127918 episode score is 812.64


 13%|█▎        | 13010/100000 [4:16:36<27:34:19,  1.14s/it]

127928 episode score is 779.02


 13%|█▎        | 13011/100000 [4:16:37<27:49:20,  1.15s/it]

127939 episode score is 728.80


 13%|█▎        | 13012/100000 [4:16:38<27:27:08,  1.14s/it]

127949 episode score is 757.66


 13%|█▎        | 13013/100000 [4:16:39<27:28:13,  1.14s/it]

127960 episode score is 696.76


 13%|█▎        | 13014/100000 [4:16:40<27:04:37,  1.12s/it]

127970 episode score is 731.14


 13%|█▎        | 13015/100000 [4:16:41<26:45:19,  1.11s/it]

127980 episode score is 742.03


 13%|█▎        | 13016/100000 [4:16:42<27:16:01,  1.13s/it]

127991 episode score is 727.27


 13%|█▎        | 13017/100000 [4:16:44<27:11:21,  1.13s/it]

128001 episode score is 777.28


 13%|█▎        | 13018/100000 [4:16:45<27:13:08,  1.13s/it]

128011 episode score is 782.24


 13%|█▎        | 13019/100000 [4:16:46<27:29:58,  1.14s/it]

128021 episode score is 804.78


 13%|█▎        | 13020/100000 [4:16:47<27:14:11,  1.13s/it]

128031 episode score is 751.37


 13%|█▎        | 13021/100000 [4:16:48<27:01:33,  1.12s/it]

128042 episode score is 644.07


 13%|█▎        | 13022/100000 [4:16:49<27:31:12,  1.14s/it]

128052 episode score is 819.49


 13%|█▎        | 13023/100000 [4:16:50<27:51:17,  1.15s/it]

128063 episode score is 729.98


 13%|█▎        | 13024/100000 [4:16:52<28:48:32,  1.19s/it]

128073 episode score is 817.14


 13%|█▎        | 13025/100000 [4:16:53<28:07:30,  1.16s/it]

128083 episode score is 731.66


 13%|█▎        | 13026/100000 [4:16:54<27:57:59,  1.16s/it]

128093 episode score is 780.39


 13%|█▎        | 13027/100000 [4:16:55<27:33:27,  1.14s/it]

128103 episode score is 759.04


 13%|█▎        | 13028/100000 [4:16:56<27:37:50,  1.14s/it]

128113 episode score is 793.43


 13%|█▎        | 13029/100000 [4:16:57<27:08:42,  1.12s/it]

128123 episode score is 721.56


 13%|█▎        | 13030/100000 [4:16:58<27:33:30,  1.14s/it]

128133 episode score is 815.52


 13%|█▎        | 13031/100000 [4:17:00<27:49:51,  1.15s/it]

128143 episode score is 810.09


 13%|█▎        | 13032/100000 [4:17:01<28:01:30,  1.16s/it]

128153 episode score is 799.28


 13%|█▎        | 13033/100000 [4:17:02<27:29:03,  1.14s/it]

128162 episode score is 839.42


 13%|█▎        | 13034/100000 [4:17:03<27:22:54,  1.13s/it]

128172 episode score is 761.59


 13%|█▎        | 13035/100000 [4:17:04<27:10:48,  1.13s/it]

128182 episode score is 751.42


 13%|█▎        | 13036/100000 [4:17:05<27:25:19,  1.14s/it]

128192 episode score is 799.56


 13%|█▎        | 13037/100000 [4:17:06<27:24:40,  1.13s/it]

128202 episode score is 789.90


 13%|█▎        | 13038/100000 [4:17:08<27:29:44,  1.14s/it]

128212 episode score is 785.56


 13%|█▎        | 13039/100000 [4:17:09<27:22:46,  1.13s/it]

128222 episode score is 760.02


 13%|█▎        | 13040/100000 [4:17:10<27:30:30,  1.14s/it]

128232 episode score is 802.06


 13%|█▎        | 13041/100000 [4:17:11<27:43:03,  1.15s/it]

128242 episode score is 803.82


 13%|█▎        | 13042/100000 [4:17:12<27:28:57,  1.14s/it]

128252 episode score is 753.33


 13%|█▎        | 13043/100000 [4:17:13<27:30:56,  1.14s/it]

128262 episode score is 775.48


 13%|█▎        | 13044/100000 [4:17:14<27:11:29,  1.13s/it]

128272 episode score is 738.10


 13%|█▎        | 13045/100000 [4:17:15<26:58:16,  1.12s/it]

128282 episode score is 752.59


 13%|█▎        | 13046/100000 [4:17:17<27:17:46,  1.13s/it]

128292 episode score is 805.30


 13%|█▎        | 13047/100000 [4:17:18<27:31:20,  1.14s/it]

128302 episode score is 783.39


 13%|█▎        | 13048/100000 [4:17:19<27:43:34,  1.15s/it]

128312 episode score is 805.63


 13%|█▎        | 13049/100000 [4:17:20<27:24:53,  1.14s/it]

128322 episode score is 761.74


 13%|█▎        | 13050/100000 [4:17:21<27:23:15,  1.13s/it]

128332 episode score is 784.95


 13%|█▎        | 13051/100000 [4:17:22<27:30:32,  1.14s/it]

128342 episode score is 796.37


 13%|█▎        | 13052/100000 [4:17:23<27:35:07,  1.14s/it]

128353 episode score is 706.22


 13%|█▎        | 13053/100000 [4:17:25<27:55:17,  1.16s/it]

128364 episode score is 737.03


 13%|█▎        | 13054/100000 [4:17:26<27:36:08,  1.14s/it]

128374 episode score is 773.90


 13%|█▎        | 13055/100000 [4:17:27<27:53:31,  1.15s/it]

128384 episode score is 819.67


 13%|█▎        | 13056/100000 [4:17:28<27:28:24,  1.14s/it]

128394 episode score is 760.09


 13%|█▎        | 13057/100000 [4:17:29<27:26:24,  1.14s/it]

128404 episode score is 781.03


 13%|█▎        | 13058/100000 [4:17:30<27:24:28,  1.13s/it]

128414 episode score is 787.07


 13%|█▎        | 13059/100000 [4:17:31<27:06:02,  1.12s/it]

128424 episode score is 756.17


 13%|█▎        | 13060/100000 [4:17:33<27:06:40,  1.12s/it]

128434 episode score is 773.34


 13%|█▎        | 13061/100000 [4:17:34<27:13:25,  1.13s/it]

128444 episode score is 773.33


 13%|█▎        | 13062/100000 [4:17:35<27:37:03,  1.14s/it]

128454 episode score is 823.21


 13%|█▎        | 13063/100000 [4:17:36<27:41:25,  1.15s/it]

128464 episode score is 799.03


 13%|█▎        | 13064/100000 [4:17:37<27:18:49,  1.13s/it]

128474 episode score is 758.35


 13%|█▎        | 13065/100000 [4:17:38<26:57:18,  1.12s/it]

128484 episode score is 739.97


 13%|█▎        | 13066/100000 [4:17:39<27:02:14,  1.12s/it]

128494 episode score is 765.49


 13%|█▎        | 13067/100000 [4:17:40<26:57:46,  1.12s/it]

128504 episode score is 771.96


 13%|█▎        | 13068/100000 [4:17:42<26:59:48,  1.12s/it]

128514 episode score is 777.51


 13%|█▎        | 13069/100000 [4:17:43<27:12:03,  1.13s/it]

128524 episode score is 797.54


 13%|█▎        | 13070/100000 [4:17:44<27:28:29,  1.14s/it]

128534 episode score is 816.07


 13%|█▎        | 13071/100000 [4:17:45<27:21:39,  1.13s/it]

128544 episode score is 773.99


 13%|█▎        | 13072/100000 [4:17:46<27:32:42,  1.14s/it]

128554 episode score is 813.25


 13%|█▎        | 13073/100000 [4:17:47<27:54:33,  1.16s/it]

128564 episode score is 823.95


 13%|█▎        | 13074/100000 [4:17:48<27:57:19,  1.16s/it]

128574 episode score is 802.53


 13%|█▎        | 13075/100000 [4:17:50<27:59:31,  1.16s/it]

128584 episode score is 813.86


 13%|█▎        | 13076/100000 [4:17:51<27:23:08,  1.13s/it]

128594 episode score is 750.54


 13%|█▎        | 13077/100000 [4:17:52<26:55:22,  1.12s/it]

128604 episode score is 742.69


 13%|█▎        | 13078/100000 [4:17:53<26:52:21,  1.11s/it]

128615 episode score is 687.98


 13%|█▎        | 13079/100000 [4:17:54<26:48:10,  1.11s/it]

128626 episode score is 672.68


 13%|█▎        | 13080/100000 [4:17:55<27:10:42,  1.13s/it]

128637 episode score is 724.51


 13%|█▎        | 13081/100000 [4:17:56<27:09:41,  1.12s/it]

128648 episode score is 704.48


 13%|█▎        | 13082/100000 [4:17:57<26:44:56,  1.11s/it]

128658 episode score is 744.97


 13%|█▎        | 13083/100000 [4:17:58<26:31:01,  1.10s/it]

128669 episode score is 661.89


 13%|█▎        | 13084/100000 [4:18:00<26:41:59,  1.11s/it]

128680 episode score is 702.29


 13%|█▎        | 13085/100000 [4:18:01<26:42:18,  1.11s/it]

128691 episode score is 694.31


 13%|█▎        | 13086/100000 [4:18:02<27:36:45,  1.14s/it]

128702 episode score is 706.92


 13%|█▎        | 13087/100000 [4:18:03<27:51:15,  1.15s/it]

128713 episode score is 736.52


 13%|█▎        | 13088/100000 [4:18:04<27:21:11,  1.13s/it]

128723 episode score is 750.96


 13%|█▎        | 13089/100000 [4:18:05<27:36:06,  1.14s/it]

128734 episode score is 737.04


 13%|█▎        | 13090/100000 [4:18:06<27:06:05,  1.12s/it]

128744 episode score is 750.32


 13%|█▎        | 13091/100000 [4:18:08<27:20:18,  1.13s/it]

128755 episode score is 724.22


 13%|█▎        | 13092/100000 [4:18:09<27:34:04,  1.14s/it]

128766 episode score is 735.78


 13%|█▎        | 13093/100000 [4:18:10<27:03:41,  1.12s/it]

128776 episode score is 746.35


 13%|█▎        | 13094/100000 [4:18:11<27:07:25,  1.12s/it]

128787 episode score is 712.42


 13%|█▎        | 13095/100000 [4:18:12<27:24:48,  1.14s/it]

128798 episode score is 736.65


 13%|█▎        | 13096/100000 [4:18:13<27:13:59,  1.13s/it]

128808 episode score is 751.84


 13%|█▎        | 13097/100000 [4:18:14<27:15:23,  1.13s/it]

128818 episode score is 787.61


 13%|█▎        | 13098/100000 [4:18:15<26:57:38,  1.12s/it]

128828 episode score is 755.44


 13%|█▎        | 13099/100000 [4:18:16<26:50:01,  1.11s/it]

128838 episode score is 762.25
128849 episode score is 730.64


 13%|█▎        | 13100/100000 [4:18:19<34:23:27,  1.42s/it]

Iteration 13100: Average test reward: 738.58


 13%|█▎        | 13101/100000 [4:18:20<32:39:07,  1.35s/it]

128860 episode score is 734.65


 13%|█▎        | 13102/100000 [4:18:21<31:13:54,  1.29s/it]

128871 episode score is 725.94


 13%|█▎        | 13103/100000 [4:18:22<30:22:05,  1.26s/it]

128882 episode score is 739.06


 13%|█▎        | 13104/100000 [4:18:23<29:45:08,  1.23s/it]

128893 episode score is 727.44


 13%|█▎        | 13105/100000 [4:18:24<28:44:47,  1.19s/it]

128903 episode score is 760.80


 13%|█▎        | 13106/100000 [4:18:26<28:23:45,  1.18s/it]

128914 episode score is 708.02


 13%|█▎        | 13107/100000 [4:18:27<27:45:16,  1.15s/it]

128924 episode score is 759.79


 13%|█▎        | 13108/100000 [4:18:28<27:21:08,  1.13s/it]

128934 episode score is 752.55


 13%|█▎        | 13109/100000 [4:18:29<27:27:12,  1.14s/it]

128944 episode score is 796.85


 13%|█▎        | 13110/100000 [4:18:30<27:15:29,  1.13s/it]

128954 episode score is 773.93


 13%|█▎        | 13111/100000 [4:18:31<27:37:16,  1.14s/it]

128964 episode score is 798.66


 13%|█▎        | 13112/100000 [4:18:32<27:32:11,  1.14s/it]

128974 episode score is 786.64


 13%|█▎        | 13113/100000 [4:18:33<27:29:47,  1.14s/it]

128984 episode score is 791.56


 13%|█▎        | 13114/100000 [4:18:35<27:44:58,  1.15s/it]

128994 episode score is 821.15


 13%|█▎        | 13115/100000 [4:18:36<27:54:42,  1.16s/it]

129004 episode score is 811.89


 13%|█▎        | 13116/100000 [4:18:37<27:46:28,  1.15s/it]

129014 episode score is 791.94


 13%|█▎        | 13117/100000 [4:18:38<27:56:35,  1.16s/it]

129024 episode score is 821.46


 13%|█▎        | 13118/100000 [4:18:39<27:55:09,  1.16s/it]

129034 episode score is 801.75


 13%|█▎        | 13119/100000 [4:18:40<27:52:17,  1.15s/it]

129044 episode score is 804.19


 13%|█▎        | 13120/100000 [4:18:42<27:49:09,  1.15s/it]

129054 episode score is 805.57


 13%|█▎        | 13121/100000 [4:18:43<27:43:09,  1.15s/it]

129064 episode score is 791.12


 13%|█▎        | 13122/100000 [4:18:44<27:46:25,  1.15s/it]

129074 episode score is 812.98


 13%|█▎        | 13123/100000 [4:18:45<27:21:04,  1.13s/it]

129084 episode score is 755.33


 13%|█▎        | 13124/100000 [4:18:46<27:08:54,  1.12s/it]

129094 episode score is 761.04


 13%|█▎        | 13125/100000 [4:18:47<27:10:16,  1.13s/it]

129104 episode score is 774.48


 13%|█▎        | 13126/100000 [4:18:48<27:29:39,  1.14s/it]

129114 episode score is 814.52


 13%|█▎        | 13127/100000 [4:18:50<27:41:42,  1.15s/it]

129125 episode score is 716.06


 13%|█▎        | 13128/100000 [4:18:51<27:22:17,  1.13s/it]

129135 episode score is 774.00


 13%|█▎        | 13129/100000 [4:18:52<26:59:52,  1.12s/it]

129145 episode score is 738.67


 13%|█▎        | 13130/100000 [4:18:53<26:46:57,  1.11s/it]

129155 episode score is 752.55


 13%|█▎        | 13131/100000 [4:18:54<26:51:25,  1.11s/it]

129165 episode score is 781.18


 13%|█▎        | 13132/100000 [4:18:55<26:50:15,  1.11s/it]

129175 episode score is 764.56


 13%|█▎        | 13133/100000 [4:18:56<27:27:32,  1.14s/it]

129186 episode score is 756.21


 13%|█▎        | 13134/100000 [4:18:57<27:18:16,  1.13s/it]

129196 episode score is 766.03


 13%|█▎        | 13135/100000 [4:18:58<26:58:39,  1.12s/it]

129206 episode score is 755.13


 13%|█▎        | 13136/100000 [4:19:00<27:25:52,  1.14s/it]

129217 episode score is 743.06


 13%|█▎        | 13137/100000 [4:19:01<27:03:17,  1.12s/it]

129227 episode score is 755.09


 13%|█▎        | 13138/100000 [4:19:02<27:23:47,  1.14s/it]

129238 episode score is 743.16


 13%|█▎        | 13139/100000 [4:19:03<27:28:19,  1.14s/it]

129249 episode score is 711.54


 13%|█▎        | 13140/100000 [4:19:04<27:25:41,  1.14s/it]

129260 episode score is 695.08


 13%|█▎        | 13141/100000 [4:19:05<27:42:39,  1.15s/it]

129271 episode score is 723.11


 13%|█▎        | 13142/100000 [4:19:06<27:12:46,  1.13s/it]

129281 episode score is 753.09


 13%|█▎        | 13143/100000 [4:19:08<27:33:59,  1.14s/it]

129292 episode score is 746.47


 13%|█▎        | 13144/100000 [4:19:09<27:10:53,  1.13s/it]

129302 episode score is 763.76


 13%|█▎        | 13145/100000 [4:19:10<27:42:09,  1.15s/it]

129313 episode score is 739.29


 13%|█▎        | 13146/100000 [4:19:11<27:16:08,  1.13s/it]

129323 episode score is 750.23


 13%|█▎        | 13147/100000 [4:19:12<27:39:00,  1.15s/it]

129333 episode score is 758.35


 13%|█▎        | 13148/100000 [4:19:13<27:19:24,  1.13s/it]

129343 episode score is 762.52


 13%|█▎        | 13149/100000 [4:19:14<27:19:22,  1.13s/it]

129353 episode score is 792.11


 13%|█▎        | 13150/100000 [4:19:15<27:05:06,  1.12s/it]

129363 episode score is 764.20


 13%|█▎        | 13151/100000 [4:19:17<27:04:29,  1.12s/it]

129373 episode score is 781.49


 13%|█▎        | 13152/100000 [4:19:18<27:10:53,  1.13s/it]

129383 episode score is 784.32


 13%|█▎        | 13153/100000 [4:19:19<27:04:14,  1.12s/it]

129393 episode score is 773.39


 13%|█▎        | 13154/100000 [4:19:20<27:09:42,  1.13s/it]

129403 episode score is 793.45


 13%|█▎        | 13155/100000 [4:19:21<26:58:35,  1.12s/it]

129413 episode score is 770.14


 13%|█▎        | 13156/100000 [4:19:22<27:08:28,  1.13s/it]

129424 episode score is 699.60


 13%|█▎        | 13157/100000 [4:19:23<27:30:23,  1.14s/it]

129435 episode score is 740.00


 13%|█▎        | 13158/100000 [4:19:25<27:29:52,  1.14s/it]

129446 episode score is 716.83


 13%|█▎        | 13159/100000 [4:19:26<27:12:39,  1.13s/it]

129456 episode score is 764.89


 13%|█▎        | 13160/100000 [4:19:27<27:30:39,  1.14s/it]

129466 episode score is 826.25


 13%|█▎        | 13161/100000 [4:19:28<27:29:18,  1.14s/it]

129476 episode score is 801.28


 13%|█▎        | 13162/100000 [4:19:29<27:39:22,  1.15s/it]

129486 episode score is 815.46


 13%|█▎        | 13163/100000 [4:19:30<27:37:08,  1.15s/it]

129496 episode score is 802.55


 13%|█▎        | 13164/100000 [4:19:31<27:37:22,  1.15s/it]

129506 episode score is 798.87


 13%|█▎        | 13165/100000 [4:19:33<27:43:38,  1.15s/it]

129516 episode score is 812.61


 13%|█▎        | 13166/100000 [4:19:34<27:36:32,  1.14s/it]

129526 episode score is 795.26


 13%|█▎        | 13167/100000 [4:19:35<27:24:50,  1.14s/it]

129536 episode score is 775.97


 13%|█▎        | 13168/100000 [4:19:36<27:19:42,  1.13s/it]

129546 episode score is 785.08


 13%|█▎        | 13169/100000 [4:19:37<27:34:03,  1.14s/it]

129556 episode score is 819.56


 13%|█▎        | 13170/100000 [4:19:38<27:39:49,  1.15s/it]

129566 episode score is 813.67


 13%|█▎        | 13171/100000 [4:19:39<27:39:56,  1.15s/it]

129576 episode score is 801.80


 13%|█▎        | 13172/100000 [4:19:41<27:45:51,  1.15s/it]

129586 episode score is 820.74


 13%|█▎        | 13173/100000 [4:19:42<27:47:31,  1.15s/it]

129596 episode score is 814.71


 13%|█▎        | 13174/100000 [4:19:43<27:35:46,  1.14s/it]

129606 episode score is 787.47


 13%|█▎        | 13175/100000 [4:19:44<27:53:59,  1.16s/it]

129617 episode score is 745.90


 13%|█▎        | 13176/100000 [4:19:45<27:58:32,  1.16s/it]

129627 episode score is 808.65


 13%|█▎        | 13177/100000 [4:19:46<28:05:25,  1.16s/it]

129637 episode score is 827.52


 13%|█▎        | 13178/100000 [4:19:48<28:00:39,  1.16s/it]

129647 episode score is 795.68


 13%|█▎        | 13179/100000 [4:19:49<28:02:25,  1.16s/it]

129657 episode score is 810.03


 13%|█▎        | 13180/100000 [4:19:50<27:59:42,  1.16s/it]

129667 episode score is 797.03


 13%|█▎        | 13181/100000 [4:19:51<27:53:20,  1.16s/it]

129677 episode score is 797.42


 13%|█▎        | 13182/100000 [4:19:52<27:47:34,  1.15s/it]

129687 episode score is 800.06


 13%|█▎        | 13183/100000 [4:19:53<27:52:54,  1.16s/it]

129697 episode score is 814.59


 13%|█▎        | 13184/100000 [4:19:54<27:58:55,  1.16s/it]

129707 episode score is 825.80


 13%|█▎        | 13185/100000 [4:19:56<28:11:10,  1.17s/it]

129717 episode score is 824.14


 13%|█▎        | 13186/100000 [4:19:57<27:30:05,  1.14s/it]

129726 episode score is 845.37


 13%|█▎        | 13187/100000 [4:19:58<27:43:08,  1.15s/it]

129736 episode score is 815.82


 13%|█▎        | 13188/100000 [4:19:59<27:18:54,  1.13s/it]

129745 episode score is 838.41


 13%|█▎        | 13189/100000 [4:20:00<27:10:53,  1.13s/it]

129755 episode score is 761.88


 13%|█▎        | 13190/100000 [4:20:01<27:37:09,  1.15s/it]

129765 episode score is 825.70


 13%|█▎        | 13191/100000 [4:20:02<28:00:02,  1.16s/it]

129775 episode score is 833.25


 13%|█▎        | 13192/100000 [4:20:04<27:56:30,  1.16s/it]

129785 episode score is 803.73


 13%|█▎        | 13193/100000 [4:20:05<27:46:10,  1.15s/it]

129795 episode score is 792.12


 13%|█▎        | 13194/100000 [4:20:06<28:01:03,  1.16s/it]

129805 episode score is 835.43


 13%|█▎        | 13195/100000 [4:20:07<28:00:56,  1.16s/it]

129815 episode score is 822.14


 13%|█▎        | 13196/100000 [4:20:08<27:31:22,  1.14s/it]

129824 episode score is 856.60


 13%|█▎        | 13197/100000 [4:20:09<27:46:58,  1.15s/it]

129834 episode score is 821.19


 13%|█▎        | 13198/100000 [4:20:11<27:47:08,  1.15s/it]

129844 episode score is 810.28


 13%|█▎        | 13199/100000 [4:20:12<28:04:17,  1.16s/it]

129854 episode score is 829.96
129864 episode score is 836.58


 13%|█▎        | 13200/100000 [4:20:14<35:53:04,  1.49s/it]

Iteration 13200: Average test reward: 827.46


 13%|█▎        | 13201/100000 [4:20:15<33:37:28,  1.39s/it]

129874 episode score is 820.01


 13%|█▎        | 13202/100000 [4:20:16<31:58:45,  1.33s/it]

129884 episode score is 822.40


 13%|█▎        | 13203/100000 [4:20:18<30:55:21,  1.28s/it]

129894 episode score is 821.04


 13%|█▎        | 13204/100000 [4:20:19<30:16:07,  1.26s/it]

129904 episode score is 824.34


 13%|█▎        | 13205/100000 [4:20:20<29:41:36,  1.23s/it]

129914 episode score is 824.58


 13%|█▎        | 13206/100000 [4:20:21<29:16:10,  1.21s/it]

129924 episode score is 829.90


 13%|█▎        | 13207/100000 [4:20:22<29:00:26,  1.20s/it]

129934 episode score is 827.64


 13%|█▎        | 13208/100000 [4:20:23<28:15:05,  1.17s/it]

129943 episode score is 856.44


 13%|█▎        | 13209/100000 [4:20:24<27:54:39,  1.16s/it]

129953 episode score is 781.94


 13%|█▎        | 13210/100000 [4:20:26<27:30:00,  1.14s/it]

129963 episode score is 771.98


 13%|█▎        | 13211/100000 [4:20:27<27:26:04,  1.14s/it]

129973 episode score is 800.42


 13%|█▎        | 13212/100000 [4:20:28<27:29:28,  1.14s/it]

129983 episode score is 806.27


 13%|█▎        | 13213/100000 [4:20:29<27:21:27,  1.13s/it]

129993 episode score is 780.41


 13%|█▎        | 13214/100000 [4:20:30<27:19:16,  1.13s/it]

130003 episode score is 781.61


 13%|█▎        | 13215/100000 [4:20:31<27:18:39,  1.13s/it]

130013 episode score is 803.69


 13%|█▎        | 13216/100000 [4:20:32<27:02:42,  1.12s/it]

130023 episode score is 754.81


 13%|█▎        | 13217/100000 [4:20:33<27:03:44,  1.12s/it]

130033 episode score is 784.88


 13%|█▎        | 13218/100000 [4:20:35<26:53:10,  1.12s/it]

130043 episode score is 772.59


 13%|█▎        | 13219/100000 [4:20:36<26:54:49,  1.12s/it]

130053 episode score is 783.00


 13%|█▎        | 13220/100000 [4:20:37<27:00:52,  1.12s/it]

130063 episode score is 791.01


 13%|█▎        | 13221/100000 [4:20:38<27:13:01,  1.13s/it]

130073 episode score is 805.22


 13%|█▎        | 13222/100000 [4:20:39<27:24:56,  1.14s/it]

130083 episode score is 811.26


 13%|█▎        | 13223/100000 [4:20:40<27:21:29,  1.13s/it]

130093 episode score is 788.95


 13%|█▎        | 13224/100000 [4:20:41<27:03:45,  1.12s/it]

130103 episode score is 763.29


 13%|█▎        | 13225/100000 [4:20:42<27:12:45,  1.13s/it]

130113 episode score is 797.50


 13%|█▎        | 13226/100000 [4:20:44<27:05:13,  1.12s/it]

130123 episode score is 779.78


 13%|█▎        | 13227/100000 [4:20:45<26:49:48,  1.11s/it]

130133 episode score is 758.27


 13%|█▎        | 13228/100000 [4:20:46<27:39:49,  1.15s/it]

130143 episode score is 788.20


 13%|█▎        | 13229/100000 [4:20:47<27:45:30,  1.15s/it]

130153 episode score is 786.56


 13%|█▎        | 13230/100000 [4:20:48<27:35:34,  1.14s/it]

130163 episode score is 779.43


 13%|█▎        | 13231/100000 [4:20:49<27:07:17,  1.13s/it]

130172 episode score is 841.06


 13%|█▎        | 13232/100000 [4:20:50<27:23:36,  1.14s/it]

130182 episode score is 811.84


 13%|█▎        | 13233/100000 [4:20:52<27:22:48,  1.14s/it]

130192 episode score is 791.66


 13%|█▎        | 13234/100000 [4:20:53<27:35:57,  1.15s/it]

130202 episode score is 815.19


 13%|█▎        | 13235/100000 [4:20:54<27:46:09,  1.15s/it]

130212 episode score is 824.86


 13%|█▎        | 13236/100000 [4:20:55<27:39:03,  1.15s/it]

130222 episode score is 781.13


 13%|█▎        | 13237/100000 [4:20:56<27:22:48,  1.14s/it]

130232 episode score is 780.29


 13%|█▎        | 13238/100000 [4:20:57<27:23:21,  1.14s/it]

130242 episode score is 796.11


 13%|█▎        | 13239/100000 [4:20:58<27:26:56,  1.14s/it]

130252 episode score is 803.02


 13%|█▎        | 13240/100000 [4:21:00<27:11:12,  1.13s/it]

130262 episode score is 756.56


 13%|█▎        | 13241/100000 [4:21:01<27:24:28,  1.14s/it]

130272 episode score is 799.28


 13%|█▎        | 13242/100000 [4:21:02<27:19:41,  1.13s/it]

130282 episode score is 789.23


 13%|█▎        | 13243/100000 [4:21:03<27:17:02,  1.13s/it]

130292 episode score is 773.48


 13%|█▎        | 13244/100000 [4:21:04<27:44:01,  1.15s/it]

130302 episode score is 845.93


 13%|█▎        | 13245/100000 [4:21:05<27:55:30,  1.16s/it]

130312 episode score is 824.87


 13%|█▎        | 13246/100000 [4:21:06<27:23:36,  1.14s/it]

130321 episode score is 842.82


 13%|█▎        | 13247/100000 [4:21:07<27:11:10,  1.13s/it]

130330 episode score is 869.33


 13%|█▎        | 13248/100000 [4:21:09<27:18:07,  1.13s/it]

130340 episode score is 808.81


 13%|█▎        | 13249/100000 [4:21:10<27:34:56,  1.14s/it]

130350 episode score is 822.73


 13%|█▎        | 13250/100000 [4:21:11<27:35:32,  1.15s/it]

130360 episode score is 796.77


 13%|█▎        | 13251/100000 [4:21:12<27:17:22,  1.13s/it]

130370 episode score is 768.17


 13%|█▎        | 13252/100000 [4:21:13<27:20:01,  1.13s/it]

130380 episode score is 795.50


 13%|█▎        | 13253/100000 [4:21:14<26:59:21,  1.12s/it]

130390 episode score is 747.59


 13%|█▎        | 13254/100000 [4:21:15<26:53:30,  1.12s/it]

130400 episode score is 773.37


 13%|█▎        | 13255/100000 [4:21:17<27:24:09,  1.14s/it]

130410 episode score is 837.01


 13%|█▎        | 13256/100000 [4:21:18<27:46:35,  1.15s/it]

130420 episode score is 816.61


 13%|█▎        | 13257/100000 [4:21:19<27:54:54,  1.16s/it]

130431 episode score is 734.34


 13%|█▎        | 13258/100000 [4:21:20<27:46:03,  1.15s/it]

130441 episode score is 798.75


 13%|█▎        | 13259/100000 [4:21:21<27:48:03,  1.15s/it]

130451 episode score is 816.93


 13%|█▎        | 13260/100000 [4:21:22<27:25:39,  1.14s/it]

130461 episode score is 764.04


 13%|█▎        | 13261/100000 [4:21:23<27:24:10,  1.14s/it]

130471 episode score is 793.22


 13%|█▎        | 13262/100000 [4:21:25<27:20:24,  1.13s/it]

130481 episode score is 795.56


 13%|█▎        | 13263/100000 [4:21:26<27:02:37,  1.12s/it]

130491 episode score is 763.86


 13%|█▎        | 13264/100000 [4:21:27<27:03:17,  1.12s/it]

130501 episode score is 779.29


 13%|█▎        | 13265/100000 [4:21:28<27:23:59,  1.14s/it]

130511 episode score is 815.49


 13%|█▎        | 13266/100000 [4:21:29<27:36:20,  1.15s/it]

130521 episode score is 828.24


 13%|█▎        | 13267/100000 [4:21:30<27:38:05,  1.15s/it]

130531 episode score is 806.08


 13%|█▎        | 13268/100000 [4:21:31<27:42:21,  1.15s/it]

130541 episode score is 812.46


 13%|█▎        | 13269/100000 [4:21:33<27:56:43,  1.16s/it]

130551 episode score is 819.63


 13%|█▎        | 13270/100000 [4:21:34<27:52:23,  1.16s/it]

130561 episode score is 805.97


 13%|█▎        | 13271/100000 [4:21:35<27:53:46,  1.16s/it]

130571 episode score is 815.28


 13%|█▎        | 13272/100000 [4:21:36<27:53:18,  1.16s/it]

130581 episode score is 811.62


 13%|█▎        | 13273/100000 [4:21:37<28:09:14,  1.17s/it]

130591 episode score is 824.75


 13%|█▎        | 13274/100000 [4:21:38<27:34:02,  1.14s/it]

130600 episode score is 842.38


 13%|█▎        | 13275/100000 [4:21:40<27:54:19,  1.16s/it]

130610 episode score is 837.57


 13%|█▎        | 13276/100000 [4:21:41<28:01:40,  1.16s/it]

130620 episode score is 822.02


 13%|█▎        | 13277/100000 [4:21:42<27:54:33,  1.16s/it]

130630 episode score is 796.54


 13%|█▎        | 13278/100000 [4:21:43<28:05:16,  1.17s/it]

130640 episode score is 828.95


 13%|█▎        | 13279/100000 [4:21:44<27:53:14,  1.16s/it]

130650 episode score is 800.40


 13%|█▎        | 13280/100000 [4:21:45<27:45:02,  1.15s/it]

130660 episode score is 791.25


 13%|█▎        | 13281/100000 [4:21:47<27:43:15,  1.15s/it]

130670 episode score is 793.02


 13%|█▎        | 13282/100000 [4:21:48<27:46:06,  1.15s/it]

130680 episode score is 799.02


 13%|█▎        | 13283/100000 [4:21:49<27:41:39,  1.15s/it]

130690 episode score is 787.77


 13%|█▎        | 13284/100000 [4:21:50<27:18:12,  1.13s/it]

130700 episode score is 759.27


 13%|█▎        | 13285/100000 [4:21:51<27:18:33,  1.13s/it]

130710 episode score is 791.42


 13%|█▎        | 13286/100000 [4:21:52<28:11:18,  1.17s/it]

130720 episode score is 815.69


 13%|█▎        | 13287/100000 [4:21:53<27:59:49,  1.16s/it]

130730 episode score is 802.09


 13%|█▎        | 13288/100000 [4:21:55<27:55:30,  1.16s/it]

130740 episode score is 813.81


 13%|█▎        | 13289/100000 [4:21:56<27:24:06,  1.14s/it]

130749 episode score is 858.36


 13%|█▎        | 13290/100000 [4:21:57<27:11:32,  1.13s/it]

130759 episode score is 759.77


 13%|█▎        | 13291/100000 [4:21:58<27:14:32,  1.13s/it]

130769 episode score is 789.08


 13%|█▎        | 13292/100000 [4:21:59<27:23:50,  1.14s/it]

130779 episode score is 802.51


 13%|█▎        | 13293/100000 [4:22:00<27:18:03,  1.13s/it]

130789 episode score is 785.73


 13%|█▎        | 13294/100000 [4:22:01<27:09:02,  1.13s/it]

130799 episode score is 761.96


 13%|█▎        | 13295/100000 [4:22:02<27:30:48,  1.14s/it]

130809 episode score is 804.46


 13%|█▎        | 13296/100000 [4:22:04<27:44:57,  1.15s/it]

130819 episode score is 835.34


 13%|█▎        | 13297/100000 [4:22:05<27:25:38,  1.14s/it]

130829 episode score is 773.02


 13%|█▎        | 13298/100000 [4:22:06<27:15:39,  1.13s/it]

130839 episode score is 781.42


 13%|█▎        | 13299/100000 [4:22:07<26:57:48,  1.12s/it]

130849 episode score is 754.49
130859 episode score is 825.00


 13%|█▎        | 13300/100000 [4:22:09<34:48:58,  1.45s/it]

Iteration 13300: Average test reward: 799.89


 13%|█▎        | 13301/100000 [4:22:10<32:26:39,  1.35s/it]

130869 episode score is 782.57


 13%|█▎        | 13302/100000 [4:22:11<30:41:10,  1.27s/it]

130879 episode score is 768.56


 13%|█▎        | 13303/100000 [4:22:13<29:35:09,  1.23s/it]

130889 episode score is 778.93


 13%|█▎        | 13304/100000 [4:22:14<29:09:02,  1.21s/it]

130899 episode score is 812.59


 13%|█▎        | 13305/100000 [4:22:15<28:49:33,  1.20s/it]

130909 episode score is 802.31


 13%|█▎        | 13306/100000 [4:22:16<28:17:50,  1.18s/it]

130919 episode score is 786.74


 13%|█▎        | 13307/100000 [4:22:17<27:39:27,  1.15s/it]

130929 episode score is 742.55


 13%|█▎        | 13308/100000 [4:22:18<27:41:36,  1.15s/it]

130939 episode score is 797.89


 13%|█▎        | 13309/100000 [4:22:19<27:27:59,  1.14s/it]

130949 episode score is 771.22


 13%|█▎        | 13310/100000 [4:22:21<27:33:37,  1.14s/it]

130959 episode score is 806.74


 13%|█▎        | 13311/100000 [4:22:22<27:30:09,  1.14s/it]

130969 episode score is 790.45


 13%|█▎        | 13312/100000 [4:22:23<27:24:26,  1.14s/it]

130979 episode score is 786.32


 13%|█▎        | 13313/100000 [4:22:24<27:33:32,  1.14s/it]

130989 episode score is 814.81


 13%|█▎        | 13314/100000 [4:22:25<27:36:30,  1.15s/it]

130999 episode score is 804.63


 13%|█▎        | 13315/100000 [4:22:26<27:16:33,  1.13s/it]

131009 episode score is 770.55


 13%|█▎        | 13316/100000 [4:22:27<27:06:12,  1.13s/it]

131019 episode score is 767.69


 13%|█▎        | 13317/100000 [4:22:28<27:13:34,  1.13s/it]

131029 episode score is 803.46


 13%|█▎        | 13318/100000 [4:22:30<27:20:19,  1.14s/it]

131039 episode score is 798.81


 13%|█▎        | 13319/100000 [4:22:31<27:03:43,  1.12s/it]

131049 episode score is 750.34


 13%|█▎        | 13320/100000 [4:22:32<27:25:44,  1.14s/it]

131059 episode score is 802.33


 13%|█▎        | 13321/100000 [4:22:33<27:08:45,  1.13s/it]

131069 episode score is 758.73


 13%|█▎        | 13322/100000 [4:22:34<26:50:03,  1.11s/it]

131079 episode score is 752.92


 13%|█▎        | 13323/100000 [4:22:35<27:07:44,  1.13s/it]

131089 episode score is 802.72


 13%|█▎        | 13324/100000 [4:22:36<26:45:07,  1.11s/it]

131098 episode score is 838.39


 13%|█▎        | 13325/100000 [4:22:37<27:02:14,  1.12s/it]

131108 episode score is 804.26


 13%|█▎        | 13326/100000 [4:22:39<27:17:18,  1.13s/it]

131118 episode score is 806.86


 13%|█▎        | 13327/100000 [4:22:40<27:05:45,  1.13s/it]

131128 episode score is 770.84


 13%|█▎        | 13328/100000 [4:22:41<27:07:20,  1.13s/it]

131138 episode score is 783.12


 13%|█▎        | 13329/100000 [4:22:42<27:23:00,  1.14s/it]

131148 episode score is 815.89


 13%|█▎        | 13330/100000 [4:22:43<27:20:58,  1.14s/it]

131158 episode score is 796.47


 13%|█▎        | 13331/100000 [4:22:44<27:40:31,  1.15s/it]

131168 episode score is 832.61


 13%|█▎        | 13332/100000 [4:22:45<27:41:42,  1.15s/it]

131178 episode score is 808.56


 13%|█▎        | 13333/100000 [4:22:47<27:45:26,  1.15s/it]

131188 episode score is 819.81


 13%|█▎        | 13334/100000 [4:22:48<27:58:30,  1.16s/it]

131198 episode score is 809.84


 13%|█▎        | 13335/100000 [4:22:49<28:08:00,  1.17s/it]

131208 episode score is 831.68


 13%|█▎        | 13336/100000 [4:22:50<27:30:06,  1.14s/it]

131217 episode score is 852.80


 13%|█▎        | 13337/100000 [4:22:51<27:38:23,  1.15s/it]

131227 episode score is 816.69


 13%|█▎        | 13338/100000 [4:22:52<27:40:54,  1.15s/it]

131237 episode score is 810.38


 13%|█▎        | 13339/100000 [4:22:53<27:34:39,  1.15s/it]

131247 episode score is 788.70


 13%|█▎        | 13340/100000 [4:22:55<27:16:52,  1.13s/it]

131257 episode score is 775.10


 13%|█▎        | 13341/100000 [4:22:56<27:18:44,  1.13s/it]

131267 episode score is 799.01


 13%|█▎        | 13342/100000 [4:22:57<27:15:45,  1.13s/it]

131277 episode score is 801.63


 13%|█▎        | 13343/100000 [4:22:58<27:13:31,  1.13s/it]

131287 episode score is 768.95


 13%|█▎        | 13344/100000 [4:22:59<27:02:11,  1.12s/it]

131297 episode score is 777.16


 13%|█▎        | 13345/100000 [4:23:00<26:57:13,  1.12s/it]

131307 episode score is 778.59


 13%|█▎        | 13346/100000 [4:23:01<27:00:36,  1.12s/it]

131317 episode score is 794.48


 13%|█▎        | 13347/100000 [4:23:02<27:10:04,  1.13s/it]

131327 episode score is 790.71


 13%|█▎        | 13348/100000 [4:23:04<27:05:18,  1.13s/it]

131337 episode score is 774.50


 13%|█▎        | 13349/100000 [4:23:05<27:07:13,  1.13s/it]

131347 episode score is 788.58


 13%|█▎        | 13350/100000 [4:23:06<27:06:44,  1.13s/it]

131357 episode score is 798.12


 13%|█▎        | 13351/100000 [4:23:07<27:05:15,  1.13s/it]

131367 episode score is 786.53


 13%|█▎        | 13352/100000 [4:23:08<27:01:44,  1.12s/it]

131377 episode score is 788.16


 13%|█▎        | 13353/100000 [4:23:09<27:03:46,  1.12s/it]

131387 episode score is 787.72


 13%|█▎        | 13354/100000 [4:23:10<26:55:25,  1.12s/it]

131397 episode score is 781.42


 13%|█▎        | 13355/100000 [4:23:11<26:42:13,  1.11s/it]

131407 episode score is 751.65


 13%|█▎        | 13356/100000 [4:23:13<26:39:59,  1.11s/it]

131417 episode score is 776.32


 13%|█▎        | 13357/100000 [4:23:14<26:46:20,  1.11s/it]

131427 episode score is 791.14


 13%|█▎        | 13358/100000 [4:23:15<26:39:58,  1.11s/it]

131437 episode score is 760.03


 13%|█▎        | 13359/100000 [4:23:16<26:46:40,  1.11s/it]

131447 episode score is 793.43


 13%|█▎        | 13360/100000 [4:23:17<27:42:19,  1.15s/it]

131457 episode score is 796.74


 13%|█▎        | 13361/100000 [4:23:18<27:35:47,  1.15s/it]

131467 episode score is 793.12


 13%|█▎        | 13362/100000 [4:23:19<27:18:56,  1.14s/it]

131477 episode score is 768.93


 13%|█▎        | 13363/100000 [4:23:20<27:08:12,  1.13s/it]

131487 episode score is 778.49


 13%|█▎        | 13364/100000 [4:23:22<27:31:15,  1.14s/it]

131497 episode score is 838.35


 13%|█▎        | 13365/100000 [4:23:23<27:14:51,  1.13s/it]

131507 episode score is 772.71


 13%|█▎        | 13366/100000 [4:23:24<27:02:00,  1.12s/it]

131517 episode score is 772.23


 13%|█▎        | 13367/100000 [4:23:25<26:48:08,  1.11s/it]

131527 episode score is 767.15


 13%|█▎        | 13368/100000 [4:23:26<26:57:46,  1.12s/it]

131537 episode score is 809.14


 13%|█▎        | 13369/100000 [4:23:27<26:47:42,  1.11s/it]

131547 episode score is 776.78


 13%|█▎        | 13370/100000 [4:23:28<26:45:39,  1.11s/it]

131557 episode score is 783.85


 13%|█▎        | 13371/100000 [4:23:29<26:50:45,  1.12s/it]

131567 episode score is 788.25


 13%|█▎        | 13372/100000 [4:23:30<26:37:53,  1.11s/it]

131577 episode score is 761.63


 13%|█▎        | 13373/100000 [4:23:32<26:29:56,  1.10s/it]

131587 episode score is 752.64


 13%|█▎        | 13374/100000 [4:23:33<26:46:30,  1.11s/it]

131597 episode score is 792.18


 13%|█▎        | 13375/100000 [4:23:34<26:50:59,  1.12s/it]

131607 episode score is 785.17


 13%|█▎        | 13376/100000 [4:23:35<27:16:58,  1.13s/it]

131617 episode score is 831.51


 13%|█▎        | 13377/100000 [4:23:36<27:25:05,  1.14s/it]

131627 episode score is 814.34


 13%|█▎        | 13378/100000 [4:23:37<27:28:24,  1.14s/it]

131637 episode score is 810.53


 13%|█▎        | 13379/100000 [4:23:38<27:43:56,  1.15s/it]

131647 episode score is 829.57


 13%|█▎        | 13380/100000 [4:23:40<28:02:14,  1.17s/it]

131657 episode score is 844.79


 13%|█▎        | 13381/100000 [4:23:41<28:02:57,  1.17s/it]

131667 episode score is 819.27


 13%|█▎        | 13382/100000 [4:23:42<27:57:50,  1.16s/it]

131677 episode score is 811.39


 13%|█▎        | 13383/100000 [4:23:43<28:13:38,  1.17s/it]

131687 episode score is 846.09


 13%|█▎        | 13384/100000 [4:23:44<27:52:43,  1.16s/it]

131697 episode score is 796.79


 13%|█▎        | 13385/100000 [4:23:45<27:36:34,  1.15s/it]

131707 episode score is 786.14


 13%|█▎        | 13386/100000 [4:23:47<27:26:12,  1.14s/it]

131717 episode score is 788.82


 13%|█▎        | 13387/100000 [4:23:48<27:45:23,  1.15s/it]

131727 episode score is 829.12


 13%|█▎        | 13388/100000 [4:23:49<27:36:51,  1.15s/it]

131737 episode score is 794.24


 13%|█▎        | 13389/100000 [4:23:50<27:26:27,  1.14s/it]

131747 episode score is 784.40


 13%|█▎        | 13390/100000 [4:23:51<27:29:04,  1.14s/it]

131757 episode score is 799.61


 13%|█▎        | 13391/100000 [4:23:52<27:38:00,  1.15s/it]

131767 episode score is 825.79


 13%|█▎        | 13392/100000 [4:23:53<27:21:19,  1.14s/it]

131777 episode score is 778.62


 13%|█▎        | 13393/100000 [4:23:55<27:40:47,  1.15s/it]

131787 episode score is 836.25


 13%|█▎        | 13394/100000 [4:23:56<27:41:10,  1.15s/it]

131797 episode score is 816.21


 13%|█▎        | 13395/100000 [4:23:57<27:42:29,  1.15s/it]

131807 episode score is 816.60


 13%|█▎        | 13396/100000 [4:23:58<27:23:46,  1.14s/it]

131817 episode score is 760.30


 13%|█▎        | 13397/100000 [4:23:59<27:10:50,  1.13s/it]

131827 episode score is 773.00


 13%|█▎        | 13398/100000 [4:24:00<27:24:47,  1.14s/it]

131837 episode score is 821.51


 13%|█▎        | 13399/100000 [4:24:02<27:54:02,  1.16s/it]

131847 episode score is 754.40
131857 episode score is 789.90


 13%|█▎        | 13400/100000 [4:24:04<35:01:04,  1.46s/it]

Iteration 13400: Average test reward: 791.97


 13%|█▎        | 13401/100000 [4:24:05<32:36:39,  1.36s/it]

131867 episode score is 789.11


 13%|█▎        | 13402/100000 [4:24:06<31:02:28,  1.29s/it]

131877 episode score is 798.80


 13%|█▎        | 13403/100000 [4:24:07<29:41:24,  1.23s/it]

131887 episode score is 774.13


 13%|█▎        | 13404/100000 [4:24:08<28:46:19,  1.20s/it]

131897 episode score is 767.63


 13%|█▎        | 13405/100000 [4:24:09<28:09:36,  1.17s/it]

131907 episode score is 775.27


 13%|█▎        | 13406/100000 [4:24:10<27:56:44,  1.16s/it]

131917 episode score is 789.36


 13%|█▎        | 13407/100000 [4:24:12<27:57:34,  1.16s/it]

131927 episode score is 817.81


 13%|█▎        | 13408/100000 [4:24:13<27:41:43,  1.15s/it]

131937 episode score is 774.19


 13%|█▎        | 13409/100000 [4:24:14<27:41:19,  1.15s/it]

131947 episode score is 816.79


 13%|█▎        | 13410/100000 [4:24:15<27:33:37,  1.15s/it]

131957 episode score is 791.71


 13%|█▎        | 13411/100000 [4:24:16<27:48:26,  1.16s/it]

131967 episode score is 835.76


 13%|█▎        | 13412/100000 [4:24:17<27:56:39,  1.16s/it]

131977 episode score is 809.04


 13%|█▎        | 13413/100000 [4:24:18<27:55:34,  1.16s/it]

131987 episode score is 807.06


 13%|█▎        | 13414/100000 [4:24:20<27:38:08,  1.15s/it]

131997 episode score is 770.21


 13%|█▎        | 13415/100000 [4:24:21<27:53:17,  1.16s/it]

132007 episode score is 846.70


 13%|█▎        | 13416/100000 [4:24:22<27:33:23,  1.15s/it]

132017 episode score is 774.92


 13%|█▎        | 13417/100000 [4:24:23<27:26:01,  1.14s/it]

132027 episode score is 791.87


 13%|█▎        | 13418/100000 [4:24:24<27:03:22,  1.12s/it]

132037 episode score is 754.89


 13%|█▎        | 13419/100000 [4:24:25<27:03:41,  1.13s/it]

132047 episode score is 797.85


 13%|█▎        | 13420/100000 [4:24:26<27:02:57,  1.12s/it]

132057 episode score is 793.03


 13%|█▎        | 13421/100000 [4:24:27<27:06:29,  1.13s/it]

132067 episode score is 794.27


 13%|█▎        | 13422/100000 [4:24:29<27:09:07,  1.13s/it]

132077 episode score is 795.39


 13%|█▎        | 13423/100000 [4:24:30<26:58:46,  1.12s/it]

132087 episode score is 765.80


 13%|█▎        | 13424/100000 [4:24:31<27:11:23,  1.13s/it]

132097 episode score is 816.93


 13%|█▎        | 13425/100000 [4:24:32<27:41:07,  1.15s/it]

132107 episode score is 822.44


 13%|█▎        | 13426/100000 [4:24:33<27:41:23,  1.15s/it]

132117 episode score is 812.33


 13%|█▎        | 13427/100000 [4:24:34<27:23:12,  1.14s/it]

132127 episode score is 778.44


 13%|█▎        | 13428/100000 [4:24:35<27:03:31,  1.13s/it]

132137 episode score is 761.01


 13%|█▎        | 13429/100000 [4:24:37<26:52:45,  1.12s/it]

132147 episode score is 772.02


 13%|█▎        | 13430/100000 [4:24:38<26:50:27,  1.12s/it]

132157 episode score is 776.18


 13%|█▎        | 13431/100000 [4:24:39<26:43:50,  1.11s/it]

132167 episode score is 764.64


 13%|█▎        | 13432/100000 [4:24:40<26:44:14,  1.11s/it]

132177 episode score is 770.65


 13%|█▎        | 13433/100000 [4:24:41<27:11:44,  1.13s/it]

132187 episode score is 834.01


 13%|█▎        | 13434/100000 [4:24:42<26:51:27,  1.12s/it]

132196 episode score is 853.89


 13%|█▎        | 13435/100000 [4:24:43<26:58:30,  1.12s/it]

132206 episode score is 794.18


 13%|█▎        | 13436/100000 [4:24:44<27:18:07,  1.14s/it]

132216 episode score is 827.98


 13%|█▎        | 13437/100000 [4:24:46<27:32:02,  1.15s/it]

132226 episode score is 833.47


 13%|█▎        | 13438/100000 [4:24:47<27:27:27,  1.14s/it]

132236 episode score is 794.54


 13%|█▎        | 13439/100000 [4:24:48<27:29:55,  1.14s/it]

132246 episode score is 796.28


 13%|█▎        | 13440/100000 [4:24:49<27:51:49,  1.16s/it]

132256 episode score is 834.63


 13%|█▎        | 13441/100000 [4:24:50<27:58:29,  1.16s/it]

132266 episode score is 823.87


 13%|█▎        | 13442/100000 [4:24:51<27:50:47,  1.16s/it]

132276 episode score is 804.18


 13%|█▎        | 13443/100000 [4:24:53<27:53:16,  1.16s/it]

132286 episode score is 818.89


 13%|█▎        | 13444/100000 [4:24:54<27:36:03,  1.15s/it]

132296 episode score is 793.93


 13%|█▎        | 13445/100000 [4:24:55<27:51:31,  1.16s/it]

132306 episode score is 832.75


 13%|█▎        | 13446/100000 [4:24:56<27:46:40,  1.16s/it]

132316 episode score is 812.06


 13%|█▎        | 13447/100000 [4:24:57<27:56:20,  1.16s/it]

132327 episode score is 742.71


 13%|█▎        | 13448/100000 [4:24:58<27:54:35,  1.16s/it]

132337 episode score is 820.21


 13%|█▎        | 13449/100000 [4:25:00<28:05:21,  1.17s/it]

132347 episode score is 831.76


 13%|█▎        | 13450/100000 [4:25:01<27:41:07,  1.15s/it]

132356 episode score is 864.47


 13%|█▎        | 13451/100000 [4:25:02<27:52:38,  1.16s/it]

132366 episode score is 829.19


 13%|█▎        | 13452/100000 [4:25:03<28:05:44,  1.17s/it]

132376 episode score is 837.75


 13%|█▎        | 13453/100000 [4:25:04<27:56:35,  1.16s/it]

132386 episode score is 806.84


 13%|█▎        | 13454/100000 [4:25:05<27:53:10,  1.16s/it]

132396 episode score is 810.23


 13%|█▎        | 13455/100000 [4:25:06<27:59:37,  1.16s/it]

132406 episode score is 827.52


 13%|█▎        | 13456/100000 [4:25:08<27:23:15,  1.14s/it]

132415 episode score is 856.84


 13%|█▎        | 13457/100000 [4:25:09<27:33:12,  1.15s/it]

132425 episode score is 814.37


 13%|█▎        | 13458/100000 [4:25:10<27:35:08,  1.15s/it]

132435 episode score is 801.33


 13%|█▎        | 13459/100000 [4:25:11<27:06:18,  1.13s/it]

132444 episode score is 839.90


 13%|█▎        | 13460/100000 [4:25:12<27:56:30,  1.16s/it]

132454 episode score is 795.34


 13%|█▎        | 13461/100000 [4:25:13<27:25:52,  1.14s/it]

132463 episode score is 864.98


 13%|█▎        | 13462/100000 [4:25:14<26:58:41,  1.12s/it]

132472 episode score is 843.18


 13%|█▎        | 13463/100000 [4:25:16<27:28:00,  1.14s/it]

132482 episode score is 817.09


 13%|█▎        | 13464/100000 [4:25:17<27:36:12,  1.15s/it]

132492 episode score is 806.77


 13%|█▎        | 13465/100000 [4:25:18<27:27:03,  1.14s/it]

132501 episode score is 869.61


 13%|█▎        | 13466/100000 [4:25:19<27:14:25,  1.13s/it]

132510 episode score is 869.12


 13%|█▎        | 13467/100000 [4:25:20<27:26:49,  1.14s/it]

132520 episode score is 816.06


 13%|█▎        | 13468/100000 [4:25:21<27:29:28,  1.14s/it]

132530 episode score is 795.17


 13%|█▎        | 13469/100000 [4:25:22<27:40:47,  1.15s/it]

132540 episode score is 820.95


 13%|█▎        | 13470/100000 [4:25:24<27:49:35,  1.16s/it]

132550 episode score is 827.19


 13%|█▎        | 13471/100000 [4:25:25<27:23:45,  1.14s/it]

132559 episode score is 862.14


 13%|█▎        | 13472/100000 [4:25:26<26:50:14,  1.12s/it]

132568 episode score is 837.00


 13%|█▎        | 13473/100000 [4:25:27<27:12:56,  1.13s/it]

132578 episode score is 827.40


 13%|█▎        | 13474/100000 [4:25:28<26:56:44,  1.12s/it]

132587 episode score is 870.42


 13%|█▎        | 13475/100000 [4:25:29<27:07:59,  1.13s/it]

132597 episode score is 810.69


 13%|█▎        | 13476/100000 [4:25:30<27:31:52,  1.15s/it]

132607 episode score is 847.16


 13%|█▎        | 13477/100000 [4:25:31<26:56:52,  1.12s/it]

132616 episode score is 849.90


 13%|█▎        | 13478/100000 [4:25:33<27:07:52,  1.13s/it]

132626 episode score is 804.30


 13%|█▎        | 13479/100000 [4:25:34<27:12:51,  1.13s/it]

132636 episode score is 817.32


 13%|█▎        | 13480/100000 [4:25:35<27:25:44,  1.14s/it]

132646 episode score is 840.39


 13%|█▎        | 13481/100000 [4:25:36<26:58:24,  1.12s/it]

132656 episode score is 752.22


 13%|█▎        | 13482/100000 [4:25:37<27:19:02,  1.14s/it]

132667 episode score is 735.72


 13%|█▎        | 13483/100000 [4:25:38<27:09:53,  1.13s/it]

132677 episode score is 793.70


 13%|█▎        | 13484/100000 [4:25:39<27:01:55,  1.12s/it]

132687 episode score is 777.51


 13%|█▎        | 13485/100000 [4:25:41<27:22:23,  1.14s/it]

132697 episode score is 839.12


 13%|█▎        | 13486/100000 [4:25:42<27:24:34,  1.14s/it]

132707 episode score is 813.97


 13%|█▎        | 13487/100000 [4:25:43<27:41:54,  1.15s/it]

132717 episode score is 834.00


 13%|█▎        | 13488/100000 [4:25:44<27:07:09,  1.13s/it]

132727 episode score is 753.94


 13%|█▎        | 13489/100000 [4:25:45<26:57:52,  1.12s/it]

132737 episode score is 778.03


 13%|█▎        | 13490/100000 [4:25:46<26:51:01,  1.12s/it]

132747 episode score is 783.83


 13%|█▎        | 13491/100000 [4:25:47<27:03:13,  1.13s/it]

132757 episode score is 803.05


 13%|█▎        | 13492/100000 [4:25:48<27:05:11,  1.13s/it]

132767 episode score is 799.46


 13%|█▎        | 13493/100000 [4:25:50<27:06:35,  1.13s/it]

132777 episode score is 789.68


 13%|█▎        | 13494/100000 [4:25:51<27:16:46,  1.14s/it]

132787 episode score is 827.76


 13%|█▎        | 13495/100000 [4:25:52<27:23:47,  1.14s/it]

132797 episode score is 822.51


 13%|█▎        | 13496/100000 [4:25:53<27:44:10,  1.15s/it]

132807 episode score is 847.42


 13%|█▎        | 13497/100000 [4:25:54<27:11:44,  1.13s/it]

132817 episode score is 764.31


 13%|█▎        | 13498/100000 [4:25:55<27:14:40,  1.13s/it]

132827 episode score is 812.66


 13%|█▎        | 13499/100000 [4:25:56<27:09:41,  1.13s/it]

132837 episode score is 796.07
132847 episode score is 799.12


 14%|█▎        | 13500/100000 [4:25:59<34:45:20,  1.45s/it]

Iteration 13500: Average test reward: 860.83


 14%|█▎        | 13501/100000 [4:26:00<32:46:11,  1.36s/it]

132858 episode score is 737.96


 14%|█▎        | 13502/100000 [4:26:01<30:51:42,  1.28s/it]

132868 episode score is 774.28


 14%|█▎        | 13503/100000 [4:26:02<29:52:16,  1.24s/it]

132878 episode score is 805.56


 14%|█▎        | 13504/100000 [4:26:03<29:03:22,  1.21s/it]

132888 episode score is 796.23


 14%|█▎        | 13505/100000 [4:26:04<28:15:12,  1.18s/it]

132898 episode score is 777.53


 14%|█▎        | 13506/100000 [4:26:05<27:47:57,  1.16s/it]

132908 episode score is 782.57


 14%|█▎        | 13507/100000 [4:26:06<27:11:11,  1.13s/it]

132918 episode score is 750.16


 14%|█▎        | 13508/100000 [4:26:08<27:15:08,  1.13s/it]

132928 episode score is 815.09


 14%|█▎        | 13509/100000 [4:26:09<27:21:16,  1.14s/it]

132938 episode score is 820.18


 14%|█▎        | 13510/100000 [4:26:10<27:38:21,  1.15s/it]

132949 episode score is 753.22


 14%|█▎        | 13511/100000 [4:26:11<27:39:57,  1.15s/it]

132959 episode score is 822.20


 14%|█▎        | 13512/100000 [4:26:12<27:35:34,  1.15s/it]

132969 episode score is 808.46


 14%|█▎        | 13513/100000 [4:26:13<27:43:36,  1.15s/it]

132979 episode score is 815.55


 14%|█▎        | 13514/100000 [4:26:14<27:35:06,  1.15s/it]

132989 episode score is 797.56


 14%|█▎        | 13515/100000 [4:26:16<27:17:46,  1.14s/it]

132999 episode score is 784.87


 14%|█▎        | 13516/100000 [4:26:17<26:48:00,  1.12s/it]

133009 episode score is 741.87


 14%|█▎        | 13517/100000 [4:26:18<26:31:37,  1.10s/it]

133019 episode score is 747.57


 14%|█▎        | 13518/100000 [4:26:19<26:52:59,  1.12s/it]

133029 episode score is 814.28


 14%|█▎        | 13519/100000 [4:26:20<27:00:43,  1.12s/it]

133039 episode score is 806.46


 14%|█▎        | 13520/100000 [4:26:21<26:36:43,  1.11s/it]

133049 episode score is 750.73


 14%|█▎        | 13521/100000 [4:26:22<26:39:11,  1.11s/it]

133059 episode score is 788.01


 14%|█▎        | 13522/100000 [4:26:23<26:43:40,  1.11s/it]

133069 episode score is 782.67


 14%|█▎        | 13523/100000 [4:26:24<26:59:15,  1.12s/it]

133079 episode score is 814.50


 14%|█▎        | 13524/100000 [4:26:26<27:33:45,  1.15s/it]

133089 episode score is 782.40


 14%|█▎        | 13525/100000 [4:26:27<27:12:42,  1.13s/it]

133099 episode score is 775.08


 14%|█▎        | 13526/100000 [4:26:28<26:59:58,  1.12s/it]

133109 episode score is 777.13


 14%|█▎        | 13527/100000 [4:26:29<26:57:38,  1.12s/it]

133119 episode score is 797.01


 14%|█▎        | 13528/100000 [4:26:30<27:00:05,  1.12s/it]

133129 episode score is 797.05


 14%|█▎        | 13529/100000 [4:26:31<26:48:59,  1.12s/it]

133139 episode score is 763.77


 14%|█▎        | 13530/100000 [4:26:32<27:00:54,  1.12s/it]

133149 episode score is 806.86


 14%|█▎        | 13531/100000 [4:26:33<26:51:19,  1.12s/it]

133159 episode score is 773.97


 14%|█▎        | 13532/100000 [4:26:35<26:30:19,  1.10s/it]

133169 episode score is 749.86


 14%|█▎        | 13533/100000 [4:26:36<26:38:51,  1.11s/it]

133179 episode score is 790.49


 14%|█▎        | 13534/100000 [4:26:37<26:35:11,  1.11s/it]

133189 episode score is 775.61


 14%|█▎        | 13535/100000 [4:26:38<26:50:18,  1.12s/it]

133199 episode score is 806.30


 14%|█▎        | 13536/100000 [4:26:39<26:51:41,  1.12s/it]

133209 episode score is 790.39


 14%|█▎        | 13537/100000 [4:26:40<26:55:55,  1.12s/it]

133219 episode score is 796.87


 14%|█▎        | 13538/100000 [4:26:41<26:39:26,  1.11s/it]

133229 episode score is 760.42


 14%|█▎        | 13539/100000 [4:26:42<26:40:15,  1.11s/it]

133239 episode score is 779.27


 14%|█▎        | 13540/100000 [4:26:43<26:29:15,  1.10s/it]

133249 episode score is 758.81


 14%|█▎        | 13541/100000 [4:26:44<26:17:03,  1.09s/it]

133259 episode score is 754.30


 14%|█▎        | 13542/100000 [4:26:46<26:40:16,  1.11s/it]

133270 episode score is 727.44


 14%|█▎        | 13543/100000 [4:26:47<26:26:04,  1.10s/it]

133280 episode score is 749.24


 14%|█▎        | 13544/100000 [4:26:48<26:34:01,  1.11s/it]

133290 episode score is 781.09


 14%|█▎        | 13545/100000 [4:26:49<26:49:37,  1.12s/it]

133300 episode score is 802.40


 14%|█▎        | 13546/100000 [4:26:50<26:39:42,  1.11s/it]

133310 episode score is 768.93


 14%|█▎        | 13547/100000 [4:26:51<26:40:45,  1.11s/it]

133320 episode score is 797.74


 14%|█▎        | 13548/100000 [4:26:52<26:55:05,  1.12s/it]

133331 episode score is 722.88


 14%|█▎        | 13549/100000 [4:26:54<27:17:58,  1.14s/it]

133342 episode score is 738.64


 14%|█▎        | 13550/100000 [4:26:55<27:27:56,  1.14s/it]

133353 episode score is 743.64


 14%|█▎        | 13551/100000 [4:26:56<27:00:52,  1.12s/it]

133363 episode score is 756.38


 14%|█▎        | 13552/100000 [4:26:57<27:13:23,  1.13s/it]

133374 episode score is 733.93


 14%|█▎        | 13553/100000 [4:26:58<27:31:12,  1.15s/it]

133385 episode score is 743.80


 14%|█▎        | 13554/100000 [4:26:59<27:15:37,  1.14s/it]

133395 episode score is 771.80


 14%|█▎        | 13555/100000 [4:27:00<27:23:47,  1.14s/it]

133406 episode score is 731.45


 14%|█▎        | 13556/100000 [4:27:01<27:25:41,  1.14s/it]

133417 episode score is 723.44


 14%|█▎        | 13557/100000 [4:27:03<27:07:43,  1.13s/it]

133427 episode score is 755.75


 14%|█▎        | 13558/100000 [4:27:04<27:01:27,  1.13s/it]

133437 episode score is 778.25


 14%|█▎        | 13559/100000 [4:27:05<26:46:27,  1.12s/it]

133447 episode score is 762.56


 14%|█▎        | 13560/100000 [4:27:06<26:52:42,  1.12s/it]

133457 episode score is 796.26


 14%|█▎        | 13561/100000 [4:27:07<26:35:13,  1.11s/it]

133467 episode score is 752.33


 14%|█▎        | 13562/100000 [4:27:08<26:33:23,  1.11s/it]

133477 episode score is 776.36


 14%|█▎        | 13563/100000 [4:27:09<26:47:14,  1.12s/it]

133487 episode score is 791.45


 14%|█▎        | 13564/100000 [4:27:10<26:40:59,  1.11s/it]

133497 episode score is 771.03


 14%|█▎        | 13565/100000 [4:27:11<26:47:23,  1.12s/it]

133507 episode score is 787.24


 14%|█▎        | 13566/100000 [4:27:13<27:14:13,  1.13s/it]

133517 episode score is 827.38


 14%|█▎        | 13567/100000 [4:27:14<27:32:14,  1.15s/it]

133527 episode score is 819.40


 14%|█▎        | 13568/100000 [4:27:15<27:23:23,  1.14s/it]

133537 episode score is 792.40


 14%|█▎        | 13569/100000 [4:27:16<27:25:54,  1.14s/it]

133547 episode score is 790.10


 14%|█▎        | 13570/100000 [4:27:17<27:46:42,  1.16s/it]

133557 episode score is 833.33


 14%|█▎        | 13571/100000 [4:27:18<28:09:31,  1.17s/it]

133567 episode score is 836.37


 14%|█▎        | 13572/100000 [4:27:20<28:07:56,  1.17s/it]

133577 episode score is 819.14


 14%|█▎        | 13573/100000 [4:27:21<27:35:16,  1.15s/it]

133586 episode score is 855.40


 14%|█▎        | 13574/100000 [4:27:22<27:06:57,  1.13s/it]

133595 episode score is 847.62


 14%|█▎        | 13575/100000 [4:27:23<27:17:18,  1.14s/it]

133605 episode score is 798.67


 14%|█▎        | 13576/100000 [4:27:24<27:20:18,  1.14s/it]

133615 episode score is 812.53


 14%|█▎        | 13577/100000 [4:27:25<27:00:54,  1.13s/it]

133624 episode score is 863.02


 14%|█▎        | 13578/100000 [4:27:26<27:20:48,  1.14s/it]

133634 episode score is 837.03


 14%|█▎        | 13579/100000 [4:27:28<27:22:10,  1.14s/it]

133644 episode score is 811.13


 14%|█▎        | 13580/100000 [4:27:29<27:15:51,  1.14s/it]

133654 episode score is 799.18


 14%|█▎        | 13581/100000 [4:27:30<27:36:54,  1.15s/it]

133664 episode score is 836.34


 14%|█▎        | 13582/100000 [4:27:31<27:02:50,  1.13s/it]

133673 episode score is 849.94


 14%|█▎        | 13583/100000 [4:27:32<27:22:44,  1.14s/it]

133683 episode score is 826.06


 14%|█▎        | 13584/100000 [4:27:33<27:33:04,  1.15s/it]

133693 episode score is 815.49


 14%|█▎        | 13585/100000 [4:27:34<27:43:35,  1.16s/it]

133703 episode score is 834.27


 14%|█▎        | 13586/100000 [4:27:36<27:06:45,  1.13s/it]

133712 episode score is 853.92


 14%|█▎        | 13587/100000 [4:27:37<26:53:11,  1.12s/it]

133722 episode score is 772.85


 14%|█▎        | 13588/100000 [4:27:38<27:14:06,  1.13s/it]

133732 episode score is 835.93


 14%|█▎        | 13589/100000 [4:27:39<27:45:16,  1.16s/it]

133742 episode score is 781.16


 14%|█▎        | 13590/100000 [4:27:40<27:26:14,  1.14s/it]

133752 episode score is 782.62


 14%|█▎        | 13591/100000 [4:27:41<27:02:49,  1.13s/it]

133762 episode score is 770.30


 14%|█▎        | 13592/100000 [4:27:42<27:11:02,  1.13s/it]

133772 episode score is 800.99


 14%|█▎        | 13593/100000 [4:27:43<27:16:43,  1.14s/it]

133782 episode score is 800.87


 14%|█▎        | 13594/100000 [4:27:45<26:51:56,  1.12s/it]

133792 episode score is 755.17


 14%|█▎        | 13595/100000 [4:27:46<26:37:28,  1.11s/it]

133802 episode score is 762.93


 14%|█▎        | 13596/100000 [4:27:47<26:50:49,  1.12s/it]

133812 episode score is 805.22


 14%|█▎        | 13597/100000 [4:27:48<26:44:13,  1.11s/it]

133822 episode score is 766.00


 14%|█▎        | 13598/100000 [4:27:49<27:00:10,  1.13s/it]

133832 episode score is 820.14


 14%|█▎        | 13599/100000 [4:27:50<26:51:45,  1.12s/it]

133842 episode score is 786.81
133851 episode score is 859.21


 14%|█▎        | 13600/100000 [4:27:52<33:56:42,  1.41s/it]

Iteration 13600: Average test reward: 828.82


 14%|█▎        | 13601/100000 [4:27:53<31:37:27,  1.32s/it]

133861 episode score is 762.99


 14%|█▎        | 13602/100000 [4:27:55<30:32:14,  1.27s/it]

133871 episode score is 839.00


 14%|█▎        | 13603/100000 [4:27:56<29:33:15,  1.23s/it]

133881 episode score is 802.88


 14%|█▎        | 13604/100000 [4:27:57<28:52:34,  1.20s/it]

133891 episode score is 816.45


 14%|█▎        | 13605/100000 [4:27:58<28:30:10,  1.19s/it]

133901 episode score is 828.14


 14%|█▎        | 13606/100000 [4:27:59<28:26:52,  1.19s/it]

133911 episode score is 832.81


 14%|█▎        | 13607/100000 [4:28:00<28:26:52,  1.19s/it]

133921 episode score is 832.79


 14%|█▎        | 13608/100000 [4:28:01<28:14:31,  1.18s/it]

133931 episode score is 822.96


 14%|█▎        | 13609/100000 [4:28:03<27:41:47,  1.15s/it]

133940 episode score is 875.69


 14%|█▎        | 13610/100000 [4:28:04<27:22:24,  1.14s/it]

133950 episode score is 771.14


 14%|█▎        | 13611/100000 [4:28:05<27:29:47,  1.15s/it]

133960 episode score is 823.19


 14%|█▎        | 13612/100000 [4:28:06<27:35:23,  1.15s/it]

133970 episode score is 830.05


 14%|█▎        | 13613/100000 [4:28:07<27:44:23,  1.16s/it]

133980 episode score is 835.76


 14%|█▎        | 13614/100000 [4:28:08<27:39:42,  1.15s/it]

133990 episode score is 817.08


 14%|█▎        | 13615/100000 [4:28:09<27:43:23,  1.16s/it]

134000 episode score is 820.40


 14%|█▎        | 13616/100000 [4:28:11<27:41:42,  1.15s/it]

134010 episode score is 828.29


 14%|█▎        | 13617/100000 [4:28:12<27:41:43,  1.15s/it]

134020 episode score is 816.76


 14%|█▎        | 13618/100000 [4:28:13<27:28:39,  1.15s/it]

134030 episode score is 783.81


 14%|█▎        | 13619/100000 [4:28:14<27:25:12,  1.14s/it]

134040 episode score is 800.68


 14%|█▎        | 13620/100000 [4:28:15<27:29:36,  1.15s/it]

134050 episode score is 818.03


 14%|█▎        | 13621/100000 [4:28:16<27:31:05,  1.15s/it]

134060 episode score is 814.56


 14%|█▎        | 13622/100000 [4:28:17<27:26:21,  1.14s/it]

134070 episode score is 796.01


 14%|█▎        | 13623/100000 [4:28:19<27:02:20,  1.13s/it]

134080 episode score is 764.75


 14%|█▎        | 13624/100000 [4:28:20<27:30:09,  1.15s/it]

134090 episode score is 839.86


 14%|█▎        | 13625/100000 [4:28:21<27:39:00,  1.15s/it]

134100 episode score is 813.98


 14%|█▎        | 13626/100000 [4:28:22<27:51:24,  1.16s/it]

134110 episode score is 835.92


 14%|█▎        | 13627/100000 [4:28:23<27:52:52,  1.16s/it]

134120 episode score is 819.60


 14%|█▎        | 13628/100000 [4:28:24<27:39:14,  1.15s/it]

134130 episode score is 788.85


 14%|█▎        | 13629/100000 [4:28:26<27:51:30,  1.16s/it]

134140 episode score is 827.96


 14%|█▎        | 13630/100000 [4:28:27<27:14:40,  1.14s/it]

134149 episode score is 853.18


 14%|█▎        | 13631/100000 [4:28:28<27:24:53,  1.14s/it]

134159 episode score is 814.65


 14%|█▎        | 13632/100000 [4:28:29<27:22:28,  1.14s/it]

134169 episode score is 790.53


 14%|█▎        | 13633/100000 [4:28:30<27:37:24,  1.15s/it]

134179 episode score is 826.64


 14%|█▎        | 13634/100000 [4:28:31<27:49:16,  1.16s/it]

134189 episode score is 829.02


 14%|█▎        | 13635/100000 [4:28:32<27:26:39,  1.14s/it]

134198 episode score is 858.75


 14%|█▎        | 13636/100000 [4:28:33<26:58:55,  1.12s/it]

134208 episode score is 744.43


 14%|█▎        | 13637/100000 [4:28:35<27:17:30,  1.14s/it]

134218 episode score is 813.73


 14%|█▎        | 13638/100000 [4:28:36<26:51:30,  1.12s/it]

134227 episode score is 847.57


 14%|█▎        | 13639/100000 [4:28:37<26:42:34,  1.11s/it]

134237 episode score is 762.09


 14%|█▎        | 13640/100000 [4:28:38<26:55:45,  1.12s/it]

134247 episode score is 793.53


 14%|█▎        | 13641/100000 [4:28:39<27:23:55,  1.14s/it]

134257 episode score is 839.67


 14%|█▎        | 13642/100000 [4:28:40<27:46:25,  1.16s/it]

134267 episode score is 850.63


 14%|█▎        | 13643/100000 [4:28:42<27:52:06,  1.16s/it]

134277 episode score is 830.07


 14%|█▎        | 13644/100000 [4:28:43<27:37:58,  1.15s/it]

134287 episode score is 780.50


 14%|█▎        | 13645/100000 [4:28:44<27:07:08,  1.13s/it]

134296 episode score is 841.05


 14%|█▎        | 13646/100000 [4:28:45<27:26:43,  1.14s/it]

134306 episode score is 834.09


 14%|█▎        | 13647/100000 [4:28:46<27:31:30,  1.15s/it]

134316 episode score is 800.43


 14%|█▎        | 13648/100000 [4:28:47<27:29:50,  1.15s/it]

134326 episode score is 788.75


 14%|█▎        | 13649/100000 [4:28:48<27:27:38,  1.14s/it]

134336 episode score is 773.56


 14%|█▎        | 13650/100000 [4:28:50<27:37:57,  1.15s/it]

134346 episode score is 818.76


 14%|█▎        | 13651/100000 [4:28:51<27:34:53,  1.15s/it]

134356 episode score is 804.11


 14%|█▎        | 13652/100000 [4:28:52<27:07:46,  1.13s/it]

134366 episode score is 759.62


 14%|█▎        | 13653/100000 [4:28:53<27:23:33,  1.14s/it]

134376 episode score is 833.39


 14%|█▎        | 13654/100000 [4:28:54<27:28:51,  1.15s/it]

134386 episode score is 812.40


 14%|█▎        | 13655/100000 [4:28:55<27:20:14,  1.14s/it]

134396 episode score is 792.22


 14%|█▎        | 13656/100000 [4:28:56<27:19:50,  1.14s/it]

134406 episode score is 809.11


 14%|█▎        | 13657/100000 [4:28:57<27:07:37,  1.13s/it]

134416 episode score is 785.36


 14%|█▎        | 13658/100000 [4:28:59<26:49:47,  1.12s/it]

134426 episode score is 761.64


 14%|█▎        | 13659/100000 [4:29:00<26:51:47,  1.12s/it]

134436 episode score is 792.77


 14%|█▎        | 13660/100000 [4:29:01<27:10:59,  1.13s/it]

134446 episode score is 829.16


 14%|█▎        | 13661/100000 [4:29:02<27:09:26,  1.13s/it]

134456 episode score is 791.77


 14%|█▎        | 13662/100000 [4:29:03<27:29:37,  1.15s/it]

134467 episode score is 747.07


 14%|█▎        | 13663/100000 [4:29:04<27:23:54,  1.14s/it]

134477 episode score is 793.50


 14%|█▎        | 13664/100000 [4:29:05<27:21:47,  1.14s/it]

134487 episode score is 800.04


 14%|█▎        | 13665/100000 [4:29:07<27:25:57,  1.14s/it]

134497 episode score is 813.36


 14%|█▎        | 13666/100000 [4:29:08<27:16:01,  1.14s/it]

134507 episode score is 789.92


 14%|█▎        | 13667/100000 [4:29:09<27:34:32,  1.15s/it]

134518 episode score is 736.52


 14%|█▎        | 13668/100000 [4:29:10<27:22:10,  1.14s/it]

134528 episode score is 789.81


 14%|█▎        | 13669/100000 [4:29:11<27:10:00,  1.13s/it]

134538 episode score is 785.68


 14%|█▎        | 13670/100000 [4:29:12<26:50:10,  1.12s/it]

134548 episode score is 750.30


 14%|█▎        | 13671/100000 [4:29:13<26:47:36,  1.12s/it]

134558 episode score is 777.79


 14%|█▎        | 13672/100000 [4:29:14<27:09:24,  1.13s/it]

134569 episode score is 728.53


 14%|█▎        | 13673/100000 [4:29:16<28:12:02,  1.18s/it]

134579 episode score is 832.20


 14%|█▎        | 13674/100000 [4:29:17<27:40:26,  1.15s/it]

134589 episode score is 761.44


 14%|█▎        | 13675/100000 [4:29:18<27:49:06,  1.16s/it]

134599 episode score is 819.82


 14%|█▎        | 13676/100000 [4:29:19<27:51:24,  1.16s/it]

134609 episode score is 815.65


 14%|█▎        | 13677/100000 [4:29:20<27:30:23,  1.15s/it]

134619 episode score is 783.06


 14%|█▎        | 13678/100000 [4:29:21<27:16:10,  1.14s/it]

134629 episode score is 788.33


 14%|█▎        | 13679/100000 [4:29:23<27:14:31,  1.14s/it]

134639 episode score is 791.29


 14%|█▎        | 13680/100000 [4:29:24<27:10:59,  1.13s/it]

134649 episode score is 776.59


 14%|█▎        | 13681/100000 [4:29:25<27:08:49,  1.13s/it]

134659 episode score is 793.87


 14%|█▎        | 13682/100000 [4:29:26<27:05:12,  1.13s/it]

134669 episode score is 792.47


 14%|█▎        | 13683/100000 [4:29:27<27:06:08,  1.13s/it]

134679 episode score is 796.63


 14%|█▎        | 13684/100000 [4:29:28<26:49:53,  1.12s/it]

134689 episode score is 764.85


 14%|█▎        | 13685/100000 [4:29:29<26:42:26,  1.11s/it]

134699 episode score is 779.40


 14%|█▎        | 13686/100000 [4:29:30<26:39:31,  1.11s/it]

134709 episode score is 782.65


 14%|█▎        | 13687/100000 [4:29:31<26:39:57,  1.11s/it]

134719 episode score is 783.20


 14%|█▎        | 13688/100000 [4:29:33<26:55:41,  1.12s/it]

134729 episode score is 808.52


 14%|█▎        | 13689/100000 [4:29:34<26:47:26,  1.12s/it]

134739 episode score is 781.31


 14%|█▎        | 13690/100000 [4:29:35<26:39:32,  1.11s/it]

134749 episode score is 769.21


 14%|█▎        | 13691/100000 [4:29:36<27:02:32,  1.13s/it]

134759 episode score is 842.69


 14%|█▎        | 13692/100000 [4:29:37<26:37:36,  1.11s/it]

134769 episode score is 755.60


 14%|█▎        | 13693/100000 [4:29:38<26:58:36,  1.13s/it]

134780 episode score is 734.95


 14%|█▎        | 13694/100000 [4:29:39<26:47:12,  1.12s/it]

134790 episode score is 779.32


 14%|█▎        | 13695/100000 [4:29:40<27:07:33,  1.13s/it]

134801 episode score is 741.14


 14%|█▎        | 13696/100000 [4:29:42<26:49:15,  1.12s/it]

134811 episode score is 773.85


 14%|█▎        | 13697/100000 [4:29:43<26:37:10,  1.11s/it]

134821 episode score is 765.12


 14%|█▎        | 13698/100000 [4:29:44<27:02:17,  1.13s/it]

134832 episode score is 742.99


 14%|█▎        | 13699/100000 [4:29:45<27:06:08,  1.13s/it]

134842 episode score is 790.64
134852 episode score is 821.66


 14%|█▎        | 13700/100000 [4:29:47<34:11:34,  1.43s/it]

Iteration 13700: Average test reward: 727.42


 14%|█▎        | 13701/100000 [4:29:48<31:48:25,  1.33s/it]

134862 episode score is 773.53


 14%|█▎        | 13702/100000 [4:29:49<30:31:30,  1.27s/it]

134873 episode score is 725.05


 14%|█▎        | 13703/100000 [4:29:50<29:11:16,  1.22s/it]

134883 episode score is 777.00


 14%|█▎        | 13704/100000 [4:29:51<28:06:17,  1.17s/it]

134893 episode score is 753.26


 14%|█▎        | 13705/100000 [4:29:53<27:33:56,  1.15s/it]

134903 episode score is 783.81


 14%|█▎        | 13706/100000 [4:29:54<27:12:50,  1.14s/it]

134913 episode score is 790.10


 14%|█▎        | 13707/100000 [4:29:55<27:17:23,  1.14s/it]

134924 episode score is 722.80


 14%|█▎        | 13708/100000 [4:29:56<26:45:16,  1.12s/it]

134934 episode score is 746.68


 14%|█▎        | 13709/100000 [4:29:57<26:58:42,  1.13s/it]

134945 episode score is 746.44


 14%|█▎        | 13710/100000 [4:29:58<27:14:26,  1.14s/it]

134956 episode score is 743.99


 14%|█▎        | 13711/100000 [4:29:59<26:50:55,  1.12s/it]

134966 episode score is 754.02


 14%|█▎        | 13712/100000 [4:30:00<26:35:10,  1.11s/it]

134976 episode score is 773.99


 14%|█▎        | 13713/100000 [4:30:01<26:13:50,  1.09s/it]

134986 episode score is 745.06


 14%|█▎        | 13714/100000 [4:30:03<26:22:46,  1.10s/it]

134996 episode score is 780.17


 14%|█▎        | 13715/100000 [4:30:04<26:31:57,  1.11s/it]

135006 episode score is 785.95


 14%|█▎        | 13716/100000 [4:30:05<26:23:53,  1.10s/it]

135016 episode score is 751.76


 14%|█▎        | 13717/100000 [4:30:06<26:33:22,  1.11s/it]

135027 episode score is 701.22


 14%|█▎        | 13718/100000 [4:30:07<26:36:17,  1.11s/it]

135037 episode score is 785.23


 14%|█▎        | 13719/100000 [4:30:08<26:26:46,  1.10s/it]

135047 episode score is 768.37


 14%|█▎        | 13720/100000 [4:30:09<26:41:47,  1.11s/it]

135058 episode score is 716.97


 14%|█▎        | 13721/100000 [4:30:10<26:28:52,  1.10s/it]

135068 episode score is 770.59


 14%|█▎        | 13722/100000 [4:30:11<26:21:04,  1.10s/it]

135078 episode score is 753.40


 14%|█▎        | 13723/100000 [4:30:13<26:51:11,  1.12s/it]

135089 episode score is 734.23


 14%|█▎        | 13724/100000 [4:30:14<27:03:50,  1.13s/it]

135100 episode score is 721.90


 14%|█▎        | 13725/100000 [4:30:15<27:24:15,  1.14s/it]

135111 episode score is 734.93


 14%|█▎        | 13726/100000 [4:30:16<27:02:41,  1.13s/it]

135121 episode score is 762.65


 14%|█▎        | 13727/100000 [4:30:17<27:06:14,  1.13s/it]

135131 episode score is 806.84


 14%|█▎        | 13728/100000 [4:30:18<26:57:30,  1.12s/it]

135141 episode score is 782.79


 14%|█▎        | 13729/100000 [4:30:19<27:31:43,  1.15s/it]

135151 episode score is 782.50


 14%|█▎        | 13730/100000 [4:30:21<27:19:38,  1.14s/it]

135161 episode score is 796.86


 14%|█▎        | 13731/100000 [4:30:22<27:30:45,  1.15s/it]

135172 episode score is 743.80


 14%|█▎        | 13732/100000 [4:30:23<27:21:18,  1.14s/it]

135182 episode score is 796.76


 14%|█▎        | 13733/100000 [4:30:24<27:33:38,  1.15s/it]

135193 episode score is 746.92


 14%|█▎        | 13734/100000 [4:30:25<27:23:35,  1.14s/it]

135203 episode score is 778.33


 14%|█▎        | 13735/100000 [4:30:26<27:11:28,  1.13s/it]

135213 episode score is 790.08


 14%|█▎        | 13736/100000 [4:30:27<27:06:51,  1.13s/it]

135223 episode score is 789.85


 14%|█▎        | 13737/100000 [4:30:28<27:00:17,  1.13s/it]

135233 episode score is 779.69


 14%|█▎        | 13738/100000 [4:30:30<26:58:34,  1.13s/it]

135243 episode score is 781.18


 14%|█▎        | 13739/100000 [4:30:31<26:43:21,  1.12s/it]

135253 episode score is 773.55


 14%|█▎        | 13740/100000 [4:30:32<26:43:22,  1.12s/it]

135263 episode score is 785.24


 14%|█▎        | 13741/100000 [4:30:33<26:54:26,  1.12s/it]

135273 episode score is 792.60


 14%|█▎        | 13742/100000 [4:30:34<26:51:01,  1.12s/it]

135283 episode score is 783.50


 14%|█▎        | 13743/100000 [4:30:35<26:55:22,  1.12s/it]

135293 episode score is 787.75


 14%|█▎        | 13744/100000 [4:30:36<26:52:08,  1.12s/it]

135303 episode score is 787.39


 14%|█▎        | 13745/100000 [4:30:37<27:05:08,  1.13s/it]

135313 episode score is 818.16


 14%|█▎        | 13746/100000 [4:30:39<26:52:22,  1.12s/it]

135323 episode score is 780.93


 14%|█▎        | 13747/100000 [4:30:40<26:56:53,  1.12s/it]

135333 episode score is 800.08


 14%|█▎        | 13748/100000 [4:30:41<27:07:11,  1.13s/it]

135343 episode score is 816.55


 14%|█▎        | 13749/100000 [4:30:42<27:25:32,  1.14s/it]

135353 episode score is 842.38


 14%|█▍        | 13750/100000 [4:30:43<27:22:50,  1.14s/it]

135363 episode score is 806.69


 14%|█▍        | 13751/100000 [4:30:44<27:04:24,  1.13s/it]

135373 episode score is 782.56


 14%|█▍        | 13752/100000 [4:30:45<27:02:58,  1.13s/it]

135383 episode score is 794.74


 14%|█▍        | 13753/100000 [4:30:46<26:48:11,  1.12s/it]

135393 episode score is 780.80


 14%|█▍        | 13754/100000 [4:30:48<27:03:00,  1.13s/it]

135403 episode score is 809.54


 14%|█▍        | 13755/100000 [4:30:49<27:34:12,  1.15s/it]

135414 episode score is 739.42


 14%|█▍        | 13756/100000 [4:30:50<27:34:18,  1.15s/it]

135424 episode score is 816.92


 14%|█▍        | 13757/100000 [4:30:51<27:27:48,  1.15s/it]

135434 episode score is 804.98


 14%|█▍        | 13758/100000 [4:30:52<27:45:35,  1.16s/it]

135445 episode score is 740.57


 14%|█▍        | 13759/100000 [4:30:53<27:43:16,  1.16s/it]

135455 episode score is 815.62


 14%|█▍        | 13760/100000 [4:30:55<27:59:13,  1.17s/it]

135465 episode score is 848.40


 14%|█▍        | 13761/100000 [4:30:56<28:04:03,  1.17s/it]

135475 episode score is 824.67


 14%|█▍        | 13762/100000 [4:30:57<27:59:07,  1.17s/it]

135485 episode score is 828.44


 14%|█▍        | 13763/100000 [4:30:58<28:07:54,  1.17s/it]

135495 episode score is 825.98


 14%|█▍        | 13764/100000 [4:30:59<29:02:24,  1.21s/it]

135505 episode score is 847.08


 14%|█▍        | 13765/100000 [4:31:01<28:40:40,  1.20s/it]

135515 episode score is 803.14


 14%|█▍        | 13766/100000 [4:31:02<27:59:52,  1.17s/it]

135524 episode score is 848.43


 14%|█▍        | 13767/100000 [4:31:03<27:40:22,  1.16s/it]

135533 episode score is 861.70


 14%|█▍        | 13768/100000 [4:31:04<27:11:04,  1.13s/it]

135542 episode score is 849.76


 14%|█▍        | 13769/100000 [4:31:05<26:57:22,  1.13s/it]

135551 episode score is 850.75


 14%|█▍        | 13770/100000 [4:31:06<27:23:05,  1.14s/it]

135561 episode score is 825.17


 14%|█▍        | 13771/100000 [4:31:07<27:34:58,  1.15s/it]

135571 episode score is 809.96


 14%|█▍        | 13772/100000 [4:31:09<28:00:04,  1.17s/it]

135581 episode score is 839.12


 14%|█▍        | 13773/100000 [4:31:10<28:00:34,  1.17s/it]

135591 episode score is 808.35


 14%|█▍        | 13774/100000 [4:31:11<27:28:11,  1.15s/it]

135600 episode score is 848.34


 14%|█▍        | 13775/100000 [4:31:12<27:50:46,  1.16s/it]

135610 episode score is 831.18


 14%|█▍        | 13776/100000 [4:31:13<27:53:55,  1.16s/it]

135620 episode score is 810.24


 14%|█▍        | 13777/100000 [4:31:14<27:35:52,  1.15s/it]

135630 episode score is 777.61


 14%|█▍        | 13778/100000 [4:31:16<27:29:34,  1.15s/it]

135639 episode score is 874.02


 14%|█▍        | 13779/100000 [4:31:17<27:12:43,  1.14s/it]

135648 episode score is 861.30


 14%|█▍        | 13780/100000 [4:31:18<27:10:09,  1.13s/it]

135657 episode score is 857.29


 14%|█▍        | 13781/100000 [4:31:19<26:56:59,  1.13s/it]

135666 episode score is 836.96


 14%|█▍        | 13782/100000 [4:31:20<26:42:32,  1.12s/it]

135675 episode score is 838.14


 14%|█▍        | 13783/100000 [4:31:21<26:44:39,  1.12s/it]

135684 episode score is 875.16


 14%|█▍        | 13784/100000 [4:31:22<27:10:00,  1.13s/it]

135694 episode score is 827.19


 14%|█▍        | 13785/100000 [4:31:23<26:52:56,  1.12s/it]

135703 episode score is 855.20


 14%|█▍        | 13786/100000 [4:31:25<27:27:23,  1.15s/it]

135713 episode score is 837.18


 14%|█▍        | 13787/100000 [4:31:26<27:34:09,  1.15s/it]

135723 episode score is 793.11


 14%|█▍        | 13788/100000 [4:31:27<27:34:51,  1.15s/it]

135733 episode score is 796.13


 14%|█▍        | 13789/100000 [4:31:28<27:10:34,  1.13s/it]

135743 episode score is 747.38


 14%|█▍        | 13790/100000 [4:31:29<27:14:40,  1.14s/it]

135753 episode score is 793.00


 14%|█▍        | 13791/100000 [4:31:30<27:16:07,  1.14s/it]

135763 episode score is 793.37


 14%|█▍        | 13792/100000 [4:31:31<26:58:32,  1.13s/it]

135773 episode score is 759.51


 14%|█▍        | 13793/100000 [4:31:33<27:28:14,  1.15s/it]

135784 episode score is 728.00


 14%|█▍        | 13794/100000 [4:31:34<27:33:38,  1.15s/it]

135794 episode score is 800.36


 14%|█▍        | 13795/100000 [4:31:35<27:29:50,  1.15s/it]

135804 episode score is 791.20


 14%|█▍        | 13796/100000 [4:31:36<27:06:01,  1.13s/it]

135814 episode score is 745.01


 14%|█▍        | 13797/100000 [4:31:37<27:04:52,  1.13s/it]

135824 episode score is 785.22


 14%|█▍        | 13798/100000 [4:31:38<27:14:35,  1.14s/it]

135834 episode score is 810.91


 14%|█▍        | 13799/100000 [4:31:39<27:11:51,  1.14s/it]

135844 episode score is 789.56
135854 episode score is 799.44


 14%|█▍        | 13800/100000 [4:31:42<34:30:24,  1.44s/it]

Iteration 13800: Average test reward: 772.36


 14%|█▍        | 13801/100000 [4:31:43<32:02:48,  1.34s/it]

135864 episode score is 752.19


 14%|█▍        | 13802/100000 [4:31:44<30:54:56,  1.29s/it]

135874 episode score is 811.65


 14%|█▍        | 13803/100000 [4:31:45<30:07:33,  1.26s/it]

135884 episode score is 833.78


 14%|█▍        | 13804/100000 [4:31:46<29:31:59,  1.23s/it]

135894 episode score is 814.12


 14%|█▍        | 13805/100000 [4:31:47<29:09:32,  1.22s/it]

135904 episode score is 814.35


 14%|█▍        | 13806/100000 [4:31:48<28:38:26,  1.20s/it]

135914 episode score is 791.80


 14%|█▍        | 13807/100000 [4:31:50<27:57:56,  1.17s/it]

135924 episode score is 749.08


 14%|█▍        | 13808/100000 [4:31:51<27:50:45,  1.16s/it]

135934 episode score is 804.94


 14%|█▍        | 13809/100000 [4:31:52<27:42:54,  1.16s/it]

135944 episode score is 789.49


 14%|█▍        | 13810/100000 [4:31:53<27:40:15,  1.16s/it]

135954 episode score is 802.57


 14%|█▍        | 13811/100000 [4:31:54<27:32:33,  1.15s/it]

135964 episode score is 794.22


 14%|█▍        | 13812/100000 [4:31:55<27:57:43,  1.17s/it]

135974 episode score is 843.21


 14%|█▍        | 13813/100000 [4:31:57<28:11:35,  1.18s/it]

135985 episode score is 746.90


 14%|█▍        | 13814/100000 [4:31:58<28:11:35,  1.18s/it]

135996 episode score is 726.78


 14%|█▍        | 13815/100000 [4:31:59<28:26:34,  1.19s/it]

136007 episode score is 749.00


 14%|█▍        | 13816/100000 [4:32:00<28:15:51,  1.18s/it]

136018 episode score is 718.19


 14%|█▍        | 13817/100000 [4:32:01<27:58:51,  1.17s/it]

136028 episode score is 796.12


 14%|█▍        | 13818/100000 [4:32:02<28:03:49,  1.17s/it]

136038 episode score is 818.60


 14%|█▍        | 13819/100000 [4:32:04<27:55:38,  1.17s/it]

136048 episode score is 800.63


 14%|█▍        | 13820/100000 [4:32:05<27:34:59,  1.15s/it]

136057 episode score is 870.87


 14%|█▍        | 13821/100000 [4:32:06<27:48:25,  1.16s/it]

136067 episode score is 813.60


 14%|█▍        | 13822/100000 [4:32:07<27:51:11,  1.16s/it]

136077 episode score is 815.64


 14%|█▍        | 13823/100000 [4:32:08<27:21:18,  1.14s/it]

136086 episode score is 851.39


 14%|█▍        | 13824/100000 [4:32:09<27:52:29,  1.16s/it]

136096 episode score is 771.02


 14%|█▍        | 13825/100000 [4:32:11<28:03:34,  1.17s/it]

136106 episode score is 834.34


 14%|█▍        | 13826/100000 [4:32:12<27:33:14,  1.15s/it]

136116 episode score is 757.89


 14%|█▍        | 13827/100000 [4:32:13<27:52:52,  1.16s/it]

136126 episode score is 836.96


 14%|█▍        | 13828/100000 [4:32:14<27:58:36,  1.17s/it]

136136 episode score is 824.20


 14%|█▍        | 13829/100000 [4:32:15<27:55:43,  1.17s/it]

136146 episode score is 813.94


 14%|█▍        | 13830/100000 [4:32:16<28:15:48,  1.18s/it]

136156 episode score is 828.52


 14%|█▍        | 13831/100000 [4:32:18<28:09:42,  1.18s/it]

136166 episode score is 801.16


 14%|█▍        | 13832/100000 [4:32:19<28:11:18,  1.18s/it]

136176 episode score is 814.10


 14%|█▍        | 13833/100000 [4:32:20<27:41:09,  1.16s/it]

136185 episode score is 864.94


 14%|█▍        | 13834/100000 [4:32:21<27:13:18,  1.14s/it]

136194 episode score is 848.75


 14%|█▍        | 13835/100000 [4:32:22<27:38:24,  1.15s/it]

136204 episode score is 831.38


 14%|█▍        | 13836/100000 [4:32:23<27:47:44,  1.16s/it]

136214 episode score is 813.55


 14%|█▍        | 13837/100000 [4:32:24<27:18:48,  1.14s/it]

136223 episode score is 853.42


 14%|█▍        | 13838/100000 [4:32:26<27:36:10,  1.15s/it]

136233 episode score is 818.28


 14%|█▍        | 13839/100000 [4:32:27<27:35:16,  1.15s/it]

136243 episode score is 789.04


 14%|█▍        | 13840/100000 [4:32:28<27:29:29,  1.15s/it]

136253 episode score is 784.89


 14%|█▍        | 13841/100000 [4:32:29<27:17:07,  1.14s/it]

136263 episode score is 746.75


 14%|█▍        | 13842/100000 [4:32:30<27:17:20,  1.14s/it]

136274 episode score is 698.84


 14%|█▍        | 13843/100000 [4:32:31<27:26:26,  1.15s/it]

136285 episode score is 716.09


 14%|█▍        | 13844/100000 [4:32:32<27:19:50,  1.14s/it]

136295 episode score is 762.37


 14%|█▍        | 13845/100000 [4:32:34<27:10:36,  1.14s/it]

136306 episode score is 678.52


 14%|█▍        | 13846/100000 [4:32:35<26:53:21,  1.12s/it]

136316 episode score is 741.57


 14%|█▍        | 13847/100000 [4:32:36<26:49:04,  1.12s/it]

136326 episode score is 756.71


 14%|█▍        | 13848/100000 [4:32:37<27:00:00,  1.13s/it]

136336 episode score is 792.51


 14%|█▍        | 13849/100000 [4:32:38<27:12:13,  1.14s/it]

136347 episode score is 717.94


 14%|█▍        | 13850/100000 [4:32:39<27:32:11,  1.15s/it]

136358 episode score is 738.36


 14%|█▍        | 13851/100000 [4:32:40<27:28:53,  1.15s/it]

136368 episode score is 778.37


 14%|█▍        | 13852/100000 [4:32:42<27:29:17,  1.15s/it]

136378 episode score is 797.65


 14%|█▍        | 13853/100000 [4:32:43<27:45:44,  1.16s/it]

136388 episode score is 827.11


 14%|█▍        | 13854/100000 [4:32:44<27:42:36,  1.16s/it]

136398 episode score is 795.27


 14%|█▍        | 13855/100000 [4:32:45<27:54:08,  1.17s/it]

136408 episode score is 822.11


 14%|█▍        | 13856/100000 [4:32:46<27:33:39,  1.15s/it]

136418 episode score is 769.79


 14%|█▍        | 13857/100000 [4:32:47<27:21:34,  1.14s/it]

136428 episode score is 750.35


 14%|█▍        | 13858/100000 [4:32:48<27:04:56,  1.13s/it]

136437 episode score is 856.48


 14%|█▍        | 13859/100000 [4:32:50<27:27:18,  1.15s/it]

136447 episode score is 810.82


 14%|█▍        | 13860/100000 [4:32:51<27:07:09,  1.13s/it]

136457 episode score is 753.43


 14%|█▍        | 13861/100000 [4:32:52<27:14:27,  1.14s/it]

136467 episode score is 797.32


 14%|█▍        | 13862/100000 [4:32:53<27:29:07,  1.15s/it]

136477 episode score is 803.56


 14%|█▍        | 13863/100000 [4:32:54<27:23:19,  1.14s/it]

136487 episode score is 784.81


 14%|█▍        | 13864/100000 [4:32:55<27:07:09,  1.13s/it]

136497 episode score is 750.99


 14%|█▍        | 13865/100000 [4:32:56<27:08:44,  1.13s/it]

136507 episode score is 784.16


 14%|█▍        | 13866/100000 [4:32:58<27:02:04,  1.13s/it]

136517 episode score is 768.40


 14%|█▍        | 13867/100000 [4:32:59<27:10:56,  1.14s/it]

136527 episode score is 801.64


 14%|█▍        | 13868/100000 [4:33:00<27:14:55,  1.14s/it]

136537 episode score is 800.63


 14%|█▍        | 13869/100000 [4:33:01<26:56:53,  1.13s/it]

136547 episode score is 750.67


 14%|█▍        | 13870/100000 [4:33:02<27:00:22,  1.13s/it]

136557 episode score is 779.25


 14%|█▍        | 13871/100000 [4:33:03<26:55:32,  1.13s/it]

136567 episode score is 768.61


 14%|█▍        | 13872/100000 [4:33:04<27:13:09,  1.14s/it]

136577 episode score is 814.37


 14%|█▍        | 13873/100000 [4:33:06<27:35:46,  1.15s/it]

136588 episode score is 744.10


 14%|█▍        | 13874/100000 [4:33:07<27:47:51,  1.16s/it]

136598 episode score is 827.57


 14%|█▍        | 13875/100000 [4:33:08<27:29:24,  1.15s/it]

136608 episode score is 775.43


 14%|█▍        | 13876/100000 [4:33:09<27:24:04,  1.15s/it]

136618 episode score is 784.14


 14%|█▍        | 13877/100000 [4:33:10<27:11:11,  1.14s/it]

136628 episode score is 770.24


 14%|█▍        | 13878/100000 [4:33:11<27:15:32,  1.14s/it]

136638 episode score is 795.12


 14%|█▍        | 13879/100000 [4:33:12<27:13:59,  1.14s/it]

136648 episode score is 781.00


 14%|█▍        | 13880/100000 [4:33:14<27:07:26,  1.13s/it]

136658 episode score is 776.24


 14%|█▍        | 13881/100000 [4:33:15<26:54:07,  1.12s/it]

136668 episode score is 764.32


 14%|█▍        | 13882/100000 [4:33:16<26:47:11,  1.12s/it]

136678 episode score is 753.83


 14%|█▍        | 13883/100000 [4:33:17<26:43:50,  1.12s/it]

136687 episode score is 860.08


 14%|█▍        | 13884/100000 [4:33:18<26:37:11,  1.11s/it]

136697 episode score is 746.27


 14%|█▍        | 13885/100000 [4:33:19<27:06:23,  1.13s/it]

136707 episode score is 810.58


 14%|█▍        | 13886/100000 [4:33:20<28:00:45,  1.17s/it]

136717 episode score is 806.43


 14%|█▍        | 13887/100000 [4:33:22<27:47:18,  1.16s/it]

136727 episode score is 788.28


 14%|█▍        | 13888/100000 [4:33:23<27:44:11,  1.16s/it]

136737 episode score is 798.97


 14%|█▍        | 13889/100000 [4:33:24<27:26:41,  1.15s/it]

136747 episode score is 774.04


 14%|█▍        | 13890/100000 [4:33:25<27:15:37,  1.14s/it]

136757 episode score is 773.78


 14%|█▍        | 13891/100000 [4:33:26<27:26:17,  1.15s/it]

136767 episode score is 811.90


 14%|█▍        | 13892/100000 [4:33:27<27:24:33,  1.15s/it]

136777 episode score is 769.08


 14%|█▍        | 13893/100000 [4:33:28<27:29:52,  1.15s/it]

136787 episode score is 791.75


 14%|█▍        | 13894/100000 [4:33:30<27:41:40,  1.16s/it]

136798 episode score is 719.03


 14%|█▍        | 13895/100000 [4:33:31<27:38:41,  1.16s/it]

136808 episode score is 793.66


 14%|█▍        | 13896/100000 [4:33:32<27:34:58,  1.15s/it]

136818 episode score is 781.34


 14%|█▍        | 13897/100000 [4:33:33<27:44:04,  1.16s/it]

136828 episode score is 795.39


 14%|█▍        | 13898/100000 [4:33:34<27:22:56,  1.14s/it]

136838 episode score is 764.72


 14%|█▍        | 13899/100000 [4:33:35<27:23:13,  1.15s/it]

136848 episode score is 784.70
136858 episode score is 790.92


 14%|█▍        | 13900/100000 [4:33:37<34:25:10,  1.44s/it]

Iteration 13900: Average test reward: 742.68


 14%|█▍        | 13901/100000 [4:33:39<32:13:02,  1.35s/it]

136868 episode score is 778.48


 14%|█▍        | 13902/100000 [4:33:40<30:49:00,  1.29s/it]

136878 episode score is 794.12


 14%|█▍        | 13903/100000 [4:33:41<29:37:33,  1.24s/it]

136888 episode score is 775.48


 14%|█▍        | 13904/100000 [4:33:42<29:22:33,  1.23s/it]

136898 episode score is 832.26


 14%|█▍        | 13905/100000 [4:33:43<28:56:03,  1.21s/it]

136908 episode score is 803.63


 14%|█▍        | 13906/100000 [4:33:44<28:07:39,  1.18s/it]

136917 episode score is 843.12


 14%|█▍        | 13907/100000 [4:33:45<28:00:52,  1.17s/it]

136927 episode score is 790.71


 14%|█▍        | 13908/100000 [4:33:47<27:39:01,  1.16s/it]

136937 episode score is 763.53


 14%|█▍        | 13909/100000 [4:33:48<27:24:49,  1.15s/it]

136947 episode score is 760.37


 14%|█▍        | 13910/100000 [4:33:49<27:05:08,  1.13s/it]

136957 episode score is 741.36


 14%|█▍        | 13911/100000 [4:33:50<26:51:19,  1.12s/it]

136967 episode score is 752.99


 14%|█▍        | 13912/100000 [4:33:51<26:38:40,  1.11s/it]

136977 episode score is 756.24


 14%|█▍        | 13913/100000 [4:33:52<26:48:51,  1.12s/it]

136987 episode score is 786.94


 14%|█▍        | 13914/100000 [4:33:53<26:50:43,  1.12s/it]

136997 episode score is 778.30


 14%|█▍        | 13915/100000 [4:33:54<26:51:07,  1.12s/it]

137007 episode score is 774.16


 14%|█▍        | 13916/100000 [4:33:56<26:59:43,  1.13s/it]

137017 episode score is 783.34


 14%|█▍        | 13917/100000 [4:33:57<27:02:26,  1.13s/it]

137027 episode score is 784.42


 14%|█▍        | 13918/100000 [4:33:58<26:57:59,  1.13s/it]

137037 episode score is 783.59


 14%|█▍        | 13919/100000 [4:33:59<27:24:32,  1.15s/it]

137047 episode score is 827.22


 14%|█▍        | 13920/100000 [4:34:00<27:12:13,  1.14s/it]

137057 episode score is 761.97


 14%|█▍        | 13921/100000 [4:34:01<27:26:04,  1.15s/it]

137068 episode score is 734.44


 14%|█▍        | 13922/100000 [4:34:02<27:48:38,  1.16s/it]

137079 episode score is 741.97


 14%|█▍        | 13923/100000 [4:34:04<27:54:30,  1.17s/it]

137090 episode score is 739.41


 14%|█▍        | 13924/100000 [4:34:05<27:59:24,  1.17s/it]

137101 episode score is 737.09


 14%|█▍        | 13925/100000 [4:34:06<28:00:39,  1.17s/it]

137112 episode score is 728.71


 14%|█▍        | 13926/100000 [4:34:07<27:34:06,  1.15s/it]

137122 episode score is 767.27


 14%|█▍        | 13927/100000 [4:34:08<27:42:16,  1.16s/it]

137133 episode score is 737.54


 14%|█▍        | 13928/100000 [4:34:09<27:52:04,  1.17s/it]

137144 episode score is 729.22


 14%|█▍        | 13929/100000 [4:34:11<28:16:49,  1.18s/it]

137154 episode score is 788.73


 14%|█▍        | 13930/100000 [4:34:12<27:55:39,  1.17s/it]

137164 episode score is 788.27


 14%|█▍        | 13931/100000 [4:34:13<27:31:20,  1.15s/it]

137174 episode score is 769.11


 14%|█▍        | 13932/100000 [4:34:14<27:20:29,  1.14s/it]

137184 episode score is 785.11


 14%|█▍        | 13933/100000 [4:34:15<27:28:18,  1.15s/it]

137194 episode score is 817.93


 14%|█▍        | 13934/100000 [4:34:16<27:09:41,  1.14s/it]

137204 episode score is 757.66


 14%|█▍        | 13935/100000 [4:34:17<27:38:54,  1.16s/it]

137214 episode score is 828.03


 14%|█▍        | 13936/100000 [4:34:19<27:16:07,  1.14s/it]

137224 episode score is 761.63


 14%|█▍        | 13937/100000 [4:34:20<27:03:08,  1.13s/it]

137234 episode score is 773.47


 14%|█▍        | 13938/100000 [4:34:21<27:07:22,  1.13s/it]

137244 episode score is 801.08


 14%|█▍        | 13939/100000 [4:34:22<27:20:45,  1.14s/it]

137254 episode score is 814.05


 14%|█▍        | 13940/100000 [4:34:23<27:17:15,  1.14s/it]

137264 episode score is 777.03


 14%|█▍        | 13941/100000 [4:34:24<27:23:55,  1.15s/it]

137274 episode score is 803.84


 14%|█▍        | 13942/100000 [4:34:25<27:17:34,  1.14s/it]

137284 episode score is 794.26


 14%|█▍        | 13943/100000 [4:34:27<27:12:53,  1.14s/it]

137294 episode score is 786.69


 14%|█▍        | 13944/100000 [4:34:28<27:05:56,  1.13s/it]

137304 episode score is 783.88


 14%|█▍        | 13945/100000 [4:34:29<27:27:58,  1.15s/it]

137314 episode score is 824.18


 14%|█▍        | 13946/100000 [4:34:30<27:03:33,  1.13s/it]

137324 episode score is 760.77


 14%|█▍        | 13947/100000 [4:34:31<26:58:51,  1.13s/it]

137334 episode score is 781.48


 14%|█▍        | 13948/100000 [4:34:32<26:57:52,  1.13s/it]

137344 episode score is 769.18


 14%|█▍        | 13949/100000 [4:34:33<26:56:13,  1.13s/it]

137354 episode score is 780.41


 14%|█▍        | 13950/100000 [4:34:35<27:13:02,  1.14s/it]

137364 episode score is 813.00


 14%|█▍        | 13951/100000 [4:34:36<27:06:02,  1.13s/it]

137374 episode score is 772.25


 14%|█▍        | 13952/100000 [4:34:37<27:05:04,  1.13s/it]

137384 episode score is 775.10


 14%|█▍        | 13953/100000 [4:34:38<27:02:33,  1.13s/it]

137394 episode score is 781.85


 14%|█▍        | 13954/100000 [4:34:39<27:34:10,  1.15s/it]

137405 episode score is 757.19


 14%|█▍        | 13955/100000 [4:34:40<27:23:01,  1.15s/it]

137415 episode score is 786.95


 14%|█▍        | 13956/100000 [4:34:41<27:11:39,  1.14s/it]

137425 episode score is 777.52


 14%|█▍        | 13957/100000 [4:34:42<27:16:38,  1.14s/it]

137435 episode score is 799.09


 14%|█▍        | 13958/100000 [4:34:44<27:28:25,  1.15s/it]

137445 episode score is 817.25


 14%|█▍        | 13959/100000 [4:34:45<27:02:06,  1.13s/it]

137455 episode score is 758.58


 14%|█▍        | 13960/100000 [4:34:46<27:15:29,  1.14s/it]

137465 episode score is 820.28


 14%|█▍        | 13961/100000 [4:34:47<27:08:20,  1.14s/it]

137475 episode score is 766.05


 14%|█▍        | 13962/100000 [4:34:48<27:14:54,  1.14s/it]

137485 episode score is 794.27


 14%|█▍        | 13963/100000 [4:34:49<27:22:07,  1.15s/it]

137495 episode score is 794.63


 14%|█▍        | 13964/100000 [4:34:50<27:14:46,  1.14s/it]

137505 episode score is 784.09


 14%|█▍        | 13965/100000 [4:34:52<27:17:06,  1.14s/it]

137515 episode score is 798.33


 14%|█▍        | 13966/100000 [4:34:53<27:37:42,  1.16s/it]

137525 episode score is 831.21


 14%|█▍        | 13967/100000 [4:34:54<27:41:44,  1.16s/it]

137535 episode score is 822.00


 14%|█▍        | 13968/100000 [4:34:55<28:02:10,  1.17s/it]

137545 episode score is 840.63


 14%|█▍        | 13969/100000 [4:34:56<27:51:05,  1.17s/it]

137555 episode score is 796.50


 14%|█▍        | 13970/100000 [4:34:58<28:03:38,  1.17s/it]

137565 episode score is 835.17


 14%|█▍        | 13971/100000 [4:34:59<28:25:00,  1.19s/it]

137575 episode score is 822.41


 14%|█▍        | 13972/100000 [4:35:00<28:15:32,  1.18s/it]

137585 episode score is 809.35


 14%|█▍        | 13973/100000 [4:35:01<27:54:14,  1.17s/it]

137595 episode score is 789.86


 14%|█▍        | 13974/100000 [4:35:02<27:47:12,  1.16s/it]

137605 episode score is 792.94


 14%|█▍        | 13975/100000 [4:35:03<27:40:47,  1.16s/it]

137615 episode score is 803.36


 14%|█▍        | 13976/100000 [4:35:05<27:46:59,  1.16s/it]

137625 episode score is 824.98


 14%|█▍        | 13977/100000 [4:35:06<27:29:21,  1.15s/it]

137635 episode score is 777.33


 14%|█▍        | 13978/100000 [4:35:07<27:26:41,  1.15s/it]

137645 episode score is 797.14


 14%|█▍        | 13979/100000 [4:35:08<27:19:21,  1.14s/it]

137655 episode score is 780.52


 14%|█▍        | 13980/100000 [4:35:09<27:05:26,  1.13s/it]

137665 episode score is 755.45


 14%|█▍        | 13981/100000 [4:35:10<27:19:11,  1.14s/it]

137675 episode score is 822.96


 14%|█▍        | 13982/100000 [4:35:11<27:13:16,  1.14s/it]

137685 episode score is 775.77


 14%|█▍        | 13983/100000 [4:35:12<27:05:18,  1.13s/it]

137695 episode score is 772.84


 14%|█▍        | 13984/100000 [4:35:14<27:21:15,  1.14s/it]

137706 episode score is 736.28


 14%|█▍        | 13985/100000 [4:35:15<27:43:11,  1.16s/it]

137717 episode score is 758.51


 14%|█▍        | 13986/100000 [4:35:16<27:22:07,  1.15s/it]

137727 episode score is 772.01


 14%|█▍        | 13987/100000 [4:35:17<27:39:39,  1.16s/it]

137738 episode score is 738.49


 14%|█▍        | 13988/100000 [4:35:18<27:45:37,  1.16s/it]

137748 episode score is 747.24


 14%|█▍        | 13989/100000 [4:35:19<27:18:34,  1.14s/it]

137758 episode score is 750.04


 14%|█▍        | 13990/100000 [4:35:20<26:57:03,  1.13s/it]

137768 episode score is 758.98


 14%|█▍        | 13991/100000 [4:35:22<26:44:01,  1.12s/it]

137778 episode score is 753.33


 14%|█▍        | 13992/100000 [4:35:23<27:11:13,  1.14s/it]

137789 episode score is 729.53


 14%|█▍        | 13993/100000 [4:35:24<27:12:40,  1.14s/it]

137800 episode score is 709.38


 14%|█▍        | 13994/100000 [4:35:25<27:08:04,  1.14s/it]

137811 episode score is 703.94


 14%|█▍        | 13995/100000 [4:35:26<27:25:33,  1.15s/it]

137822 episode score is 728.39


 14%|█▍        | 13996/100000 [4:35:27<27:27:07,  1.15s/it]

137833 episode score is 718.32


 14%|█▍        | 13997/100000 [4:35:29<27:27:25,  1.15s/it]

137844 episode score is 714.24


 14%|█▍        | 13998/100000 [4:35:30<27:32:13,  1.15s/it]

137855 episode score is 710.91


 14%|█▍        | 13999/100000 [4:35:31<27:18:00,  1.14s/it]

137866 episode score is 688.90
137877 episode score is 685.70


 14%|█▍        | 14000/100000 [4:35:33<33:49:15,  1.42s/it]

Iteration 14000: Average test reward: 691.68


 14%|█▍        | 14001/100000 [4:35:34<32:08:00,  1.35s/it]

137889 episode score is 669.00


 14%|█▍        | 14002/100000 [4:35:35<30:20:18,  1.27s/it]

137900 episode score is 676.57


 14%|█▍        | 14003/100000 [4:35:36<29:30:24,  1.24s/it]

137912 episode score is 641.61


 14%|█▍        | 14004/100000 [4:35:37<29:11:37,  1.22s/it]

137924 episode score is 663.46


 14%|█▍        | 14005/100000 [4:35:39<28:19:32,  1.19s/it]

137935 episode score is 675.05


 14%|█▍        | 14006/100000 [4:35:40<28:02:31,  1.17s/it]

137946 episode score is 710.48


 14%|█▍        | 14007/100000 [4:35:41<27:53:05,  1.17s/it]

137957 episode score is 725.23


 14%|█▍        | 14008/100000 [4:35:42<27:44:11,  1.16s/it]

137968 episode score is 719.64


 14%|█▍        | 14009/100000 [4:35:43<27:22:35,  1.15s/it]

137979 episode score is 685.77


 14%|█▍        | 14010/100000 [4:35:44<27:20:54,  1.14s/it]

137990 episode score is 717.17


 14%|█▍        | 14011/100000 [4:35:45<27:12:48,  1.14s/it]

138001 episode score is 699.40


 14%|█▍        | 14012/100000 [4:35:46<27:03:31,  1.13s/it]

138012 episode score is 696.19


 14%|█▍        | 14013/100000 [4:35:48<27:12:10,  1.14s/it]

138023 episode score is 699.12


 14%|█▍        | 14014/100000 [4:35:49<27:14:25,  1.14s/it]

138034 episode score is 697.89


 14%|█▍        | 14015/100000 [4:35:50<27:27:08,  1.15s/it]

138045 episode score is 735.43


 14%|█▍        | 14016/100000 [4:35:51<27:44:42,  1.16s/it]

138056 episode score is 754.00


 14%|█▍        | 14017/100000 [4:35:52<27:40:13,  1.16s/it]

138067 episode score is 721.16


 14%|█▍        | 14018/100000 [4:35:53<27:14:21,  1.14s/it]

138078 episode score is 667.08


 14%|█▍        | 14019/100000 [4:35:55<27:13:56,  1.14s/it]

138089 episode score is 704.33


 14%|█▍        | 14020/100000 [4:35:56<27:06:15,  1.13s/it]

138100 episode score is 698.09


 14%|█▍        | 14021/100000 [4:35:57<26:44:46,  1.12s/it]

138111 episode score is 645.96


 14%|█▍        | 14022/100000 [4:35:58<26:53:07,  1.13s/it]

138122 episode score is 689.24


 14%|█▍        | 14023/100000 [4:35:59<26:40:51,  1.12s/it]

138133 episode score is 653.11


 14%|█▍        | 14024/100000 [4:36:00<26:59:02,  1.13s/it]

138145 episode score is 625.71


 14%|█▍        | 14025/100000 [4:36:01<26:57:16,  1.13s/it]

138156 episode score is 692.71


 14%|█▍        | 14026/100000 [4:36:02<27:11:26,  1.14s/it]

138167 episode score is 706.60


 14%|█▍        | 14027/100000 [4:36:04<27:14:20,  1.14s/it]

138178 episode score is 704.07


 14%|█▍        | 14028/100000 [4:36:05<27:25:46,  1.15s/it]

138189 episode score is 727.18


 14%|█▍        | 14029/100000 [4:36:06<27:00:58,  1.13s/it]

138199 episode score is 749.39


 14%|█▍        | 14030/100000 [4:36:07<27:12:24,  1.14s/it]

138210 episode score is 725.94


 14%|█▍        | 14031/100000 [4:36:08<27:15:12,  1.14s/it]

138221 episode score is 707.34


 14%|█▍        | 14032/100000 [4:36:09<27:12:47,  1.14s/it]

138231 episode score is 781.64


 14%|█▍        | 14033/100000 [4:36:10<26:58:29,  1.13s/it]

138241 episode score is 763.42


 14%|█▍        | 14034/100000 [4:36:12<27:16:40,  1.14s/it]

138252 episode score is 724.43


 14%|█▍        | 14035/100000 [4:36:13<26:59:15,  1.13s/it]

138262 episode score is 762.22


 14%|█▍        | 14036/100000 [4:36:14<26:50:44,  1.12s/it]

138273 episode score is 676.76


 14%|█▍        | 14037/100000 [4:36:15<27:04:39,  1.13s/it]

138284 episode score is 713.37


 14%|█▍        | 14038/100000 [4:36:16<27:19:20,  1.14s/it]

138295 episode score is 733.24


 14%|█▍        | 14039/100000 [4:36:17<27:37:32,  1.16s/it]

138306 episode score is 726.98


 14%|█▍        | 14040/100000 [4:36:18<27:32:05,  1.15s/it]

138317 episode score is 692.91


 14%|█▍        | 14041/100000 [4:36:20<27:18:44,  1.14s/it]

138327 episode score is 767.28


 14%|█▍        | 14042/100000 [4:36:21<26:59:46,  1.13s/it]

138337 episode score is 744.12


 14%|█▍        | 14043/100000 [4:36:22<26:49:16,  1.12s/it]

138347 episode score is 768.73


 14%|█▍        | 14044/100000 [4:36:23<27:32:17,  1.15s/it]

138357 episode score is 776.15


 14%|█▍        | 14045/100000 [4:36:24<27:52:25,  1.17s/it]

138368 episode score is 753.73


 14%|█▍        | 14046/100000 [4:36:25<27:45:48,  1.16s/it]

138378 episode score is 812.70


 14%|█▍        | 14047/100000 [4:36:26<27:26:36,  1.15s/it]

138388 episode score is 772.97


 14%|█▍        | 14048/100000 [4:36:28<27:08:05,  1.14s/it]

138398 episode score is 754.17


 14%|█▍        | 14049/100000 [4:36:29<27:15:25,  1.14s/it]

138408 episode score is 801.41


 14%|█▍        | 14050/100000 [4:36:30<27:28:56,  1.15s/it]

138418 episode score is 814.98


 14%|█▍        | 14051/100000 [4:36:31<27:30:02,  1.15s/it]

138428 episode score is 797.62


 14%|█▍        | 14052/100000 [4:36:32<27:29:18,  1.15s/it]

138438 episode score is 793.22


 14%|█▍        | 14053/100000 [4:36:33<27:25:34,  1.15s/it]

138448 episode score is 784.87


 14%|█▍        | 14054/100000 [4:36:34<27:08:16,  1.14s/it]

138458 episode score is 773.25


 14%|█▍        | 14055/100000 [4:36:36<27:27:39,  1.15s/it]

138469 episode score is 734.70


 14%|█▍        | 14056/100000 [4:36:37<27:27:11,  1.15s/it]

138479 episode score is 807.83


 14%|█▍        | 14057/100000 [4:36:38<27:31:09,  1.15s/it]

138489 episode score is 806.48


 14%|█▍        | 14058/100000 [4:36:39<27:42:32,  1.16s/it]

138500 episode score is 733.75


 14%|█▍        | 14059/100000 [4:36:40<27:28:18,  1.15s/it]

138510 episode score is 771.19


 14%|█▍        | 14060/100000 [4:36:41<27:31:13,  1.15s/it]

138520 episode score is 807.92


 14%|█▍        | 14061/100000 [4:36:42<27:05:49,  1.14s/it]

138529 episode score is 844.07


 14%|█▍        | 14062/100000 [4:36:44<27:30:07,  1.15s/it]

138539 episode score is 831.23


 14%|█▍        | 14063/100000 [4:36:45<27:33:00,  1.15s/it]

138549 episode score is 807.44


 14%|█▍        | 14064/100000 [4:36:46<27:19:56,  1.14s/it]

138559 episode score is 788.52


 14%|█▍        | 14065/100000 [4:36:47<27:44:21,  1.16s/it]

138569 episode score is 838.41


 14%|█▍        | 14066/100000 [4:36:48<27:33:37,  1.15s/it]

138579 episode score is 787.83


 14%|█▍        | 14067/100000 [4:36:49<27:35:07,  1.16s/it]

138589 episode score is 801.77


 14%|█▍        | 14068/100000 [4:36:51<27:05:21,  1.13s/it]

138598 episode score is 843.01


 14%|█▍        | 14069/100000 [4:36:52<27:07:55,  1.14s/it]

138608 episode score is 798.12


 14%|█▍        | 14070/100000 [4:36:53<27:27:56,  1.15s/it]

138618 episode score is 824.93


 14%|█▍        | 14071/100000 [4:36:54<27:40:38,  1.16s/it]

138628 episode score is 831.61


 14%|█▍        | 14072/100000 [4:36:55<27:38:41,  1.16s/it]

138638 episode score is 800.00


 14%|█▍        | 14073/100000 [4:36:56<27:15:22,  1.14s/it]

138648 episode score is 755.77


 14%|█▍        | 14074/100000 [4:36:57<27:35:44,  1.16s/it]

138658 episode score is 840.58


 14%|█▍        | 14075/100000 [4:36:59<27:29:48,  1.15s/it]

138668 episode score is 798.21


 14%|█▍        | 14076/100000 [4:37:00<27:42:00,  1.16s/it]

138678 episode score is 806.60


 14%|█▍        | 14077/100000 [4:37:01<27:33:54,  1.15s/it]

138688 episode score is 795.41


 14%|█▍        | 14078/100000 [4:37:02<27:39:06,  1.16s/it]

138698 episode score is 820.17


 14%|█▍        | 14079/100000 [4:37:03<27:48:44,  1.17s/it]

138708 episode score is 826.66


 14%|█▍        | 14080/100000 [4:37:04<27:44:56,  1.16s/it]

138718 episode score is 807.51


 14%|█▍        | 14081/100000 [4:37:06<27:48:26,  1.17s/it]

138728 episode score is 821.92


 14%|█▍        | 14082/100000 [4:37:07<27:40:55,  1.16s/it]

138738 episode score is 807.98


 14%|█▍        | 14083/100000 [4:37:08<27:38:11,  1.16s/it]

138748 episode score is 809.62


 14%|█▍        | 14084/100000 [4:37:09<27:21:05,  1.15s/it]

138758 episode score is 771.40


 14%|█▍        | 14085/100000 [4:37:10<27:19:22,  1.14s/it]

138768 episode score is 805.40


 14%|█▍        | 14086/100000 [4:37:11<27:25:03,  1.15s/it]

138778 episode score is 813.86


 14%|█▍        | 14087/100000 [4:37:12<27:00:59,  1.13s/it]

138788 episode score is 749.68


 14%|█▍        | 14088/100000 [4:37:14<26:59:51,  1.13s/it]

138798 episode score is 791.92


 14%|█▍        | 14089/100000 [4:37:15<26:46:17,  1.12s/it]

138808 episode score is 772.59


 14%|█▍        | 14090/100000 [4:37:16<26:33:36,  1.11s/it]

138818 episode score is 761.74


 14%|█▍        | 14091/100000 [4:37:17<26:30:05,  1.11s/it]

138828 episode score is 766.63


 14%|█▍        | 14092/100000 [4:37:18<26:21:44,  1.10s/it]

138837 episode score is 860.34


 14%|█▍        | 14093/100000 [4:37:19<26:48:28,  1.12s/it]

138847 episode score is 819.94


 14%|█▍        | 14094/100000 [4:37:20<27:01:39,  1.13s/it]

138857 episode score is 807.28


 14%|█▍        | 14095/100000 [4:37:21<27:24:11,  1.15s/it]

138867 episode score is 845.58


 14%|█▍        | 14096/100000 [4:37:23<27:35:24,  1.16s/it]

138877 episode score is 822.05


 14%|█▍        | 14097/100000 [4:37:24<27:15:16,  1.14s/it]

138887 episode score is 777.17


 14%|█▍        | 14098/100000 [4:37:25<27:25:35,  1.15s/it]

138897 episode score is 823.60


 14%|█▍        | 14099/100000 [4:37:26<27:17:49,  1.14s/it]

138907 episode score is 800.52
138917 episode score is 780.74


 14%|█▍        | 14100/100000 [4:37:28<34:52:08,  1.46s/it]

Iteration 14100: Average test reward: 841.83


 14%|█▍        | 14101/100000 [4:37:29<32:29:22,  1.36s/it]

138927 episode score is 790.05


 14%|█▍        | 14102/100000 [4:37:30<30:32:17,  1.28s/it]

138936 episode score is 847.89


 14%|█▍        | 14103/100000 [4:37:32<29:59:28,  1.26s/it]

138946 episode score is 846.88


 14%|█▍        | 14104/100000 [4:37:33<29:06:09,  1.22s/it]

138956 episode score is 785.01


 14%|█▍        | 14105/100000 [4:37:34<28:41:38,  1.20s/it]

138966 episode score is 816.86


 14%|█▍        | 14106/100000 [4:37:35<28:27:19,  1.19s/it]

138976 episode score is 823.48


 14%|█▍        | 14107/100000 [4:37:36<28:26:37,  1.19s/it]

138986 episode score is 833.19


 14%|█▍        | 14108/100000 [4:37:37<28:07:55,  1.18s/it]

138996 episode score is 804.22


 14%|█▍        | 14109/100000 [4:37:39<27:59:53,  1.17s/it]

139006 episode score is 814.84


 14%|█▍        | 14110/100000 [4:37:40<28:01:35,  1.17s/it]

139016 episode score is 809.88


 14%|█▍        | 14111/100000 [4:37:41<27:23:46,  1.15s/it]

139025 episode score is 840.13


 14%|█▍        | 14112/100000 [4:37:42<27:40:33,  1.16s/it]

139035 episode score is 822.05


 14%|█▍        | 14113/100000 [4:37:43<27:17:01,  1.14s/it]

139044 episode score is 873.63


 14%|█▍        | 14114/100000 [4:37:44<27:11:06,  1.14s/it]

139054 episode score is 790.17


 14%|█▍        | 14115/100000 [4:37:45<27:18:01,  1.14s/it]

139064 episode score is 814.34


 14%|█▍        | 14116/100000 [4:37:47<27:15:43,  1.14s/it]

139074 episode score is 799.82


 14%|█▍        | 14117/100000 [4:37:48<27:21:45,  1.15s/it]

139084 episode score is 794.94


 14%|█▍        | 14118/100000 [4:37:49<27:31:52,  1.15s/it]

139094 episode score is 803.73


 14%|█▍        | 14119/100000 [4:37:50<27:45:12,  1.16s/it]

139104 episode score is 829.03


 14%|█▍        | 14120/100000 [4:37:51<28:03:32,  1.18s/it]

139114 episode score is 845.30


 14%|█▍        | 14121/100000 [4:37:53<28:12:41,  1.18s/it]

139124 episode score is 832.69


 14%|█▍        | 14122/100000 [4:37:54<28:09:59,  1.18s/it]

139134 episode score is 823.74


 14%|█▍        | 14123/100000 [4:37:55<27:39:15,  1.16s/it]

139143 episode score is 863.90


 14%|█▍        | 14124/100000 [4:37:56<27:09:13,  1.14s/it]

139152 episode score is 846.24


 14%|█▍        | 14125/100000 [4:37:57<27:34:49,  1.16s/it]

139162 episode score is 842.51


 14%|█▍        | 14126/100000 [4:37:58<28:41:52,  1.20s/it]

139172 episode score is 844.10


 14%|█▍        | 14127/100000 [4:38:00<28:40:21,  1.20s/it]

139182 episode score is 845.42


 14%|█▍        | 14128/100000 [4:38:01<28:29:31,  1.19s/it]

139192 episode score is 814.69


 14%|█▍        | 14129/100000 [4:38:02<27:46:56,  1.16s/it]

139202 episode score is 745.32


 14%|█▍        | 14130/100000 [4:38:03<27:31:38,  1.15s/it]

139212 episode score is 774.95


 14%|█▍        | 14131/100000 [4:38:04<27:45:42,  1.16s/it]

139222 episode score is 831.71


 14%|█▍        | 14132/100000 [4:38:05<27:44:26,  1.16s/it]

139232 episode score is 811.76


 14%|█▍        | 14133/100000 [4:38:07<27:43:28,  1.16s/it]

139242 episode score is 812.94


 14%|█▍        | 14134/100000 [4:38:08<27:50:01,  1.17s/it]

139252 episode score is 822.34


 14%|█▍        | 14135/100000 [4:38:09<27:51:21,  1.17s/it]

139262 episode score is 819.11


 14%|█▍        | 14136/100000 [4:38:10<28:04:55,  1.18s/it]

139272 episode score is 840.05


 14%|█▍        | 14137/100000 [4:38:11<27:30:31,  1.15s/it]

139281 episode score is 844.89


 14%|█▍        | 14138/100000 [4:38:12<27:46:55,  1.16s/it]

139291 episode score is 814.90


 14%|█▍        | 14139/100000 [4:38:14<27:43:04,  1.16s/it]

139301 episode score is 804.87


 14%|█▍        | 14140/100000 [4:38:15<27:57:19,  1.17s/it]

139311 episode score is 838.62


 14%|█▍        | 14141/100000 [4:38:16<27:50:14,  1.17s/it]

139321 episode score is 799.52


 14%|█▍        | 14142/100000 [4:38:17<28:00:41,  1.17s/it]

139331 episode score is 836.25


 14%|█▍        | 14143/100000 [4:38:18<28:13:45,  1.18s/it]

139341 episode score is 849.12


 14%|█▍        | 14144/100000 [4:38:19<28:08:06,  1.18s/it]

139351 episode score is 812.82


 14%|█▍        | 14145/100000 [4:38:21<27:52:28,  1.17s/it]

139361 episode score is 801.36


 14%|█▍        | 14146/100000 [4:38:22<27:56:45,  1.17s/it]

139371 episode score is 838.50


 14%|█▍        | 14147/100000 [4:38:23<27:44:58,  1.16s/it]

139381 episode score is 795.95


 14%|█▍        | 14148/100000 [4:38:24<27:42:10,  1.16s/it]

139391 episode score is 814.80


 14%|█▍        | 14149/100000 [4:38:25<27:12:15,  1.14s/it]

139400 episode score is 861.71


 14%|█▍        | 14150/100000 [4:38:26<27:28:09,  1.15s/it]

139410 episode score is 843.13


 14%|█▍        | 14151/100000 [4:38:27<27:16:09,  1.14s/it]

139420 episode score is 785.57


 14%|█▍        | 14152/100000 [4:38:29<27:17:20,  1.14s/it]

139430 episode score is 810.43


 14%|█▍        | 14153/100000 [4:38:30<27:27:49,  1.15s/it]

139440 episode score is 813.08


 14%|█▍        | 14154/100000 [4:38:31<27:35:33,  1.16s/it]

139450 episode score is 830.31


 14%|█▍        | 14155/100000 [4:38:32<27:46:52,  1.17s/it]

139460 episode score is 833.08


 14%|█▍        | 14156/100000 [4:38:33<27:34:00,  1.16s/it]

139470 episode score is 784.10


 14%|█▍        | 14157/100000 [4:38:34<27:13:27,  1.14s/it]

139480 episode score is 780.45


 14%|█▍        | 14158/100000 [4:38:36<27:36:26,  1.16s/it]

139491 episode score is 738.73


 14%|█▍        | 14159/100000 [4:38:37<27:19:03,  1.15s/it]

139501 episode score is 782.53


 14%|█▍        | 14160/100000 [4:38:38<27:39:23,  1.16s/it]

139511 episode score is 846.89


 14%|█▍        | 14161/100000 [4:38:39<27:38:11,  1.16s/it]

139521 episode score is 804.78


 14%|█▍        | 14162/100000 [4:38:40<27:43:20,  1.16s/it]

139531 episode score is 824.22


 14%|█▍        | 14163/100000 [4:38:41<27:55:37,  1.17s/it]

139541 episode score is 832.88


 14%|█▍        | 14164/100000 [4:38:43<27:56:56,  1.17s/it]

139551 episode score is 834.14


 14%|█▍        | 14165/100000 [4:38:44<27:36:46,  1.16s/it]

139561 episode score is 786.58


 14%|█▍        | 14166/100000 [4:38:45<27:29:42,  1.15s/it]

139571 episode score is 804.94


 14%|█▍        | 14167/100000 [4:38:46<27:08:57,  1.14s/it]

139581 episode score is 767.74


 14%|█▍        | 14168/100000 [4:38:47<27:23:32,  1.15s/it]

139591 episode score is 810.74


 14%|█▍        | 14169/100000 [4:38:48<27:26:08,  1.15s/it]

139601 episode score is 814.16


 14%|█▍        | 14170/100000 [4:38:49<27:20:57,  1.15s/it]

139611 episode score is 790.82


 14%|█▍        | 14171/100000 [4:38:51<27:27:47,  1.15s/it]

139621 episode score is 829.73


 14%|█▍        | 14172/100000 [4:38:52<27:15:42,  1.14s/it]

139631 episode score is 784.73


 14%|█▍        | 14173/100000 [4:38:53<27:28:39,  1.15s/it]

139641 episode score is 817.73


 14%|█▍        | 14174/100000 [4:38:54<27:42:54,  1.16s/it]

139651 episode score is 842.66


 14%|█▍        | 14175/100000 [4:38:55<27:29:36,  1.15s/it]

139661 episode score is 789.22


 14%|█▍        | 14176/100000 [4:38:56<27:27:39,  1.15s/it]

139671 episode score is 806.52


 14%|█▍        | 14177/100000 [4:38:57<27:19:35,  1.15s/it]

139681 episode score is 791.92


 14%|█▍        | 14178/100000 [4:38:59<27:43:41,  1.16s/it]

139691 episode score is 856.31


 14%|█▍        | 14179/100000 [4:39:00<27:20:30,  1.15s/it]

139700 episode score is 872.65


 14%|█▍        | 14180/100000 [4:39:01<27:37:08,  1.16s/it]

139710 episode score is 830.64


 14%|█▍        | 14181/100000 [4:39:02<28:03:03,  1.18s/it]

139720 episode score is 830.24


 14%|█▍        | 14182/100000 [4:39:03<27:57:38,  1.17s/it]

139730 episode score is 805.83


 14%|█▍        | 14183/100000 [4:39:05<28:08:45,  1.18s/it]

139740 episode score is 846.23


 14%|█▍        | 14184/100000 [4:39:06<28:26:32,  1.19s/it]

139750 episode score is 840.36


 14%|█▍        | 14185/100000 [4:39:07<27:59:41,  1.17s/it]

139760 episode score is 786.07


 14%|█▍        | 14186/100000 [4:39:08<27:57:15,  1.17s/it]

139770 episode score is 808.02


 14%|█▍        | 14187/100000 [4:39:09<28:17:29,  1.19s/it]

139780 episode score is 844.00


 14%|█▍        | 14188/100000 [4:39:10<28:24:56,  1.19s/it]

139790 episode score is 847.47


 14%|█▍        | 14189/100000 [4:39:12<28:24:22,  1.19s/it]

139800 episode score is 839.15


 14%|█▍        | 14190/100000 [4:39:13<28:20:51,  1.19s/it]

139810 episode score is 818.18


 14%|█▍        | 14191/100000 [4:39:14<27:58:02,  1.17s/it]

139820 episode score is 797.10


 14%|█▍        | 14192/100000 [4:39:15<27:56:54,  1.17s/it]

139831 episode score is 729.80


 14%|█▍        | 14193/100000 [4:39:16<28:45:47,  1.21s/it]

139841 episode score is 828.87


 14%|█▍        | 14194/100000 [4:39:18<28:01:03,  1.18s/it]

139850 episode score is 842.26


 14%|█▍        | 14195/100000 [4:39:19<28:02:57,  1.18s/it]

139860 episode score is 810.40


 14%|█▍        | 14196/100000 [4:39:20<28:03:01,  1.18s/it]

139870 episode score is 805.02


 14%|█▍        | 14197/100000 [4:39:21<28:05:53,  1.18s/it]

139880 episode score is 832.14


 14%|█▍        | 14198/100000 [4:39:22<28:03:39,  1.18s/it]

139890 episode score is 810.75


 14%|█▍        | 14199/100000 [4:39:23<27:55:10,  1.17s/it]

139900 episode score is 793.46
139910 episode score is 805.03


 14%|█▍        | 14200/100000 [4:39:26<35:35:08,  1.49s/it]

Iteration 14200: Average test reward: 821.18


 14%|█▍        | 14201/100000 [4:39:27<33:31:35,  1.41s/it]

139920 episode score is 825.26


 14%|█▍        | 14202/100000 [4:39:28<31:49:32,  1.34s/it]

139930 episode score is 816.38


 14%|█▍        | 14203/100000 [4:39:29<30:32:12,  1.28s/it]

139940 episode score is 794.97


 14%|█▍        | 14204/100000 [4:39:30<29:28:46,  1.24s/it]

139950 episode score is 784.51


 14%|█▍        | 14205/100000 [4:39:31<28:22:52,  1.19s/it]

139960 episode score is 738.44


 14%|█▍        | 14206/100000 [4:39:33<28:05:50,  1.18s/it]

139970 episode score is 770.14


 14%|█▍        | 14207/100000 [4:39:34<27:32:15,  1.16s/it]

139980 episode score is 746.96


 14%|█▍        | 14208/100000 [4:39:35<27:37:35,  1.16s/it]

139990 episode score is 803.17


 14%|█▍        | 14209/100000 [4:39:36<27:58:12,  1.17s/it]

140000 episode score is 841.00


 14%|█▍        | 14210/100000 [4:39:37<27:23:56,  1.15s/it]

140009 episode score is 834.51


 14%|█▍        | 14211/100000 [4:39:38<27:05:54,  1.14s/it]

140018 episode score is 865.21


 14%|█▍        | 14212/100000 [4:39:39<27:21:33,  1.15s/it]

140028 episode score is 803.52


 14%|█▍        | 14213/100000 [4:39:41<26:58:48,  1.13s/it]

140037 episode score is 862.71


 14%|█▍        | 14214/100000 [4:39:42<27:19:03,  1.15s/it]

140047 episode score is 816.74


 14%|█▍        | 14215/100000 [4:39:43<27:03:24,  1.14s/it]

140056 episode score is 855.20


 14%|█▍        | 14216/100000 [4:39:44<26:54:29,  1.13s/it]

140065 episode score is 857.67


 14%|█▍        | 14217/100000 [4:39:45<26:46:57,  1.12s/it]

140074 episode score is 852.21


 14%|█▍        | 14218/100000 [4:39:46<26:48:47,  1.13s/it]

140084 episode score is 757.90


 14%|█▍        | 14219/100000 [4:39:47<26:38:14,  1.12s/it]

140093 episode score is 851.08


 14%|█▍        | 14220/100000 [4:39:48<27:02:54,  1.14s/it]

140103 episode score is 812.97


 14%|█▍        | 14221/100000 [4:39:50<27:04:15,  1.14s/it]

140113 episode score is 769.43


 14%|█▍        | 14222/100000 [4:39:51<26:55:14,  1.13s/it]

140123 episode score is 767.14


 14%|█▍        | 14223/100000 [4:39:52<27:28:04,  1.15s/it]

140133 episode score is 834.88


 14%|█▍        | 14224/100000 [4:39:53<27:45:03,  1.16s/it]

140143 episode score is 825.57


 14%|█▍        | 14225/100000 [4:39:54<27:22:34,  1.15s/it]

140153 episode score is 761.07


 14%|█▍        | 14226/100000 [4:39:55<27:37:26,  1.16s/it]

140163 episode score is 811.85


 14%|█▍        | 14227/100000 [4:39:57<27:42:06,  1.16s/it]

140174 episode score is 723.52


 14%|█▍        | 14228/100000 [4:39:58<27:18:57,  1.15s/it]

140184 episode score is 752.85


 14%|█▍        | 14229/100000 [4:39:59<27:43:10,  1.16s/it]

140194 episode score is 837.69


 14%|█▍        | 14230/100000 [4:40:00<27:17:03,  1.15s/it]

140203 episode score is 847.74


 14%|█▍        | 14231/100000 [4:40:01<27:36:06,  1.16s/it]

140213 episode score is 832.74


 14%|█▍        | 14232/100000 [4:40:02<28:00:22,  1.18s/it]

140223 episode score is 835.59


 14%|█▍        | 14233/100000 [4:40:04<27:51:53,  1.17s/it]

140233 episode score is 789.73


 14%|█▍        | 14234/100000 [4:40:05<27:24:08,  1.15s/it]

140242 episode score is 863.04


 14%|█▍        | 14235/100000 [4:40:06<27:46:41,  1.17s/it]

140252 episode score is 849.08


 14%|█▍        | 14236/100000 [4:40:07<27:57:19,  1.17s/it]

140262 episode score is 830.08


 14%|█▍        | 14237/100000 [4:40:08<28:03:59,  1.18s/it]

140272 episode score is 818.36


 14%|█▍        | 14238/100000 [4:40:09<28:13:45,  1.18s/it]

140282 episode score is 831.02


 14%|█▍        | 14239/100000 [4:40:11<27:34:41,  1.16s/it]

140291 episode score is 850.69


 14%|█▍        | 14240/100000 [4:40:12<27:42:33,  1.16s/it]

140301 episode score is 808.60


 14%|█▍        | 14241/100000 [4:40:13<28:01:48,  1.18s/it]

140311 episode score is 823.73


 14%|█▍        | 14242/100000 [4:40:14<28:14:07,  1.19s/it]

140321 episode score is 834.99


 14%|█▍        | 14243/100000 [4:40:15<27:35:05,  1.16s/it]

140330 episode score is 848.86


 14%|█▍        | 14244/100000 [4:40:16<27:52:46,  1.17s/it]

140340 episode score is 831.47


 14%|█▍        | 14245/100000 [4:40:18<27:34:05,  1.16s/it]

140349 episode score is 849.65


 14%|█▍        | 14246/100000 [4:40:19<28:00:25,  1.18s/it]

140359 episode score is 848.20


 14%|█▍        | 14247/100000 [4:40:20<27:39:24,  1.16s/it]

140368 episode score is 873.02


 14%|█▍        | 14248/100000 [4:40:21<27:11:04,  1.14s/it]

140377 episode score is 849.77


 14%|█▍        | 14249/100000 [4:40:22<27:12:12,  1.14s/it]

140387 episode score is 790.51


 14%|█▍        | 14250/100000 [4:40:23<26:56:13,  1.13s/it]

140396 episode score is 849.79


 14%|█▍        | 14251/100000 [4:40:24<27:25:48,  1.15s/it]

140406 episode score is 836.92


 14%|█▍        | 14252/100000 [4:40:26<27:09:07,  1.14s/it]

140415 episode score is 864.73


 14%|█▍        | 14253/100000 [4:40:27<26:59:25,  1.13s/it]

140424 episode score is 875.88


 14%|█▍        | 14254/100000 [4:40:28<27:15:35,  1.14s/it]

140434 episode score is 806.12


 14%|█▍        | 14255/100000 [4:40:29<26:55:51,  1.13s/it]

140443 episode score is 841.72


 14%|█▍        | 14256/100000 [4:40:30<27:26:08,  1.15s/it]

140452 episode score is 843.88


 14%|█▍        | 14257/100000 [4:40:31<27:19:53,  1.15s/it]

140461 episode score is 883.40


 14%|█▍        | 14258/100000 [4:40:32<27:17:39,  1.15s/it]

140470 episode score is 890.42


 14%|█▍        | 14259/100000 [4:40:34<27:03:25,  1.14s/it]

140479 episode score is 862.67


 14%|█▍        | 14260/100000 [4:40:35<27:23:01,  1.15s/it]

140489 episode score is 818.10


 14%|█▍        | 14261/100000 [4:40:36<27:05:26,  1.14s/it]

140498 episode score is 872.83


 14%|█▍        | 14262/100000 [4:40:37<26:49:42,  1.13s/it]

140507 episode score is 851.14


 14%|█▍        | 14263/100000 [4:40:38<27:19:51,  1.15s/it]

140517 episode score is 841.16


 14%|█▍        | 14264/100000 [4:40:39<27:42:43,  1.16s/it]

140527 episode score is 824.63


 14%|█▍        | 14265/100000 [4:40:40<27:28:11,  1.15s/it]

140536 episode score is 872.60


 14%|█▍        | 14266/100000 [4:40:42<27:19:49,  1.15s/it]

140545 episode score is 886.20


 14%|█▍        | 14267/100000 [4:40:43<27:01:31,  1.13s/it]

140554 episode score is 837.40


 14%|█▍        | 14268/100000 [4:40:44<26:50:09,  1.13s/it]

140563 episode score is 840.51


 14%|█▍        | 14269/100000 [4:40:45<27:06:48,  1.14s/it]

140573 episode score is 794.92


 14%|█▍        | 14270/100000 [4:40:46<27:22:23,  1.15s/it]

140583 episode score is 805.07


 14%|█▍        | 14271/100000 [4:40:47<27:13:54,  1.14s/it]

140592 episode score is 851.79


 14%|█▍        | 14272/100000 [4:40:48<26:59:30,  1.13s/it]

140601 episode score is 839.25


 14%|█▍        | 14273/100000 [4:40:49<26:46:07,  1.12s/it]

140610 episode score is 841.57


 14%|█▍        | 14274/100000 [4:40:51<26:46:03,  1.12s/it]

140619 episode score is 870.27


 14%|█▍        | 14275/100000 [4:40:52<27:08:11,  1.14s/it]

140629 episode score is 808.48


 14%|█▍        | 14276/100000 [4:40:53<27:36:34,  1.16s/it]

140639 episode score is 835.24


 14%|█▍        | 14277/100000 [4:40:54<27:58:29,  1.17s/it]

140649 episode score is 847.02


 14%|█▍        | 14278/100000 [4:40:55<27:32:05,  1.16s/it]

140658 episode score is 879.19


 14%|█▍        | 14279/100000 [4:40:56<27:03:56,  1.14s/it]

140667 episode score is 845.78


 14%|█▍        | 14280/100000 [4:40:57<26:50:13,  1.13s/it]

140676 episode score is 863.27


 14%|█▍        | 14281/100000 [4:40:59<26:50:08,  1.13s/it]

140685 episode score is 893.37


 14%|█▍        | 14282/100000 [4:41:00<26:43:38,  1.12s/it]

140694 episode score is 864.93


 14%|█▍        | 14283/100000 [4:41:01<26:57:24,  1.13s/it]

140703 episode score is 886.04


 14%|█▍        | 14284/100000 [4:41:02<27:24:25,  1.15s/it]

140713 episode score is 835.03


 14%|█▍        | 14285/100000 [4:41:03<26:56:18,  1.13s/it]

140722 episode score is 843.59


 14%|█▍        | 14286/100000 [4:41:04<27:15:03,  1.14s/it]

140732 episode score is 821.50


 14%|█▍        | 14287/100000 [4:41:06<27:27:13,  1.15s/it]

140742 episode score is 819.81


 14%|█▍        | 14288/100000 [4:41:07<27:05:47,  1.14s/it]

140751 episode score is 847.85


 14%|█▍        | 14289/100000 [4:41:08<27:06:34,  1.14s/it]

140760 episode score is 886.53


 14%|█▍        | 14290/100000 [4:41:09<26:56:43,  1.13s/it]

140769 episode score is 870.91


 14%|█▍        | 14291/100000 [4:41:10<26:56:57,  1.13s/it]

140778 episode score is 880.87


 14%|█▍        | 14292/100000 [4:41:11<26:55:35,  1.13s/it]

140787 episode score is 899.51


 14%|█▍        | 14293/100000 [4:41:12<26:48:15,  1.13s/it]

140796 episode score is 862.10


 14%|█▍        | 14294/100000 [4:41:13<26:41:56,  1.12s/it]

140805 episode score is 868.51


 14%|█▍        | 14295/100000 [4:41:14<26:26:54,  1.11s/it]

140814 episode score is 846.19


 14%|█▍        | 14296/100000 [4:41:16<27:05:04,  1.14s/it]

140824 episode score is 835.67


 14%|█▍        | 14297/100000 [4:41:17<27:29:54,  1.16s/it]

140834 episode score is 828.33


 14%|█▍        | 14298/100000 [4:41:18<27:21:25,  1.15s/it]

140844 episode score is 758.58


 14%|█▍        | 14299/100000 [4:41:19<27:19:49,  1.15s/it]

140854 episode score is 766.62
140863 episode score is 862.84


 14%|█▍        | 14300/100000 [4:41:21<34:05:10,  1.43s/it]

Iteration 14300: Average test reward: 712.41


 14%|█▍        | 14301/100000 [4:41:22<31:47:50,  1.34s/it]

140872 episode score is 839.52


 14%|█▍        | 14302/100000 [4:41:24<30:49:31,  1.29s/it]

140882 episode score is 817.03


 14%|█▍        | 14303/100000 [4:41:25<29:48:51,  1.25s/it]

140891 episode score is 860.84


 14%|█▍        | 14304/100000 [4:41:26<29:06:48,  1.22s/it]

140900 episode score is 833.99


 14%|█▍        | 14305/100000 [4:41:27<28:26:00,  1.19s/it]

140910 episode score is 713.36


 14%|█▍        | 14306/100000 [4:41:28<27:49:22,  1.17s/it]

140920 episode score is 692.02


 14%|█▍        | 14307/100000 [4:41:29<28:05:20,  1.18s/it]

140930 episode score is 801.68


 14%|█▍        | 14308/100000 [4:41:30<27:46:41,  1.17s/it]

140939 episode score is 810.98


 14%|█▍        | 14309/100000 [4:41:32<27:38:58,  1.16s/it]

140948 episode score is 870.58


 14%|█▍        | 14310/100000 [4:41:33<27:19:18,  1.15s/it]

140957 episode score is 836.35


 14%|█▍        | 14311/100000 [4:41:34<27:22:39,  1.15s/it]

140966 episode score is 833.05


 14%|█▍        | 14312/100000 [4:41:35<27:22:45,  1.15s/it]

140975 episode score is 876.00


 14%|█▍        | 14313/100000 [4:41:36<27:14:04,  1.14s/it]

140984 episode score is 833.80


 14%|█▍        | 14314/100000 [4:41:37<26:54:01,  1.13s/it]

140993 episode score is 821.20


 14%|█▍        | 14315/100000 [4:41:38<26:54:38,  1.13s/it]

141002 episode score is 857.73


 14%|█▍        | 14316/100000 [4:41:40<27:08:12,  1.14s/it]

141011 episode score is 887.32


 14%|█▍        | 14317/100000 [4:41:41<27:20:01,  1.15s/it]

141020 episode score is 890.99


 14%|█▍        | 14318/100000 [4:41:42<27:08:52,  1.14s/it]

141029 episode score is 825.23


 14%|█▍        | 14319/100000 [4:41:43<27:17:32,  1.15s/it]

141038 episode score is 850.24


 14%|█▍        | 14320/100000 [4:41:44<27:00:48,  1.14s/it]

141047 episode score is 809.15


 14%|█▍        | 14321/100000 [4:41:45<27:58:35,  1.18s/it]

141056 episode score is 897.59


 14%|█▍        | 14322/100000 [4:41:47<27:56:09,  1.17s/it]

141065 episode score is 886.38


 14%|█▍        | 14323/100000 [4:41:48<27:45:02,  1.17s/it]

141074 episode score is 849.71


 14%|█▍        | 14324/100000 [4:41:49<27:45:18,  1.17s/it]

141083 episode score is 885.86


 14%|█▍        | 14325/100000 [4:41:50<27:20:16,  1.15s/it]

141092 episode score is 839.07


 14%|█▍        | 14326/100000 [4:41:51<27:24:27,  1.15s/it]

141101 episode score is 886.48


 14%|█▍        | 14327/100000 [4:41:52<27:40:34,  1.16s/it]

141111 episode score is 810.62


 14%|█▍        | 14328/100000 [4:41:53<27:22:03,  1.15s/it]

141120 episode score is 846.23


 14%|█▍        | 14329/100000 [4:41:55<27:04:41,  1.14s/it]

141129 episode score is 850.96


 14%|█▍        | 14330/100000 [4:41:56<26:51:07,  1.13s/it]

141138 episode score is 828.14


 14%|█▍        | 14331/100000 [4:41:57<27:14:36,  1.14s/it]

141148 episode score is 792.35


 14%|█▍        | 14332/100000 [4:41:58<26:52:27,  1.13s/it]

141157 episode score is 820.75


 14%|█▍        | 14333/100000 [4:41:59<26:52:17,  1.13s/it]

141166 episode score is 860.54


 14%|█▍        | 14334/100000 [4:42:00<26:56:27,  1.13s/it]

141175 episode score is 852.56


 14%|█▍        | 14335/100000 [4:42:01<26:45:20,  1.12s/it]

141184 episode score is 815.46


 14%|█▍        | 14336/100000 [4:42:02<26:36:17,  1.12s/it]

141193 episode score is 794.66


 14%|█▍        | 14337/100000 [4:42:04<27:00:23,  1.13s/it]

141202 episode score is 889.06


 14%|█▍        | 14338/100000 [4:42:05<27:16:41,  1.15s/it]

141211 episode score is 897.84


 14%|█▍        | 14339/100000 [4:42:06<27:16:16,  1.15s/it]

141220 episode score is 872.22


 14%|█▍        | 14340/100000 [4:42:07<27:20:05,  1.15s/it]

141229 episode score is 877.43


 14%|█▍        | 14341/100000 [4:42:08<26:57:42,  1.13s/it]

141238 episode score is 809.88


 14%|█▍        | 14342/100000 [4:42:09<26:55:50,  1.13s/it]

141247 episode score is 856.86


 14%|█▍        | 14343/100000 [4:42:10<27:14:33,  1.14s/it]

141256 episode score is 871.82


 14%|█▍        | 14344/100000 [4:42:12<27:16:04,  1.15s/it]

141265 episode score is 850.83


 14%|█▍        | 14345/100000 [4:42:13<27:06:43,  1.14s/it]

141274 episode score is 833.88


 14%|█▍        | 14346/100000 [4:42:14<26:53:42,  1.13s/it]

141283 episode score is 820.92


 14%|█▍        | 14347/100000 [4:42:15<26:36:44,  1.12s/it]

141292 episode score is 827.08


 14%|█▍        | 14348/100000 [4:42:16<27:08:57,  1.14s/it]

141302 episode score is 833.87


 14%|█▍        | 14349/100000 [4:42:17<26:58:16,  1.13s/it]

141312 episode score is 748.52


 14%|█▍        | 14350/100000 [4:42:18<27:18:00,  1.15s/it]

141322 episode score is 807.76


 14%|█▍        | 14351/100000 [4:42:20<27:18:59,  1.15s/it]

141332 episode score is 776.92


 14%|█▍        | 14352/100000 [4:42:21<27:00:57,  1.14s/it]

141341 episode score is 857.52


 14%|█▍        | 14353/100000 [4:42:22<26:49:00,  1.13s/it]

141350 episode score is 847.66


 14%|█▍        | 14354/100000 [4:42:23<26:38:38,  1.12s/it]

141359 episode score is 846.72


 14%|█▍        | 14355/100000 [4:42:24<27:08:16,  1.14s/it]

141369 episode score is 815.66


 14%|█▍        | 14356/100000 [4:42:25<26:55:35,  1.13s/it]

141378 episode score is 853.89


 14%|█▍        | 14357/100000 [4:42:26<26:50:54,  1.13s/it]

141387 episode score is 865.43


 14%|█▍        | 14358/100000 [4:42:27<26:38:08,  1.12s/it]

141396 episode score is 830.19


 14%|█▍        | 14359/100000 [4:42:28<26:45:36,  1.12s/it]

141405 episode score is 860.23


 14%|█▍        | 14360/100000 [4:42:30<27:11:23,  1.14s/it]

141415 episode score is 821.49


 14%|█▍        | 14361/100000 [4:42:31<27:23:03,  1.15s/it]

141425 episode score is 798.99


 14%|█▍        | 14362/100000 [4:42:32<27:11:54,  1.14s/it]

141434 episode score is 850.64


 14%|█▍        | 14363/100000 [4:42:33<27:05:36,  1.14s/it]

141443 episode score is 852.48


 14%|█▍        | 14364/100000 [4:42:34<27:16:23,  1.15s/it]

141453 episode score is 802.55


 14%|█▍        | 14365/100000 [4:42:35<27:37:15,  1.16s/it]

141463 episode score is 832.06


 14%|█▍        | 14366/100000 [4:42:37<27:58:03,  1.18s/it]

141473 episode score is 842.98


 14%|█▍        | 14367/100000 [4:42:38<27:31:23,  1.16s/it]

141482 episode score is 852.04


 14%|█▍        | 14368/100000 [4:42:39<27:19:44,  1.15s/it]

141492 episode score is 770.74


 14%|█▍        | 14369/100000 [4:42:40<27:14:19,  1.15s/it]

141501 episode score is 892.73


 14%|█▍        | 14370/100000 [4:42:41<26:51:59,  1.13s/it]

141510 episode score is 844.34


 14%|█▍        | 14371/100000 [4:42:42<26:45:37,  1.13s/it]

141519 episode score is 845.57


 14%|█▍        | 14372/100000 [4:42:43<27:06:19,  1.14s/it]

141528 episode score is 840.22


 14%|█▍        | 14373/100000 [4:42:45<26:52:42,  1.13s/it]

141537 episode score is 868.50


 14%|█▍        | 14374/100000 [4:42:46<26:38:56,  1.12s/it]

141546 episode score is 842.98


 14%|█▍        | 14375/100000 [4:42:47<26:25:37,  1.11s/it]

141555 episode score is 842.71


 14%|█▍        | 14376/100000 [4:42:48<26:58:55,  1.13s/it]

141564 episode score is 891.21


 14%|█▍        | 14377/100000 [4:42:49<26:51:40,  1.13s/it]

141573 episode score is 868.24


 14%|█▍        | 14378/100000 [4:42:50<26:36:40,  1.12s/it]

141582 episode score is 854.68


 14%|█▍        | 14379/100000 [4:42:51<26:32:54,  1.12s/it]

141591 episode score is 854.23


 14%|█▍        | 14380/100000 [4:42:52<27:02:05,  1.14s/it]

141601 episode score is 820.01


 14%|█▍        | 14381/100000 [4:42:54<27:35:28,  1.16s/it]

141611 episode score is 837.53


 14%|█▍        | 14382/100000 [4:42:55<27:29:29,  1.16s/it]

141620 episode score is 882.95


 14%|█▍        | 14383/100000 [4:42:56<27:31:27,  1.16s/it]

141630 episode score is 795.21


 14%|█▍        | 14384/100000 [4:42:57<27:15:02,  1.15s/it]

141640 episode score is 757.67


 14%|█▍        | 14385/100000 [4:42:58<27:00:17,  1.14s/it]

141649 episode score is 853.30


 14%|█▍        | 14386/100000 [4:42:59<27:27:26,  1.15s/it]

141659 episode score is 830.03


 14%|█▍        | 14387/100000 [4:43:00<26:58:28,  1.13s/it]

141668 episode score is 838.53


 14%|█▍        | 14388/100000 [4:43:02<27:29:14,  1.16s/it]

141678 episode score is 824.33


 14%|█▍        | 14389/100000 [4:43:03<27:30:45,  1.16s/it]

141688 episode score is 756.62


 14%|█▍        | 14390/100000 [4:43:04<27:03:54,  1.14s/it]

141698 episode score is 713.53


 14%|█▍        | 14391/100000 [4:43:05<26:44:32,  1.12s/it]

141707 episode score is 820.48


 14%|█▍        | 14392/100000 [4:43:06<26:51:54,  1.13s/it]

141716 episode score is 881.14


 14%|█▍        | 14393/100000 [4:43:07<27:11:35,  1.14s/it]

141726 episode score is 785.34


 14%|█▍        | 14394/100000 [4:43:09<27:21:23,  1.15s/it]

141736 episode score is 797.81


 14%|█▍        | 14395/100000 [4:43:10<27:44:19,  1.17s/it]

141746 episode score is 820.93


 14%|█▍        | 14396/100000 [4:43:11<27:59:48,  1.18s/it]

141756 episode score is 828.76


 14%|█▍        | 14397/100000 [4:43:12<28:04:31,  1.18s/it]

141766 episode score is 820.56


 14%|█▍        | 14398/100000 [4:43:13<27:45:32,  1.17s/it]

141776 episode score is 772.69


 14%|█▍        | 14399/100000 [4:43:14<27:09:03,  1.14s/it]

141785 episode score is 835.99
141795 episode score is 813.08


 14%|█▍        | 14400/100000 [4:43:17<34:48:58,  1.46s/it]

Iteration 14400: Average test reward: 774.41


 14%|█▍        | 14401/100000 [4:43:18<32:12:17,  1.35s/it]

141804 episode score is 838.65


 14%|█▍        | 14402/100000 [4:43:19<31:03:19,  1.31s/it]

141814 episode score is 827.28


 14%|█▍        | 14403/100000 [4:43:20<30:17:11,  1.27s/it]

141825 episode score is 742.20


 14%|█▍        | 14404/100000 [4:43:21<29:19:16,  1.23s/it]

141835 episode score is 778.76


 14%|█▍        | 14405/100000 [4:43:22<28:47:07,  1.21s/it]

141846 episode score is 689.08


 14%|█▍        | 14406/100000 [4:43:23<28:17:20,  1.19s/it]

141856 episode score is 776.47


 14%|█▍        | 14407/100000 [4:43:25<28:09:52,  1.18s/it]

141866 episode score is 815.61


 14%|█▍        | 14408/100000 [4:43:26<27:26:29,  1.15s/it]

141875 episode score is 833.91


 14%|█▍        | 14409/100000 [4:43:27<27:05:24,  1.14s/it]

141884 episode score is 857.66


 14%|█▍        | 14410/100000 [4:43:28<27:19:55,  1.15s/it]

141894 episode score is 809.32


 14%|█▍        | 14411/100000 [4:43:29<26:53:55,  1.13s/it]

141903 episode score is 832.96


 14%|█▍        | 14412/100000 [4:43:30<26:33:28,  1.12s/it]

141912 episode score is 847.32


 14%|█▍        | 14413/100000 [4:43:31<26:24:22,  1.11s/it]

141921 episode score is 843.75


 14%|█▍        | 14414/100000 [4:43:32<26:42:27,  1.12s/it]

141930 episode score is 868.64


 14%|█▍        | 14415/100000 [4:43:34<26:36:27,  1.12s/it]

141939 episode score is 856.48


 14%|█▍        | 14416/100000 [4:43:35<26:29:26,  1.11s/it]

141948 episode score is 851.20


 14%|█▍        | 14417/100000 [4:43:36<26:30:26,  1.12s/it]

141957 episode score is 843.08


 14%|█▍        | 14418/100000 [4:43:37<26:36:19,  1.12s/it]

141966 episode score is 879.90


 14%|█▍        | 14419/100000 [4:43:38<26:37:55,  1.12s/it]

141975 episode score is 866.50


 14%|█▍        | 14420/100000 [4:43:39<26:24:08,  1.11s/it]

141984 episode score is 844.94


 14%|█▍        | 14421/100000 [4:43:40<26:22:21,  1.11s/it]

141993 episode score is 855.02


 14%|█▍        | 14422/100000 [4:43:41<26:42:09,  1.12s/it]

142002 episode score is 872.38


 14%|█▍        | 14423/100000 [4:43:42<26:48:04,  1.13s/it]

142011 episode score is 886.45


 14%|█▍        | 14424/100000 [4:43:44<27:56:28,  1.18s/it]

142020 episode score is 901.48


 14%|█▍        | 14425/100000 [4:43:45<27:21:38,  1.15s/it]

142029 episode score is 838.42


 14%|█▍        | 14426/100000 [4:43:46<27:24:55,  1.15s/it]

142038 episode score is 909.23


 14%|█▍        | 14427/100000 [4:43:47<26:59:05,  1.14s/it]

142047 episode score is 829.49


 14%|█▍        | 14428/100000 [4:43:48<27:01:00,  1.14s/it]

142056 episode score is 880.39


 14%|█▍        | 14429/100000 [4:43:49<26:55:05,  1.13s/it]

142065 episode score is 853.69


 14%|█▍        | 14430/100000 [4:43:51<27:13:16,  1.15s/it]

142075 episode score is 810.72


 14%|█▍        | 14431/100000 [4:43:52<26:59:48,  1.14s/it]

142084 episode score is 851.70


 14%|█▍        | 14432/100000 [4:43:53<27:02:21,  1.14s/it]

142093 episode score is 844.40


 14%|█▍        | 14433/100000 [4:43:54<27:10:29,  1.14s/it]

142102 episode score is 880.96


 14%|█▍        | 14434/100000 [4:43:55<27:10:13,  1.14s/it]

142111 episode score is 871.77


 14%|█▍        | 14435/100000 [4:43:56<27:23:18,  1.15s/it]

142120 episode score is 903.08


 14%|█▍        | 14436/100000 [4:43:57<27:28:21,  1.16s/it]

142129 episode score is 900.98


 14%|█▍        | 14437/100000 [4:43:59<27:22:48,  1.15s/it]

142138 episode score is 869.68


 14%|█▍        | 14438/100000 [4:44:00<27:21:11,  1.15s/it]

142147 episode score is 875.29


 14%|█▍        | 14439/100000 [4:44:01<27:04:08,  1.14s/it]

142156 episode score is 841.86


 14%|█▍        | 14440/100000 [4:44:02<26:56:03,  1.13s/it]

142165 episode score is 853.97


 14%|█▍        | 14441/100000 [4:44:03<26:58:57,  1.14s/it]

142174 episode score is 857.52


 14%|█▍        | 14442/100000 [4:44:04<26:58:09,  1.13s/it]

142183 episode score is 862.77


 14%|█▍        | 14443/100000 [4:44:05<27:01:22,  1.14s/it]

142192 episode score is 867.64


 14%|█▍        | 14444/100000 [4:44:07<27:02:06,  1.14s/it]

142201 episode score is 889.01


 14%|█▍        | 14445/100000 [4:44:08<27:27:17,  1.16s/it]

142211 episode score is 808.02


 14%|█▍        | 14446/100000 [4:44:09<27:16:09,  1.15s/it]

142220 episode score is 867.25


 14%|█▍        | 14447/100000 [4:44:10<26:54:32,  1.13s/it]

142229 episode score is 858.23


 14%|█▍        | 14448/100000 [4:44:11<26:34:54,  1.12s/it]

142238 episode score is 839.83


 14%|█▍        | 14449/100000 [4:44:12<26:35:34,  1.12s/it]

142247 episode score is 855.18


 14%|█▍        | 14450/100000 [4:44:13<26:36:49,  1.12s/it]

142256 episode score is 840.21


 14%|█▍        | 14451/100000 [4:44:14<27:00:46,  1.14s/it]

142265 episode score is 908.05


 14%|█▍        | 14452/100000 [4:44:16<26:51:20,  1.13s/it]

142274 episode score is 844.63


 14%|█▍        | 14453/100000 [4:44:17<26:51:17,  1.13s/it]

142283 episode score is 863.63


 14%|█▍        | 14454/100000 [4:44:18<26:59:10,  1.14s/it]

142292 episode score is 850.84


 14%|█▍        | 14455/100000 [4:44:19<27:13:18,  1.15s/it]

142301 episode score is 897.14


 14%|█▍        | 14456/100000 [4:44:20<26:55:47,  1.13s/it]

142310 episode score is 838.62


 14%|█▍        | 14457/100000 [4:44:21<26:58:11,  1.14s/it]

142319 episode score is 878.51


 14%|█▍        | 14458/100000 [4:44:22<27:13:10,  1.15s/it]

142329 episode score is 788.16


 14%|█▍        | 14459/100000 [4:44:24<27:00:40,  1.14s/it]

142338 episode score is 855.16


 14%|█▍        | 14460/100000 [4:44:25<27:15:13,  1.15s/it]

142348 episode score is 806.26


 14%|█▍        | 14461/100000 [4:44:26<27:21:43,  1.15s/it]

142357 episode score is 879.41


 14%|█▍        | 14462/100000 [4:44:27<27:23:51,  1.15s/it]

142366 episode score is 886.17


 14%|█▍        | 14463/100000 [4:44:28<27:30:49,  1.16s/it]

142375 episode score is 882.97


 14%|█▍        | 14464/100000 [4:44:29<27:01:35,  1.14s/it]

142384 episode score is 841.10


 14%|█▍        | 14465/100000 [4:44:30<27:24:31,  1.15s/it]

142394 episode score is 831.41


 14%|█▍        | 14466/100000 [4:44:32<27:34:27,  1.16s/it]

142404 episode score is 795.84


 14%|█▍        | 14467/100000 [4:44:33<27:39:32,  1.16s/it]

142414 episode score is 782.76


 14%|█▍        | 14468/100000 [4:44:34<27:30:59,  1.16s/it]

142424 episode score is 785.47


 14%|█▍        | 14469/100000 [4:44:35<27:52:33,  1.17s/it]

142434 episode score is 830.86


 14%|█▍        | 14470/100000 [4:44:36<27:26:39,  1.16s/it]

142443 episode score is 856.19


 14%|█▍        | 14471/100000 [4:44:37<27:11:00,  1.14s/it]

142452 episode score is 870.43


 14%|█▍        | 14472/100000 [4:44:39<27:08:46,  1.14s/it]

142461 episode score is 890.15


 14%|█▍        | 14473/100000 [4:44:40<26:48:32,  1.13s/it]

142470 episode score is 834.91


 14%|█▍        | 14474/100000 [4:44:41<26:46:10,  1.13s/it]

142479 episode score is 863.39


 14%|█▍        | 14475/100000 [4:44:42<27:15:30,  1.15s/it]

142488 episode score is 928.25


 14%|█▍        | 14476/100000 [4:44:43<26:52:43,  1.13s/it]

142497 episode score is 825.73


 14%|█▍        | 14477/100000 [4:44:44<26:35:29,  1.12s/it]

142506 episode score is 829.28


 14%|█▍        | 14478/100000 [4:44:45<26:27:13,  1.11s/it]

142515 episode score is 848.21


 14%|█▍        | 14479/100000 [4:44:46<26:31:51,  1.12s/it]

142524 episode score is 878.00


 14%|█▍        | 14480/100000 [4:44:48<27:25:28,  1.15s/it]

142534 episode score is 777.77


 14%|█▍        | 14481/100000 [4:44:49<27:14:33,  1.15s/it]

142543 episode score is 859.45


 14%|█▍        | 14482/100000 [4:44:50<26:57:41,  1.13s/it]

142552 episode score is 850.07


 14%|█▍        | 14483/100000 [4:44:51<26:42:13,  1.12s/it]

142561 episode score is 855.09


 14%|█▍        | 14484/100000 [4:44:52<26:37:03,  1.12s/it]

142570 episode score is 848.25


 14%|█▍        | 14485/100000 [4:44:53<26:34:19,  1.12s/it]

142579 episode score is 873.95


 14%|█▍        | 14486/100000 [4:44:54<26:19:42,  1.11s/it]

142588 episode score is 822.53


 14%|█▍        | 14487/100000 [4:44:55<26:34:06,  1.12s/it]

142597 episode score is 856.48


 14%|█▍        | 14488/100000 [4:44:57<26:26:18,  1.11s/it]

142606 episode score is 837.11


 14%|█▍        | 14489/100000 [4:44:58<26:29:38,  1.12s/it]

142615 episode score is 851.93


 14%|█▍        | 14490/100000 [4:44:59<26:26:18,  1.11s/it]

142624 episode score is 841.69


 14%|█▍        | 14491/100000 [4:45:00<26:32:42,  1.12s/it]

142633 episode score is 851.61


 14%|█▍        | 14492/100000 [4:45:01<26:24:11,  1.11s/it]

142642 episode score is 833.15


 14%|█▍        | 14493/100000 [4:45:02<27:01:51,  1.14s/it]

142651 episode score is 886.91


 14%|█▍        | 14494/100000 [4:45:03<27:10:45,  1.14s/it]

142660 episode score is 879.32


 14%|█▍        | 14495/100000 [4:45:04<27:02:31,  1.14s/it]

142669 episode score is 879.06


 14%|█▍        | 14496/100000 [4:45:06<26:51:31,  1.13s/it]

142678 episode score is 847.97


 14%|█▍        | 14497/100000 [4:45:07<27:09:05,  1.14s/it]

142687 episode score is 893.16


 14%|█▍        | 14498/100000 [4:45:08<27:14:42,  1.15s/it]

142696 episode score is 901.88


 14%|█▍        | 14499/100000 [4:45:09<27:11:31,  1.14s/it]

142705 episode score is 863.67
142714 episode score is 881.34


 14%|█▍        | 14500/100000 [4:45:11<35:20:52,  1.49s/it]

Iteration 14500: Average test reward: 862.83


 15%|█▍        | 14501/100000 [4:45:12<33:05:32,  1.39s/it]

142723 episode score is 897.34


 15%|█▍        | 14502/100000 [4:45:14<31:14:44,  1.32s/it]

142732 episode score is 867.96


 15%|█▍        | 14503/100000 [4:45:15<30:06:39,  1.27s/it]

142741 episode score is 878.84


 15%|█▍        | 14504/100000 [4:45:16<29:13:39,  1.23s/it]

142750 episode score is 874.01


 15%|█▍        | 14505/100000 [4:45:17<28:43:16,  1.21s/it]

142759 episode score is 860.50


 15%|█▍        | 14506/100000 [4:45:18<28:08:30,  1.19s/it]

142768 episode score is 862.53


 15%|█▍        | 14507/100000 [4:45:19<27:31:37,  1.16s/it]

142777 episode score is 847.15


 15%|█▍        | 14508/100000 [4:45:20<27:26:12,  1.16s/it]

142786 episode score is 888.57


 15%|█▍        | 14509/100000 [4:45:22<27:21:53,  1.15s/it]

142795 episode score is 898.03


 15%|█▍        | 14510/100000 [4:45:23<27:13:05,  1.15s/it]

142804 episode score is 870.51


 15%|█▍        | 14511/100000 [4:45:24<27:05:55,  1.14s/it]

142813 episode score is 843.03


 15%|█▍        | 14512/100000 [4:45:25<27:04:43,  1.14s/it]

142822 episode score is 890.98


 15%|█▍        | 14513/100000 [4:45:26<27:03:04,  1.14s/it]

142831 episode score is 893.27


 15%|█▍        | 14514/100000 [4:45:27<27:03:44,  1.14s/it]

142840 episode score is 890.32


 15%|█▍        | 14515/100000 [4:45:28<26:58:22,  1.14s/it]

142849 episode score is 866.49


 15%|█▍        | 14516/100000 [4:45:30<26:41:34,  1.12s/it]

142858 episode score is 841.42


 15%|█▍        | 14517/100000 [4:45:31<26:31:03,  1.12s/it]

142867 episode score is 863.42


 15%|█▍        | 14518/100000 [4:45:32<26:34:13,  1.12s/it]

142876 episode score is 873.72


 15%|█▍        | 14519/100000 [4:45:33<26:45:55,  1.13s/it]

142885 episode score is 882.12


 15%|█▍        | 14520/100000 [4:45:34<27:00:08,  1.14s/it]

142894 episode score is 889.54


 15%|█▍        | 14521/100000 [4:45:35<27:08:55,  1.14s/it]

142903 episode score is 904.87


 15%|█▍        | 14522/100000 [4:45:36<26:58:21,  1.14s/it]

142912 episode score is 870.21


 15%|█▍        | 14523/100000 [4:45:37<26:54:19,  1.13s/it]

142921 episode score is 877.34


 15%|█▍        | 14524/100000 [4:45:39<26:55:37,  1.13s/it]

142930 episode score is 891.85


 15%|█▍        | 14525/100000 [4:45:40<26:43:09,  1.13s/it]

142939 episode score is 830.78


 15%|█▍        | 14526/100000 [4:45:41<26:40:09,  1.12s/it]

142948 episode score is 889.57


 15%|█▍        | 14527/100000 [4:45:42<26:54:36,  1.13s/it]

142957 episode score is 879.77


 15%|█▍        | 14528/100000 [4:45:43<27:12:15,  1.15s/it]

142966 episode score is 921.61


 15%|█▍        | 14529/100000 [4:45:44<27:34:15,  1.16s/it]

142976 episode score is 833.45


 15%|█▍        | 14530/100000 [4:45:45<27:05:09,  1.14s/it]

142985 episode score is 857.66


 15%|█▍        | 14531/100000 [4:45:47<26:56:17,  1.13s/it]

142994 episode score is 887.56


 15%|█▍        | 14532/100000 [4:45:48<26:45:51,  1.13s/it]

143003 episode score is 847.57


 15%|█▍        | 14533/100000 [4:45:49<27:15:49,  1.15s/it]

143013 episode score is 836.12


 15%|█▍        | 14534/100000 [4:45:50<27:28:50,  1.16s/it]

143022 episode score is 858.55


 15%|█▍        | 14535/100000 [4:45:51<27:18:18,  1.15s/it]

143032 episode score is 786.04


 15%|█▍        | 14536/100000 [4:45:52<27:35:14,  1.16s/it]

143042 episode score is 823.54


 15%|█▍        | 14537/100000 [4:45:54<27:42:43,  1.17s/it]

143052 episode score is 830.25


 15%|█▍        | 14538/100000 [4:45:55<27:20:46,  1.15s/it]

143061 episode score is 873.78


 15%|█▍        | 14539/100000 [4:45:56<27:17:21,  1.15s/it]

143071 episode score is 798.88


 15%|█▍        | 14540/100000 [4:45:57<27:25:11,  1.16s/it]

143081 episode score is 826.82


 15%|█▍        | 14541/100000 [4:45:58<27:23:12,  1.15s/it]

143091 episode score is 784.91


 15%|█▍        | 14542/100000 [4:45:59<27:50:02,  1.17s/it]

143102 episode score is 754.05


 15%|█▍        | 14543/100000 [4:46:00<27:29:17,  1.16s/it]

143112 episode score is 780.83


 15%|█▍        | 14544/100000 [4:46:02<27:37:07,  1.16s/it]

143122 episode score is 811.59


 15%|█▍        | 14545/100000 [4:46:03<27:32:52,  1.16s/it]

143131 episode score is 890.80


 15%|█▍        | 14546/100000 [4:46:04<27:39:45,  1.17s/it]

143141 episode score is 828.43


 15%|█▍        | 14547/100000 [4:46:05<27:14:53,  1.15s/it]

143150 episode score is 873.83


 15%|█▍        | 14548/100000 [4:46:06<26:50:08,  1.13s/it]

143159 episode score is 861.25


 15%|█▍        | 14549/100000 [4:46:07<26:56:35,  1.14s/it]

143169 episode score is 792.66


 15%|█▍        | 14550/100000 [4:46:08<27:05:23,  1.14s/it]

143179 episode score is 805.29


 15%|█▍        | 14551/100000 [4:46:10<26:54:30,  1.13s/it]

143189 episode score is 761.10


 15%|█▍        | 14552/100000 [4:46:11<27:12:31,  1.15s/it]

143200 episode score is 709.25


 15%|█▍        | 14553/100000 [4:46:12<27:42:39,  1.17s/it]

143211 episode score is 738.82


 15%|█▍        | 14554/100000 [4:46:13<27:45:38,  1.17s/it]

143221 episode score is 809.40


 15%|█▍        | 14555/100000 [4:46:14<27:13:21,  1.15s/it]

143231 episode score is 739.91


 15%|█▍        | 14556/100000 [4:46:15<27:22:08,  1.15s/it]

143241 episode score is 805.30


 15%|█▍        | 14557/100000 [4:46:17<27:18:18,  1.15s/it]

143251 episode score is 785.06


 15%|█▍        | 14558/100000 [4:46:18<27:34:25,  1.16s/it]

143261 episode score is 808.63


 15%|█▍        | 14559/100000 [4:46:19<27:44:06,  1.17s/it]

143271 episode score is 820.96


 15%|█▍        | 14560/100000 [4:46:20<27:31:40,  1.16s/it]

143280 episode score is 904.71


 15%|█▍        | 14561/100000 [4:46:21<27:08:51,  1.14s/it]

143289 episode score is 870.51


 15%|█▍        | 14562/100000 [4:46:22<26:55:19,  1.13s/it]

143298 episode score is 871.20


 15%|█▍        | 14563/100000 [4:46:23<27:11:32,  1.15s/it]

143308 episode score is 811.62


 15%|█▍        | 14564/100000 [4:46:25<27:07:09,  1.14s/it]

143317 episode score is 894.20


 15%|█▍        | 14565/100000 [4:46:26<27:03:00,  1.14s/it]

143326 episode score is 891.95


 15%|█▍        | 14566/100000 [4:46:27<27:32:49,  1.16s/it]

143336 episode score is 852.79


 15%|█▍        | 14567/100000 [4:46:28<27:47:06,  1.17s/it]

143346 episode score is 838.20


 15%|█▍        | 14568/100000 [4:46:29<27:32:44,  1.16s/it]

143355 episode score is 898.24


 15%|█▍        | 14569/100000 [4:46:30<27:31:51,  1.16s/it]

143365 episode score is 815.91


 15%|█▍        | 14570/100000 [4:46:32<27:07:21,  1.14s/it]

143374 episode score is 858.06


 15%|█▍        | 14571/100000 [4:46:33<27:17:51,  1.15s/it]

143383 episode score is 913.97


 15%|█▍        | 14572/100000 [4:46:34<27:14:20,  1.15s/it]

143392 episode score is 903.28


 15%|█▍        | 14573/100000 [4:46:35<27:06:27,  1.14s/it]

143401 episode score is 894.05


 15%|█▍        | 14574/100000 [4:46:36<26:52:25,  1.13s/it]

143410 episode score is 872.68


 15%|█▍        | 14575/100000 [4:46:37<26:38:35,  1.12s/it]

143419 episode score is 871.10


 15%|█▍        | 14576/100000 [4:46:38<27:14:48,  1.15s/it]

143429 episode score is 848.00


 15%|█▍        | 14577/100000 [4:46:40<27:25:34,  1.16s/it]

143438 episode score is 933.58


 15%|█▍        | 14578/100000 [4:46:41<27:11:42,  1.15s/it]

143447 episode score is 882.52


 15%|█▍        | 14579/100000 [4:46:42<27:18:03,  1.15s/it]

143456 episode score is 918.76


 15%|█▍        | 14580/100000 [4:46:43<27:33:02,  1.16s/it]

143465 episode score is 935.11


 15%|█▍        | 14581/100000 [4:46:44<27:16:33,  1.15s/it]

143474 episode score is 893.16


 15%|█▍        | 14582/100000 [4:46:45<26:51:10,  1.13s/it]

143483 episode score is 855.12


 15%|█▍        | 14583/100000 [4:46:46<26:32:13,  1.12s/it]

143492 episode score is 854.92


 15%|█▍        | 14584/100000 [4:46:47<26:29:53,  1.12s/it]

143501 episode score is 874.33


 15%|█▍        | 14585/100000 [4:46:49<27:35:05,  1.16s/it]

143510 episode score is 925.59


 15%|█▍        | 14586/100000 [4:46:50<27:41:14,  1.17s/it]

143519 episode score is 929.18


 15%|█▍        | 14587/100000 [4:46:51<27:24:19,  1.16s/it]

143528 episode score is 878.20


 15%|█▍        | 14588/100000 [4:46:52<27:29:14,  1.16s/it]

143537 episode score is 918.19


 15%|█▍        | 14589/100000 [4:46:53<27:54:14,  1.18s/it]

143547 episode score is 841.44


 15%|█▍        | 14590/100000 [4:46:55<27:34:51,  1.16s/it]

143556 episode score is 891.86


 15%|█▍        | 14591/100000 [4:46:56<27:19:57,  1.15s/it]

143565 episode score is 871.73


 15%|█▍        | 14592/100000 [4:46:57<27:23:33,  1.15s/it]

143574 episode score is 927.92


 15%|█▍        | 14593/100000 [4:46:58<26:58:36,  1.14s/it]

143583 episode score is 847.03


 15%|█▍        | 14594/100000 [4:46:59<27:21:38,  1.15s/it]

143593 episode score is 830.64


 15%|█▍        | 14595/100000 [4:47:00<26:58:13,  1.14s/it]

143602 episode score is 857.03


 15%|█▍        | 14596/100000 [4:47:01<27:20:23,  1.15s/it]

143612 episode score is 825.38


 15%|█▍        | 14597/100000 [4:47:03<27:12:46,  1.15s/it]

143622 episode score is 761.66


 15%|█▍        | 14598/100000 [4:47:04<27:29:37,  1.16s/it]

143632 episode score is 795.49


 15%|█▍        | 14599/100000 [4:47:05<27:35:43,  1.16s/it]

143642 episode score is 808.47
143652 episode score is 788.31


 15%|█▍        | 14600/100000 [4:47:07<34:53:35,  1.47s/it]

Iteration 14600: Average test reward: 822.33


 15%|█▍        | 14601/100000 [4:47:08<32:13:20,  1.36s/it]

143661 episode score is 856.87


 15%|█▍        | 14602/100000 [4:47:09<30:41:40,  1.29s/it]

143670 episode score is 901.52


 15%|█▍        | 14603/100000 [4:47:10<29:38:13,  1.25s/it]

143680 episode score is 782.87


 15%|█▍        | 14604/100000 [4:47:12<29:00:44,  1.22s/it]

143689 episode score is 919.52


 15%|█▍        | 14605/100000 [4:47:13<28:55:24,  1.22s/it]

143699 episode score is 829.74


 15%|█▍        | 14606/100000 [4:47:14<28:47:19,  1.21s/it]

143709 episode score is 833.01


 15%|█▍        | 14607/100000 [4:47:15<28:33:36,  1.20s/it]

143719 episode score is 824.89


 15%|█▍        | 14608/100000 [4:47:16<28:23:39,  1.20s/it]

143729 episode score is 822.21


 15%|█▍        | 14609/100000 [4:47:18<28:23:34,  1.20s/it]

143739 episode score is 826.54


 15%|█▍        | 14610/100000 [4:47:19<27:46:58,  1.17s/it]

143748 episode score is 866.89


 15%|█▍        | 14611/100000 [4:47:20<27:47:48,  1.17s/it]

143758 episode score is 811.66


 15%|█▍        | 14612/100000 [4:47:21<27:34:45,  1.16s/it]

143767 episode score is 898.38


 15%|█▍        | 14613/100000 [4:47:22<27:38:50,  1.17s/it]

143777 episode score is 811.49


 15%|█▍        | 14614/100000 [4:47:23<27:41:58,  1.17s/it]

143787 episode score is 805.05


 15%|█▍        | 14615/100000 [4:47:24<27:21:55,  1.15s/it]

143796 episode score is 875.04


 15%|█▍        | 14616/100000 [4:47:26<27:24:34,  1.16s/it]

143805 episode score is 923.27


 15%|█▍        | 14617/100000 [4:47:27<27:00:35,  1.14s/it]

143814 episode score is 860.44


 15%|█▍        | 14618/100000 [4:47:28<26:48:48,  1.13s/it]

143823 episode score is 867.38


 15%|█▍        | 14619/100000 [4:47:29<26:40:47,  1.12s/it]

143832 episode score is 868.63


 15%|█▍        | 14620/100000 [4:47:30<26:28:51,  1.12s/it]

143841 episode score is 850.33


 15%|█▍        | 14621/100000 [4:47:31<26:39:48,  1.12s/it]

143850 episode score is 871.26


 15%|█▍        | 14622/100000 [4:47:32<26:41:11,  1.13s/it]

143859 episode score is 868.33


 15%|█▍        | 14623/100000 [4:47:33<26:43:24,  1.13s/it]

143868 episode score is 886.82


 15%|█▍        | 14624/100000 [4:47:35<26:40:03,  1.12s/it]

143877 episode score is 870.66


 15%|█▍        | 14625/100000 [4:47:36<26:24:40,  1.11s/it]

143886 episode score is 852.93


 15%|█▍        | 14626/100000 [4:47:37<26:26:22,  1.11s/it]

143895 episode score is 868.63


 15%|█▍        | 14627/100000 [4:47:38<26:36:29,  1.12s/it]

143904 episode score is 896.65


 15%|█▍        | 14628/100000 [4:47:39<26:53:34,  1.13s/it]

143913 episode score is 898.13


 15%|█▍        | 14629/100000 [4:47:40<27:02:19,  1.14s/it]

143922 episode score is 906.17


 15%|█▍        | 14630/100000 [4:47:41<26:59:15,  1.14s/it]

143931 episode score is 879.76


 15%|█▍        | 14631/100000 [4:47:43<26:57:40,  1.14s/it]

143941 episode score is 771.31


 15%|█▍        | 14632/100000 [4:47:44<26:41:31,  1.13s/it]

143950 episode score is 848.42


 15%|█▍        | 14633/100000 [4:47:45<27:06:32,  1.14s/it]

143960 episode score is 819.02


 15%|█▍        | 14634/100000 [4:47:46<27:18:26,  1.15s/it]

143970 episode score is 809.58


 15%|█▍        | 14635/100000 [4:47:47<27:38:48,  1.17s/it]

143980 episode score is 824.07


 15%|█▍        | 14636/100000 [4:47:48<27:24:47,  1.16s/it]

143989 episode score is 873.86


 15%|█▍        | 14637/100000 [4:47:49<27:44:44,  1.17s/it]

143999 episode score is 840.41


 15%|█▍        | 14638/100000 [4:47:51<27:27:40,  1.16s/it]

144009 episode score is 778.62


 15%|█▍        | 14639/100000 [4:47:52<27:25:07,  1.16s/it]

144019 episode score is 796.00


 15%|█▍        | 14640/100000 [4:47:53<27:33:41,  1.16s/it]

144029 episode score is 820.44


 15%|█▍        | 14641/100000 [4:47:54<27:20:43,  1.15s/it]

144039 episode score is 765.43


 15%|█▍        | 14642/100000 [4:47:55<27:23:51,  1.16s/it]

144049 episode score is 811.17


 15%|█▍        | 14643/100000 [4:47:57<28:06:23,  1.19s/it]

144059 episode score is 811.98


 15%|█▍        | 14644/100000 [4:47:58<27:44:24,  1.17s/it]

144069 episode score is 791.59


 15%|█▍        | 14645/100000 [4:47:59<27:11:36,  1.15s/it]

144078 episode score is 856.71


 15%|█▍        | 14646/100000 [4:48:00<27:00:29,  1.14s/it]

144088 episode score is 772.38


 15%|█▍        | 14647/100000 [4:48:01<26:39:56,  1.12s/it]

144097 episode score is 867.98


 15%|█▍        | 14648/100000 [4:48:02<26:36:35,  1.12s/it]

144106 episode score is 862.06


 15%|█▍        | 14649/100000 [4:48:03<26:33:18,  1.12s/it]

144115 episode score is 874.14


 15%|█▍        | 14650/100000 [4:48:04<26:20:25,  1.11s/it]

144124 episode score is 848.54


 15%|█▍        | 14651/100000 [4:48:05<26:33:14,  1.12s/it]

144133 episode score is 887.34


 15%|█▍        | 14652/100000 [4:48:07<26:31:10,  1.12s/it]

144142 episode score is 868.45


 15%|█▍        | 14653/100000 [4:48:08<26:56:01,  1.14s/it]

144152 episode score is 814.12


 15%|█▍        | 14654/100000 [4:48:09<26:32:28,  1.12s/it]

144161 episode score is 842.89


 15%|█▍        | 14655/100000 [4:48:10<26:23:01,  1.11s/it]

144170 episode score is 836.27


 15%|█▍        | 14656/100000 [4:48:11<26:15:56,  1.11s/it]

144179 episode score is 859.28


 15%|█▍        | 14657/100000 [4:48:12<26:17:44,  1.11s/it]

144188 episode score is 866.70


 15%|█▍        | 14658/100000 [4:48:13<26:15:16,  1.11s/it]

144197 episode score is 872.18


 15%|█▍        | 14659/100000 [4:48:14<26:28:16,  1.12s/it]

144206 episode score is 894.20


 15%|█▍        | 14660/100000 [4:48:15<26:41:25,  1.13s/it]

144215 episode score is 913.85


 15%|█▍        | 14661/100000 [4:48:17<26:48:58,  1.13s/it]

144224 episode score is 903.32


 15%|█▍        | 14662/100000 [4:48:18<27:04:13,  1.14s/it]

144233 episode score is 915.86


 15%|█▍        | 14663/100000 [4:48:19<26:58:55,  1.14s/it]

144242 episode score is 870.87


 15%|█▍        | 14664/100000 [4:48:20<27:16:52,  1.15s/it]

144252 episode score is 820.68


 15%|█▍        | 14665/100000 [4:48:21<27:38:40,  1.17s/it]

144262 episode score is 845.81


 15%|█▍        | 14666/100000 [4:48:22<27:15:37,  1.15s/it]

144271 episode score is 877.33


 15%|█▍        | 14667/100000 [4:48:24<27:09:59,  1.15s/it]

144281 episode score is 775.52


 15%|█▍        | 14668/100000 [4:48:25<27:28:46,  1.16s/it]

144291 episode score is 814.29


 15%|█▍        | 14669/100000 [4:48:26<27:04:21,  1.14s/it]

144300 episode score is 868.38


 15%|█▍        | 14670/100000 [4:48:27<27:14:30,  1.15s/it]

144310 episode score is 795.31


 15%|█▍        | 14671/100000 [4:48:28<27:23:02,  1.16s/it]

144320 episode score is 808.57


 15%|█▍        | 14672/100000 [4:48:29<27:27:36,  1.16s/it]

144330 episode score is 795.67


 15%|█▍        | 14673/100000 [4:48:31<27:45:30,  1.17s/it]

144340 episode score is 838.68


 15%|█▍        | 14674/100000 [4:48:32<27:48:45,  1.17s/it]

144349 episode score is 935.51


 15%|█▍        | 14675/100000 [4:48:33<27:43:52,  1.17s/it]

144358 episode score is 910.92


 15%|█▍        | 14676/100000 [4:48:34<27:23:39,  1.16s/it]

144367 episode score is 873.15


 15%|█▍        | 14677/100000 [4:48:35<27:33:53,  1.16s/it]

144376 episode score is 914.19


 15%|█▍        | 14678/100000 [4:48:36<27:06:43,  1.14s/it]

144385 episode score is 843.94


 15%|█▍        | 14679/100000 [4:48:37<27:08:34,  1.15s/it]

144394 episode score is 907.14


 15%|█▍        | 14680/100000 [4:48:39<27:07:33,  1.14s/it]

144403 episode score is 900.69


 15%|█▍        | 14681/100000 [4:48:40<26:52:05,  1.13s/it]

144412 episode score is 859.57


 15%|█▍        | 14682/100000 [4:48:41<27:09:21,  1.15s/it]

144421 episode score is 933.70


 15%|█▍        | 14683/100000 [4:48:42<27:16:59,  1.15s/it]

144430 episode score is 920.27


 15%|█▍        | 14684/100000 [4:48:43<27:18:21,  1.15s/it]

144439 episode score is 911.59


 15%|█▍        | 14685/100000 [4:48:44<27:06:17,  1.14s/it]

144448 episode score is 878.81


 15%|█▍        | 14686/100000 [4:48:45<27:21:56,  1.15s/it]

144457 episode score is 931.40


 15%|█▍        | 14687/100000 [4:48:47<27:07:02,  1.14s/it]

144466 episode score is 883.00


 15%|█▍        | 14688/100000 [4:48:48<27:28:23,  1.16s/it]

144475 episode score is 932.23


 15%|█▍        | 14689/100000 [4:48:49<27:18:51,  1.15s/it]

144484 episode score is 881.13


 15%|█▍        | 14690/100000 [4:48:50<27:36:16,  1.16s/it]

144494 episode score is 838.07


 15%|█▍        | 14691/100000 [4:48:51<27:45:23,  1.17s/it]

144504 episode score is 828.57


 15%|█▍        | 14692/100000 [4:48:52<27:16:16,  1.15s/it]

144513 episode score is 871.45


 15%|█▍        | 14693/100000 [4:48:54<27:30:07,  1.16s/it]

144523 episode score is 823.29


 15%|█▍        | 14694/100000 [4:48:55<27:19:25,  1.15s/it]

144532 episode score is 896.80


 15%|█▍        | 14695/100000 [4:48:56<26:48:53,  1.13s/it]

144541 episode score is 837.53


 15%|█▍        | 14696/100000 [4:48:57<26:33:28,  1.12s/it]

144550 episode score is 862.69


 15%|█▍        | 14697/100000 [4:48:58<26:37:50,  1.12s/it]

144559 episode score is 891.22


 15%|█▍        | 14698/100000 [4:48:59<27:04:13,  1.14s/it]

144568 episode score is 843.45


 15%|█▍        | 14699/100000 [4:49:00<27:04:00,  1.14s/it]

144577 episode score is 879.87
144587 episode score is 803.92


 15%|█▍        | 14700/100000 [4:49:03<35:38:25,  1.50s/it]

Iteration 14700: Average test reward: 902.32


 15%|█▍        | 14701/100000 [4:49:04<33:04:20,  1.40s/it]

144597 episode score is 769.81


 15%|█▍        | 14702/100000 [4:49:05<30:54:10,  1.30s/it]

144606 episode score is 831.37


 15%|█▍        | 14703/100000 [4:49:06<29:42:07,  1.25s/it]

144617 episode score is 678.26


 15%|█▍        | 14704/100000 [4:49:07<28:53:54,  1.22s/it]

144627 episode score is 785.77


 15%|█▍        | 14705/100000 [4:49:08<28:03:59,  1.18s/it]

144636 episode score is 861.46


 15%|█▍        | 14706/100000 [4:49:09<27:53:55,  1.18s/it]

144645 episode score is 916.15


 15%|█▍        | 14707/100000 [4:49:11<27:29:19,  1.16s/it]

144655 episode score is 749.71


 15%|█▍        | 14708/100000 [4:49:12<27:28:48,  1.16s/it]

144664 episode score is 910.41


 15%|█▍        | 14709/100000 [4:49:13<27:31:39,  1.16s/it]

144673 episode score is 925.29


 15%|█▍        | 14710/100000 [4:49:14<26:58:40,  1.14s/it]

144682 episode score is 818.79


 15%|█▍        | 14711/100000 [4:49:15<27:10:42,  1.15s/it]

144691 episode score is 914.62


 15%|█▍        | 14712/100000 [4:49:16<27:33:20,  1.16s/it]

144701 episode score is 810.84


 15%|█▍        | 14713/100000 [4:49:18<27:16:50,  1.15s/it]

144710 episode score is 854.30


 15%|█▍        | 14714/100000 [4:49:19<27:41:47,  1.17s/it]

144720 episode score is 825.27


 15%|█▍        | 14715/100000 [4:49:20<27:45:24,  1.17s/it]

144730 episode score is 820.07


 15%|█▍        | 14716/100000 [4:49:21<27:07:08,  1.14s/it]

144739 episode score is 847.10


 15%|█▍        | 14717/100000 [4:49:22<27:09:34,  1.15s/it]

144748 episode score is 909.15


 15%|█▍        | 14718/100000 [4:49:23<27:10:15,  1.15s/it]

144757 episode score is 914.77


 15%|█▍        | 14719/100000 [4:49:24<27:10:02,  1.15s/it]

144767 episode score is 791.63


 15%|█▍        | 14720/100000 [4:49:26<27:07:51,  1.15s/it]

144777 episode score is 787.51


 15%|█▍        | 14721/100000 [4:49:27<27:21:23,  1.15s/it]

144787 episode score is 826.26


 15%|█▍        | 14722/100000 [4:49:28<27:25:52,  1.16s/it]

144797 episode score is 812.13


 15%|█▍        | 14723/100000 [4:49:29<27:07:47,  1.15s/it]

144806 episode score is 873.11


 15%|█▍        | 14724/100000 [4:49:30<26:44:10,  1.13s/it]

144815 episode score is 860.18


 15%|█▍        | 14725/100000 [4:49:31<26:55:10,  1.14s/it]

144824 episode score is 923.32


 15%|█▍        | 14726/100000 [4:49:32<27:03:35,  1.14s/it]

144833 episode score is 897.24


 15%|█▍        | 14727/100000 [4:49:34<27:10:50,  1.15s/it]

144843 episode score is 797.81


 15%|█▍        | 14728/100000 [4:49:35<27:13:17,  1.15s/it]

144852 episode score is 909.78


 15%|█▍        | 14729/100000 [4:49:36<27:23:55,  1.16s/it]

144862 episode score is 814.95


 15%|█▍        | 14730/100000 [4:49:37<27:10:32,  1.15s/it]

144872 episode score is 763.25


 15%|█▍        | 14731/100000 [4:49:38<27:18:00,  1.15s/it]

144882 episode score is 817.11


 15%|█▍        | 14732/100000 [4:49:39<27:42:43,  1.17s/it]

144892 episode score is 846.70


 15%|█▍        | 14733/100000 [4:49:40<27:06:35,  1.14s/it]

144901 episode score is 832.53


 15%|█▍        | 14734/100000 [4:49:42<26:59:29,  1.14s/it]

144910 episode score is 880.05


 15%|█▍        | 14735/100000 [4:49:43<26:55:53,  1.14s/it]

144919 episode score is 869.84


 15%|█▍        | 14736/100000 [4:49:44<27:08:00,  1.15s/it]

144928 episode score is 914.48


 15%|█▍        | 14737/100000 [4:49:45<27:25:55,  1.16s/it]

144937 episode score is 937.78


 15%|█▍        | 14738/100000 [4:49:46<27:37:31,  1.17s/it]

144946 episode score is 947.31


 15%|█▍        | 14739/100000 [4:49:47<27:15:59,  1.15s/it]

144955 episode score is 847.84


 15%|█▍        | 14740/100000 [4:49:49<27:35:15,  1.16s/it]

144964 episode score is 931.79


 15%|█▍        | 14741/100000 [4:49:50<27:43:22,  1.17s/it]

144973 episode score is 923.01


 15%|█▍        | 14742/100000 [4:49:51<27:43:29,  1.17s/it]

144982 episode score is 925.73


 15%|█▍        | 14743/100000 [4:49:52<27:43:32,  1.17s/it]

144991 episode score is 925.33


 15%|█▍        | 14744/100000 [4:49:53<27:24:18,  1.16s/it]

145000 episode score is 873.35


 15%|█▍        | 14745/100000 [4:49:54<27:23:15,  1.16s/it]

145009 episode score is 901.02


 15%|█▍        | 14746/100000 [4:49:56<27:15:31,  1.15s/it]

145018 episode score is 873.27


 15%|█▍        | 14747/100000 [4:49:57<26:51:38,  1.13s/it]

145027 episode score is 840.75


 15%|█▍        | 14748/100000 [4:49:58<27:22:59,  1.16s/it]

145037 episode score is 808.02


 15%|█▍        | 14749/100000 [4:49:59<27:33:25,  1.16s/it]

145046 episode score is 925.28


 15%|█▍        | 14750/100000 [4:50:00<27:29:07,  1.16s/it]

145055 episode score is 884.96


 15%|█▍        | 14751/100000 [4:50:01<27:17:13,  1.15s/it]

145064 episode score is 875.63


 15%|█▍        | 14752/100000 [4:50:03<27:28:55,  1.16s/it]

145074 episode score is 789.35


 15%|█▍        | 14753/100000 [4:50:04<27:09:42,  1.15s/it]

145083 episode score is 842.37


 15%|█▍        | 14754/100000 [4:50:05<28:03:45,  1.19s/it]

145092 episode score is 892.56


 15%|█▍        | 14755/100000 [4:50:06<27:44:47,  1.17s/it]

145101 episode score is 858.43


 15%|█▍        | 14756/100000 [4:50:07<27:45:32,  1.17s/it]

145110 episode score is 885.73


 15%|█▍        | 14757/100000 [4:50:08<27:58:00,  1.18s/it]

145119 episode score is 940.21


 15%|█▍        | 14758/100000 [4:50:10<27:43:16,  1.17s/it]

145128 episode score is 861.23


 15%|█▍        | 14759/100000 [4:50:11<27:45:25,  1.17s/it]

145137 episode score is 873.54


 15%|█▍        | 14760/100000 [4:50:12<27:34:55,  1.16s/it]

145146 episode score is 841.69


 15%|█▍        | 14761/100000 [4:50:13<27:35:49,  1.17s/it]

145155 episode score is 885.20


 15%|█▍        | 14762/100000 [4:50:14<27:25:13,  1.16s/it]

145164 episode score is 867.20


 15%|█▍        | 14763/100000 [4:50:15<27:35:21,  1.17s/it]

145173 episode score is 888.04


 15%|█▍        | 14764/100000 [4:50:17<27:38:33,  1.17s/it]

145182 episode score is 891.27


 15%|█▍        | 14765/100000 [4:50:18<27:27:51,  1.16s/it]

145191 episode score is 847.50


 15%|█▍        | 14766/100000 [4:50:19<27:36:33,  1.17s/it]

145200 episode score is 894.22


 15%|█▍        | 14767/100000 [4:50:20<27:40:54,  1.17s/it]

145209 episode score is 905.92


 15%|█▍        | 14768/100000 [4:50:21<27:34:53,  1.16s/it]

145218 episode score is 886.29


 15%|█▍        | 14769/100000 [4:50:22<27:27:27,  1.16s/it]

145227 episode score is 869.03


 15%|█▍        | 14770/100000 [4:50:24<27:31:58,  1.16s/it]

145236 episode score is 889.32


 15%|█▍        | 14771/100000 [4:50:25<27:26:52,  1.16s/it]

145245 episode score is 880.27


 15%|█▍        | 14772/100000 [4:50:26<27:24:13,  1.16s/it]

145254 episode score is 883.57


 15%|█▍        | 14773/100000 [4:50:27<27:15:00,  1.15s/it]

145263 episode score is 853.31


 15%|█▍        | 14774/100000 [4:50:28<27:00:00,  1.14s/it]

145272 episode score is 837.85


 15%|█▍        | 14775/100000 [4:50:29<27:12:21,  1.15s/it]

145281 episode score is 897.64


 15%|█▍        | 14776/100000 [4:50:30<27:18:44,  1.15s/it]

145290 episode score is 900.75


 15%|█▍        | 14777/100000 [4:50:32<27:26:11,  1.16s/it]

145299 episode score is 893.11


 15%|█▍        | 14778/100000 [4:50:33<27:34:13,  1.16s/it]

145308 episode score is 894.42


 15%|█▍        | 14779/100000 [4:50:34<27:18:01,  1.15s/it]

145317 episode score is 865.12


 15%|█▍        | 14780/100000 [4:50:35<27:23:56,  1.16s/it]

145326 episode score is 902.21


 15%|█▍        | 14781/100000 [4:50:36<27:26:52,  1.16s/it]

145335 episode score is 907.70


 15%|█▍        | 14782/100000 [4:50:37<27:36:34,  1.17s/it]

145344 episode score is 917.92


 15%|█▍        | 14783/100000 [4:50:39<27:31:19,  1.16s/it]

145353 episode score is 902.90


 15%|█▍        | 14784/100000 [4:50:40<27:19:18,  1.15s/it]

145362 episode score is 871.69


 15%|█▍        | 14785/100000 [4:50:41<27:23:52,  1.16s/it]

145371 episode score is 905.65


 15%|█▍        | 14786/100000 [4:50:42<27:34:19,  1.16s/it]

145380 episode score is 924.27


 15%|█▍        | 14787/100000 [4:50:43<27:36:09,  1.17s/it]

145389 episode score is 911.79


 15%|█▍        | 14788/100000 [4:50:44<27:34:24,  1.16s/it]

145398 episode score is 907.84


 15%|█▍        | 14789/100000 [4:50:46<27:26:37,  1.16s/it]

145407 episode score is 886.72


 15%|█▍        | 14790/100000 [4:50:47<26:59:41,  1.14s/it]

145416 episode score is 842.38


 15%|█▍        | 14791/100000 [4:50:48<27:11:12,  1.15s/it]

145425 episode score is 896.56


 15%|█▍        | 14792/100000 [4:50:49<27:22:31,  1.16s/it]

145434 episode score is 923.00


 15%|█▍        | 14793/100000 [4:50:50<27:24:39,  1.16s/it]

145443 episode score is 911.76


 15%|█▍        | 14794/100000 [4:50:51<27:33:18,  1.16s/it]

145452 episode score is 938.60


 15%|█▍        | 14795/100000 [4:50:52<27:25:20,  1.16s/it]

145461 episode score is 886.17


 15%|█▍        | 14796/100000 [4:50:54<27:34:54,  1.17s/it]

145470 episode score is 936.75


 15%|█▍        | 14797/100000 [4:50:55<27:36:48,  1.17s/it]

145479 episode score is 914.32


 15%|█▍        | 14798/100000 [4:50:56<27:45:02,  1.17s/it]

145488 episode score is 941.99


 15%|█▍        | 14799/100000 [4:50:57<27:27:15,  1.16s/it]

145497 episode score is 886.70
145506 episode score is 911.74


 15%|█▍        | 14800/100000 [4:50:59<35:30:21,  1.50s/it]

Iteration 14800: Average test reward: 923.85


 15%|█▍        | 14801/100000 [4:51:01<33:15:49,  1.41s/it]

145515 episode score is 940.80


 15%|█▍        | 14802/100000 [4:51:02<31:31:47,  1.33s/it]

145524 episode score is 916.25


 15%|█▍        | 14803/100000 [4:51:03<30:22:19,  1.28s/it]

145533 episode score is 923.60


 15%|█▍        | 14804/100000 [4:51:04<29:09:19,  1.23s/it]

145542 episode score is 873.45


 15%|█▍        | 14805/100000 [4:51:05<28:25:00,  1.20s/it]

145551 episode score is 892.58


 15%|█▍        | 14806/100000 [4:51:06<28:00:55,  1.18s/it]

145560 episode score is 907.45


 15%|█▍        | 14807/100000 [4:51:07<27:30:51,  1.16s/it]

145569 episode score is 880.18


 15%|█▍        | 14808/100000 [4:51:09<27:02:31,  1.14s/it]

145578 episode score is 856.61


 15%|█▍        | 14809/100000 [4:51:10<27:01:09,  1.14s/it]

145587 episode score is 901.34


 15%|█▍        | 14810/100000 [4:51:11<26:58:46,  1.14s/it]

145596 episode score is 894.25


 15%|█▍        | 14811/100000 [4:51:12<27:09:16,  1.15s/it]

145605 episode score is 923.28


 15%|█▍        | 14812/100000 [4:51:13<27:27:32,  1.16s/it]

145615 episode score is 824.99


 15%|█▍        | 14813/100000 [4:51:14<27:00:52,  1.14s/it]

145624 episode score is 850.42


 15%|█▍        | 14814/100000 [4:51:15<26:59:22,  1.14s/it]

145633 episode score is 883.70


 15%|█▍        | 14815/100000 [4:51:16<26:51:40,  1.14s/it]

145642 episode score is 893.53


 15%|█▍        | 14816/100000 [4:51:18<27:17:19,  1.15s/it]

145651 episode score is 909.80


 15%|█▍        | 14817/100000 [4:51:19<27:22:16,  1.16s/it]

145660 episode score is 895.55


 15%|█▍        | 14818/100000 [4:51:20<27:27:44,  1.16s/it]

145669 episode score is 917.35


 15%|█▍        | 14819/100000 [4:51:21<27:36:21,  1.17s/it]

145678 episode score is 904.00


 15%|█▍        | 14820/100000 [4:51:22<27:43:11,  1.17s/it]

145687 episode score is 904.04


 15%|█▍        | 14821/100000 [4:51:24<27:25:03,  1.16s/it]

145696 episode score is 860.05


 15%|█▍        | 14822/100000 [4:51:25<28:06:23,  1.19s/it]

145705 episode score is 875.46


 15%|█▍        | 14823/100000 [4:51:26<27:46:54,  1.17s/it]

145714 episode score is 889.21


 15%|█▍        | 14824/100000 [4:51:27<27:16:41,  1.15s/it]

145723 episode score is 843.35


 15%|█▍        | 14825/100000 [4:51:28<26:53:22,  1.14s/it]

145732 episode score is 836.62


 15%|█▍        | 14826/100000 [4:51:29<26:49:38,  1.13s/it]

145741 episode score is 823.83


 15%|█▍        | 14827/100000 [4:51:30<26:58:14,  1.14s/it]

145750 episode score is 870.39


 15%|█▍        | 14828/100000 [4:51:32<26:55:30,  1.14s/it]

145759 episode score is 843.06


 15%|█▍        | 14829/100000 [4:51:33<27:22:20,  1.16s/it]

145768 episode score is 911.30


 15%|█▍        | 14830/100000 [4:51:34<27:30:33,  1.16s/it]

145777 episode score is 915.30


 15%|█▍        | 14831/100000 [4:51:35<27:36:21,  1.17s/it]

145786 episode score is 932.31


 15%|█▍        | 14832/100000 [4:51:36<27:39:45,  1.17s/it]

145795 episode score is 886.43


 15%|█▍        | 14833/100000 [4:51:37<27:50:25,  1.18s/it]

145804 episode score is 931.03


 15%|█▍        | 14834/100000 [4:51:39<27:53:29,  1.18s/it]

145813 episode score is 911.74


 15%|█▍        | 14835/100000 [4:51:40<27:48:15,  1.18s/it]

145822 episode score is 894.06


 15%|█▍        | 14836/100000 [4:51:41<27:53:12,  1.18s/it]

145831 episode score is 927.66


 15%|█▍        | 14837/100000 [4:51:42<28:03:10,  1.19s/it]

145840 episode score is 951.34


 15%|█▍        | 14838/100000 [4:51:43<27:41:49,  1.17s/it]

145849 episode score is 850.80


 15%|█▍        | 14839/100000 [4:51:45<27:47:03,  1.17s/it]

145858 episode score is 900.75


 15%|█▍        | 14840/100000 [4:51:46<27:45:59,  1.17s/it]

145867 episode score is 935.04


 15%|█▍        | 14841/100000 [4:51:47<27:29:16,  1.16s/it]

145876 episode score is 879.14


 15%|█▍        | 14842/100000 [4:51:48<27:09:30,  1.15s/it]

145885 episode score is 851.91


 15%|█▍        | 14843/100000 [4:51:49<26:55:27,  1.14s/it]

145894 episode score is 865.89


 15%|█▍        | 14844/100000 [4:51:50<27:04:23,  1.14s/it]

145903 episode score is 914.56


 15%|█▍        | 14845/100000 [4:51:51<27:18:44,  1.15s/it]

145912 episode score is 927.47


 15%|█▍        | 14846/100000 [4:51:53<27:16:24,  1.15s/it]

145921 episode score is 887.83


 15%|█▍        | 14847/100000 [4:51:54<26:54:50,  1.14s/it]

145930 episode score is 850.78


 15%|█▍        | 14848/100000 [4:51:55<26:32:54,  1.12s/it]

145939 episode score is 817.61


 15%|█▍        | 14849/100000 [4:51:56<26:43:13,  1.13s/it]

145948 episode score is 889.57


 15%|█▍        | 14850/100000 [4:51:57<26:58:54,  1.14s/it]

145957 episode score is 921.32


 15%|█▍        | 14851/100000 [4:51:58<26:49:42,  1.13s/it]

145966 episode score is 857.19


 15%|█▍        | 14852/100000 [4:51:59<26:51:54,  1.14s/it]

145975 episode score is 869.42


 15%|█▍        | 14853/100000 [4:52:01<27:21:46,  1.16s/it]

145984 episode score is 946.67


 15%|█▍        | 14854/100000 [4:52:02<27:36:48,  1.17s/it]

145994 episode score is 809.91


 15%|█▍        | 14855/100000 [4:52:03<27:43:09,  1.17s/it]

146003 episode score is 908.78


 15%|█▍        | 14856/100000 [4:52:04<27:30:00,  1.16s/it]

146012 episode score is 885.78


 15%|█▍        | 14857/100000 [4:52:05<27:30:20,  1.16s/it]

146021 episode score is 920.07


 15%|█▍        | 14858/100000 [4:52:06<27:15:28,  1.15s/it]

146030 episode score is 875.52


 15%|█▍        | 14859/100000 [4:52:07<27:09:22,  1.15s/it]

146040 episode score is 768.30


 15%|█▍        | 14860/100000 [4:52:09<26:55:53,  1.14s/it]

146049 episode score is 857.38


 15%|█▍        | 14861/100000 [4:52:10<26:50:57,  1.14s/it]

146058 episode score is 864.10


 15%|█▍        | 14862/100000 [4:52:11<26:55:47,  1.14s/it]

146067 episode score is 895.01


 15%|█▍        | 14863/100000 [4:52:12<27:25:38,  1.16s/it]

146077 episode score is 849.08


 15%|█▍        | 14864/100000 [4:52:13<27:03:51,  1.14s/it]

146086 episode score is 855.99


 15%|█▍        | 14865/100000 [4:52:14<26:52:24,  1.14s/it]

146095 episode score is 872.34


 15%|█▍        | 14866/100000 [4:52:15<26:35:00,  1.12s/it]

146104 episode score is 836.30


 15%|█▍        | 14867/100000 [4:52:17<26:47:43,  1.13s/it]

146113 episode score is 902.44


 15%|█▍        | 14868/100000 [4:52:18<27:01:26,  1.14s/it]

146122 episode score is 894.69


 15%|█▍        | 14869/100000 [4:52:19<26:51:36,  1.14s/it]

146131 episode score is 856.75


 15%|█▍        | 14870/100000 [4:52:20<27:24:26,  1.16s/it]

146141 episode score is 838.05


 15%|█▍        | 14871/100000 [4:52:21<27:32:39,  1.16s/it]

146150 episode score is 931.44


 15%|█▍        | 14872/100000 [4:52:22<27:09:20,  1.15s/it]

146159 episode score is 866.85


 15%|█▍        | 14873/100000 [4:52:23<26:52:38,  1.14s/it]

146168 episode score is 863.31


 15%|█▍        | 14874/100000 [4:52:25<26:37:21,  1.13s/it]

146177 episode score is 850.20


 15%|█▍        | 14875/100000 [4:52:26<26:51:15,  1.14s/it]

146187 episode score is 786.75


 15%|█▍        | 14876/100000 [4:52:27<27:08:34,  1.15s/it]

146197 episode score is 809.75


 15%|█▍        | 14877/100000 [4:52:28<27:15:37,  1.15s/it]

146207 episode score is 801.76


 15%|█▍        | 14878/100000 [4:52:29<27:17:55,  1.15s/it]

146217 episode score is 783.41


 15%|█▍        | 14879/100000 [4:52:30<27:13:53,  1.15s/it]

146227 episode score is 789.59


 15%|█▍        | 14880/100000 [4:52:31<27:24:16,  1.16s/it]

146237 episode score is 817.22


 15%|█▍        | 14881/100000 [4:52:33<27:37:23,  1.17s/it]

146247 episode score is 814.48


 15%|█▍        | 14882/100000 [4:52:34<27:39:43,  1.17s/it]

146257 episode score is 806.85


 15%|█▍        | 14883/100000 [4:52:35<27:25:31,  1.16s/it]

146267 episode score is 779.05


 15%|█▍        | 14884/100000 [4:52:36<27:37:43,  1.17s/it]

146277 episode score is 821.66


 15%|█▍        | 14885/100000 [4:52:37<27:11:23,  1.15s/it]

146286 episode score is 862.76


 15%|█▍        | 14886/100000 [4:52:39<28:12:00,  1.19s/it]

146296 episode score is 842.03


 15%|█▍        | 14887/100000 [4:52:40<28:13:53,  1.19s/it]

146306 episode score is 827.90


 15%|█▍        | 14888/100000 [4:52:41<28:12:13,  1.19s/it]

146316 episode score is 825.30


 15%|█▍        | 14889/100000 [4:52:42<27:34:44,  1.17s/it]

146325 episode score is 854.34


 15%|█▍        | 14890/100000 [4:52:43<27:15:52,  1.15s/it]

146334 episode score is 881.78


 15%|█▍        | 14891/100000 [4:52:44<27:22:49,  1.16s/it]

146344 episode score is 803.77


 15%|█▍        | 14892/100000 [4:52:45<27:07:23,  1.15s/it]

146353 episode score is 874.01


 15%|█▍        | 14893/100000 [4:52:47<26:49:45,  1.13s/it]

146362 episode score is 851.43


 15%|█▍        | 14894/100000 [4:52:48<27:08:28,  1.15s/it]

146371 episode score is 923.61


 15%|█▍        | 14895/100000 [4:52:49<27:26:28,  1.16s/it]

146380 episode score is 932.86


 15%|█▍        | 14896/100000 [4:52:50<27:29:57,  1.16s/it]

146390 episode score is 805.96


 15%|█▍        | 14897/100000 [4:52:51<27:25:12,  1.16s/it]

146399 episode score is 894.90


 15%|█▍        | 14898/100000 [4:52:52<27:16:02,  1.15s/it]

146408 episode score is 882.00


 15%|█▍        | 14899/100000 [4:52:54<27:08:26,  1.15s/it]

146417 episode score is 869.53
146426 episode score is 879.78


 15%|█▍        | 14900/100000 [4:52:56<35:05:10,  1.48s/it]

Iteration 14900: Average test reward: 876.06


 15%|█▍        | 14901/100000 [4:52:57<32:27:20,  1.37s/it]

146435 episode score is 849.17


 15%|█▍        | 14902/100000 [4:52:58<30:27:33,  1.29s/it]

146444 episode score is 831.16


 15%|█▍        | 14903/100000 [4:52:59<29:26:37,  1.25s/it]

146453 episode score is 897.83


 15%|█▍        | 14904/100000 [4:53:00<28:44:36,  1.22s/it]

146462 episode score is 911.19


 15%|█▍        | 14905/100000 [4:53:01<28:19:24,  1.20s/it]

146471 episode score is 926.65


 15%|█▍        | 14906/100000 [4:53:03<27:52:12,  1.18s/it]

146480 episode score is 840.18


 15%|█▍        | 14907/100000 [4:53:04<27:48:47,  1.18s/it]

146489 episode score is 920.57


 15%|█▍        | 14908/100000 [4:53:05<27:32:27,  1.17s/it]

146498 episode score is 890.93


 15%|█▍        | 14909/100000 [4:53:06<27:25:17,  1.16s/it]

146508 episode score is 775.26


 15%|█▍        | 14910/100000 [4:53:07<27:03:02,  1.14s/it]

146517 episode score is 855.30


 15%|█▍        | 14911/100000 [4:53:08<26:59:01,  1.14s/it]

146526 episode score is 903.90


 15%|█▍        | 14912/100000 [4:53:09<26:36:01,  1.13s/it]

146535 episode score is 848.99


 15%|█▍        | 14913/100000 [4:53:10<26:19:55,  1.11s/it]

146544 episode score is 848.42


 15%|█▍        | 14914/100000 [4:53:12<26:48:20,  1.13s/it]

146553 episode score is 922.03


 15%|█▍        | 14915/100000 [4:53:13<27:21:56,  1.16s/it]

146562 episode score is 965.58


 15%|█▍        | 14916/100000 [4:53:14<27:32:35,  1.17s/it]

146571 episode score is 926.05


 15%|█▍        | 14917/100000 [4:53:15<27:57:38,  1.18s/it]

146580 episode score is 959.04


 15%|█▍        | 14918/100000 [4:53:16<27:59:29,  1.18s/it]

146589 episode score is 927.71


 15%|█▍        | 14919/100000 [4:53:18<27:47:16,  1.18s/it]

146598 episode score is 845.52


 15%|█▍        | 14920/100000 [4:53:19<28:01:24,  1.19s/it]

146608 episode score is 829.60


 15%|█▍        | 14921/100000 [4:53:20<27:42:52,  1.17s/it]

146617 episode score is 878.90


 15%|█▍        | 14922/100000 [4:53:21<27:33:18,  1.17s/it]

146626 episode score is 890.16


 15%|█▍        | 14923/100000 [4:53:22<27:31:49,  1.16s/it]

146635 episode score is 905.64


 15%|█▍        | 14924/100000 [4:53:24<27:45:28,  1.17s/it]

146644 episode score is 927.80


 15%|█▍        | 14925/100000 [4:53:25<27:45:57,  1.17s/it]

146653 episode score is 923.95


 15%|█▍        | 14926/100000 [4:53:26<27:51:04,  1.18s/it]

146662 episode score is 929.89


 15%|█▍        | 14927/100000 [4:53:27<27:19:37,  1.16s/it]

146671 episode score is 837.90


 15%|█▍        | 14928/100000 [4:53:28<27:16:51,  1.15s/it]

146680 episode score is 888.11


 15%|█▍        | 14929/100000 [4:53:29<27:08:55,  1.15s/it]

146689 episode score is 874.70


 15%|█▍        | 14930/100000 [4:53:30<27:23:31,  1.16s/it]

146698 episode score is 919.85


 15%|█▍        | 14931/100000 [4:53:32<27:31:56,  1.17s/it]

146707 episode score is 934.83


 15%|█▍        | 14932/100000 [4:53:33<27:58:48,  1.18s/it]

146716 episode score is 953.68


 15%|█▍        | 14933/100000 [4:53:34<27:27:23,  1.16s/it]

146725 episode score is 863.67


 15%|█▍        | 14934/100000 [4:53:35<27:14:20,  1.15s/it]

146734 episode score is 891.42


 15%|█▍        | 14935/100000 [4:53:36<27:18:41,  1.16s/it]

146743 episode score is 892.88


 15%|█▍        | 14936/100000 [4:53:37<26:49:38,  1.14s/it]

146752 episode score is 822.05


 15%|█▍        | 14937/100000 [4:53:39<27:18:03,  1.16s/it]

146761 episode score is 965.83


 15%|█▍        | 14938/100000 [4:53:40<27:31:16,  1.16s/it]

146770 episode score is 938.53


 15%|█▍        | 14939/100000 [4:53:41<27:23:04,  1.16s/it]

146779 episode score is 897.31


 15%|█▍        | 14940/100000 [4:53:42<27:44:00,  1.17s/it]

146788 episode score is 964.22


 15%|█▍        | 14941/100000 [4:53:43<27:45:13,  1.17s/it]

146797 episode score is 928.89


 15%|█▍        | 14942/100000 [4:53:44<27:28:59,  1.16s/it]

146806 episode score is 896.13


 15%|█▍        | 14943/100000 [4:53:46<27:52:15,  1.18s/it]

146815 episode score is 967.11


 15%|█▍        | 14944/100000 [4:53:47<28:06:28,  1.19s/it]

146824 episode score is 950.00


 15%|█▍        | 14945/100000 [4:53:48<28:19:56,  1.20s/it]

146833 episode score is 941.75


 15%|█▍        | 14946/100000 [4:53:49<28:12:08,  1.19s/it]

146842 episode score is 932.35


 15%|█▍        | 14947/100000 [4:53:50<27:57:36,  1.18s/it]

146851 episode score is 930.98


 15%|█▍        | 14948/100000 [4:53:52<27:50:10,  1.18s/it]

146860 episode score is 916.03


 15%|█▍        | 14949/100000 [4:53:53<27:30:46,  1.16s/it]

146869 episode score is 893.55


 15%|█▍        | 14950/100000 [4:53:54<27:28:03,  1.16s/it]

146879 episode score is 801.43


 15%|█▍        | 14951/100000 [4:53:55<27:02:49,  1.14s/it]

146888 episode score is 854.01


 15%|█▍        | 14952/100000 [4:53:56<26:56:49,  1.14s/it]

146897 episode score is 890.76


 15%|█▍        | 14953/100000 [4:53:57<28:09:17,  1.19s/it]

146907 episode score is 840.97


 15%|█▍        | 14954/100000 [4:53:59<27:51:45,  1.18s/it]

146917 episode score is 795.29


 15%|█▍        | 14955/100000 [4:54:00<27:37:31,  1.17s/it]

146927 episode score is 772.98


 15%|█▍        | 14956/100000 [4:54:01<27:24:52,  1.16s/it]

146936 episode score is 898.36


 15%|█▍        | 14957/100000 [4:54:02<26:56:09,  1.14s/it]

146945 episode score is 852.55


 15%|█▍        | 14958/100000 [4:54:03<27:05:43,  1.15s/it]

146954 episode score is 917.53


 15%|█▍        | 14959/100000 [4:54:04<27:09:50,  1.15s/it]

146963 episode score is 919.50


 15%|█▍        | 14960/100000 [4:54:05<26:56:09,  1.14s/it]

146972 episode score is 869.19


 15%|█▍        | 14961/100000 [4:54:07<27:05:55,  1.15s/it]

146981 episode score is 909.34


 15%|█▍        | 14962/100000 [4:54:08<26:47:02,  1.13s/it]

146990 episode score is 857.20


 15%|█▍        | 14963/100000 [4:54:09<26:48:23,  1.13s/it]

146999 episode score is 893.33


 15%|█▍        | 14964/100000 [4:54:10<26:54:08,  1.14s/it]

147008 episode score is 910.32


 15%|█▍        | 14965/100000 [4:54:11<26:35:06,  1.13s/it]

147017 episode score is 850.37


 15%|█▍        | 14966/100000 [4:54:12<26:18:03,  1.11s/it]

147026 episode score is 833.02


 15%|█▍        | 14967/100000 [4:54:13<26:59:31,  1.14s/it]

147036 episode score is 853.27


 15%|█▍        | 14968/100000 [4:54:14<27:20:03,  1.16s/it]

147046 episode score is 833.06


 15%|█▍        | 14969/100000 [4:54:16<27:31:19,  1.17s/it]

147055 episode score is 946.06


 15%|█▍        | 14970/100000 [4:54:17<27:45:32,  1.18s/it]

147064 episode score is 946.99


 15%|█▍        | 14971/100000 [4:54:18<27:48:23,  1.18s/it]

147073 episode score is 933.81


 15%|█▍        | 14972/100000 [4:54:19<27:55:57,  1.18s/it]

147083 episode score is 809.59


 15%|█▍        | 14973/100000 [4:54:20<27:39:42,  1.17s/it]

147092 episode score is 906.97


 15%|█▍        | 14974/100000 [4:54:22<27:46:23,  1.18s/it]

147101 episode score is 925.44


 15%|█▍        | 14975/100000 [4:54:23<27:24:43,  1.16s/it]

147110 episode score is 856.79


 15%|█▍        | 14976/100000 [4:54:24<27:11:08,  1.15s/it]

147119 episode score is 883.64


 15%|█▍        | 14977/100000 [4:54:25<26:58:31,  1.14s/it]

147128 episode score is 884.24


 15%|█▍        | 14978/100000 [4:54:26<26:48:21,  1.14s/it]

147137 episode score is 864.85


 15%|█▍        | 14979/100000 [4:54:27<26:28:56,  1.12s/it]

147145 episode score is 973.87


 15%|█▍        | 14980/100000 [4:54:28<26:15:13,  1.11s/it]

147154 episode score is 835.80


 15%|█▍        | 14981/100000 [4:54:29<26:21:27,  1.12s/it]

147163 episode score is 890.56


 15%|█▍        | 14982/100000 [4:54:30<26:13:20,  1.11s/it]

147172 episode score is 854.41


 15%|█▍        | 14983/100000 [4:54:32<26:03:48,  1.10s/it]

147181 episode score is 845.52


 15%|█▍        | 14984/100000 [4:54:33<26:06:03,  1.11s/it]

147190 episode score is 852.94


 15%|█▍        | 14985/100000 [4:54:34<25:55:45,  1.10s/it]

147199 episode score is 850.26


 15%|█▍        | 14986/100000 [4:54:35<26:20:01,  1.12s/it]

147209 episode score is 797.98


 15%|█▍        | 14987/100000 [4:54:36<26:30:18,  1.12s/it]

147218 episode score is 907.18


 15%|█▍        | 14988/100000 [4:54:37<27:02:25,  1.15s/it]

147228 episode score is 832.82


 15%|█▍        | 14989/100000 [4:54:38<27:14:12,  1.15s/it]

147237 episode score is 924.23


 15%|█▍        | 14990/100000 [4:54:40<26:45:42,  1.13s/it]

147246 episode score is 823.33


 15%|█▍        | 14991/100000 [4:54:41<27:09:05,  1.15s/it]

147256 episode score is 819.21


 15%|█▍        | 14992/100000 [4:54:42<27:16:14,  1.15s/it]

147266 episode score is 794.09


 15%|█▍        | 14993/100000 [4:54:43<27:25:40,  1.16s/it]

147275 episode score is 939.91


 15%|█▍        | 14994/100000 [4:54:44<27:16:36,  1.16s/it]

147284 episode score is 871.19


 15%|█▍        | 14995/100000 [4:54:45<27:16:33,  1.16s/it]

147294 episode score is 782.22


 15%|█▍        | 14996/100000 [4:54:46<27:11:50,  1.15s/it]

147303 episode score is 898.61


 15%|█▍        | 14997/100000 [4:54:48<27:22:41,  1.16s/it]

147313 episode score is 779.93


 15%|█▍        | 14998/100000 [4:54:49<26:53:44,  1.14s/it]

147323 episode score is 706.83


 15%|█▍        | 14999/100000 [4:54:50<26:35:44,  1.13s/it]

147332 episode score is 849.04
147341 episode score is 844.96


 15%|█▌        | 15000/100000 [4:54:52<34:19:54,  1.45s/it]

Iteration 15000: Average test reward: 864.81


 15%|█▌        | 15001/100000 [4:54:53<32:17:23,  1.37s/it]

147350 episode score is 902.16


 15%|█▌        | 15002/100000 [4:54:54<30:23:27,  1.29s/it]

147359 episode score is 850.81


 15%|█▌        | 15003/100000 [4:54:55<29:19:47,  1.24s/it]

147368 episode score is 864.88


 15%|█▌        | 15004/100000 [4:54:57<28:39:18,  1.21s/it]

147377 episode score is 907.66


 15%|█▌        | 15005/100000 [4:54:58<28:21:37,  1.20s/it]

147386 episode score is 909.26


 15%|█▌        | 15006/100000 [4:54:59<27:54:26,  1.18s/it]

147395 episode score is 887.51


 15%|█▌        | 15007/100000 [4:55:00<27:31:51,  1.17s/it]

147404 episode score is 878.77


 15%|█▌        | 15008/100000 [4:55:01<27:30:07,  1.16s/it]

147413 episode score is 921.33


 15%|█▌        | 15009/100000 [4:55:02<27:17:17,  1.16s/it]

147422 episode score is 855.51


 15%|█▌        | 15010/100000 [4:55:04<27:21:19,  1.16s/it]

147431 episode score is 923.49


 15%|█▌        | 15011/100000 [4:55:05<27:34:36,  1.17s/it]

147440 episode score is 938.40


 15%|█▌        | 15012/100000 [4:55:06<27:57:19,  1.18s/it]

147449 episode score is 884.95


 15%|█▌        | 15013/100000 [4:55:07<28:03:44,  1.19s/it]

147459 episode score is 828.70


 15%|█▌        | 15014/100000 [4:55:08<27:49:35,  1.18s/it]

147468 episode score is 904.32


 15%|█▌        | 15015/100000 [4:55:09<27:58:29,  1.19s/it]

147477 episode score is 971.60


 15%|█▌        | 15016/100000 [4:55:11<28:07:34,  1.19s/it]

147486 episode score is 956.85


 15%|█▌        | 15017/100000 [4:55:12<28:09:47,  1.19s/it]

147495 episode score is 959.98


 15%|█▌        | 15018/100000 [4:55:13<27:46:03,  1.18s/it]

147504 episode score is 891.96


 15%|█▌        | 15019/100000 [4:55:14<27:36:53,  1.17s/it]

147513 episode score is 921.97


 15%|█▌        | 15020/100000 [4:55:15<27:18:00,  1.16s/it]

147522 episode score is 887.39


 15%|█▌        | 15021/100000 [4:55:16<27:27:04,  1.16s/it]

147531 episode score is 926.84


 15%|█▌        | 15022/100000 [4:55:18<27:24:37,  1.16s/it]

147540 episode score is 893.15


 15%|█▌        | 15023/100000 [4:55:19<27:12:26,  1.15s/it]